/*
 * $LynxId: UCDefs.h,v 1.17 2009/03/10 20:02:44 tom Exp $
 *
 * Definitions for Unicode character-translations
 */

#ifndef UCDEFS_H
#define UCDEFS_H

#ifndef HTUTILS_H
#include <HTUtils.h>
#endif

/*
 * Per-charset record.  The first group of members is what the chartrans
 * mechanism uses; the second group only describes how the charset relates
 * to the built-in Latin1 charset.
 */
typedef struct _LYUCcharset {
    int UChndl;			/* -1 for "old" charsets, >= 0 for chartrans tables */

    const char *MIMEname;
    int enc;			/* NOTE(review): presumably an eUCT_ENC value - confirm */
    int codepage;		/* if positive, an IBM OS/2 specific number;
				   if negative, flag for no table translation */

    /* parameters below are not used by chartrans mechanism, */
    /* they describe some relationships against built-in Latin1 charset... */
    int repertoire;		/* unused */
    int codepoints;		/* subset/superset of Latin1 ? */
    int cpranges;		/* unused, obsolete by LYlowest_eightbit;
				   "which ranges have valid displayable chars
				   (including nbsp and shy)" */
    int like8859;		/* currently used for nbsp and shy only
				   (but UCT_R_8859SPECL assumed for any UCT_R_8BIT...);
				   "for which ranges is it like 8859-1" */
} LYUCcharset;

/*
 * Encoding classes.  Enumerators take the implicit values 0..7 in the
 * order listed; do not reorder.
 */
typedef enum {
    UCT_ENC_7BIT,
    UCT_ENC_8BIT,
    UCT_ENC_8859,		/* no displayable chars in 0x80-0x9F */
    UCT_ENC_8BIT_C0,		/* 8-bit + some chars in C0 control area */
    UCT_ENC_MAYBE2022,
    UCT_ENC_CJK,
    UCT_ENC_16BIT,
    UCT_ENC_UTF8
} eUCT_ENC;

/*
 * Repertoire flags.
 *
 * Composite flag macros in this header are fully parenthesized so that they
 * behave as a single operand in expressions such as "x & UCT_REP_IS_LAT1"
 * or "x == UCT_REP_IS_LAT1" ('&' and '==' bind tighter than '|').
 */
#define UCT_REP_SUBSETOF_LAT1	0x01
#define UCT_REP_SUPERSETOF_LAT1	0x02
#define UCT_REP_IS_LAT1		(UCT_REP_SUBSETOF_LAT1 | UCT_REP_SUPERSETOF_LAT1)
/*
 * Assume everything we deal with is included in the UCS2 repertoire,
 * so a flag for _REP_SUBSETOF_UCS2 would be redundant.
 */

/*
 * More general description how the code points relate to 8859-1 and UCS:
 */
#define UCT_CP_SUBSETOF_LAT1	0x01	/* implies UCT_CP_SUBSETOF_UCS2 */
#define UCT_CP_SUPERSETOF_LAT1	0x02
#define UCT_CP_SUBSETOF_UCS2	0x04

#define UCT_CP_IS_LAT1		(UCT_CP_SUBSETOF_LAT1 | UCT_CP_SUPERSETOF_LAT1)

/*
 * More specific bitflags for practically important code point ranges:
 */
#define UCT_R_LOWCTRL		0x08	/* 0x00-0x1F, for completeness */
#define UCT_R_7BITINV		0x10	/* invariant???, displayable 7bit chars */
#define UCT_R_7BITNAT		0x20	/* displayable 7bit, national??? */
#define UCT_R_HIGHCTRL		0x40	/* chars in 0x80-0x9F range */
#define UCT_R_8859SPECL		0x80	/* special chars in 8859-x sets: nbsp and shy */
#define UCT_R_HIGH8BIT		0x100	/* rest of 0xA0-0xFF range */

/* displayable US-ASCII (0x30) */
#define UCT_R_ASCII		(UCT_R_7BITINV | UCT_R_7BITNAT)
/* ASCII plus nbsp/shy plus the rest of 0xA0-0xFF (0x1B0) */
#define UCT_R_LAT1		(UCT_R_ASCII | UCT_R_8859SPECL | UCT_R_HIGH8BIT)
/* full 8bit range (0x1F0); note that UCT_R_LOWCTRL is not part of it */
#define UCT_R_8BIT		(UCT_R_LAT1 | UCT_R_HIGHCTRL)

/*
 * For the following some comments are in HTAnchor.c.
 *
 * UCT_STAGEMAX is not a stage itself: it is the number of stages and is
 * used below as the dimension of UCAnchorInfo.s[].
 */
typedef enum {
    UCT_STAGE_MIME,
    UCT_STAGE_PARSER,		/* What the parser (SGML.c) gets to see */
    UCT_STAGE_STRUCTURED,	/* What the structured stream (HTML) gets fed */
    UCT_STAGE_HTEXT,		/* What gets fed to the HText_* functions */
    UCT_STAGEMAX
} eUCT_STAGE;

/*
 * Who set the charset information.  Enumerators take the implicit values
 * 0..5 in the order listed; do not reorder.
 */
typedef enum {
    UCT_SETBY_NONE,
    UCT_SETBY_DEFAULT,
    UCT_SETBY_LINK,		/* set by A or LINK CHARSET= hint */
    UCT_SETBY_STRUCTURED,	/* structured stream stage (HTML.c) */
    UCT_SETBY_PARSER,		/* set by SGML parser or similar */
    UCT_SETBY_MIME		/* set explicitly by MIME charset parameter */
} eUCT_SETBY;

/*
 * Charset information for one stage.
 */
typedef struct _UCStageInfo {
    int lock;			/* by what it has been set
				   NOTE(review): presumably an eUCT_SETBY value - confirm */
    int LYhndl;
    LYUCcharset C;
} UCStageInfo;

/*
 * One UCStageInfo per eUCT_STAGE value, indexed 0 .. UCT_STAGEMAX-1.
 */
typedef struct _UCAnchorInfo {
    struct _UCStageInfo s[UCT_STAGEMAX];
} UCAnchorInfo;

#endif /* UCDEFS_H */