1 /*
2  * $LynxId: UCDefs.h,v 1.17 2009/03/10 20:02:44 tom Exp $
3  *
4  * Definitions for Unicode character-translations
5  */
6 
7 #ifndef UCDEFS_H
8 #define UCDEFS_H
9 
10 #ifndef HTUTILS_H
11 #include <HTUtils.h>
12 #endif
13 
/*
 * Description of a single character set.
 */
typedef struct _LYUCcharset {
    /* >= 0 for chartrans tables, -1 for "old" charsets */
    int UChndl;

    const char *MIMEname;
    int enc;

    /*
     * If positive, an IBM OS/2 specific number;
     * if negative, a flag for no table translation.
     */
    int codepage;

    /*
     * The members below are not used by the chartrans mechanism;
     * they describe some relationships against the built-in Latin1 charset.
     */

    /* unused */
    int repertoire;

    /* subset/superset of Latin1 ? */
    int codepoints;

    /*
     * Unused, made obsolete by LYlowest_eightbit:
     * "which ranges have valid displayable chars (including nbsp and shy)"
     */
    int cpranges;

    /*
     * "For which ranges is it like 8859-1".
     * Currently used for nbsp and shy only
     * (but UCT_R_8859SPECL is assumed for any UCT_R_8BIT...).
     */
    int like8859;
} LYUCcharset;
33 
/*
 * Encoding classes.  Numeric values are spelled out; they match the
 * values the enumerators would receive implicitly.
 */
typedef enum {
    UCT_ENC_7BIT = 0,
    UCT_ENC_8BIT = 1,
    /* no displayable chars in 0x80-0x9F */
    UCT_ENC_8859 = 2,
    /* 8-bit + some chars in C0 control area */
    UCT_ENC_8BIT_C0 = 3,
    UCT_ENC_MAYBE2022 = 4,
    UCT_ENC_CJK = 5,
    UCT_ENC_16BIT = 6,
    UCT_ENC_UTF8 = 7
} eUCT_ENC;
44 
/*
 * Bitflags relating a charset's repertoire to Latin1.
 *
 * UCT_REP_IS_LAT1 is the union of both flags.  Its expansion is
 * parenthesized so that it behaves as a single value inside larger
 * expressions such as (x & UCT_REP_IS_LAT1) or ~UCT_REP_IS_LAT1.
 */
#define UCT_REP_SUBSETOF_LAT1 0x01
#define UCT_REP_SUPERSETOF_LAT1 0x02
#define UCT_REP_IS_LAT1 (UCT_REP_SUBSETOF_LAT1 | UCT_REP_SUPERSETOF_LAT1)
48 /*
 *  Assume everything we deal with is included in the UCS2 repertoire,
50  *  so a flag for _REP_SUBSETOF_UCS2 would be redundant.
51  */
52 
/*
 *  More general description how the code points relate to 8859-1 and UCS.
 *
 *  UCT_CP_IS_LAT1 combines the subset and superset flags.  Its expansion is
 *  parenthesized so that it behaves as a single value inside larger
 *  expressions such as (x & UCT_CP_IS_LAT1) or ~UCT_CP_IS_LAT1.
 */
#define UCT_CP_SUBSETOF_LAT1 0x01	/* implies UCT_CP_SUBSETOF_UCS2 */
#define UCT_CP_SUPERSETOF_LAT1 0x02
#define UCT_CP_SUBSETOF_UCS2 0x04

#define UCT_CP_IS_LAT1 (UCT_CP_SUBSETOF_LAT1 | UCT_CP_SUPERSETOF_LAT1)
61 
/*
 *  More specific bitflags for practically important code point ranges.
 */
#define UCT_R_LOWCTRL 0x08	/* 0x00-0x1F, for completeness */
#define UCT_R_7BITINV 0x10	/* invariant???, displayable 7bit chars */
#define UCT_R_7BITNAT 0x20	/* displayable 7bit, national??? */
#define UCT_R_HIGHCTRL 0x40	/* chars in 0x80-0x9F range */
#define UCT_R_8859SPECL 0x80	/* special chars in 8859-x sets: nbsp and shy */
#define UCT_R_HIGH8BIT 0x100	/* rest of 0xA0-0xFF range */

/*
 *  Composite masks built from the flags above.  Each expansion is
 *  parenthesized so that the mask acts as a single operand when combined
 *  with '&', '~' or comparison operators, which bind tighter than '|'.
 */
#define UCT_R_ASCII (UCT_R_7BITINV | UCT_R_7BITNAT)	/* displayable US-ASCII */
#define UCT_R_LAT1  (UCT_R_ASCII | UCT_R_8859SPECL | UCT_R_HIGH8BIT)
#define UCT_R_8BIT  (UCT_R_LAT1 | UCT_R_HIGHCTRL)	/* full 8bit range */
75 
/*
 *  Processing stages; some comments on these are in HTAnchor.c.
 *  UCT_STAGEMAX is the last enumerator and therefore equals the number of
 *  stages listed before it.
 */
typedef enum {
    UCT_STAGE_MIME = 0,
    /* What the parser (SGML.c) gets to see */
    UCT_STAGE_PARSER = 1,
    /* What the structured stream (HTML) gets fed */
    UCT_STAGE_STRUCTURED = 2,
    /* What gets fed to the HText_* functions */
    UCT_STAGE_HTEXT = 3,
    UCT_STAGEMAX = 4
} eUCT_STAGE;
86 
/*
 * Identifies what set a charset.  Numeric values are spelled out; they
 * match the values the enumerators would receive implicitly.
 */
typedef enum {
    UCT_SETBY_NONE = 0,
    UCT_SETBY_DEFAULT = 1,
    /* set by A or LINK CHARSET= hint */
    UCT_SETBY_LINK = 2,
    /* structured stream stage (HTML.c) */
    UCT_SETBY_STRUCTURED = 3,
    /* set by SGML parser or similar */
    UCT_SETBY_PARSER = 4,
    /* set explicitly by MIME charset parameter */
    UCT_SETBY_MIME = 5
} eUCT_SETBY;
95 
96 typedef struct _UCStageInfo {
97     int lock;			/* by what it has been set */
98     int LYhndl;
99     LYUCcharset C;
100 } UCStageInfo;
101 
102 typedef struct _UCAnchorInfo {
103     struct _UCStageInfo s[UCT_STAGEMAX];
104 } UCAnchorInfo;
105 
106 #endif /* UCDEFS_H */
107