1 /*
2 * $LynxId: LYCharSets.c,v 1.68 2013/01/04 21:47:16 tom Exp $
3 */
4 #include <HTUtils.h>
5 #include <HTCJK.h>
6 #include <HTMLDTD.h>
7
8 #include <LYGlobalDefs.h>
9 #include <UCMap.h>
10 #include <UCdomap.h>
11 #include <UCDefs.h>
12 #include <LYCharSets.h>
13 #include <GridText.h>
14 #include <LYCurses.h>
15 #include <LYStrings.h>
16
17 #include <LYLeaks.h>
18
/*
 * Prototype for the locale hook that HTMLSetDisplayCharsetMatchLocale()
 * calls; only compiled in for MirBSD builds where the name is also defined
 * as a macro.
 */
#if defined(__MirBSD__) && defined(_nc_set_locale)
extern void _nc_set_locale(char *locale);
#endif
22
23 HTkcode kanji_code = NOKANJI;
24 BOOLEAN LYHaveCJKCharacterSet = FALSE;
25 BOOLEAN DisplayCharsetMatchLocale = TRUE;
26 BOOL force_old_UCLYhndl_on_reload = FALSE;
27 int forced_UCLYhdnl;
28 int LYNumCharsets = 0; /* Will be initialized later by UC_Register. */
29 int current_char_set = -1; /* will be intitialized later in LYMain.c */
30 int linedrawing_char_set = -1;
31 STRING2PTR p_entity_values = NULL; /* Pointer, for HTML_put_entity() */
32
33 /* obsolete and probably not used(???) */
34 /* will be initialized in HTMLUseCharacterSet */
/*
 * State for the optional charset-choice feature.  The map/choice arrays are
 * only present when the options menu does not list every charset.
 */
#ifdef USE_CHARSET_CHOICE
charset_subset_t charset_subsets[MAXCHARSETS];
BOOL custom_display_charset = FALSE;
BOOL custom_assumed_doc_charset = FALSE;

#ifndef ALL_CHARSETS_IN_O_MENU_SCREEN
int display_charset_map[MAXCHARSETS];
int assumed_doc_charset_map[MAXCHARSETS];

/* One extra slot beyond MAXCHARSETS in each choices array. */
const char *display_charset_choices[MAXCHARSETS + 1];
const char *assumed_charset_choices[MAXCHARSETS + 1];
int displayed_display_charset_idx;
#endif /* !ALL_CHARSETS_IN_O_MENU_SCREEN */
#endif /* USE_CHARSET_CHOICE */
49
50 /*
51 * New character sets now declared with UCInit() in UCdomap.c
52 *
53 * INSTRUCTIONS for adding new character sets which do not have
54 * Unicode tables now in UCdomap.h
55 *
56 *
57 * [We hope you need not correct/add old-style mapping below as in ISO_LATIN1[]
58 * or SevenBitApproximations[] any more - it works now via new chartrans
59 * mechanism, but kept for compatibility only: we should cleanup the stuff,
60 * but this is not so easy...]
61 *
62 * Currently we only declare some charset's properties here (such as MIME
63 * names, etc.), it does not include real mapping.
64 *
65 * There is a place marked "Add your new character sets HERE" in this file.
66 * Make up a character set and add it in the same style as the ISO_LATIN1 set
67 * below, giving it a unique name.
68 *
69 * Add the name of the set to LYCharSets. Similarly add the appropriate
70 * information to the tables below: LYchar_set_names, LYCharSet_UC,
71 * LYlowest_eightbit. These 4 tables all MUST have the same order. (And this
72 * is the order you will see in Lynx Options Menu, which is why few
73 * unicode-based charsets are listed here).
74 *
75 */
76
/*
 * Entity values -- local representation for ISO Latin 1.
 *
 * One string per named entity; the entity name leads each trailing comment.
 * This MUST match exactly the table referred to in the DTD!
 *
 * The entries flagged "NEVER CHANGE THIS" hold low control codes
 * (\001, \002, \007) instead of Latin-1 bytes.
 */
static const char *ISO_Latin1[] =
{
    "\306",			/* AElig  - capital AE diphthong (ligature) */
    "\301",			/* Aacute - capital A, acute accent */
    "\302",			/* Acirc  - capital A, circumflex accent */
    "\300",			/* Agrave - capital A, grave accent */
    "\305",			/* Aring  - capital A, ring */
    "\303",			/* Atilde - capital A, tilde */
    "\304",			/* Auml   - capital A, dieresis or umlaut mark */
    "\307",			/* Ccedil - capital C, cedilla */
    "\320",			/* Dstrok - capital Eth or D with stroke */
    "\320",			/* ETH    - capital Eth, Icelandic */
    "\311",			/* Eacute - capital E, acute accent */
    "\312",			/* Ecirc  - capital E, circumflex accent */
    "\310",			/* Egrave - capital E, grave accent */
    "\313",			/* Euml   - capital E, dieresis or umlaut mark */
    "\315",			/* Iacute - capital I, acute accent */
    "\316",			/* Icirc  - capital I, circumflex accent */
    "\314",			/* Igrave - capital I, grave accent */
    "\317",			/* Iuml   - capital I, dieresis or umlaut mark */
    "\321",			/* Ntilde - capital N, tilde */
    "\323",			/* Oacute - capital O, acute accent */
    "\324",			/* Ocirc  - capital O, circumflex accent */
    "\322",			/* Ograve - capital O, grave accent */
    "\330",			/* Oslash - capital O, slash */
    "\325",			/* Otilde - capital O, tilde */
    "\326",			/* Ouml   - capital O, dieresis or umlaut mark */
    "\336",			/* THORN  - capital THORN, Icelandic */
    "\332",			/* Uacute - capital U, acute accent */
    "\333",			/* Ucirc  - capital U, circumflex accent */
    "\331",			/* Ugrave - capital U, grave accent */
    "\334",			/* Uuml   - capital U, dieresis or umlaut mark */
    "\335",			/* Yacute - capital Y, acute accent */
    "\341",			/* aacute - small a, acute accent */
    "\342",			/* acirc  - small a, circumflex accent */
    "\264",			/* acute  - spacing acute */
    "\346",			/* aelig  - small ae diphthong (ligature) */
    "\340",			/* agrave - small a, grave accent */
    "\046",			/* amp    - ampersand (&) */
    "\345",			/* aring  - small a, ring */
    "\343",			/* atilde - small a, tilde */
    "\344",			/* auml   - small a, dieresis or umlaut mark */
    "\246",			/* brkbar - broken vertical bar */
    "\246",			/* brvbar - broken vertical bar */
    "\347",			/* ccedil - small c, cedilla */
    "\270",			/* cedil  - spacing cedilla */
    "\242",			/* cent   - cent sign */
    "\251",			/* copy   - copyright sign */
    "\244",			/* curren - currency sign */
    "\260",			/* deg    - degree sign */
    "\250",			/* die    - spacing dieresis */
    "\367",			/* divide - division sign */
    "\351",			/* eacute - small e, acute accent */
    "\352",			/* ecirc  - small e, circumflex accent */
    "\350",			/* egrave - small e, grave accent */
    "-",			/* emdash - dash the width of emsp */
    "\002",			/* emsp   - em space, not collapsed; NEVER CHANGE THIS */
    "-",			/* endash - dash the width of ensp */
    "\002",			/* ensp   - en space, not collapsed; NEVER CHANGE THIS */
    "\360",			/* eth    - small eth, Icelandic */
    "\353",			/* euml   - small e, dieresis or umlaut mark */
    "\275",			/* frac12 - fraction 1/2 */
    "\274",			/* frac14 - fraction 1/4 */
    "\276",			/* frac34 - fraction 3/4 */
    "\076",			/* gt     - greater than (>) */
    "\257",			/* hibar  - spacing macron */
    "\355",			/* iacute - small i, acute accent */
    "\356",			/* icirc  - small i, circumflex accent */
    "\241",			/* iexcl  - inverted exclamation mark */
    "\354",			/* igrave - small i, grave accent */
    "\277",			/* iquest - inverted question mark */
    "\357",			/* iuml   - small i, dieresis or umlaut mark */
    "\253",			/* laquo  - angle quotation mark, left */
    "\074",			/* lt     - less than (<) */
    "\257",			/* macr   - spacing macron */
    "-",			/* mdash  - dash the width of emsp */
    "\265",			/* micro  - micro sign */
    "\267",			/* middot - middle dot */
    "\001",			/* nbsp   - non-breaking space; NEVER CHANGE THIS */
    "-",			/* ndash  - dash the width of ensp */
    "\254",			/* not    - negation sign */
    "\361",			/* ntilde - small n, tilde */
    "\363",			/* oacute - small o, acute accent */
    "\364",			/* ocirc  - small o, circumflex accent */
    "\362",			/* ograve - small o, grave accent */
    "\252",			/* ordf   - feminine ordinal indicator */
    "\272",			/* ordm   - masculine ordinal indicator */
    "\370",			/* oslash - small o, slash */
    "\365",			/* otilde - small o, tilde */
    "\366",			/* ouml   - small o, dieresis or umlaut mark */
    "\266",			/* para   - paragraph sign */
    "\261",			/* plusmn - plus-or-minus sign */
    "\243",			/* pound  - pound sign */
    "\042",			/* quot   - double quote (") */
    "\273",			/* raquo  - angle quotation mark, right */
    "\256",			/* reg    - circled R registered sign */
    "\247",			/* sect   - section sign */
    "\007",			/* shy    - soft hyphen; NEVER CHANGE THIS */
    "\271",			/* sup1   - superscript 1 */
    "\262",			/* sup2   - superscript 2 */
    "\263",			/* sup3   - superscript 3 */
    "\337",			/* szlig  - small sharp s, German (sz ligature) */
    "\002",			/* thinsp - thin space, not collapsed; NEVER CHANGE THIS */
    "\376",			/* thorn  - small thorn, Icelandic */
    "\327",			/* times  - multiplication sign */
    "(TM)",			/* trade  - circled TM trade mark sign */
    "\372",			/* uacute - small u, acute accent */
    "\373",			/* ucirc  - small u, circumflex accent */
    "\371",			/* ugrave - small u, grave accent */
    "\250",			/* uml    - spacing dieresis */
    "\374",			/* uuml   - small u, dieresis or umlaut mark */
    "\375",			/* yacute - small y, acute accent */
    "\245",			/* yen    - yen sign */
    "\377",			/* yuml   - small y, dieresis or umlaut mark */
};
196
/*
 * Entity values -- 7 bit character approximations.
 *
 * One string per named entity; the entity name leads each trailing comment.
 * This MUST match exactly the table referred to in the DTD!
 *
 * UML7(digraph, plain) selects the two-letter transliteration of an umlaut
 * when LY_UMLAUT is defined and the bare base letter otherwise.
 */
#ifdef LY_UMLAUT
#define UML7(digraph, plain) digraph
#else
#define UML7(digraph, plain) plain
#endif /* LY_UMLAUT */

const char *SevenBitApproximations[] =
{
    "AE",			/* AElig  - capital AE diphthong (ligature) */
    "A",			/* Aacute - capital A, acute accent */
    "A",			/* Acirc  - capital A, circumflex accent */
    "A",			/* Agrave - capital A, grave accent */
    "A",			/* Aring  - capital A, ring */
    "A",			/* Atilde - capital A, tilde */
    UML7("Ae", "A"),		/* Auml   - capital A, dieresis or umlaut mark */
    "C",			/* Ccedil - capital C, cedilla */
    "Dj",			/* Dstrok - capital D with stroke */
    "DH",			/* ETH    - capital Eth, Icelandic */
    "E",			/* Eacute - capital E, acute accent */
    "E",			/* Ecirc  - capital E, circumflex accent */
    "E",			/* Egrave - capital E, grave accent */
    "E",			/* Euml   - capital E, dieresis or umlaut mark */
    "I",			/* Iacute - capital I, acute accent */
    "I",			/* Icirc  - capital I, circumflex accent */
    "I",			/* Igrave - capital I, grave accent */
    "I",			/* Iuml   - capital I, dieresis or umlaut mark */
    "N",			/* Ntilde - capital N, tilde */
    "O",			/* Oacute - capital O, acute accent */
    "O",			/* Ocirc  - capital O, circumflex accent */
    "O",			/* Ograve - capital O, grave accent */
    "O",			/* Oslash - capital O, slash */
    "O",			/* Otilde - capital O, tilde */
    UML7("Oe", "O"),		/* Ouml   - capital O, dieresis or umlaut mark */
    "P",			/* THORN  - capital THORN, Icelandic */
    "U",			/* Uacute - capital U, acute accent */
    "U",			/* Ucirc  - capital U, circumflex accent */
    "U",			/* Ugrave - capital U, grave accent */
    UML7("Ue", "U"),		/* Uuml   - capital U, dieresis or umlaut mark */
    "Y",			/* Yacute - capital Y, acute accent */
    "a",			/* aacute - small a, acute accent */
    "a",			/* acirc  - small a, circumflex accent */
    "'",			/* acute  - spacing acute */
    "ae",			/* aelig  - small ae diphthong (ligature) */
    "`a",			/* agrave - small a, grave accent */
    "&",			/* amp    - ampersand */
    "a",			/* aring  - small a, ring */
    "a",			/* atilde - small a, tilde */
    UML7("ae", "a"),		/* auml   - small a, dieresis or umlaut mark */
    "|",			/* brkbar - broken vertical bar */
    "|",			/* brvbar - broken vertical bar */
    "c",			/* ccedil - small c, cedilla */
    ",",			/* cedil  - spacing cedilla */
    "-c-",			/* cent   - cent sign */
    "(c)",			/* copy   - copyright sign */
    "CUR",			/* curren - currency sign */
    "DEG",			/* deg    - degree sign */
    "\042",			/* die    - spacing dieresis */
    "/",			/* divide - division sign */
    "e",			/* eacute - small e, acute accent */
    "e",			/* ecirc  - small e, circumflex accent */
    "e",			/* egrave - small e, grave accent */
    "-",			/* emdash - dash the width of emsp */
    "\002",			/* emsp   - NEVER CHANGE THIS */
    "-",			/* endash - dash the width of ensp */
    "\002",			/* ensp   - NEVER CHANGE THIS */
    "dh",			/* eth    - small eth, Icelandic */
    "e",			/* euml   - small e, dieresis or umlaut mark */
    " 1/2",			/* frac12 - fraction 1/2 */
    " 1/4",			/* frac14 - fraction 1/4 */
    " 3/4",			/* frac34 - fraction 3/4 */
    ">",			/* gt     - greater than */
    "-",			/* hibar  - spacing macron */
    "i",			/* iacute - small i, acute accent */
    "i",			/* icirc  - small i, circumflex accent */
    "!",			/* iexcl  - inverted exclamation mark */
    "`i",			/* igrave - small i, grave accent */
    "?",			/* iquest - inverted question mark */
    "i",			/* iuml   - small i, dieresis or umlaut mark */
    "<<",			/* laquo  - angle quotation mark, left */
    "<",			/* lt     - less than */
    "-",			/* macr   - spacing macron */
    "-",			/* mdash  - dash the width of emsp */
    "u",			/* micro  - micro sign */
    ".",			/* middot - middle dot */
    "\001",			/* nbsp   - non-breaking space; NEVER CHANGE THIS */
    "-",			/* ndash  - dash the width of ensp */
    "NOT",			/* not    - negation sign */
    "n",			/* ntilde - small n, tilde */
    "o",			/* oacute - small o, acute accent */
    "o",			/* ocirc  - small o, circumflex accent */
    "o",			/* ograve - small o, grave accent */
    "-a",			/* ordf   - feminine ordinal indicator */
    "-o",			/* ordm   - masculine ordinal indicator */
    "o",			/* oslash - small o, slash */
    "o",			/* otilde - small o, tilde */
    UML7("oe", "o"),		/* ouml   - small o, dieresis or umlaut mark */
    "P:",			/* para   - paragraph sign */
    "+-",			/* plusmn - plus-or-minus sign */
    "-L-",			/* pound  - pound sign */
    "\"",			/* quot   - double quote */
    ">>",			/* raquo  - angle quotation mark, right */
    "(R)",			/* reg    - circled R registered sign */
    "S:",			/* sect   - section sign */
    "\007",			/* shy    - soft hyphen; NEVER CHANGE THIS */
    "^1",			/* sup1   - superscript 1 */
    "^2",			/* sup2   - superscript 2 */
    "^3",			/* sup3   - superscript 3 */
    "ss",			/* szlig  - small sharp s, German (sz ligature) */
    "\002",			/* thinsp - thin space, not collapsed; NEVER CHANGE THIS */
    "p",			/* thorn  - small thorn, Icelandic */
    "*",			/* times  - multiplication sign */
    "(TM)",			/* trade  - circled TM trade mark sign */
    "u",			/* uacute - small u, acute accent */
    "u",			/* ucirc  - small u, circumflex accent */
    "u",			/* ugrave - small u, grave accent */
    "\042",			/* uml    - spacing dieresis */
    UML7("ue", "u"),		/* uuml   - small u, dieresis or umlaut mark */
    "y",			/* yacute - small y, acute accent */
    "YEN",			/* yen    - yen sign */
    "y",			/* yuml   - small y, dieresis or umlaut mark */
};

#undef UML7
340
341 /*
342 * Add your new character sets HERE (but only if you can't construct Unicode
343 * tables for them). - FM
344 */
345
346 /*
347 * Add the array name to LYCharSets
348 */
349 STRING2PTR LYCharSets[MAXCHARSETS] =
350 {
351 ISO_Latin1, /* ISO Latin 1 */
352 SevenBitApproximations, /* 7 Bit Approximations */
353 };
354
355 /*
356 * Add the name that the user will see below. The order of LYCharSets and
357 * LYchar_set_names MUST be the same
358 */
359 const char *LYchar_set_names[MAXCHARSETS + 1] =
360 {
361 "Western (ISO-8859-1)",
362 "7 bit approximations (US-ASCII)",
363 (char *) 0
364 };
365
366 /*
367 * Associate additional pieces of info with each of the charsets listed above.
368 * Will be automatically modified (and extended) by charset translations which
369 * are loaded using the chartrans mechanism. Most important piece of info to
370 * put here is a MIME charset name. Used for chartrans (see UCDefs.h). The
371 * order of LYCharSets and LYCharSet_UC MUST be the same.
372 *
373 * Note that most of the charsets added by the new mechanism in src/chrtrans
374 * don't show up here at all. They don't have to.
375 */
376 LYUCcharset LYCharSet_UC[MAXCHARSETS] =
377 {
378 /*
379 * Zero position placeholder and HTMLGetEntityUCValue() reference. - FM
380 */
381 {-1, "iso-8859-1", UCT_ENC_8BIT, 0,
382 UCT_REP_IS_LAT1,
383 UCT_CP_IS_LAT1, UCT_R_LAT1, UCT_R_LAT1},
384
385 /*
386 * Placeholders for Unicode tables. - FM
387 */
388 {-1, "us-ascii", UCT_ENC_7BIT, 0,
389 UCT_REP_SUBSETOF_LAT1,
390 UCT_CP_SUBSETOF_LAT1, UCT_R_ASCII, UCT_R_ASCII},
391
392 };
393
394 /*
395 * Add the code of the the lowest character with the high bit set that can be
396 * directly displayed. The order of LYCharSets and LYlowest_eightbit MUST be
397 * the same.
398 *
399 * (If charset have chartrans unicode table, LYlowest_eightbit will be
400 * verified/modified anyway.)
401 */
402 int LYlowest_eightbit[MAXCHARSETS] =
403 {
404 160, /* ISO Latin 1 */
405 999, /* 7 bit approximations */
406 };
407
408 /*
409 * Function to set the handling of selected character sets based on the current
410 * LYUseDefaultRawMode value. - FM
411 */
HTMLSetCharacterHandling(int i)412 void HTMLSetCharacterHandling(int i)
413 {
414 int chndl = safeUCGetLYhndl_byMIME(UCAssume_MIMEcharset);
415 BOOLEAN LYRawMode_flag = LYRawMode;
416 int UCLYhndl_for_unspec_flag = UCLYhndl_for_unspec;
417
418 if (LYCharSet_UC[i].enc != UCT_ENC_CJK) {
419 HTCJK = NOCJK;
420 kanji_code = NOKANJI;
421 if (i == chndl)
422 LYRawMode = LYUseDefaultRawMode;
423 else
424 LYRawMode = (BOOL) (!LYUseDefaultRawMode);
425
426 HTPassEightBitNum = (BOOL) ((LYCharSet_UC[i].codepoints & UCT_CP_SUPERSETOF_LAT1)
427 || (LYCharSet_UC[i].like8859 & UCT_R_HIGH8BIT));
428
429 if (LYRawMode) {
430 HTPassEightBitRaw = (BOOL) (LYlowest_eightbit[i] <= 160);
431 } else {
432 HTPassEightBitRaw = FALSE;
433 }
434 if (LYRawMode || i == chndl) {
435 HTPassHighCtrlRaw = (BOOL) (LYlowest_eightbit[i] <= 130);
436 } else {
437 HTPassHighCtrlRaw = FALSE;
438 }
439
440 HTPassHighCtrlNum = FALSE;
441
442 } else { /* CJK encoding: */
443 const char *mime = LYCharSet_UC[i].MIMEname;
444
445 if (!strcmp(mime, "euc-cn")) {
446 HTCJK = CHINESE;
447 kanji_code = EUC;
448 } else if (!strcmp(mime, "euc-jp")) {
449 HTCJK = JAPANESE;
450 kanji_code = EUC;
451 } else if (!strcmp(mime, "shift_jis")) {
452 HTCJK = JAPANESE;
453 kanji_code = SJIS;
454 } else if (!strcmp(mime, "euc-kr")) {
455 HTCJK = KOREAN;
456 kanji_code = EUC;
457 } else if (!strcmp(mime, "big5")) {
458 HTCJK = TAIPEI;
459 kanji_code = EUC;
460 }
461
462 /* for any CJK: */
463 if (!LYUseDefaultRawMode)
464 HTCJK = NOCJK;
465 LYRawMode = (BOOL) (IS_CJK_TTY ? TRUE : FALSE);
466 HTPassEightBitRaw = FALSE;
467 HTPassEightBitNum = FALSE;
468 HTPassHighCtrlRaw = (BOOL) (IS_CJK_TTY ? TRUE : FALSE);
469 HTPassHighCtrlNum = FALSE;
470 }
471
472 /*
473 * Comment for coding below:
474 * UCLYhndl_for_unspec is "current" state with LYRawMode, but
475 * UCAssume_MIMEcharset is independent from LYRawMode: holds the history
476 * and may be changed from 'O'ptions menu only. - LP
477 */
478 if (LYRawMode) {
479 UCLYhndl_for_unspec = i; /* UCAssume_MIMEcharset not changed! */
480 } else {
481 if (chndl != i &&
482 (LYCharSet_UC[i].enc != UCT_ENC_CJK ||
483 LYCharSet_UC[chndl].enc != UCT_ENC_CJK)) {
484 UCLYhndl_for_unspec = chndl; /* fall to UCAssume_MIMEcharset */
485 } else {
486 UCLYhndl_for_unspec = LATIN1; /* UCAssume_MIMEcharset not changed! */
487 }
488 }
489
490 #ifdef USE_SLANG
491 if (LYlowest_eightbit[i] > 191) {
492 /*
493 * Higher than this may output cntrl chars to screen. - KW
494 */
495 SLsmg_Display_Eight_Bit = 191;
496 } else {
497 SLsmg_Display_Eight_Bit = LYlowest_eightbit[i];
498 }
499 #endif /* USE_SLANG */
500
501 ena_csi(LYlowest_eightbit[current_char_set] > 155);
502
503 /* some diagnostics */
504 if (TRACE) {
505 if (LYRawMode_flag != LYRawMode)
506 CTRACE((tfp,
507 "HTMLSetCharacterHandling: LYRawMode changed %s -> %s\n",
508 (LYRawMode_flag ? "ON" : "OFF"),
509 (LYRawMode ? "ON" : "OFF")));
510 if (UCLYhndl_for_unspec_flag != UCLYhndl_for_unspec)
511 CTRACE((tfp,
512 "HTMLSetCharacterHandling: UCLYhndl_for_unspec changed %d -> %d\n",
513 UCLYhndl_for_unspec_flag,
514 UCLYhndl_for_unspec));
515 }
516
517 return;
518 }
519
520 /*
521 * Function to set HTCJK based on "in" and "out" charsets.
522 */
Set_HTCJK(const char * inMIMEname,const char * outMIMEname)523 void Set_HTCJK(const char *inMIMEname,
524 const char *outMIMEname)
525 {
526 /* need not check for synonyms: MIMEnames got from LYCharSet_UC */
527
528 if (LYRawMode) {
529 if ((!strcmp(inMIMEname, "euc-jp") ||
530 #ifdef EXP_JAPANESEUTF8_SUPPORT
531 !strcmp(inMIMEname, "utf-8") ||
532 #endif
533 !strcmp(inMIMEname, "shift_jis")) &&
534 (!strcmp(outMIMEname, "euc-jp") ||
535 !strcmp(outMIMEname, "shift_jis"))) {
536 HTCJK = JAPANESE;
537 } else if (!strcmp(inMIMEname, "euc-cn") &&
538 !strcmp(outMIMEname, "euc-cn")) {
539 HTCJK = CHINESE;
540 } else if (!strcmp(inMIMEname, "big5") &&
541 !strcmp(outMIMEname, "big5")) {
542 HTCJK = TAIPEI;
543 } else if (!strcmp(inMIMEname, "euc-kr") &&
544 !strcmp(outMIMEname, "euc-kr")) {
545 HTCJK = KOREAN;
546 } else {
547 HTCJK = NOCJK;
548 }
549 } else {
550 HTCJK = NOCJK;
551 }
552 }
553
554 /*
555 * Function to set the LYDefaultRawMode value based on the selected character
556 * set. - FM
557 *
558 * Currently unused: the default value so obvious that LYUseDefaultRawMode
559 * utilized directly by someone's mistake. - LP
560 */
HTMLSetRawModeDefault(int i)561 static void HTMLSetRawModeDefault(int i)
562 {
563 LYDefaultRawMode = (BOOL) (LYCharSet_UC[i].enc == UCT_ENC_CJK);
564 return;
565 }
566
567 /*
568 * Function to set the LYUseDefaultRawMode value based on the selected
569 * character set and the current LYRawMode value. - FM
570 */
HTMLSetUseDefaultRawMode(int i,int modeflag)571 void HTMLSetUseDefaultRawMode(int i,
572 int modeflag)
573 {
574 if (LYCharSet_UC[i].enc != UCT_ENC_CJK) {
575
576 int chndl = safeUCGetLYhndl_byMIME(UCAssume_MIMEcharset);
577
578 if (i == chndl)
579 LYUseDefaultRawMode = (BOOLEAN) modeflag;
580 else
581 LYUseDefaultRawMode = (BOOL) (!modeflag);
582 } else /* CJK encoding: */
583 LYUseDefaultRawMode = (BOOLEAN) modeflag;
584
585 return;
586 }
587
588 /*
589 * Function to set the LYHaveCJKCharacterSet value based on the selected
590 * character set. - FM
591 */
HTMLSetHaveCJKCharacterSet(int i)592 static void HTMLSetHaveCJKCharacterSet(int i)
593 {
594 LYHaveCJKCharacterSet = (BOOL) (LYCharSet_UC[i].enc == UCT_ENC_CJK);
595 return;
596 }
597
598 /*
599 * Function to set the DisplayCharsetMatchLocale value based on the selected
600 * character set. It is used in UPPER8 for 8bit case-insensitive search by
601 * matching def7_uni.tbl images. - LP
602 */
HTMLSetDisplayCharsetMatchLocale(int i)603 static void HTMLSetDisplayCharsetMatchLocale(int i)
604 {
605 BOOLEAN match;
606
607 #if defined(__MirBSD__) && defined(_nc_set_locale)
608 if (LYCharSet_UC[i].enc == UCT_ENC_UTF8) {
609 _nc_set_locale(NULL);
610 match = TRUE;
611 } else {
612 static char locbuf[32];
613
614 snprintf(locbuf, sizeof(locbuf), "en_US.%s", LYCharSet_UC[i].MIMEname);
615 _nc_set_locale(locbuf);
616 match = FALSE;
617 }
618 #else
619 if (LYHaveCJKCharacterSet) {
620 /*
621 * We have no intention to pass CJK via UCTransChar if that happened.
622 * Let someone from CJK correct this if necessary.
623 */
624 DisplayCharsetMatchLocale = TRUE; /* old-style */
625 return;
626
627 } else if (strncasecomp(LYCharSet_UC[i].MIMEname, "cp", 2) ||
628 strncasecomp(LYCharSet_UC[i].MIMEname, "windows", 7)) {
629 /*
630 * Assume dos/windows displays usually on remote terminal, hence it
631 * rarely matches locale. (In fact, MS Windows codepoints locale are
632 * never seen on UNIX).
633 */
634 match = FALSE;
635 } else {
636 match = TRUE; /* guess, but see below */
637
638 #if !defined(LOCALE)
639 if (LYCharSet_UC[i].enc != UCT_ENC_UTF8)
640 /*
641 * Leave true for utf-8 display - the code doesn't deal very well
642 * with this case. - kw
643 */
644 match = FALSE;
645 #else
646 if (UCForce8bitTOUPPER) {
647 /*
648 * Force disable locale (from lynx.cfg)
649 */
650 match = FALSE;
651 }
652 #endif
653 }
654 #endif /* MirBSD, _nc_set_locale */
655
656 DisplayCharsetMatchLocale = match;
657 return;
658 }
659
/*
 * Compatibility with lynx 2.8/2.7.2 (and earlier): the "human-readable"
 * charset names have changed over time, so the historical names are mapped to
 * MIME names here.  That way old lynx.cfg and (especially) .lynxrc files are
 * always recognized.  Please update this table whenever you change the
 * "fullname" of any present charset.
 */
typedef struct _names_pairs {
    const char *fullname;	/* historical display name */
    const char *MIMEname;	/* MIME charset name it maps to */
} names_pairs;

/* *INDENT-OFF* */
static const names_pairs OLD_charset_names[] =
{
    { "ISO Latin 1",          "iso-8859-1"       },
    { "ISO Latin 2",          "iso-8859-2"       },
    { "WinLatin1 (cp1252)",   "windows-1252"     },
    { "DEC Multinational",    "dec-mcs"          },
    { "Macintosh (8 bit)",    "macintosh"        },
    { "NeXT character set",   "next"             },
    { "KOI8-R Cyrillic",      "koi8-r"           },
    { "Chinese",              "euc-cn"           },
    { "Japanese (EUC)",       "euc-jp"           },
    { "Japanese (SJIS)",      "shift_jis"        },
    { "Korean",               "euc-kr"           },
    { "Taipei (Big5)",        "big5"             },
    { "Vietnamese (VISCII)",  "viscii"           },
    { "7 bit approximations", "us-ascii"         },
    { "Transparent",          "x-transparent"    },
    { "DosLatinUS (cp437)",   "cp437"            },
    { "IBM PC character set", "cp437"            },
    { "DosLatin1 (cp850)",    "cp850"            },
    { "IBM PC codepage 850",  "cp850"            },
    { "DosLatin2 (cp852)",    "cp852"            },
    { "PC Latin2 CP 852",     "cp852"            },
    { "DosCyrillic (cp866)",  "cp866"            },
    { "DosArabic (cp864)",    "cp864"            },
    { "DosGreek (cp737)",     "cp737"            },
    { "DosBaltRim (cp775)",   "cp775"            },
    { "DosGreek2 (cp869)",    "cp869"            },
    { "DosHebrew (cp862)",    "cp862"            },
    { "WinLatin2 (cp1250)",   "windows-1250"     },
    { "WinCyrillic (cp1251)", "windows-1251"     },
    { "WinGreek (cp1253)",    "windows-1253"     },
    { "WinHebrew (cp1255)",   "windows-1255"     },
    { "WinArabic (cp1256)",   "windows-1256"     },
    { "WinBaltRim (cp1257)",  "windows-1257"     },
    { "ISO Latin 3",          "iso-8859-3"       },
    { "ISO Latin 4",          "iso-8859-4"       },
    { "ISO 8859-5 Cyrillic",  "iso-8859-5"       },
    { "ISO 8859-6 Arabic",    "iso-8859-6"       },
    { "ISO 8859-7 Greek",     "iso-8859-7"       },
    { "ISO 8859-8 Hebrew",    "iso-8859-8"       },
    { "ISO-8859-8-I",         "iso-8859-8"       },
    { "ISO-8859-8-E",         "iso-8859-8"       },
    { "ISO 8859-9 (Latin 5)", "iso-8859-9"       },
    { "ISO 8859-10",          "iso-8859-10"      },
    { "UNICODE UTF 8",        "utf-8"            },
    { "RFC 1345 w/o Intro",   "mnemonic+ascii+0" },
    { "RFC 1345 Mnemonic",    "mnemonic"         },
    { NULL,                   NULL               },	/* terminated with NULL */
};
/* *INDENT-ON* */
722
723 /*
724 * lynx 2.8/2.7.2 compatibility code: read "character_set" parameter from
725 * lynx.cfg and .lynxrc in both MIME name and "human-readable" name (old and
726 * new style). Returns -1 if not recognized.
727 */
UCGetLYhndl_byAnyName(char * value)728 int UCGetLYhndl_byAnyName(char *value)
729 {
730 int i;
731
732 if (value == NULL)
733 return -1;
734
735 LYTrimTrailing(value);
736 CTRACE((tfp, "UCGetLYhndl_byAnyName(%s)\n", value));
737
738 /* search by name */
739 for (i = 0; (i < MAXCHARSETS && LYchar_set_names[i]); i++) {
740 if (!strcmp(value, LYchar_set_names[i])) {
741 return i; /* OK */
742 }
743 }
744
745 /* search by old name from 2.8/2.7.2 version */
746 for (i = 0; (OLD_charset_names[i].fullname); i++) {
747 if (!strcmp(value, OLD_charset_names[i].fullname)) {
748 return UCGetLYhndl_byMIME(OLD_charset_names[i].MIMEname); /* OK */
749 }
750 }
751
752 return UCGetLYhndl_byMIME(value); /* by MIME */
753 }
754
/*
 * Entity names -- Ordered by ISO Latin 1 value.
 * ---------------------------------------------
 * For conversions of DECIMAL escaped entities.
 * Must be in order of ascending value: entry 0 is code 160 and the last
 * entry is code 255.
 */
static const char *LYEntityNames[] =
{
    "nbsp",			/* 160: non breaking space */
    "iexcl",			/* 161: inverted exclamation mark */
    "cent",			/* 162: cent sign */
    "pound",			/* 163: pound sign */
    "curren",			/* 164: currency sign */
    "yen",			/* 165: yen sign */
    "brvbar",			/* 166: broken vertical bar, (brkbar) */
    "sect",			/* 167: section sign */
    "uml",			/* 168: spacing dieresis */
    "copy",			/* 169: copyright sign */
    "ordf",			/* 170: feminine ordinal indicator */
    "laquo",			/* 171: angle quotation mark, left */
    "not",			/* 172: negation sign */
    "shy",			/* 173: soft hyphen */
    "reg",			/* 174: circled R registered sign */
    "hibar",			/* 175: spacing macron */
    "deg",			/* 176: degree sign */
    "plusmn",			/* 177: plus-or-minus sign */
    "sup2",			/* 178: superscript 2 */
    "sup3",			/* 179: superscript 3 */
    "acute",			/* 180: spacing acute (96) */
    "micro",			/* 181: micro sign */
    "para",			/* 182: paragraph sign */
    "middot",			/* 183: middle dot */
    "cedil",			/* 184: spacing cedilla */
    "sup1",			/* 185: superscript 1 */
    "ordm",			/* 186: masculine ordinal indicator */
    "raquo",			/* 187: angle quotation mark, right */
    "frac14",			/* 188: fraction 1/4 */
    "frac12",			/* 189: fraction 1/2 */
    "frac34",			/* 190: fraction 3/4 */
    "iquest",			/* 191: inverted question mark */
    "Agrave",			/* 192: capital A, grave accent */
    "Aacute",			/* 193: capital A, acute accent */
    "Acirc",			/* 194: capital A, circumflex accent */
    "Atilde",			/* 195: capital A, tilde */
    "Auml",			/* 196: capital A, dieresis or umlaut mark */
    "Aring",			/* 197: capital A, ring */
    "AElig",			/* 198: capital AE diphthong (ligature) */
    "Ccedil",			/* 199: capital C, cedilla */
    "Egrave",			/* 200: capital E, grave accent */
    "Eacute",			/* 201: capital E, acute accent */
    "Ecirc",			/* 202: capital E, circumflex accent */
    "Euml",			/* 203: capital E, dieresis or umlaut mark */
    "Igrave",			/* 204: capital I, grave accent */
    "Iacute",			/* 205: capital I, acute accent */
    "Icirc",			/* 206: capital I, circumflex accent */
    "Iuml",			/* 207: capital I, dieresis or umlaut mark */
    "ETH",			/* 208: capital Eth, Icelandic (or Latin2 Dstrok) */
    "Ntilde",			/* 209: capital N, tilde */
    "Ograve",			/* 210: capital O, grave accent */
    "Oacute",			/* 211: capital O, acute accent */
    "Ocirc",			/* 212: capital O, circumflex accent */
    "Otilde",			/* 213: capital O, tilde */
    "Ouml",			/* 214: capital O, dieresis or umlaut mark */
    "times",			/* 215: multiplication sign */
    "Oslash",			/* 216: capital O, slash */
    "Ugrave",			/* 217: capital U, grave accent */
    "Uacute",			/* 218: capital U, acute accent */
    "Ucirc",			/* 219: capital U, circumflex accent */
    "Uuml",			/* 220: capital U, dieresis or umlaut mark */
    "Yacute",			/* 221: capital Y, acute accent */
    "THORN",			/* 222: capital THORN, Icelandic */
    "szlig",			/* 223: small sharp s, German (sz ligature) */
    "agrave",			/* 224: small a, grave accent */
    "aacute",			/* 225: small a, acute accent */
    "acirc",			/* 226: small a, circumflex accent */
    "atilde",			/* 227: small a, tilde */
    "auml",			/* 228: small a, dieresis or umlaut mark */
    "aring",			/* 229: small a, ring */
    "aelig",			/* 230: small ae diphthong (ligature) */
    "ccedil",			/* 231: small c, cedilla */
    "egrave",			/* 232: small e, grave accent */
    "eacute",			/* 233: small e, acute accent */
    "ecirc",			/* 234: small e, circumflex accent */
    "euml",			/* 235: small e, dieresis or umlaut mark */
    "igrave",			/* 236: small i, grave accent */
    "iacute",			/* 237: small i, acute accent */
    "icirc",			/* 238: small i, circumflex accent */
    "iuml",			/* 239: small i, dieresis or umlaut mark */
    "eth",			/* 240: small eth, Icelandic */
    "ntilde",			/* 241: small n, tilde */
    "ograve",			/* 242: small o, grave accent */
    "oacute",			/* 243: small o, acute accent */
    "ocirc",			/* 244: small o, circumflex accent */
    "otilde",			/* 245: small o, tilde */
    "ouml",			/* 246: small o, dieresis or umlaut mark */
    "divide",			/* 247: division sign */
    "oslash",			/* 248: small o, slash */
    "ugrave",			/* 249: small u, grave accent */
    "uacute",			/* 250: small u, acute accent */
    "ucirc",			/* 251: small u, circumflex accent */
    "uuml",			/* 252: small u, dieresis or umlaut mark */
    "yacute",			/* 253: small y, acute accent */
    "thorn",			/* 254: small thorn, Icelandic */
    "yuml",			/* 255: small y, dieresis or umlaut mark */
};
861
862 /*
863 * Function to return the entity names of ISO-8859-1 8-bit characters. - FM
864 */
HTMLGetEntityName(UCode_t code)865 const char *HTMLGetEntityName(UCode_t code)
866 {
867 #define IntValue code
868 int MaxValue = (TABLESIZE(LYEntityNames) - 1);
869
870 if (IntValue < 0 || IntValue > MaxValue) {
871 return "";
872 }
873
874 return LYEntityNames[IntValue];
875 }
876
877 /*
878 * Function to return the UCode_t (long int) value for entity names. It
879 * returns 0 if not found.
880 *
881 * unicode_entities[] handles all the names from old style entities[] too.
882 * Lynx now calls unicode_entities[] only through this function:
883 * HTMLGetEntityUCValue(). Note, we need not check for special characters here
884 * in function or even before it, we should check them *after* invoking this
885 * function, see put_special_unicodes() in SGML.c.
886 *
887 * In the future we will try to isolate all calls to entities[] in favor of new
888 * unicode-based chartrans scheme. - LP
889 */
HTMLGetEntityUCValue(const char * name)890 UCode_t HTMLGetEntityUCValue(const char *name)
891 {
892 #include <entities.h>
893
894 UCode_t value = 0;
895 size_t i, high, low;
896 int diff = 0;
897 size_t number_of_unicode_entities = TABLESIZE(unicode_entities);
898
899 /*
900 * Make sure we have a non-zero length name. - FM
901 */
902 if (isEmpty(name))
903 return (value);
904
905 /*
906 * Try UC_entity_info unicode_entities[].
907 */
908 for (low = 0, high = number_of_unicode_entities;
909 high > low;
910 diff < 0 ? (low = i + 1) : (high = i)) {
911 /*
912 * Binary search.
913 */
914 i = (low + (high - low) / 2);
915 diff = AS_cmp(unicode_entities[i].name, name); /* Case sensitive! */
916 if (diff == 0) {
917 value = unicode_entities[i].code;
918 break;
919 }
920 }
921 return (value);
922 }
923
924 /*
925 * Original comment -
926 * Assume these are Microsoft code points, inflicted on us by FrontPage. - FM
927 *
928 * MS FrontPage uses syntax like ™ in 128-159 range and doesn't follow
929 * Unicode standards for this area. Windows-1252 codepoints are assumed here.
930 *
931 * However see -
932 * http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#character-encodings-0
933 */
LYcp1252ToUnicode(UCode_t code)934 UCode_t LYcp1252ToUnicode(UCode_t code)
935 {
936 if ((code == 1) ||
937 (code > 127 && code < 160)) {
938 switch (code) {
939 case 1:
940 /*
941 * WHITE SMILING FACE
942 */
943 code = 0x263a;
944 break;
945 case 128:
946 /*
947 * EURO currency sign
948 */
949 code = 0x20ac;
950 break;
951 case 130:
952 /*
953 * SINGLE LOW-9 QUOTATION MARK (sbquo)
954 */
955 code = 0x201a;
956 break;
957 case 131:
958 /*
959 * LATIN SMALL LETTER F WITH HOOK
960 */
961 code = 0x192;
962 break;
963 case 132:
964 /*
965 * DOUBLE LOW-9 QUOTATION MARK (bdquo)
966 */
967 code = 0x201e;
968 break;
969 case 133:
970 /*
971 * HORIZONTAL ELLIPSIS (hellip)
972 */
973 code = 0x2026;
974 break;
975 case 134:
976 /*
977 * DAGGER (dagger)
978 */
979 code = 0x2020;
980 break;
981 case 135:
982 /*
983 * DOUBLE DAGGER (Dagger)
984 */
985 code = 0x2021;
986 break;
987 case 136:
988 /*
989 * MODIFIER LETTER CIRCUMFLEX ACCENT
990 */
991 code = 0x2c6;
992 break;
993 case 137:
994 /*
995 * PER MILLE SIGN (permil)
996 */
997 code = 0x2030;
998 break;
999 case 138:
1000 /*
1001 * LATIN CAPITAL LETTER S WITH CARON
1002 */
1003 code = 0x160;
1004 break;
1005 case 139:
1006 /*
1007 * SINGLE LEFT-POINTING ANGLE QUOTATION MARK (lsaquo)
1008 */
1009 code = 0x2039;
1010 break;
1011 case 140:
1012 /*
1013 * LATIN CAPITAL LIGATURE OE
1014 */
1015 code = 0x152;
1016 break;
1017 case 142:
1018 /*
1019 * LATIN CAPITAL LETTER Z WITH CARON
1020 */
1021 code = 0x17d;
1022 break;
1023 case 145:
1024 /*
1025 * LEFT SINGLE QUOTATION MARK (lsquo)
1026 */
1027 code = 0x2018;
1028 break;
1029 case 146:
1030 /*
1031 * RIGHT SINGLE QUOTATION MARK (rsquo)
1032 */
1033 code = 0x2019;
1034 break;
1035 case 147:
1036 /*
1037 * LEFT DOUBLE QUOTATION MARK (ldquo)
1038 */
1039 code = 0x201c;
1040 break;
1041 case 148:
1042 /*
1043 * RIGHT DOUBLE QUOTATION MARK (rdquo)
1044 */
1045 code = 0x201d;
1046 break;
1047 case 149:
1048 /*
1049 * BULLET (bull)
1050 */
1051 code = 0x2022;
1052 break;
1053 case 150:
1054 /*
1055 * EN DASH (ndash)
1056 */
1057 code = 0x2013;
1058 break;
1059 case 151:
1060 /*
1061 * EM DASH (mdash)
1062 */
1063 code = 0x2014;
1064 break;
1065 case 152:
1066 /*
1067 * SMALL TILDE (tilde)
1068 */
1069 code = 0x02dc;
1070 break;
1071 case 153:
1072 /*
1073 * TRADE MARK SIGN (trade)
1074 */
1075 code = 0x2122;
1076 break;
1077 case 154:
1078 /*
1079 * LATIN SMALL LETTER S WITH CARON
1080 */
1081 code = 0x161;
1082 break;
1083 case 155:
1084 /*
1085 * SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (rsaquo)
1086 */
1087 code = 0x203a;
1088 break;
1089 case 156:
1090 /*
1091 * LATIN SMALL LIGATURE OE
1092 */
1093 code = 0x153;
1094 break;
1095 case 158:
1096 /*
1097 * LATIN SMALL LETTER Z WITH CARON
1098 */
1099 code = 0x17e;
1100 break;
1101 case 159:
1102 /*
1103 * LATIN CAPITAL LETTER Y WITH DIAERESIS
1104 */
1105 code = 0x178;
1106 break;
1107 default:
1108 /*
1109 * Undefined (by convention, use the replacement character).
1110 */
1111 code = 0xfffd;
1112 break;
1113 }
1114 }
1115 return code;
1116 }
1117
1118 /*
1119 * Function to select a character set and then set the character handling and
1120 * LYHaveCJKCharacterSet flag. - FM
1121 */
HTMLUseCharacterSet(int i)1122 void HTMLUseCharacterSet(int i)
1123 {
1124 HTMLSetRawModeDefault(i);
1125 p_entity_values = LYCharSets[i];
1126 HTMLSetCharacterHandling(i); /* set LYRawMode and CJK attributes */
1127 HTMLSetHaveCJKCharacterSet(i);
1128 HTMLSetDisplayCharsetMatchLocale(i);
1129 return;
1130 }
1131
1132 /*
1133 * Initializer, calls initialization function for the CHARTRANS handling. - KW
1134 */
LYCharSetsDeclared(void)1135 int LYCharSetsDeclared(void)
1136 {
1137 UCInit();
1138
1139 return UCInitialized;
1140 }
1141
1142 #ifdef USE_CHARSET_CHOICE
init_charset_subsets(void)1143 void init_charset_subsets(void)
1144 {
1145 int i, n;
1146 int cur_display = 0;
1147 int cur_assumed = 0;
1148
1149 /* add them to displayed values */
1150 charset_subsets[UCLYhndl_for_unspec].hide_assumed = FALSE;
1151 charset_subsets[current_char_set].hide_display = FALSE;
1152
1153 #ifndef ALL_CHARSETS_IN_O_MENU_SCREEN
1154 /*all this stuff is for supporting old menu screen... */
1155 for (i = 0; i < LYNumCharsets; ++i) {
1156 if (charset_subsets[i].hide_display == FALSE) {
1157 n = cur_display++;
1158 if (i == current_char_set)
1159 displayed_display_charset_idx = n;
1160 display_charset_map[n] = i;
1161 display_charset_choices[n] = LYchar_set_names[i];
1162 }
1163 if (charset_subsets[i].hide_assumed == FALSE) {
1164 n = cur_assumed++;
1165 assumed_doc_charset_map[n] = i;
1166 assumed_charset_choices[n] = LYCharSet_UC[i].MIMEname;
1167 charset_subsets[i].assumed_idx = n;
1168 }
1169 display_charset_choices[cur_display] = NULL;
1170 assumed_charset_choices[cur_assumed] = NULL;
1171 }
1172 #endif
1173 }
1174 #endif /* USE_CHARSET_CHOICE */
1175