1 /*
2 * $LynxId: HTMLDTD.c,v 1.57 2010/09/25 00:30:56 tom Exp $
3 *
4 * Our Static DTD for HTML
5 * -----------------------
6 */
7
8 /* Implements:
9 */
10
11 #include <HTUtils.h>
12 #include <HTMLDTD.h>
13 #include <LYLeaks.h>
14 #include <LYJustify.h>
15
16 /*
17 * Character entities like &nbsp; are now excluded from our DTD tables; they are
18 * mapped to Unicode and handled directly by the chartrans code, in the same way
19 * as numeric entities like &#123;. See src/chrtrans/entities.h for the real
20 * mapping.
21 */
22
23 /* Entity Names
24 * ------------
25 *
26 * This table must be matched exactly with ALL the translation tables
27 * (this is an obsolete translation mechanism, probably unused,
28 * currently replaced with Unicode chartrans in most cases...)
29 */
/*
 * entities[]: the list of named character entities referenced by HTML_dtd.
 *
 * The names appear in ascending ASCII order, so every capitalized name
 * precedes the lowercase ones.  NOTE(review): lookups presumably depend on
 * this ordering as well as on the index of each entry -- confirm before
 * inserting, removing or reordering anything.
 *
 * Several entries carry the same description as another entry
 * (Dstrok/ETH, brkbar/brvbar, die/uml, emdash/mdash, endash/ndash,
 * hibar/macr); they look like alternative spellings of one character.
 */
30 static const char *entities[] =
31 {
32 "AElig", /* capital AE diphthong (ligature) */
33 "Aacute", /* capital A, acute accent */
34 "Acirc", /* capital A, circumflex accent */
35 "Agrave", /* capital A, grave accent */
36 "Aring", /* capital A, ring */
37 "Atilde", /* capital A, tilde */
38 "Auml", /* capital A, dieresis or umlaut mark */
39 "Ccedil", /* capital C, cedilla */
40 "Dstrok", /* capital Eth, Icelandic */
41 "ETH", /* capital Eth, Icelandic */
42 "Eacute", /* capital E, acute accent */
43 "Ecirc", /* capital E, circumflex accent */
44 "Egrave", /* capital E, grave accent */
45 "Euml", /* capital E, dieresis or umlaut mark */
46 "Iacute", /* capital I, acute accent */
47 "Icirc", /* capital I, circumflex accent */
48 "Igrave", /* capital I, grave accent */
49 "Iuml", /* capital I, dieresis or umlaut mark */
50 "Ntilde", /* capital N, tilde */
51 "Oacute", /* capital O, acute accent */
52 "Ocirc", /* capital O, circumflex accent */
53 "Ograve", /* capital O, grave accent */
54 "Oslash", /* capital O, slash */
55 "Otilde", /* capital O, tilde */
56 "Ouml", /* capital O, dieresis or umlaut mark */
57 "THORN", /* capital THORN, Icelandic */
58 "Uacute", /* capital U, acute accent */
59 "Ucirc", /* capital U, circumflex accent */
60 "Ugrave", /* capital U, grave accent */
61 "Uuml", /* capital U, dieresis or umlaut mark */
62 "Yacute", /* capital Y, acute accent */
63 "aacute", /* small a, acute accent */
64 "acirc", /* small a, circumflex accent */
65 "acute", /* spacing acute */
66 "aelig", /* small ae diphthong (ligature) */
67 "agrave", /* small a, grave accent */
68 "amp", /* ampersand */
69 "aring", /* small a, ring */
70 "atilde", /* small a, tilde */
71 "auml", /* small a, dieresis or umlaut mark */
72 "brkbar", /* broken vertical bar */
73 "brvbar", /* broken vertical bar */
74 "ccedil", /* small c, cedilla */
75 "cedil", /* spacing cedilla */
76 "cent", /* cent sign */
77 "copy", /* copyright sign */
78 "curren", /* currency sign */
79 "deg", /* degree sign */
80 "die", /* spacing dieresis */
81 "divide", /* division sign */
82 "eacute", /* small e, acute accent */
83 "ecirc", /* small e, circumflex accent */
84 "egrave", /* small e, grave accent */
85 "emdash", /* dash the width of emsp */
86 "emsp", /* em space - not collapsed */
87 "endash", /* dash the width of ensp */
88 "ensp", /* en space - not collapsed */
89 "eth", /* small eth, Icelandic */
90 "euml", /* small e, dieresis or umlaut mark */
91 "frac12", /* fraction 1/2 */
92 "frac14", /* fraction 1/4 */
93 "frac34", /* fraction 3/4 */
94 "gt", /* greater than */
95 "hibar", /* spacing macron */
96 "iacute", /* small i, acute accent */
97 "icirc", /* small i, circumflex accent */
98 "iexcl", /* inverted exclamation mark */
99 "igrave", /* small i, grave accent */
100 "iquest", /* inverted question mark */
101 "iuml", /* small i, dieresis or umlaut mark */
102 "laquo", /* angle quotation mark, left */
103 "lt", /* less than */
104 "macr", /* spacing macron */
105 "mdash", /* dash the width of emsp */
106 "micro", /* micro sign */
107 "middot", /* middle dot */
108 "nbsp", /* non breaking space */
109 "ndash", /* dash the width of ensp */
110 "not", /* negation sign */
111 "ntilde", /* small n, tilde */
112 "oacute", /* small o, acute accent */
113 "ocirc", /* small o, circumflex accent */
114 "ograve", /* small o, grave accent */
115 "ordf", /* feminine ordinal indicator */
116 "ordm", /* masculine ordinal indicator */
117 "oslash", /* small o, slash */
118 "otilde", /* small o, tilde */
119 "ouml", /* small o, dieresis or umlaut mark */
120 "para", /* paragraph sign */
121 "plusmn", /* plus-or-minus sign */
122 "pound", /* pound sign */
123 "quot", /* quote '"' */
124 "raquo", /* angle quotation mark, right */
125 "reg", /* circled R registered sign */
126 "sect", /* section sign */
127 "shy", /* soft hyphen */
128 "sup1", /* superscript 1 */
129 "sup2", /* superscript 2 */
130 "sup3", /* superscript 3 */
131 "szlig", /* small sharp s, German (sz ligature) */
132 "thinsp", /* thin space (not collapsed) */
133 "thorn", /* small thorn, Icelandic */
134 "times", /* multiplication sign */
135 "trade", /* trade mark sign (U+2122) */
136 "uacute", /* small u, acute accent */
137 "ucirc", /* small u, circumflex accent */
138 "ugrave", /* small u, grave accent */
139 "uml", /* spacing dieresis */
140 "uuml", /* small u, dieresis or umlaut mark */
141 "yacute", /* small y, acute accent */
142 "yen", /* yen sign */
143 "yuml", /* small y, dieresis or umlaut mark */
144 };
145
146 /* Attribute Lists
147 * ---------------
148 *
149 * Lists must be in alphabetical order by attribute name
150 * The tag elements contain the number of attributes
151 */
152
153 /* From Peter Flynn's intro to the HTML Pro DTD:
154
155 %structure;
156
157 DIV, CENTER, H1 to H6, P, UL, OL, DL, DIR, MENU, PRE, XMP, LISTING, BLOCKQUOTE, BQ,
158 2 1 2 2 1 8 8 8 8 8 8 8 8 4 4
159 MULTICOL,?NOBR, FORM, TABLE, ADDRESS, FIG, BDO, NOTE, and FN; plus?WBR, LI, and LH
160 8 n ?1 n 8 8 2 2 2 2 2 ?1 nE 4 4
161
162 %insertions;
163
164 Elements which usually contain special-purpose material, or no text material at all.
165
166 BASEFONT, APPLET, OBJECT, EMBED, SCRIPT, MAP, MARQUEE, HR, ISINDEX, BGSOUND, TAB,?IMG,
167 1 e? 2 2 l 1 e 2 l 8 4 4 E 1? E 1 E ! E ?1 E
168 IMAGE, BR, plus NOEMBED, SERVER, SPACER, AUDIOSCOPE, and SIDEBAR; ?area
169 1 n 1 E n n n n n 8 E
170
171 %text;
172
173 Elements within the %structure; which directly contain running text.
174
175 Descriptive or analytic markup: EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, Q, LANG, AU,
176 2 2 2 2 2 2 2 2 2 2 n 2
177 AUTHOR, PERSON, ACRONYM, ABBR, INS, DEL, and SPAN
178 2 2 n 2 2 2 2 2
179 Visual markup:S, STRIKE, I, B, TT, U,?NOBR,?WBR, BR, BIG, SMALL, FONT, STYLE, BLINK, TAB,
180 1 1 1 1 1 1 ?1 n ?1nE? 1 E 1 1 1 1 l 1 1 E?
181 BLACKFACE, LIMITTEXT, NOSMARTQUOTES, and SHADOW
182 1 n 1 n 1 n 1 n
183 Hypertext and graphics: A and?IMG
184 8 ?8 E
185 Mathematical: SUB, SUP, and MATH
186 4 4 4 l
187 Documentary: COMMENT, ENTITY, ELEMENT, and ATTRIB
188 4 4 n 4 n 4 n
189 %formula;
190 */
191
192 /* Elements
193 * --------
194 *
195 * Must match definitions in HTMLDTD.html!
196 * Must be in alphabetical order.
197 *
198 * The T_* extra info is listed here, even though most fields are not used
199 * in SGML.c if Old_DTD is set (with the exception of some Tgf_* flags).
200 * This simplifies comparison of the tags_table0[] table (otherwise unchanged
201 * from original Lynx treatment) with the tags_table1[] table below. - kw
202 *
203 * Name*, Attributes, No. of attributes, content, extra info...
204 */
205
206 #include <src0_HTMLDTD.h>
207 #include <src1_HTMLDTD.h>
208
209 /* Dummy space, will be filled with the contents of either tags_table1
210 or tags_table0 on calling HTSwitchDTD - kw */
211
212 static HTTag tags[HTML_ALL_ELEMENTS];
213
/*
 * HTML_dtd: the exported SGML_dtd instance for HTML.  Its members are
 * initialized positionally with the tags[] array above, the constant
 * HTML_ELEMENTS, the entities[] name table and the number of entries in
 * that table.
 *
 * tags[] has static storage and no initializer, so it holds no element
 * descriptions until HTSwitchDTD() has copied one of the tag tables into it.
 *
 * NOTE(review): tags[] is dimensioned with HTML_ALL_ELEMENTS, whereas the
 * value stored here is HTML_ELEMENTS; both constants are defined outside
 * this file -- confirm how they relate before depending on either.
 */
214 const SGML_dtd HTML_dtd =
215 {
216 tags,
217 HTML_ELEMENTS,
218 entities, /* probably unused */
219 TABLESIZE(entities),
220 };
221
222 /* This function fills the "tags" part of the HTML_dtd structure with
223 what we want to use, either tags_table0 or tags_table1. Note that it
224 has to be called at least once before HTML_dtd is used, otherwise
225 the HTML_dtd contents will be invalid! This could be coded in a way
226 that would make an initialisation call unnecessary, but my C knowledge
227 is limited and I didn't want to list the whole tags_table1 table
228 twice... - kw */
HTSwitchDTD(int new_flag)229 void HTSwitchDTD(int new_flag)
230 {
231 if (TRACE)
232 CTRACE((tfp,
233 "HTMLDTD: Copying %s DTD element info of size %d, %d * %d\n",
234 new_flag ? "strict" : "tagsoup",
235 (int) (new_flag ? sizeof(tags_table1) : sizeof(tags_table0)),
236 HTML_ALL_ELEMENTS,
237 (int) sizeof(HTTag)));
238 if (new_flag)
239 MemCpy(tags, tags_table1, HTML_ALL_ELEMENTS * sizeof(HTTag));
240 else
241 MemCpy(tags, tags_table0, HTML_ALL_ELEMENTS * sizeof(HTTag));
242 }
243
/*
 * HTTag_unrecognized: a stand-alone HTTag record, separate from tags[].
 * It is initialized with NULL_HTTag, a NULL pointer, two zeroes,
 * SGML_EMPTY and T__UNREC_.
 *
 * NOTE(review): presumably this is the descriptor used for element names
 * that are not found in tags[] -- confirm against the SGML parser.
 */
244 HTTag HTTag_unrecognized =
245
246 {NULL_HTTag, NULL, 0, 0, SGML_EMPTY, T__UNREC_};
247
248 /*
249 * Utility Routine: Useful for people building HTML objects.
250 */
251
252 /* Start anchor element
253 * --------------------
254 *
255 * It is kinda convenient to have a particular routine for
256 * starting an anchor element, as everything else for HTML is
257 * simple anyway.
258 */
/*
 * Local definition of the HTStructured object as seen by this file.  Only
 * the leading class pointer is spelled out; the routines below use it to
 * reach the start_element method and touch nothing else in the object.
 */
259 struct _HTStructured {
260 HTStructuredClass *isa; /* method table; start_element is called through it */
261 /* ... */
262 };
263
/*
 * Start an anchor (HTML_A) element on a structured stream.
 *
 * obj   target object; its class's start_element method is invoked.
 * name  value for the NAME attribute; skipped when NULL or empty.
 * href  value for the HREF attribute; skipped only when NULL, so an empty
 *       string is passed through as a present attribute.
 *
 * The element is started with a tag charset of -1 and a final argument
 * of 0.
 *
 * Every attribute slot that is not supplied is marked absent AND has its
 * value pointer set to NULL.  Previously those value slots were left
 * indeterminate, which is only safe while the callee checks present[]
 * before every single read of value[].
 */
void HTStartAnchor(HTStructured * obj, const char *name,
                   const char *href)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
        present[i] = NO;
        value[i] = NULL;
    }

    if (name && *name) {
        present[HTML_A_NAME] = YES;
        value[HTML_A_NAME] = name;
    }
    if (href) {
        present[HTML_A_HREF] = YES;
        value[HTML_A_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, -1, 0);
}
285
/*
 * Start an anchor (HTML_A) element with an optional TYPE attribute and an
 * explicit tag charset.
 *
 * obj          target object; its class's start_element method is invoked.
 * name         value for the NAME attribute; skipped when NULL or empty.
 * href         value for the HREF attribute; skipped when NULL or empty.
 * linktype     value for the TYPE attribute; skipped when NULL or empty.
 * tag_charset  passed through unchanged to start_element.
 *
 * Every attribute slot that is not supplied is marked absent AND has its
 * value pointer set to NULL.  Previously those value slots were left
 * indeterminate, which is only safe while the callee checks present[]
 * before every single read of value[].
 */
void HTStartAnchor5(HTStructured * obj, const char *name,
                    const char *href,
                    const char *linktype,
                    int tag_charset)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
        present[i] = NO;
        value[i] = NULL;
    }

    if (name && *name) {
        present[HTML_A_NAME] = YES;
        value[HTML_A_NAME] = name;
    }
    if (href && *href) {
        present[HTML_A_HREF] = YES;
        value[HTML_A_HREF] = href;
    }
    if (linktype && *linktype) {
        present[HTML_A_TYPE] = YES;
        value[HTML_A_TYPE] = linktype;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, tag_charset, 0);
}
313
/*
 * Start an HTML_ISINDEX element on a structured stream.
 *
 * obj     target object; its class's start_element method is invoked.
 * prompt  value for the PROMPT attribute; skipped when NULL or empty.
 * href    value for the HREF attribute; skipped only when NULL, so an
 *         empty string is passed through as a present attribute.
 *
 * The element is started with a tag charset of -1 and a final argument
 * of 0.
 *
 * Every attribute slot that is not supplied is marked absent AND has its
 * value pointer set to NULL.  Previously those value slots were left
 * indeterminate, which is only safe while the callee checks present[]
 * before every single read of value[].
 */
void HTStartIsIndex(HTStructured * obj, const char *prompt,
                    const char *href)
{
    BOOL present[HTML_ISINDEX_ATTRIBUTES];
    const char *value[HTML_ISINDEX_ATTRIBUTES];
    int i;

    for (i = 0; i < HTML_ISINDEX_ATTRIBUTES; i++) {
        present[i] = NO;
        value[i] = NULL;
    }

    if (prompt && *prompt) {
        present[HTML_ISINDEX_PROMPT] = YES;
        value[HTML_ISINDEX_PROMPT] = prompt;
    }
    if (href) {
        present[HTML_ISINDEX_HREF] = YES;
        value[HTML_ISINDEX_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_ISINDEX, present, value, -1, 0);
}
335