1 /*
2 * $LynxId: LYPrettySrc.c,v 1.28 2013/05/06 00:53:30 tom Exp $
3 *
4 * HTML source syntax highlighting
5 * by Vlad Harchev <hvv@hippo.ru>
6 * March 1999
7 */
8 #include <HTUtils.h>
9 #include <LYHash.h>
10 #include <LYPrettySrc.h>
11 #include <LYStrings.h>
12 #include <LYLeaks.h>
13
14 /* This file creates too many "leak detected" entries in Lynx.leaks. */
15 #define NO_MEMORY_TRACKING
16 #include <LYLeaks.h>
17
18 #ifdef USE_PRETTYSRC
19 BOOL psrc_convert_string = FALSE;
20 BOOL psrc_view = FALSE; /* this is read by SGML_put_character - TRUE
21
22 when viewing pretty source */
23 BOOLEAN LYpsrc = FALSE; /* this tells what will be shown on '\':
24
25 if TRUE, then pretty source, normal source view otherwise. Toggled by
26 -prettysrc commandline option. */
27 BOOL sgml_in_psrc_was_initialized;
28 BOOL psrc_nested_call;
29 BOOL psrc_first_tag;
30 BOOL mark_htext_as_source = FALSE;
31
32 /* tagspecs from lynx.cfg are read here. After .lss file is read (is with lss
33 support), the style cache and markup are created before entering the
34 mainloop. */
35 BOOLEAN psrcview_no_anchor_numbering = FALSE;
36 static const char *HTL_tagspecs_defaults[HTL_num_lexemes] =
37 {
38 /* these values are defaults. They are also listed in comments of distibution's
39 lynx.cfg. */
40 #ifdef USE_COLOR_STYLE
41 "span.htmlsrc_comment:!span",
42 "span.htmlsrc_tag:!span",
43 "span.htmlsrc_attrib:!span",
44 "span.htmlsrc_attrval:!span",
45 "span.htmlsrc_abracket:!span",
46 "span.htmlsrc_entity:!span",
47 "span.htmlsrc_href:!span",
48 "span.htmlsrc_entire:!span",
49 "span.htmlsrc_badseq:!span",
50 "span.htmlsrc_badtag:!span",
51 "span.htmlsrc_badattr:!span",
52 "span.htmlsrc_sgmlspecial:!span"
53 #else
54 "b:!b", /* comment */
55 "b:!b", /* tag */
56 "b:!b", /* attrib */
57 ":", /* attrval */
58 "b:!b", /* abracket */
59 "b:!b", /* entity */
60 ":", /* href */
61 ":", /* entire */
62 "b:!b", /* badseq */
63 ":", /* badtag */
64 ":", /* badattr */
65 "b:!b" /* sgmlspec */
66 #endif
67 };
68
69 char *HTL_tagspecs[HTL_num_lexemes];
70
71 /* these are pointers since tagspec can be empty (the pointer will be NULL
72 in that case) */
73 HT_tagspec *lexeme_start[HTL_num_lexemes];
74 HT_tagspec *lexeme_end[HTL_num_lexemes];
75
76 int tagname_transform = 2;
77 int attrname_transform = 2;
78
html_src_tag_index(char * tagname)79 static int html_src_tag_index(char *tagname)
80 {
81 HTTag *tag = SGMLFindTag(&HTML_dtd, tagname);
82
83 return (tag && tag != &HTTag_unrecognized) ? (int) (tag - HTML_dtd.tags) : -1;
84 }
85
/*
 * States of the scanner in html_src_parse_tagspec().
 */
typedef enum {
    HTSRC_CK_normal,            /* between items: expecting '!', a tag name or the end */
    HTSRC_CK_seen_excl,         /* a '!' was read: expecting the tag name */
    HTSRC_CK_after_tagname,     /* a tag name was just read: expecting '.', blank or the end */
    HTSRC_CK_seen_dot           /* "tag." was read: expecting the class name */
} html_src_check_state;
92
/*
 * Allocate a node describing the closing tag 'tagname', initialize all of
 * its members, and append it to the list: *head is set when the list was
 * empty, and *tail always ends up pointing at the new node.
 *
 * 'tagname' must be known to the HTML DTD.  An unknown name is reported on
 * stderr as an internal error and the program exits.  Allocation failures
 * are passed to outofmem().
 */
static void append_close_tag(char *tagname,
                             HT_tagspec ** head,
                             HT_tagspec ** tail)
{
    int idx, nattr;
    HT_tagspec *subj;

    idx = html_src_tag_index(tagname);

    /*
     * Validate the index before it is used to address HTML_dtd.tags:
     * with idx == -1 the lookup below would read in front of the table.
     */
    if (idx == -1) {
        fprintf(stderr,
                "internal error: previous check didn't find bad HTML tag %s", tagname);
        exit_immediately(EXIT_FAILURE);
    }

    nattr = HTML_dtd.tags[idx].number_of_attributes;

    subj = typecalloc(HT_tagspec);
    if (subj == 0)
        outofmem(__FILE__, "append_close_tag");

    subj->element = (HTMLElement) idx;

    /* one slot per attribute of the tag, all initially cleared */
    subj->present = typecallocn(BOOL, (unsigned) nattr);

    if (subj->present == 0)
        outofmem(__FILE__, "append_close_tag");

    subj->value = typecallocn(char *, (unsigned) nattr);

    if (subj->value == 0)
        outofmem(__FILE__, "append_close_tag");

    subj->start = FALSE;
#ifdef USE_COLOR_STYLE
    subj->class_name = NULL;
#endif

    if (!*head) {
        *head = subj;
        *tail = subj;
    } else {
        (*tail)->next = subj;
        *tail = subj;
    }
}
140
141 /* this will allocate node, initialize all members, and node
142 append to the list, possibly modifying head and modifying tail */
append_open_tag(char * tagname,char * classname GCC_UNUSED,HT_tagspec ** head,HT_tagspec ** tail)143 static void append_open_tag(char *tagname,
144 char *classname GCC_UNUSED,
145 HT_tagspec ** head,
146 HT_tagspec ** tail)
147 {
148 HT_tagspec *subj;
149
150 #ifdef USE_COLOR_STYLE
151 int hcode;
152 #endif
153
154 append_close_tag(tagname, head, tail); /* initialize common members */
155 subj = *tail;
156 subj->start = TRUE;
157
158 #ifdef USE_COLOR_STYLE
159 hcode = hash_code_lowercase_on_fly(tagname);
160 if (non_empty(classname)) {
161
162 # if 0
163 /*
164 * we don't provide a classname as attribute of that tag, since for
165 * plain formatting tags they are not used directly for anything except
166 * style - and we provide style value directly.
167 */
168 HTTag *tag = HTML_dtd.tags + subj->element;
169 int class_attr_idx = 0;
170 int n = tag->number_of_attributes;
171 attr *attrs = tag->attributes;
172
173 /*.... */
174 /* this is not implemented though it's easy */
175 # endif
176
177 hcode = hash_code_aggregate_char('.', hcode);
178 hcode = hash_code_aggregate_lower_str(classname, hcode);
179 StrAllocCopy(subj->class_name, classname);
180 } else {
181 StrAllocCopy(subj->class_name, "");
182 }
183 subj->style = hcode;
184 #endif
185 }
186
/*
 * Character classes of tag and class names: isLeadP() accepts the first
 * character (a letter or '_'), isNextP() the following ones (a letter, a
 * digit or '_').  'p' is a pointer to the character to test; it is
 * evaluated more than once.
 */
#define isLeadP(p) ((isalpha(UCH(*(p))) || *(p) == '_'))
#define isNextP(p) ((isalnum(UCH(*(p))) || *(p) == '_'))

/*
 * Trace-message helpers: FMT_AT is appended to a format string, TXT_AT
 * supplies the matching arguments (1-based column of 'p' within 'ts', then
 * 'ts' itself).  TXT_AT relies on variables named 'p' and 'ts' being in
 * scope where it is used.
 */
#define FMT_AT " at column %d:\n\t%s\n"
#define TXT_AT (int) (1 + p - ts), ts
192
193 /* returns FALSE if incorrect */
html_src_parse_tagspec(char * ts,HTlexeme lexeme,int checkonly,int isstart)194 int html_src_parse_tagspec(char *ts,
195 HTlexeme lexeme,
196 int checkonly,
197 int isstart)
198 {
199 BOOL stop = FALSE;
200 BOOL code = FALSE;
201 char *p = ts;
202 char *tagstart = 0;
203 char *tagend = 0;
204 char *classstart;
205 char *classend;
206 char save, save1;
207 char after_excl = FALSE;
208 html_src_check_state state = HTSRC_CK_normal;
209 HT_tagspec *head = NULL;
210 HT_tagspec *tail = NULL;
211 HT_tagspec **slot = (isstart ? lexeme_start : lexeme_end) + lexeme;
212
213 while (!stop) {
214 switch (state) {
215 case HTSRC_CK_normal:
216 case HTSRC_CK_seen_excl:
217 switch (*p) {
218 case '\0':
219 stop = TRUE;
220 code = TRUE;
221 break;
222 case ' ':
223 case '\t':
224 break;
225 case '!':
226 if (state == HTSRC_CK_seen_excl) {
227 CTRACE2(TRACE_CFG,
228 (tfp, "second '!'" FMT_AT,
229 TXT_AT));
230 stop = TRUE;
231 break;
232 }
233 state = HTSRC_CK_seen_excl;
234 after_excl = TRUE;
235 break;
236 default:
237 if (!isLeadP(p)) {
238 CTRACE2(TRACE_CFG,
239 (tfp, "no name starting" FMT_AT,
240 TXT_AT));
241 stop = TRUE;
242 break;
243 }
244 tagstart = p;
245 while (*p && isNextP(p))
246 ++p;
247 tagend = p--;
248 state = HTSRC_CK_after_tagname;
249 }
250 break;
251 case HTSRC_CK_after_tagname:
252 switch (*p) {
253 case '\0':
254 stop = TRUE;
255 code = TRUE;
256 /* FALLTHRU */
257 case ' ':
258 /* FALLTHRU */
259 case '\t':
260 save = *tagend;
261
262 *tagend = '\0';
263 classstart = 0;
264 if (checkonly) {
265 int idx = html_src_tag_index(tagstart);
266
267 CTRACE2(TRACE_CFG,
268 (tfp, "tag index(%s) = %d\n",
269 tagstart, idx));
270
271 *tagend = save;
272 if (idx == -1) {
273 stop = TRUE;
274 break;
275 }
276 } else {
277 if (after_excl)
278 append_close_tag(tagstart, &head, &tail);
279 else
280 append_open_tag(tagstart, NULL, &head, &tail);
281 }
282 state = HTSRC_CK_normal;
283 after_excl = FALSE;
284 break;
285 case '.':
286 if (after_excl) {
287 CTRACE2(TRACE_CFG,
288 (tfp, "dot after '!'" FMT_AT,
289 TXT_AT));
290 stop = TRUE;
291 break;
292 }
293 state = HTSRC_CK_seen_dot;
294 break;
295 default:
296 CTRACE2(TRACE_CFG,
297 (tfp, "unexpected char '%c' after tagname" FMT_AT,
298 *p, TXT_AT));
299 stop = TRUE;
300 break;
301 }
302 break;
303 case HTSRC_CK_seen_dot:
304 switch (*p) {
305 case ' ':
306 case '\t':
307 break;
308 case '\0':
309 CTRACE2(TRACE_CFG,
310 (tfp, "expected text after dot" FMT_AT,
311 TXT_AT));
312 stop = TRUE;
313 break;
314 default:
315 if (!isLeadP(p)) {
316 CTRACE2(TRACE_CFG,
317 (tfp, "no name starting" FMT_AT,
318 TXT_AT));
319 stop = TRUE;
320 break;
321 }
322 classstart = p;
323 while (*p && isNextP(p))
324 ++p;
325 classend = p--;
326 save = *classend;
327 *classend = '\0';
328 save1 = *tagend;
329 *tagend = '\0';
330 if (checkonly) {
331 int idx = html_src_tag_index(tagstart);
332
333 *tagend = save1;
334 *classend = save;
335 if (idx == -1)
336 return FALSE;
337 } else {
338 append_open_tag(tagstart, classstart, &head, &tail);
339 }
340 state = HTSRC_CK_normal;
341 after_excl = FALSE;
342 break;
343 } /* of switch(*p) */
344 break;
345 } /* of switch */
346 ++p;
347 }
348
349 if (code && !checkonly)
350 *slot = head;
351
352 return code;
353 }
354
355 /*this will clean the data associated with lexeme 'l' */
html_src_clean_item(HTlexeme l)356 void html_src_clean_item(HTlexeme l)
357 {
358 int i;
359
360 if (HTL_tagspecs[l])
361 FREE(HTL_tagspecs[l]);
362 for (i = 0; i < 2; ++i) {
363 HT_tagspec *cur;
364 HT_tagspec **pts = (i ? lexeme_start : lexeme_end) + l;
365 HT_tagspec *ts = *pts;
366
367 *pts = NULL;
368 while (ts) {
369 FREE(ts->present);
370 FREE(ts->value);
371 #ifdef USE_COLOR_STYLE
372 if (ts->start) {
373 FREE(ts->class_name);
374 }
375 #endif
376 cur = ts;
377 ts = ts->next;
378 FREE(cur);
379 }
380 }
381 }
382
383 /*this will be registered with atexit*/
html_src_clean_data(void)384 void html_src_clean_data(void)
385 {
386 int i;
387
388 for (i = 0; i < HTL_num_lexemes; ++i)
389 html_src_clean_item((HTlexeme) i);
390 }
391
html_src_on_lynxcfg_reload(void)392 void html_src_on_lynxcfg_reload(void)
393 {
394 html_src_clean_data();
395 HTMLSRC_init_caches(TRUE);
396 }
397
/*
 * Report on stderr that the tagspec of a lexeme could not be parsed, then
 * exit with EXIT_FAILURE.
 *
 * tag     which half of the tagspec failed (the callers pass "1st" or "2nd").
 * lexeme  number of the lexeme being cached.
 */
static void failed_init(const char *tag, int lexeme)
{
    fprintf(stderr,
            gettext("parse-error while caching %s tagspec of lexeme %d\n"),
            tag, lexeme);
    fprintf(stderr,
            gettext("Use -trace -trace-mask=8 to see details in log.\n"));

    exit_immediately(EXIT_FAILURE);
}
407
HTMLSRC_init_caches(int dont_exit)408 void HTMLSRC_init_caches(int dont_exit)
409 {
410 int i;
411 char *p;
412 char buf[1000];
413 static char empty[] = "";
414
415 CTRACE2(TRACE_CFG, (tfp, "HTMLSRC_init_caches(%d tagspecs)\n", HTL_num_lexemes));
416 for (i = 0; i < HTL_num_lexemes; ++i) {
417 /*we assume that HT_tagspecs was NULLs at when program started */
418 LYStrNCpy(buf,
419 HTL_tagspecs[i]
420 ? HTL_tagspecs[i]
421 : HTL_tagspecs_defaults[i],
422 sizeof(buf) - 1);
423 StrAllocCopy(HTL_tagspecs[i], buf);
424
425 CTRACE2(TRACE_CFG, (tfp, "parsing lexeme %d: %s\n", i + 1, buf));
426
427 if ((p = strchr(buf, ':')) != 0)
428 *p = '\0';
429 if (!html_src_parse_tagspec(buf,
430 (HTlexeme) i,
431 FALSE,
432 TRUE) && !dont_exit) {
433 failed_init("1st", i);
434 }
435 if (!html_src_parse_tagspec(p ? p + 1 : empty,
436 (HTlexeme) i,
437 FALSE,
438 FALSE) && !dont_exit) {
439 failed_init("2nd", i);
440 }
441 }
442 }
443
444 #endif /* ifdef USE_PRETTYSRC */
445