1 /*
2  * $LynxId: LYPrettySrc.c,v 1.28 2013/05/06 00:53:30 tom Exp $
3  *
4  * HTML source syntax highlighting
5  * by Vlad Harchev <hvv@hippo.ru>
6  * March 1999
7  */
8 #include <HTUtils.h>
9 #include <LYHash.h>
10 #include <LYPrettySrc.h>
11 #include <LYStrings.h>
12 #include <LYLeaks.h>
13 
14  /* This file creates too many "leak detected" entries in Lynx.leaks. */
15 #define NO_MEMORY_TRACKING
16 #include <LYLeaks.h>
17 
18 #ifdef USE_PRETTYSRC
/* Global state flags of the pretty-source (syntax highlighted) view. */
BOOL psrc_convert_string = FALSE;
BOOL psrc_view = FALSE;		/* this is read by SGML_put_character - TRUE
				   when viewing pretty source */
BOOLEAN LYpsrc = FALSE;		/* this tells what will be shown on '\':
				   if TRUE, then pretty source, normal source view otherwise. Toggled by
				   -prettysrc commandline option.  */
BOOL sgml_in_psrc_was_initialized;
BOOL psrc_nested_call;
BOOL psrc_first_tag;
BOOL mark_htext_as_source = FALSE;
31 
  /* tagspecs from lynx.cfg are read here. After the .lss file is read (if built
     with lss support), the style cache and markup are created before entering
     the mainloop. */
BOOLEAN psrcview_no_anchor_numbering = FALSE;

/*
 * Built-in tagspec for every lexeme, used by HTMLSRC_init_caches() whenever
 * the matching HTL_tagspecs[] entry is still NULL.  Each string has the form
 * "open:close"; the part before the ':' is parsed as the start markup and the
 * part after it as the end markup.
 */
static const char *HTL_tagspecs_defaults[HTL_num_lexemes] =
{
 /* these values are defaults. They are also listed in comments of the
    distribution's lynx.cfg. */
#ifdef USE_COLOR_STYLE
    "span.htmlsrc_comment:!span",
    "span.htmlsrc_tag:!span",
    "span.htmlsrc_attrib:!span",
    "span.htmlsrc_attrval:!span",
    "span.htmlsrc_abracket:!span",
    "span.htmlsrc_entity:!span",
    "span.htmlsrc_href:!span",
    "span.htmlsrc_entire:!span",
    "span.htmlsrc_badseq:!span",
    "span.htmlsrc_badtag:!span",
    "span.htmlsrc_badattr:!span",
    "span.htmlsrc_sgmlspecial:!span"
#else
    "b:!b",			/* comment */
    "b:!b",			/* tag     */
    "b:!b",			/* attrib  */
    ":",			/* attrval */
    "b:!b",			/* abracket */
    "b:!b",			/* entity  */
    ":",			/* href    */
    ":",			/* entire  */
    "b:!b",			/* badseq  */
    ":",			/* badtag  */
    ":",			/* badattr */
    "b:!b"			/* sgmlspec */
#endif
};
68 
/*
 * Tagspec string of each lexeme.  Entries that are still NULL when
 * HTMLSRC_init_caches() runs are filled from HTL_tagspecs_defaults[];
 * html_src_clean_item() frees them again.
 */
char *HTL_tagspecs[HTL_num_lexemes];

 /* Parsed start/end markup lists of each lexeme.  These are pointers since a
    tagspec can be empty (the pointer will be NULL in that case). */
HT_tagspec *lexeme_start[HTL_num_lexemes];
HT_tagspec *lexeme_end[HTL_num_lexemes];

/* NOTE(review): these presumably select how tag and attribute names are
   case-transformed for display; the meaning of the value 2 is defined
   outside this file - confirm. */
int tagname_transform = 2;
int attrname_transform = 2;
78 
html_src_tag_index(char * tagname)79 static int html_src_tag_index(char *tagname)
80 {
81     HTTag *tag = SGMLFindTag(&HTML_dtd, tagname);
82 
83     return (tag && tag != &HTTag_unrecognized) ? (int) (tag - HTML_dtd.tags) : -1;
84 }
85 
/* States of the tagspec scanner in html_src_parse_tagspec(). */
typedef enum {
    HTSRC_CK_normal,		/* between items: blanks, '!', a tag name or the end may follow */
    HTSRC_CK_seen_excl,		/* a '!' was read; a second '!' is rejected */
    HTSRC_CK_after_tagname,	/* a tag name was scanned; blank, end or '.' may follow */
    HTSRC_CK_seen_dot		/* a '.' followed the tag name; a class name is expected */
} html_src_check_state;
92 
/*
 * Allocate a tagspec node describing the closing tag 'tagname' and append it
 * to the singly linked list given by *head and *tail.
 *
 * tagname - NUL-terminated HTML tag name; it must be known to the DTD.
 * head    - in/out: first node of the list (set when the list was empty).
 * tail    - in/out: last node of the list (always set to the new node).
 *
 * The node gets zero-filled 'present' and 'value' arrays with one slot per
 * attribute of the tag.  An unknown tag name is treated as an internal error
 * and terminates the program; so does an allocation failure (via outofmem).
 */
static void append_close_tag(char *tagname,
			     HT_tagspec ** head,
			     HT_tagspec ** tail)
{
    int idx, nattr;
    HTTag *tag;
    HT_tagspec *subj;

    idx = html_src_tag_index(tagname);
    if (idx == -1) {
	fprintf(stderr,
		"internal error: previous check didn't find bad HTML tag %s", tagname);
	exit_immediately(EXIT_FAILURE);
    }

    /*
     * Only touch the DTD table once the index is known to be valid; doing it
     * before the check above would read HTML_dtd.tags[-1].
     */
    tag = HTML_dtd.tags + idx;
    nattr = tag->number_of_attributes;

    subj = typecalloc(HT_tagspec);
    if (subj == 0)
	outofmem(__FILE__, "append_close_tag");

    subj->element = (HTMLElement) idx;

    subj->present = typecallocn(BOOL, (unsigned) nattr);

    if (subj->present == 0)
	outofmem(__FILE__, "append_close_tag");

    subj->value = typecallocn(char *, (unsigned) nattr);

    if (subj->value == 0)
	outofmem(__FILE__, "append_close_tag");

    subj->start = FALSE;
    subj->next = NULL;
#ifdef USE_COLOR_STYLE
    subj->class_name = NULL;
#endif

    if (!*head) {
	*head = subj;
	*tail = subj;
    } else {
	(*tail)->next = subj;
	*tail = subj;
    }
}
140 
141 /* this will allocate node, initialize all members, and node
142    append to the list, possibly modifying head and modifying tail */
append_open_tag(char * tagname,char * classname GCC_UNUSED,HT_tagspec ** head,HT_tagspec ** tail)143 static void append_open_tag(char *tagname,
144 			    char *classname GCC_UNUSED,
145 			    HT_tagspec ** head,
146 			    HT_tagspec ** tail)
147 {
148     HT_tagspec *subj;
149 
150 #ifdef USE_COLOR_STYLE
151     int hcode;
152 #endif
153 
154     append_close_tag(tagname, head, tail);	/* initialize common members */
155     subj = *tail;
156     subj->start = TRUE;
157 
158 #ifdef USE_COLOR_STYLE
159     hcode = hash_code_lowercase_on_fly(tagname);
160     if (non_empty(classname)) {
161 
162 #  if 0
163 	/*
164 	 * we don't provide a classname as attribute of that tag, since for
165 	 * plain formatting tags they are not used directly for anything except
166 	 * style - and we provide style value directly.
167 	 */
168 	HTTag *tag = HTML_dtd.tags + subj->element;
169 	int class_attr_idx = 0;
170 	int n = tag->number_of_attributes;
171 	attr *attrs = tag->attributes;
172 
173 /*.... */
174 /* this is not implemented though it's easy */
175 #  endif
176 
177 	hcode = hash_code_aggregate_char('.', hcode);
178 	hcode = hash_code_aggregate_lower_str(classname, hcode);
179 	StrAllocCopy(subj->class_name, classname);
180     } else {
181 	StrAllocCopy(subj->class_name, "");
182     }
183     subj->style = hcode;
184 #endif
185 }
186 
/*
 * Character classes of tag and class names: 'p' points at the character to
 * test.  A name starts with a letter or '_' and continues with letters,
 * digits or '_'.  The argument is parenthesized so that pointer expressions
 * such as "s + 1" are dereferenced as a whole.
 */
#define isLeadP(p) (isalpha(UCH(*(p))) || *(p) == '_')
#define isNextP(p) (isalnum(UCH(*(p))) || *(p) == '_')

/*
 * Trace helpers: FMT_AT is appended to a message format, TXT_AT supplies the
 * matching arguments (1-based column and the whole tagspec).  TXT_AT relies
 * on the locals 'p' and 'ts' of the function that uses it.
 */
#define FMT_AT " at column %d:\n\t%s\n"
#define TXT_AT (int) (1 + p - ts), ts
192 
193 /* returns FALSE if incorrect */
html_src_parse_tagspec(char * ts,HTlexeme lexeme,int checkonly,int isstart)194 int html_src_parse_tagspec(char *ts,
195 			   HTlexeme lexeme,
196 			   int checkonly,
197 			   int isstart)
198 {
199     BOOL stop = FALSE;
200     BOOL code = FALSE;
201     char *p = ts;
202     char *tagstart = 0;
203     char *tagend = 0;
204     char *classstart;
205     char *classend;
206     char save, save1;
207     char after_excl = FALSE;
208     html_src_check_state state = HTSRC_CK_normal;
209     HT_tagspec *head = NULL;
210     HT_tagspec *tail = NULL;
211     HT_tagspec **slot = (isstart ? lexeme_start : lexeme_end) + lexeme;
212 
213     while (!stop) {
214 	switch (state) {
215 	case HTSRC_CK_normal:
216 	case HTSRC_CK_seen_excl:
217 	    switch (*p) {
218 	    case '\0':
219 		stop = TRUE;
220 		code = TRUE;
221 		break;
222 	    case ' ':
223 	    case '\t':
224 		break;
225 	    case '!':
226 		if (state == HTSRC_CK_seen_excl) {
227 		    CTRACE2(TRACE_CFG,
228 			    (tfp, "second '!'" FMT_AT,
229 			     TXT_AT));
230 		    stop = TRUE;
231 		    break;
232 		}
233 		state = HTSRC_CK_seen_excl;
234 		after_excl = TRUE;
235 		break;
236 	    default:
237 		if (!isLeadP(p)) {
238 		    CTRACE2(TRACE_CFG,
239 			    (tfp, "no name starting" FMT_AT,
240 			     TXT_AT));
241 		    stop = TRUE;
242 		    break;
243 		}
244 		tagstart = p;
245 		while (*p && isNextP(p))
246 		    ++p;
247 		tagend = p--;
248 		state = HTSRC_CK_after_tagname;
249 	    }
250 	    break;
251 	case HTSRC_CK_after_tagname:
252 	    switch (*p) {
253 	    case '\0':
254 		stop = TRUE;
255 		code = TRUE;
256 		/* FALLTHRU */
257 	    case ' ':
258 		/* FALLTHRU */
259 	    case '\t':
260 		save = *tagend;
261 
262 		*tagend = '\0';
263 		classstart = 0;
264 		if (checkonly) {
265 		    int idx = html_src_tag_index(tagstart);
266 
267 		    CTRACE2(TRACE_CFG,
268 			    (tfp, "tag index(%s) = %d\n",
269 			     tagstart, idx));
270 
271 		    *tagend = save;
272 		    if (idx == -1) {
273 			stop = TRUE;
274 			break;
275 		    }
276 		} else {
277 		    if (after_excl)
278 			append_close_tag(tagstart, &head, &tail);
279 		    else
280 			append_open_tag(tagstart, NULL, &head, &tail);
281 		}
282 		state = HTSRC_CK_normal;
283 		after_excl = FALSE;
284 		break;
285 	    case '.':
286 		if (after_excl) {
287 		    CTRACE2(TRACE_CFG,
288 			    (tfp, "dot after '!'" FMT_AT,
289 			     TXT_AT));
290 		    stop = TRUE;
291 		    break;
292 		}
293 		state = HTSRC_CK_seen_dot;
294 		break;
295 	    default:
296 		CTRACE2(TRACE_CFG,
297 			(tfp, "unexpected char '%c' after tagname" FMT_AT,
298 			 *p, TXT_AT));
299 		stop = TRUE;
300 		break;
301 	    }
302 	    break;
303 	case HTSRC_CK_seen_dot:
304 	    switch (*p) {
305 	    case ' ':
306 	    case '\t':
307 		break;
308 	    case '\0':
309 		CTRACE2(TRACE_CFG,
310 			(tfp, "expected text after dot" FMT_AT,
311 			 TXT_AT));
312 		stop = TRUE;
313 		break;
314 	    default:
315 		if (!isLeadP(p)) {
316 		    CTRACE2(TRACE_CFG,
317 			    (tfp, "no name starting" FMT_AT,
318 			     TXT_AT));
319 		    stop = TRUE;
320 		    break;
321 		}
322 		classstart = p;
323 		while (*p && isNextP(p))
324 		    ++p;
325 		classend = p--;
326 		save = *classend;
327 		*classend = '\0';
328 		save1 = *tagend;
329 		*tagend = '\0';
330 		if (checkonly) {
331 		    int idx = html_src_tag_index(tagstart);
332 
333 		    *tagend = save1;
334 		    *classend = save;
335 		    if (idx == -1)
336 			return FALSE;
337 		} else {
338 		    append_open_tag(tagstart, classstart, &head, &tail);
339 		}
340 		state = HTSRC_CK_normal;
341 		after_excl = FALSE;
342 		break;
343 	    }			/* of switch(*p) */
344 	    break;
345 	}			/* of switch */
346 	++p;
347     }
348 
349     if (code && !checkonly)
350 	*slot = head;
351 
352     return code;
353 }
354 
355 /*this will clean the data associated with lexeme 'l' */
html_src_clean_item(HTlexeme l)356 void html_src_clean_item(HTlexeme l)
357 {
358     int i;
359 
360     if (HTL_tagspecs[l])
361 	FREE(HTL_tagspecs[l]);
362     for (i = 0; i < 2; ++i) {
363 	HT_tagspec *cur;
364 	HT_tagspec **pts = (i ? lexeme_start : lexeme_end) + l;
365 	HT_tagspec *ts = *pts;
366 
367 	*pts = NULL;
368 	while (ts) {
369 	    FREE(ts->present);
370 	    FREE(ts->value);
371 #ifdef USE_COLOR_STYLE
372 	    if (ts->start) {
373 		FREE(ts->class_name);
374 	    }
375 #endif
376 	    cur = ts;
377 	    ts = ts->next;
378 	    FREE(cur);
379 	}
380     }
381 }
382 
383 /*this will be registered with atexit*/
html_src_clean_data(void)384 void html_src_clean_data(void)
385 {
386     int i;
387 
388     for (i = 0; i < HTL_num_lexemes; ++i)
389 	html_src_clean_item((HTlexeme) i);
390 }
391 
html_src_on_lynxcfg_reload(void)392 void html_src_on_lynxcfg_reload(void)
393 {
394     html_src_clean_data();
395     HTMLSRC_init_caches(TRUE);
396 }
397 
/*
 * Report a tagspec that could not be parsed while building the caches and
 * terminate the program.
 *
 * tag    - which half of the tagspec failed (callers pass "1st" or "2nd").
 * lexeme - number of the lexeme as passed by the caller.
 */
static void failed_init(const char *tag, int lexeme)
{
    FILE *err = stderr;

    fprintf(err,
	    gettext("parse-error while caching %s tagspec of lexeme %d\n"),
	    tag,
	    lexeme);
    fprintf(err,
	    gettext("Use -trace -trace-mask=8 to see details in log.\n"));

    exit_immediately(EXIT_FAILURE);
}
407 
HTMLSRC_init_caches(int dont_exit)408 void HTMLSRC_init_caches(int dont_exit)
409 {
410     int i;
411     char *p;
412     char buf[1000];
413     static char empty[] = "";
414 
415     CTRACE2(TRACE_CFG, (tfp, "HTMLSRC_init_caches(%d tagspecs)\n", HTL_num_lexemes));
416     for (i = 0; i < HTL_num_lexemes; ++i) {
417 	/*we assume that HT_tagspecs was NULLs at when program started */
418 	LYStrNCpy(buf,
419 		  HTL_tagspecs[i]
420 		  ? HTL_tagspecs[i]
421 		  : HTL_tagspecs_defaults[i],
422 		  sizeof(buf) - 1);
423 	StrAllocCopy(HTL_tagspecs[i], buf);
424 
425 	CTRACE2(TRACE_CFG, (tfp, "parsing lexeme %d: %s\n", i + 1, buf));
426 
427 	if ((p = strchr(buf, ':')) != 0)
428 	    *p = '\0';
429 	if (!html_src_parse_tagspec(buf,
430 				    (HTlexeme) i,
431 				    FALSE,
432 				    TRUE) && !dont_exit) {
433 	    failed_init("1st", i);
434 	}
435 	if (!html_src_parse_tagspec(p ? p + 1 : empty,
436 				    (HTlexeme) i,
437 				    FALSE,
438 				    FALSE) && !dont_exit) {
439 	    failed_init("2nd", i);
440 	}
441     }
442 }
443 
444 #endif /* ifdef USE_PRETTYSRC */
445