1 /*	$OpenBSD: checknr.c,v 1.14 2005/03/29 23:46:19 jaredy Exp $	*/
2 /*	$NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $	*/
3 
4 /*
5  * Copyright (c) 1980, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifndef lint
34 static const char copyright[] =
35 "@(#) Copyright (c) 1980, 1993\n\
36 	The Regents of the University of California.  All rights reserved.\n";
37 #endif /* not lint */
38 
39 #ifndef lint
40 #if 0
41 static const char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
42 #else
43 static const char rcsid[] = "$OpenBSD: checknr.c,v 1.14 2005/03/29 23:46:19 jaredy Exp $";
44 #endif
45 #endif /* not lint */
46 
47 /*
48  * checknr: check an nroff/troff input file for matching macro calls.
49  * we also attempt to match size and font changes, but only the embedded
50  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
51  * later but for now think of these restrictions as contributions to
52  * structured typesetting.
53  */
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <unistd.h>
58 #include <ctype.h>
59 #include <err.h>
60 
/* Fixed capacities of the static tables used throughout this file. */
enum {
	MAXSTK	= 100,	/* Stack size */
	MAXBR	= 100,	/* Max number of bracket pairs known */
	MAXCMDS	= 500	/* Max number of commands known */
};
64 
/*
 * Stack of openers (macros, \s size changes, \f font changes) that have
 * been seen but not yet closed.  stktop is the index of the most recent
 * entry; process() resets it to -1 (empty) at the start of each file.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro line */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
};

struct stkstr stk[MAXSTK];
int stktop;
75 
/* Forward declarations for the routines defined further down. */
void	usage(void);
void	addmac(char *mac);
void	process(FILE *f);
void	pe(int lineno);
int	eq(char *s1, char *s2);
void	complain(int i);
void	prop(int i);
void	chkcmd(char *line, char *mac);
void	addcmd(char *line);
void	nomatch(char *mac);
void	checkknown(char *mac);
int	binsrch(char *mac);
88 
/*
 * The kinds of opening and closing brackets.
 *
 * The first two slots stand for size and font changes, which are also
 * produced by the inline \s and \f escapes; SZ and FT name those slots.
 * The list ends at the first entry whose opbr is NULL, which is where
 * the -a option appends user-supplied pairs.
 */
#define SZ	0
#define FT	1

struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz",	"sz" },	/* SZ: also \s */
	{ "ft",	"ft" },	/* FT: also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "BD",	"DE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	{ "XA",	"XE" },
	{ "XS",	"XE" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	/* end of list */
	{ NULL,	NULL }
};
146 
147 /*
148  * All commands known to nroff, plus macro packages.
149  * Used so we can complain about unrecognized commands.
150  */
151 char *knowncmds[MAXCMDS] = {
152 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
153 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
154 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
155 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
156 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
157 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
158 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
159 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
160 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
161 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
162 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
163 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
164 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
165 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
166 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
167 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
168 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
169 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
170 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
171 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
172 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
173 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
174 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
175 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
176 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
177 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
178 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
179 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
180 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
181 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
182 "yr", 0
183 };
184 
/* State of the input currently being scanned. */
char	 line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	 lineno;	/* current line number in input file */
int	 nfiles;	/* number of files to process */

/* Command-line switches. */
int	 fflag;		/* -f: ignore \f */
int	 sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds[] table. */
int	 ncmds;		/* size of knowncmds */
int	 slot;		/* slot in knowncmds found by binsrch */
193 
194 int
main(int argc,char * argv[])195 main(int argc, char *argv[])
196 {
197 	FILE *f;
198 	int i;
199 	char *cp;
200 	char b1[4];
201 
202 	/* Figure out how many known commands there are */
203 	while (knowncmds[ncmds])
204 		ncmds++;
205 	while (argc > 1 && argv[1][0] == '-') {
206 		switch(argv[1][1]) {
207 
208 		/* -a: add pairs of macros */
209 		case 'a':
210 			i = strlen(argv[1]) - 2;
211 			if (i % 6 != 0)
212 				usage();
213 			/* look for empty macro slots */
214 			for (i=0; br[i].opbr; i++)
215 				;
216 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
217 				if (i >= MAXBR)
218 					errx(1, "too many pairs");
219 				if ((br[i].opbr = malloc(3)) == NULL)
220 					err(1, "malloc");
221 				strlcpy(br[i].opbr, cp, 3);
222 				if ((br[i].clbr = malloc(3)) == NULL)
223 					err(1, "malloc");
224 				strlcpy(br[i].clbr, cp+3, 3);
225 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
226 				addmac(br[i].clbr);
227 				i++;
228 			}
229 			break;
230 
231 		/* -c: add known commands */
232 		case 'c':
233 			i = strlen(argv[1]) - 2;
234 			if (i % 3 != 0)
235 				usage();
236 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
237 				if (cp[2] && cp[2] != '.')
238 					usage();
239 				strncpy(b1, cp, 2);
240 				addmac(b1);
241 			}
242 			break;
243 
244 		/* -f: ignore font changes */
245 		case 'f':
246 			fflag = 1;
247 			break;
248 
249 		/* -s: ignore size changes */
250 		case 's':
251 			sflag = 1;
252 			break;
253 		default:
254 			usage();
255 		}
256 		argc--; argv++;
257 	}
258 
259 	nfiles = argc - 1;
260 
261 	if (nfiles > 0) {
262 		for (i=1; i<argc; i++) {
263 			cfilename = argv[i];
264 			f = fopen(cfilename, "r");
265 			if (f == NULL)
266 				warn("%s", cfilename);
267 			else
268 				process(f);
269 		}
270 	} else {
271 		cfilename = "stdin";
272 		process(stdin);
273 	}
274 	exit(0);
275 }
276 
/*
 * Write the command synopsis to standard error and terminate the
 * program with exit status 1.  Does not return.
 */
void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr, "usage: %s "
	    "[-fs] [-a.x1.y1.x2.y2. ... .xn.yn] [-c.x1.x2.x3. ... .xn] [file]\n",
	    __progname);
	exit(1);
}
286 
287 void
process(FILE * f)288 process(FILE *f)
289 {
290 	int i, n;
291 	char mac[5];	/* The current macro or nroff command */
292 	int pl;
293 
294 	stktop = -1;
295 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
296 		if (line[0] == '.') {
297 			/*
298 			 * find and isolate the macro/command name.
299 			 */
300 			strncpy(mac, line+1, 4);
301 			if (isspace(mac[0])) {
302 				pe(lineno);
303 				printf("Empty command\n");
304 			} else if (isspace(mac[1])) {
305 				mac[1] = 0;
306 			} else if (isspace(mac[2])) {
307 				mac[2] = 0;
308 			} else if (mac[0] != '\\' || mac[1] != '\"') {
309 				pe(lineno);
310 				printf("Command too long\n");
311 			}
312 
313 			/*
314 			 * Is it a known command?
315 			 */
316 			checkknown(mac);
317 
318 			/*
319 			 * Should we add it?
320 			 */
321 			if (eq(mac, "de"))
322 				addcmd(line);
323 
324 			chkcmd(line, mac);
325 		}
326 
327 		/*
328 		 * At this point we process the line looking
329 		 * for \s and \f.
330 		 */
331 		for (i=0; line[i]; i++)
332 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
333 				if (!sflag && line[++i]=='s') {
334 					pl = line[++i];
335 					if (isdigit(pl)) {
336 						n = pl - '0';
337 						pl = ' ';
338 					} else
339 						n = 0;
340 					while (isdigit(line[++i]))
341 						n = 10 * n + line[i] - '0';
342 					i--;
343 					if (n == 0) {
344 						if (stk[stktop].opno == SZ) {
345 							stktop--;
346 						} else {
347 							pe(lineno);
348 							printf("unmatched \\s0\n");
349 						}
350 					} else {
351 						stk[++stktop].opno = SZ;
352 						stk[stktop].pl = pl;
353 						stk[stktop].parm = n;
354 						stk[stktop].lno = lineno;
355 					}
356 				} else if (!fflag && line[i]=='f') {
357 					n = line[++i];
358 					if (n == 'P') {
359 						if (stk[stktop].opno == FT) {
360 							stktop--;
361 						} else {
362 							pe(lineno);
363 							printf("unmatched \\fP\n");
364 						}
365 					} else {
366 						stk[++stktop].opno = FT;
367 						stk[stktop].pl = 1;
368 						stk[stktop].parm = n;
369 						stk[stktop].lno = lineno;
370 					}
371 				}
372 			}
373 	}
374 	/*
375 	 * We've hit the end and look at all this stuff that hasn't been
376 	 * matched yet!  Complain, complain.
377 	 */
378 	for (i=stktop; i>=0; i--) {
379 		complain(i);
380 	}
381 }
382 
383 void
complain(int i)384 complain(int i)
385 {
386 	pe(stk[i].lno);
387 	printf("Unmatched ");
388 	prop(i);
389 	printf("\n");
390 }
391 
392 void
prop(int i)393 prop(int i)
394 {
395 	if (stk[i].pl == 0)
396 		printf(".%s", br[stk[i].opno].opbr);
397 	else switch(stk[i].opno) {
398 	case SZ:
399 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
400 		break;
401 	case FT:
402 		printf("\\f%c", stk[i].parm);
403 		break;
404 	default:
405 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
406 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
407 	}
408 }
409 
410 void
chkcmd(char * line,char * mac)411 chkcmd(char *line, char *mac)
412 {
413 	int i;
414 
415 	/*
416 	 * Check to see if it matches top of stack.
417 	 */
418 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
419 		stktop--;	/* OK. Pop & forget */
420 	else {
421 		/* No. Maybe it's an opener */
422 		for (i=0; br[i].opbr; i++) {
423 			if (eq(mac, br[i].opbr)) {
424 				/* Found. Push it. */
425 				stktop++;
426 				stk[stktop].opno = i;
427 				stk[stktop].pl = 0;
428 				stk[stktop].parm = 0;
429 				stk[stktop].lno = lineno;
430 				break;
431 			}
432 			/*
433 			 * Maybe it's an unmatched closer.
434 			 * NOTE: this depends on the fact
435 			 * that none of the closers can be
436 			 * openers too.
437 			 */
438 			if (eq(mac, br[i].clbr)) {
439 				nomatch(mac);
440 				break;
441 			}
442 		}
443 	}
444 }
445 
446 void
nomatch(char * mac)447 nomatch(char *mac)
448 {
449 	int i, j;
450 
451 	/*
452 	 * Look for a match further down on stack
453 	 * If we find one, it suggests that the stuff in
454 	 * between is supposed to match itself.
455 	 */
456 	for (j=stktop; j>=0; j--)
457 		if (eq(mac,br[stk[j].opno].clbr)) {
458 			/* Found.  Make a good diagnostic. */
459 			if (j == stktop-2) {
460 				/*
461 				 * Check for special case \fx..\fR and don't
462 				 * complain.
463 				 */
464 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
465 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
466 					stktop = j -1;
467 					return;
468 				}
469 				/*
470 				 * We have two unmatched frobs.  Chances are
471 				 * they were intended to match, so we mention
472 				 * them together.
473 				 */
474 				pe(stk[j+1].lno);
475 				prop(j+1);
476 				printf(" does not match %d: ", stk[j+2].lno);
477 				prop(j+2);
478 				printf("\n");
479 			} else for (i=j+1; i <= stktop; i++) {
480 				complain(i);
481 			}
482 			stktop = j-1;
483 			return;
484 		}
485 	/* Didn't find one.  Throw this away. */
486 	pe(lineno);
487 	printf("Unmatched .%s\n", mac);
488 }
489 
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
496 
497 /* print the first part of an error message, given the line number */
498 void
pe(int lineno)499 pe(int lineno)
500 {
501 	if (nfiles > 1)
502 		printf("%s: ", cfilename);
503 	printf("%d: ", lineno);
504 }
505 
506 void
checkknown(char * mac)507 checkknown(char *mac)
508 {
509 
510 	if (eq(mac, "."))
511 		return;
512 	if (binsrch(mac) >= 0)
513 		return;
514 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
515 		return;
516 
517 	pe(lineno);
518 	printf("Unknown command: .%s\n", mac);
519 }
520 
521 /*
522  * We have a .de xx line in "line".  Add xx to the list of known commands.
523  */
524 void
addcmd(char * line)525 addcmd(char *line)
526 {
527 	char *mac;
528 
529 	/* grab the macro being defined */
530 	mac = line+4;
531 	while (isspace(*mac))
532 		mac++;
533 	if (*mac == 0) {
534 		pe(lineno);
535 		printf("illegal define: %s\n", line);
536 		return;
537 	}
538 	mac[2] = 0;
539 	if (isspace(mac[1]) || mac[1] == '\\')
540 		mac[1] = 0;
541 	if (ncmds >= MAXCMDS) {
542 		printf("Only %d known commands allowed\n", MAXCMDS);
543 		exit(1);
544 	}
545 	addmac(mac);
546 }
547 
548 /*
549  * Add mac to the list.  We should really have some kind of tree
550  * structure here but this is a quick-and-dirty job and I just don't
551  * have time to mess with it.  (I wonder if this will come back to haunt
552  * me someday?)  Anyway, I claim that .de is fairly rare in user
553  * nroff programs, and the register loop below is pretty fast.
554  */
555 void
addmac(char * mac)556 addmac(char *mac)
557 {
558 	char **src, **dest, **loc;
559 
560 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
561 #ifdef DEBUG
562 		printf("binsrch(%s) -> already in table\n", mac);
563 #endif /* DEBUG */
564 		return;
565 	}
566 	/* binsrch sets slot as a side effect */
567 #ifdef DEBUG
568 printf("binsrch(%s) -> %d\n", mac, slot);
569 #endif
570 	loc = &knowncmds[slot];
571 	src = &knowncmds[ncmds-1];
572 	dest = src+1;
573 	while (dest > loc)
574 		*dest-- = *src--;
575 	if ((*loc = strdup(mac)) == NULL)
576 		err(1, "strdup");
577 	ncmds++;
578 #ifdef DEBUG
579 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
580 #endif
581 }
582 
583 /*
584  * Do a binary search in knowncmds for mac.
585  * If found, return the index.  If not, return -1.
586  */
587 int
binsrch(char * mac)588 binsrch(char *mac)
589 {
590 	char *p;		/* pointer to current cmd in list */
591 	int d;			/* difference if any */
592 	int mid;		/* mid point in binary search */
593 	int top, bot;		/* boundaries of bin search, inclusive */
594 
595 	top = ncmds-1;
596 	bot = 0;
597 	while (top >= bot) {
598 		mid = (top+bot)/2;
599 		p = knowncmds[mid];
600 		d = p[0] - mac[0];
601 		if (d == 0)
602 			d = p[1] - mac[1];
603 		if (d == 0)
604 			return mid;
605 		if (d < 0)
606 			bot = mid + 1;
607 		else
608 			top = mid - 1;
609 	}
610 	slot = bot;	/* place it would have gone */
611 	return -1;
612 }
613