1 /* $OpenBSD: checknr.c,v 1.14 2005/03/29 23:46:19 jaredy Exp $ */
2 /* $NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $ */
3
4 /*
5 * Copyright (c) 1980, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #ifndef lint
34 static const char copyright[] =
35 "@(#) Copyright (c) 1980, 1993\n\
36 The Regents of the University of California. All rights reserved.\n";
37 #endif /* not lint */
38
39 #ifndef lint
40 #if 0
41 static const char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
42 #else
43 static const char rcsid[] = "$OpenBSD: checknr.c,v 1.14 2005/03/29 23:46:19 jaredy Exp $";
44 #endif
45 #endif /* not lint */
46
47 /*
48 * checknr: check an nroff/troff input file for matching macro calls.
49 * we also attempt to match size and font changes, but only the embedded
50 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
51 * later but for now think of these restrictions as contributions to
52 * structured typesetting.
53 */
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <unistd.h>
58 #include <ctype.h>
59 #include <err.h>
60
61 #define MAXSTK 100 /* Stack size */
62 #define MAXBR 100 /* Max number of bracket pairs known */
63 #define MAXCMDS 500 /* Max number of commands known */
64
/*
 * The stack on which we remember what we've seen so far.
 * Each entry describes one opener that has not been closed yet: either
 * a macro from br[] pushed by chkcmd(), or an embedded \s / \f escape
 * pushed by process().  stktop is the index of the most recently pushed
 * entry; process() resets it to -1 (empty stack) at the start of a file.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;
75
/* Forward declarations for the functions defined later in this file. */
void usage(void);
void addmac(char *);
void process(FILE *);
void pe(int);
int eq(char *, char *);
void complain(int);
void prop(int);
void chkcmd(char *, char *);
void addcmd(char *);
void nomatch(char *);
void checkknown(char *);
int binsrch(char *);
88
/*
 * The kinds of opening and closing brackets.
 * The table is terminated by an entry whose opbr is null; the scans in
 * main() and chkcmd() stop there.  The unused slots after the built-in
 * pairs (up to MAXBR) are where main() stores pairs given with -a.
 * SZ and FT name the indexes of the first two entries, which are also
 * used for the embedded \s and \f escapes.
 */
struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ 0
	{ "sz", "sz" },	/* also \s */
#define FT 1
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, }
};
146
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The entries must stay sorted by their first two bytes (a one-character
 * name sorts before any two-character name with the same first byte):
 * binsrch() does a binary search on exactly those two bytes.  The null
 * entry ends the initial list; main() counts up to it to set ncmds, and
 * addmac() inserts further names into the spare slots up to MAXCMDS.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
184
int lineno;		/* current line number in input file */
char line[256];		/* the current line, as read by fgets() in process() */
char *cfilename;	/* name of current file ("stdin" for standard input) */
int nfiles;		/* number of files to process */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */
int ncmds;		/* size of knowncmds (number of entries in use) */
int slot;		/* slot in knowncmds found by binsrch: where a missing name would be inserted */
193
194 int
main(int argc,char * argv[])195 main(int argc, char *argv[])
196 {
197 FILE *f;
198 int i;
199 char *cp;
200 char b1[4];
201
202 /* Figure out how many known commands there are */
203 while (knowncmds[ncmds])
204 ncmds++;
205 while (argc > 1 && argv[1][0] == '-') {
206 switch(argv[1][1]) {
207
208 /* -a: add pairs of macros */
209 case 'a':
210 i = strlen(argv[1]) - 2;
211 if (i % 6 != 0)
212 usage();
213 /* look for empty macro slots */
214 for (i=0; br[i].opbr; i++)
215 ;
216 for (cp=argv[1]+3; cp[-1]; cp += 6) {
217 if (i >= MAXBR)
218 errx(1, "too many pairs");
219 if ((br[i].opbr = malloc(3)) == NULL)
220 err(1, "malloc");
221 strlcpy(br[i].opbr, cp, 3);
222 if ((br[i].clbr = malloc(3)) == NULL)
223 err(1, "malloc");
224 strlcpy(br[i].clbr, cp+3, 3);
225 addmac(br[i].opbr); /* knows pairs are also known cmds */
226 addmac(br[i].clbr);
227 i++;
228 }
229 break;
230
231 /* -c: add known commands */
232 case 'c':
233 i = strlen(argv[1]) - 2;
234 if (i % 3 != 0)
235 usage();
236 for (cp=argv[1]+3; cp[-1]; cp += 3) {
237 if (cp[2] && cp[2] != '.')
238 usage();
239 strncpy(b1, cp, 2);
240 addmac(b1);
241 }
242 break;
243
244 /* -f: ignore font changes */
245 case 'f':
246 fflag = 1;
247 break;
248
249 /* -s: ignore size changes */
250 case 's':
251 sflag = 1;
252 break;
253 default:
254 usage();
255 }
256 argc--; argv++;
257 }
258
259 nfiles = argc - 1;
260
261 if (nfiles > 0) {
262 for (i=1; i<argc; i++) {
263 cfilename = argv[i];
264 f = fopen(cfilename, "r");
265 if (f == NULL)
266 warn("%s", cfilename);
267 else
268 process(f);
269 }
270 } else {
271 cfilename = "stdin";
272 process(stdin);
273 }
274 exit(0);
275 }
276
/*
 * usage: write the command synopsis to stderr and exit with status 1.
 */
void
usage(void)
{
	extern char *__progname;
	FILE *out = stderr;

	fprintf(out,
	    "usage: %s [-fs] [-a.x1.y1.x2.y2. ... .xn.yn] "
	    "[-c.x1.x2.x3. ... .xn] [file]\n", __progname);
	exit(1);
}
286
287 void
process(FILE * f)288 process(FILE *f)
289 {
290 int i, n;
291 char mac[5]; /* The current macro or nroff command */
292 int pl;
293
294 stktop = -1;
295 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
296 if (line[0] == '.') {
297 /*
298 * find and isolate the macro/command name.
299 */
300 strncpy(mac, line+1, 4);
301 if (isspace(mac[0])) {
302 pe(lineno);
303 printf("Empty command\n");
304 } else if (isspace(mac[1])) {
305 mac[1] = 0;
306 } else if (isspace(mac[2])) {
307 mac[2] = 0;
308 } else if (mac[0] != '\\' || mac[1] != '\"') {
309 pe(lineno);
310 printf("Command too long\n");
311 }
312
313 /*
314 * Is it a known command?
315 */
316 checkknown(mac);
317
318 /*
319 * Should we add it?
320 */
321 if (eq(mac, "de"))
322 addcmd(line);
323
324 chkcmd(line, mac);
325 }
326
327 /*
328 * At this point we process the line looking
329 * for \s and \f.
330 */
331 for (i=0; line[i]; i++)
332 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
333 if (!sflag && line[++i]=='s') {
334 pl = line[++i];
335 if (isdigit(pl)) {
336 n = pl - '0';
337 pl = ' ';
338 } else
339 n = 0;
340 while (isdigit(line[++i]))
341 n = 10 * n + line[i] - '0';
342 i--;
343 if (n == 0) {
344 if (stk[stktop].opno == SZ) {
345 stktop--;
346 } else {
347 pe(lineno);
348 printf("unmatched \\s0\n");
349 }
350 } else {
351 stk[++stktop].opno = SZ;
352 stk[stktop].pl = pl;
353 stk[stktop].parm = n;
354 stk[stktop].lno = lineno;
355 }
356 } else if (!fflag && line[i]=='f') {
357 n = line[++i];
358 if (n == 'P') {
359 if (stk[stktop].opno == FT) {
360 stktop--;
361 } else {
362 pe(lineno);
363 printf("unmatched \\fP\n");
364 }
365 } else {
366 stk[++stktop].opno = FT;
367 stk[stktop].pl = 1;
368 stk[stktop].parm = n;
369 stk[stktop].lno = lineno;
370 }
371 }
372 }
373 }
374 /*
375 * We've hit the end and look at all this stuff that hasn't been
376 * matched yet! Complain, complain.
377 */
378 for (i=stktop; i>=0; i--) {
379 complain(i);
380 }
381 }
382
383 void
complain(int i)384 complain(int i)
385 {
386 pe(stk[i].lno);
387 printf("Unmatched ");
388 prop(i);
389 printf("\n");
390 }
391
392 void
prop(int i)393 prop(int i)
394 {
395 if (stk[i].pl == 0)
396 printf(".%s", br[stk[i].opno].opbr);
397 else switch(stk[i].opno) {
398 case SZ:
399 printf("\\s%c%d", stk[i].pl, stk[i].parm);
400 break;
401 case FT:
402 printf("\\f%c", stk[i].parm);
403 break;
404 default:
405 printf("Bug: stk[%d].opno = %d = .%s, .%s",
406 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
407 }
408 }
409
410 void
chkcmd(char * line,char * mac)411 chkcmd(char *line, char *mac)
412 {
413 int i;
414
415 /*
416 * Check to see if it matches top of stack.
417 */
418 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
419 stktop--; /* OK. Pop & forget */
420 else {
421 /* No. Maybe it's an opener */
422 for (i=0; br[i].opbr; i++) {
423 if (eq(mac, br[i].opbr)) {
424 /* Found. Push it. */
425 stktop++;
426 stk[stktop].opno = i;
427 stk[stktop].pl = 0;
428 stk[stktop].parm = 0;
429 stk[stktop].lno = lineno;
430 break;
431 }
432 /*
433 * Maybe it's an unmatched closer.
434 * NOTE: this depends on the fact
435 * that none of the closers can be
436 * openers too.
437 */
438 if (eq(mac, br[i].clbr)) {
439 nomatch(mac);
440 break;
441 }
442 }
443 }
444 }
445
446 void
nomatch(char * mac)447 nomatch(char *mac)
448 {
449 int i, j;
450
451 /*
452 * Look for a match further down on stack
453 * If we find one, it suggests that the stuff in
454 * between is supposed to match itself.
455 */
456 for (j=stktop; j>=0; j--)
457 if (eq(mac,br[stk[j].opno].clbr)) {
458 /* Found. Make a good diagnostic. */
459 if (j == stktop-2) {
460 /*
461 * Check for special case \fx..\fR and don't
462 * complain.
463 */
464 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
465 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
466 stktop = j -1;
467 return;
468 }
469 /*
470 * We have two unmatched frobs. Chances are
471 * they were intended to match, so we mention
472 * them together.
473 */
474 pe(stk[j+1].lno);
475 prop(j+1);
476 printf(" does not match %d: ", stk[j+2].lno);
477 prop(j+2);
478 printf("\n");
479 } else for (i=j+1; i <= stktop; i++) {
480 complain(i);
481 }
482 stktop = j-1;
483 return;
484 }
485 /* Didn't find one. Throw this away. */
486 pe(lineno);
487 printf("Unmatched .%s\n", mac);
488 }
489
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical and 0 otherwise.
 */
int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
496
497 /* print the first part of an error message, given the line number */
498 void
pe(int lineno)499 pe(int lineno)
500 {
501 if (nfiles > 1)
502 printf("%s: ", cfilename);
503 printf("%d: ", lineno);
504 }
505
506 void
checkknown(char * mac)507 checkknown(char *mac)
508 {
509
510 if (eq(mac, "."))
511 return;
512 if (binsrch(mac) >= 0)
513 return;
514 if (mac[0] == '\\' && mac[1] == '"') /* comments */
515 return;
516
517 pe(lineno);
518 printf("Unknown command: .%s\n", mac);
519 }
520
521 /*
522 * We have a .de xx line in "line". Add xx to the list of known commands.
523 */
524 void
addcmd(char * line)525 addcmd(char *line)
526 {
527 char *mac;
528
529 /* grab the macro being defined */
530 mac = line+4;
531 while (isspace(*mac))
532 mac++;
533 if (*mac == 0) {
534 pe(lineno);
535 printf("illegal define: %s\n", line);
536 return;
537 }
538 mac[2] = 0;
539 if (isspace(mac[1]) || mac[1] == '\\')
540 mac[1] = 0;
541 if (ncmds >= MAXCMDS) {
542 printf("Only %d known commands allowed\n", MAXCMDS);
543 exit(1);
544 }
545 addmac(mac);
546 }
547
548 /*
549 * Add mac to the list. We should really have some kind of tree
550 * structure here but this is a quick-and-dirty job and I just don't
551 * have time to mess with it. (I wonder if this will come back to haunt
552 * me someday?) Anyway, I claim that .de is fairly rare in user
553 * nroff programs, and the register loop below is pretty fast.
554 */
555 void
addmac(char * mac)556 addmac(char *mac)
557 {
558 char **src, **dest, **loc;
559
560 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
561 #ifdef DEBUG
562 printf("binsrch(%s) -> already in table\n", mac);
563 #endif /* DEBUG */
564 return;
565 }
566 /* binsrch sets slot as a side effect */
567 #ifdef DEBUG
568 printf("binsrch(%s) -> %d\n", mac, slot);
569 #endif
570 loc = &knowncmds[slot];
571 src = &knowncmds[ncmds-1];
572 dest = src+1;
573 while (dest > loc)
574 *dest-- = *src--;
575 if ((*loc = strdup(mac)) == NULL)
576 err(1, "strdup");
577 ncmds++;
578 #ifdef DEBUG
579 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
580 #endif
581 }
582
583 /*
584 * Do a binary search in knowncmds for mac.
585 * If found, return the index. If not, return -1.
586 */
587 int
binsrch(char * mac)588 binsrch(char *mac)
589 {
590 char *p; /* pointer to current cmd in list */
591 int d; /* difference if any */
592 int mid; /* mid point in binary search */
593 int top, bot; /* boundaries of bin search, inclusive */
594
595 top = ncmds-1;
596 bot = 0;
597 while (top >= bot) {
598 mid = (top+bot)/2;
599 p = knowncmds[mid];
600 d = p[0] - mac[0];
601 if (d == 0)
602 d = p[1] - mac[1];
603 if (d == 0)
604 return mid;
605 if (d < 0)
606 bot = mid + 1;
607 else
608 top = mid - 1;
609 }
610 slot = bot; /* place it would have gone */
611 return -1;
612 }
613