1 /* $NetBSD: deroff.c,v 1.14 2025/02/20 19:32:16 rillig Exp $ */
2
3 /* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */
4
5 /*-
6 * Copyright (c) 1988, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33 /*
34 * Copyright (C) Caldera International Inc. 2001-2002.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code and documentation must retain the above
41 * copyright notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed or owned by Caldera
48 * International, Inc.
49 * 4. Neither the name of Caldera International, Inc. nor the names of other
50 * contributors may be used to endorse or promote products derived from
51 * this software without specific prior written permission.
52 *
53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64 * POSSIBILITY OF SUCH DAMAGE.
65 */
66
67 #include <sys/cdefs.h>
68 __RCSID("$NetBSD: deroff.c,v 1.14 2025/02/20 19:32:16 rillig Exp $");
69
70 #include <err.h>
71 #include <limits.h>
72 #include <stddef.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <unistd.h>
77
78 /*
 * Deroff command -- strip troff, eqn, and tbl sequences from
 * a file.  Among its flags, -w causes output of one word per line
 * rather than in the original format.
 * -mm (or -ms) causes the corresponding macros to be interpreted
 * so that just sentences are output.
 * -ml also gets rid of lists.
85 * Deroff follows .so and .nx commands, removes contents of macro
86 * definitions, equations (both .EQ ... .EN and $...$),
87 * Tbl command sequences, and Troff backslash constructions.
88 *
89 * All input is through the Cget macro;
90 * the most recently read character is in c.
91 *
92 * Modified by Robert Henry to process -me and -man macros.
93 */
94
/*
 * Cget: read the next character from infile into c and yield it.
 * At end of file, eof() is called instead; it switches to another
 * input (or exits) and yields that input's next character.  When the
 * character read equals ldelim and no included file is active
 * (filesp == files), skeqn() is called to skip the inline equation.
 *
 * C1get: same, but without the inline-equation check.
 */
#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get ( (c=getc(infile)) == EOF ? eof() : c)
97
98 #ifdef DEBUG
99 # define C _C()
100 # define C1 _C1()
101 #else /* not DEBUG */
102 # define C Cget
103 # define C1 C1get
104 #endif /* not DEBUG */
105
/*
 * SKIP: loop header that reads characters up to and including the next
 * newline; it must be followed by a statement (normally just `;').
 *
 * SKIP_TO_COM: skip two lines, then keep reading until a '.' that
 * directly follows a newline is itself followed by a character that
 * is not greater than 'Z'.  pc tracks the previously read character.
 * This expands to several statements, so it must not be used as the
 * unbraced body of an if/else/loop.
 */
#define SKIP while (C != '\n')
#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c
108
109 #define YES 1
110 #define NO 0
111 #define MS 0 /* -ms */
112 #define MM 1 /* -mm */
113 #define ME 2 /* -me */
114 #define MA 3 /* -man */
115
116 #ifdef DEBUG
117 static char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
118 #endif /* DEBUG */
119
120 #define ONE 1
121 #define TWO 2
122
123 #define NOCHAR -2
124 #define SPECIAL 0
125 #define APOS 1
126 #define PUNCT 2
127 #define DIGIT 3
128 #define LETTER 4
129
130 #define MAXFILES 20
131
132 static int iflag;
133 static int wordflag;
134 static int msflag; /* processing a source written using a mac package */
135 static int mac; /* which package */
136 static int disp;
137 static int parag;
138 static int inmacro;
139 static int intable;
140 static int keepblock; /* keep blocks of text; normally false when msflag */
141
/*
 * Character class table: SPECIAL, PUNCT, APOS, DIGIT, or LETTER.
 * It is indexed by values converted to unsigned char and by raw
 * getc() results, so it must cover the whole unsigned char range;
 * entries that are never set stay 0 (SPECIAL).
 */
static char chars[UCHAR_MAX + 1];
143
144 static char line[LINE_MAX];
145 static char *lp;
146
147 static int c;
148 static int pc;
149 static int ldelim;
150 static int rdelim;
151
152 static char fname[PATH_MAX];
153 static FILE *files[MAXFILES];
154 static FILE **filesp;
155 static FILE *infile;
156
157 static int argc;
158 static char **argv;
159
160 /*
161 * Macro processing
162 *
163 * Macro table definitions
164 */
165 typedef int pacmac; /* compressed macro name */
166 static int argconcat = 0; /* concat arguments together (-me only) */
167
168 #define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
169 #define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF), __USE(c1), __USE(c2))
170
/*
 * One entry of a macro dispatch table.  Tables are terminated by an
 * entry whose macname is 0.
 */
struct mactab {
	int condition;		/* extra match condition: NONE, FNEST, NOMAC, ... */
	pacmac macname;		/* two-character macro name packed by tomac() */
	int (*func)(pacmac);	/* handler; its result (COMX, COM, other) steers comline() */
};
176
177 static const struct mactab troffmactab[];
178 static const struct mactab ppmactab[];
179 static const struct mactab msmactab[];
180 static const struct mactab mmmactab[];
181 static const struct mactab memactab[];
182 static const struct mactab manmactab[];
183
184 /*
185 * Macro table initialization
186 */
187 #define M(cond, c1, c2, func) {cond, tomac(c1, c2), func}
188
189 /*
190 * Flags for matching conditions other than
191 * the macro name
192 */
193 #define NONE 0
194 #define FNEST 1 /* no nested files */
195 #define NOMAC 2 /* no macro */
196 #define MAC 3 /* macro */
197 #define PARAG 4 /* in a paragraph */
198 #define MSF 5 /* msflag is on */
199 #define NBLK 6 /* set if no blocks to be kept */
200
201 /*
202 * Return codes from macro minions, determine where to jump,
203 * how to repeat/reprocess text
204 */
205 #define COMX 1 /* goto comx */
206 #define COM 2 /* goto com */
207
208 static int skeqn(void);
209 static int eof(void);
210 #ifdef DEBUG
211 static int _C1(void);
212 static int _C(void);
213 #endif
214 static int EQ(pacmac);
215 static int domacro(pacmac);
216 static int PS(pacmac);
217 static int skip(pacmac);
218 static int intbl(pacmac);
219 static int outtbl(pacmac);
220 static int so(pacmac);
221 static int nx(pacmac);
222 static int skiptocom(pacmac);
223 static int PP(pacmac);
224 static int AU(pacmac);
225 static int SH(pacmac);
226 static int UX(pacmac);
227 static int MMHU(pacmac);
228 static int mesnblock(pacmac);
229 static int mssnblock(pacmac);
230 static int nf(pacmac);
231 static int ce(pacmac);
232 static int meip(pacmac);
233 static int mepp(pacmac);
234 static int mesh(pacmac);
235 static int mefont(pacmac);
236 static int manfont(pacmac);
237 static int manpp(pacmac);
238 static int macsort(const void *, const void *);
239 static int sizetab(const struct mactab *);
240 static void getfname(void);
241 static void textline(char *, int);
242 static void work(void) __dead;
243 static void regline(void (*)(char *, int), int);
244 static void macro(void);
245 static void tbl(void);
246 static void stbl(void);
247 static void eqn(void);
248 static void backsl(void);
249 static void sce(void);
250 static void refer(int);
251 static void inpic(void);
252 static void msputmac(char *, int);
253 static void msputwords(void);
254 static void meputmac(char *, int);
255 static void meputwords(void);
256 static void noblock(char, char);
257 static void defcomline(pacmac);
258 static void comline(void);
259 static void buildtab(const struct mactab **, int *);
260 static FILE *opn(char *);
261 static struct mactab *macfill(struct mactab *, const struct mactab *);
262 static void usage(void) __dead;
263
264 int
main(int ac,char ** av)265 main(int ac, char **av)
266 {
267 int i, ch;
268 int errflg = 0;
269 int kflag = NO;
270
271 iflag = NO;
272 wordflag = NO;
273 msflag = NO;
274 mac = ME;
275 disp = NO;
276 parag = NO;
277 inmacro = NO;
278 intable = NO;
279 ldelim = NOCHAR;
280 rdelim = NOCHAR;
281 keepblock = YES;
282
283 while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
284 switch (ch) {
285 case 'i':
286 iflag = YES;
287 break;
288 case 'k':
289 kflag = YES;
290 break;
291 case 'm':
292 msflag = YES;
293 keepblock = NO;
294 switch (optarg[0]) {
295 case 'm':
296 mac = MM;
297 break;
298 case 's':
299 mac = MS;
300 break;
301 case 'e':
302 mac = ME;
303 break;
304 case 'a':
305 mac = MA;
306 break;
307 case 'l':
308 disp = YES;
309 break;
310 default:
311 errflg++;
312 break;
313 }
314 if (errflg == 0 && optarg[1] != '\0')
315 errflg++;
316 break;
317 case 'p':
318 parag = YES;
319 break;
320 case 'w':
321 wordflag = YES;
322 kflag = YES;
323 break;
324 default:
325 errflg++;
326 }
327 }
328 argc = ac - optind;
329 argv = av + optind;
330
331 if (kflag)
332 keepblock = YES;
333 if (errflg)
334 usage();
335
336 #ifdef DEBUG
337 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
338 msflag, mactab[mac], keepblock, disp);
339 #endif /* DEBUG */
340 if (argc == 0) {
341 infile = stdin;
342 } else {
343 infile = opn(argv[0]);
344 --argc;
345 ++argv;
346 }
347 files[0] = infile;
348 filesp = &files[0];
349
350 for (i = 'a'; i <= 'z' ; ++i)
351 chars[i] = LETTER;
352 for (i = 'A'; i <= 'Z'; ++i)
353 chars[i] = LETTER;
354 for (i = '0'; i <= '9'; ++i)
355 chars[i] = DIGIT;
356 chars['\''] = APOS;
357 chars['&'] = APOS;
358 chars['.'] = PUNCT;
359 chars[','] = PUNCT;
360 chars[';'] = PUNCT;
361 chars['?'] = PUNCT;
362 chars[':'] = PUNCT;
363 work();
364 }
365
366 static int
skeqn(void)367 skeqn(void)
368 {
369
370 while ((c = getc(infile)) != rdelim) {
371 if (c == EOF)
372 c = eof();
373 else if (c == '"') {
374 while ((c = getc(infile)) != '"') {
375 if (c == EOF ||
376 (c == '\\' && (c = getc(infile)) == EOF))
377 c = eof();
378 }
379 }
380 }
381 if (msflag)
382 return c == 'x';
383 return c == ' ';
384 }
385
/*
 * Open the file named p for reading.  On failure the program exits
 * with status 1 through err(); otherwise the stream is returned.
 */
static FILE *
opn(char *p)
{
	FILE *stream;

	stream = fopen(p, "r");
	if (stream == NULL)
		err(1, "fopen %s", p);

	return stream;
}
396
397 static int
eof(void)398 eof(void)
399 {
400
401 if (infile != stdin)
402 fclose(infile);
403 if (filesp > files)
404 infile = *--filesp;
405 else if (argc > 0) {
406 infile = opn(argv[0]);
407 --argc;
408 ++argv;
409 } else
410 exit(0);
411 return C;
412 }
413
414 static void
getfname(void)415 getfname(void)
416 {
417 char *p;
418 struct chain {
419 struct chain *nextp;
420 char *datap;
421 } *q;
422 static struct chain *namechain= NULL;
423
424 while (C == ' ')
425 ; /* nothing */
426
427 for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) &&
428 (*p = c) != '\n' &&
429 c != ' ' && c != '\t' && c != '\\'; ++p)
430 C;
431 *p = '\0';
432 while (c != '\n')
433 C;
434
435 /* see if this name has already been used */
436 for (q = namechain ; q; q = q->nextp)
437 if (strcmp(fname, q->datap) == 0) {
438 fname[0] = '\0';
439 return;
440 }
441
442 q = (struct chain *) malloc(sizeof(struct chain));
443 if (q == NULL)
444 err(1, NULL);
445 q->nextp = namechain;
446 q->datap = strdup(fname);
447 if (q->datap == NULL)
448 err(1, NULL);
449 namechain = q;
450 }
451
452 /*ARGSUSED*/
453 static void
textline(char * str,int constant)454 textline(char *str, int constant)
455 {
456
457 if (wordflag) {
458 msputwords();
459 return;
460 }
461 puts(str);
462 }
463
464 static void
work(void)465 work(void)
466 {
467
468 for (;;) {
469 C;
470 #ifdef FULLDEBUG
471 printf("Starting work with `%c'\n", c);
472 #endif /* FULLDEBUG */
473 if (c == '.' || c == '\'')
474 comline();
475 else
476 regline(textline, TWO);
477 }
478 }
479
480 static void
regline(void (* pfunc)(char *,int),int constant)481 regline(void (*pfunc)(char *, int), int constant)
482 {
483
484 line[0] = c;
485 lp = line;
486 while (lp - line < (ptrdiff_t)sizeof(line)) {
487 if (c == '\\') {
488 *lp = ' ';
489 backsl();
490 }
491 if (c == '\n')
492 break;
493 if (intable && c == 'T') {
494 *++lp = C;
495 if (c == '{' || c == '}') {
496 lp[-1] = ' ';
497 *lp = C;
498 }
499 } else {
500 *++lp = C;
501 }
502 }
503 *lp = '\0';
504
505 if (line[0] != '\0')
506 (*pfunc)(line, constant);
507 }
508
509 static void
macro(void)510 macro(void)
511 {
512
513 if (msflag) {
514 do {
515 SKIP;
516 } while (C!='.' || C!='.' || C=='.'); /* look for .. */
517 if (c != '\n')
518 SKIP;
519 return;
520 }
521 SKIP;
522 inmacro = YES;
523 }
524
525 static void
tbl(void)526 tbl(void)
527 {
528
529 while (C != '.')
530 ; /* nothing */
531 SKIP;
532 intable = YES;
533 }
534
535 static void
stbl(void)536 stbl(void)
537 {
538
539 while (C != '.')
540 ; /* nothing */
541 SKIP_TO_COM;
542 if (c != 'T' || C != 'E') {
543 SKIP;
544 pc = c;
545 while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
546 pc = c;
547 }
548 }
549
550 static void
eqn(void)551 eqn(void)
552 {
553 int c1, c2;
554 int dflg;
555 char last;
556
557 last=0;
558 dflg = 1;
559 SKIP;
560
561 for (;;) {
562 if (C1 == '.' || c == '\'') {
563 while (C1 == ' ' || c == '\t')
564 ;
565 if (c == 'E' && C1 == 'N') {
566 SKIP;
567 if (msflag && dflg) {
568 putchar('x');
569 putchar(' ');
570 if (last) {
571 putchar(last);
572 putchar('\n');
573 }
574 }
575 return;
576 }
577 } else if (c == 'd') {
578 /* look for delim */
579 if (C1 == 'e' && C1 == 'l')
580 if (C1 == 'i' && C1 == 'm') {
581 while (C1 == ' ')
582 ; /* nothing */
583
584 if ((c1 = c) == '\n' ||
585 (c2 = C1) == '\n' ||
586 (c1 == 'o' && c2 == 'f' && C1=='f')) {
587 ldelim = NOCHAR;
588 rdelim = NOCHAR;
589 } else {
590 ldelim = c1;
591 rdelim = c2;
592 }
593 }
594 dflg = 0;
595 }
596
597 if (c != '\n')
598 while (C1 != '\n') {
599 if (chars[c] == PUNCT)
600 last = c;
601 else if (c != ' ')
602 last = 0;
603 }
604 }
605 }
606
607 /* skip over a complete backslash construction */
608 static void
backsl(void)609 backsl(void)
610 {
611 int bdelim;
612
613 sw:
614 switch (C) {
615 case '"':
616 SKIP;
617 return;
618
619 case 's':
620 if (C == '\\')
621 backsl();
622 else {
623 while (C >= '0' && c <= '9')
624 ; /* nothing */
625 ungetc(c, infile);
626 c = '0';
627 }
628 --lp;
629 return;
630
631 case 'f':
632 case 'n':
633 case '*':
634 if (C != '(')
635 return;
636
637 /* FALLTHROUGH */
638 case '(':
639 if (msflag) {
640 if (C == 'e') {
641 if (C == 'm') {
642 *lp = '-';
643 return;
644 }
645 }
646 else if (c != '\n')
647 C;
648 return;
649 }
650 if (C != '\n')
651 C;
652 return;
653
654 case '$':
655 C; /* discard argument number */
656 return;
657
658 case 'b':
659 case 'x':
660 case 'v':
661 case 'h':
662 case 'w':
663 case 'o':
664 case 'l':
665 case 'L':
666 if ((bdelim = C) == '\n')
667 return;
668 while (C != '\n' && c != bdelim)
669 if (c == '\\')
670 backsl();
671 return;
672
673 case '\\':
674 if (inmacro)
675 goto sw;
676 return;
677
678 default:
679 return;
680 }
681 }
682
683 static void
sce(void)684 sce(void)
685 {
686 char *ap;
687 int n, i;
688 char a[10];
689
690 for (ap = a; C != '\n'; ap++) {
691 *ap = c;
692 if (ap == &a[9]) {
693 SKIP;
694 ap = a;
695 break;
696 }
697 }
698 if (ap != a)
699 n = atoi(a);
700 else
701 n = 1;
702 for (i = 0; i < n;) {
703 if (C == '.') {
704 if (C == 'c') {
705 if (C == 'e') {
706 while (C == ' ')
707 ; /* nothing */
708 if (c == '0') {
709 SKIP;
710 break;
711 } else
712 SKIP;
713 }
714 else
715 SKIP;
716 } else if (c == 'P' || C == 'P') {
717 if (c != '\n')
718 SKIP;
719 break;
720 } else if (c != '\n')
721 SKIP;
722 } else {
723 SKIP;
724 i++;
725 }
726 }
727 }
728
729 static void
refer(int c1)730 refer(int c1)
731 {
732 int c2;
733
734 if (c1 != '\n')
735 SKIP;
736
737 for (c2 = -1;;) {
738 if (C != '.')
739 SKIP;
740 else {
741 if (C != ']')
742 SKIP;
743 else {
744 while (C != '\n')
745 c2 = c;
746 if (c2 != -1 && chars[c2] == PUNCT)
747 putchar(c2);
748 return;
749 }
750 }
751 }
752 }
753
754 static void
inpic(void)755 inpic(void)
756 {
757 int c1;
758 char *p1;
759
760 SKIP;
761 p1 = line;
762 c = '\n';
763 for (;;) {
764 c1 = c;
765 if (C == '.' && c1 == '\n') {
766 if (C != 'P') {
767 if (c == '\n')
768 continue;
769 else {
770 SKIP;
771 c = '\n';
772 continue;
773 }
774 }
775 if (C != 'E') {
776 if (c == '\n')
777 continue;
778 else {
779 SKIP;
780 c = '\n';
781 continue;
782 }
783 }
784 SKIP;
785 return;
786 }
787 else if (c == '\"') {
788 while (C != '\"') {
789 if (c == '\\') {
790 if (C == '\"')
791 continue;
792 ungetc(c, infile);
793 backsl();
794 } else
795 *p1++ = c;
796 }
797 *p1++ = ' ';
798 }
799 else if (c == '\n' && p1 != line) {
800 *p1 = '\0';
801 if (wordflag)
802 msputwords();
803 else {
804 puts(line);
805 putchar('\n');
806 }
807 p1 = line;
808 }
809 }
810 }
811
#ifdef DEBUG
/*
 * Function forms of C1get and Cget, used for C1 and C in DEBUG builds.
 */
static int
_C1(void)
{
	int ch;

	ch = C1get;
	return ch;
}

static int
_C(void)
{
	int ch;

	ch = Cget;
	return ch;
}
#endif /* DEBUG */
827
828 /*
829 * Put out a macro line, using ms and mm conventions.
830 */
831 static void
msputmac(char * s,int constant)832 msputmac(char *s, int constant)
833 {
834 char *t;
835 int found;
836 int last;
837
838 last = 0;
839 found = 0;
840 if (wordflag) {
841 msputwords();
842 return;
843 }
844 while (*s) {
845 while (*s == ' ' || *s == '\t')
846 putchar(*s++);
847 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
848 ; /* nothing */
849 if (*s == '\"')
850 s++;
851 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
852 chars[(unsigned char)s[1]] == LETTER) {
853 while (s < t)
854 if (*s == '\"')
855 s++;
856 else
857 putchar(*s++);
858 last = *(t-1);
859 found++;
860 } else if (found && chars[(unsigned char)s[0]] == PUNCT &&
861 s[1] == '\0') {
862 putchar(*s++);
863 } else {
864 last = *(t - 1);
865 s = t;
866 }
867 }
868 putchar('\n');
869 if (msflag && chars[last] == PUNCT) {
870 putchar(last);
871 putchar('\n');
872 }
873 }
874
875 /*
876 * put out words (for the -w option) with ms and mm conventions
877 */
878 static void
msputwords(void)879 msputwords(void)
880 {
881 char *p, *p1;
882 int i, nlet;
883
884 for (p1 = line;;) {
885 /*
886 * skip initial specials ampersands and apostrophes
887 */
888 while (chars[(unsigned char)*p1] < DIGIT)
889 if (*p1++ == '\0')
890 return;
891 nlet = 0;
892 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
893 if (i == LETTER)
894 ++nlet;
895
896 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
897 /*
898 * delete trailing ampersands and apostrophes
899 */
900 while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
901 i == APOS )
902 --p;
903 while (p1 < p)
904 putchar(*p1++);
905 putchar('\n');
906 } else {
907 p1 = p;
908 }
909 }
910 }
911
912 /*
913 * put out a macro using the me conventions
914 */
915 #define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; }
916
917 static void
meputmac(char * cp,int constant)918 meputmac(char *cp, int constant)
919 {
920 char *np;
921 int found;
922 int argno;
923 int last;
924 int inquote;
925
926 last = 0;
927 found = 0;
928 if (wordflag) {
929 meputwords();
930 return;
931 }
932 for (argno = 0; *cp; argno++) {
933 SKIPBLANK(cp);
934 inquote = (*cp == '"');
935 if (inquote)
936 cp++;
937 for (np = cp; *np; np++) {
938 switch (*np) {
939 case '\n':
940 case '\0':
941 break;
942
943 case '\t':
944 case ' ':
945 if (inquote)
946 continue;
947 else
948 goto endarg;
949
950 case '"':
951 if (inquote && np[1] == '"') {
952 memmove(np, np + 1, strlen(np));
953 np++;
954 continue;
955 } else {
956 *np = ' '; /* bye bye " */
957 goto endarg;
958 }
959
960 default:
961 continue;
962 }
963 }
964 endarg: ;
965 /*
966 * cp points at the first char in the arg
967 * np points one beyond the last char in the arg
968 */
969 if ((argconcat == 0) || (argconcat != argno))
970 putchar(' ');
971 #ifdef FULLDEBUG
972 {
973 char *p;
974 printf("[%d,%d: ", argno, np - cp);
975 for (p = cp; p < np; p++) {
976 putchar(*p);
977 }
978 printf("]");
979 }
980 #endif /* FULLDEBUG */
981 /*
982 * Determine if the argument merits being printed
983 *
984 * constant is the cut off point below which something
985 * is not a word.
986 */
987 if (((np - cp) > constant) &&
988 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
989 for (; cp < np; cp++)
990 putchar(*cp);
991 last = np[-1];
992 found++;
993 } else if (found && (np - cp == 1) &&
994 chars[(unsigned char)*cp] == PUNCT) {
995 putchar(*cp);
996 } else {
997 last = np[-1];
998 }
999 cp = np;
1000 }
1001 if (msflag && chars[last] == PUNCT)
1002 putchar(last);
1003 putchar('\n');
1004 }
1005
/*
 * put out words (for the -w option) with me conventions;
 * currently the same as the ms/mm word output
 */
static void
meputwords(void)
{

	msputwords();
	return;
}
1015
1016 /*
1017 *
1018 * Skip over a nested set of macros
1019 *
1020 * Possible arguments to noblock are:
1021 *
1022 * fi end of unfilled text
1023 * PE pic ending
1024 * DE display ending
1025 *
1026 * for ms and mm only:
1027 * KE keep ending
1028 *
1029 * NE undocumented match to NS (for mm?)
1030 * LE mm only: matches RL or *L (for lists)
1031 *
1032 * for me:
1033 * ([lqbzcdf]
1034 */
1035 static void
noblock(char a1,char a2)1036 noblock(char a1, char a2)
1037 {
1038 int c1,c2;
1039 int eqnf;
1040 int lct;
1041
1042 lct = 0;
1043 eqnf = 1;
1044 SKIP;
1045 for (;;) {
1046 while (C != '.')
1047 if (c == '\n')
1048 continue;
1049 else
1050 SKIP;
1051 if ((c1 = C) == '\n')
1052 continue;
1053 if ((c2 = C) == '\n')
1054 continue;
1055 if (c1 == a1 && c2 == a2) {
1056 SKIP;
1057 if (lct != 0) {
1058 lct--;
1059 continue;
1060 }
1061 if (eqnf)
1062 putchar('.');
1063 putchar('\n');
1064 return;
1065 } else if (a1 == 'L' && c2 == 'L') {
1066 lct++;
1067 SKIP;
1068 }
1069 /*
1070 * equations (EQ) nested within a display
1071 */
1072 else if (c1 == 'E' && c2 == 'Q') {
1073 if ((mac == ME && a1 == ')')
1074 || (mac != ME && a1 == 'D')) {
1075 eqn();
1076 eqnf=0;
1077 }
1078 }
1079 /*
1080 * turning on filling is done by the paragraphing
1081 * macros
1082 */
1083 else if (a1 == 'f') { /* .fi */
1084 if ((mac == ME && (c2 == 'h' || c2 == 'p'))
1085 || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
1086 SKIP;
1087 return;
1088 }
1089 } else {
1090 SKIP;
1091 }
1092 }
1093 }
1094
1095 static int
1096 /*ARGSUSED*/
EQ(pacmac unused)1097 EQ(pacmac unused)
1098 {
1099
1100 eqn();
1101 return 0;
1102 }
1103
1104 static int
1105 /*ARGSUSED*/
domacro(pacmac unused)1106 domacro(pacmac unused)
1107 {
1108
1109 macro();
1110 return 0;
1111 }
1112
1113 static int
1114 /*ARGSUSED*/
PS(pacmac unused)1115 PS(pacmac unused)
1116 {
1117
1118 for (C; c == ' ' || c == '\t'; C)
1119 ; /* nothing */
1120
1121 if (c == '<') { /* ".PS < file" -- don't expect a .PE */
1122 SKIP;
1123 return 0;
1124 }
1125 if (!msflag)
1126 inpic();
1127 else
1128 noblock('P', 'E');
1129 return 0;
1130 }
1131
1132 static int
1133 /*ARGSUSED*/
skip(pacmac unused)1134 skip(pacmac unused)
1135 {
1136
1137 SKIP;
1138 return 0;
1139 }
1140
1141 static int
1142 /*ARGSUSED*/
intbl(pacmac unused)1143 intbl(pacmac unused)
1144 {
1145
1146 if (msflag)
1147 stbl();
1148 else
1149 tbl();
1150 return 0;
1151 }
1152
1153 static int
1154 /*ARGSUSED*/
outtbl(pacmac unused)1155 outtbl(pacmac unused)
1156 {
1157
1158 intable = NO;
1159 return 0;
1160 }
1161
1162 static int
1163 /*ARGSUSED*/
so(pacmac unused)1164 so(pacmac unused)
1165 {
1166
1167 if (!iflag) {
1168 getfname();
1169 if (fname[0]) {
1170 if (++filesp - &files[0] > MAXFILES)
1171 err(1, "too many nested files (max %d)",
1172 MAXFILES);
1173 infile = *filesp = opn(fname);
1174 }
1175 }
1176 return 0;
1177 }
1178
1179 static int
1180 /*ARGSUSED*/
nx(pacmac unused)1181 nx(pacmac unused)
1182 {
1183
1184 if (!iflag) {
1185 getfname();
1186 if (fname[0] == '\0')
1187 exit(0);
1188 if (infile != stdin)
1189 fclose(infile);
1190 infile = *filesp = opn(fname);
1191 }
1192 return 0;
1193 }
1194
1195 static int
1196 /*ARGSUSED*/
skiptocom(pacmac unused)1197 skiptocom(pacmac unused)
1198 {
1199
1200 SKIP_TO_COM;
1201 return COMX;
1202 }
1203
1204 static int
PP(pacmac c12)1205 PP(pacmac c12)
1206 {
1207 int c1, c2;
1208
1209 frommac(c12, c1, c2);
1210 printf(".%c%c", c1, c2);
1211 while (C != '\n')
1212 putchar(c);
1213 putchar('\n');
1214 return 0;
1215 }
1216
1217 static int
1218 /*ARGSUSED*/
AU(pacmac unused)1219 AU(pacmac unused)
1220 {
1221
1222 if (mac == MM)
1223 return 0;
1224 SKIP_TO_COM;
1225 return COMX;
1226 }
1227
1228 static int
SH(pacmac c12)1229 SH(pacmac c12)
1230 {
1231 int c1, c2;
1232
1233 frommac(c12, c1, c2);
1234
1235 if (parag) {
1236 printf(".%c%c", c1, c2);
1237 while (C != '\n')
1238 putchar(c);
1239 putchar(c);
1240 putchar('!');
1241 for (;;) {
1242 while (C != '\n')
1243 putchar(c);
1244 putchar('\n');
1245 if (C == '.')
1246 return COM;
1247 putchar('!');
1248 putchar(c);
1249 }
1250 /*NOTREACHED*/
1251 } else {
1252 SKIP_TO_COM;
1253 return COMX;
1254 }
1255 }
1256
1257 static int
1258 /*ARGSUSED*/
UX(pacmac unused)1259 UX(pacmac unused)
1260 {
1261
1262 if (wordflag)
1263 printf("UNIX\n");
1264 else
1265 printf("UNIX ");
1266 return 0;
1267 }
1268
1269 static int
MMHU(pacmac c12)1270 MMHU(pacmac c12)
1271 {
1272 int c1, c2;
1273
1274 frommac(c12, c1, c2);
1275 if (parag) {
1276 printf(".%c%c", c1, c2);
1277 while (C != '\n')
1278 putchar(c);
1279 putchar('\n');
1280 } else {
1281 SKIP;
1282 }
1283 return 0;
1284 }
1285
1286 static int
mesnblock(pacmac c12)1287 mesnblock(pacmac c12)
1288 {
1289 int c1, c2;
1290
1291 frommac(c12, c1, c2);
1292 noblock(')', c2);
1293 return 0;
1294 }
1295
1296 static int
mssnblock(pacmac c12)1297 mssnblock(pacmac c12)
1298 {
1299 int c1, c2;
1300
1301 frommac(c12, c1, c2);
1302 noblock(c1, 'E');
1303 return 0;
1304 }
1305
1306 static int
1307 /*ARGSUSED*/
nf(pacmac unused)1308 nf(pacmac unused)
1309 {
1310
1311 noblock('f', 'i');
1312 return 0;
1313 }
1314
1315 static int
1316 /*ARGSUSED*/
ce(pacmac unused)1317 ce(pacmac unused)
1318 {
1319
1320 sce();
1321 return 0;
1322 }
1323
1324 static int
meip(pacmac c12)1325 meip(pacmac c12)
1326 {
1327
1328 if (parag)
1329 mepp(c12);
1330 else if (wordflag) /* save the tag */
1331 regline(meputmac, ONE);
1332 else
1333 SKIP;
1334 return 0;
1335 }
1336
1337 /*
1338 * only called for -me .pp or .sh, when parag is on
1339 */
1340 static int
mepp(pacmac c12)1341 mepp(pacmac c12)
1342 {
1343
1344 PP(c12); /* eats the line */
1345 return 0;
1346 }
1347
1348 /*
1349 * Start of a section heading; output the section name if doing words
1350 */
1351 static int
mesh(pacmac c12)1352 mesh(pacmac c12)
1353 {
1354
1355 if (parag)
1356 mepp(c12);
1357 else if (wordflag)
1358 defcomline(c12);
1359 else
1360 SKIP;
1361 return 0;
1362 }
1363
1364 /*
1365 * process a font setting
1366 */
1367 static int
mefont(pacmac c12)1368 mefont(pacmac c12)
1369 {
1370
1371 argconcat = 1;
1372 defcomline(c12);
1373 argconcat = 0;
1374 return 0;
1375 }
1376
1377 static int
manfont(pacmac c12)1378 manfont(pacmac c12)
1379 {
1380
1381 return mefont(c12);
1382 }
1383
1384 static int
manpp(pacmac c12)1385 manpp(pacmac c12)
1386 {
1387
1388 return mepp(c12);
1389 }
1390
1391 static void
defcomline(pacmac c12)1392 defcomline(pacmac c12)
1393 {
1394 int c1, c2;
1395
1396 frommac(c12, c1, c2);
1397 if (msflag && mac == MM && c2 == 'L') {
1398 if (disp || c1 == 'R') {
1399 noblock('L', 'E');
1400 } else {
1401 SKIP;
1402 putchar('.');
1403 }
1404 }
1405 else if (c1 == '.' && c2 == '.') {
1406 if (msflag) {
1407 SKIP;
1408 return;
1409 }
1410 while (C == '.')
1411 /*VOID*/;
1412 }
1413 ++inmacro;
1414 /*
1415 * Process the arguments to the macro
1416 */
1417 switch (mac) {
1418 default:
1419 case MM:
1420 case MS:
1421 if (c1 <= 'Z' && msflag)
1422 regline(msputmac, ONE);
1423 else
1424 regline(msputmac, TWO);
1425 break;
1426 case ME:
1427 regline(meputmac, ONE);
1428 break;
1429 }
1430 --inmacro;
1431 }
1432
1433 static void
comline(void)1434 comline(void)
1435 {
1436 int c1;
1437 int c2;
1438 pacmac c12;
1439 int mid;
1440 int lb, ub;
1441 int hit;
1442 static int tabsize = 0;
1443 static const struct mactab *mactab = NULL;
1444 const struct mactab *mp;
1445
1446 if (mactab == 0)
1447 buildtab(&mactab, &tabsize);
1448 com:
1449 while (C == ' ' || c == '\t')
1450 ;
1451 comx:
1452 if ((c1 = c) == '\n')
1453 return;
1454 c2 = C;
1455 if (c1 == '.' && c2 != '.')
1456 inmacro = NO;
1457 if (msflag && c1 == '[') {
1458 refer(c2);
1459 return;
1460 }
1461 if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
1462 printf(".P\n");
1463 return;
1464 }
1465 if (c2 == '\n')
1466 return;
1467 /*
1468 * Single letter macro
1469 */
1470 if (mac == ME && (c2 == ' ' || c2 == '\t') )
1471 c2 = ' ';
1472 c12 = tomac(c1, c2);
1473 /*
1474 * binary search through the table of macros
1475 */
1476 lb = 0;
1477 ub = tabsize - 1;
1478 while (lb <= ub) {
1479 mid = (ub + lb) / 2;
1480 mp = &mactab[mid];
1481 if (mp->macname < c12)
1482 lb = mid + 1;
1483 else if (mp->macname > c12)
1484 ub = mid - 1;
1485 else {
1486 hit = 1;
1487 #ifdef FULLDEBUG
1488 printf("preliminary hit macro %c%c ", c1, c2);
1489 #endif /* FULLDEBUG */
1490 switch (mp->condition) {
1491 case NONE:
1492 hit = YES;
1493 break;
1494 case FNEST:
1495 hit = (filesp == files);
1496 break;
1497 case NOMAC:
1498 hit = !inmacro;
1499 break;
1500 case MAC:
1501 hit = inmacro;
1502 break;
1503 case PARAG:
1504 hit = parag;
1505 break;
1506 case NBLK:
1507 hit = !keepblock;
1508 break;
1509 default:
1510 hit = 0;
1511 }
1512
1513 if (hit) {
1514 #ifdef FULLDEBUG
1515 printf("MATCH\n");
1516 #endif /* FULLDEBUG */
1517 switch ((*(mp->func))(c12)) {
1518 default:
1519 return;
1520 case COMX:
1521 goto comx;
1522 case COM:
1523 goto com;
1524 }
1525 }
1526 #ifdef FULLDEBUG
1527 printf("FAIL\n");
1528 #endif /* FULLDEBUG */
1529 break;
1530 }
1531 }
1532 defcomline(c12);
1533 }
1534
1535 static int
macsort(const void * p1,const void * p2)1536 macsort(const void *p1, const void *p2)
1537 {
1538 const struct mactab *t1 = p1;
1539 const struct mactab *t2 = p2;
1540
1541 return t1->macname - t2->macname;
1542 }
1543
1544 static int
sizetab(const struct mactab * mp)1545 sizetab(const struct mactab *mp)
1546 {
1547 int i;
1548
1549 i = 0;
1550 if (mp) {
1551 for (; mp->macname; mp++, i++)
1552 /*VOID*/ ;
1553 }
1554 return i;
1555 }
1556
1557 static struct mactab *
macfill(struct mactab * dst,const struct mactab * src)1558 macfill(struct mactab *dst, const struct mactab *src)
1559 {
1560
1561 if (src) {
1562 while (src->macname)
1563 *dst++ = *src++;
1564 }
1565 return dst;
1566 }
1567
/* Print the usage message to stderr and exit with status 1. */
static void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n",
	    __progname);
	exit(1);
}
1576
1577 static void
buildtab(const struct mactab ** r_back,int * r_size)1578 buildtab(const struct mactab **r_back, int *r_size)
1579 {
1580 size_t size;
1581 const struct mactab *p1, *p2;
1582 struct mactab *back, *p;
1583
1584 size = sizetab(troffmactab) + sizetab(ppmactab);
1585 p1 = p2 = NULL;
1586 if (msflag) {
1587 switch (mac) {
1588 case ME:
1589 p1 = memactab;
1590 break;
1591 case MM:
1592 p1 = msmactab;
1593 p2 = mmmactab;
1594 break;
1595 case MS:
1596 p1 = msmactab;
1597 break;
1598 case MA:
1599 p1 = manmactab;
1600 break;
1601 default:
1602 break;
1603 }
1604 }
1605 size += sizetab(p1);
1606 size += sizetab(p2);
1607 back = calloc(size + 2, sizeof(struct mactab));
1608 if (back == NULL)
1609 err(1, NULL);
1610
1611 p = macfill(back, troffmactab);
1612 p = macfill(p, ppmactab);
1613 p = macfill(p, p1);
1614 p = macfill(p, p2);
1615
1616 qsort(back, size, sizeof(struct mactab), macsort);
1617 *r_size = size;
1618 *r_back = back;
1619 }
1620
1621 /*
1622 * troff commands
1623 */
1624 static const struct mactab troffmactab[] = {
1625 M(NONE, '\\','"', skip), /* comment */
1626 M(NOMAC, 'd','e', domacro), /* define */
1627 M(NOMAC, 'i','g', domacro), /* ignore till .. */
1628 M(NOMAC, 'a','m', domacro), /* append macro */
1629 M(NBLK, 'n','f', nf), /* filled */
1630 M(NBLK, 'c','e', ce), /* centered */
1631
1632 M(NONE, 's','o', so), /* source a file */
1633 M(NONE, 'n','x', nx), /* go to next file */
1634
1635 M(NONE, 't','m', skip), /* print string on tty */
1636 M(NONE, 'h','w', skip), /* exception hyphen words */
1637 M(NONE, 0,0, 0)
1638 };
1639
1640 /*
1641 * Preprocessor output
1642 */
1643 static const struct mactab ppmactab[] = {
1644 M(FNEST, 'E','Q', EQ), /* equation starting */
1645 M(FNEST, 'T','S', intbl), /* table starting */
1646 M(FNEST, 'T','C', intbl), /* alternative table? */
1647 M(FNEST, 'T','&', intbl), /* table reformatting */
1648 M(NONE, 'T','E', outtbl),/* table ending */
1649 M(NONE, 'P','S', PS), /* picture starting */
1650 M(NONE, 0,0, 0)
1651 };
1652
1653 /*
1654 * Particular to ms and mm
1655 */
1656 static const struct mactab msmactab[] = {
1657 M(NONE, 'T','L', skiptocom), /* title follows */
1658 M(NONE, 'F','S', skiptocom), /* start footnote */
1659 M(NONE, 'O','K', skiptocom), /* Other kws */
1660
1661 M(NONE, 'N','R', skip), /* undocumented */
1662 M(NONE, 'N','D', skip), /* use supplied date */
1663
1664 M(PARAG, 'P','P', PP), /* begin parag */
1665 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */
1666 M(PARAG, 'L','P', PP), /* left blocked parag */
1667
1668 M(NONE, 'A','U', AU), /* author */
1669 M(NONE, 'A','I', AU), /* authors institution */
1670
1671 M(NONE, 'S','H', SH), /* section heading */
1672 M(NONE, 'S','N', SH), /* undocumented */
1673 M(NONE, 'U','X', UX), /* unix */
1674
1675 M(NBLK, 'D','S', mssnblock), /* start display text */
1676 M(NBLK, 'K','S', mssnblock), /* start keep */
1677 M(NBLK, 'K','F', mssnblock), /* start float keep */
1678 M(NONE, 0,0, 0)
1679 };
1680
1681 static const struct mactab mmmactab[] = {
1682 M(NONE, 'H',' ', MMHU), /* -mm ? */
1683 M(NONE, 'H','U', MMHU), /* -mm ? */
1684 M(PARAG, 'P',' ', PP), /* paragraph for -mm */
1685 M(NBLK, 'N','S', mssnblock), /* undocumented */
1686 M(NONE, 0,0, 0)
1687 };
1688
1689 static const struct mactab memactab[] = {
1690 M(PARAG, 'p','p', mepp),
1691 M(PARAG, 'l','p', mepp),
1692 M(PARAG, 'n','p', mepp),
1693 M(NONE, 'i','p', meip),
1694
1695 M(NONE, 's','h', mesh),
1696 M(NONE, 'u','h', mesh),
1697
1698 M(NBLK, '(','l', mesnblock),
1699 M(NBLK, '(','q', mesnblock),
1700 M(NBLK, '(','b', mesnblock),
1701 M(NBLK, '(','z', mesnblock),
1702 M(NBLK, '(','c', mesnblock),
1703
1704 M(NBLK, '(','d', mesnblock),
1705 M(NBLK, '(','f', mesnblock),
1706 M(NBLK, '(','x', mesnblock),
1707
1708 M(NONE, 'r',' ', mefont),
1709 M(NONE, 'i',' ', mefont),
1710 M(NONE, 'b',' ', mefont),
1711 M(NONE, 'u',' ', mefont),
1712 M(NONE, 'q',' ', mefont),
1713 M(NONE, 'r','b', mefont),
1714 M(NONE, 'b','i', mefont),
1715 M(NONE, 'b','x', mefont),
1716 M(NONE, 0,0, 0)
1717 };
1718
1719 static const struct mactab manmactab[] = {
1720 M(PARAG, 'B','I', manfont),
1721 M(PARAG, 'B','R', manfont),
1722 M(PARAG, 'I','B', manfont),
1723 M(PARAG, 'I','R', manfont),
1724 M(PARAG, 'R','B', manfont),
1725 M(PARAG, 'R','I', manfont),
1726
1727 M(PARAG, 'P','P', manpp),
1728 M(PARAG, 'L','P', manpp),
1729 M(PARAG, 'H','P', manpp),
1730 M(NONE, 0,0, 0)
1731 };
1732