xref: /dragonfly/usr.bin/m4/main.c (revision 4da66bbfa353d0fb44e7a3c17f7268748edba48b)
1 /*        $OpenBSD: main.c,v 1.80 2011/09/27 07:24:02 espie Exp $     */
2 /*        $NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $        */
3 
4 /*-
5  * Copyright (c) 1989, 1993
6  *        The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Ozan Yigit at York University.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $FreeBSD: src/usr.bin/m4/main.c,v 1.33 2012/11/17 01:54:24 svnexp Exp $
36  */
37 
38 /*
39  * main.c
40  * Facility: m4 macro processor
41  * by: oz
42  */
43 
44 #include <assert.h>
45 #include <signal.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <unistd.h>
49 #include <stdio.h>
50 #include <ctype.h>
51 #include <string.h>
52 #include <stddef.h>
53 #include <stdint.h>
54 #include <stdlib.h>
55 #include <ohash.h>
56 #include "mdef.h"
57 #include "stdd.h"
58 #include "extern.h"
59 #include "pathnames.h"
60 
61 stae *mstack;                           /* stack of m4 machine */
62 char *sstack;                           /* shadow stack, for string space extension */
63 static size_t STACKMAX;                 /* current maximum size of stack */
64 int sp;                                 /* current m4 stack pointer */
65 int fp;                                 /* m4 call frame pointer */
66 struct input_file infile[MAXINP];/* input file stack (0=stdin) */
67 FILE **outfile;                         /* diversion array(0=bitbucket) */
68 int maxout;
69 FILE *active;                           /* active output file pointer */
70 int ilevel = 0;                         /* input file stack pointer */
71 int oindex = 0;                         /* diversion index.. */
72 const char *null = "";                  /* as it says.. just a null.. */
73 char **m4wraps = NULL;                  /* m4wraps array. */
74 int maxwraps = 0;             /* size of m4wraps array */
75 int wrapindex = 0;            /* current offset in m4wraps */
76 char lquote[MAXCCHARS + 1] = {LQUOTE};  /* left quote character  (`) */
77 char rquote[MAXCCHARS + 1] = {RQUOTE};  /* right quote character (') */
78 char scommt[MAXCCHARS + 1] = {SCOMMT};  /* start character for comment */
79 char ecommt[MAXCCHARS + 1] = {ECOMMT};  /* end character for comment */
80 int synch_lines = 0;                    /* line synchronisation for C preprocessor */
81 int prefix_builtins = 0;      /* -P option to prefix builtin keywords */
82 
/*
 * One entry of the builtin keyword table: the keyword's spelling and its
 * type code (optionally OR-ed with NOARGS, see initkwds()).
 */
struct keyblk {
	const char	*knam;		/* keyword name */
	int		ktyp;		/* keyword type */
};
87 
88 static struct keyblk keywrds[] = {      /* m4 keywords to be installed */
89           { "include",        INCLTYPE },
90           { "sinclude",       SINCTYPE },
91           { "define",         DEFITYPE },
92           { "defn", DEFNTYPE },
93           { "divert",         DIVRTYPE | NOARGS },
94           { "expr", EXPRTYPE },
95           { "eval", EXPRTYPE },
96           { "substr",         SUBSTYPE },
97           { "ifelse",         IFELTYPE },
98           { "ifdef",          IFDFTYPE },
99           { "len",  LENGTYPE },
100           { "incr", INCRTYPE },
101           { "decr", DECRTYPE },
102           { "dnl",  DNLNTYPE | NOARGS },
103           { "changequote",CHNQTYPE | NOARGS },
104           { "changecom",      CHNCTYPE | NOARGS },
105           { "index",          INDXTYPE },
106 #ifdef EXTENDED
107           { "paste",          PASTTYPE },
108           { "spaste",         SPASTYPE },
109           /* Newer extensions, needed to handle gnu-m4 scripts */
110           { "indir",          INDIRTYPE},
111           { "builtin",        BUILTINTYPE},
112           { "patsubst",       PATSTYPE},
113           { "regexp",         REGEXPTYPE},
114           { "esyscmd",        ESYSCMDTYPE},
115           { "__file__",       FILENAMETYPE | NOARGS},
116           { "__line__",       LINETYPE | NOARGS},
117 #endif
118           { "popdef",         POPDTYPE },
119           { "pushdef",        PUSDTYPE },
120           { "dumpdef",        DUMPTYPE | NOARGS },
121           { "shift",          SHIFTYPE | NOARGS },
122           { "translit",       TRNLTYPE },
123           { "undefine",       UNDFTYPE },
124           { "undivert",       UNDVTYPE | NOARGS },
125           { "divnum",         DIVNTYPE | NOARGS },
126           { "maketemp",       MKTMTYPE },
127           { "mkstemp",        MKTMTYPE },
128           { "errprint",       ERRPTYPE | NOARGS },
129           { "m4wrap",         M4WRTYPE | NOARGS },
130           { "m4exit",         EXITTYPE | NOARGS },
131           { "syscmd",         SYSCTYPE },
132           { "sysval",         SYSVTYPE | NOARGS },
133           { "traceon",        TRACEONTYPE | NOARGS },
134           { "traceoff",       TRACEOFFTYPE | NOARGS },
135           { "unix", SELFTYPE | NOARGS },
136 };
137 
138 #define   MAXKEYS   (sizeof(keywrds) / sizeof(struct keyblk))
139 
/*
 * Bookkeeping for diagnostics about unterminated constructs: for each
 * nesting level (up to MAXRECORD) the input name and line where a quote
 * or a parenthesis was opened.  Filled by record(), printed by dump_stack().
 */
#define	MAXRECORD 50
static struct position {
	char *name;		/* input name at the time of recording */
	unsigned long line;	/* input line at the time of recording */
} quotes[MAXRECORD], paren[MAXRECORD];
145 
146 static void record(struct position *, int);
147 static void dump_stack(struct position *, int);
148 
149 static void macro(void);
150 static void initkwds(void);
151 static ndptr inspect(int, char *);
152 static int do_look_ahead(int, const char *);
153 static void reallyoutputstr(const char *);
154 static void reallyputchar(int);
155 
156 static void enlarge_stack(void);
157 
158 int exit_code = 0;
159 
160 int
main(int argc,char * argv[])161 main(int argc, char *argv[])
162 {
163           int c;
164           int n;
165           char *p;
166 
167           if (signal(SIGINT, SIG_IGN) != SIG_IGN)
168                     signal(SIGINT, onintr);
169 
170           init_macros();
171           initspaces();
172           STACKMAX = INITSTACKMAX;
173 
174           mstack = (stae *)xalloc(sizeof(stae) * STACKMAX, NULL);
175           sstack = (char *)xalloc(STACKMAX, NULL);
176 
177           maxout = 0;
178           outfile = NULL;
179           resizedivs(MAXOUT);
180 
181           while ((c = getopt(argc, argv, "gst:d:D:U:o:I:P")) != -1)
182                     switch(c) {
183 
184                     case 'D': /* define something.. */
185                               for (p = optarg; *p; p++) {
186                                         if (*p == '=')
187                                                   break;
188                               }
189                               if (*p)
190                                         *p++ = EOS;
191                               dodefine(optarg, p);
192                               break;
193                     case 'I':
194                               addtoincludepath(optarg);
195                               break;
196                     case 'P':
197                               prefix_builtins = 1;
198                               break;
199                     case 'U': /* undefine... */
200                               macro_popdef(optarg);
201                               break;
202                     case 'g':
203                               mimic_gnu = 1;
204                               break;
205                     case 'd':
206                               set_trace_flags(optarg);
207                               break;
208                     case 's':
209                               synch_lines = 1;
210                               break;
211                     case 't':
212                               mark_traced(optarg, 1);
213                               break;
214                     case 'o':
215                               trace_file(optarg);
216                               break;
217                     case '?':
218                               usage();
219                     }
220 
221           argc -= optind;
222           argv += optind;
223 
224           initkwds();
225           if (mimic_gnu)
226                     setup_builtin("format", FORMATTYPE);
227 
228           active = stdout;              /* default active output     */
229           bbase[0] = bufbase;
230           if (argc == 0) {
231                     sp = -1;            /* stack pointer initialized */
232                     fp = 0;                       /* frame pointer initialized */
233                     set_input(infile + 0, stdin, "stdin");
234                                                   /* default input (naturally) */
235                     macro();
236           } else {
237                     for (; argc--; ++argv) {
238                               p = *argv;
239                               if (p[0] == '-' && p[1] == EOS)
240                                         set_input(infile, stdin, "stdin");
241                               else if (fopen_trypath(infile, p) == NULL)
242                                         err(1, "%s", p);
243                               sp = -1;
244                               fp = 0;
245                               macro();
246                               release_input(infile);
247                     }
248           }
249 
250           if (wrapindex) {
251                     int i;
252 
253                     ilevel = 0;                   /* in case m4wrap includes.. */
254                     bufbase = bp = buf; /* use the entire buffer   */
255                     if (mimic_gnu) {
256                               while (wrapindex != 0) {
257                                         for (i = 0; i < wrapindex; i++)
258                                                   pbstr(m4wraps[i]);
259                                         wrapindex =0;
260                                         macro();
261                               }
262                     } else {
263                               for (i = 0; i < wrapindex; i++) {
264                                         pbstr(m4wraps[i]);
265                                         macro();
266                               }
267                     }
268           }
269 
270           if (active != stdout)
271                     active = stdout;    /* reset output just in case */
272           for (n = 1; n < maxout; n++) {          /* default wrap-up: undivert */
273                     if (outfile[n] != NULL)
274                               getdiv(n);
275           }
276           /* remove bitbucket if used */
277           if (outfile[0] != NULL)
278                     fclose(outfile[0]);
279 
280           return exit_code;
281 }
282 
/*
 * Look ahead for `token' in the input.
 * Used for comment and quoting delimiters.
 *
 * On entry `t' is the character the caller has already read and it must
 * equal token[0] (asserted).  The remaining characters of `token' are
 * read with gpbc() and compared one by one.
 *
 * Returns 1 if the whole `token' was present; its characters have then
 *           been consumed from the input (nothing is written anywhere).
 *         0 if `token' was not found; the mismatching character and
 *           token[1..] read so far are pushed back, so that only `t'
 *           itself remains consumed, exactly as on entry.
 */
static int
do_look_ahead(int t, const char *token)
{
	int i;

	assert((unsigned char)t == (unsigned char)token[0]);

	/* i counts how many characters of token are matched so far. */
	for (i = 1; *++token; i++) {
		t = gpbc();
		if (t == EOF || (unsigned char)t != (unsigned char)*token) {
			pushback(t);
			/* undo token[i-1] .. token[1]; token[0] stays read */
			while (--i)
				pushback(*--token);
			return 0;
		}
	}
	return 1;
}
308 
/*
 * True when the just-read character `t' starts `token' and the rest of
 * `token' follows in the input (see do_look_ahead()).  The cheap first
 * character test avoids the function call in the common case.
 * Note: both arguments are evaluated more than once.
 */
#define	LOOK_AHEAD(t, token) ((t) != EOF &&				\
    (unsigned char)(t) == (unsigned char)(token)[0] &&			\
    do_look_ahead((t), (token)))
312 
313 /*
314  * macro - the work horse..
315  */
316 static void
macro(void)317 macro(void)
318 {
319           char token[MAXTOK + 1];
320           int t, l;
321           ndptr p;
322           int nlpar;
323 
324           for (;;) {
325                     t = gpbc();
326 
327                     if (LOOK_AHEAD(t, lquote)) {  /* strip quotes */
328                               nlpar = 0;
329                               record(quotes, nlpar++);
330                               /*
331                                * Opening quote: scan forward until matching
332                                * closing quote has been found.
333                                */
334                               do {
335                                         l = gpbc();
336                                         if (LOOK_AHEAD(l,rquote)) {
337                                                   if (--nlpar > 0)
338                                                             outputstr(rquote);
339                                         } else if (LOOK_AHEAD(l,lquote)) {
340                                                   record(quotes, nlpar++);
341                                                   outputstr(lquote);
342                                         } else if (l == EOF) {
343                                                   if (nlpar == 1)
344                                                             warnx("unclosed quote:");
345                                                   else
346                                                             warnx("%d unclosed quotes:", nlpar);
347                                                   dump_stack(quotes, nlpar);
348                                                   exit(1);
349                                         } else {
350                                                   if (nlpar > 0) {
351                                                             if (sp < 0)
352                                                                       reallyputchar(l);
353                                                             else
354                                                                       CHRSAVE(l);
355                                                   }
356                                         }
357                               }
358                               while (nlpar != 0);
359                     } else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
360                               reallyoutputstr(scommt);
361 
362                               for(;;) {
363                                         t = gpbc();
364                                         if (LOOK_AHEAD(t, ecommt)) {
365                                                   reallyoutputstr(ecommt);
366                                                   break;
367                                         }
368                                         if (t == EOF)
369                                                   break;
370                                         reallyputchar(t);
371                               }
372                     } else if (t == '_' || isalpha(t)) {
373                               p = inspect(t, token);
374                               if (p != NULL)
375                                         pushback(l = gpbc());
376                               if (p == NULL || (l != LPAREN &&
377                                   (macro_getdef(p)->type & NEEDARGS) != 0))
378                                         outputstr(token);
379                               else {
380                                         /*
381                                          * real thing.. First build a call frame:
382                                          */
383                                         pushf(fp);          /* previous call frm */
384                                         /* type of the call */
385                                         pushf(macro_getdef(p)->type);
386                                         pushf(is_traced(p));
387                                         pushf(0); /* parenthesis level */
388                                         fp = sp;  /* new frame pointer */
389                                         /*
390                                          * now push the string arguments:
391                                          */
392                                         pushs1(macro_getdef(p)->defn);          /* defn string */
393                                         pushs1((char *)macro_name(p));          /* macro name  */
394                                         pushs(ep);                              /* start next..*/
395 
396                                         if (l != LPAREN && PARLEV == 0) {
397                                                   /* no bracks  */
398                                                   chrsave(EOS);
399 
400                                                   if (sp == (int)STACKMAX) {
401                                                             errx(1,
402                                                                 "internal stack overflow");
403                                                   }
404                                                   eval((const char **) mstack + fp + 1, 2,
405                                                       CALTYP, TRACESTATUS);
406 
407                                                   ep = PREVEP;        /* flush strspace */
408                                                   sp = PREVSP;        /* previous sp..  */
409                                                   fp = PREVFP;        /* rewind stack...*/
410                                         }
411                               }
412                     } else if (t == EOF) {
413                               if (sp > -1 && ilevel <= 0) {
414                                         warnx( "unexpected end of input, unclosed parenthesis:");
415                                         dump_stack(paren, PARLEV);
416                                         exit(1);
417                               }
418                               if (ilevel <= 0)
419                                         break;                        /* all done thanks.. */
420                               release_input(infile+ilevel--);
421                               emit_synchline();
422                               bufbase = bbase[ilevel];
423                               continue;
424                     } else if (sp < 0) {                    /* not in a macro at all */
425                               reallyputchar(t);   /* output directly..           */
426                     }
427 
428                     else switch(t) {
429 
430                     case LPAREN:
431                               if (PARLEV > 0)
432                                         chrsave(t);
433                               while (isspace(l = gpbc())) /* skip blank, tab, nl.. */
434                                         if (PARLEV > 0)
435                                                   chrsave(l);
436                               pushback(l);
437                               record(paren, PARLEV++);
438                               break;
439 
440                     case RPAREN:
441                               if (--PARLEV > 0)
442                                         chrsave(t);
443                               else {                        /* end of argument list */
444                                         chrsave(EOS);
445 
446                                         if (sp == (int)STACKMAX)
447                                                   errx(1, "internal stack overflow");
448 
449                                         eval((const char **) mstack + fp + 1, sp-fp,
450                                             CALTYP, TRACESTATUS);
451 
452                                         ep = PREVEP;        /* flush strspace */
453                                         sp = PREVSP;        /* previous sp..  */
454                                         fp = PREVFP;        /* rewind stack...*/
455                               }
456                               break;
457 
458                     case COMMA:
459                               if (PARLEV == 1) {
460                                         chrsave(EOS);                 /* new argument   */
461                                         while (isspace(l = gpbc()))
462                                                   ;
463                                         pushback(l);
464                                         pushs(ep);
465                               } else
466                                         chrsave(t);
467                               break;
468 
469                     default:
470                               if (LOOK_AHEAD(t, scommt)) {
471                                         char *cp;
472                                         for (cp = scommt; *cp; cp++)
473                                                   chrsave(*cp);
474                                         for(;;) {
475                                                   t = gpbc();
476                                                   if (LOOK_AHEAD(t, ecommt)) {
477                                                             for (cp = ecommt; *cp; cp++)
478                                                                       chrsave(*cp);
479                                                             break;
480                                                   }
481                                                   if (t == EOF)
482                                                             break;
483                                                   CHRSAVE(t);
484                                         }
485                               } else
486                                         CHRSAVE(t);                   /* stack the char */
487                               break;
488                     }
489           }
490 }
491 
492 /*
493  * output string directly, without pushing it for reparses.
494  */
495 void
outputstr(const char * s)496 outputstr(const char *s)
497 {
498           if (sp < 0)
499                     reallyoutputstr(s);
500           else
501                     while (*s)
502                               CHRSAVE(*s++);
503 }
504 
505 void
reallyoutputstr(const char * s)506 reallyoutputstr(const char *s)
507 {
508           if (synch_lines) {
509                     while (*s) {
510                               fputc(*s, active);
511                               if (*s++ == '\n') {
512                                         infile[ilevel].synch_lineno++;
513                                         if (infile[ilevel].synch_lineno !=
514                                             infile[ilevel].lineno)
515                                                   do_emit_synchline();
516                               }
517                     }
518           } else
519                     fputs(s, active);
520 }
521 
522 void
reallyputchar(int c)523 reallyputchar(int c)
524 {
525           putc(c, active);
526           if (synch_lines && c == '\n') {
527                     infile[ilevel].synch_lineno++;
528                     if (infile[ilevel].synch_lineno != infile[ilevel].lineno)
529                               do_emit_synchline();
530           }
531 }
532 
533 /*
534  * build an input token..
535  * consider only those starting with _ or A-Za-z.
536  */
537 static ndptr
inspect(int c,char * tp)538 inspect(int c, char *tp)
539 {
540           char *name = tp;
541           char *etp = tp + MAXTOK;
542           ndptr p;
543 
544           *tp++ = c;
545 
546           while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
547                     *tp++ = c;
548           if (c != EOF)
549                     PUSHBACK(c);
550           *tp = EOS;
551           /* token is too long, it won't match anything, but it can still
552            * be output. */
553           if (tp == ep) {
554                     outputstr(name);
555                     while (isalnum(c = gpbc()) || c == '_') {
556                               if (sp < 0)
557                                         reallyputchar(c);
558                               else
559                                         CHRSAVE(c);
560                     }
561                     *name = EOS;
562                     return NULL;
563           }
564 
565           p = ohash_find(&macros, ohash_qlookupi(&macros, name, (const char **)(void *)&tp));
566           if (p == NULL)
567                     return NULL;
568           if (macro_getdef(p) == NULL)
569                     return NULL;
570           return p;
571 }
572 
573 /*
574  * initkwds - initialize m4 keywords as fast as possible.
575  * This very similar to install, but without certain overheads,
576  * such as calling lookup. Malloc is not used for storing the
577  * keyword strings, since we simply use the static pointers
578  * within keywrds block.
579  */
580 static void
initkwds(void)581 initkwds(void)
582 {
583           unsigned int type;
584           int i;
585 
586           for (i = 0; i < (int)MAXKEYS; i++) {
587                     type = keywrds[i].ktyp & TYPEMASK;
588                     if ((keywrds[i].ktyp & NOARGS) == 0)
589                               type |= NEEDARGS;
590                     setup_builtin(keywrds[i].knam, type);
591           }
592 }
593 
594 static void
record(struct position * t,int lev)595 record(struct position *t, int lev)
596 {
597           if (lev < MAXRECORD) {
598                     t[lev].name = CURRENT_NAME;
599                     t[lev].line = CURRENT_LINE;
600           }
601 }
602 
603 static void
dump_stack(struct position * t,int lev)604 dump_stack(struct position *t, int lev)
605 {
606           int i;
607 
608           for (i = 0; i < lev; i++) {
609                     if (i == MAXRECORD) {
610                               fprintf(stderr, "   ...\n");
611                               break;
612                     }
613                     fprintf(stderr, "   %s at line %lu\n",
614                               t[i].name, t[i].line);
615           }
616 }
617 
618 
619 static void
enlarge_stack(void)620 enlarge_stack(void)
621 {
622           STACKMAX += STACKMAX/2;
623           mstack = xrealloc(mstack, sizeof(stae) * STACKMAX,
624               "Evaluation stack overflow (%lu)",
625               (unsigned long)STACKMAX);
626           sstack = xrealloc(sstack, STACKMAX,
627               "Evaluation stack overflow (%lu)",
628               (unsigned long)STACKMAX);
629 }
630