1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include "awk.h"
34 #include "awkgram.h"
35 
36 extern YYSTYPE      yylval;
37 extern bool         infunc;
38 
39 int       lineno    = 1;
40 int       bracecnt = 0;
41 int       brackcnt  = 0;
42 int       parencnt = 0;
43 
/*
 * One row of the reserved-word table: the spelling looked up by
 * binsearch(), the value word() stores in yylval.i, and the token
 * code word() returns.
 */
typedef struct Keyword	Keyword;

struct Keyword {
	const char	*word;	/* spelling, compared with strcmp() */
	int		sub;	/* copied into yylval.i on a match */
	int		type;	/* token code returned on a match */
};
49 
50 const Keyword keywords[] = {  /* keep sorted: binary searched */
51           { "BEGIN",          XBEGIN,             XBEGIN },
52           { "END",  XEND,               XEND },
53           { "NF",             VARNF,              VARNF },
54           { "and",  FAND,               BLTIN },
55           { "atan2",          FATAN,              BLTIN },
56           { "break",          BREAK,              BREAK },
57           { "close",          CLOSE,              CLOSE },
58           { "compl",          FCOMPL,             BLTIN },
59           { "continue",       CONTINUE, CONTINUE },
60           { "cos",  FCOS,               BLTIN },
61           { "delete",         DELETE,             DELETE },
62           { "do",             DO,                 DO },
63           { "else", ELSE,               ELSE },
64           { "exit", EXIT,               EXIT },
65           { "exp",  FEXP,               BLTIN },
66           { "fflush",         FFLUSH,             BLTIN },
67           { "for",  FOR,                FOR },
68           { "func", FUNC,               FUNC },
69           { "function",       FUNC,               FUNC },
70           { "gensub",         GENSUB,             GENSUB },
71           { "getline",        GETLINE,  GETLINE },
72           { "gsub", GSUB,               GSUB },
73           { "if",             IF,                 IF },
74           { "in",             IN,                 IN },
75           { "index",          INDEX,              INDEX },
76           { "int",  FINT,               BLTIN },
77           { "length",         FLENGTH,  BLTIN },
78           { "log",  FLOG,               BLTIN },
79           { "lshift",         FLSHIFT,  BLTIN },
80           { "match",          MATCHFCN, MATCHFCN },
81           { "mktime",         FMKTIME,  BLTIN },
82           { "next", NEXT,               NEXT },
83           { "nextfile",       NEXTFILE, NEXTFILE },
84           { "or",             FFOR,               BLTIN },
85           { "print",          PRINT,              PRINT },
86           { "printf",         PRINTF,             PRINTF },
87           { "rand", FRAND,              BLTIN },
88           { "return",         RETURN,             RETURN },
89           { "rshift",         FRSHIFT,  BLTIN },
90           { "sin",  FSIN,               BLTIN },
91           { "split",          SPLIT,              SPLIT },
92           { "sprintf",        SPRINTF,  SPRINTF },
93           { "sqrt", FSQRT,              BLTIN },
94           { "srand",          FSRAND,             BLTIN },
95           { "strftime",       FSTRFTIME,          BLTIN },
96           { "sub",  SUB,                SUB },
97           { "substr",         SUBSTR,             SUBSTR },
98           { "system",         FSYSTEM,  BLTIN },
99           { "systime",        FSYSTIME, BLTIN },
100           { "tolower",        FTOLOWER, BLTIN },
101           { "toupper",        FTOUPPER, BLTIN },
102           { "while",          WHILE,              WHILE },
103           { "xor",  FXOR,               BLTIN },
104 };
105 
/*
 * RET(x): return token x from the enclosing function.  When the global
 * dbg flag is non-zero the token's name, as given by tokname(), is
 * printed first.
 *
 * The argument is evaluated exactly once, and the do/while(0) wrapper
 * makes the expansion a single statement, so "RET(x);" is safe in any
 * statement position.
 */
#define	RET(x)	do { \
		const int ret_tok_ = (x); \
		if (dbg) \
			printf("lex %s\n", tokname(ret_tok_)); \
		return ret_tok_; \
	} while (0)
107 
/*
 * peek: report the next input character without consuming it.
 *
 * The character is obtained with input() and handed straight back
 * through unput(), so unput()'s side effects apply (for a newline it
 * decrements lineno).  Returns 0 at end of input.
 */
static int peek(void)
{
	const int ahead = input();

	unput(ahead);
	return ahead;
}
114 
gettok(char ** pbuf,int * psz)115 static int gettok(char **pbuf, int *psz)          /* get next input token */
116 {
117           int c, retc;
118           char *buf = *pbuf;
119           int sz = *psz;
120           char *bp = buf;
121 
122           c = input();
123           if (c == 0)
124                     return 0;
125           buf[0] = c;
126           buf[1] = 0;
127           if (!isalnum(c) && c != '.' && c != '_')
128                     return c;
129 
130           *bp++ = c;
131           if (isalpha(c) || c == '_') { /* it's a varname */
132                     for ( ; (c = input()) != 0; ) {
133                               if (bp-buf >= sz)
134                                         if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
135                                                   FATAL( "out of space for name %.10s...", buf );
136                               if (isalnum(c) || c == '_')
137                                         *bp++ = c;
138                               else {
139                                         *bp = 0;
140                                         unput(c);
141                                         break;
142                               }
143                     }
144                     *bp = 0;
145                     retc = 'a';         /* alphanumeric */
146           } else {  /* maybe it's a number, but could be . */
147                     char *rem;
148                     /* read input until can't be a number */
149                     for ( ; (c = input()) != 0; ) {
150                               if (bp-buf >= sz)
151                                         if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
152                                                   FATAL( "out of space for number %.10s...", buf );
153                               if (isdigit(c) || c == 'e' || c == 'E'
154                                 || c == '.' || c == '+' || c == '-')
155                                         *bp++ = c;
156                               else {
157                                         unput(c);
158                                         break;
159                               }
160                     }
161                     *bp = 0;
162                     strtod(buf, &rem);  /* parse the number */
163                     if (rem == buf) {   /* it wasn't a valid number at all */
164                               buf[1] = 0;         /* return one character as token */
165                               retc = (uschar)buf[0];        /* character is its own type */
166                               unputstr(rem+1); /* put rest back for later */
167                     } else {  /* some prefix was a number */
168                               unputstr(rem);      /* put rest back for later */
169                               rem[0] = 0;         /* truncate buf after number part */
170                               retc = '0';         /* type is number */
171                     }
172           }
173           *pbuf = buf;
174           *psz = sz;
175           return retc;
176 }
177 
/* Scanners that yylex() hands control to. */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot requests, checked and cleared at the top of yylex(). */
bool	sc = false;	/* true => return a } right now */
bool	reg = false;	/* true => return a REGEXPR now */
183 
yylex(void)184 int yylex(void)
185 {
186           int c;
187           static char *buf = NULL;
188           static int bufsize = 5; /* BUG: setting this small causes core dump! */
189 
190           if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
191                     FATAL( "out of space in yylex" );
192           if (sc) {
193                     sc = false;
194                     RET('}');
195           }
196           if (reg) {
197                     reg = false;
198                     return regexpr();
199           }
200           for (;;) {
201                     c = gettok(&buf, &bufsize);
202                     if (c == 0)
203                               return 0;
204                     if (isalpha(c) || c == '_')
205                               return word(buf);
206                     if (isdigit(c)) {
207                               char *cp = tostring(buf);
208                               double result;
209 
210                               if (is_number(cp, & result))
211                                         yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
212                               else
213                                         yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
214                               free(cp);
215                               /* should this also have STR set? */
216                               RET(NUMBER);
217                     }
218 
219                     yylval.i = c;
220                     switch (c) {
221                     case '\n':          /* {EOL} */
222                               lineno++;
223                               RET(NL);
224                     case '\r':          /* assume \n is coming */
225                     case ' ': /* {WS}+ */
226                     case '\t':
227                               break;
228                     case '#': /* #.* strip comments */
229                               while ((c = input()) != '\n' && c != 0)
230                                         ;
231                               unput(c);
232                               /*
233                                * Next line is a hack, it compensates for
234                                * unput's treatment of \n.
235                                */
236                               lineno++;
237                               break;
238                     case ';':
239                               RET(';');
240                     case '\\':
241                               if (peek() == '\n') {
242                                         input();
243                                         lineno++;
244                               } else if (peek() == '\r') {
245                                         input(); input();   /* \n */
246                                         lineno++;
247                               } else {
248                                         RET(c);
249                               }
250                               break;
251                     case '&':
252                               if (peek() == '&') {
253                                         input(); RET(AND);
254                               } else
255                                         RET('&');
256                     case '|':
257                               if (peek() == '|') {
258                                         input(); RET(BOR);
259                               } else
260                                         RET('|');
261                     case '!':
262                               if (peek() == '=') {
263                                         input(); yylval.i = NE; RET(NE);
264                               } else if (peek() == '~') {
265                                         input(); yylval.i = NOTMATCH; RET(MATCHOP);
266                               } else
267                                         RET(NOT);
268                     case '~':
269                               yylval.i = MATCH;
270                               RET(MATCHOP);
271                     case '<':
272                               if (peek() == '=') {
273                                         input(); yylval.i = LE; RET(LE);
274                               } else {
275                                         yylval.i = LT; RET(LT);
276                               }
277                     case '=':
278                               if (peek() == '=') {
279                                         input(); yylval.i = EQ; RET(EQ);
280                               } else {
281                                         yylval.i = ASSIGN; RET(ASGNOP);
282                               }
283                     case '>':
284                               if (peek() == '=') {
285                                         input(); yylval.i = GE; RET(GE);
286                               } else if (peek() == '>') {
287                                         input(); yylval.i = APPEND; RET(APPEND);
288                               } else {
289                                         yylval.i = GT; RET(GT);
290                               }
291                     case '+':
292                               if (peek() == '+') {
293                                         input(); yylval.i = INCR; RET(INCR);
294                               } else if (peek() == '=') {
295                                         input(); yylval.i = ADDEQ; RET(ASGNOP);
296                               } else
297                                         RET('+');
298                     case '-':
299                               if (peek() == '-') {
300                                         input(); yylval.i = DECR; RET(DECR);
301                               } else if (peek() == '=') {
302                                         input(); yylval.i = SUBEQ; RET(ASGNOP);
303                               } else
304                                         RET('-');
305                     case '*':
306                               if (peek() == '=') {          /* *= */
307                                         input(); yylval.i = MULTEQ; RET(ASGNOP);
308                               } else if (peek() == '*') {   /* ** or **= */
309                                         input();  /* eat 2nd * */
310                                         if (peek() == '=') {
311                                                   input(); yylval.i = POWEQ; RET(ASGNOP);
312                                         } else {
313                                                   RET(POWER);
314                                         }
315                               } else
316                                         RET('*');
317                     case '/':
318                               RET('/');
319                     case '%':
320                               if (peek() == '=') {
321                                         input(); yylval.i = MODEQ; RET(ASGNOP);
322                               } else
323                                         RET('%');
324                     case '^':
325                               if (peek() == '=') {
326                                         input(); yylval.i = POWEQ; RET(ASGNOP);
327                               } else
328                                         RET(POWER);
329 
330                     case '$':
331                               /* BUG: awkward, if not wrong */
332                               c = gettok(&buf, &bufsize);
333                               if (isalpha(c)) {
334                                         if (strcmp(buf, "NF") == 0) { /* very special */
335                                                   unputstr("(NF)");
336                                                   RET(INDIRECT);
337                                         }
338                                         c = peek();
339                                         if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
340                                                   unputstr(buf);
341                                                   RET(INDIRECT);
342                                         }
343                                         yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
344                                         RET(IVAR);
345                               } else if (c == 0) {          /*  */
346                                         SYNTAX( "unexpected end of input after $" );
347                                         RET(';');
348                               } else {
349                                         unputstr(buf);
350                                         RET(INDIRECT);
351                               }
352 
353                     case '}':
354                               if (--bracecnt < 0)
355                                         SYNTAX( "extra }" );
356                               sc = true;
357                               RET(';');
358                     case ']':
359                               if (--brackcnt < 0)
360                                         SYNTAX( "extra ]" );
361                               RET(']');
362                     case ')':
363                               if (--parencnt < 0)
364                                         SYNTAX( "extra )" );
365                               RET(')');
366                     case '{':
367                               bracecnt++;
368                               RET('{');
369                     case '[':
370                               brackcnt++;
371                               RET('[');
372                     case '(':
373                               parencnt++;
374                               RET('(');
375 
376                     case '"':
377                               return string();    /* BUG: should be like tran.c ? */
378 
379                     default:
380                               RET(c);
381                     }
382           }
383 }
384 
string(void)385 int string(void)
386 {
387           int c, n;
388           char *s, *bp;
389           static char *buf = NULL;
390           static int bufsz = 500;
391 
392           if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
393                     FATAL("out of space for strings");
394           for (bp = buf; (c = input()) != '"'; ) {
395                     if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
396                               FATAL("out of space for string %.10s...", buf);
397                     switch (c) {
398                     case '\n':
399                     case '\r':
400                     case 0:
401                               *bp = '\0';
402                               SYNTAX( "non-terminated string %.10s...", buf );
403                               if (c == 0)         /* hopeless */
404                                         FATAL( "giving up" );
405                               lineno++;
406                               break;
407                     case '\\':
408                               c = input();
409                               switch (c) {
410                               case '\n': break;
411                               case '"': *bp++ = '"'; break;
412                               case 'n': *bp++ = '\n'; break;
413                               case 't': *bp++ = '\t'; break;
414                               case 'f': *bp++ = '\f'; break;
415                               case 'r': *bp++ = '\r'; break;
416                               case 'b': *bp++ = '\b'; break;
417                               case 'v': *bp++ = '\v'; break;
418                               case 'a': *bp++ = '\a'; break;
419                               case '\\': *bp++ = '\\'; break;
420 
421                               case '0': case '1': case '2': /* octal: \d \dd \ddd */
422                               case '3': case '4': case '5': case '6': case '7':
423                                         n = c - '0';
424                                         if ((c = peek()) >= '0' && c < '8') {
425                                                   n = 8 * n + input() - '0';
426                                                   if ((c = peek()) >= '0' && c < '8')
427                                                             n = 8 * n + input() - '0';
428                                         }
429                                         *bp++ = n;
430                                         break;
431 
432                               case 'x': /* hex  \x0-9a-fA-F (exactly two) */
433                                   {
434                                         int i;
435 
436                                         if (!isxdigit(peek())) {
437                                                   unput(c);
438                                                   break;
439                                         }
440                                         n = 0;
441                                         for (i = 0; i < 2; i++) {
442                                                   c = input();
443                                                   if (c == 0)
444                                                             break;
445                                                   if (isxdigit(c)) {
446                                                             c = tolower(c);
447                                                             n *= 16;
448                                                             if (isdigit(c))
449                                                                       n += (c - '0');
450                                                             else
451                                                                       n += 10 + (c - 'a');
452                                                   } else {
453                                                             unput(c);
454                                                             break;
455                                                   }
456                                         }
457                                         if (i)
458                                                   *bp++ = n;
459                                         break;
460                                   }
461 
462                               case 'u': /* utf  \u0-9a-fA-F (1..8) */
463                                   {
464                                         int i;
465 
466                                         n = 0;
467                                         for (i = 0; i < 8; i++) {
468                                                   c = input();
469                                                   if (!isxdigit(c) || c == 0)
470                                                             break;
471                                                   c = tolower(c);
472                                                   n *= 16;
473                                                   if (isdigit(c))
474                                                             n += (c - '0');
475                                                   else
476                                                             n += 10 + (c - 'a');
477                                         }
478                                         unput(c);
479                                         bp += runetochar(bp, n);
480                                         break;
481                                   }
482 
483                               default:
484                                         *bp++ = c;
485                                         break;
486                               }
487                               break;
488                     default:
489                               *bp++ = c;
490                               break;
491                     }
492           }
493           *bp = 0;
494           s = tostring(buf);
495           *bp++ = ' '; *bp++ = '\0';
496           yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
497           free(s);
498           RET(STRING);
499 }
500 
501 
binsearch(char * w,const Keyword * kp,int n)502 static int binsearch(char *w, const Keyword *kp, int n)
503 {
504           int cond, low, mid, high;
505 
506           low = 0;
507           high = n - 1;
508           while (low <= high) {
509                     mid = (low + high) / 2;
510                     if ((cond = strcmp(w, kp[mid].word)) < 0)
511                               high = mid - 1;
512                     else if (cond > 0)
513                               low = mid + 1;
514                     else
515                               return mid;
516           }
517           return -1;
518 }
519 
word(char * w)520 int word(char *w)
521 {
522           const Keyword *kp;
523           int c, n;
524 
525           n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
526           if (n != -1) {      /* found in table */
527                     kp = keywords + n;
528                     yylval.i = kp->sub;
529                     switch (kp->type) { /* special handling */
530                     case BLTIN:
531                               if (kp->sub == FSYSTEM && safe)
532                                         SYNTAX( "system is unsafe" );
533                               RET(kp->type);
534                     case FUNC:
535                               if (infunc)
536                                         SYNTAX( "illegal nested function" );
537                               RET(kp->type);
538                     case RETURN:
539                               if (!infunc)
540                                         SYNTAX( "return not in function" );
541                               RET(kp->type);
542                     case VARNF:
543                               yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
544                               RET(VARNF);
545                     default:
546                               RET(kp->type);
547                     }
548           }
549           c = peek();         /* look for '(' */
550           if (c != '(' && infunc && (n=isarg(w)) >= 0) {
551                     yylval.i = n;
552                     RET(ARG);
553           } else {
554                     yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
555                     if (c == '(') {
556                               RET(CALL);
557                     } else {
558                               RET(VAR);
559                     }
560           }
561 }
562 
startreg(void)563 void startreg(void) /* next call to yylex will return a regular expression */
564 {
565           reg = true;
566 }
567 
regexpr(void)568 int regexpr(void)
569 {
570           int c;
571           static char *buf = NULL;
572           static int bufsz = 500;
573           char *bp;
574 
575           if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
576                     FATAL("out of space for reg expr");
577           bp = buf;
578           for ( ; (c = input()) != '/' && c != 0; ) {
579                     if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
580                               FATAL("out of space for reg expr %.10s...", buf);
581                     if (c == '\n') {
582                               *bp = '\0';
583                               SYNTAX( "newline in regular expression %.10s...", buf );
584                               unput('\n');
585                               break;
586                     } else if (c == '\\') {
587                               *bp++ = '\\';
588                               *bp++ = input();
589                     } else {
590                               *bp++ = c;
591                     }
592           }
593           *bp = 0;
594           if (c == 0)
595                     SYNTAX("non-terminated regular expression %.10s...", buf);
596           yylval.s = tostring(buf);
597           unput('/');
598           RET(REGEXPR);
599 }
600 
601 /* low-level lexical stuff, sort of inherited from lex */
602 
/*
 * ebuf/ep: input() stores every character it returns at *ep, wrapping
 * to the start when the end is reached; unput() steps ep back by one.
 * Presumably read by error reporting elsewhere - confirm.
 */
char	ebuf[300];
char	*ep = ebuf;

/* yysbuf/yysptr: stack of pushed-back characters; yysptr is its top. */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;

FILE	*yyin = NULL;
608 
input(void)609 int input(void)     /* get next lexical input character */
610 {
611           int c;
612           extern char *lexprog;
613 
614           if (yysptr > yysbuf)
615                     c = (uschar)*--yysptr;
616           else if (lexprog != NULL) {   /* awk '...' */
617                     if ((c = (uschar)*lexprog) != 0)
618                               lexprog++;
619           } else                                  /* awk -f ... */
620                     c = pgetc();
621           if (c == EOF)
622                     c = 0;
623           if (ep >= ebuf + sizeof ebuf)
624                     ep = ebuf;
625           *ep = c;
626           if (c != 0) {
627                     ep++;
628           }
629           return (c);
630 }
631 
unput(int c)632 void unput(int c)   /* put lexical character back on input */
633 {
634           if (c == '\n')
635                     lineno--;
636           if (yysptr >= yysbuf + sizeof(yysbuf))
637                     FATAL("pushed back too much: %.20s...", yysbuf);
638           *yysptr++ = c;
639           if (--ep < ebuf)
640                     ep = ebuf + sizeof(ebuf) - 1;
641 }
642 
/*
 * unputstr: put the whole string s back on the input.  Characters are
 * pushed last-to-first, so input() returns them in their original
 * order.  An empty string pushes nothing.
 */
void unputstr(const char *s)
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
650