1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include "awk.h"
34 #include "awkgram.h"
35
36 extern YYSTYPE yylval;
37 extern bool infunc;
38
39 int lineno = 1;
40 int bracecnt = 0;
41 int brackcnt = 0;
42 int parencnt = 0;
43
/*
 * One row of the reserved-word table consulted by word().
 */
struct Keyword {
	const char	*word;	/* spelling; binsearch() compares identifiers against it */
	int		sub;	/* stored into yylval.i when the word is recognised */
	int		type;	/* token code handed back to the parser */
};
typedef struct Keyword Keyword;
49
50 const Keyword keywords[] = { /* keep sorted: binary searched */
51 { "BEGIN", XBEGIN, XBEGIN },
52 { "END", XEND, XEND },
53 { "NF", VARNF, VARNF },
54 { "and", FAND, BLTIN },
55 { "atan2", FATAN, BLTIN },
56 { "break", BREAK, BREAK },
57 { "close", CLOSE, CLOSE },
58 { "compl", FCOMPL, BLTIN },
59 { "continue", CONTINUE, CONTINUE },
60 { "cos", FCOS, BLTIN },
61 { "delete", DELETE, DELETE },
62 { "do", DO, DO },
63 { "else", ELSE, ELSE },
64 { "exit", EXIT, EXIT },
65 { "exp", FEXP, BLTIN },
66 { "fflush", FFLUSH, BLTIN },
67 { "for", FOR, FOR },
68 { "func", FUNC, FUNC },
69 { "function", FUNC, FUNC },
70 { "gensub", GENSUB, GENSUB },
71 { "getline", GETLINE, GETLINE },
72 { "gsub", GSUB, GSUB },
73 { "if", IF, IF },
74 { "in", IN, IN },
75 { "index", INDEX, INDEX },
76 { "int", FINT, BLTIN },
77 { "length", FLENGTH, BLTIN },
78 { "log", FLOG, BLTIN },
79 { "lshift", FLSHIFT, BLTIN },
80 { "match", MATCHFCN, MATCHFCN },
81 { "mktime", FMKTIME, BLTIN },
82 { "next", NEXT, NEXT },
83 { "nextfile", NEXTFILE, NEXTFILE },
84 { "or", FFOR, BLTIN },
85 { "print", PRINT, PRINT },
86 { "printf", PRINTF, PRINTF },
87 { "rand", FRAND, BLTIN },
88 { "return", RETURN, RETURN },
89 { "rshift", FRSHIFT, BLTIN },
90 { "sin", FSIN, BLTIN },
91 { "split", SPLIT, SPLIT },
92 { "sprintf", SPRINTF, SPRINTF },
93 { "sqrt", FSQRT, BLTIN },
94 { "srand", FSRAND, BLTIN },
95 { "strftime", FSTRFTIME, BLTIN },
96 { "sub", SUB, SUB },
97 { "substr", SUBSTR, SUBSTR },
98 { "system", FSYSTEM, BLTIN },
99 { "systime", FSYSTIME, BLTIN },
100 { "tolower", FTOLOWER, BLTIN },
101 { "toupper", FTOUPPER, BLTIN },
102 { "while", WHILE, WHILE },
103 { "xor", FXOR, BLTIN },
104 };
105
/*
 * RET(x): return token x from the enclosing function; when the global dbg
 * flag is nonzero, print the token's name (via tokname()) first.
 *
 * The body is wrapped in do { } while (0) so that "RET(x);" is exactly one
 * statement and can be used as an unbraced if-arm that is followed by else.
 * Note that x is expanded twice, so it must not have side effects.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
107
/*
 * peek: report the next input character without consuming it.
 *
 * Done as input() followed by unput(), so it carries unput()'s side effects:
 * in particular, peeking at '\n' decrements lineno.  Returns 0 when input()
 * reports end of input.
 */
static int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
114
gettok(char ** pbuf,int * psz)115 static int gettok(char **pbuf, int *psz) /* get next input token */
116 {
117 int c, retc;
118 char *buf = *pbuf;
119 int sz = *psz;
120 char *bp = buf;
121
122 c = input();
123 if (c == 0)
124 return 0;
125 buf[0] = c;
126 buf[1] = 0;
127 if (!isalnum(c) && c != '.' && c != '_')
128 return c;
129
130 *bp++ = c;
131 if (isalpha(c) || c == '_') { /* it's a varname */
132 for ( ; (c = input()) != 0; ) {
133 if (bp-buf >= sz)
134 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
135 FATAL( "out of space for name %.10s...", buf );
136 if (isalnum(c) || c == '_')
137 *bp++ = c;
138 else {
139 *bp = 0;
140 unput(c);
141 break;
142 }
143 }
144 *bp = 0;
145 retc = 'a'; /* alphanumeric */
146 } else { /* maybe it's a number, but could be . */
147 char *rem;
148 /* read input until can't be a number */
149 for ( ; (c = input()) != 0; ) {
150 if (bp-buf >= sz)
151 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
152 FATAL( "out of space for number %.10s...", buf );
153 if (isdigit(c) || c == 'e' || c == 'E'
154 || c == '.' || c == '+' || c == '-')
155 *bp++ = c;
156 else {
157 unput(c);
158 break;
159 }
160 }
161 *bp = 0;
162 strtod(buf, &rem); /* parse the number */
163 if (rem == buf) { /* it wasn't a valid number at all */
164 buf[1] = 0; /* return one character as token */
165 retc = (uschar)buf[0]; /* character is its own type */
166 unputstr(rem+1); /* put rest back for later */
167 } else { /* some prefix was a number */
168 unputstr(rem); /* put rest back for later */
169 rem[0] = 0; /* truncate buf after number part */
170 retc = '0'; /* type is number */
171 }
172 }
173 *pbuf = buf;
174 *psz = sz;
175 return retc;
176 }
177
/* Helpers defined further down that yylex() needs to see first. */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot requests consumed at the top of the next yylex() call. */
bool	sc  = false;	/* true => return a } right now */
bool	reg = false;	/* true => return a REGEXPR now */
183
yylex(void)184 int yylex(void)
185 {
186 int c;
187 static char *buf = NULL;
188 static int bufsize = 5; /* BUG: setting this small causes core dump! */
189
190 if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
191 FATAL( "out of space in yylex" );
192 if (sc) {
193 sc = false;
194 RET('}');
195 }
196 if (reg) {
197 reg = false;
198 return regexpr();
199 }
200 for (;;) {
201 c = gettok(&buf, &bufsize);
202 if (c == 0)
203 return 0;
204 if (isalpha(c) || c == '_')
205 return word(buf);
206 if (isdigit(c)) {
207 char *cp = tostring(buf);
208 double result;
209
210 if (is_number(cp, & result))
211 yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
212 else
213 yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
214 free(cp);
215 /* should this also have STR set? */
216 RET(NUMBER);
217 }
218
219 yylval.i = c;
220 switch (c) {
221 case '\n': /* {EOL} */
222 lineno++;
223 RET(NL);
224 case '\r': /* assume \n is coming */
225 case ' ': /* {WS}+ */
226 case '\t':
227 break;
228 case '#': /* #.* strip comments */
229 while ((c = input()) != '\n' && c != 0)
230 ;
231 unput(c);
232 /*
233 * Next line is a hack, it compensates for
234 * unput's treatment of \n.
235 */
236 lineno++;
237 break;
238 case ';':
239 RET(';');
240 case '\\':
241 if (peek() == '\n') {
242 input();
243 lineno++;
244 } else if (peek() == '\r') {
245 input(); input(); /* \n */
246 lineno++;
247 } else {
248 RET(c);
249 }
250 break;
251 case '&':
252 if (peek() == '&') {
253 input(); RET(AND);
254 } else
255 RET('&');
256 case '|':
257 if (peek() == '|') {
258 input(); RET(BOR);
259 } else
260 RET('|');
261 case '!':
262 if (peek() == '=') {
263 input(); yylval.i = NE; RET(NE);
264 } else if (peek() == '~') {
265 input(); yylval.i = NOTMATCH; RET(MATCHOP);
266 } else
267 RET(NOT);
268 case '~':
269 yylval.i = MATCH;
270 RET(MATCHOP);
271 case '<':
272 if (peek() == '=') {
273 input(); yylval.i = LE; RET(LE);
274 } else {
275 yylval.i = LT; RET(LT);
276 }
277 case '=':
278 if (peek() == '=') {
279 input(); yylval.i = EQ; RET(EQ);
280 } else {
281 yylval.i = ASSIGN; RET(ASGNOP);
282 }
283 case '>':
284 if (peek() == '=') {
285 input(); yylval.i = GE; RET(GE);
286 } else if (peek() == '>') {
287 input(); yylval.i = APPEND; RET(APPEND);
288 } else {
289 yylval.i = GT; RET(GT);
290 }
291 case '+':
292 if (peek() == '+') {
293 input(); yylval.i = INCR; RET(INCR);
294 } else if (peek() == '=') {
295 input(); yylval.i = ADDEQ; RET(ASGNOP);
296 } else
297 RET('+');
298 case '-':
299 if (peek() == '-') {
300 input(); yylval.i = DECR; RET(DECR);
301 } else if (peek() == '=') {
302 input(); yylval.i = SUBEQ; RET(ASGNOP);
303 } else
304 RET('-');
305 case '*':
306 if (peek() == '=') { /* *= */
307 input(); yylval.i = MULTEQ; RET(ASGNOP);
308 } else if (peek() == '*') { /* ** or **= */
309 input(); /* eat 2nd * */
310 if (peek() == '=') {
311 input(); yylval.i = POWEQ; RET(ASGNOP);
312 } else {
313 RET(POWER);
314 }
315 } else
316 RET('*');
317 case '/':
318 RET('/');
319 case '%':
320 if (peek() == '=') {
321 input(); yylval.i = MODEQ; RET(ASGNOP);
322 } else
323 RET('%');
324 case '^':
325 if (peek() == '=') {
326 input(); yylval.i = POWEQ; RET(ASGNOP);
327 } else
328 RET(POWER);
329
330 case '$':
331 /* BUG: awkward, if not wrong */
332 c = gettok(&buf, &bufsize);
333 if (isalpha(c)) {
334 if (strcmp(buf, "NF") == 0) { /* very special */
335 unputstr("(NF)");
336 RET(INDIRECT);
337 }
338 c = peek();
339 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
340 unputstr(buf);
341 RET(INDIRECT);
342 }
343 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
344 RET(IVAR);
345 } else if (c == 0) { /* */
346 SYNTAX( "unexpected end of input after $" );
347 RET(';');
348 } else {
349 unputstr(buf);
350 RET(INDIRECT);
351 }
352
353 case '}':
354 if (--bracecnt < 0)
355 SYNTAX( "extra }" );
356 sc = true;
357 RET(';');
358 case ']':
359 if (--brackcnt < 0)
360 SYNTAX( "extra ]" );
361 RET(']');
362 case ')':
363 if (--parencnt < 0)
364 SYNTAX( "extra )" );
365 RET(')');
366 case '{':
367 bracecnt++;
368 RET('{');
369 case '[':
370 brackcnt++;
371 RET('[');
372 case '(':
373 parencnt++;
374 RET('(');
375
376 case '"':
377 return string(); /* BUG: should be like tran.c ? */
378
379 default:
380 RET(c);
381 }
382 }
383 }
384
string(void)385 int string(void)
386 {
387 int c, n;
388 char *s, *bp;
389 static char *buf = NULL;
390 static int bufsz = 500;
391
392 if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
393 FATAL("out of space for strings");
394 for (bp = buf; (c = input()) != '"'; ) {
395 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
396 FATAL("out of space for string %.10s...", buf);
397 switch (c) {
398 case '\n':
399 case '\r':
400 case 0:
401 *bp = '\0';
402 SYNTAX( "non-terminated string %.10s...", buf );
403 if (c == 0) /* hopeless */
404 FATAL( "giving up" );
405 lineno++;
406 break;
407 case '\\':
408 c = input();
409 switch (c) {
410 case '\n': break;
411 case '"': *bp++ = '"'; break;
412 case 'n': *bp++ = '\n'; break;
413 case 't': *bp++ = '\t'; break;
414 case 'f': *bp++ = '\f'; break;
415 case 'r': *bp++ = '\r'; break;
416 case 'b': *bp++ = '\b'; break;
417 case 'v': *bp++ = '\v'; break;
418 case 'a': *bp++ = '\a'; break;
419 case '\\': *bp++ = '\\'; break;
420
421 case '0': case '1': case '2': /* octal: \d \dd \ddd */
422 case '3': case '4': case '5': case '6': case '7':
423 n = c - '0';
424 if ((c = peek()) >= '0' && c < '8') {
425 n = 8 * n + input() - '0';
426 if ((c = peek()) >= '0' && c < '8')
427 n = 8 * n + input() - '0';
428 }
429 *bp++ = n;
430 break;
431
432 case 'x': /* hex \x0-9a-fA-F (exactly two) */
433 {
434 int i;
435
436 if (!isxdigit(peek())) {
437 unput(c);
438 break;
439 }
440 n = 0;
441 for (i = 0; i < 2; i++) {
442 c = input();
443 if (c == 0)
444 break;
445 if (isxdigit(c)) {
446 c = tolower(c);
447 n *= 16;
448 if (isdigit(c))
449 n += (c - '0');
450 else
451 n += 10 + (c - 'a');
452 } else {
453 unput(c);
454 break;
455 }
456 }
457 if (i)
458 *bp++ = n;
459 break;
460 }
461
462 case 'u': /* utf \u0-9a-fA-F (1..8) */
463 {
464 int i;
465
466 n = 0;
467 for (i = 0; i < 8; i++) {
468 c = input();
469 if (!isxdigit(c) || c == 0)
470 break;
471 c = tolower(c);
472 n *= 16;
473 if (isdigit(c))
474 n += (c - '0');
475 else
476 n += 10 + (c - 'a');
477 }
478 unput(c);
479 bp += runetochar(bp, n);
480 break;
481 }
482
483 default:
484 *bp++ = c;
485 break;
486 }
487 break;
488 default:
489 *bp++ = c;
490 break;
491 }
492 }
493 *bp = 0;
494 s = tostring(buf);
495 *bp++ = ' '; *bp++ = '\0';
496 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
497 free(s);
498 RET(STRING);
499 }
500
501
binsearch(char * w,const Keyword * kp,int n)502 static int binsearch(char *w, const Keyword *kp, int n)
503 {
504 int cond, low, mid, high;
505
506 low = 0;
507 high = n - 1;
508 while (low <= high) {
509 mid = (low + high) / 2;
510 if ((cond = strcmp(w, kp[mid].word)) < 0)
511 high = mid - 1;
512 else if (cond > 0)
513 low = mid + 1;
514 else
515 return mid;
516 }
517 return -1;
518 }
519
word(char * w)520 int word(char *w)
521 {
522 const Keyword *kp;
523 int c, n;
524
525 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
526 if (n != -1) { /* found in table */
527 kp = keywords + n;
528 yylval.i = kp->sub;
529 switch (kp->type) { /* special handling */
530 case BLTIN:
531 if (kp->sub == FSYSTEM && safe)
532 SYNTAX( "system is unsafe" );
533 RET(kp->type);
534 case FUNC:
535 if (infunc)
536 SYNTAX( "illegal nested function" );
537 RET(kp->type);
538 case RETURN:
539 if (!infunc)
540 SYNTAX( "return not in function" );
541 RET(kp->type);
542 case VARNF:
543 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
544 RET(VARNF);
545 default:
546 RET(kp->type);
547 }
548 }
549 c = peek(); /* look for '(' */
550 if (c != '(' && infunc && (n=isarg(w)) >= 0) {
551 yylval.i = n;
552 RET(ARG);
553 } else {
554 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
555 if (c == '(') {
556 RET(CALL);
557 } else {
558 RET(VAR);
559 }
560 }
561 }
562
startreg(void)563 void startreg(void) /* next call to yylex will return a regular expression */
564 {
565 reg = true;
566 }
567
regexpr(void)568 int regexpr(void)
569 {
570 int c;
571 static char *buf = NULL;
572 static int bufsz = 500;
573 char *bp;
574
575 if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
576 FATAL("out of space for reg expr");
577 bp = buf;
578 for ( ; (c = input()) != '/' && c != 0; ) {
579 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
580 FATAL("out of space for reg expr %.10s...", buf);
581 if (c == '\n') {
582 *bp = '\0';
583 SYNTAX( "newline in regular expression %.10s...", buf );
584 unput('\n');
585 break;
586 } else if (c == '\\') {
587 *bp++ = '\\';
588 *bp++ = input();
589 } else {
590 *bp++ = c;
591 }
592 }
593 *bp = 0;
594 if (c == 0)
595 SYNTAX("non-terminated regular expression %.10s...", buf);
596 yylval.s = tostring(buf);
597 unput('/');
598 RET(REGEXPR);
599 }
600
601 /* low-level lexical stuff, sort of inherited from lex */
602
/*
 * ebuf is a circular record of the characters input() has delivered; ep is
 * the next slot to fill.  (Presumably read elsewhere to show error context
 * -- no reader is visible in this file.)
 */
char	ebuf[300];
char	*ep = ebuf;

/* Characters given back with unput(); input() drains these first, last in first out. */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;

FILE	*yyin = NULL;
608
input(void)609 int input(void) /* get next lexical input character */
610 {
611 int c;
612 extern char *lexprog;
613
614 if (yysptr > yysbuf)
615 c = (uschar)*--yysptr;
616 else if (lexprog != NULL) { /* awk '...' */
617 if ((c = (uschar)*lexprog) != 0)
618 lexprog++;
619 } else /* awk -f ... */
620 c = pgetc();
621 if (c == EOF)
622 c = 0;
623 if (ep >= ebuf + sizeof ebuf)
624 ep = ebuf;
625 *ep = c;
626 if (c != 0) {
627 ep++;
628 }
629 return (c);
630 }
631
unput(int c)632 void unput(int c) /* put lexical character back on input */
633 {
634 if (c == '\n')
635 lineno--;
636 if (yysptr >= yysbuf + sizeof(yysbuf))
637 FATAL("pushed back too much: %.20s...", yysbuf);
638 *yysptr++ = c;
639 if (--ep < ebuf)
640 ep = ebuf + sizeof(ebuf) - 1;
641 }
642
/*
 * unputstr: push the whole string s back onto the input.
 *
 * Characters are handed to unput() last-to-first so that subsequent input()
 * calls return them in their original order.  An empty string pushes
 * nothing.  Walking a pointer avoids computing strlen(s)-1 in size_t and
 * narrowing the result to int.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
650