1 /*    a2py.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4  *    2000, 2001, 2002, by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  */
9 
10 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
11 #if defined(WIN32)
12 #include <io.h>
13 #endif
14 #if defined(NETWARE)
15 #include "../netware/clibstuf.h"
16 #endif
17 #include "../patchlevel.h"
18 #endif
19 #include "util.h"
20 
21 char *filename;
22 char *myname;
23 
24 int checkers = 0;
25 
26 int oper0(int type);
27 int oper1(int type, int arg1);
28 int oper2(int type, int arg1, int arg2);
29 int oper3(int type, int arg1, int arg2, int arg3);
30 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
31 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
32 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
33 #ifdef NETWARE
34 char *savestr(char *str);
35 char *cpy2(register char *to, register char *from, register int delim);
36 #endif
37 
38 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
39 static void usage(void);
40 
41 static void
usage()42 usage()
43 {
44     printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
45     printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
46     printf("\n  -D<number>      sets debugging flags."
47            "\n  -F<character>   the awk script to translate is always invoked with"
48            "\n                  this -F switch."
49            "\n  -n<fieldlist>   specifies the names of the input fields if input does"
50            "\n                  not have to be split into an array."
51            "\n  -<number>       causes a2p to assume that input will always have that"
52            "\n                  many fields.\n");
53     exit(1);
54 }
55 #endif
56 
57 #ifdef __osf__
58 #pragma message disable (mainparm) /* We have the envp in main(). */
59 #endif
60 
61 int
main(register int argc,register char ** argv,register char ** env)62 main(register int argc, register char **argv, register char **env)
63 {
64     register STR *str;
65     int i;
66     STR *tmpstr;
67     /* char *namelist;    */
68 
69 	#ifdef NETWARE
70 		fnInitGpfGlobals();	/* For importing the CLIB calls in place of Watcom calls */
71 	#endif	/* NETWARE */
72 
73     myname = argv[0];
74     linestr = str_new(80);
75     str = str_new(0);		/* first used for -I flags */
76     for (argc--,argv++; argc; argc--,argv++) {
77 	if (argv[0][0] != '-' || !argv[0][1])
78 	    break;
79 	switch (argv[0][1]) {
80 #ifdef DEBUGGING
81 	case 'D':
82 	    debug = atoi(argv[0]+2);
83 #if YYDEBUG
84 	    yydebug = (debug & 1);
85 #endif
86 	    break;
87 #endif
88 	case '0': case '1': case '2': case '3': case '4':
89 	case '5': case '6': case '7': case '8': case '9':
90 	    maxfld = atoi(argv[0]+1);
91 	    absmaxfld = TRUE;
92 	    break;
93 	case 'F':
94 	    fswitch = argv[0][2];
95 	    break;
96 	case 'n':
97 	    namelist = savestr(argv[0]+2);
98 	    break;
99 	case 'o':
100 	    old_awk = TRUE;
101 	    break;
102 	case '-':
103 	    argc--,argv++;
104 	    goto switch_end;
105 	case 0:
106 	    break;
107 	default:
108 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
109 	    fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
110             usage();
111 #else
112 	    fatal("Unrecognized switch: %s\n",argv[0]);
113 #endif
114 	}
115     }
116   switch_end:
117 
118     /* open script */
119 
120     if (argv[0] == Nullch) {
121 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
122 	if ( isatty(fileno(stdin)) )
123 	    usage();
124 #endif
125         argv[0] = "-";
126     }
127     filename = savestr(argv[0]);
128 
129     filename = savestr(argv[0]);
130     if (strEQ(filename,"-"))
131 	argv[0] = "";
132     if (!*argv[0])
133 	rsfp = stdin;
134     else
135 	rsfp = fopen(argv[0],"r");
136     if (rsfp == Nullfp)
137 	fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
138 
139     /* init tokener */
140 
141     bufptr = str_get(linestr);
142     symtab = hnew();
143     curarghash = hnew();
144 
145     /* now parse the report spec */
146 
147     if (yyparse())
148 	fatal("Translation aborted due to syntax errors.\n");
149 
150 #ifdef DEBUGGING
151     if (debug & 2) {
152 	int type, len;
153 
154 	for (i=1; i<mop;) {
155 	    type = ops[i].ival;
156 	    len = type >> 8;
157 	    type &= 255;
158 	    printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
159 	    if (type == OSTRING)
160 		printf("\t\"%s\"\n",ops[i].cval),i++;
161 	    else {
162 		while (len--) {
163 		    printf("\t%d",ops[i].ival),i++;
164 		}
165 		putchar('\n');
166 	    }
167 	}
168     }
169     if (debug & 8)
170 	dump(root);
171 #endif
172 
173     /* first pass to look for numeric variables */
174 
175     prewalk(0,0,root,&i);
176 
177     /* second pass to produce new program */
178 
179     tmpstr = walk(0,0,root,&i,P_MIN);
180     str = str_make(STARTPERL);
181     str_cat(str, "\neval 'exec ");
182     str_cat(str, BIN);
183     str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
184     if $running_under_some_shell;\n\
185 			# this emulates #! processing on NIH machines.\n\
186 			# (remove #! line above if indigestible)\n\n");
187     str_cat(str,
188       "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
189     str_cat(str,
190       "			# process any FOO=bar switches\n\n");
191     if (do_opens && opens) {
192 	str_scat(str,opens);
193 	str_free(opens);
194 	str_cat(str,"\n");
195     }
196     str_scat(str,tmpstr);
197     str_free(tmpstr);
198 #ifdef DEBUGGING
199     if (!(debug & 16))
200 #endif
201     fixup(str);
202     putlines(str);
203     if (checkers) {
204 	fprintf(stderr,
205 	  "Please check my work on the %d line%s I've marked with \"#???\".\n",
206 		checkers, checkers == 1 ? "" : "s" );
207 	fprintf(stderr,
208 	  "The operation I've selected may be wrong for the operand types.\n");
209     }
210     exit(0);
211     /* by ANSI specs return is needed. This also shuts up VC++ and his warnings */
212     return(0);
213 }
214 
215 #define RETURN(retval) return (bufptr = s,retval)
216 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
217 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
218 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
219 
220 int idtype;
221 
222 int
yylex(void)223 yylex(void)
224 {
225     register char *s = bufptr;
226     register char *d;
227     register int tmp;
228 
229   retry:
230 #if YYDEBUG
231     if (yydebug) {
232 	if (strchr(s,'\n'))
233 	    fprintf(stderr,"Tokener at %s",s);
234 	else
235 	    fprintf(stderr,"Tokener at %s\n",s);
236     }
237 #endif
238     switch (*s) {
239     default:
240 	fprintf(stderr,
241 	    "Unrecognized character %c in file %s line %d--ignoring.\n",
242 	     *s++,filename,line);
243 	goto retry;
244     case '\\':
245 	s++;
246 	if (*s && *s != '\n') {
247 	    yyerror("Ignoring spurious backslash");
248 	    goto retry;
249 	}
250 	/*FALLSTHROUGH*/
251     case 0:
252 	s = str_get(linestr);
253 	*s = '\0';
254 	if (!rsfp)
255 	    RETURN(0);
256 	line++;
257 	if ((s = str_gets(linestr, rsfp)) == Nullch) {
258 	    if (rsfp != stdin)
259 		fclose(rsfp);
260 	    rsfp = Nullfp;
261 	    s = str_get(linestr);
262 	    RETURN(0);
263 	}
264 	goto retry;
265     case ' ': case '\t':
266 	s++;
267 	goto retry;
268     case '\n':
269 	*s = '\0';
270 	XTERM(NEWLINE);
271     case '#':
272 	yylval = string(s,0);
273 	*s = '\0';
274 	XTERM(COMMENT);
275     case ';':
276 	tmp = *s++;
277 	if (*s == '\n') {
278 	    s++;
279 	    XTERM(SEMINEW);
280 	}
281 	XTERM(tmp);
282     case '(':
283 	tmp = *s++;
284 	XTERM(tmp);
285     case '{':
286     case '[':
287     case ')':
288     case ']':
289     case '?':
290     case ':':
291 	tmp = *s++;
292 	XOP(tmp);
293 #ifdef EBCDIC
294     case 7:
295 #else
296     case 127:
297 #endif
298 	s++;
299 	XTERM('}');
300     case '}':
301 	for (d = s + 1; isSPACE(*d); d++) ;
302 	if (!*d)
303 	    s = d - 1;
304 	*s = 127;
305 	XTERM(';');
306     case ',':
307 	tmp = *s++;
308 	XTERM(tmp);
309     case '~':
310 	s++;
311 	yylval = string("~",1);
312 	XTERM(MATCHOP);
313     case '+':
314     case '-':
315 	if (s[1] == *s) {
316 	    s++;
317 	    if (*s++ == '+')
318 		XTERM(INCR);
319 	    else
320 		XTERM(DECR);
321 	}
322 	/* FALL THROUGH */
323     case '*':
324     case '%':
325     case '^':
326 	tmp = *s++;
327 	if (*s == '=') {
328 	    if (tmp == '^')
329 		yylval = string("**=",3);
330 	    else
331 		yylval = string(s-1,2);
332 	    s++;
333 	    XTERM(ASGNOP);
334 	}
335 	XTERM(tmp);
336     case '&':
337 	s++;
338 	tmp = *s++;
339 	if (tmp == '&')
340 	    XTERM(ANDAND);
341 	s--;
342 	XTERM('&');
343     case '|':
344 	s++;
345 	tmp = *s++;
346 	if (tmp == '|')
347 	    XTERM(OROR);
348 	s--;
349 	while (*s == ' ' || *s == '\t')
350 	    s++;
351 	if (strnEQ(s,"getline",7))
352 	    XTERM('p');
353 	else
354 	    XTERM('|');
355     case '=':
356 	s++;
357 	tmp = *s++;
358 	if (tmp == '=') {
359 	    yylval = string("==",2);
360 	    XTERM(RELOP);
361 	}
362 	s--;
363 	yylval = string("=",1);
364 	XTERM(ASGNOP);
365     case '!':
366 	s++;
367 	tmp = *s++;
368 	if (tmp == '=') {
369 	    yylval = string("!=",2);
370 	    XTERM(RELOP);
371 	}
372 	if (tmp == '~') {
373 	    yylval = string("!~",2);
374 	    XTERM(MATCHOP);
375 	}
376 	s--;
377 	XTERM(NOT);
378     case '<':
379 	s++;
380 	tmp = *s++;
381 	if (tmp == '=') {
382 	    yylval = string("<=",2);
383 	    XTERM(RELOP);
384 	}
385 	s--;
386 	XTERM('<');
387     case '>':
388 	s++;
389 	tmp = *s++;
390 	if (tmp == '>') {
391 	    yylval = string(">>",2);
392 	    XTERM(GRGR);
393 	}
394 	if (tmp == '=') {
395 	    yylval = string(">=",2);
396 	    XTERM(RELOP);
397 	}
398 	s--;
399 	XTERM('>');
400 
401 #define SNARFWORD \
402 	d = tokenbuf; \
403 	while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
404 	    *d++ = *s++; \
405 	*d = '\0'; \
406 	d = tokenbuf; \
407 	if (*s == '(') \
408 	    idtype = USERFUN; \
409 	else \
410 	    idtype = VAR;
411 
412     case '$':
413 	s++;
414 	if (*s == '0') {
415 	    s++;
416 	    do_chop = TRUE;
417 	    need_entire = TRUE;
418 	    idtype = VAR;
419 	    ID("0");
420 	}
421 	do_split = TRUE;
422 	if (isDIGIT(*s)) {
423 	    for (d = s; isDIGIT(*s); s++) ;
424 	    yylval = string(d,s-d);
425 	    tmp = atoi(d);
426 	    if (tmp > maxfld)
427 		maxfld = tmp;
428 	    XOP(FIELD);
429 	}
430 	for (d = s; isALPHA(*s) || isDIGIT(*s) || *s == '_'; )
431 	    s++;
432 	split_to_array = set_array_base = TRUE;
433 	if (d != s)
434 	{
435 	    yylval = string(d,s-d);
436 	    XTERM(SVFIELD);
437 	}
438 	XOP(VFIELD);
439 
440     case '/':			/* may either be division or pattern */
441 	if (expectterm) {
442 	    s = scanpat(s);
443 	    XTERM(REGEX);
444 	}
445 	tmp = *s++;
446 	if (*s == '=') {
447 	    yylval = string("/=",2);
448 	    s++;
449 	    XTERM(ASGNOP);
450 	}
451 	XTERM(tmp);
452 
453     case '0': case '1': case '2': case '3': case '4':
454     case '5': case '6': case '7': case '8': case '9': case '.':
455 	s = scannum(s);
456 	XOP(NUMBER);
457     case '"':
458 	s++;
459 	s = cpy2(tokenbuf,s,s[-1]);
460 	if (!*s)
461 	    fatal("String not terminated:\n%s",str_get(linestr));
462 	s++;
463 	yylval = string(tokenbuf,0);
464 	XOP(STRING);
465 
466     case 'a': case 'A':
467 	SNARFWORD;
468 	if (strEQ(d,"ARGC"))
469 	    set_array_base = TRUE;
470 	if (strEQ(d,"ARGV")) {
471 	    yylval=numary(string("ARGV",0));
472 	    XOP(VAR);
473 	}
474 	if (strEQ(d,"atan2")) {
475 	    yylval = OATAN2;
476 	    XTERM(FUNN);
477 	}
478 	ID(d);
479     case 'b': case 'B':
480 	SNARFWORD;
481 	if (strEQ(d,"break"))
482 	    XTERM(BREAK);
483 	if (strEQ(d,"BEGIN"))
484 	    XTERM(BEGIN);
485 	ID(d);
486     case 'c': case 'C':
487 	SNARFWORD;
488 	if (strEQ(d,"continue"))
489 	    XTERM(CONTINUE);
490 	if (strEQ(d,"cos")) {
491 	    yylval = OCOS;
492 	    XTERM(FUN1);
493 	}
494 	if (strEQ(d,"close")) {
495 	    do_fancy_opens = 1;
496 	    yylval = OCLOSE;
497 	    XTERM(FUN1);
498 	}
499 	if (strEQ(d,"chdir"))
500 	    *d = toUPPER(*d);
501 	else if (strEQ(d,"crypt"))
502 	    *d = toUPPER(*d);
503 	else if (strEQ(d,"chop"))
504 	    *d = toUPPER(*d);
505 	else if (strEQ(d,"chmod"))
506 	    *d = toUPPER(*d);
507 	else if (strEQ(d,"chown"))
508 	    *d = toUPPER(*d);
509 	ID(d);
510     case 'd': case 'D':
511 	SNARFWORD;
512 	if (strEQ(d,"do"))
513 	    XTERM(DO);
514 	if (strEQ(d,"delete"))
515 	    XTERM(DELETE);
516 	if (strEQ(d,"die"))
517 	    *d = toUPPER(*d);
518 	ID(d);
519     case 'e': case 'E':
520 	SNARFWORD;
521 	if (strEQ(d,"END"))
522 	    XTERM(END);
523 	if (strEQ(d,"else"))
524 	    XTERM(ELSE);
525 	if (strEQ(d,"exit")) {
526 	    saw_line_op = TRUE;
527 	    XTERM(EXIT);
528 	}
529 	if (strEQ(d,"exp")) {
530 	    yylval = OEXP;
531 	    XTERM(FUN1);
532 	}
533 	if (strEQ(d,"elsif"))
534 	    *d = toUPPER(*d);
535 	else if (strEQ(d,"eq"))
536 	    *d = toUPPER(*d);
537 	else if (strEQ(d,"eval"))
538 	    *d = toUPPER(*d);
539 	else if (strEQ(d,"eof"))
540 	    *d = toUPPER(*d);
541 	else if (strEQ(d,"each"))
542 	    *d = toUPPER(*d);
543 	else if (strEQ(d,"exec"))
544 	    *d = toUPPER(*d);
545 	ID(d);
546     case 'f': case 'F':
547 	SNARFWORD;
548 	if (strEQ(d,"FS")) {
549 	    saw_FS++;
550 	    if (saw_FS == 1 && in_begin) {
551 		for (d = s; *d && isSPACE(*d); d++) ;
552 		if (*d == '=') {
553 		    for (d++; *d && isSPACE(*d); d++) ;
554 		    if (*d == '"' && d[2] == '"')
555 			const_FS = d[1];
556 		}
557 	    }
558 	    ID(tokenbuf);
559 	}
560 	if (strEQ(d,"for"))
561 	    XTERM(FOR);
562 	else if (strEQ(d,"function"))
563 	    XTERM(FUNCTION);
564 	if (strEQ(d,"FILENAME"))
565 	    d = "ARGV";
566 	if (strEQ(d,"foreach"))
567 	    *d = toUPPER(*d);
568 	else if (strEQ(d,"format"))
569 	    *d = toUPPER(*d);
570 	else if (strEQ(d,"fork"))
571 	    *d = toUPPER(*d);
572 	else if (strEQ(d,"fh"))
573 	    *d = toUPPER(*d);
574 	ID(d);
575     case 'g': case 'G':
576 	SNARFWORD;
577 	if (strEQ(d,"getline"))
578 	    XTERM(GETLINE);
579 	if (strEQ(d,"gsub"))
580 	    XTERM(GSUB);
581 	if (strEQ(d,"ge"))
582 	    *d = toUPPER(*d);
583 	else if (strEQ(d,"gt"))
584 	    *d = toUPPER(*d);
585 	else if (strEQ(d,"goto"))
586 	    *d = toUPPER(*d);
587 	else if (strEQ(d,"gmtime"))
588 	    *d = toUPPER(*d);
589 	ID(d);
590     case 'h': case 'H':
591 	SNARFWORD;
592 	if (strEQ(d,"hex"))
593 	    *d = toUPPER(*d);
594 	ID(d);
595     case 'i': case 'I':
596 	SNARFWORD;
597 	if (strEQ(d,"if"))
598 	    XTERM(IF);
599 	if (strEQ(d,"in"))
600 	    XTERM(IN);
601 	if (strEQ(d,"index")) {
602 	    set_array_base = TRUE;
603 	    XTERM(INDEX);
604 	}
605 	if (strEQ(d,"int")) {
606 	    yylval = OINT;
607 	    XTERM(FUN1);
608 	}
609 	ID(d);
610     case 'j': case 'J':
611 	SNARFWORD;
612 	if (strEQ(d,"join"))
613 	    *d = toUPPER(*d);
614 	ID(d);
615     case 'k': case 'K':
616 	SNARFWORD;
617 	if (strEQ(d,"keys"))
618 	    *d = toUPPER(*d);
619 	else if (strEQ(d,"kill"))
620 	    *d = toUPPER(*d);
621 	ID(d);
622     case 'l': case 'L':
623 	SNARFWORD;
624 	if (strEQ(d,"length")) {
625 	    yylval = OLENGTH;
626 	    XTERM(FUN1);
627 	}
628 	if (strEQ(d,"log")) {
629 	    yylval = OLOG;
630 	    XTERM(FUN1);
631 	}
632 	if (strEQ(d,"last"))
633 	    *d = toUPPER(*d);
634 	else if (strEQ(d,"local"))
635 	    *d = toUPPER(*d);
636 	else if (strEQ(d,"lt"))
637 	    *d = toUPPER(*d);
638 	else if (strEQ(d,"le"))
639 	    *d = toUPPER(*d);
640 	else if (strEQ(d,"locatime"))
641 	    *d = toUPPER(*d);
642 	else if (strEQ(d,"link"))
643 	    *d = toUPPER(*d);
644 	ID(d);
645     case 'm': case 'M':
646 	SNARFWORD;
647 	if (strEQ(d,"match")) {
648 	    set_array_base = TRUE;
649 	    XTERM(MATCH);
650 	}
651 	if (strEQ(d,"m"))
652 	    *d = toUPPER(*d);
653 	ID(d);
654     case 'n': case 'N':
655 	SNARFWORD;
656 	if (strEQ(d,"NF"))
657 	    do_chop = do_split = split_to_array = set_array_base = TRUE;
658 	if (strEQ(d,"next")) {
659 	    saw_line_op = TRUE;
660 	    XTERM(NEXT);
661 	}
662 	if (strEQ(d,"ne"))
663 	    *d = toUPPER(*d);
664 	ID(d);
665     case 'o': case 'O':
666 	SNARFWORD;
667 	if (strEQ(d,"ORS")) {
668 	    saw_ORS = TRUE;
669 	    d = "\\";
670 	}
671 	if (strEQ(d,"OFS")) {
672 	    saw_OFS = TRUE;
673 	    d = ",";
674 	}
675 	if (strEQ(d,"OFMT")) {
676 	    d = "#";
677 	}
678 	if (strEQ(d,"open"))
679 	    *d = toUPPER(*d);
680 	else if (strEQ(d,"ord"))
681 	    *d = toUPPER(*d);
682 	else if (strEQ(d,"oct"))
683 	    *d = toUPPER(*d);
684 	ID(d);
685     case 'p': case 'P':
686 	SNARFWORD;
687 	if (strEQ(d,"print")) {
688 	    XTERM(PRINT);
689 	}
690 	if (strEQ(d,"printf")) {
691 	    XTERM(PRINTF);
692 	}
693 	if (strEQ(d,"push"))
694 	    *d = toUPPER(*d);
695 	else if (strEQ(d,"pop"))
696 	    *d = toUPPER(*d);
697 	ID(d);
698     case 'q': case 'Q':
699 	SNARFWORD;
700 	ID(d);
701     case 'r': case 'R':
702 	SNARFWORD;
703 	if (strEQ(d,"RS")) {
704 	    d = "/";
705 	    saw_RS = TRUE;
706 	}
707 	if (strEQ(d,"rand")) {
708 	    yylval = ORAND;
709 	    XTERM(FUN1);
710 	}
711 	if (strEQ(d,"return"))
712 	    XTERM(RET);
713 	if (strEQ(d,"reset"))
714 	    *d = toUPPER(*d);
715 	else if (strEQ(d,"redo"))
716 	    *d = toUPPER(*d);
717 	else if (strEQ(d,"rename"))
718 	    *d = toUPPER(*d);
719 	ID(d);
720     case 's': case 'S':
721 	SNARFWORD;
722 	if (strEQ(d,"split")) {
723 	    set_array_base = TRUE;
724 	    XOP(SPLIT);
725 	}
726 	if (strEQ(d,"substr")) {
727 	    set_array_base = TRUE;
728 	    XTERM(SUBSTR);
729 	}
730 	if (strEQ(d,"sub"))
731 	    XTERM(SUB);
732 	if (strEQ(d,"sprintf")) {
733             /* In old awk, { print sprintf("str%sg"),"in" } prints
734              * "string"; in new awk, "in" is not considered an argument to
735              * sprintf, so the statement breaks.  To support both, the
736              * grammar treats arguments to SPRINTF_OLD like old awk,
737              * SPRINTF_NEW like new.  Here we return the appropriate one.
738              */
739 	    XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
740         }
741 	if (strEQ(d,"sqrt")) {
742 	    yylval = OSQRT;
743 	    XTERM(FUN1);
744 	}
745 	if (strEQ(d,"SUBSEP")) {
746 	    d = ";";
747 	}
748 	if (strEQ(d,"sin")) {
749 	    yylval = OSIN;
750 	    XTERM(FUN1);
751 	}
752 	if (strEQ(d,"srand")) {
753 	    yylval = OSRAND;
754 	    XTERM(FUN1);
755 	}
756 	if (strEQ(d,"system")) {
757 	    yylval = OSYSTEM;
758 	    XTERM(FUN1);
759 	}
760 	if (strEQ(d,"s"))
761 	    *d = toUPPER(*d);
762 	else if (strEQ(d,"shift"))
763 	    *d = toUPPER(*d);
764 	else if (strEQ(d,"select"))
765 	    *d = toUPPER(*d);
766 	else if (strEQ(d,"seek"))
767 	    *d = toUPPER(*d);
768 	else if (strEQ(d,"stat"))
769 	    *d = toUPPER(*d);
770 	else if (strEQ(d,"study"))
771 	    *d = toUPPER(*d);
772 	else if (strEQ(d,"sleep"))
773 	    *d = toUPPER(*d);
774 	else if (strEQ(d,"symlink"))
775 	    *d = toUPPER(*d);
776 	else if (strEQ(d,"sort"))
777 	    *d = toUPPER(*d);
778 	ID(d);
779     case 't': case 'T':
780 	SNARFWORD;
781 	if (strEQ(d,"tr"))
782 	    *d = toUPPER(*d);
783 	else if (strEQ(d,"tell"))
784 	    *d = toUPPER(*d);
785 	else if (strEQ(d,"time"))
786 	    *d = toUPPER(*d);
787 	else if (strEQ(d,"times"))
788 	    *d = toUPPER(*d);
789 	ID(d);
790     case 'u': case 'U':
791 	SNARFWORD;
792 	if (strEQ(d,"until"))
793 	    *d = toUPPER(*d);
794 	else if (strEQ(d,"unless"))
795 	    *d = toUPPER(*d);
796 	else if (strEQ(d,"umask"))
797 	    *d = toUPPER(*d);
798 	else if (strEQ(d,"unshift"))
799 	    *d = toUPPER(*d);
800 	else if (strEQ(d,"unlink"))
801 	    *d = toUPPER(*d);
802 	else if (strEQ(d,"utime"))
803 	    *d = toUPPER(*d);
804 	ID(d);
805     case 'v': case 'V':
806 	SNARFWORD;
807 	if (strEQ(d,"values"))
808 	    *d = toUPPER(*d);
809 	ID(d);
810     case 'w': case 'W':
811 	SNARFWORD;
812 	if (strEQ(d,"while"))
813 	    XTERM(WHILE);
814 	if (strEQ(d,"write"))
815 	    *d = toUPPER(*d);
816 	else if (strEQ(d,"wait"))
817 	    *d = toUPPER(*d);
818 	ID(d);
819     case 'x': case 'X':
820 	SNARFWORD;
821 	if (strEQ(d,"x"))
822 	    *d = toUPPER(*d);
823 	ID(d);
824     case 'y': case 'Y':
825 	SNARFWORD;
826 	if (strEQ(d,"y"))
827 	    *d = toUPPER(*d);
828 	ID(d);
829     case 'z': case 'Z':
830 	SNARFWORD;
831 	ID(d);
832     }
833 }
834 
835 char *
scanpat(register char * s)836 scanpat(register char *s)
837 {
838     register char *d;
839 
840     switch (*s++) {
841     case '/':
842 	break;
843     default:
844 	fatal("Search pattern not found:\n%s",str_get(linestr));
845     }
846 
847     d = tokenbuf;
848     for (; *s; s++,d++) {
849 	if (*s == '\\') {
850 	    if (s[1] == '/')
851 		*d++ = *s++;
852 	    else if (s[1] == '\\')
853 		*d++ = *s++;
854 	    else if (s[1] == '[')
855 		*d++ = *s++;
856 	}
857 	else if (*s == '[') {
858 	    *d++ = *s++;
859 	    do {
860 		if (*s == '\\' && s[1])
861 		    *d++ = *s++;
862 		if (*s == '/' || (*s == '-' && s[1] == ']'))
863 		    *d++ = '\\';
864 		*d++ = *s++;
865 	    } while (*s && *s != ']');
866 	}
867 	else if (*s == '/')
868 	    break;
869 	*d = *s;
870     }
871     *d = '\0';
872 
873     if (!*s)
874 	fatal("Search pattern not terminated:\n%s",str_get(linestr));
875     s++;
876     yylval = string(tokenbuf,0);
877     return s;
878 }
879 
880 void
yyerror(char * s)881 yyerror(char *s)
882 {
883     fprintf(stderr,"%s in file %s at line %d\n",
884       s,filename,line);
885 }
886 
887 char *
scannum(register char * s)888 scannum(register char *s)
889 {
890     register char *d;
891 
892     switch (*s) {
893     case '1': case '2': case '3': case '4': case '5':
894     case '6': case '7': case '8': case '9': case '0' : case '.':
895 	d = tokenbuf;
896 	while (isDIGIT(*s)) {
897 	    *d++ = *s++;
898 	}
899 	if (*s == '.') {
900 	    if (isDIGIT(s[1])) {
901 		*d++ = *s++;
902 		while (isDIGIT(*s)) {
903 		    *d++ = *s++;
904 		}
905 	    }
906 	    else
907 		s++;
908 	}
909 	if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
910 	    *d++ = *s++;
911 	    if (*s == '+' || *s == '-')
912 		*d++ = *s++;
913 	    while (isDIGIT(*s))
914 		*d++ = *s++;
915 	}
916 	*d = '\0';
917 	yylval = string(tokenbuf,0);
918 	break;
919     }
920     return s;
921 }
922 
923 int
string(char * ptr,int len)924 string(char *ptr, int len)
925 {
926     int retval = mop;
927 
928     ops[mop++].ival = OSTRING + (1<<8);
929     if (!len)
930 	len = strlen(ptr);
931     ops[mop].cval = (char *) safemalloc(len+1);
932     strncpy(ops[mop].cval,ptr,len);
933     ops[mop++].cval[len] = '\0';
934     if (mop >= OPSMAX)
935 	fatal("Recompile a2p with larger OPSMAX\n");
936     return retval;
937 }
938 
939 int
oper0(int type)940 oper0(int type)
941 {
942     int retval = mop;
943 
944     if (type > 255)
945 	fatal("type > 255 (%d)\n",type);
946     ops[mop++].ival = type;
947     if (mop >= OPSMAX)
948 	fatal("Recompile a2p with larger OPSMAX\n");
949     return retval;
950 }
951 
952 int
oper1(int type,int arg1)953 oper1(int type, int arg1)
954 {
955     int retval = mop;
956 
957     if (type > 255)
958 	fatal("type > 255 (%d)\n",type);
959     ops[mop++].ival = type + (1<<8);
960     ops[mop++].ival = arg1;
961     if (mop >= OPSMAX)
962 	fatal("Recompile a2p with larger OPSMAX\n");
963     return retval;
964 }
965 
966 int
oper2(int type,int arg1,int arg2)967 oper2(int type, int arg1, int arg2)
968 {
969     int retval = mop;
970 
971     if (type > 255)
972 	fatal("type > 255 (%d)\n",type);
973     ops[mop++].ival = type + (2<<8);
974     ops[mop++].ival = arg1;
975     ops[mop++].ival = arg2;
976     if (mop >= OPSMAX)
977 	fatal("Recompile a2p with larger OPSMAX\n");
978     return retval;
979 }
980 
981 int
oper3(int type,int arg1,int arg2,int arg3)982 oper3(int type, int arg1, int arg2, int arg3)
983 {
984     int retval = mop;
985 
986     if (type > 255)
987 	fatal("type > 255 (%d)\n",type);
988     ops[mop++].ival = type + (3<<8);
989     ops[mop++].ival = arg1;
990     ops[mop++].ival = arg2;
991     ops[mop++].ival = arg3;
992     if (mop >= OPSMAX)
993 	fatal("Recompile a2p with larger OPSMAX\n");
994     return retval;
995 }
996 
997 int
oper4(int type,int arg1,int arg2,int arg3,int arg4)998 oper4(int type, int arg1, int arg2, int arg3, int arg4)
999 {
1000     int retval = mop;
1001 
1002     if (type > 255)
1003 	fatal("type > 255 (%d)\n",type);
1004     ops[mop++].ival = type + (4<<8);
1005     ops[mop++].ival = arg1;
1006     ops[mop++].ival = arg2;
1007     ops[mop++].ival = arg3;
1008     ops[mop++].ival = arg4;
1009     if (mop >= OPSMAX)
1010 	fatal("Recompile a2p with larger OPSMAX\n");
1011     return retval;
1012 }
1013 
1014 int
oper5(int type,int arg1,int arg2,int arg3,int arg4,int arg5)1015 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1016 {
1017     int retval = mop;
1018 
1019     if (type > 255)
1020 	fatal("type > 255 (%d)\n",type);
1021     ops[mop++].ival = type + (5<<8);
1022     ops[mop++].ival = arg1;
1023     ops[mop++].ival = arg2;
1024     ops[mop++].ival = arg3;
1025     ops[mop++].ival = arg4;
1026     ops[mop++].ival = arg5;
1027     if (mop >= OPSMAX)
1028 	fatal("Recompile a2p with larger OPSMAX\n");
1029     return retval;
1030 }
1031 
1032 int depth = 0;
1033 
1034 void
dump(int branch)1035 dump(int branch)
1036 {
1037     register int type;
1038     register int len;
1039     register int i;
1040 
1041     type = ops[branch].ival;
1042     len = type >> 8;
1043     type &= 255;
1044     for (i=depth; i; i--)
1045 	printf(" ");
1046     if (type == OSTRING) {
1047 	printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1048     }
1049     else {
1050 	printf("(%-5d%s %d\n",branch,opname[type],len);
1051 	depth++;
1052 	for (i=1; i<=len; i++)
1053 	    dump(ops[branch+i].ival);
1054 	depth--;
1055 	for (i=depth; i; i--)
1056 	    printf(" ");
1057 	printf(")\n");
1058     }
1059 }
1060 
1061 int
bl(int arg,int maybe)1062 bl(int arg, int maybe)
1063 {
1064     if (!arg)
1065 	return 0;
1066     else if ((ops[arg].ival & 255) != OBLOCK)
1067 	return oper2(OBLOCK,arg,maybe);
1068     else if ((ops[arg].ival >> 8) < 2)
1069 	return oper2(OBLOCK,ops[arg+1].ival,maybe);
1070     else
1071 	return arg;
1072 }
1073 
1074 void
fixup(STR * str)1075 fixup(STR *str)
1076 {
1077     register char *s;
1078     register char *t;
1079 
1080     for (s = str->str_ptr; *s; s++) {
1081 	if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1082 	    strcpy(s+1,s+2);
1083 	    s++;
1084 	}
1085 	else if (*s == '\n') {
1086 	    for (t = s+1; isSPACE(*t & 127); t++) ;
1087 	    t--;
1088 	    while (isSPACE(*t & 127) && *t != '\n') t--;
1089 	    if (*t == '\n' && t-s > 1) {
1090 		if (s[-1] == '{')
1091 		    s--;
1092 		strcpy(s+1,t);
1093 	    }
1094 	    s++;
1095 	}
1096     }
1097 }
1098 
1099 void
putlines(STR * str)1100 putlines(STR *str)
1101 {
1102     register char *d, *s, *t, *e;
1103     register int pos, newpos;
1104 
1105     d = tokenbuf;
1106     pos = 0;
1107     for (s = str->str_ptr; *s; s++) {
1108 	*d++ = *s;
1109 	pos++;
1110 	if (*s == '\n') {
1111 	    *d = '\0';
1112 	    d = tokenbuf;
1113 	    pos = 0;
1114 	    putone();
1115 	}
1116 	else if (*s == '\t')
1117 	    pos += 7;
1118 	if (pos > 78) {		/* split a long line? */
1119 	    *d-- = '\0';
1120 	    newpos = 0;
1121 	    for (t = tokenbuf; isSPACE(*t & 127); t++) {
1122 		if (*t == '\t')
1123 		    newpos += 8;
1124 		else
1125 		    newpos += 1;
1126 	    }
1127 	    e = d;
1128 	    while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1129 		d--;
1130 	    if (d < t+10) {
1131 		d = e;
1132 		while (d > tokenbuf &&
1133 		  (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1134 		    d--;
1135 	    }
1136 	    if (d < t+10) {
1137 		d = e;
1138 		while (d > tokenbuf &&
1139 		  (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1140 		    d--;
1141 	    }
1142 	    if (d < t+10) {
1143 		d = e;
1144 		while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1145 		    d--;
1146 	    }
1147 	    if (d < t+10) {
1148 		d = e;
1149 		while (d > tokenbuf && *d != ' ')
1150 		    d--;
1151 	    }
1152 	    if (d > t+3) {
1153                 char save[2048];
1154                 strcpy(save, d);
1155 		*d = '\n';
1156                 d[1] = '\0';
1157 		putone();
1158 		putchar('\n');
1159 		if (d[-1] != ';' && !(newpos % 4)) {
1160 		    *t++ = ' ';
1161 		    *t++ = ' ';
1162 		    newpos += 2;
1163 		}
1164 		strcpy(t,save+1);
1165 		newpos += strlen(t);
1166 		d = t + strlen(t);
1167 		pos = newpos;
1168 	    }
1169 	    else
1170 		d = e + 1;
1171 	}
1172     }
1173 }
1174 
1175 void
putone(void)1176 putone(void)
1177 {
1178     register char *t;
1179 
1180     for (t = tokenbuf; *t; t++) {
1181 	*t &= 127;
1182 	if (*t == 127) {
1183 	    *t = ' ';
1184 	    strcpy(t+strlen(t)-1, "\t#???\n");
1185 	    checkers++;
1186 	}
1187     }
1188     t = tokenbuf;
1189     if (*t == '#') {
1190 	if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1191 	    return;
1192 	if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1193 	    return;
1194     }
1195     fputs(tokenbuf,stdout);
1196 }
1197 
1198 int
numary(int arg)1199 numary(int arg)
1200 {
1201     STR *key;
1202     int dummy;
1203 
1204     key = walk(0,0,arg,&dummy,P_MIN);
1205     str_cat(key,"[]");
1206     hstore(symtab,key->str_ptr,str_make("1"));
1207     str_free(key);
1208     set_array_base = TRUE;
1209     return arg;
1210 }
1211 
1212 int
rememberargs(int arg)1213 rememberargs(int arg)
1214 {
1215     int type;
1216     STR *str;
1217 
1218     if (!arg)
1219 	return arg;
1220     type = ops[arg].ival & 255;
1221     if (type == OCOMMA) {
1222 	rememberargs(ops[arg+1].ival);
1223 	rememberargs(ops[arg+3].ival);
1224     }
1225     else if (type == OVAR) {
1226 	str = str_new(0);
1227 	hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1228     }
1229     else
1230 	fatal("panic: unknown argument type %d, line %d\n",type,line);
1231     return arg;
1232 }
1233 
1234 int
aryrefarg(int arg)1235 aryrefarg(int arg)
1236 {
1237     int type = ops[arg].ival & 255;
1238     STR *str;
1239 
1240     if (type != OSTRING)
1241 	fatal("panic: aryrefarg %d, line %d\n",type,line);
1242     str = hfetch(curarghash,ops[arg+1].cval);
1243     if (str)
1244 	str_set(str,"*");
1245     return arg;
1246 }
1247 
1248 int
fixfargs(int name,int arg,int prevargs)1249 fixfargs(int name, int arg, int prevargs)
1250 {
1251     int type;
1252     STR *str;
1253     int numargs = 0;
1254 
1255     if (!arg)
1256 	return prevargs;
1257     type = ops[arg].ival & 255;
1258     if (type == OCOMMA) {
1259 	numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1260 	numargs = fixfargs(name,ops[arg+3].ival,numargs);
1261     }
1262     else if (type == OVAR) {
1263 	str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1264 	if (strEQ(str_get(str),"*")) {
1265 	    char tmpbuf[128];
1266 
1267 	    str_set(str,"");		/* in case another routine has this */
1268 	    ops[arg].ival &= ~255;
1269 	    ops[arg].ival |= OSTAR;
1270 	    sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1271 	    fprintf(stderr,"Adding %s\n",tmpbuf);
1272 	    str = str_new(0);
1273 	    str_set(str,"*");
1274 	    hstore(curarghash,tmpbuf,str);
1275 	}
1276 	numargs = prevargs + 1;
1277     }
1278     else
1279 	fatal("panic: unknown argument type %d, arg %d, line %d\n",
1280 	  type,prevargs+1,line);
1281     return numargs;
1282 }
1283 
1284 int
fixrargs(char * name,int arg,int prevargs)1285 fixrargs(char *name, int arg, int prevargs)
1286 {
1287     int type;
1288     STR *str;
1289     int numargs;
1290 
1291     if (!arg)
1292 	return prevargs;
1293     type = ops[arg].ival & 255;
1294     if (type == OCOMMA) {
1295 	numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1296 	numargs = fixrargs(name,ops[arg+3].ival,numargs);
1297     }
1298     else {
1299 	char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1300 	sprintf(tmpbuf,"%s:%d",name,prevargs);
1301 	str = hfetch(curarghash,tmpbuf);
1302 	safefree(tmpbuf);
1303 	if (str && strEQ(str->str_ptr,"*")) {
1304 	    if (type == OVAR || type == OSTAR) {
1305 		ops[arg].ival &= ~255;
1306 		ops[arg].ival |= OSTAR;
1307 	    }
1308 	    else
1309 		fatal("Can't pass expression by reference as arg %d of %s\n",
1310 		    prevargs+1, name);
1311 	}
1312 	numargs = prevargs + 1;
1313     }
1314     return numargs;
1315 }
1316