1 /*        $NetBSD: parser.c,v 1.184 2024/10/21 15:57:45 kre Exp $     */
2 
3 /*-
4  * Copyright (c) 1991, 1993
5  *        The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Kenneth Almquist.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c    8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.184 2024/10/21 15:57:45 kre Exp $");
41 #endif
42 #endif /* not lint */
43 
44 #include <limits.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 
49 #include "shell.h"
50 #include "parser.h"
51 #include "nodes.h"
52 #include "expand.h" /* defines rmescapes() */
53 #include "eval.h"   /* defines commandname */
54 #include "syntax.h"
55 #include "options.h"
56 #include "input.h"
57 #include "output.h"
58 #include "redir.h"  /* defines max_user_fd */
59 #include "var.h"
60 #include "error.h"
61 #include "memalloc.h"
62 #include "mystring.h"
63 #include "alias.h"
64 #include "show.h"
65 #ifndef SMALL
66 #include "myhistedit.h"
67 #endif
68 #ifdef DEBUG
69 #include "nodenames.h"
70 #endif
71 
72 /*
73  * Shell command parser.
74  */
75 
76 /* values returned by readtoken */
77 #include "token.h"
78 
/* Symbolic names for the two brace characters. */
79 #define OPENBRACE '{'
80 #define CLOSEBRACE '}'
81 
/*
 * One pending here document (<< redirection).
 *
 * parsefname() fills in eofmark and appends the record to the tail of
 * heredoclist, so the list is kept in the order the redirections were
 * parsed.  parsecmd() reports startline and eofmark if a record is still
 * on the list after a complete command has been parsed.
 */
82 struct HereDoc {
83           struct HereDoc *next;         /* next here document in list, NULL at the tail */
84           union node *here;             /* redirection node */
85           char *eofmark;                /* string indicating end of input */
86           int striptabs;                /* if set, strip leading tabs */
87           int startline;                /* line number where << seen */
88 };
89 
/*
 * The parser state object, and psp, whose c_current_parser member is
 * initialised to point at it.
 * NOTE(review): names such as tokpushback, checkkwd and lasttoken used
 * throughout this file are presumably accessors for fields of the current
 * parse_state (see parser.h) -- confirm there.
 */
90 MKINIT struct parse_state parse_state;
91 union parse_state_p psp = { .c_current_parser = &parse_state };
92 
/*
 * Constant template giving an initial value for the parse_state fields
 * listed; every one of them is zero or NULL.
 * NOTE(review): presumably copied over a parse_state to reset it; the
 * code that uses this template is not in this part of the file.
 */
93 static const struct parse_state init_parse_state = {        /* all 0's ... */
94           .ps_heredoclist = NULL,
95           .ps_parsebackquote = 0,
96           .ps_doprompt = 0,
97           .ps_needprompt = 0,
98           .ps_lasttoken = 0,
99           .ps_tokpushback = 0,
100           .ps_wordtext = NULL,
101           .ps_checkkwd = 0,
102           .ps_redirnode = NULL,
103           .ps_heredoc = NULL,
104           .ps_quoteflag = 0,
105           .ps_startlinno = 0,
106           .ps_funclinno = 0,
107           .ps_elided_nl = 0,
108 };
109 
/*
 * Forward declarations of the file-local parser routines.
 *
 * list(), andor(), pipeline(), command() and simplecmd() are the grammar
 * levels: each one calls the next one down, and command() calls back
 * into list() for the bodies of compound commands.
 */
110 STATIC union node *list(int);
111 STATIC union node *andor(void);
112 STATIC union node *pipeline(void);
113 STATIC union node *command(void);
114 STATIC union node *simplecmd(union node **, union node *);
115 STATIC union node *makeword(int);
116 STATIC void parsefname(void);
117 STATIC int slurp_heredoc(char *const, const int, const int);
118 STATIC void readheredocs(void);
119 STATIC int peektoken(void);
120 STATIC int readtoken(void);
121 STATIC int xxreadtoken(void);
122 STATIC int readtoken1(int, char const *, int);
123 STATIC int noexpand(char *);
124 STATIC void linebreak(void);
125 STATIC void consumetoken(int);
/* The two syntax error routines are declared __dead: they do not return. */
126 STATIC void synexpect(int, const char *) __dead;
127 STATIC void synerror(const char *) __dead;
128 STATIC void setprompt(int);
129 STATIC int pgetc_linecont(void);
130 
/*
 * Message text for end of file while reading a here document.
 * NOTE(review): not referenced in this part of the file; presumably used
 * by the here-document reading code further down.
 */
131 static const char EOFhere[] = "EOF reading here (<<) document";
132 
/*
 * DEBUG builds only: parsecmd() increments this before each of its calls
 * into the parser (readtoken() and list()) and decrements it afterwards.
 */
133 #ifdef DEBUG
134 int parsing = 0;
135 #endif
136 
137 /*
138  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
139  * valid parse tree indicating a blank line.)
140  */
141 
142 union node *
parsecmd(int interact)143 parsecmd(int interact)
144 {
145           int t;
146           union node *n;
147 
148 #ifdef DEBUG
149           parsing++;
150 #endif
151           tokpushback = 0;
152           checkkwd = 0;
153           doprompt = interact;
154           if (doprompt)
155                     setprompt(1);
156           else
157                     setprompt(0);
158           needprompt = 0;
159           t = readtoken();
160 #ifdef DEBUG
161           parsing--;
162 #endif
163           if (t == TEOF)
164                     return NEOF;
165           if (t == TNL)
166                     return NULL;
167 
168 #ifdef DEBUG
169           parsing++;
170 #endif
171           tokpushback++;
172           n = list(1);
173 #ifdef DEBUG
174           parsing--;
175 #endif
176           if (heredoclist)
177                     error("%d: Here document (<<%s) expected but not present",
178                               heredoclist->startline, heredoclist->eofmark);
179           return n;
180 }
181 
182 
183 STATIC union node *
list(int nlflag)184 list(int nlflag)
185 {
186           union node *ntop, *n1, *n2, *n3;
187           int tok;
188 
189           CTRACE(DBG_PARSE, ("list(%d): entered @%d\n",nlflag,plinno));
190 
191           checkkwd = CHKNL | CHKKWD | CHKALIAS;
192           if (nlflag == 0 && tokendlist[peektoken()])
193                     return NULL;
194           ntop = n1 = NULL;
195           for (;;) {
196                     n2 = andor();
197                     tok = readtoken();
198                     if (tok == TBACKGND) {
199                               if (n2->type == NCMD || n2->type == NPIPE)
200                                         n2->ncmd.backgnd = 1;
201                               else if (n2->type == NREDIR)
202                                         n2->type = NBACKGND;
203                               else {
204                                         n3 = stalloc(sizeof(struct nredir));
205                                         n3->type = NBACKGND;
206                                         n3->nredir.n = n2;
207                                         n3->nredir.redirect = NULL;
208                                         n2 = n3;
209                               }
210                     }
211 
212                     if (ntop == NULL)
213                               ntop = n2;
214                     else if (n1 == NULL) {
215                               n1 = stalloc(sizeof(struct nbinary));
216                               n1->type = NSEMI;
217                               n1->nbinary.ch1 = ntop;
218                               n1->nbinary.ch2 = n2;
219                               ntop = n1;
220                     } else {
221                               n3 = stalloc(sizeof(struct nbinary));
222                               n3->type = NSEMI;
223                               n3->nbinary.ch1 = n1->nbinary.ch2;
224                               n3->nbinary.ch2 = n2;
225                               n1->nbinary.ch2 = n3;
226                               n1 = n3;
227                     }
228 
229                     switch (tok) {
230                     case TBACKGND:
231                     case TSEMI:
232                               tok = readtoken();
233                               /* FALLTHROUGH */
234                     case TNL:
235                               if (tok == TNL) {
236                                         readheredocs();
237                                         if (nlflag)
238                                                   return ntop;
239                               } else if (tok == TEOF && nlflag)
240                                         return ntop;
241                               else
242                                         tokpushback++;
243 
244                               checkkwd = CHKNL | CHKKWD | CHKALIAS;
245                               if (!nlflag && tokendlist[peektoken()])
246                                         return ntop;
247                               break;
248                     case TEOF:
249                               pungetc();          /* push back EOF on input */
250                               return ntop;
251                     default:
252                               if (nlflag)
253                                         synexpect(-1, 0);
254                               tokpushback++;
255                               return ntop;
256                     }
257           }
258 }
259 
260 STATIC union node *
andor(void)261 andor(void)
262 {
263           union node *n1, *n2, *n3;
264           int t;
265 
266           CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno));
267 
268           n1 = pipeline();
269           for (;;) {
270                     if ((t = readtoken()) == TAND) {
271                               t = NAND;
272                     } else if (t == TOR) {
273                               t = NOR;
274                     } else {
275                               tokpushback++;
276                               return n1;
277                     }
278                     n2 = pipeline();
279                     n3 = stalloc(sizeof(struct nbinary));
280                     n3->type = t;
281                     n3->nbinary.ch1 = n1;
282                     n3->nbinary.ch2 = n2;
283                     n1 = n3;
284           }
285 }
286 
287 STATIC union node *
pipeline(void)288 pipeline(void)
289 {
290           union node *n1, *n2, *pipenode;
291           struct nodelist *lp, *prev;
292           int negate;
293 
294           CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno));
295 
296           negate = 0;
297           checkkwd = CHKNL | CHKKWD | CHKALIAS;
298           while (readtoken() == TNOT) {
299                     CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n"));
300 #ifndef BOGUS_NOT_COMMAND
301                     if (posix && negate)
302                               synerror("2nd \"!\" unexpected");
303 #endif
304                     negate++;
305           }
306           tokpushback++;
307           n1 = command();
308           if (readtoken() == TPIPE) {
309                     pipenode = stalloc(sizeof(struct npipe));
310                     pipenode->type = NPIPE;
311                     pipenode->npipe.backgnd = 0;
312                     lp = stalloc(sizeof(struct nodelist));
313                     pipenode->npipe.cmdlist = lp;
314                     lp->n = n1;
315                     do {
316                               prev = lp;
317                               lp = stalloc(sizeof(struct nodelist));
318                               lp->n = command();
319                               prev->next = lp;
320                     } while (readtoken() == TPIPE);
321                     lp->next = NULL;
322                     n1 = pipenode;
323           }
324           tokpushback++;
325           if (negate) {
326                     CTRACE(DBG_PARSE, ("%snegate pipeline\n",
327                         (negate&1) ? "" : "double "));
328                     n2 = stalloc(sizeof(struct nnot));
329                     n2->type = (negate & 1) ? NNOT : NDNOT;
330                     n2->nnot.com = n1;
331                     return n2;
332           } else
333                     return n1;
334 }
335 
336 
337 
338 STATIC union node *
command(void)339 command(void)
340 {
341           union node *n1, *n2;
342           union node *ap, **app;
343           union node *cp, **cpp;
344           union node *redir, **rpp;
345           int t;
346 #ifdef BOGUS_NOT_COMMAND
347           int negate = 0;
348 #endif
349 
350           CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno));
351 
352           checkkwd = CHKNL | CHKKWD | CHKALIAS;
353           redir = NULL;
354           n1 = NULL;
355           rpp = &redir;
356 
357           /* Check for redirection which may precede command */
358           while (readtoken() == TREDIR) {
359                     *rpp = n2 = redirnode;
360                     rpp = &n2->nfile.next;
361                     parsefname();
362           }
363           tokpushback++;
364 
365 #ifdef BOGUS_NOT_COMMAND                /* only in pipeline() */
366           while (readtoken() == TNOT) {
367                     CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n"));
368                     negate++;
369           }
370           tokpushback++;
371 #endif
372 
373           switch (readtoken()) {
374           case TIF:
375                     n1 = stalloc(sizeof(struct nif));
376                     n1->type = NIF;
377                     n1->nif.test = list(0);
378                     consumetoken(TTHEN);
379                     n1->nif.ifpart = list(0);
380                     n2 = n1;
381                     while (readtoken() == TELIF) {
382                               n2->nif.elsepart = stalloc(sizeof(struct nif));
383                               n2 = n2->nif.elsepart;
384                               n2->type = NIF;
385                               n2->nif.test = list(0);
386                               consumetoken(TTHEN);
387                               n2->nif.ifpart = list(0);
388                     }
389                     if (lasttoken == TELSE)
390                               n2->nif.elsepart = list(0);
391                     else {
392                               n2->nif.elsepart = NULL;
393                               tokpushback++;
394                     }
395                     consumetoken(TFI);
396                     checkkwd = CHKKWD | CHKALIAS;
397                     break;
398           case TWHILE:
399           case TUNTIL:
400                     n1 = stalloc(sizeof(struct nbinary));
401                     n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
402                     n1->nbinary.ch1 = list(0);
403                     consumetoken(TDO);
404                     n1->nbinary.ch2 = list(0);
405                     consumetoken(TDONE);
406                     checkkwd = CHKKWD | CHKALIAS;
407                     break;
408           case TFOR:
409                     if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
410                               synerror("Bad for loop variable");
411                     n1 = stalloc(sizeof(struct nfor));
412                     n1->type = NFOR;
413                     n1->nfor.var = wordtext;
414                     n1->nfor.lineno = startlinno;
415                     linebreak();
416                     if (lasttoken==TWORD && !quoteflag && equal(wordtext,"in")) {
417                               app = &ap;
418                               while (readtoken() == TWORD) {
419                                         n2 = makeword(startlinno);
420                                         *app = n2;
421                                         app = &n2->narg.next;
422                               }
423                               *app = NULL;
424                               n1->nfor.args = ap;
425                               if (lasttoken != TNL && lasttoken != TSEMI)
426                                         synexpect(TSEMI, 0);
427                               if (lasttoken == TNL)
428                                         readheredocs();
429                     } else {
430                               static char argvars[5] = {
431                                   CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
432                               };
433 
434                               n2 = stalloc(sizeof(struct narg));
435                               n2->type = NARG;
436                               n2->narg.text = argvars;
437                               n2->narg.backquote = NULL;
438                               n2->narg.next = NULL;
439                               n2->narg.lineno = startlinno;
440                               n1->nfor.args = n2;
441                               /*
442                                * Newline or semicolon here is optional (but note
443                                * that the original Bourne shell only allowed NL).
444                                */
445                               if (lasttoken != TNL && lasttoken != TSEMI)
446                                         tokpushback++;
447                     }
448                     checkkwd = CHKNL | CHKKWD | CHKALIAS;
449                     if ((t = readtoken()) == TDO)
450                               t = TDONE;
451                     else if (t == TBEGIN)
452                               t = TEND;
453                     else
454                               synexpect(TDO, 0);
455                     n1->nfor.body = list(0);
456                     consumetoken(t);
457                     checkkwd = CHKKWD | CHKALIAS;
458                     break;
459           case TCASE:
460                     n1 = stalloc(sizeof(struct ncase));
461                     n1->type = NCASE;
462                     n1->ncase.lineno = startlinno - elided_nl;
463                     consumetoken(TWORD);
464                     n1->ncase.expr = makeword(startlinno);
465                     linebreak();
466                     if (lasttoken != TWORD || !equal(wordtext, "in"))
467                               synexpect(-1, "in");
468                     cpp = &n1->ncase.cases;
469                     checkkwd = CHKNL | CHKKWD;
470                     readtoken();
471                     /*
472                      * Both ksh and bash accept 'case x in esac'
473                      * so configure scripts started taking advantage of this.
474                      * The page: http://pubs.opengroup.org/onlinepubs/\
475                      * 009695399/utilities/xcu_chap02.html contradicts itself,
476                      * as to if this is legal; the "Case Conditional Format"
477                      * paragraph shows one case is required, but the "Grammar"
478                      * section shows a grammar that explicitly allows the no
479                      * case option.
480                      *
481                      * The standard also says (section 2.10):
482                      *   This formal syntax shall take precedence over the
483                      *   preceding text syntax description.
484                      * ie: the "Grammar" section wins.  The text is just
485                      * a rough guide (introduction to the common case.)
486                      */
487                     while (lasttoken != TESAC) {
488                               *cpp = cp = stalloc(sizeof(struct nclist));
489                               cp->type = NCLIST;
490                               app = &cp->nclist.pattern;
491                               if (lasttoken == TLP)
492                                         readtoken();
493                               for (;;) {
494                                         if (lasttoken < TWORD)
495                                                   synexpect(TWORD, 0);
496                                         *app = ap = makeword(startlinno);
497                                         checkkwd = CHKNL | CHKKWD;
498                                         if (readtoken() != TPIPE)
499                                                   break;
500                                         app = &ap->narg.next;
501                                         readtoken();
502                               }
503                               if (lasttoken != TRP)
504                                         synexpect(TRP, 0);
505                               cp->nclist.lineno = startlinno;
506                               cp->nclist.body = list(0);
507 
508                               checkkwd = CHKNL | CHKKWD | CHKALIAS;
509                               if ((t = readtoken()) != TESAC) {
510                                         if (t != TENDCASE && t != TCASEFALL) {
511                                                   synexpect(TENDCASE, 0);
512                                         } else {
513                                                   if (t == TCASEFALL)
514                                                             cp->type = NCLISTCONT;
515                                                   checkkwd = CHKNL | CHKKWD;
516                                                   readtoken();
517                                         }
518                               }
519                               cpp = &cp->nclist.next;
520                     }
521                     *cpp = NULL;
522                     checkkwd = CHKKWD | CHKALIAS;
523                     break;
524           case TLP:
525                     n1 = stalloc(sizeof(struct nredir));
526                     n1->type = NSUBSHELL;
527                     n1->nredir.n = list(0);
528                     n1->nredir.redirect = NULL;
529                     if (n1->nredir.n == NULL)
530                               synexpect(-1, 0);
531                     consumetoken(TRP);
532                     checkkwd = CHKKWD | CHKALIAS;
533                     break;
534           case TBEGIN:
535                     n1 = list(0);
536                     if (posix && n1 == NULL)
537                               synexpect(-1, 0);
538                     consumetoken(TEND);
539                     checkkwd = CHKKWD | CHKALIAS;
540                     break;
541 
542           case TBACKGND:
543           case TSEMI:
544           case TAND:
545           case TOR:
546           case TPIPE:
547           case TNL:
548           case TEOF:
549           case TRP:
550           case TENDCASE:
551           case TCASEFALL:
552                     /*
553                      * simple commands must have something in them,
554                      * either a word (which at this point includes a=b)
555                      * or a redirection.  If we reached the end of the
556                      * command (which one of these tokens indicates)
557                      * when we are just starting, and have not had a
558                      * redirect, then ...
559                      *
560                      * nb: it is still possible to end up with empty
561                      * simple commands, if the "command" is a var
562                      * expansion that produces nothing:
563                      *        X= ; $X && $X
564                      * -->          &&
565                      * That is OK and is handled after word expansions.
566                      */
567                     if (!redir)
568                               synexpect(-1, 0);
569                     /*
570                      * continue to build a node containing the redirect.
571                      * the tokpushback means that our ending token will be
572                      * read again in simplecmd, causing it to terminate,
573                      * so only the redirect(s) will be contained in the
574                      * returned n1
575                      */
576                     /* FALLTHROUGH */
577           case TWORD:
578                     tokpushback++;
579                     n1 = simplecmd(rpp, redir);
580                     goto checkneg;
581           default:
582                     synexpect(-1, 0);
583                     /* NOTREACHED */
584           }
585 
586           /* Now check for redirection which may follow command */
587           while (readtoken() == TREDIR) {
588                     *rpp = n2 = redirnode;
589                     rpp = &n2->nfile.next;
590                     parsefname();
591           }
592           tokpushback++;
593           *rpp = NULL;
594           if (redir) {
595                     if (n1 == NULL || n1->type != NSUBSHELL) {
596                               n2 = stalloc(sizeof(struct nredir));
597                               n2->type = NREDIR;
598                               n2->nredir.n = n1;
599                               n1 = n2;
600                     }
601                     n1->nredir.redirect = redir;
602           }
603 
604  checkneg:;
605 #ifdef BOGUS_NOT_COMMAND
606           if (negate) {
607                     VTRACE(DBG_PARSE, ("bogus %snegate command\n",
608                         (negate&1) ? "" : "double "));
609                     n2 = stalloc(sizeof(struct nnot));
610                     n2->type = (negate & 1) ? NNOT : NDNOT;
611                     n2->nnot.com = n1;
612                     return n2;
613           }
614           else
615 #endif
616                     return n1;
617 }
618 
619 
620 STATIC union node *
simplecmd(union node ** rpp,union node * redir)621 simplecmd(union node **rpp, union node *redir)
622 {
623           union node *args, **app;
624           union node *n = NULL;
625           int line = 0;
626           int savecheckkwd;
627 #ifdef BOGUS_NOT_COMMAND
628           union node *n2;
629           int negate = 0;
630 #endif
631 
632           CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n",
633               redir ? "" : "out", plinno));
634 
635           /* If we don't have any redirections already, then we must reset */
636           /* rpp to be the address of the local redir variable.  */
637           if (redir == 0)
638                     rpp = &redir;
639 
640           args = NULL;
641           app = &args;
642 
643 #ifdef BOGUS_NOT_COMMAND      /* pipelines get negated, commands do not */
644           while (readtoken() == TNOT) {
645                     VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n"));
646                     negate++;
647           }
648           tokpushback++;
649 #endif
650 
651           savecheckkwd = CHKALIAS;
652           for (;;) {
653                     checkkwd = savecheckkwd;
654                     if (readtoken() == TWORD) {
655                               if (line == 0)
656                                         line = startlinno;
657                               n = makeword(startlinno);
658                               *app = n;
659                               app = &n->narg.next;
660                               if (savecheckkwd != 0 && !isassignment(wordtext))
661                                         savecheckkwd = 0;
662                     } else if (lasttoken == TREDIR) {
663                               if (line == 0)
664                                         line = startlinno;
665                               *rpp = n = redirnode;
666                               rpp = &n->nfile.next;
667                               parsefname();       /* read name of redirection file */
668                     } else if (lasttoken == TLP && app == &args->narg.next
669                                                       && redir == 0) {
670                               /* We have a function */
671                               consumetoken(TRP);
672                               funclinno = plinno;
673                               /*
674                                * Make sure there are no unquoted $'s in the
675                                * name (allowing those, not expanding them,
676                                * simply treating '$' as a character, is desirable
677                                * but the parser has converted them to CTLxxx
678                                * chars, and that's not what we want
679                                *
680                                * Fortunately here the user can simply quote
681                                * the name to avoid this restriction.
682                                */
683                               if (!noexpand(n->narg.text))
684                                         synerror("Bad function name (use quotes)");
685                               rmescapes(n->narg.text);
686                               if (strchr(n->narg.text, '/'))
687                                         synerror("Bad function name");
688                               VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n",
689                                   n->narg.text, plinno));
690                               n->type = NDEFUN;
691                               n->narg.lineno = plinno - elided_nl;
692                               n->narg.next = command();
693                               funclinno = 0;
694                               goto checkneg;
695                     } else {
696                               tokpushback++;
697                               break;
698                     }
699           }
700 
701           if (args == NULL && redir == NULL)
702                     synexpect(-1, 0);
703           *app = NULL;
704           *rpp = NULL;
705           n = stalloc(sizeof(struct ncmd));
706           n->type = NCMD;
707           n->ncmd.lineno = line - elided_nl;
708           n->ncmd.backgnd = 0;
709           n->ncmd.args = args;
710           n->ncmd.redirect = redir;
711           n->ncmd.lineno = startlinno;
712 
713  checkneg:;
714 #ifdef BOGUS_NOT_COMMAND
715           if (negate) {
716                     VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n",
717                         (negate&1) ? "" : "double "));
718                     n2 = stalloc(sizeof(struct nnot));
719                     n2->type = (negate & 1) ? NNOT : NDNOT;
720                     n2->nnot.com = n;
721                     return n2;
722           }
723           else
724 #endif
725                     return n;
726 }
727 
728 STATIC union node *
makeword(int lno)729 makeword(int lno)
730 {
731           union node *n;
732 
733           n = stalloc(sizeof(struct narg));
734           n->type = NARG;
735           n->narg.next = NULL;
736           n->narg.text = wordtext;
737           n->narg.backquote = backquotelist;
738           n->narg.lineno = lno;
739           return n;
740 }
741 
742 void
fixredir(union node * n,const char * text,int err)743 fixredir(union node *n, const char *text, int err)
744 {
745 
746           VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err));
747           if (!err)
748                     n->ndup.vname = NULL;
749 
750           if (is_number(text)) {
751                     n->ndup.dupfd = number(text);
752                     if (n->ndup.dupfd < user_fd_limit &&
753                         n->ndup.dupfd > max_user_fd)
754                               max_user_fd = n->ndup.dupfd;
755           } else if (text[0] == '-' && text[1] == '\0')
756                     n->ndup.dupfd = -1;
757           else {
758 
759                     if (err)
760                               synerror("Bad fd number");
761                     else
762                               n->ndup.vname = makeword(startlinno - elided_nl);
763           }
764 }
765 
766 
767 STATIC void
parsefname(void)768 parsefname(void)
769 {
770           union node *n = redirnode;
771 
772           if (readtoken() != TWORD)
773                     synexpect(-1, 0);
774           if (n->type == NHERE) {
775                     struct HereDoc *here = heredoc;
776                     struct HereDoc *p;
777 
778                     if (quoteflag == 0)
779                               n->type = NXHERE;
780                     VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno));
781                     if (here->striptabs) {
782                               while (*wordtext == '\t')
783                                         wordtext++;
784                     }
785 
786                     /*
787                      * this test is not really necessary, we are not
788                      * required to expand wordtext, but there's no reason
789                      * it cannot be $$ or something like that - that would
790                      * not mean the pid, but literally two '$' characters.
791                      * There is no need for limits on what the word can be.
792                      * However, it needs to stay literal as entered, not
793                      * have $ converted to CTLVAR or something, which as
794                      * the parser is, at the minute, is impossible to prevent.
795                      * So, leave it like this until the rest of the parser is fixed.
796                      */
797                     if (!noexpand(wordtext))
798                               synerror("Unimplemented form of eof marker"
799                                   " for << redirection");
800 
801                     rmescapes(wordtext);
802                     here->eofmark = wordtext;
803                     here->next = NULL;
804                     if (heredoclist == NULL)
805                               heredoclist = here;
806                     else {
807                               for (p = heredoclist ; p->next ; p = p->next)
808                                         continue;
809                               p->next = here;
810                     }
811           } else if (n->type == NTOFD || n->type == NFROMFD) {
812                     fixredir(n, wordtext, 0);
813           } else {
814                     n->nfile.fname = makeword(startlinno - elided_nl);
815           }
816 }
817 
818 /*
819  * Check to see whether we are at the end of the here document.  When this
820  * is called, c is set to the first character of the next input line.  If
821  * we are at the end of the here document, this routine sets the c to PEOF.
822  * The new value of c is returned.
823  */
824 
825 static int
checkend(int c,char * const eofmark,const int striptabs)826 checkend(int c, char * const eofmark, const int striptabs)
827 {
828 
829           if (striptabs) {
830                     while (c == '\t')
831                               c = pgetc();
832           }
833           if (c == PEOF) {
834                     if (*eofmark == '\0')
835                               return (c);
836                     synerror(EOFhere);
837           }
838           if (c == *eofmark) {
839                     int c2;
840                     char *q;
841 
842                     for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
843                               if (c2 == '\n') {
844                                         plinno++;
845                                         needprompt = doprompt;
846                               }
847                     if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
848                               c = PEOF;
849                               if (c2 == '\n') {
850                                         plinno++;
851                                         needprompt = doprompt;
852                               }
853                     } else {
854                               pungetc();
855                               pushstring(eofmark + 1, q - (eofmark + 1), NULL);
856                     }
857           } else if (c == '\n' && *eofmark == '\0') {
858                     c = PEOF;
859                     plinno++;
860                     needprompt = doprompt;
861           }
862           return (c);
863 }
864 
865 
866 /*
867  * Input any here documents.
868  */
869 
870 STATIC int
slurp_heredoc(char * const eofmark,const int striptabs,const int sq)871 slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
872 {
873           int c;
874           char *out;
875           int lines = plinno;
876 
877           c = pgetc();
878 
879           /*
880            * If we hit EOF on the input, and the eofmark is a null string ('')
881            * we consider this empty line to be the eofmark, and exit without err.
882            */
883           if (c == PEOF && *eofmark != '\0')
884                     synerror(EOFhere);
885 
886           STARTSTACKSTR(out);
887 
888           while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
889                     do {
890                               if (sq) {
891                                         /*
892                                          * in single quoted mode (eofmark quoted)
893                                          * all we look for is \n so we can check
894                                          * for the eofmark - everything saved literally.
895                                          */
896                                         STPUTC(c, out);
897                                         if (c == '\n') {
898                                                   plinno++;
899                                                   break;
900                                         }
901                                         continue;
902                               }
903                               /*
904                                * In double quoted (non-quoted eofmark)
905                                * we must handle \ followed by \n here
906                                * otherwise we can mismatch the end mark.
907                                * All other uses of \ will be handled later
908                                * when the here doc is expanded.
909                                *
910                                * This also makes sure \\ followed by \n does
911                                * not suppress the newline (the \ quotes itself)
912                                */
913                               if (c == '\\') {              /* A backslash */
914                                         STPUTC(c, out);
915                                         c = pgetc();                  /* followed by */
916                                         if (c == '\n') {    /* a newline?  */
917                                                   STPUTC(c, out);
918                                                   plinno++;
919                                                   continue; /* don't break */
920                                         }
921                               }
922                               STPUTC(c, out);                         /* keep the char */
923                               if (c == '\n') {              /* at end of line */
924                                         plinno++;
925                                         break;                        /* look for eofmark */
926                               }
927                     } while ((c = pgetc()) != PEOF);
928 
929                     /*
930                      * If we have read a line, and reached EOF, without
931                      * finding the eofmark, whether the EOF comes before
932                      * or immediately after the \n, that is an error.
933                      */
934                     if (c == PEOF || (c = pgetc()) == PEOF)
935                               synerror(EOFhere);
936           }
937           STPUTC('\0', out);
938 
939           c = out - stackblock();
940           out = stackblock();
941           grabstackblock(c);
942           wordtext = out;
943 
944           VTRACE(DBG_PARSE,
945              ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n",
946                     plinno - lines, sq ? "quoted " : "",  eofmark,
947                     striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c),
948                     wordtext, (c > 16 ? "..." : ""), plinno));
949 
950           return (plinno - lines);
951 }
952 
953 static char *
insert_elided_nl(char * str)954 insert_elided_nl(char *str)
955 {
956           while (elided_nl > 0) {
957                     STPUTC(CTLNONL, str);
958                     elided_nl--;
959           }
960           return str;
961 }
962 
963 STATIC void
readheredocs(void)964 readheredocs(void)
965 {
966           struct HereDoc *here;
967           union node *n;
968           int line, l;
969 
970           line = 0;           /*XXX - gcc!  obviously unneeded */
971           if (heredoclist)
972                     line = heredoclist->startline + 1;
973           l = 0;
974           while (heredoclist) {
975                     line += l;
976                     here = heredoclist;
977                     heredoclist = here->next;
978                     if (needprompt) {
979                               setprompt(2);
980                               needprompt = 0;
981                     }
982 
983                     l = slurp_heredoc(here->eofmark, here->striptabs,
984                         here->here->nhere.type == NHERE);
985 
986                     here->here->nhere.doc = n = makeword(line);
987 
988                     if (here->here->nhere.type == NHERE)
989                               continue;
990 
991                     /*
992                      * Now "parse" here docs that have unquoted eofmarkers.
993                      */
994                     setinputstring(wordtext, 1, line);
995                     VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n",
996                               l, line));
997                     readtoken1(pgetc(), DQSYNTAX, 1);
998                     n->narg.text = wordtext;
999                     n->narg.backquote = backquotelist;
1000                     popfile();
1001           }
1002 }
1003 
1004 STATIC int
peektoken(void)1005 peektoken(void)
1006 {
1007           int t;
1008 
1009           t = readtoken();
1010           tokpushback++;
1011           return (t);
1012 }
1013 
1014 STATIC int
readtoken(void)1015 readtoken(void)
1016 {
1017           int t;
1018 #ifdef DEBUG
1019           int alreadyseen = tokpushback;
1020           int savecheckkwd = checkkwd;
1021 #endif
1022           struct alias *ap;
1023 
1024  top:;
1025           t = xxreadtoken();
1026 
1027           if (checkkwd & CHKNL) {
1028                     while (t == TNL) {
1029                               readheredocs();
1030                               t = xxreadtoken();
1031                     }
1032           }
1033 
1034           /*
1035            * check for keywords and aliases
1036            */
1037           if (t == TWORD && !quoteflag) {
1038                     const char *const *pp;
1039 
1040                     if (checkkwd & CHKKWD)
1041                               for (pp = parsekwd; *pp; pp++) {
1042                                         if (**pp == *wordtext && equal(*pp, wordtext)) {
1043                                                   lasttoken = t = pp -
1044                                                       parsekwd + KWDOFFSET;
1045                                                   VTRACE(DBG_PARSE,
1046                                                       ("keyword %s recognized @%d\n",
1047                                                       tokname[t], plinno));
1048                                                   goto out;
1049                                         }
1050                               }
1051 
1052                     if (checkkwd & CHKALIAS &&
1053                         (ap = lookupalias(wordtext, 1)) != NULL) {
1054                               VTRACE(DBG_PARSE,
1055                                   ("alias '%s' recognized -> <:%s:>\n",
1056                                   wordtext, ap->val));
1057                               pushstring(ap->val, strlen(ap->val), ap);
1058                               goto top;
1059                     }
1060           }
1061  out:;
1062           if (t != TNOT)
1063                     checkkwd = 0;
1064 
1065           VTRACE(DBG_PARSE, ("%stoken %s %s @%d (chkkwd %x->%x)\n",
1066               alreadyseen ? "reread " : "", tokname[t],
1067               t == TWORD ? wordtext : "", plinno, savecheckkwd, checkkwd));
1068           return (t);
1069 }
1070 
1071 
1072 /*
1073  * Read the next input token.
1074  * If the token is a word, we set backquotelist to the list of cmds in
1075  *        backquotes.  We set quoteflag to true if any part of the word was
1076  *        quoted.
1077  * If the token is TREDIR, then we set redirnode to a structure containing
1078  *        the redirection.
1079  * In all cases, the variable startlinno is set to the number of the line
1080  *        on which the token starts.
1081  *
1082  * [Change comment:  here documents and internal procedures]
1083  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
1084  *  word parsing code into a separate routine.  In this case, readtoken
1085  *  doesn't need to have any internal procedures, but parseword does.
1086  *  We could also make parseoperator in essence the main routine, and
1087  *  have parseword (readtoken1?) handle both words and redirection.]
1088  */
1089 
1090 #define RETURN(token)         return lasttoken = (token)
1091 
1092 STATIC int
xxreadtoken(void)1093 xxreadtoken(void)
1094 {
1095           int c;
1096 
1097           if (tokpushback) {
1098                     tokpushback = 0;
1099                     CTRACE(DBG_LEXER,
1100                         ("xxreadtoken() returns %s (%d) again\n",
1101                               tokname[lasttoken], lasttoken));
1102                     return lasttoken;
1103           }
1104           if (needprompt) {
1105                     setprompt(2);
1106                     needprompt = 0;
1107           }
1108           elided_nl = 0;
1109           startlinno = plinno;
1110           for (;;) {          /* until token or start of word found */
1111                     c = pgetc_macro();
1112                     CTRACE(DBG_LEXER, ("xxreadtoken() sees '%c' (%#.2x) ",
1113                         c&0xFF, c&0x1FF));
1114                     switch (c) {
1115                     case ' ': case '\t': case PFAKE:
1116                               CTRACE(DBG_LEXER, (" ignored\n"));
1117                               continue;
1118                     case '#':
1119                               while ((c = pgetc()) != '\n' && c != PEOF)
1120                                         continue;
1121                               CTRACE(DBG_LEXER,
1122                                   ("skipped comment to (not incl) \\n\n"));
1123                               pungetc();
1124                               continue;
1125 
1126                     case '\n':
1127                               plinno++;
1128                               CTRACE(DBG_LEXER, ("newline now @%d\n", plinno));
1129                               needprompt = doprompt;
1130                               RETURN(TNL);
1131                     case PEOF:
1132                               CTRACE(DBG_LEXER, ("EOF -> TEOF (return)\n"));
1133                               RETURN(TEOF);
1134 
1135                     case '&':
1136                               if (pgetc_linecont() == '&') {
1137                                         CTRACE(DBG_LEXER,
1138                                             ("and another  -> TAND (return)\n"));
1139                                         RETURN(TAND);
1140                               }
1141                               pungetc();
1142                               CTRACE(DBG_LEXER, (" -> TBACKGND (return)\n"));
1143                               RETURN(TBACKGND);
1144                     case '|':
1145                               if (pgetc_linecont() == '|') {
1146                                         CTRACE(DBG_LEXER,
1147                                             ("and another  -> TOR (return)\n"));
1148                                         RETURN(TOR);
1149                               }
1150                               pungetc();
1151                               CTRACE(DBG_LEXER, (" -> TPIPE (return)\n"));
1152                               RETURN(TPIPE);
1153                     case ';':
1154                               switch (pgetc_linecont()) {
1155                               case ';':
1156                                         CTRACE(DBG_LEXER,
1157                                             ("and another -> TENDCASE (return)\n"));
1158                                         RETURN(TENDCASE);
1159                               case '&':
1160                                         CTRACE(DBG_LEXER,
1161                                             ("and '&' -> TCASEFALL (return)\n"));
1162                                         RETURN(TCASEFALL);
1163                               default:
1164                                         pungetc();
1165                                         CTRACE(DBG_LEXER, (" -> TSEMI (return)\n"));
1166                                         RETURN(TSEMI);
1167                               }
1168                     case '(':
1169                               CTRACE(DBG_LEXER, (" -> TLP (return)\n"));
1170                               RETURN(TLP);
1171                     case ')':
1172                               CTRACE(DBG_LEXER, (" -> TRP (return)\n"));
1173                               RETURN(TRP);
1174 
1175                     case '\\':
1176                               switch (pgetc()) {
1177                               case '\n':
1178                                         startlinno = ++plinno;
1179                                         CTRACE(DBG_LEXER, ("\\\n ignored, now @%d\n",
1180                                             plinno));
1181                                         if (doprompt)
1182                                                   setprompt(2);
1183                                         else
1184                                                   setprompt(0);
1185                                         continue;
1186                               case PEOF:
1187                                         CTRACE(DBG_LEXER,
1188                                           ("then EOF -> TEOF (return) '\\' dropped\n"));
1189                                         RETURN(TEOF);
1190                               default:
1191                                         CTRACE(DBG_LEXER, ("not \\\n or EOF: "));
1192                                         pungetc();
1193                                         break;
1194                               }
1195                               /* FALLTHROUGH */
1196                     default:
1197                               CTRACE(DBG_LEXER, ("getting a word\n"));
1198                               return readtoken1(c, BASESYNTAX, 0);
1199                     }
1200           }
1201 #undef RETURN
1202 }
1203 
1204 
1205 
1206 /*
1207  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
1208  * is not NULL, read a here document.  In the latter case, eofmark is the
1209  * word which marks the end of the document and striptabs is true if
1210  * leading tabs should be stripped from the document.  The argument firstc
1211  * is the first character of the input token or document.
1212  *
1213  * Because C does not have internal subroutines, I have simulated them
1214  * using goto's to implement the subroutine linkage.  The following macros
1215  * will run code that appears at the end of readtoken1.
1216  */
1217 
1218 /*
1219  * We used to remember only the current syntax, variable nesting level,
1220  * double quote state for each var nesting level, and arith nesting
1221  * level (unrelated to var nesting) and one prev syntax when in arith
1222  * syntax.  This worked for simple cases, but can't handle arith inside
1223  * var expansion inside arith inside var with some quoted and some not.
1224  *
 * Inspired by FreeBSD's implementation (though it was the obvious way),
 * but implemented differently, we now have a stack that keeps track
 * of what we are doing now, and what we were doing previously.
1228  * Every time something changes, which will eventually end and should
1229  * revert to the previous state, we push this stack, and then pop it
1230  * again later (that is every ${} with an operator (to parse the word
1231  * or pattern that follows) ${x} and $x are too simple to need it)
1232  * $(( )) $( ) and "...".   Always.   Really, always!
1233  *
1234  * The stack is implemented as one static (on the C stack) base block
1235  * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1236  * enough for the vast majority of cases.  For torture tests, we
 * malloc more blocks as needed.  All accesses are made through the
 * inline functions below.
1239  */
1240 
1241 /*
1242  * varnest & arinest will typically be 0 or 1
1243  * (varnest can increment in usages like ${x=${y}} but probably
1244  *  does not really need to)
 * parenlevel allows balancing parens inside a $(( )); it is reset
 * at each new nesting level, so $(( ( x + 3 ${unset-)} )) does not work.
1247  * quoted is special - we need to know 2 things ... are we inside "..."
1248  * (even if inherited from some previous nesting level) and was there
1249  * an opening '"' at this level (so the next will be closing).
1250  * "..." can span nesting levels, but cannot be opened in one and
1251  * closed in a different one.
1252  * To handle this, "quoted" has two fields, the bottom 4 (really 2)
1253  * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
1254  * is really so special that this setting is not very important)
1255  * and 0x10 that indicates that an opening quote has been seen.
1256  * The bottom 4 bits are inherited, the 0x10 bit is not.
1257  */
/*
 * One level of tokenizer state.
 */
struct tokenstate {
	const char *ts_syntax;		/* syntax table in use at this level */

	/* counters - 64000 levels should be enough! */
	unsigned short ts_parenlevel;
	unsigned short ts_varnest;
	unsigned short ts_arinest;

	unsigned short ts_quoted;	/* quoting: see NQ ... QS below */
	unsigned short ts_magicq;	/* heredoc or word expand */
};

/*
 * Values kept in ts_quoted.
 * The low bits (QF) say what kind of quoting applies, QS is a flag.
 */
#define	NQ	0x00	/* Unquoted */
#define	SQ	0x01	/* Single Quotes */
#define	DQ	0x02	/* Double Quotes (or equivalent) */
#define	CQ	0x03	/* C style Single Quotes */
#define	QF	0x0F	/* Mask to extract previous values */
#define	QS	0x10	/* Quoting started at this level in stack */

#define	LEVELS_PER_BLOCK	8	/* tokenstates in each statestack */
#define	VSS			struct statestack

/*
 * One block of the state stack.
 */
struct statestack {
	VSS *prev;	/* previous block in list */
	int cur;	/* which of our tokenstates is current */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};
1282 
1283 static inline struct tokenstate *
currentstate(VSS * stack)1284 currentstate(VSS *stack)
1285 {
1286           return &stack->tokenstate[stack->cur];
1287 }
1288 
#ifdef notdef
/*
 * The tokenstate one level below the current one.  That is the last
 * entry of the previous block when the current entry is the first in
 * its block, or the base entry itself when there is no previous block.
 */
static inline struct tokenstate *
prevstate(VSS *stack)
{
	if (stack->cur == 0) {
		VSS *older = stack->prev;

		/* cannot drop below base */
		return older != NULL ?
		    &older->tokenstate[LEVELS_PER_BLOCK - 1] :
		    &stack->tokenstate[0];
	}
	return &stack->tokenstate[stack->cur - 1];
}
#endif
1300 
1301 static inline VSS *
bump_state_level(VSS * stack)1302 bump_state_level(VSS *stack)
1303 {
1304           struct tokenstate *os, *ts;
1305 
1306           os = currentstate(stack);
1307 
1308           if (++stack->cur >= LEVELS_PER_BLOCK) {
1309                     VSS *ss;
1310 
1311                     ss = (VSS *)ckmalloc(sizeof (struct statestack));
1312                     ss->cur = 0;
1313                     ss->prev = stack;
1314                     stack = ss;
1315           }
1316 
1317           ts = currentstate(stack);
1318 
1319           ts->ts_parenlevel = 0;        /* parens inside never match outside */
1320 
1321           ts->ts_quoted  = os->ts_quoted & QF;    /* these are default settings */
1322           ts->ts_varnest = os->ts_varnest;
1323           ts->ts_arinest = os->ts_arinest;        /* when appropriate    */
1324           ts->ts_syntax  = os->ts_syntax;                   /*    they will be altered */
1325           ts->ts_magicq  = os->ts_magicq;
1326 
1327           return stack;
1328 }
1329 
1330 static inline VSS *
drop_state_level(VSS * stack)1331 drop_state_level(VSS *stack)
1332 {
1333           if (stack->cur == 0) {
1334                     VSS *ss;
1335 
1336                     ss = stack;
1337                     stack = ss->prev;
1338                     if (stack == NULL)
1339                               return ss;
1340                     ckfree(ss);
1341           }
1342           --stack->cur;
1343           return stack;
1344 }
1345 
1346 static inline void
cleanup_state_stack(VSS * stack)1347 cleanup_state_stack(VSS *stack)
1348 {
1349           while (stack->prev != NULL) {
1350                     stack->cur = 0;
1351                     stack = drop_state_level(stack);
1352           }
1353 }
1354 
/*
 * Each of these jumps to the named label, and defines the matching
 * "_return" label immediately after the jump.
 */
#define	PARSESUB()	{ goto parsesub; parsesub_return:; }
#define	PARSEARITH()	{ goto parsearith; parsearith_return:; }

/*
 * All of the macros below rely upon a local variable named "stack"
 * being in scope, which points to the current struct statestack
 */

/*
 * Macros, rather than inline functions, are used here to keep code
 * churn as low as possible - they take the place of macros with the
 * same names that were used previously.
 */
#define	ISDBLQUOTE()	(currentstate(stack)->ts_quoted & QS)
#define	SETDBLQUOTE()	(currentstate(stack)->ts_quoted = (DQ | QS))
#ifdef notdef
#define	CLRDBLQUOTE()	(currentstate(stack)->ts_quoted =		\
			    ((stack->cur != 0 || stack->prev) ?		\
				(prevstate(stack)->ts_quoted & QF) : 0))
#endif

/*
 * The rest exist only to save typing, and keep lines short...
 * Those which "look like" variable names must expand to lvalues
 */
#define	syntax		(currentstate(stack)->ts_syntax)
#define	parenlevel	(currentstate(stack)->ts_parenlevel)
#define	varnest		(currentstate(stack)->ts_varnest)
#define	arinest		(currentstate(stack)->ts_arinest)
#define	quoted		(currentstate(stack)->ts_quoted)
#define	magicq		(currentstate(stack)->ts_magicq)
#define	TS_PUSH()	(stack = bump_state_level(stack))
#define	TS_POP()	(stack = drop_state_level(stack))
1387 
1388 /*
1389  * Called to parse command substitutions.  oldstyle is true if the command
1390  * is enclosed inside `` (otherwise it was enclosed in "$( )")
1391  *
1392  * Internally nlpp is a pointer to the head of the linked
1393  * list of commands (passed by reference), and savelen is the number of
1394  * characters on the top of the stack which must be preserved.
1395  */
1396 static char *
parsebackq(VSS * const stack,char * const in,struct nodelist ** const pbqlist,const int oldstyle)1397 parsebackq(VSS *const stack, char * const in,
1398     struct nodelist **const pbqlist, const int oldstyle)
1399 {
1400           struct nodelist **nlpp;
1401           const int savepbq = parsebackquote;
1402           union node *n;
1403           char *out;
1404           char *str = NULL;
1405           char *volatile sstr = str;
1406           struct jmploc jmploc;
1407           struct jmploc *const savehandler = handler;
1408           struct parsefile *const savetopfile = getcurrentfile();
1409           const int savelen = in - stackblock();
1410           int saveprompt;
1411           int lno;
1412 
1413           if (setjmp(jmploc.loc)) {
1414                     popfilesupto(savetopfile);
1415                     if (sstr)
1416                               ckfree(__UNVOLATILE(sstr));
1417                     cleanup_state_stack(stack);
1418                     parsebackquote = 0;
1419                     handler = savehandler;
1420                     CTRACE(DBG_LEXER, ("parsebackq() err (%d), unwinding\n",
1421                         exception));
1422                     longjmp(handler->loc, 1);
1423           }
1424           INTOFF;
1425           sstr = str = NULL;
1426           if (savelen > 0) {
1427                     sstr = str = ckmalloc(savelen);
1428                     memcpy(str, stackblock(), savelen);
1429           }
1430           handler = &jmploc;
1431           INTON;
1432           if (oldstyle) {
1433                     /*
1434                      * We must read until the closing backquote, giving special
1435                      * treatment to some slashes, and then push the string and
1436                      * reread it as input, interpreting it normally.
1437                      */
1438                     int pc;
1439                     int psavelen;
1440                     char *pstr;
1441                     int line1 = plinno;
1442 
1443                     VTRACE(DBG_PARSE|DBG_LEXER,
1444                         ("parsebackq: repackaging `` as $( )"));
1445                     /*
1446                      * Because the entire `...` is read here, we don't
1447                      * need to bother the state stack.  That will be used
1448                      * (as appropriate) when the processed string is re-read.
1449                      */
1450                     STARTSTACKSTR(out);
1451 #ifdef DEBUG
1452                     for (psavelen = 0;;psavelen++) {        /* } */
1453 #else
1454                     for (;;) {
1455 #endif
1456                               if (needprompt) {
1457                                         setprompt(2);
1458                                         needprompt = 0;
1459                               }
1460                               pc = pgetc();
1461                               VTRACE(DBG_LEXER,
1462                                   ("parsebackq() got '%c'(%#.2x) in `` %s", pc&0xFF,
1463                                         pc&0x1FF, pc == '`' ? "terminator\n" : ""));
1464                               if (pc == '`')
1465                                         break;
1466                               switch (pc) {
1467                               case '\\':
1468                                         pc = pgetc();
1469                                         VTRACE(DBG_LEXER, ("then '%c'(%#.2x) ",
1470                                             pc&0xFF, pc&0x1FF));
1471 #ifdef DEBUG
1472                                         psavelen++;
1473 #endif
1474                                         if (pc == '\n') {   /* keep \ \n for later */
1475                                                   plinno++;
1476                                                   VTRACE(DBG_LEXER, ("@%d ", plinno));
1477                                                   needprompt = doprompt;
1478                                         }
1479                                         if (pc != '\\' && pc != '`' && pc != '$'
1480                                             && (!ISDBLQUOTE() || pc != '"')) {
1481                                                   VTRACE(DBG_LEXER, ("keep '\\' "));
1482                                                   STPUTC('\\', out);
1483                                         }
1484                                         break;
1485 
1486                               case '\n':
1487                                         plinno++;
1488                                         VTRACE(DBG_LEXER, ("@%d ", plinno));
1489                                         needprompt = doprompt;
1490                                         break;
1491 
1492                               case PEOF:
1493                                       startlinno = line1;
1494                                         VTRACE(DBG_LEXER, ("EOF\n", plinno));
1495                                         synerror("EOF in backquote substitution");
1496                                         break;
1497 
1498                               default:
1499                                         break;
1500                               }
1501                               VTRACE(DBG_LEXER, (".\n", plinno));
1502                               STPUTC(pc, out);
1503                     }
1504                     STPUTC('\0', out);
1505                     VTRACE(DBG_LEXER, ("parsebackq() ``:"));
1506                     VTRACE(DBG_PARSE|DBG_LEXER, (" read %d", psavelen));
1507                     psavelen = out - stackblock();
1508                     VTRACE(DBG_PARSE|DBG_LEXER, (" produced %d\n", psavelen));
1509                     if (psavelen > 0) {
1510                               pstr = grabstackstr(out);
1511                               CTRACE(DBG_LEXER,
1512                                   ("parsebackq() reprocessing as $(%s)\n", pstr));
1513                               setinputstring(pstr, 1, line1);
1514                     }
1515           }
1516           nlpp = pbqlist;
1517           while (*nlpp)
1518                     nlpp = &(*nlpp)->next;
1519           *nlpp = stalloc(sizeof(struct nodelist));
1520           (*nlpp)->next = NULL;
1521           parsebackquote = oldstyle;
1522 
1523           if (oldstyle) {
1524                     saveprompt = doprompt;
1525                     doprompt = 0;
1526           } else
1527                     saveprompt = 0;
1528 
1529           lno = -plinno;
1530           CTRACE(DBG_LEXER, ("parsebackq() parsing embedded command list\n"));
1531           n = list(0);
1532           CTRACE(DBG_LEXER, ("parsebackq() parsed $() (%d -> %d)\n", -lno,
1533               lno + plinno));
1534           lno += plinno;
1535 
1536           if (oldstyle) {
1537                     if (peektoken() != TEOF)
1538                               synexpect(-1, 0);
1539                     doprompt = saveprompt;
1540           } else
1541                     consumetoken(TRP);
1542 
1543           (*nlpp)->n = n;
1544           if (oldstyle) {
1545                     /*
1546                      * Start reading from old file again, ignoring any pushed back
1547                      * tokens left from the backquote parsing
1548                      */
1549                     CTRACE(DBG_LEXER, ("parsebackq() back to previous input\n"));
1550                     popfile();
1551                     tokpushback = 0;
1552           }
1553 
1554           while (stackblocksize() <= savelen)
1555                     growstackblock();
1556           STARTSTACKSTR(out);
1557           if (str) {
1558                     memcpy(out, str, savelen);
1559                     STADJUST(savelen, out);
1560                     INTOFF;
1561                     ckfree(str);
1562                     sstr = str = NULL;
1563                     INTON;
1564           }
1565           parsebackquote = savepbq;
1566           handler = savehandler;
1567           if (arinest || ISDBLQUOTE()) {
1568                     STPUTC(CTLBACKQ | CTLQUOTE, out);
1569                     while (--lno >= 0)
1570                               STPUTC(CTLNONL, out);
1571           } else
1572                     STPUTC(CTLBACKQ, out);
1573 
1574           return out;
1575 }
1576 
1577 /*
1578  * Parse a redirection operator.  The parameter "out" points to a string
1579  * specifying the fd to be redirected.  It is guaranteed to be either ""
1580  * or a numeric string (for now anyway).  The parameter "c" contains the
1581  * first character of the redirection operator.
1582  *
1583  * Note the string "out" is on the stack, which we are about to clobber,
1584  * so process it first...
1585  */
1586 
1587 static void
1588 parseredir(const char *out,  int c)
1589 {
1590           union node *np;
1591           int fd;
1592 
1593           np = stalloc(sizeof(struct nfile));
1594 
1595           fd = (*out == '\0') ? -1 : number(out);           /* number(out) >= 0 */
1596           np->nfile.fd = fd;  /* do this again later with updated fd */
1597           if (fd != np->nfile.fd)
1598                     error("file descriptor (%d) out of range (max %ld)",
1599                         fd, user_fd_limit - 1);
1600           if (fd < user_fd_limit && fd > max_user_fd)
1601                     max_user_fd = fd;
1602 
1603           VTRACE(DBG_LEXER, ("parseredir after '%s%c' ", out, c));
1604           if (c == '>') {
1605                     if (fd < 0)
1606                               fd = 1;
1607                     c = pgetc_linecont();
1608                     VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1609                     if (c == '>')
1610                               np->type = NAPPEND;
1611                     else if (c == '|')
1612                               np->type = NCLOBBER;
1613                     else if (c == '&')
1614                               np->type = NTOFD;
1615                     else {
1616                               np->type = NTO;
1617                               VTRACE(DBG_LEXER, ("unwanted ", c));
1618                               pungetc();
1619                     }
1620           } else {  /* c == '<' */
1621                     if (fd < 0)
1622                               fd = 0;
1623                     c = pgetc_linecont();
1624                     VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1625                     switch (c) {
1626                     case '<':
1627                               /* if sizes differ, just discard the old one */
1628                               if (sizeof (struct nfile) != sizeof (struct nhere))
1629                                         np = stalloc(sizeof(struct nhere));
1630                               np->type = NHERE;
1631                               np->nhere.fd = 0;
1632                               heredoc = stalloc(sizeof(struct HereDoc));
1633                               heredoc->here = np;
1634                               heredoc->startline = plinno;
1635                               if ((c = pgetc_linecont()) == '-') {
1636                                         CTRACE(DBG_LEXER, ("and '%c'(%#.2x) ",
1637                                             c & 0xFF, c & 0x1FF));
1638                                         heredoc->striptabs = 1;
1639                               } else {
1640                                         heredoc->striptabs = 0;
1641                                         pungetc();
1642                               }
1643                               break;
1644 
1645                     case '&':
1646                               np->type = NFROMFD;
1647                               break;
1648 
1649                     case '>':
1650                               np->type = NFROMTO;
1651                               break;
1652 
1653                     default:
1654                               np->type = NFROM;
1655                               VTRACE(DBG_LEXER, ("unwanted('%c'0#.2x)", c&0xFF,
1656                                   c&0x1FF));
1657                               pungetc();
1658                               break;
1659                     }
1660           }
1661           np->nfile.fd = fd;
1662 
1663           VTRACE(DBG_LEXER, (" ->%"PRIdsNT" fd=%d\n", NODETYPENAME(np->type),fd));
1664 
1665           redirnode = np;               /* this is the "value" of TRENODE */
1666 }
1667 
1668 /*
1669  * Called to parse a backslash escape sequence inside $'...'.
1670  * The backslash has already been read.
1671  */
1672 static char *
1673 readcstyleesc(char *out)
1674 {
1675           int c, vc, i, n;
1676           unsigned int v;
1677 
1678           c = pgetc();
1679           VTRACE(DBG_LEXER, ("CSTR(\\%c)(\\%#x)", c&0xFF, c&0x1FF));
1680           switch (c) {
1681           case '\0':
1682           case PEOF:
1683                     synerror("Unterminated quoted string ($'...)");
1684           case '\n':
1685                     plinno++;
1686                     VTRACE(DBG_LEXER, ("@%d ", plinno));
1687                     if (doprompt)
1688                               setprompt(2);
1689                     else
1690                               setprompt(0);
1691                     return out;
1692 
1693           case '\\':
1694           case '\'':
1695           case '"':
1696                     v = c;
1697                     break;
1698 
1699           case 'a': v = '\a'; break;
1700           case 'b': v = '\b'; break;
1701           case 'e': v = '\033'; break;
1702           case 'f': v = '\f'; break;
1703           case 'n': v = '\n'; break;
1704           case 'r': v = '\r'; break;
1705           case 't': v = '\t'; break;
1706           case 'v': v = '\v'; break;
1707 
1708           case '0': case '1': case '2': case '3':
1709           case '4': case '5': case '6': case '7':
1710                     v = c - '0';
1711                     c = pgetc();
1712                     if (c >= '0' && c <= '7') {
1713                               v <<= 3;
1714                               v += c - '0';
1715                               c = pgetc();
1716                               if (c >= '0' && c <= '7') {
1717                                         v <<= 3;
1718                                         v += c - '0';
1719                               } else
1720                                         pungetc();
1721                     } else
1722                               pungetc();
1723                     break;
1724 
1725           case 'c':
1726                     c = pgetc();
1727                     if (c < 0x3f || c > 0x7a || c == 0x60)
1728                               synerror("Bad \\c escape sequence");
1729                     if (c == '\\' && pgetc() != '\\')
1730                               synerror("Bad \\c\\ escape sequence");
1731                     if (c == '?')
1732                               v = 127;
1733                     else
1734                               v = c & 0x1f;
1735                     break;
1736 
1737           case 'x':
1738                     n = 2;
1739                     goto hexval;
1740           case 'u':
1741                     n = 4;
1742                     goto hexval;
1743           case 'U':
1744                     n = 8;
1745           hexval:;
1746                     v = 0;
1747                     for (i = 0; i < n; i++) {
1748                               c = pgetc();
1749                               if (c >= '0' && c <= '9')
1750                                         v = (v << 4) + c - '0';
1751                               else if (c >= 'A' && c <= 'F')
1752                                         v = (v << 4) + c - 'A' + 10;
1753                               else if (c >= 'a' && c <= 'f')
1754                                         v = (v << 4) + c - 'a' + 10;
1755                               else {
1756                                         pungetc();
1757                                         break;
1758                               }
1759                     }
1760                     if (n > 2 && v > 127) {
1761                               if (v >= 0xd800 && v <= 0xdfff)
1762                                         synerror("Invalid \\u escape sequence");
1763 
1764                               /* XXX should we use iconv here. What locale? */
1765                               CHECKSTRSPACE(12, out);
1766 
1767 /*
1768  * Add a byte to output string, while checking if it needs to
1769  * be escaped -- if its value happens to match the value of one
1770  * of our internal CTL* chars - which would (at a minimum) be
1771  * summarily removed later, if not escaped.
1772  *
1773  * The current definition of ISCTL() allows the compiler to
1774  * optimise away either half, or all, of the test in most of
1775  * the cases here (0xc0 | anything) cannot be between 0x80 and 0x9f
1776  * for example, so there a test is not needed).
1777  *
1778  * Which tests can be removed depends upon the actual values
1779  * selected for the CTL* chars.
1780  */
1781 #define   ESC_USTPUTC(c, o) do {                                      \
1782                     char _ch = (c);                                   \
1783                                                                       \
1784                     if (ISCTL(_ch))                                   \
1785                               USTPUTC(CTLESC, o);           \
1786                     USTPUTC(_ch, o);                        \
1787           } while (0)
1788 
1789                               VTRACE(DBG_LEXER, ("CSTR(\\%c%8.8x)", n==4?'u':'U', v));
1790                               if (v <= 0x7ff) {
1791                                         ESC_USTPUTC(0xc0 | v >> 6, out);
1792                                         ESC_USTPUTC(0x80 | (v & 0x3f), out);
1793                                         return out;
1794                               } else if (v <= 0xffff) {
1795                                         ESC_USTPUTC(0xe0 | v >> 12, out);
1796                                         ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1797                                         ESC_USTPUTC(0x80 | (v & 0x3f), out);
1798                                         return out;
1799                               } else if (v <= 0x10ffff) {
1800                                         ESC_USTPUTC(0xf0 | v >> 18, out);
1801                                         ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1802                                         ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1803                                         ESC_USTPUTC(0x80 | (v & 0x3f), out);
1804                                         return out;
1805 
1806           /* these next two are not very likely, but we may as well be complete */
1807                               } else if (v <= 0x3FFFFFF) {
1808                                         ESC_USTPUTC(0xf8 | v >> 24, out);
1809                                         ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
1810                                         ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1811                                         ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1812                                         ESC_USTPUTC(0x80 | (v & 0x3f), out);
1813                                         return out;
1814                               } else if (v <= 0x7FFFFFFF) {
1815                                         ESC_USTPUTC(0xfC | v >> 30, out);
1816                                         ESC_USTPUTC(0x80 | ((v >> 24) & 0x3f), out);
1817                                         ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
1818                                         ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1819                                         ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1820                                         ESC_USTPUTC(0x80 | (v & 0x3f), out);
1821                                         return out;
1822                               }
1823                               if (v > 127)
1824                                         v = '?';
1825                     }
1826                     break;
1827           default:
1828                     synerror("Unknown $'' escape sequence");
1829           }
1830           vc = (char)v;
1831           VTRACE(DBG_LEXER, ("->%u(%#x)['%c']", v, v, vc&0xFF));
1832 
1833           /*
1834            * If we managed to create a \n from a \ sequence (no matter how)
1835            * then we replace it with the magic CRTCNL control char, which
1836            * will turn into a \n again later, but in the meantime, never
1837            * causes LINENO increments.
1838            */
1839           if (vc == '\n') {
1840                     VTRACE(DBG_LEXER, ("CTLCNL."));
1841                     USTPUTC(CTLCNL, out);
1842                     return out;
1843           }
1844 
1845           /*
1846            * We can't handle NUL bytes.
1847            * POSIX says we should skip till the closing quote.
1848            */
1849           if (vc == '\0') {
1850                     CTRACE(DBG_LEXER, ("\\0: skip to '", v, v, vc&0xFF));
1851                     while ((c = pgetc()) != '\'') {
1852                               if (c == '\\')
1853                                         c = pgetc();
1854                               if (c == PEOF)
1855                                         synerror("Unterminated quoted string ($'...)");
1856                               if (c == '\n') {
1857                                         plinno++;
1858                                         if (doprompt)
1859                                                   setprompt(2);
1860                                         else
1861                                                   setprompt(0);
1862                               }
1863                     }
1864                     pungetc();
1865                     return out;
1866           }
1867           CVTRACE(DBG_LEXER, NEEDESC(vc), ("CTLESC-"));
1868           VTRACE(DBG_LEXER, ("'%c'(%#.2x)", vc&0xFF, vc&0x1FF));
1869           if (NEEDESC(vc))
1870                     USTPUTC(CTLESC, out);
1871           USTPUTC(vc, out);
1872           return out;
1873 }
1874 
1875 /*
1876  * The lowest level basic tokenizer.
1877  *
1878  * The next input byte (character) is in firstc, syn says which
1879  * syntax tables we are to use (basic, single or double quoted, or arith)
1880  * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1881  * quote character itself is not special (used parsing here docs and similar)
1882  *
1883  * The result is the type of the next token (its value, when there is one,
1884  * is saved in the relevant global var - must fix that someday!) which is
1885  * also saved for re-reading ("lasttoken").
1886  *
1887  * Overall, this routine does far more parsing than it is supposed to.
1888  * That will also need fixing, someday...
1889  */
1890 STATIC int
1891 readtoken1(int firstc, char const *syn, int oneword)
1892 {
1893           int c;
1894           char * out;
1895           int len;
1896           struct nodelist *bqlist;
1897           int quotef;
1898           VSS static_stack;
1899           VSS *stack = &static_stack;
1900 
1901           stack->prev = NULL;
1902           stack->cur = 0;
1903 
1904           syntax = syn;
1905 
1906 #ifdef DEBUG
1907 #define SYNTAX      (         syntax == BASESYNTAX ? "BASE" :                   \
1908                               syntax == DQSYNTAX   ? "DQ"   :                   \
1909                               syntax == SQSYNTAX   ? "SQ"   :                   \
1910                               syntax == ARISYNTAX  ? "ARI"  :                   \
1911                                                   "???"                         )
1912 #endif
1913 
1914           startlinno = plinno;
1915           varnest = 0;
1916           quoted = 0;
1917           if (syntax == DQSYNTAX)
1918                     SETDBLQUOTE();
1919           quotef = 0;
1920           bqlist = NULL;
1921           arinest = 0;
1922           parenlevel = 0;
1923           elided_nl = 0;
1924           magicq = oneword;
1925 
1926           CTRACE(DBG_LEXER, ("readtoken1(%c) syntax=%s %s%s(quoted=%x)\n",
1927               firstc&0xFF, SYNTAX, magicq ? "magic quotes" : "",
1928               ISDBLQUOTE()?" ISDBLQUOTE":"", quoted));
1929 
1930           STARTSTACKSTR(out);
1931 
1932           for (c = firstc ;; c = pgetc_macro()) { /* until of token */
1933                     if (syntax == ARISYNTAX)
1934                               out = insert_elided_nl(out);
1935                     CHECKSTRSPACE(6, out);        /* permit 6 calls to USTPUTC */
1936                     switch (syntax[c]) {
1937                     case CFAKE:
1938                               VTRACE(DBG_LEXER, ("CFAKE"));
1939                               if (syntax == BASESYNTAX && varnest == 0)
1940                                         break;
1941                               VTRACE(DBG_LEXER, (","));
1942                               continue;
1943                     case CNL: /* '\n' */
1944                               VTRACE(DBG_LEXER, ("CNL"));
1945                               if (syntax == BASESYNTAX && varnest == 0)
1946                                         break;    /* exit loop */
1947                               USTPUTC(c, out);
1948                               plinno++;
1949                               VTRACE(DBG_LEXER, ("@%d,", plinno));
1950                               if (doprompt)
1951                                         setprompt(2);
1952                               else
1953                                         setprompt(0);
1954                               continue;
1955 
1956                     case CSBACK:        /* single quoted backslash */
1957                               if ((quoted & QF) == CQ) {
1958                                         out = readcstyleesc(out);
1959                                         continue;
1960                               }
1961                               VTRACE(DBG_LEXER, ("ESC:"));
1962                               USTPUTC(CTLESC, out);
1963                               /* FALLTHROUGH */
1964                     case CWORD:
1965                               VTRACE(DBG_LEXER, ("'%c'", c));
1966                               USTPUTC(c, out);
1967                               continue;
1968 
1969                     case CCTL:
1970                               CVTRACE(DBG_LEXER, !magicq || ISDBLQUOTE(),
1971                                   ("%s%sESC:",!magicq?"!m":"",ISDBLQUOTE()?"DQ":""));
1972                               if (!magicq || ISDBLQUOTE())
1973                                         USTPUTC(CTLESC, out);
1974                               VTRACE(DBG_LEXER, ("'%c'", c));
1975                               USTPUTC(c, out);
1976                               continue;
1977                     case CBACK:         /* backslash */
1978                               c = pgetc();
1979                               VTRACE(DBG_LEXER, ("\\'%c'(%#.2x)", c&0xFF, c&0x1FF));
1980                               if (c == PEOF) {
1981                                         VTRACE(DBG_LEXER, ("EOF, keep \\ "));
1982                                         USTPUTC('\\', out);
1983                                         pungetc();
1984                                         continue;
1985                               }
1986                               if (c == '\n') {
1987                                         plinno++;
1988                                         elided_nl++;
1989                                         VTRACE(DBG_LEXER, ("eli \\n (%d) @%d ",
1990                                             elided_nl, plinno));
1991                                         if (doprompt)
1992                                                   setprompt(2);
1993                                         else
1994                                                   setprompt(0);
1995                                         continue;
1996                               }
1997                               CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
1998                               quotef = 1;         /* current token is quoted */
1999                               if (quoted && c != '\\' && c != '`' &&
2000                                   (c != '}' || varnest == 0) &&
2001                                   c != '$' && (c != '"' || magicq)) {
2002                                         /*
2003                                          * retain the \ (which we *know* needs CTLESC)
2004                                          * when in "..." and the following char is
2005                                          * not one of the magic few.)
2006                                          * Otherwise the \ has done its work, and
2007                                          * is dropped.
2008                                          */
2009                                         VTRACE(DBG_LEXER, ("ESC:'\\'"));
2010                                         USTPUTC(CTLESC, out);
2011                                         USTPUTC('\\', out);
2012                               }
2013                               CVTRACE(DBG_LEXER, NEEDESC(c) || !magicq,
2014                                   ("%sESC:", NEEDESC(c) ? "+" : "m"));
2015                               VTRACE(DBG_LEXER, ("'%c'(%#.2x)", c&0xFF, c&0x1FF));
2016                               if (NEEDESC(c))
2017                                         USTPUTC(CTLESC, out);
2018                               else if (!magicq) {
2019                                         USTPUTC(CTLESC, out);
2020                                         USTPUTC(c, out);
2021                                         continue;
2022                               }
2023                               USTPUTC(c, out);
2024                               continue;
2025                     case CSQUOTE:
2026                               if (syntax != SQSYNTAX) {
2027                                         CVTRACE(DBG_LEXER, !magicq, (" CQM "));
2028                                         if (!magicq)
2029                                                   USTPUTC(CTLQUOTEMARK, out);
2030                                         CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
2031                                         quotef = 1;
2032                                         TS_PUSH();
2033                                         syntax = SQSYNTAX;
2034                                         quoted = SQ;
2035                                         VTRACE(DBG_LEXER, (" TS_PUSH(SQ)"));
2036                                         continue;
2037                               }
2038                               if (magicq && arinest == 0 && varnest == 0) {
2039                                         /* Ignore inside quoted here document */
2040                                         VTRACE(DBG_LEXER, ("<<'>>"));
2041                                         USTPUTC(c, out);
2042                                         continue;
2043                               }
2044                               /* End of single quotes... */
2045                               TS_POP();
2046                               VTRACE(DBG_LEXER, ("SQ TS_POP->%s ", SYNTAX));
2047                               CVTRACE(DBG_LEXER, syntax == BASESYNTAX, (" CQE "));
2048                               if (syntax == BASESYNTAX)
2049                                         USTPUTC(CTLQUOTEEND, out);
2050                               continue;
2051                     case CDQUOTE:
2052                               if (magicq && arinest == 0 /* && varnest == 0 */) {
2053                                         VTRACE(DBG_LEXER, ("<<\">>"));
2054                                         /* Ignore inside here document */
2055                                         USTPUTC(c, out);
2056                                         continue;
2057                               }
2058                               CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
2059                               quotef = 1;
2060                               if (arinest) {
2061                                         if (ISDBLQUOTE()) {
2062                                                   VTRACE(DBG_LEXER,
2063                                                       (" CQE ari(%d", arinest));
2064                                                   USTPUTC(CTLQUOTEEND, out);
2065                                                   TS_POP();
2066                                                   VTRACE(DBG_LEXER, ("%d)TS_POP->%s ",
2067                                                       arinest, SYNTAX));
2068                                         } else {
2069                                                   VTRACE(DBG_LEXER,
2070                                                     (" ari(%d) %s TS_PUSH->DQ CQM ",
2071                                                      arinest, SYNTAX));
2072                                                   TS_PUSH();
2073                                                   syntax = DQSYNTAX;
2074                                                   SETDBLQUOTE();
2075                                                   USTPUTC(CTLQUOTEMARK, out);
2076                                         }
2077                                         continue;
2078                               }
2079                               CVTRACE(DBG_LEXER, magicq, (" MQignDQ "));
2080                               if (magicq)
2081                                         continue;
2082                               if (ISDBLQUOTE()) {
2083                                         TS_POP();
2084                                         VTRACE(DBG_LEXER,
2085                                             (" DQ TS_POP->%s CQE ", SYNTAX));
2086                                         USTPUTC(CTLQUOTEEND, out);
2087                               } else {
2088                                         VTRACE(DBG_LEXER,
2089                                             (" %s TS_POP->DQ CQM ", SYNTAX));
2090                                         TS_PUSH();
2091                                         syntax = DQSYNTAX;
2092                                         SETDBLQUOTE();
2093                                         USTPUTC(CTLQUOTEMARK, out);
2094                               }
2095                               continue;
2096                     case CVAR:          /* '$' */
2097                               VTRACE(DBG_LEXER, ("'$'..."));
2098                               out = insert_elided_nl(out);
2099                               PARSESUB();                   /* parse substitution */
2100                               continue;
2101                     case CENDVAR:       /* CLOSEBRACE */
2102                               if (varnest > 0 && !ISDBLQUOTE()) {
2103                                         VTRACE(DBG_LEXER, ("vn=%d !DQ", varnest));
2104                                         TS_POP();
2105                                         VTRACE(DBG_LEXER, (" TS_POP->%s CEV ", SYNTAX));
2106                                         USTPUTC(CTLENDVAR, out);
2107                               } else {
2108                                         VTRACE(DBG_LEXER, ("'%c'", c));
2109                                         USTPUTC(c, out);
2110                               }
2111                               out = insert_elided_nl(out);
2112                               continue;
2113                     case CLP: /* '(' in arithmetic */
2114                               parenlevel++;
2115                               VTRACE(DBG_LEXER, ("'('(%d)", parenlevel));
2116                               USTPUTC(c, out);
2117                               continue;
2118                     case CRP: /* ')' in arithmetic */
2119                               if (parenlevel > 0) {
2120                                         USTPUTC(c, out);
2121                                         --parenlevel;
2122                                         VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
2123                               } else {
2124                                         VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
2125                                         if (pgetc_linecont() == /*(*/ ')') {
2126                                                   out = insert_elided_nl(out);
2127                                                   if (--arinest == 0) {
2128                                                             TS_POP();
2129                                                             USTPUTC(CTLENDARI, out);
2130                                                   } else
2131                                                             USTPUTC(/*(*/ ')', out);
2132                                         } else {
2133                                                   break;    /* to synerror() just below */
2134 #if 0     /* the old way, causes weird errors on bad input */
2135                                                   /*
2136                                                    * unbalanced parens
2137                                                    *  (don't 2nd guess - no error)
2138                                                    */
2139                                                   pungetc();
2140                                                   USTPUTC(/*(*/ ')', out);
2141 #endif
2142                                         }
2143                               }
2144                               continue;
2145                     case CBQUOTE:       /* '`' */
2146                               VTRACE(DBG_LEXER, ("'`' -> parsebackq()\n"));
2147                               out = parsebackq(stack, out, &bqlist, 1);
2148                               VTRACE(DBG_LEXER, ("parsebackq() -> readtoken1: "));
2149                               continue;
2150                     case CEOF:                    /* --> c == PEOF */
2151                               VTRACE(DBG_LEXER, ("EOF "));
2152                               break;              /* will exit loop */
2153                     default:
2154                               VTRACE(DBG_LEXER, ("['%c'(%#.2x)]", c&0xFF, c&0x1FF));
2155                               if (varnest == 0 && !ISDBLQUOTE())
2156                                         break;    /* exit loop */
2157                               USTPUTC(c, out);
2158                               VTRACE(DBG_LEXER, (","));
2159                               continue;
2160                     }
2161                     VTRACE(DBG_LEXER, (" END TOKEN\n", c&0xFF, c&0x1FF));
2162                     break;    /* break from switch -> break from for loop too */
2163           }
2164 
2165           if (syntax == ARISYNTAX) {
2166                     cleanup_state_stack(stack);
2167                     synerror(/*((*/ "Missing '))'");
2168           }
2169           if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
2170                     cleanup_state_stack(stack);
2171                     synerror("Unterminated quoted string");
2172           }
2173           if (varnest != 0) {
2174                     cleanup_state_stack(stack);
2175                     startlinno = plinno;
2176                     /* { */
2177                     synerror("Missing '}'");
2178           }
2179 
2180           STPUTC('\0', out);
2181           len = out - stackblock();
2182           out = stackblock();
2183 
2184           if (!magicq) {
2185                     if ((c == '<' || c == '>')
2186                      && quotef == 0 && (*out == '\0' || is_number(out))) {
2187                               parseredir(out, c);
2188                               cleanup_state_stack(stack);
2189                               return lasttoken = TREDIR;
2190                     } else {
2191                               pungetc();
2192                     }
2193           }
2194 
2195           VTRACE(DBG_PARSE|DBG_LEXER,
2196               ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n",
2197               (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""),
2198               len, elided_nl));
2199 
2200           quoteflag = quotef;
2201           backquotelist = bqlist;
2202           grabstackblock(len);
2203           wordtext = out;
2204           cleanup_state_stack(stack);
2205           return lasttoken = TWORD;
2206 /* end of readtoken routine */
2207 
2208 
2209 /*
2210  * Parse a substitution.  At this point, we have read the dollar sign
2211  * and nothing else.
2212  */
2213 
2214 parsesub: {
2215           int subtype;
2216           int typeloc;
2217           int flags;
2218           const char *p;
2219           static const char types[] = "}-+?=";    /* see parser.h VSXYZ defs */
2220 
2221           c = pgetc_linecont();
2222           VTRACE(DBG_LEXER, ("\"$%c\"(%#.2x)", c&0xFF, c&0x1FF));
2223           if (c == '(' /*)*/) {         /* $(command) or $((arith)) */
2224                     if (pgetc_linecont() == '(' /*')'*/ ) {
2225                               VTRACE(DBG_LEXER, ("\"$((\" ARITH "));
2226                               out = insert_elided_nl(out);
2227                               PARSEARITH();
2228                     } else {
2229                               VTRACE(DBG_LEXER, ("\"$(\" CSUB->parsebackq()\n"));
2230                               out = insert_elided_nl(out);
2231                               pungetc();
2232                               out = parsebackq(stack, out, &bqlist, 0);
2233                               VTRACE(DBG_LEXER, ("parseback()->readtoken1(): "));
2234                     }
2235           } else if (c == OPENBRACE || is_name(c) || is_special(c)) {
2236                     VTRACE(DBG_LEXER, (" $EXP:CTLVAR "));
2237                     USTPUTC(CTLVAR, out);
2238                     typeloc = out - stackblock();
2239                     USTPUTC(VSNORMAL, out);
2240                     subtype = VSNORMAL;
2241                     flags = 0;
2242                     if (c == OPENBRACE) {
2243                               c = pgetc_linecont();
2244                               if (c == '#') {
2245                                         if ((c = pgetc_linecont()) == CLOSEBRACE)
2246                                                   c = '#';
2247                                         else if (is_name(c) || isdigit(c))
2248                                                   subtype = VSLENGTH;
2249                                         else if (is_special(c)) {
2250                                                   /*
2251                                                    * ${#} is $# - the number of sh params
2252                                                    * ${##} is the length of ${#}
2253                                                    * ${###} is ${#} with as much nothing
2254                                                    *        as possible removed from start
2255                                                    * ${##1} is ${#} with leading 1 gone
2256                                                    * ${##\#} is ${#} with leading # gone
2257                                                    *
2258                                                    * this stuff is UGLY!
2259                                                    */
2260                                                   if (pgetc_linecont() == CLOSEBRACE) {
2261                                                             pungetc();
2262                                                             subtype = VSLENGTH;
2263                                                   } else {
2264                                                             static char cbuf[2];
2265 
2266                                                             pungetc();   /* would like 2 */
2267                                                             cbuf[0] = c; /* so ... */
2268                                                             cbuf[1] = '\0';
2269                                                             pushstring(cbuf, 1, NULL);
2270                                                             c = '#';     /* ${#:...} */
2271                                                             subtype = 0; /* .. or similar */
2272                                                   }
2273                                         } else {
2274                                                   pungetc();
2275                                                   c = '#';
2276                                                   subtype = 0;
2277                                         }
2278                               }
2279                               else
2280                                         subtype = 0;
2281                               VTRACE(DBG_LEXER, ("${ st=%d ", subtype));
2282                     }
2283                     if (is_name(c)) {
2284                               p = out;
2285                               do {
2286                                         VTRACE(DBG_LEXER, ("%c", c));
2287                                         STPUTC(c, out);
2288                                         c = pgetc_linecont();
2289                               } while (is_in_name(c));
2290 
2291 #if 0
2292                               if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
2293                                         int i;
2294                                         int linno;
2295                                         char buf[10];
2296 
2297                                         /*
2298                                          * The "LINENO hack"
2299                                          *
2300                                          * Replace the variable name with the
2301                                          * current line number.
2302                                          */
2303                                         linno = plinno;
2304                                         if (funclinno != 0)
2305                                                   linno -= funclinno - 1;
2306                                         snprintf(buf, sizeof(buf), "%d", linno);
2307                                         STADJUST(-6, out);
2308                                         for (i = 0; buf[i] != '\0'; i++)
2309                                                   STPUTC(buf[i], out);
2310                                         flags |= VSLINENO;
2311                               }
2312 #endif
2313                     } else if (is_digit(c)) {
2314                               do {
2315                                         VTRACE(DBG_LEXER, ("%c", c));
2316                                         STPUTC(c, out);
2317                                         c = pgetc_linecont();
2318                               } while (subtype != VSNORMAL && is_digit(c));
2319                     }
2320                     else if (is_special(c)) {
2321                               VTRACE(DBG_LEXER, ("\"$%c", c));
2322                               USTPUTC(c, out);
2323                               c = pgetc_linecont();
2324                     }
2325                     else {
2326                               VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??", c&0xFF, c&0xFF));
2327                               subtype = VSUNKNOWN;
2328                     }
2329 
2330                     STPUTC('=', out);
2331                     if (subtype == 0) {
2332                               switch (c) {
2333                               case ':':
2334                                         flags |= VSNUL;
2335                                         c = pgetc_linecont();
2336                                         /*FALLTHROUGH*/
2337                               default:
2338                                         p = strchr(types, c);
2339                                         if (__predict_false(p == NULL)) {
2340                                                   subtype = VSUNKNOWN;
2341                                                             /*
2342                                                              * keep the unknown modifier
2343                                                              * for the error message.
2344                                                              *
2345                                                              * Note that if we came from
2346                                                              * the case ':' above, that
2347                                                              * is the unknown modifier,
2348                                                              * not the following character
2349                                                              *
2350                                                              * It is not important that
2351                                                              * we keep the remaining word
2352                                                              * intact, it will never be
2353                                                              * used.
2354                                                              */
2355                                                   if (flags & VSNUL)
2356                                                             /* (ie: lose c) */
2357                                                             STPUTC(':', out);
2358                                                   else
2359                                                             STPUTC(c, out);
2360                                         } else
2361                                                   subtype = p - types + VSNORMAL;
2362                                         break;
2363                               case '%':
2364                               case '#':
2365                                         {
2366                                                   int cc = c;
2367                                                   subtype = c == '#' ? VSTRIMLEFT :
2368                                                                            VSTRIMRIGHT;
2369                                                   c = pgetc_linecont();
2370                                                   if (c == cc)
2371                                                             subtype++;
2372                                                   else
2373                                                             pungetc();
2374                                                   break;
2375                                         }
2376                               }
2377                     } else {
2378                               if (subtype == VSLENGTH && c != /*{*/ '}') {
2379                                         STPUTC('#', out);
2380                                         subtype = VSUNKNOWN;
2381                               }
2382                               pungetc();
2383                     }
2384                     if (quoted || arinest)
2385                               flags |= VSQUOTE;
2386                     if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
2387                               flags |= VSPATQ;
2388                     VTRACE(DBG_LEXER, (" st%d:%x", subtype, flags));
2389                     *(stackblock() + typeloc) = subtype | flags;
2390                     if (subtype != VSNORMAL) {
2391                               TS_PUSH();
2392                               varnest++;
2393                               arinest = 0;
2394                               if (subtype > VSASSIGN) {     /* # ## % %% */
2395                                         syntax = BASESYNTAX;
2396                                         quoted = 0;
2397                                         magicq = 0;
2398                               }
2399                               VTRACE(DBG_LEXER, (" TS_PUSH->%s vn=%d%s ",
2400                                   SYNTAX, varnest, quoted ? " Q" : ""));
2401                     }
2402           } else if (c == '\'' && syntax == BASESYNTAX) {
2403                     USTPUTC(CTLQUOTEMARK, out);
2404                     VTRACE(DBG_LEXER, (" CSTR \"$'\" CQM "));
2405                     CVTRACE(DBG_LEXER, quotef==0, ("QF=1 "));
2406                     quotef = 1;
2407                     TS_PUSH();
2408                     syntax = SQSYNTAX;
2409                     quoted = CQ;
2410                     VTRACE(DBG_LEXER, ("%s->TS_PUSH()->SQ ", SYNTAX));
2411           } else {
2412                     VTRACE(DBG_LEXER, ("$unk -> '$' (pushback '%c'%#.2x)",
2413                               c & 0xFF, c & 0x1FF));
2414                     USTPUTC('$', out);
2415                     pungetc();
2416           }
2417           goto parsesub_return;
2418 }
2419 
2420 
2421 /*
2422  * Parse an arithmetic expansion (indicate start of one and set state)
2423  */
2424 parsearith: {
2425 
2426 #if 0
2427           if (syntax == ARISYNTAX) {
2428                     /*
2429                      * we collapse embedded arithmetic expansion to
2430                      * parentheses, which should be equivalent
2431                      *
2432                      *        XXX It isn't, must fix, soonish...
2433                      */
2434                     USTPUTC('(' /*)*/, out);
2435                     USTPUTC('(' /*)*/, out);
2436                     /*
2437                      * Need 2 of them because there will (should be)
2438                      * two closing ))'s to follow later.
2439                      */
2440                     parenlevel += 2;
2441           } else
2442 #endif
2443           {
2444                     VTRACE(DBG_LEXER, (" CTLARI%c ", ISDBLQUOTE()?'"':'_'));
2445                     USTPUTC(CTLARI, out);
2446                     if (ISDBLQUOTE())
2447                               USTPUTC('"',out);
2448                     else
2449                               USTPUTC(' ',out);
2450 
2451                     VTRACE(DBG_LEXER, ("%s->TS_PUSH->ARI(1)", SYNTAX));
2452                     TS_PUSH();
2453                     syntax = ARISYNTAX;
2454                     arinest = 1;
2455                     varnest = 0;
2456                     magicq = 1;
2457           }
2458           goto parsearith_return;
2459 }
2460 
2461 } /* end of readtoken */
2462 
2463 
2464 
2465 
#ifdef mkinit
INCLUDE "parser.h"

RESET {
	/*
	 * Go back to using the file-level parse_state as the current
	 * parser state ...
	 */
	psp.v_current_parser = &parse_state;

	/*
	 * ... and forget anything it was holding: the here-document
	 * list, the keyword-check setting, and any pushed back token.
	 */
	parse_state.ps_heredoclist = NULL;
	parse_state.ps_checkkwd = 0;
	parse_state.ps_tokpushback = 0;
}
#endif
2477 
2478 /*
2479  * Returns true if the text contains nothing to expand (no dollar signs
2480  * or backquotes).
2481  */
2482 
2483 STATIC int
2484 noexpand(char *text)
2485 {
2486           char *p;
2487           char c;
2488 
2489           p = text;
2490           while ((c = *p++) != '\0') {
2491                     if (c == CTLQUOTEMARK || c == CTLQUOTEEND)
2492                               continue;
2493                     if (c == CTLESC)
2494                               p++;
2495                     else if (ISCTL(c))
2496                               return 0;
2497           }
2498           return 1;
2499 }
2500 
2501 
/*
 * Check that the argument is a legal variable name: one character
 * accepted by is_name() (a letter or underscore) followed by any
 * number of characters accepted by is_in_name() (letters, underscores
 * and digits).
 *
 * Returns 1 for a legal name, 0 otherwise (which includes the
 * empty string).
 */

int
goodname(const char *name)
{
	const char *cp = name;

	if (!is_name(*cp))
		return 0;

	for (cp++; *cp != '\0'; cp++)
		if (!is_in_name(*cp))
			return 0;

	return 1;
}
2521 
/*
 * Check whether the string starts like a variable assignment: a
 * character accepted by is_name(), then zero or more characters
 * accepted by is_in_name(), then an '='.  Whatever follows the '='
 * is not looked at.
 *
 * Returns 1 if so, 0 if the string ends, or a character that cannot
 * be part of a name appears, before any '=' is found.
 */
int
isassignment(const char *p)
{
	if (!is_name(*p))
		return 0;

	for (p++; *p != '='; p++)
		if (*p == '\0' || !is_in_name(*p))
			return 0;

	return 1;
}
2532 
2533 /*
2534  * skip past any \n's, and leave lasttoken set to whatever follows
2535  */
2536 STATIC void
2537 linebreak(void)
2538 {
2539           while (readtoken() == TNL)
2540                     readheredocs();
2541 }
2542 
2543 /*
2544  * The next token must be "token" -- check, then move past it
2545  */
2546 STATIC void
2547 consumetoken(int token)
2548 {
2549           if (readtoken() != token) {
2550                     VTRACE(DBG_PARSE, ("consumetoken(%d): expecting %s got %s",
2551                         token, tokname[token], tokname[lasttoken]));
2552                     CVTRACE(DBG_PARSE, (lasttoken==TWORD), (" \"%s\"", wordtext));
2553                     VTRACE(DBG_PARSE, ("\n"));
2554                     synexpect(token, NULL);
2555           }
2556 }
2557 
2558 /*
2559  * Called when an unexpected token is read during the parse.  The argument
2560  * is the token that is expected, or -1 if more than one type of token can
2561  * occur at this point.
2562  */
2563 
2564 STATIC void
2565 synexpect(int token, const char *text)
2566 {
2567           char msg[64];
2568           char *p;
2569 
2570           if (lasttoken == TWORD) {
2571                     size_t len = strlen(wordtext);
2572 
2573                     if (len <= 13)
2574                               fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
2575                     else
2576                               fmtstr(msg, 34,
2577                                   "Word \"%.10s...\" unexpected", wordtext);
2578           } else
2579                     fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
2580 
2581           p = strchr(msg, '\0');
2582           if (text)
2583                     fmtstr(p, 30, " (expecting \"%.10s\")", text);
2584           else if (token >= 0)
2585                     fmtstr(p, 30, " (expecting %s)",  tokname[token]);
2586 
2587           synerror(msg);
2588           /* NOTREACHED */
2589 }
2590 
2591 
2592 STATIC void
2593 synerror(const char *msg)
2594 {
2595           error("%d: Syntax error: %s", startlinno, msg);
2596           /* NOTREACHED */
2597 }
2598 
2599 STATIC void
2600 setprompt(int which)
2601 {
2602           whichprompt = which;
2603 
2604 #ifndef SMALL
2605           if (!el)
2606 #endif
2607                     out2str(getprompt(NULL));
2608 }
2609 
2610 /*
2611  * handle getting the next character, while ignoring \ \n
2612  * (which is a little tricky as we only have one char of pushback
2613  * and we need that one elsewhere).
2614  */
2615 STATIC int
2616 pgetc_linecont(void)
2617 {
2618           int c;
2619 
2620           while ((c = pgetc()) == '\\') {
2621                     c = pgetc();
2622                     if (c == '\n') {
2623                               plinno++;
2624                               elided_nl++;
2625                               VTRACE(DBG_LEXER, ("\"\\n\"drop(el=%d@%d)",
2626                                   elided_nl, plinno));
2627                               if (doprompt)
2628                                         setprompt(2);
2629                               else
2630                                         setprompt(0);
2631                     } else {
2632                               pungetc();
2633                               /* Allow the backslash to be pushed back. */
2634                               pushstring("\\", 1, NULL);
2635                               return (pgetc());
2636                     }
2637           }
2638           return (c);
2639 }
2640 
2641 /*
2642  * called by editline -- any expansions to the prompt
2643  *    should be added here.
2644  */
2645 const char *
2646 getprompt(void *unused)
2647 {
2648           char *p;
2649           const char *cp;
2650           int wp;
2651 
2652           if (!doprompt)
2653                     return "";
2654 
2655           VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt));
2656 
2657           switch (wp = whichprompt) {
2658           case 0:
2659                     return "";
2660           case 1:
2661                     p = ps1val();
2662                     break;
2663           case 2:
2664                     p = ps2val();
2665                     break;
2666           default:
2667                     return "<internal prompt error>";
2668           }
2669           if (p == NULL)
2670                     return "";
2671 
2672           VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p));
2673 
2674           cp = expandstr(p, plinno);
2675           whichprompt = wp;   /* history depends on it not changing */
2676 
2677           VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp));
2678 
2679           return cp;
2680 }
2681 
2682 /*
2683  * Expand a string ... used for expanding prompts (PS1...)
2684  *
2685  * Never return NULL, always some string (return input string if invalid)
2686  *
2687  * The internal routine does the work, leaving the result on the
2688  * stack (or in a static string, or even the input string) and
2689  * handles parser recursion, and cleanup after an error while parsing.
2690  *
2691  * The visible interface copies the result off the stack (if it is there),
2692  * and handles stack management, leaving the stack in the exact same
2693  * state it was when expandstr() was called (so it can be used part way
2694  * through building a stack data structure - as in when PS2 is being
2695  * expanded half way through reading a "command line")
2696  *
2697  * on error, expandonstack() cleans up the parser state, but then
2698  * simply jumps out through expandstr() without doing any stack cleanup,
2699  * which is OK, as the error handler must deal with that anyway.
2700  *
2701  * The split into two funcs is to avoid problems with setjmp/longjmp
2702  * and local variables which could otherwise be optimised into bizarre
2703  * behaviour.
2704  */
2705 static const char *
2706 expandonstack(char *ps, int cmdsub, int lineno)
2707 {
2708           union node n;
2709           struct jmploc jmploc;
2710           struct jmploc *const savehandler = handler;
2711           struct parsefile *const savetopfile = getcurrentfile();
2712           char * const save_ps = ps;
2713           const int save_x = xflag;
2714           const int save_e_s = errors_suppressed;
2715           struct parse_state new_state = init_parse_state;
2716           struct parse_state *const saveparser = psp.v_current_parser;
2717           const char *result = NULL;
2718 
2719           if (!setjmp(jmploc.loc)) {
2720                     handler = &jmploc;
2721                     errors_suppressed = 1;
2722 
2723                     psp.v_current_parser = &new_state;
2724                     setinputstring(ps, 1, lineno);
2725 
2726                     readtoken1(pgetc(), DQSYNTAX, 1);
2727                     if (backquotelist != NULL) {
2728                               if (!cmdsub)
2729                                         result = ps;
2730                               else if (cmdsub == 1 && !promptcmds)
2731                                         result = "-o promptcmds not set: ";
2732                     }
2733                     if (result == NULL) {
2734                               n.narg.type = NARG;
2735                               n.narg.next = NULL;
2736                               n.narg.text = wordtext;
2737                               n.narg.lineno = lineno;
2738                               n.narg.backquote = backquotelist;
2739 
2740                               xflag = 0;          /* we might be expanding PS4 ... */
2741                               expandarg(&n, NULL, 0);
2742                               result = stackblock();
2743                     }
2744           } else {
2745                     psp.v_current_parser = saveparser;
2746                     xflag = save_x;
2747                     popfilesupto(savetopfile);
2748                     handler = savehandler;
2749                     errors_suppressed = save_e_s;
2750 
2751                     if (exception == EXEXIT)
2752                               longjmp(handler->loc, 1);
2753                     if (exception == EXINT)
2754                               exraise(SIGINT);
2755                     return "";
2756           }
2757           psp.v_current_parser = saveparser;
2758           xflag = save_x;
2759           popfilesupto(savetopfile);
2760           handler = savehandler;
2761           errors_suppressed = save_e_s;
2762 
2763           if (result == NULL)
2764                     result = save_ps;
2765 
2766           return result;
2767 }
2768 
2769 const char *
2770 expandstr(char *ps, int lineno)
2771 {
2772           const char *result = NULL;
2773           struct stackmark smark;
2774           static char *buffer = NULL;   /* storage for prompt, never freed */
2775           static size_t bufferlen = 0;
2776 
2777           setstackmark(&smark);
2778           /*
2779            * At this point we anticipate that there may be a string
2780            * growing on the stack, but we have no idea how big it is.
2781            * However we know that it cannot be bigger than the current
2782            * allocated stack block, so simply reserve the whole thing,
2783            * then we can use the stack without barfing all over what
2784            * is there already...   (the stack mark undoes this later.)
2785            */
2786           (void) stalloc(stackblocksize());
2787 
2788           result = expandonstack(ps, 1, lineno);
2789 
2790           if (__predict_true(result == stackblock())) {
2791                     size_t len = strlen(result) + 1;
2792 
2793                     /*
2794                      * the result (usual case) is on the stack, which we
2795                      * are just about to discard (popstackmark()) so we
2796                      * need to move it somewhere safe first.
2797                      */
2798 
2799                     if (__predict_false(len > bufferlen)) {
2800                               char *new;
2801                               size_t newlen = bufferlen;
2802 
2803                               if (__predict_false(len > (SIZE_MAX >> 4))) {
2804                                         result = "huge prompt: ";
2805                                         goto getout;
2806                               }
2807 
2808                               if (newlen == 0)
2809                                         newlen = 32;
2810                               while (newlen <= len)
2811                                         newlen <<= 1;
2812 
2813                               new = (char *)realloc(buffer, newlen);
2814 
2815                               if (__predict_false(new == NULL)) {
2816                                         /*
2817                                          * this should rarely (if ever) happen
2818                                          * but we must do something when it does...
2819                                          */
2820                                         result = "No mem for prompt: ";
2821                                         goto getout;
2822                               } else {
2823                                         buffer = new;
2824                                         bufferlen = newlen;
2825                               }
2826                     }
2827                     (void)memcpy(buffer, result, len);
2828                     result = buffer;
2829           }
2830 
2831   getout:;
2832           popstackmark(&smark);
2833 
2834           return result;
2835 }
2836 
2837 #ifndef SMALL
2838 /*
2839  * A version of the above which isn't tailored to expanding prompts,
2840  * but can be used for expanding other expandable variables when
2841  * they need to be used.   ${LINENO} will always expand to 0 in this case.
2842  */
2843 
2844 const char *
2845 expandvar(char *var, int flags)
2846 {
2847           const char *result = NULL;
2848           struct stackmark smark;
2849           static char *buffer = NULL;   /* storage for result */
2850           static size_t bufferlen = 0;
2851 
2852           setstackmark(&smark);
2853           /*
2854            * At this point we anticipate that there may be a string
2855            * growing on the stack, [...]   [see expandstr() above].
2856            */
2857           (void) stalloc(stackblocksize());
2858 
2859           result = expandonstack(var, (flags & VUNSAFE ? 0 : 2), 0);
2860           if (__predict_false(result == NULL || *result == '\0')) {
2861                     result = NULL;
2862                     goto getout;
2863           }
2864 
2865           if (__predict_true(result == stackblock())) {
2866                     size_t len = strlen(result) + 1;
2867 
2868                     /*
2869                      * the result is on the stack, so we
2870                      * need to move it somewhere safe first.
2871                      */
2872 
2873                     if (__predict_false(len > bufferlen)) {
2874                               char *new;
2875                               size_t newlen = bufferlen;
2876 
2877                               if (__predict_false(len > (SIZE_MAX >> 4))) {
2878                                         result = "";
2879                                         goto getout;
2880                               }
2881 
2882                               if (__predict_false(newlen == 0))
2883                                         newlen = 32;
2884                               while (newlen <= len)
2885                                         newlen <<= 1;
2886 
2887                               new = (char *)realloc(buffer, newlen);
2888 
2889                               if (__predict_false(new == NULL)) {
2890                                         /*
2891                                          * this should rarely (if ever) happen
2892                                          * but we must do something when it does...
2893                                          */
2894                                         result = "";
2895                                         goto getout;
2896                               } else {
2897                                         buffer = new;
2898                                         bufferlen = newlen;
2899                               }
2900                     }
2901                     (void)memcpy(buffer, result, len);
2902                     result = buffer;
2903           }
2904 
2905   getout:;
2906           popstackmark(&smark);
2907 
2908           return result;
2909 }
2910 #endif
2911 
/*
 * expandenv: the simplest of the expanders.  It performs no $( )
 * expansions, and is intended for use during shell startup, when
 * nothing is being parsed and so the stack is not otherwise in use.
 * That leaves us free to do what we like with the stack, and with
 * no cleaning up to do (that is handled externally).
 *
 *	arg	the text to be expanded
 *
 * The result of expandonstack() is returned unaltered, even if it
 * is located on the stack.
 */
const char *
expandenv(char *arg)
{
	const char *expanded;

	expanded = expandonstack(arg, 0, 0);

	return expanded;
}
2925