1 /*        $NetBSD: for.c,v 1.185 2025/04/22 19:28:50 rillig Exp $     */
2 
3 /*
4  * Copyright (c) 1992, The Regents of the University of California.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Handling of .for/.endfor loops in a makefile.
34  *
35  * For loops have the form:
36  *
37  *        .for <varname...> in <value...>
38  *        # the body
39  *        .endfor
40  *
41  * When a .for line is parsed, the following lines are copied to the body of
42  * the .for loop, until the corresponding .endfor line is reached.  In this
43  * phase, the body is not yet evaluated.  This also applies to any nested
44  * .for loops.
45  *
46  * After reaching the .endfor, the values from the .for line are grouped
47  * according to the number of variables.  For each such group, the unexpanded
48  * body is scanned for expressions, and those that match the
49  * variable names are replaced with expressions of the form ${:U...}.  After
50  * that, the body is treated like a file from an .include directive.
51  *
52  * Interface:
53  *        For_Eval  Evaluate the loop in the passed line.
54  *
55  *        For_Run             Run accumulated loop
56  */
57 
58 #include "make.h"
59 
60 /*        "@(#)for.c          8.1 (Berkeley) 6/6/93"        */
61 MAKE_RCSID("$NetBSD: for.c,v 1.185 2025/04/22 19:28:50 rillig Exp $");
62 
63 
64 typedef struct ForLoop {
65           Vector /* of 'char *' */ vars; /* Iteration variables */
66           SubstringWords items;         /* Substitution items */
67           Buffer body;                  /* Unexpanded body of the loop */
68           unsigned nextItem;  /* Where to continue iterating */
69 } ForLoop;
70 
71 
72 static ForLoop *accumFor;     /* Loop being accumulated */
73 
74 
/*
 * Advance *pp past any mix of whitespace characters and backslash-newline
 * pairs.  See LK_FOR_BODY.
 */
static void
skip_whitespace_or_line_continuation(const char **pp)
{
	const char *cur = *pp;

	for (;;) {
		if (ch_isspace(*cur)) {
			cur++;
			continue;
		}
		/* Anything other than a line continuation ends the skip. */
		if (cur[0] != '\\' || cur[1] != '\n')
			break;
		cur += 2;
	}

	*pp = cur;
}
90 
91 static ForLoop *
ForLoop_New(void)92 ForLoop_New(void)
93 {
94           ForLoop *f = bmake_malloc(sizeof *f);
95 
96           Vector_Init(&f->vars, sizeof(char *));
97           SubstringWords_Init(&f->items);
98           Buf_Init(&f->body);
99           f->nextItem = 0;
100 
101           return f;
102 }
103 
104 void
ForLoop_Free(ForLoop * f)105 ForLoop_Free(ForLoop *f)
106 {
107           while (f->vars.len > 0)
108                     free(*(char **)Vector_Pop(&f->vars));
109           Vector_Done(&f->vars);
110 
111           SubstringWords_Free(f->items);
112           Buf_Done(&f->body);
113 
114           free(f);
115 }
116 
117 char *
ForLoop_Details(const ForLoop * f)118 ForLoop_Details(const ForLoop *f)
119 {
120           size_t i, n;
121           const char **vars;
122           const Substring *items;
123           Buffer buf;
124 
125           n = f->vars.len;
126           vars = f->vars.items;
127           assert(f->nextItem >= n);
128           items = f->items.words + f->nextItem - n;
129 
130           Buf_Init(&buf);
131           for (i = 0; i < n; i++) {
132                     if (i > 0)
133                               Buf_AddStr(&buf, ", ");
134                     Buf_AddStr(&buf, vars[i]);
135                     Buf_AddStr(&buf, " = ");
136                     Buf_AddRange(&buf, items[i].start, items[i].end);
137           }
138           return Buf_DoneData(&buf);
139 }
140 
/*
 * Whether the character may occur in the name of an iteration variable.
 * Rejected are '$', ':', the backslash, and the four characters '(', ')',
 * '{' and '}'.
 */
static bool
IsValidInVarname(char c)
{
	switch (c) {
	case '$':
	case ':':
	case '\\':
	case '(':
	case ')':
	case '{':
	case '}':
		return false;
	default:
		return true;
	}
}
147 
148 static void
ForLoop_ParseVarnames(ForLoop * f,const char ** pp)149 ForLoop_ParseVarnames(ForLoop *f, const char **pp)
150 {
151           const char *p = *pp, *start;
152 
153           for (;;) {
154                     cpp_skip_whitespace(&p);
155                     if (*p == '\0') {
156                               Parse_Error(PARSE_FATAL, "missing `in' in for");
157                               goto cleanup;
158                     }
159 
160                     for (start = p; *p != '\0' && !ch_isspace(*p); p++)
161                               if (!IsValidInVarname(*p))
162                                         goto invalid_variable_name;
163 
164                     if (p - start == 2 && memcmp(start, "in", 2) == 0)
165                               break;
166 
167                     *(char **)Vector_Push(&f->vars) = bmake_strsedup(start, p);
168           }
169 
170           if (f->vars.len == 0) {
171                     Parse_Error(PARSE_FATAL, "no iteration variables in for");
172                     return;
173           }
174 
175           *pp = p;
176           return;
177 
178 invalid_variable_name:
179           Parse_Error(PARSE_FATAL,
180               "invalid character '%c' in .for loop variable name", *p);
181 cleanup:
182           while (f->vars.len > 0)
183                     free(*(char **)Vector_Pop(&f->vars));
184 }
185 
186 static bool
ForLoop_ParseItems(ForLoop * f,const char * p)187 ForLoop_ParseItems(ForLoop *f, const char *p)
188 {
189           char *items;
190           int parseErrorsBefore = parseErrors;
191 
192           cpp_skip_whitespace(&p);
193 
194           items = Var_Subst(p, SCOPE_GLOBAL, VARE_EVAL);
195           f->items = Substring_Words(
196               parseErrors == parseErrorsBefore ? items : "", false);
197           free(items);
198 
199           if (f->items.len == 1 && Substring_IsEmpty(f->items.words[0]))
200                     f->items.len = 0;   /* .for var in ${:U} */
201 
202           if (f->items.len % f->vars.len != 0) {
203                     Parse_Error(PARSE_FATAL,
204                         "Wrong number of words (%u) in .for "
205                         "substitution list with %u variables",
206                         (unsigned)f->items.len, (unsigned)f->vars.len);
207                     return false;
208           }
209 
210           return true;
211 }
212 
/* Whether p starts with the word "for", followed by whitespace. */
static bool
IsFor(const char *p)
{
	if (p[0] != 'f' || p[1] != 'o' || p[2] != 'r')
		return false;
	return ch_isspace(p[3]);
}
218 
/*
 * Whether p starts with the word "endfor", followed by whitespace or the
 * end of the string.
 */
static bool
IsEndfor(const char *p)
{
	/* Reject on the first character before doing the full comparison. */
	if (p[0] != 'e')
		return false;
	if (strncmp(p, "endfor", 6) != 0)
		return false;
	return p[6] == '\0' || ch_isspace(p[6]);
}
225 
226 /*
227  * Evaluate the for loop in the passed line. The line looks like this:
228  *        .for <varname...> in <value...>
229  *
230  * Results:
231  *        0         not a .for directive
232  *        1         found a .for directive
233  *        -1        erroneous .for directive
234  */
235 int
For_Eval(const char * line)236 For_Eval(const char *line)
237 {
238           const char *p;
239           ForLoop *f;
240 
241           p = line + 1;                 /* skip the '.' */
242           skip_whitespace_or_line_continuation(&p);
243 
244           if (IsFor(p)) {
245                     p += 3;
246 
247                     f = ForLoop_New();
248                     ForLoop_ParseVarnames(f, &p);
249                     if (f->vars.len > 0 && !ForLoop_ParseItems(f, p))
250                               f->items.len = 0;   /* don't iterate */
251 
252                     accumFor = f;
253                     return 1;
254           } else if (IsEndfor(p)) {
255                     Parse_Error(PARSE_FATAL, "for-less endfor");
256                     return -1;
257           } else
258                     return 0;
259 }
260 
261 /*
262  * Add another line to the .for loop that is being built up.
263  * Returns false when the matching .endfor is reached.
264  */
265 bool
For_Accum(const char * line,int * forLevel)266 For_Accum(const char *line, int *forLevel)
267 {
268           const char *p = line;
269 
270           if (*p == '.') {
271                     p++;
272                     skip_whitespace_or_line_continuation(&p);
273 
274                     if (IsEndfor(p)) {
275                               DEBUG1(FOR, "For: end for %d\n", *forLevel);
276                               if (--*forLevel == 0)
277                                         return false;
278                     } else if (IsFor(p)) {
279                               (*forLevel)++;
280                               DEBUG1(FOR, "For: new loop %d\n", *forLevel);
281                     }
282           }
283 
284           Buf_AddStr(&accumFor->body, line);
285           Buf_AddByte(&accumFor->body, '\n');
286           return true;
287 }
288 
/*
 * Determine the length of the expression that follows a '$' in an item of
 * a .for loop.  The range from s to e is the text after the '$'.
 *
 * The scan is deliberately naive: it only counts nested occurrences of the
 * opening and closing delimiter, ignoring escape characters and unusual
 * delimiters in modifiers like ':S}from}to}'.
 *
 * Returns:
 *	0	if the range is empty or the closing delimiter is missing,
 *		in which case the caller escapes the '$'
 *	1	if the text does not start with '(' or '{', that is, for a
 *		single-character variable name
 *	n	the length up to and including the matching ')' or '}'
 */
static size_t
ExprLen(const char *s, const char *e)
{
	char opener, closer;
	int nesting = 1;
	const char *p;

	if (s == e)
		return 0;

	opener = *s;
	switch (opener) {
	case '(':
		closer = ')';
		break;
	case '{':
		closer = '}';
		break;
	default:
		return 1;
	}

	p = s;
	while (++p != e) {
		if (*p == opener)
			nesting++;
		else if (*p == closer) {
			if (--nesting == 0)
				return (size_t)(p - s) + 1;
		}
	}

	/* The expression is not terminated within the range. */
	return 0;
}
326 
327 /*
328  * While expanding the body of a .for loop, write the item as a ${:U...}
329  * expression, escaping characters as needed.  The result is later unescaped
330  * by ApplyModifier_Defined.
331  */
332 static void
AddEscaped(Buffer * body,Substring item,char endc)333 AddEscaped(Buffer *body, Substring item, char endc)
334 {
335           const char *p;
336           char ch;
337 
338           for (p = item.start; p != item.end;) {
339                     ch = *p;
340                     if (ch == '$') {
341                               size_t len = ExprLen(p + 1, item.end);
342                               if (len != 0) {
343                                         /*
344                                          * XXX: Should a '\' be added here?
345                                          * See directive-for-escape.mk, ExprLen.
346                                          */
347                                         Buf_AddBytes(body, p, 1 + len);
348                                         p += 1 + len;
349                                         continue;
350                               }
351                               Buf_AddByte(body, '\\');
352                     } else if (ch == ':' || ch == '\\' || ch == endc)
353                               Buf_AddByte(body, '\\');
354                     else if (ch == '\n') {
355                               Parse_Error(PARSE_FATAL, "newline in .for value");
356                               ch = ' '; /* prevent newline injection */
357                     }
358                     Buf_AddByte(body, ch);
359                     p++;
360           }
361 }
362 
363 /*
364  * While expanding the body of a .for loop, replace the variable name of an
365  * expression like ${i} or ${i:...} or $(i) or $(i:...) with ":Uvalue".
366  */
367 static void
ForLoop_SubstVarLong(ForLoop * f,unsigned firstItem,Buffer * body,const char ** pp,char endc,const char ** inout_mark)368 ForLoop_SubstVarLong(ForLoop *f, unsigned firstItem, Buffer *body,
369                          const char **pp, char endc, const char **inout_mark)
370 {
371           size_t i;
372           const char *start = *pp;
373           const char **varnames = Vector_Get(&f->vars, 0);
374 
375           for (i = 0; i < f->vars.len; i++) {
376                     const char *p = start;
377 
378                     if (!cpp_skip_string(&p, varnames[i]))
379                               continue;
380                     /* XXX: why test for backslash here? */
381                     if (*p != ':' && *p != endc && *p != '\\')
382                               continue;
383 
384                     /*
385                      * Found a variable match.  Skip over the variable name and
386                      * instead add ':U<value>' to the current body.
387                      */
388                     Buf_AddRange(body, *inout_mark, start);
389                     Buf_AddStr(body, ":U");
390                     AddEscaped(body, f->items.words[firstItem + i], endc);
391 
392                     *inout_mark = p;
393                     *pp = p;
394                     return;
395           }
396 }
397 
398 /*
399  * While expanding the body of a .for loop, replace single-character
400  * expressions like $i with their ${:U...} expansion.
401  */
402 static void
ForLoop_SubstVarShort(ForLoop * f,unsigned firstItem,Buffer * body,const char * p,const char ** inout_mark)403 ForLoop_SubstVarShort(ForLoop *f, unsigned firstItem, Buffer *body,
404                           const char *p, const char **inout_mark)
405 {
406           char ch = *p;
407           const char **vars;
408           size_t i;
409 
410           /* Skip $$ and stupid ones. */
411           if (ch == '}' || ch == ')' || ch == ':' || ch == '$')
412                     return;
413 
414           vars = Vector_Get(&f->vars, 0);
415           for (i = 0; i < f->vars.len; i++) {
416                     const char *varname = vars[i];
417                     if (varname[0] == ch && varname[1] == '\0')
418                               goto found;
419           }
420           return;
421 
422 found:
423           Buf_AddRange(body, *inout_mark, p);
424           *inout_mark = p + 1;
425 
426           /* Replace $<ch> with ${:U<value>} */
427           Buf_AddStr(body, "{:U");
428           AddEscaped(body, f->items.words[firstItem + i], '}');
429           Buf_AddByte(body, '}');
430 }
431 
432 /*
433  * Compute the body for the current iteration by copying the unexpanded body,
434  * replacing the expressions for the iteration variables on the way.
435  *
436  * Using expressions ensures that the .for loop can't generate
437  * syntax, and that the later parsing will still see an expression.
438  * This code assumes that the variable with the empty name is never defined,
439  * see unit-tests/varname-empty.mk.
440  *
441  * The detection of substitutions of the loop control variables is naive.
442  * Many of the modifiers use '\$' instead of '$$' to escape '$', so it is
443  * possible to contrive a makefile where an unwanted substitution happens.
444  * See unit-tests/directive-for-escape.mk.
445  */
446 static void
ForLoop_SubstBody(ForLoop * f,unsigned firstItem,Buffer * body)447 ForLoop_SubstBody(ForLoop *f, unsigned firstItem, Buffer *body)
448 {
449           const char *p, *end;
450           const char *mark;   /* where the last substitution left off */
451 
452           Buf_Clear(body);
453 
454           mark = f->body.data;
455           end = f->body.data + f->body.len;
456           for (p = mark; (p = strchr(p, '$')) != NULL;) {
457                     if (p[1] == '{' || p[1] == '(') {
458                               char endc = p[1] == '{' ? '}' : ')';
459                               p += 2;
460                               ForLoop_SubstVarLong(f, firstItem, body,
461                                   &p, endc, &mark);
462                     } else {
463                               ForLoop_SubstVarShort(f, firstItem, body,
464                                   p + 1, &mark);
465                               p += 2;
466                     }
467           }
468 
469           Buf_AddRange(body, mark, end);
470 }
471 
472 /*
473  * Compute the body for the current iteration by copying the unexpanded body,
474  * replacing the expressions for the iteration variables on the way.
475  */
476 bool
For_NextIteration(ForLoop * f,Buffer * body)477 For_NextIteration(ForLoop *f, Buffer *body)
478 {
479           if (f->nextItem == f->items.len)
480                     return false;
481 
482           f->nextItem += (unsigned)f->vars.len;
483           ForLoop_SubstBody(f, f->nextItem - (unsigned)f->vars.len, body);
484           if (DEBUG(FOR)) {
485                     char *details = ForLoop_Details(f);
486                     debug_printf("For: loop body with %s:\n%s",
487                         details, body->data);
488                     free(details);
489           }
490           return true;
491 }
492 
493 /* Break out of the .for loop. */
494 void
For_Break(ForLoop * f)495 For_Break(ForLoop *f)
496 {
497           f->nextItem = (unsigned)f->items.len;
498 }
499 
500 /* Run the .for loop, imitating the actions of an include file. */
501 void
For_Run(unsigned headLineno,unsigned bodyReadLines)502 For_Run(unsigned headLineno, unsigned bodyReadLines)
503 {
504           Buffer buf;
505           ForLoop *f = accumFor;
506           accumFor = NULL;
507 
508           if (f->items.len > 0) {
509                     Buf_Init(&buf);
510                     Parse_PushInput(NULL, headLineno, bodyReadLines, buf, f);
511           } else
512                     ForLoop_Free(f);
513 }
514