xref: /trueos/usr.bin/make/str.c (revision bcd0e15cf642d6e5bf78ee585ad282b0e3061864)
1 /*-
2  * Copyright (c) 1988, 1989, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1989 by Berkeley Softworks
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Adam de Boor.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  * @(#)str.c	5.8 (Berkeley) 6/1/90
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include <stdlib.h>
45 #include <string.h>
46 
47 #include "buf.h"
48 #include "str.h"
49 #include "util.h"
50 
51 /**
52  * Initialize the argument array object.  The array is initially
53  * eight positions, and will be expanded as necessary.  The first
54  * position is set to NULL since everything ignores it.  We allocate
55  * (size + 1) since we need space for the terminating NULL.  The
56  * buffer is set to NULL, since no common buffer is allocated yet.
57  */
58 void
ArgArray_Init(ArgArray * aa)59 ArgArray_Init(ArgArray *aa)
60 {
61 
62 	aa->size = 8;
63 	aa->argv = emalloc((aa->size + 1) * sizeof(char *));
64 	aa->argc = 0;
65 	aa->argv[aa->argc++] = NULL;
66 	aa->len = 0;
67 	aa->buffer = NULL;
68 }
69 
70 /**
71  * Cleanup the memory allocated for in the argument array object.
72  */
73 void
ArgArray_Done(ArgArray * aa)74 ArgArray_Done(ArgArray *aa)
75 {
76 
77 	if (aa->buffer == NULL) {
78 		int	i;
79 		/* args are individually allocated */
80 		for (i = 0; i < aa->argc; ++i) {
81 			if (aa->argv[i]) {
82 				free(aa->argv[i]);
83 				aa->argv[i] = NULL;
84 			}
85 		}
86 	} else {
87 		/* args are part of a single allocation */
88 		free(aa->buffer);
89 		aa->buffer = NULL;
90 	}
91 	free(aa->argv);
92 	aa->argv = NULL;
93 	aa->argc = 0;
94 	aa->size = 0;
95 }
96 
97 /*-
98  * str_concat --
99  *	concatenate the two strings, inserting a space or slash between them.
100  *
101  * returns --
102  *	the resulting string in allocated space.
103  */
104 char *
str_concat(const char * s1,const char * s2,int flags)105 str_concat(const char *s1, const char *s2, int flags)
106 {
107 	int len1, len2;
108 	char *result;
109 
110 	/* get the length of both strings */
111 	len1 = strlen(s1);
112 	len2 = strlen(s2);
113 
114 	/* allocate length plus separator plus EOS */
115 	result = emalloc(len1 + len2 + 2);
116 
117 	/* copy first string into place */
118 	memcpy(result, s1, len1);
119 
120 	/* add separator character */
121 	if (flags & STR_ADDSPACE) {
122 		result[len1] = ' ';
123 		++len1;
124 	} else if (flags & STR_ADDSLASH) {
125 		result[len1] = '/';
126 		++len1;
127 	}
128 
129 	/* copy second string plus EOS into place */
130 	memcpy(result + len1, s2, len2 + 1);
131 
132 	return (result);
133 }
134 
135 /**
136  * Fracture a string into an array of words (as delineated by tabs or
137  * spaces) taking quotation marks into account.  Leading tabs/spaces
138  * are ignored.
139  */
140 void
brk_string(ArgArray * aa,const char str[],Boolean expand)141 brk_string(ArgArray *aa, const char str[], Boolean expand)
142 {
143 	char	inquote;
144 	char	*start;
145 	char	*arg;
146 
147 	/* skip leading space chars. */
148 	for (; *str == ' ' || *str == '\t'; ++str)
149 		continue;
150 
151 	ArgArray_Init(aa);
152 
153 	aa->buffer = estrdup(str);
154 
155 	arg = aa->buffer;
156 	start = arg;
157 	inquote = '\0';
158 
159 	/*
160 	 * copy the string; at the same time, parse backslashes,
161 	 * quotes and build the argument list.
162 	 */
163 	for (;;) {
164 		switch (str[0]) {
165 		case '"':
166 		case '\'':
167 			if (inquote == '\0') {
168 				inquote = str[0];
169 				if (expand)
170 					break;
171 				if (start == NULL)
172 					start = arg;
173 			} else if (inquote == str[0]) {
174 				inquote = '\0';
175 				/* Don't miss "" or '' */
176 				if (start == NULL)
177 					start = arg;
178 				if (expand)
179 					break;
180 			} else {
181 				/* other type of quote found */
182 				if (start == NULL)
183 					start = arg;
184 			}
185 			*arg++ = str[0];
186 			break;
187 		case ' ':
188 		case '\t':
189 		case '\n':
190 			if (inquote) {
191 				if (start == NULL)
192 					start = arg;
193 				*arg++ = str[0];
194 				break;
195 			}
196 			if (start == NULL)
197 				break;
198 			/* FALLTHROUGH */
199 		case '\0':
200 			/*
201 			 * end of a token -- make sure there's enough argv
202 			 * space and save off a pointer.
203 			 */
204 			if (aa->argc == aa->size) {
205 				aa->size *= 2;		/* ramp up fast */
206 				aa->argv = erealloc(aa->argv,
207 				    (aa->size + 1) * sizeof(char *));
208 			}
209 
210 			*arg++ = '\0';
211 			if (start == NULL) {
212 				aa->argv[aa->argc] = start;
213 				return;
214 			}
215 			if (str[0] == '\n' || str[0] == '\0') {
216 				aa->argv[aa->argc++] = start;
217 				aa->argv[aa->argc] = NULL;
218 				return;
219 			} else {
220 				aa->argv[aa->argc++] = start;
221 				start = NULL;
222 				break;
223 			}
224 		case '\\':
225 			if (start == NULL)
226 				start = arg;
227 			if (expand) {
228 				switch (str[1]) {
229 				case '\0':
230 				case '\n':
231 					/* hmmm; fix it up as best we can */
232 					*arg++ = '\\';
233 					break;
234 				case 'b':
235 					*arg++ = '\b';
236 					++str;
237 					break;
238 				case 'f':
239 					*arg++ = '\f';
240 					++str;
241 					break;
242 				case 'n':
243 					*arg++ = '\n';
244 					++str;
245 					break;
246 				case 'r':
247 					*arg++ = '\r';
248 					++str;
249 					break;
250 				case 't':
251 					*arg++ = '\t';
252 					++str;
253 					break;
254 				default:
255 					*arg++ = str[1];
256 					++str;
257 					break;
258 				}
259 			} else {
260 				*arg++ = str[0];
261 				if (str[1] != '\0') {
262 					++str;
263 					*arg++ = str[0];
264 				}
265 			}
266 			break;
267 		default:
268 			if (start == NULL)
269 				start = arg;
270 			*arg++ = str[0];
271 			break;
272 		}
273 		++str;
274 	}
275 }
276 
/*
 * Quote a string for appending it to MAKEFLAGS.  According to Posix the
 * kind of quoting used here is implementation-defined; it only has to
 * ensure that parsing the contents of MAKEFLAGS in a sub-make yields the
 * same options, option arguments and macro definitions as in the calling
 * make.
 *
 * Every blank is prefixed with a backslash.  Blanks are taken to be
 * exactly space and tab (the POSIX locale definition); isblank() is
 * deliberately not used so that makes running under different locales
 * can still talk to each other.
 *
 * NOTE(review): the previous comment stated that backslashes are quoted
 * as well, but this code copies them unchanged; only space and tab are
 * escaped.
 *
 * Returns a newly allocated string owned by the caller.
 */
char *
MAKEFLAGS_quote(const char *str)
{
	const char *src;
	char *quoted, *dst;

	/* Worst case: every character gains a backslash, plus the NUL. */
	quoted = emalloc(strlen(str) * 2 + 1);

	dst = quoted;
	for (src = str; *src != '\0'; src++) {
		if (*src == ' ' || *src == '\t')
			*dst++ = '\\';
		*dst++ = *src;
	}
	*dst = '\0';

	return (quoted);
}
314 
315 void
MAKEFLAGS_break(ArgArray * aa,const char str[])316 MAKEFLAGS_break(ArgArray *aa, const char str[])
317 {
318 	char	*arg;
319 	char	*start;
320 
321 	ArgArray_Init(aa);
322 
323 	aa->buffer = strdup(str);
324 
325 	arg = aa->buffer;
326 	start = NULL;
327 
328 	for (;;) {
329 		switch (str[0]) {
330 		case ' ':
331 		case '\t':
332 			/* word separator */
333 			if (start == NULL) {
334 				/* not in a word */
335 				str++;
336 				continue;
337 			}
338 			/* FALLTHRU */
339 		case '\0':
340 			if (aa->argc == aa->size) {
341 				aa->size *= 2;
342 				aa->argv = erealloc(aa->argv,
343  				    (aa->size + 1) * sizeof(char *));
344 			}
345 
346 			*arg++ = '\0';
347 			if (start == NULL) {
348 				aa->argv[aa->argc] = start;
349 				return;
350 			}
351 			if (str[0] == '\0') {
352 				aa->argv[aa->argc++] = start;
353 				aa->argv[aa->argc] = NULL;
354 				return;
355 			} else {
356 				aa->argv[aa->argc++] = start;
357 				start = NULL;
358 				str++;
359 				continue;
360 			}
361 
362 		case '\\':
363 			if (str[1] == ' ' || str[1] == '\t')
364 				str++;
365 			break;
366 
367 		default:
368 			break;
369 		}
370 		if (start == NULL)
371 			start = arg;
372 		*arg++ = *str++;
373 	}
374 }
375 
/*
 * Str_Match --
 *
 * See if a particular string matches a particular pattern.
 *
 * The pattern may contain these special forms:
 *	*	matches any substring, including the empty one
 *	?	matches any single character
 *	[...]	matches one character out of a list; "x-y" inside the
 *		brackets denotes a range, accepted in either order
 *	\c	matches the character c literally
 *
 * A bracket list with no closing ']' is treated as if it ended at the
 * end of the pattern.
 *
 * Results: 1 is returned if string matches pattern, 0 otherwise.
 *
 * Side effects: None.
 */
int
Str_Match(const char *string, const char *pattern)
{
	char c2;

	for (;;) {
		/*
		 * At the end of the pattern the match succeeds only if the
		 * string is exhausted too.  At the end of the string only a
		 * "*" can still match.
		 */
		if (*pattern == 0)
			return (!*string);
		if (*string == 0 && *pattern != '*')
			return (0);
		/*
		 * "*" matches any substring.  Consecutive stars mean the
		 * same as one, so they are collapsed first; otherwise a
		 * trailing "**" would fail against an exhausted string.
		 * The remainder of the pattern is then tried recursively
		 * against every suffix of the string.
		 */
		if (*pattern == '*') {
			do {
				++pattern;
			} while (*pattern == '*');
			if (*pattern == 0)
				return (1);
			while (*string != 0) {
				if (Str_Match(string, pattern))
					return (1);
				++string;
			}
			return (0);
		}
		/*
		 * "?" matches any single character.
		 */
		if (*pattern == '?')
			goto thisCharOK;
		/*
		 * "[" starts a list of acceptable characters and ranges
		 * (two characters separated by "-").
		 */
		if (*pattern == '[') {
			++pattern;
			for (;;) {
				if ((*pattern == ']') || (*pattern == 0))
					return (0);
				if (*pattern == *string)
					break;
				if (pattern[1] == '-') {
					c2 = pattern[2];
					if (c2 == 0)
						return (0);
					if ((*pattern <= *string) &&
					    (c2 >= *string))
						break;
					if ((*pattern >= *string) &&
					    (c2 <= *string))
						break;
					pattern += 2;
				}
				++pattern;
			}
			/* Skip the rest of the list. */
			while ((*pattern != ']') && (*pattern != 0))
				++pattern;
			if (*pattern == 0) {
				/*
				 * Unterminated list: stay on the NUL so the
				 * next iteration sees the end of the pattern
				 * instead of reading beyond it.
				 */
				++string;
				continue;
			}
			goto thisCharOK;
		}
		/*
		 * A backslash is stripped so that the character following
		 * it is matched exactly.
		 */
		if (*pattern == '\\') {
			++pattern;
			if (*pattern == 0)
				return (0);
		}
		/*
		 * No special character: the next characters of pattern and
		 * string have to be identical.
		 */
		if (*pattern != *string)
			return (0);
thisCharOK:	++pattern;
		++string;
	}
}
474 
475 
/**
 * Str_SYSVMatch
 *	Match a word against a System V style pattern in which '%' is
 *	the wildcard.
 *
 *	The part of the pattern before '%' has to be a prefix of the
 *	word and the part after it a suffix; whatever lies in between
 *	is the match.  A pattern without '%' is treated purely as a
 *	suffix.  An empty pattern matches the whole word, and an empty
 *	word never matches.
 *
 * Results:
 *	Returns a pointer to the start of the matched part inside word,
 *	or NULL if there is no match.  The length of the matched part
 *	is stored in *len; on failure *len is only written (as 0) when
 *	word is empty.
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, int *len)
{
	const char *percent;
	const char *tail = pattern;	/* unmatched rest of the pattern */
	const char *stem = word;	/* where the matched part begins */
	const char *cur;

	/* Nothing can be matched against an empty word. */
	if (word[0] == '\0') {
		*len = 0;
		return (NULL);
	}

	/* An empty pattern stands for the entire word. */
	if (pattern[0] == '\0') {
		*len = strlen(word);
		return (word);
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* Everything before the '%' has to match literally. */
		while (tail != percent && *stem != '\0' && *stem == *tail) {
			stem++;
			tail++;
		}
		if (tail != percent)
			return (NULL);

		/* Step over the '%'; with nothing after it, take the rest. */
		tail++;
		if (*tail == '\0') {
			*len = strlen(stem);
			return (stem);
		}
	}

	/* Look for the position at which the remaining pattern is a suffix. */
	for (cur = stem; ; cur++) {
		if (strcmp(tail, cur) == 0) {
			*len = cur - stem;
			return (stem);
		}
		if (*cur == '\0')
			break;
	}

	return (NULL);
}
531 
532 
533 /**
534  * Str_SYSVSubst
535  *	Substitute '%' on the pattern with len characters from src.
536  *	If the pattern does not contain a '%' prepend len characters
537  *	from src.
538  *
539  * Side Effects:
540  *	Places result on buf
541  */
542 void
Str_SYSVSubst(Buffer * buf,const char * pat,const char * src,int len)543 Str_SYSVSubst(Buffer *buf, const char *pat, const char *src, int len)
544 {
545 	const char *m;
546 
547 	if ((m = strchr(pat, '%')) != NULL) {
548 		/* Copy the prefix */
549 		Buf_AppendRange(buf, pat, m);
550 		/* skip the % */
551 		pat = m + 1;
552 	}
553 
554 	/* Copy the pattern */
555 	Buf_AddBytes(buf, len, (const Byte *)src);
556 
557 	/* append the rest */
558 	Buf_Append(buf, pat);
559 }
560