1 /*	$OpenPackages$ */
2 /*	$OpenBSD: str.c,v 1.21 2004/04/07 13:11:36 espie Exp $	*/
3 /*	$NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $	*/
4 
5 /*-
6  * Copyright (c) 1988, 1989, 1990, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * Copyright (c) 1989 by Berkeley Softworks
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * Adam de Boor.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <ctype.h>
40 #include <string.h>
41 #include "config.h"
42 #include "defines.h"
43 #include "str.h"
44 #include "memory.h"
45 #include "buf.h"
46 
/*-
 * Str_concati --
 *	Build a new NUL-terminated string out of the two intervals
 *	[s1, e1) and [s2, e2).  When sep is non-zero, that character is
 *	placed between the two parts.
 *
 * returns --
 *	The concatenated string, in memory obtained from emalloc().
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
    size_t len1, len2;
    char *result, *t;

    /* get the length of both intervals */
    len1 = e1 - s1;
    len2 = e2 - s2;

    /* both parts, the optional separator, and the terminating NUL */
    result = emalloc(len1 + len2 + (sep ? 1 : 0) + 1);

    /*
     * Copy exactly [s1, e1).  The byte at e1 is not part of the
     * interval, so it must not be read just to be overwritten by the
     * separator afterwards.
     */
    t = result;
    memcpy(t, s1, len1);
    t += len1;

    /* add separator character */
    if (sep)
	*t++ = sep;

    /* copy second interval and terminate */
    memcpy(t, s2, len2);
    t[len2] = '\0';
    return result;
}
75 
/*-
 * brk_string --
 *	Split a string into an array of words separated by spaces, tabs
 *	or a newline, honouring single and double quotes and backslash
 *	escapes (\b \f \n \r \t; any other escaped character stands for
 *	itself).  Leading tabs/spaces are skipped.
 *
 *	The words are written into a scratch area allocated here and
 *	stored through buffer; every entry of the returned array points
 *	into that area.  The first word is considered started from the
 *	outset, so an input holding nothing after the leading blanks
 *	still produces one empty word.  Scanning stops at the first
 *	newline or NUL that terminates a word.
 *
 * returns --
 *	Pointer to the NULL-terminated array of pointers to the words;
 *	the number of words is stored through store_argc.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
    int capacity = 50;
    int count = 0;
    char quote = '\0';		/* active quote character, if any */
    char c;
    const char *in;
    char *word;			/* start of the word being built */
    char *out;			/* next free byte of the scratch area */
    char **argv = emalloc((capacity + 1) * sizeof(char *));

    /* skip leading space chars. */
    while (*str == ' ' || *str == '\t')
	str++;

    /* the copy never grows beyond the input, terminator included */
    *buffer = emalloc(strlen(str) + 1);

    /*
     * Copy the string, interpreting backslashes and quotes and
     * recording where every word begins.
     */
    for (in = str, word = out = *buffer;; in++) {
	c = *in;

	if (c == '"' || c == '\'') {
	    if (quote == c) {
		/* matching close quote */
		quote = '\0';
		continue;
	    }
	    if (quote == '\0') {
		quote = c;
		/* Don't miss "" or '' */
		if (word != NULL || in[1] != quote)
		    continue;
		/* the quote stored below is skipped by the word start */
		word = out + 1;
	    }
	    /* otherwise: the other kind of quote, taken literally */
	} else if (c == '\0' ||
	    (quote == '\0' && (c == ' ' || c == '\t' || c == '\n'))) {
	    if (word == NULL) {
		/* between words: only NUL ends the scan */
		if (c == '\0')
		    break;
		continue;
	    }
	    /*
	     * end of a token -- make sure there's enough argv
	     * space and save off a pointer.
	     */
	    *out++ = '\0';
	    if (count == capacity) {
		capacity *= 2;		/* ramp up fast */
		argv = erealloc(argv, (capacity + 1) * sizeof(char *));
	    }
	    argv[count++] = word;
	    word = NULL;
	    if (c == '\n' || c == '\0')
		break;
	    continue;
	} else if (c == '\\') {
	    char next = in[1];

	    if (next == '\0' || next == '\n') {
		/* dangling backslash: keep it, leave next for the loop */
		c = '\\';
	    } else {
		in++;
		switch (next) {
		case 'b':
		    c = '\b';
		    break;
		case 'f':
		    c = '\f';
		    break;
		case 'n':
		    c = '\n';
		    break;
		case 'r':
		    c = '\r';
		    break;
		case 't':
		    c = '\t';
		    break;
		default:
		    c = next;
		    break;
		}
	    }
	}

	/* ordinary character: it belongs to the current word */
	if (word == NULL)
	    word = out;
	*out++ = c;
    }

    argv[count] = NULL;
    *store_argc = count;
    return argv;
}
192 
193 
/*-
 * iterate_words --
 *	Find the next word in a string, starting the scan at *end.
 *	Leading whitespace (as defined by isspace) is skipped.  A word
 *	ends at the first space or tab that is not inside single or
 *	double quotes, or at the end of the string; a backslash protects
 *	the character that follows it.
 *
 * returns --
 *	Pointer to the first character of the word, with *end updated to
 *	point just past it, or NULL (leaving *end untouched) when only
 *	whitespace remains.
 */
const char *
iterate_words(const char **end)
{
    const char	*start, *p;
    char	state = 0;	/* quote character currently open, or 0 */

    start = *end;

    /* <ctype.h> functions are only defined for unsigned char values */
    while (isspace((unsigned char)*start))
	start++;
    if (*start == '\0')
	return NULL;

    for (p = start;; p++)
	switch(*p) {
	    case '\\':
		/* skip the escaped character, unless it is the NUL */
		if (p[1] != '\0')
		    p++;
		break;
	    case '\'':
	    case '"':
		if (state == *p)
		    state = 0;
		else if (state == 0)
		    state = *p;
		break;
	    case ' ':
	    case '\t':
		/* blanks inside quotes belong to the word */
		if (state != 0)
		    break;
		/* FALLTHROUGH */
	    case '\0':
		*end = p;
		return start;
	    default:
		break;
	    }
}
231 
/*-
 * Str_Matchi --
 *	Match the interval [string, estring) against the shell-like
 *	pattern [pattern, epattern).  Recognized pattern elements:
 *	    *		any substring, possibly empty
 *	    ?		any single character
 *	    [...]	one character out of a list and/or ranges (a-z)
 *	    [!...]	(or [^...]) one character not in the list
 *	    \c		the character c, taken literally
 *	Neither interval is read at or beyond its end pointer.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
    while (pattern != epattern) {
	/* Check for a "*" as the next pattern character.  It matches
	 * any substring.  We handle this by calling ourselves
	 * recursively for each postfix of string, until either we
	 * match or we reach the end of the string.  */
	if (*pattern == '*') {
	    pattern++;
	    /* Skip over contiguous sequences of `?*', so that recursive
	     * calls only occur on `real' characters.  */
	    while (pattern != epattern &&
		(*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
		    if (string == estring)
			return false;
		    else
			string++;
		}
		pattern++;
	    }
	    if (pattern == epattern)
		return true;
	    for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, epattern))
		    return true;
	    return false;
	} else if (string == estring)
	    return false;
	/* Check for a "[" as the next pattern character.  It is
	 * followed by a list of characters that are acceptable, or
	 * by a range (two characters separated by "-").  */
	else if (*pattern == '[') {
	    pattern++;
	    if (pattern == epattern)
		return false;
	    if (*pattern == '!' || *pattern == '^') {
		pattern++;
		if (pattern == epattern)
		    return false;
		/* Negative match */
		for (;;) {
		    if (*pattern == '\\') {
			if (++pattern == epattern)
			    return false;
		    }
		    if (*pattern == *string)
			return false;
		    /* only look for a range inside the pattern */
		    if (pattern + 1 != epattern && pattern[1] == '-') {
			if (pattern + 2 == epattern)
			    return false;
			if (*pattern < *string && *string <= pattern[2])
			    return false;
			if (pattern[2] <= *string && *string < *pattern)
			    return false;
			pattern += 3;
		    } else
			pattern++;
		    if (pattern == epattern)
			return false;
		    /* The test for ']' is done at the end so that ']'
		     * can be used at the start of the range without '\' */
		    if (*pattern == ']')
			break;
		}
	    } else {
		for (;;) {
		    if (*pattern == '\\') {
			if (++pattern == epattern)
			    return false;
		    }
		    if (*pattern == *string)
			break;
		    /* only look for a range inside the pattern */
		    if (pattern + 1 != epattern && pattern[1] == '-') {
			if (pattern + 2 == epattern)
			    return false;
			if (*pattern < *string && *string <= pattern[2])
			    break;
			if (pattern[2] <= *string && *string < *pattern)
			    break;
			pattern += 3;
		    } else
			pattern++;
		    /* The test for ']' is done at the end so that ']'
		     * can be used at the start of the range without '\' */
		    if (pattern == epattern || *pattern == ']')
			return false;
		}
		/* Found matching character, skip over rest of class,
		 * never looking at or past epattern.  */
		while (pattern != epattern && *pattern != ']') {
		    if (*pattern == '\\' && ++pattern == epattern)
			break;
		    pattern++;
		}
		/* A non-terminated character class is ok: it was the
		 * last pattern element, so the character it matched
		 * must be the last one of the string.  Returning here
		 * keeps pattern from being stepped beyond epattern.  */
		if (pattern == epattern)
		    return string + 1 == estring;
	    }
	}
	/* '?' matches any single character, so shunt test.  */
	else if (*pattern != '?') {
	    /* If the next pattern character is '\', just strip off the
	     * '\' so we do exact matching on the character that follows.  */
	    if (*pattern == '\\') {
		if (++pattern == epattern)
		    return false;
	    }
	    /* There's no special character.  Just make sure that
	     * the next characters of each string match.  */
	    if (*pattern != *string)
		return false;
	}
	pattern++;
	string++;
    }
    /* pattern exhausted: success only if the string is too */
    return string == estring;
}
354 
355 
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match a word against a System V style pattern, in which a single
 *	'%' stands for any run of characters.  An empty pattern matches
 *	the whole word.  Without a '%', the pattern has to be equal to
 *	some tail of the word, and the part in front of that tail is
 *	reported as the match.
 *
 * Results:
 *	Returns the beginning position of the matched part, or NULL if
 *	the word does not match.  The number of characters matched is
 *	stored through len.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
    const char *percent;
    const char *stem;
    const char *tail;

    /* Null pattern is the whole string.  */
    if (pattern[0] == '\0') {
	*len = strlen(word);
	return word;
    }

    percent = strchr(pattern, '%');
    if (percent != NULL) {
	/* The literal text before '%' has to start the word.  */
	while (pattern != percent) {
	    if (*word == '\0' || *word != *pattern)
		return NULL;
	    word++;
	    pattern++;
	}

	/* Step over the '%' itself.  */
	pattern++;
	if (*pattern == '\0') {
	    /* Nothing follows: the rest of the word is the match.  */
	    *len = strlen(word);
	    return word;
	}
    }

    /* Look for a tail of the word equal to what is left of the pattern. */
    stem = word;
    for (tail = word;; tail++) {
	if (strcmp(pattern, tail) == 0) {
	    *len = tail - stem;
	    return stem;
	}
	if (*tail == '\0')
	    return NULL;
    }
}
407 
408 
409 /*-
410  *-----------------------------------------------------------------------
411  * Str_SYSVSubst --
412  *	Substitute '%' in the pattern with len characters from src.
413  *	If the pattern does not contain a '%' prepend len characters
414  *	from src.
415  *
416  * Side Effects:
417  *	Adds result to buf
418  *-----------------------------------------------------------------------
419  */
420 void
Str_SYSVSubst(Buffer buf,const char * pat,const char * src,size_t len)421 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
422 {
423     const char *m;
424 
425     if ((m = strchr(pat, '%')) != NULL) {
426 	/* Copy the prefix.  */
427 	Buf_Addi(buf, pat, m);
428 	/* Skip the %.	*/
429 	pat = m + 1;
430     }
431 
432     /* Copy the pattern.  */
433     Buf_AddChars(buf, len, src);
434 
435     /* Append the rest.  */
436     Buf_AddString(buf, pat);
437 }
438 
/*-
 * Str_dupi --
 *	Copy the interval [begin, end) into a NUL-terminated string.
 *
 * returns --
 *	The copy, in memory obtained from emalloc().
 */
char *
Str_dupi(const char *begin, const char *end)
{
    size_t n = end - begin;
    char *copy = emalloc(n + 1);

    memcpy(copy, begin, n);
    copy[n] = '\0';
    return copy;
}
449 
/*-
 * escape_dupi --
 *	Copy the interval [begin, end) into a NUL-terminated string,
 *	removing the backslash in front of every character that belongs
 *	to set.  A backslash followed by a character outside of set, or
 *	ending the interval, is copied as is.
 *
 * returns --
 *	The copy, in memory obtained from emalloc().
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
    /* the result is never longer than the interval itself */
    char *copy = emalloc(end - begin + 1);
    char *out = copy;
    const char *in;

    for (in = begin; in != end; in++) {
	if (*in == '\\') {
	    if (++in == end) {
		/* trailing backslash: nothing left to escape */
		*out++ = '\\';
		break;
	    }
	    /* keep the backslash unless it escapes a member of set */
	    if (strchr(set, *in) == NULL)
		*out++ = '\\';
	}
	*out++ = *in;
    }
    *out = '\0';
    return copy;
}
471 
/*-
 * Str_rchri --
 *	Search the interval [begin, end) backwards for the character c.
 *
 * returns --
 *	Pointer to the last occurrence of c in the interval, or NULL if
 *	c does not occur (in particular when the interval is empty).
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
    while (end != begin) {
	end--;
	if (*end == c)
	    return (char *)end;
    }
    return NULL;
}
482