1 /* $OpenPackages$ */
2 /* $OpenBSD: str.c,v 1.21 2004/04/07 13:11:36 espie Exp $ */
3 /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */
4
5 /*-
6 * Copyright (c) 1988, 1989, 1990, 1993
7 * The Regents of the University of California. All rights reserved.
8 * Copyright (c) 1989 by Berkeley Softworks
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * Adam de Boor.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <ctype.h>
40 #include <string.h>
41 #include "config.h"
42 #include "defines.h"
43 #include "str.h"
44 #include "memory.h"
45 #include "buf.h"
46
/*
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2) into a newly
 *	allocated, NUL-terminated string.  When sep is non-zero, that
 *	character is inserted between the two parts.
 *
 * returns --
 *	The new string, obtained from emalloc().  This function keeps no
 *	reference to it.
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1, len2, seplen;
	char *result, *t;

	/* get the length of both strings */
	len1 = e1 - s1;
	len2 = e2 - s2;

	/* space for separator */
	seplen = sep ? 1 : 0;
	result = emalloc(len1 + seplen + len2 + 1);

	/*
	 * Copy exactly the first interval.  The separator slot must not be
	 * filled by copying one extra byte from s1, since nothing guarantees
	 * that the byte at e1 is readable.
	 */
	t = result;
	memcpy(t, s1, len1);
	t += len1;

	/* add separator character */
	if (sep)
		*t++ = sep;

	/* copy second string, then terminate */
	memcpy(t, s2, len2);
	t[len2] = '\0';
	return result;
}
75
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks and backslashes into account.
 *	Leading tabs/spaces are ignored.
 *
 *	str		string to split; it is only read
 *	store_argc	receives the number of words found
 *	buffer		receives a newly allocated buffer holding the
 *			NUL-terminated words
 *
 *	Quoting rules, as implemented below:
 *	- ' and " open a quoted section that runs to the matching quote;
 *	  the quotes themselves are dropped, and the other kind of quote
 *	  is an ordinary character inside the section.
 *	- Blanks and newlines inside a quoted section are ordinary
 *	  characters.
 *	- \b, \f, \n, \r and \t are translated to the corresponding control
 *	  character; a backslash before any other character makes that
 *	  character ordinary.  A backslash right before a newline or the
 *	  end of the string is kept as a literal backslash.
 *	- A newline that ends a word also ends the scan; blanks and
 *	  newlines between words are skipped.
 *
 * returns --
 *	Pointer to a NULL-terminated array of pointers to the words.  The
 *	array and *buffer both come from emalloc(), and the words point into
 *	*buffer.  Word numbering starts at index 0 with the first word
 *	parsed from str.
 *
 * notes --
 *	The first word is considered started as soon as scanning begins, so
 *	an input that is empty after the leading blanks yields one empty
 *	word rather than no word at all.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int argc;
	char ch;
	char inquote;		/* active quote character, or '\0' */
	const char *p;		/* read position in str */
	char *start, *t;	/* start of current word, write position */
	int argmax = 50;
	char **argv = emalloc((argmax + 1) * sizeof(char *));

	/* skip leading space chars. */
	for (; *str == ' ' || *str == '\t'; ++str)
		continue;

	/*
	 * Allocate room for a copy of the string.  Every byte written below
	 * corresponds to at least one byte consumed from str, so the length
	 * of str plus its terminator is always enough.
	 */
	*buffer = emalloc(strlen(str) + 1);

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 *
	 * Inside the switch, `continue' consumes the character without
	 * storing it, while `break' falls to the bottom of the loop where
	 * ch is appended to the current word.
	 */
	argc = 0;
	inquote = '\0';
	for (p = str, start = t = *buffer;; ++p) {
		switch (ch = *p) {
		case '"':
		case '\'':
			if (inquote) {
				if (inquote == ch)
					inquote = '\0';
				else
					break;
			} else {
				inquote = ch;
				/*
				 * Don't miss "" or '': force a word to start
				 * even though nothing will be stored in it.
				 * The quote written below is only padding;
				 * the word begins right after it.
				 */
				if (start == NULL && p[1] == inquote) {
					start = t + 1;
					break;
				}
			}
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (inquote)
				break;
			if (!start)
				continue;
			/* FALLTHROUGH */
		case '\0':
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 */
			if (!start)
				goto done;

			*t++ = '\0';
			if (argc == argmax) {
				argmax *= 2;		/* ramp up fast */
				argv = erealloc(argv,
				    (argmax + 1) * sizeof(char *));
			}
			argv[argc++] = start;
			start = NULL;
			if (ch == '\n' || ch == '\0')
				goto done;
			continue;
		case '\\':
			switch (ch = *++p) {
			case '\0':
			case '\n':
				/* hmmm; fix it up as best we can */
				ch = '\\';
				--p;
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			}
			break;
		}
		if (!start)
			start = t;
		*t++ = ch;
	}
done:
	argv[argc] = NULL;
	*store_argc = argc;
	return argv;
}
192
193
/*
 * iterate_words --
 *	Locate the next word of a string, honouring quotes and backslashes.
 *
 *	On entry, *end is the position to scan from.  Leading white space
 *	(as defined by isspace()) is skipped.  The word then extends up to
 *	the first space or tab that is neither inside a '...' or "..."
 *	section nor preceded by a backslash, or up to the end of the
 *	string.  Quotes and backslashes are left in place; the string is
 *	not modified.
 *
 * returns --
 *	The start of the word, with *end set to the character that
 *	terminates it, or NULL when only white space remains (in which case
 *	*end is left untouched).
 */
const char *
iterate_words(const char **end)
{
	const char *start, *p;
	char state = 0;		/* active quote character, or 0 */
	start = *end;

	/*
	 * The <ctype.h> functions are only defined for values representable
	 * as unsigned char (or EOF), so a plain char must be converted
	 * first.
	 */
	while (isspace((unsigned char)*start))
		start++;
	if (*start == '\0')
		return NULL;

	for (p = start;; p++)
		switch(*p) {
		case '\\':
			/* skip the escaped character, unless it is the end */
			if (p[1] != '\0')
				p++;
			break;
		case '\'':
		case '"':
			if (state == *p)
				state = 0;
			else if (state == 0)
				state = *p;
			break;
		case ' ':
		case '\t':
			if (state != 0)
				break;
			/* FALLTHROUGH */
		case '\0':
			*end = p;
			return start;
		default:
			break;
		}
}
231
/*
 * Str_Matchi --
 *	Match the interval [string, estring) against the shell-like pattern
 *	held in the interval [pattern, epattern).
 *
 *	Pattern syntax:
 *	*	matches any substring, including the empty one
 *	?	matches any single character
 *	[...]	matches one character out of a list; X-Y denotes a range
 *		(in either order), a leading ! or ^ negates the list, and
 *		a ] placed first in the list is an ordinary character
 *	\c	matches the character c literally
 *
 *	A character class that matched but is missing its closing ] is
 *	accepted and ends the pattern.  Neither interval needs to be
 *	NUL-terminated: nothing at or beyond estring/epattern is read.
 *
 * returns --
 *	true if the whole string matches the whole pattern, false otherwise.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* Check for a "*" as the next pattern character.  It matches
		 * any substring.  We handle this by calling ourselves
		 * recursively for each postfix of string, until either we
		 * match or we reach the end of the string.  */
		if (*pattern == '*') {
			pattern++;
			/* Skip over contiguous sequences of `?*', so that
			 * recursive calls only occur on `real' characters.  */
			while (pattern != epattern &&
			    (*pattern == '?' || *pattern == '*')) {
				if (*pattern == '?') {
					if (string == estring)
						return false;
					else
						string++;
				}
				pattern++;
			}
			if (pattern == epattern)
				return true;
			for (; string != estring; string++)
				if (Str_Matchi(string, estring, pattern,
				    epattern))
					return true;
			return false;
		} else if (string == estring)
			return false;
		/* Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").  */
		else if (*pattern == '[') {
			pattern++;
			if (pattern == epattern)
				return false;
			if (*pattern == '!' || *pattern == '^') {
				pattern++;
				if (pattern == epattern)
					return false;
				/* Negative match */
				for (;;) {
					if (*pattern == '\\') {
						if (++pattern == epattern)
							return false;
					}
					if (*pattern == *string)
						return false;
					/* Only look for a range if the '-'
					 * lies inside the pattern.  */
					if (pattern + 1 != epattern &&
					    pattern[1] == '-') {
						if (pattern + 2 == epattern)
							return false;
						if (*pattern < *string &&
						    *string <= pattern[2])
							return false;
						if (pattern[2] <= *string &&
						    *string < *pattern)
							return false;
						pattern += 3;
					} else
						pattern++;
					if (pattern == epattern)
						return false;
					/* The test for ']' is done at the end
					 * so that ']' can be used at the start
					 * of the range without '\' */
					if (*pattern == ']')
						break;
				}
			} else {
				for (;;) {
					if (*pattern == '\\') {
						if (++pattern == epattern)
							return false;
					}
					if (*pattern == *string)
						break;
					/* Only look for a range if the '-'
					 * lies inside the pattern.  */
					if (pattern + 1 != epattern &&
					    pattern[1] == '-') {
						if (pattern + 2 == epattern)
							return false;
						if (*pattern < *string &&
						    *string <= pattern[2])
							break;
						if (pattern[2] <= *string &&
						    *string < *pattern)
							break;
						pattern += 3;
					} else
						pattern++;
					/* The test for ']' is done at the end
					 * so that ']' can be used at the start
					 * of the range without '\' */
					if (pattern == epattern ||
					    *pattern == ']')
						return false;
				}
				/* Found matching character.  Step over it
				 * unconditionally: it is a literal, even if it
				 * happens to be ']' or '\'.  Then skip over
				 * the rest of the class.  */
				pattern++;
				while (pattern != epattern && *pattern != ']') {
					if (*pattern == '\\') {
						if (++pattern == epattern)
							break;
					}
					pattern++;
				}
				/* A non-terminated character class is ok, but
				 * it uses up the whole pattern: the match
				 * succeeds only if the character just matched
				 * was the last one of the string.  */
				if (pattern == epattern)
					return string + 1 == estring;
			}
		}
		/* '?' matches any single character, so shunt test.  */
		else if (*pattern != '?') {
			/* If the next pattern character is '\', just strip
			 * off the '\' so we do exact matching on the
			 * character that follows.  */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			/* There's no special character.  Just make sure that
			 * the next characters of each string match.  */
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	/* Pattern used up: it is a match only if the string is too. */
	return string == estring;
}
354
355
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against a System V style pattern, in which a single
 *	'%' stands for an arbitrary stem.  Only the first '%' of the
 *	pattern is special.  A pattern without '%' is compared against
 *	the end of the word, and an empty pattern matches the whole word.
 *
 * Results:
 *	Returns the position in word where the stem begins, or NULL if
 *	word does not match.  On a match, the length of the stem is
 *	stored in *len; *len is left alone otherwise.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *percent;
	const char *stem = word;	/* where the stem starts */
	const char *tail = pattern;	/* what must follow the stem */
	size_t prefixlen, restlen, taillen;

	/* Null pattern is the whole string. */
	if (pattern[0] == '\0') {
		*len = strlen(word);
		return word;
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* Everything before the % has to match literally. */
		prefixlen = percent - pattern;
		if (strncmp(word, pattern, prefixlen) != 0)
			return NULL;

		stem = word + prefixlen;
		tail = percent + 1;
		if (*tail == '\0') {
			/* Nothing after the %: the stem is the rest. */
			*len = strlen(stem);
			return stem;
		}
	}

	/*
	 * At this point tail is not empty, and the remainder of the word
	 * has to end with it.  Only one position can satisfy that.
	 */
	restlen = strlen(stem);
	taillen = strlen(tail);
	if (taillen > restlen)
		return NULL;
	if (strcmp(stem + (restlen - taillen), tail) != 0)
		return NULL;

	*len = restlen - taillen;
	return stem;
}
407
408
409 /*-
410 *-----------------------------------------------------------------------
411 * Str_SYSVSubst --
412 * Substitute '%' in the pattern with len characters from src.
413 * If the pattern does not contain a '%' prepend len characters
414 * from src.
415 *
416 * Side Effects:
417 * Adds result to buf
418 *-----------------------------------------------------------------------
419 */
420 void
Str_SYSVSubst(Buffer buf,const char * pat,const char * src,size_t len)421 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
422 {
423 const char *m;
424
425 if ((m = strchr(pat, '%')) != NULL) {
426 /* Copy the prefix. */
427 Buf_Addi(buf, pat, m);
428 /* Skip the %. */
429 pat = m + 1;
430 }
431
432 /* Copy the pattern. */
433 Buf_AddChars(buf, len, src);
434
435 /* Append the rest. */
436 Buf_AddString(buf, pat);
437 }
438
/*
 * Str_dupi --
 *	Return a NUL-terminated copy of the interval [begin, end), in
 *	memory obtained from emalloc().
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t n = end - begin;
	char *copy = emalloc(n + 1);

	memcpy(copy, begin, n);
	copy[n] = '\0';
	return copy;
}
449
/*
 * escape_dupi --
 *	Copy the interval [begin, end) into a new NUL-terminated string
 *	obtained from emalloc(), dropping every backslash that is followed
 *	by a character found in set.  Other backslashes, including one at
 *	the very end of the interval, are copied unchanged.
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	const char *in = begin;
	char *result, *out;
	char c;

	/* The copy never grows, so the interval length is enough. */
	result = emalloc(end - begin + 1);
	out = result;

	while (in != end) {
		c = *in++;
		if (c == '\\') {
			/* Keep the backslash unless it escapes a set member. */
			if (in == end || strchr(set, *in) == NULL)
				*out++ = '\\';
			if (in == end)
				break;
			/* The escaped character is always copied as is. */
			c = *in++;
		}
		*out++ = c;
	}
	*out = '\0';
	return result;
}
471
/*
 * Str_rchri --
 *	Find the last occurrence of the character c in the interval
 *	[begin, end).
 *
 * returns --
 *	A pointer to that occurrence, or NULL if c does not occur (in
 *	particular when the interval is empty).
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	const char *cursor = end;

	/* Walk backwards from the end down to begin. */
	while (cursor != begin) {
		cursor--;
		if (*cursor == c)
			return (char *)cursor;
	}
	return NULL;
}
482