1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */
2 /*
3 * tc.str.c: Short string package
4 * This has been a lesson of how to write buggy code!
5 */
6 /*-
7 * Copyright (c) 1980, 1991 The Regents of the University of California.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34 #include "sh.h"
35
36 #include <assert.h>
37 #include <limits.h>
38
39 RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $")
40
41 #define MALLOC_INCR 128
42 #ifdef WIDE_STRINGS
43 #define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */
44 #else
45 #define MALLOC_SURPLUS 0
46 #endif
47
48 #ifdef WIDE_STRINGS
49 size_t
one_mbtowc(Char * pwc,const char * s,size_t n)50 one_mbtowc(Char *pwc, const char *s, size_t n)
51 {
52 int len;
53
54 len = rt_mbtowc(pwc, s, n);
55 if (len == -1) {
56 reset_mbtowc();
57 *pwc = (unsigned char)*s | INVALID_BYTE;
58 }
59 if (len <= 0)
60 len = 1;
61 return len;
62 }
63
64 size_t
one_wctomb(char * s,Char wchar)65 one_wctomb(char *s, Char wchar)
66 {
67 int len;
68
69 if (wchar & INVALID_BYTE) {
70 s[0] = wchar & 0xFF;
71 len = 1;
72 } else {
73 #ifdef UTF16_STRINGS
74 if (wchar >= 0x10000) {
75 /* UTF-16 systems can't handle these values directly in calls to
76 wctomb. Convert value to UTF-16 surrogate and call wcstombs to
77 convert the "string" to the correct multibyte representation,
78 if any. */
79 wchar_t ws[3];
80 wchar -= 0x10000;
81 ws[0] = 0xd800 | (wchar >> 10);
82 ws[1] = 0xdc00 | (wchar & 0x3ff);
83 ws[2] = 0;
84 /* The return value of wcstombs excludes the trailing 0, so len is
85 the correct number of multibytes for the Unicode char. */
86 len = wcstombs (s, ws, MB_CUR_MAX + 1);
87 } else
88 #endif
89 len = wctomb(s, (wchar_t) wchar);
90 if (len == -1)
91 s[0] = wchar;
92 if (len <= 0)
93 len = 1;
94 }
95 return len;
96 }
97
98 int
rt_mbtowc(Char * pwc,const char * s,size_t n)99 rt_mbtowc(Char *pwc, const char *s, size_t n)
100 {
101 int ret;
102 char back[MB_LEN_MAX];
103 wchar_t tmp;
104 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
105 # if defined(AUTOSET_KANJI)
106 static mbstate_t mb_zero, mb;
107 /*
108 * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
109 */
110 if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
111 !memcmp(&mb, &mb_zero, sizeof(mb)))
112 {
113 *pwc = *s;
114 return 1;
115 }
116 # else
117 mbstate_t mb;
118 # endif
119
120 memset (&mb, 0, sizeof mb);
121 ret = mbrtowc(&tmp, s, n, &mb);
122 #else
123 ret = mbtowc(&tmp, s, n);
124 #endif
125 if (ret > 0) {
126 *pwc = tmp;
127 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
128 if (tmp >= 0xd800 && tmp <= 0xdbff) {
129 /* UTF-16 surrogate pair. Fetch second half and compute
130 UTF-32 value. Dispense with the inverse test in this case. */
131 size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
132 if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
133 ret = -1;
134 else {
135 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
136 ret += n2;
137 }
138 } else
139 #endif
140 if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
141 ret = -1;
142
143 } else if (ret == -2)
144 ret = -1;
145 else if (ret == 0)
146 *pwc = '\0';
147
148 return ret;
149 }
150 #endif
151
152 #ifdef SHORT_STRINGS
153 Char **
blk2short(char ** src)154 blk2short(char **src)
155 {
156 size_t n;
157 Char **sdst, **dst;
158
159 /*
160 * Count
161 */
162 for (n = 0; src[n] != NULL; n++)
163 continue;
164 sdst = dst = xmalloc((n + 1) * sizeof(Char *));
165
166 for (; *src != NULL; src++)
167 *dst++ = SAVE(*src);
168 *dst = NULL;
169 return (sdst);
170 }
171
172 char **
short2blk(Char ** src)173 short2blk(Char **src)
174 {
175 size_t n;
176 char **sdst, **dst;
177
178 /*
179 * Count
180 */
181 for (n = 0; src[n] != NULL; n++)
182 continue;
183 sdst = dst = xmalloc((n + 1) * sizeof(char *));
184
185 for (; *src != NULL; src++)
186 *dst++ = strsave(short2str(*src));
187 *dst = NULL;
188 return (sdst);
189 }
190
191 Char *
str2short(const char * src)192 str2short(const char *src)
193 {
194 static struct Strbuf buf; /* = Strbuf_INIT; */
195
196 if (src == NULL)
197 return (NULL);
198
199 buf.len = 0;
200 while (*src) {
201 Char wc;
202
203 src += one_mbtowc(&wc, src, MB_LEN_MAX);
204 Strbuf_append1(&buf, wc);
205 }
206 Strbuf_terminate(&buf);
207 return buf.s;
208 }
209
210 char *
short2str(const Char * src)211 short2str(const Char *src)
212 {
213 static char *sdst = NULL;
214 static size_t dstsize = 0;
215 char *dst, *edst;
216
217 if (src == NULL)
218 return (NULL);
219
220 if (sdst == NULL) {
221 dstsize = MALLOC_INCR;
222 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
223 }
224 dst = sdst;
225 edst = &dst[dstsize];
226 while (*src) {
227 dst += one_wctomb(dst, *src & CHAR);
228 src++;
229 if (dst >= edst) {
230 char *wdst = dst;
231 char *wedst = edst;
232
233 dstsize += MALLOC_INCR;
234 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
235 edst = &sdst[dstsize];
236 dst = &edst[-MALLOC_INCR];
237 while (wdst > wedst) {
238 dst++;
239 wdst--;
240 }
241 }
242 }
243 *dst = 0;
244 return (sdst);
245 }
246
247 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
248 Char *
s_strcpy(Char * dst,const Char * src)249 s_strcpy(Char *dst, const Char *src)
250 {
251 Char *sdst;
252
253 sdst = dst;
254 while ((*dst++ = *src++) != '\0')
255 continue;
256 return (sdst);
257 }
258
259 Char *
s_strncpy(Char * dst,const Char * src,size_t n)260 s_strncpy(Char *dst, const Char *src, size_t n)
261 {
262 Char *sdst;
263
264 if (n == 0)
265 return(dst);
266
267 sdst = dst;
268 do
269 if ((*dst++ = *src++) == '\0') {
270 while (--n != 0)
271 *dst++ = '\0';
272 return(sdst);
273 }
274 while (--n != 0);
275 return (sdst);
276 }
277
278 Char *
s_strcat(Char * dst,const Char * src)279 s_strcat(Char *dst, const Char *src)
280 {
281 Strcpy(Strend(dst), src);
282 return dst;
283 }
284
285 #ifdef NOTUSED
286 Char *
s_strncat(Char * dst,const Char * src,size_t n)287 s_strncat(Char *dst, const Char *src, size_t n)
288 {
289 Char *sdst;
290
291 if (n == 0)
292 return (dst);
293
294 sdst = dst;
295
296 while (*dst)
297 dst++;
298
299 do
300 if ((*dst++ = *src++) == '\0')
301 return(sdst);
302 while (--n != 0)
303 continue;
304
305 *dst = '\0';
306 return (sdst);
307 }
308
309 #endif
310
311 Char *
s_strchr(const Char * str,int ch)312 s_strchr(const Char *str, int ch)
313 {
314 do
315 if (*str == ch)
316 return ((Char *)(intptr_t)str);
317 while (*str++);
318 return (NULL);
319 }
320
321 Char *
s_strrchr(const Char * str,int ch)322 s_strrchr(const Char *str, int ch)
323 {
324 const Char *rstr;
325
326 rstr = NULL;
327 do
328 if (*str == ch)
329 rstr = str;
330 while (*str++);
331 return ((Char *)(intptr_t)rstr);
332 }
333
334 size_t
s_strlen(const Char * str)335 s_strlen(const Char *str)
336 {
337 size_t n;
338
339 for (n = 0; *str++; n++)
340 continue;
341 return (n);
342 }
343
344 int
s_strcmp(const Char * str1,const Char * str2)345 s_strcmp(const Char *str1, const Char *str2)
346 {
347 for (; *str1 && *str1 == *str2; str1++, str2++)
348 continue;
349 /*
350 * The following case analysis is necessary so that characters which look
351 * negative collate low against normal characters but high against the
352 * end-of-string NUL.
353 */
354 if (*str1 == '\0' && *str2 == '\0')
355 return (0);
356 else if (*str1 == '\0')
357 return (-1);
358 else if (*str2 == '\0')
359 return (1);
360 else
361 return (*str1 - *str2);
362 }
363
364 int
s_strncmp(const Char * str1,const Char * str2,size_t n)365 s_strncmp(const Char *str1, const Char *str2, size_t n)
366 {
367 if (n == 0)
368 return (0);
369 do {
370 if (*str1 != *str2) {
371 /*
372 * The following case analysis is necessary so that characters
373 * which look negative collate low against normal characters
374 * but high against the end-of-string NUL.
375 */
376 if (*str1 == '\0')
377 return (-1);
378 else if (*str2 == '\0')
379 return (1);
380 else
381 return (*str1 - *str2);
382 }
383 if (*str1 == '\0')
384 return(0);
385 str1++, str2++;
386 } while (--n != 0);
387 return(0);
388 }
389 #endif /* not WIDE_STRINGS */
390
391 int
s_strcasecmp(const Char * str1,const Char * str2)392 s_strcasecmp(const Char *str1, const Char *str2)
393 {
394 #ifdef WIDE_STRINGS
395 wint_t l1 = 0, l2 = 0;
396 for (; *str1; str1++, str2++)
397 if (*str1 == *str2)
398 l1 = l2 = 0;
399 else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
400 break;
401 #else
402 unsigned char l1 = 0, l2 = 0;
403 for (; *str1; str1++, str2++)
404 if (*str1 == *str2)
405 l1 = l2 = 0;
406 else if ((l1 = tolower((unsigned char)*str1)) !=
407 (l2 = tolower((unsigned char)*str2)))
408 break;
409 #endif
410 /*
411 * The following case analysis is necessary so that characters which look
412 * negative collate low against normal characters but high against the
413 * end-of-string NUL.
414 */
415 if (*str1 == '\0' && *str2 == '\0')
416 return (0);
417 else if (*str1 == '\0')
418 return (-1);
419 else if (*str2 == '\0')
420 return (1);
421 else if (l1 == l2) /* They are zero when they are equal */
422 return (*str1 - *str2);
423 else
424 return (l1 - l2);
425 }
426
427 Char *
s_strnsave(const Char * s,size_t len)428 s_strnsave(const Char *s, size_t len)
429 {
430 Char *n;
431
432 n = xmalloc((len + 1) * sizeof (*n));
433 memcpy(n, s, len * sizeof (*n));
434 n[len] = '\0';
435 return n;
436 }
437
438 Char *
s_strsave(const Char * s)439 s_strsave(const Char *s)
440 {
441 Char *n;
442 size_t size;
443
444 if (s == NULL)
445 s = STRNULL;
446 size = (Strlen(s) + 1) * sizeof(*n);
447 n = xmalloc(size);
448 memcpy(n, s, size);
449 return (n);
450 }
451
452 Char *
s_strspl(const Char * cp,const Char * dp)453 s_strspl(const Char *cp, const Char *dp)
454 {
455 Char *res, *ep;
456 const Char *p, *q;
457
458 if (!cp)
459 cp = STRNULL;
460 if (!dp)
461 dp = STRNULL;
462 for (p = cp; *p++;)
463 continue;
464 for (q = dp; *q++;)
465 continue;
466 res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
467 for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
468 continue;
469 for (ep--, q = dp; (*ep++ = *q++) != '\0';)
470 continue;
471 return (res);
472 }
473
474 Char *
s_strend(const Char * cp)475 s_strend(const Char *cp)
476 {
477 if (!cp)
478 return ((Char *)(intptr_t) cp);
479 while (*cp)
480 cp++;
481 return ((Char *)(intptr_t) cp);
482 }
483
484 Char *
s_strstr(const Char * s,const Char * t)485 s_strstr(const Char *s, const Char *t)
486 {
487 do {
488 const Char *ss = s;
489 const Char *tt = t;
490
491 do
492 if (*tt == '\0')
493 return ((Char *)(intptr_t) s);
494 while (*ss++ == *tt++);
495 } while (*s++ != '\0');
496 return (NULL);
497 }
498
499 #else /* !SHORT_STRINGS */
/*
 * Copy s into a function-static buffer, run strip() on the copy and
 * return it.  The buffer only ever grows and is reused: every call
 * overwrites what the previous call returned.  Returns NULL when s is
 * NULL.
 */
char *
caching_strip(const char *s)
{
    static char *cache = NULL;
    static size_t cache_cap = 0;
    size_t need;

    if (!s)
        return NULL;

    need = strlen(s) + 1;	/* include the terminator */
    if (need > cache_cap) {
        cache = xrealloc(cache, need);
        cache_cap = need;
    }
    memcpy(cache, s, need);
    strip(cache);
    return cache;
}
518 #endif
519
520 char *
short2qstr(const Char * src)521 short2qstr(const Char *src)
522 {
523 static char *sdst = NULL;
524 static size_t dstsize = 0;
525 char *dst, *edst;
526
527 if (src == NULL)
528 return (NULL);
529
530 if (sdst == NULL) {
531 dstsize = MALLOC_INCR;
532 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
533 }
534 dst = sdst;
535 edst = &dst[dstsize];
536 while (*src) {
537 if (*src & QUOTE) {
538 *dst++ = '\\';
539 if (dst == edst) {
540 dstsize += MALLOC_INCR;
541 sdst = xrealloc(sdst,
542 (dstsize + MALLOC_SURPLUS) * sizeof(char));
543 edst = &sdst[dstsize];
544 dst = &edst[-MALLOC_INCR];
545 }
546 }
547 dst += one_wctomb(dst, *src & CHAR);
548 src++;
549 if (dst >= edst) {
550 ptrdiff_t i = dst - edst;
551 dstsize += MALLOC_INCR;
552 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
553 edst = &sdst[dstsize];
554 dst = &edst[-MALLOC_INCR + i];
555 }
556 }
557 *dst = 0;
558 return (sdst);
559 }
560
561 struct blk_buf *
bb_alloc()562 bb_alloc()
563 {
564 return xcalloc(1, sizeof(struct blk_buf));
565 }
566
567 static void
bb_store(struct blk_buf * bb,Char * str)568 bb_store(struct blk_buf *bb, Char *str)
569 {
570 if (bb->len == bb->size) { /* Keep space for terminating NULL */
571 if (bb->size == 0)
572 bb->size = 16; /* Arbitrary */
573 else
574 bb->size *= 2;
575 bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
576 }
577 bb->vec[bb->len] = str;
578 }
579
580 void
bb_append(struct blk_buf * bb,Char * str)581 bb_append(struct blk_buf *bb, Char *str)
582 {
583 bb_store(bb, str);
584 bb->len++;
585 }
586
587 void
bb_cleanup(void * xbb)588 bb_cleanup(void *xbb)
589 {
590 struct blk_buf *bb;
591 size_t i;
592
593 bb = xbb;
594 for (i = 0; i < bb->len; i++)
595 xfree(bb->vec[i]);
596 xfree(bb->vec);
597 }
598
/*
 * Destroy a block buffer: release its contents with bb_cleanup() and then
 * free the structure itself.
 */
void
bb_free(void *bb)
{
    bb_cleanup(bb);	/* strings and vector */
    xfree(bb);		/* the struct itself */
}
605
606 Char **
bb_finish(struct blk_buf * bb)607 bb_finish(struct blk_buf *bb)
608 {
609 bb_store(bb, NULL);
610 return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
611 }
612
613 #define DO_STRBUF(STRBUF, CHAR, STRLEN) \
614 \
615 struct STRBUF * \
616 STRBUF##_alloc(void) \
617 { \
618 return xcalloc(1, sizeof(struct STRBUF)); \
619 } \
620 \
621 static void \
622 STRBUF##_store1(struct STRBUF *buf, CHAR c) \
623 { \
624 if (buf->size == buf->len) { \
625 if (buf->size == 0) \
626 buf->size = 64; /* Arbitrary */ \
627 else \
628 buf->size *= 2; \
629 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
630 } \
631 assert(buf->s); \
632 buf->s[buf->len] = c; \
633 } \
634 \
635 /* Like strbuf_append1(buf, '\0'), but don't advance len */ \
636 void \
637 STRBUF##_terminate(struct STRBUF *buf) \
638 { \
639 STRBUF##_store1(buf, '\0'); \
640 } \
641 \
642 void \
643 STRBUF##_append1(struct STRBUF *buf, CHAR c) \
644 { \
645 STRBUF##_store1(buf, c); \
646 buf->len++; \
647 } \
648 \
649 void \
650 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
651 { \
652 if (buf->size < buf->len + len) { \
653 if (buf->size == 0) \
654 buf->size = 64; /* Arbitrary */ \
655 while (buf->size < buf->len + len) \
656 buf->size *= 2; \
657 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
658 } \
659 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \
660 buf->len += len; \
661 } \
662 \
663 void \
664 STRBUF##_append(struct STRBUF *buf, const CHAR *s) \
665 { \
666 STRBUF##_appendn(buf, s, STRLEN(s)); \
667 } \
668 \
669 CHAR * \
670 STRBUF##_finish(struct STRBUF *buf) \
671 { \
672 STRBUF##_append1(buf, 0); \
673 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \
674 } \
675 \
676 void \
677 STRBUF##_cleanup(void *xbuf) \
678 { \
679 struct STRBUF *buf; \
680 \
681 buf = xbuf; \
682 xfree(buf->s); \
683 } \
684 \
685 void \
686 STRBUF##_free(void *xbuf) \
687 { \
688 STRBUF##_cleanup(xbuf); \
689 xfree(xbuf); \
690 } \
691 \
692 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
693
694 DO_STRBUF(strbuf, char, strlen);
695 DO_STRBUF(Strbuf, Char, Strlen);
696