xref: /dragonfly/contrib/tcsh-6/tc.str.c (revision 84d884bf08edef6c02f15218458cd5df8010b654)
1 /*
2  * tc.str.c: Short string package
3  *             This has been a lesson of how to write buggy code!
4  */
5 /*-
6  * Copyright (c) 1980, 1991 The Regents of the University of California.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 #include "sh.h"
34 
35 #include <assert.h>
36 #include <limits.h>
37 
38 #define MALLOC_INCR 128
39 #ifdef WIDE_STRINGS
40 #define MALLOC_SURPLUS        MB_LEN_MAX /* Space for one multibyte character */
41 #else
42 #define MALLOC_SURPLUS        0
43 #endif
44 
45 #ifdef WIDE_STRINGS
46 size_t
one_mbtowc(Char * pwc,const char * s,size_t n)47 one_mbtowc(Char *pwc, const char *s, size_t n)
48 {
49     int len;
50 
51     len = rt_mbtowc(pwc, s, n);
52     if (len == -1) {
53         reset_mbtowc();
54           *pwc = (unsigned char)*s | INVALID_BYTE;
55     }
56     if (len <= 0)
57           len = 1;
58     return len;
59 }
60 
61 size_t
one_wctomb(char * s,Char wchar)62 one_wctomb(char *s, Char wchar)
63 {
64     int len;
65 
66 #if INVALID_BYTE != 0
67     if ((wchar & INVALID_BYTE) == INVALID_BYTE) {    /* wchar >= INVALID_BYTE */
68           /* invalid char
69            * exmaple)
70            * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
71           *s = (char)wchar;
72           len = 1;
73 #else
74     if (wchar & (CHAR & INVALID_BYTE)) {
75           s[0] = wchar & (CHAR & 0xFF);
76           len = 1;
77 #endif
78     } else {
79 #if INVALID_BYTE != 0
80           wchar &= MAX_UTF32;
81 #else
82           wchar &= CHAR;
83 #endif
84 #ifdef UTF16_STRINGS
85           if (wchar >= 0x10000) {
86               /* UTF-16 systems can't handle these values directly in calls to
87                  wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
88                  convert the "string" to the correct multibyte representation,
89                  if any. */
90               wchar_t ws[3];
91               wchar -= 0x10000;
92               ws[0] = 0xd800 | (wchar >> 10);
93               ws[1] = 0xdc00 | (wchar & 0x3ff);
94               ws[2] = 0;
95               /* The return value of wcstombs excludes the trailing 0, so len is
96                  the correct number of multibytes for the Unicode char. */
97               len = wcstombs (s, ws, MB_CUR_MAX + 1);
98           } else
99 #endif
100           len = wctomb(s, (wchar_t) wchar);
101           if (len == -1)
102               s[0] = wchar;
103           if (len <= 0)
104               len = 1;
105     }
106     return len;
107 }
108 
109 int
110 rt_mbtowc(Char *pwc, const char *s, size_t n)
111 {
112     int ret;
113     char back[MB_LEN_MAX];
114     wchar_t tmp;
115 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
116 # if defined(AUTOSET_KANJI)
117     static mbstate_t mb_zero, mb;
118     /*
119      * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
120      */
121     if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
122           !memcmp(&mb, &mb_zero, sizeof(mb)))
123     {
124           *pwc = *s;
125           return 1;
126     }
127 # else
128     mbstate_t mb;
129 # endif
130 
131     memset (&mb, 0, sizeof mb);
132     ret = mbrtowc(&tmp, s, n, &mb);
133 #else
134     ret = mbtowc(&tmp, s, n);
135 #endif
136     if (ret > 0) {
137           *pwc = tmp;
138 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
139           if (tmp >= 0xd800 && tmp <= 0xdbff) {
140               /* UTF-16 surrogate pair.  Fetch second half and compute
141                  UTF-32 value.  Dispense with the inverse test in this case. */
142               size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
143               if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
144                     ret = -1;
145               else {
146                     *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
147                     ret += n2;
148               }
149           } else
150 #endif
151           if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
152               ret = -1;
153 
154     } else if (ret == -2)
155           ret = -1;
156     else if (ret == 0)
157           *pwc = '\0';
158 
159     return ret;
160 }
161 #endif
162 
163 #ifdef SHORT_STRINGS
164 Char  **
165 blk2short(char **src)
166 {
167     size_t     n;
168     Char **sdst, **dst;
169 
170     /*
171      * Count
172      */
173     for (n = 0; src[n] != NULL; n++)
174           continue;
175     sdst = dst = xmalloc((n + 1) * sizeof(Char *));
176 
177     for (; *src != NULL; src++)
178           *dst++ = SAVE(*src);
179     *dst = NULL;
180     return (sdst);
181 }
182 
183 char  **
184 short2blk(Char **src)
185 {
186     size_t     n;
187     char **sdst, **dst;
188 
189     /*
190      * Count
191      */
192     for (n = 0; src[n] != NULL; n++)
193           continue;
194     sdst = dst = xmalloc((n + 1) * sizeof(char *));
195 
196     for (; *src != NULL; src++)
197           *dst++ = strsave(short2str(*src));
198     *dst = NULL;
199     return (sdst);
200 }
201 
202 Char   *
203 str2short(const char *src)
204 {
205     static struct Strbuf buf; /* = Strbuf_INIT; */
206 
207     if (src == NULL)
208           return (NULL);
209 
210     buf.len = 0;
211     while (*src) {
212           Char wc;
213 
214           src += one_mbtowc(&wc, src, MB_LEN_MAX);
215           Strbuf_append1(&buf, wc);
216     }
217     Strbuf_terminate(&buf);
218     return buf.s;
219 }
220 
221 char   *
222 short2str(const Char *src)
223 {
224     static char *sdst = NULL;
225     static size_t dstsize = 0;
226     char *dst, *edst;
227 
228     if (src == NULL)
229           return (NULL);
230 
231     if (sdst == NULL) {
232           dstsize = MALLOC_INCR;
233           sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
234     }
235     dst = sdst;
236     edst = &dst[dstsize];
237     while (*src) {
238           dst += one_wctomb(dst, *src);
239           src++;
240           if (dst >= edst) {
241               char *wdst = dst;
242               char *wedst = edst;
243 
244               dstsize += MALLOC_INCR;
245               sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
246               edst = &sdst[dstsize];
247               dst = &edst[-MALLOC_INCR];
248               while (wdst > wedst) {
249                     dst++;
250                     wdst--;
251               }
252           }
253     }
254     *dst = 0;
255     return (sdst);
256 }
257 
258 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
259 Char   *
260 s_strcpy(Char *dst, const Char *src)
261 {
262     Char *sdst;
263 
264     sdst = dst;
265     while ((*dst++ = *src++) != '\0')
266           continue;
267     return (sdst);
268 }
269 
270 Char   *
271 s_strncpy(Char *dst, const Char *src, size_t n)
272 {
273     Char *sdst;
274 
275     if (n == 0)
276           return(dst);
277 
278     sdst = dst;
279     do
280           if ((*dst++ = *src++) == '\0') {
281               while (--n != 0)
282                     *dst++ = '\0';
283               return(sdst);
284           }
285     while (--n != 0);
286     return (sdst);
287 }
288 
289 Char   *
290 s_strcat(Char *dst, const Char *src)
291 {
292     Strcpy(Strend(dst), src);
293     return dst;
294 }
295 
296 #ifdef NOTUSED
297 Char   *
298 s_strncat(Char *dst, const Char *src, size_t n)
299 {
300     Char *sdst;
301 
302     if (n == 0)
303           return (dst);
304 
305     sdst = dst;
306 
307     while (*dst)
308           dst++;
309 
310     do
311           if ((*dst++ = *src++) == '\0')
312               return(sdst);
313     while (--n != 0)
314           continue;
315 
316     *dst = '\0';
317     return (sdst);
318 }
319 
320 #endif
321 
322 Char   *
323 s_strchr(const Char *str, int ch)
324 {
325     do
326           if (*str == ch)
327               return ((Char *)(intptr_t)str);
328     while (*str++);
329     return (NULL);
330 }
331 
332 Char   *
333 s_strrchr(const Char *str, int ch)
334 {
335     const Char *rstr;
336 
337     rstr = NULL;
338     do
339           if (*str == ch)
340               rstr = str;
341     while (*str++);
342     return ((Char *)(intptr_t)rstr);
343 }
344 
345 size_t
346 s_strlen(const Char *str)
347 {
348     size_t n;
349 
350     for (n = 0; *str++; n++)
351           continue;
352     return (n);
353 }
354 
355 int
356 s_strcmp(const Char *str1, const Char *str2)
357 {
358     for (; *str1 && *str1 == *str2; str1++, str2++)
359           continue;
360     /*
361      * The following case analysis is necessary so that characters which look
362      * negative collate low against normal characters but high against the
363      * end-of-string NUL.
364      */
365     if (*str1 == '\0' && *str2 == '\0')
366           return (0);
367     else if (*str1 == '\0')
368           return (-1);
369     else if (*str2 == '\0')
370           return (1);
371     else
372           return (*str1 - *str2);
373 }
374 
375 int
376 s_strncmp(const Char *str1, const Char *str2, size_t n)
377 {
378     if (n == 0)
379           return (0);
380     do {
381           if (*str1 != *str2) {
382               /*
383                * The following case analysis is necessary so that characters
384                * which look negative collate low against normal characters
385                * but high against the end-of-string NUL.
386                */
387               if (*str1 == '\0')
388                     return (-1);
389               else if (*str2 == '\0')
390                     return (1);
391               else
392                     return (*str1 - *str2);
393           }
394         if (*str1 == '\0')
395               return(0);
396           str1++, str2++;
397     } while (--n != 0);
398     return(0);
399 }
400 #endif /* not WIDE_STRINGS */
401 
402 int
403 s_strcasecmp(const Char *str1, const Char *str2)
404 {
405 #ifdef WIDE_STRINGS
406     wint_t l1 = 0, l2 = 0;
407     for (; *str1; str1++, str2++)
408           if (*str1 == *str2)
409               l1 = l2 = 0;
410           else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
411               break;
412 #else
413     unsigned char l1 = 0, l2 = 0;
414     for (; *str1; str1++, str2++)
415           if (*str1 == *str2)
416                     l1 = l2 = 0;
417           else if ((l1 = tolower((unsigned char)*str1)) !=
418               (l2 = tolower((unsigned char)*str2)))
419               break;
420 #endif
421     /*
422      * The following case analysis is necessary so that characters which look
423      * negative collate low against normal characters but high against the
424      * end-of-string NUL.
425      */
426     if (*str1 == '\0' && *str2 == '\0')
427           return (0);
428     else if (*str1 == '\0')
429           return (-1);
430     else if (*str2 == '\0')
431           return (1);
432     else if (l1 == l2)        /* They are zero when they are equal */
433           return (*str1 - *str2);
434     else
435           return (l1 - l2);
436 }
437 
438 Char   *
439 s_strnsave(const Char *s, size_t len)
440 {
441     Char *n;
442 
443     n = xmalloc((len + 1) * sizeof (*n));
444     memcpy(n, s, len * sizeof (*n));
445     n[len] = '\0';
446     return n;
447 }
448 
449 Char   *
450 s_strsave(const Char *s)
451 {
452     Char   *n;
453     size_t size;
454 
455     if (s == NULL)
456           s = STRNULL;
457     size = (Strlen(s) + 1) * sizeof(*n);
458     n = xmalloc(size);
459     memcpy(n, s, size);
460     return (n);
461 }
462 
463 Char   *
464 s_strspl(const Char *cp, const Char *dp)
465 {
466     Char *res, *ep;
467     const Char *p, *q;
468 
469     if (!cp)
470           cp = STRNULL;
471     if (!dp)
472           dp = STRNULL;
473     for (p = cp; *p++;)
474           continue;
475     for (q = dp; *q++;)
476           continue;
477     res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
478     for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
479           continue;
480     for (ep--, q = dp; (*ep++ = *q++) != '\0';)
481           continue;
482     return (res);
483 }
484 
485 Char   *
486 s_strend(const Char *cp)
487 {
488     if (!cp)
489           return ((Char *)(intptr_t) cp);
490     while (*cp)
491           cp++;
492     return ((Char *)(intptr_t) cp);
493 }
494 
495 Char   *
496 s_strstr(const Char *s, const Char *t)
497 {
498     do {
499           const Char *ss = s;
500           const Char *tt = t;
501 
502           do
503               if (*tt == '\0')
504                     return ((Char *)(intptr_t) s);
505           while (*ss++ == *tt++);
506     } while (*s++ != '\0');
507     return (NULL);
508 }
509 
510 #else /* !SHORT_STRINGS */
/*
 * Copy s into a function-local static buffer, run strip() on the copy and
 * return it.  Returns NULL for a NULL argument.
 *
 * The buffer only ever grows and is shared by all calls, so each call
 * overwrites the result of the previous one.
 */
char *
caching_strip(const char *s)
{
    static char *buf = NULL;
    static size_t buf_size = 0;
    size_t need;

    if (s == NULL)
        return NULL;

    need = strlen(s) + 1;               /* include the terminator */
    if (need > buf_size) {
        buf = xrealloc(buf, need);
        buf_size = need;
    }
    memcpy(buf, s, need);
    strip(buf);
    return buf;
}
529 #endif
530 
531 char   *
532 short2qstr(const Char *src)
533 {
534     static char *sdst = NULL;
535     static size_t dstsize = 0;
536     char *dst, *edst;
537 
538     if (src == NULL)
539           return (NULL);
540 
541     if (sdst == NULL) {
542           dstsize = MALLOC_INCR;
543           sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
544     }
545     dst = sdst;
546     edst = &dst[dstsize];
547     while (*src) {
548           if (*src & QUOTE) {
549               *dst++ = '\\';
550               if (dst == edst) {
551                     dstsize += MALLOC_INCR;
552                     sdst = xrealloc(sdst,
553                                         (dstsize + MALLOC_SURPLUS) * sizeof(char));
554                     edst = &sdst[dstsize];
555                     dst = &edst[-MALLOC_INCR];
556               }
557           }
558           dst += one_wctomb(dst, *src);
559           src++;
560           if (dst >= edst) {
561               ptrdiff_t i = dst - edst;
562               dstsize += MALLOC_INCR;
563               sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
564               edst = &sdst[dstsize];
565               dst = &edst[-MALLOC_INCR + i];
566           }
567     }
568     *dst = 0;
569     return (sdst);
570 }
571 
572 struct blk_buf *
573 bb_alloc(void)
574 {
575     return xcalloc(1, sizeof(struct blk_buf));
576 }
577 
578 static void
579 bb_store(struct blk_buf *bb, Char *str)
580 {
581     if (bb->len == bb->size) { /* Keep space for terminating NULL */
582           if (bb->size == 0)
583               bb->size = 16; /* Arbitrary */
584           else
585               bb->size *= 2;
586           bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
587     }
588     bb->vec[bb->len] = str;
589 }
590 
591 void
592 bb_append(struct blk_buf *bb, Char *str)
593 {
594     bb_store(bb, str);
595     bb->len++;
596 }
597 
598 void
599 bb_cleanup(void *xbb)
600 {
601     struct blk_buf *bb;
602     size_t i;
603 
604     bb = (struct blk_buf *)xbb;
605     if (bb->vec) {
606           for (i = 0; i < bb->len; i++)
607               xfree(bb->vec[i]);
608           xfree(bb->vec);
609     }
610     bb->vec = NULL;
611     bb->len = 0;
612 }
613 
/*
 * Release the contents of the blk_buf bb with bb_cleanup(), then free the
 * structure itself.
 */
void
bb_free(void *bb)
{
    struct blk_buf *victim = bb;

    bb_cleanup(victim);
    xfree(victim);
}
620 
621 Char **
622 bb_finish(struct blk_buf *bb)
623 {
624     bb_store(bb, NULL);
625     return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
626 }
627 
628 #define DO_STRBUF(STRBUF, CHAR, STRLEN)                               \
629                                                                                 \
630 struct STRBUF *                                                                 \
631 STRBUF##_alloc(void)                                                            \
632 {                                                                               \
633     return xcalloc(1, sizeof(struct STRBUF));                         \
634 }                                                                               \
635                                                                                 \
636 static void                                                                     \
637 STRBUF##_store1(struct STRBUF *buf, CHAR c)                           \
638 {                                                                               \
639     if (buf->size == buf->len) {                                      \
640           if (buf->size == 0)                                         \
641               buf->size = 64; /* Arbitrary */                         \
642           else                                                                  \
643               buf->size *= 2;                                         \
644           buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));     \
645     }                                                                           \
646     assert(buf->s);                                                   \
647     buf->s[buf->len] = c;                                             \
648 }                                                                               \
649                                                                                 \
650 /* Like strbuf_append1(buf, '\0'), but don't advance len */ \
651 void                                                                            \
652 STRBUF##_terminate(struct STRBUF *buf)                                \
653 {                                                                               \
654     STRBUF##_store1(buf, '\0');                                                 \
655 }                                                                               \
656                                                                                 \
657 void                                                                            \
658 STRBUF##_append1(struct STRBUF *buf, CHAR c)                          \
659 {                                                                               \
660     STRBUF##_store1(buf, c);                                          \
661     buf->len++;                                                                 \
662 }                                                                               \
663                                                                                 \
664 void                                                                            \
665 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len)       \
666 {                                                                               \
667     if (buf->size < buf->len + len) {                                 \
668           if (buf->size == 0)                                         \
669               buf->size = 64; /* Arbitrary */                         \
670           while (buf->size < buf->len + len)                          \
671               buf->size *= 2;                                         \
672           buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));     \
673     }                                                                           \
674     memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));    \
675     buf->len += len;                                                            \
676 }                                                                               \
677                                                                                 \
678 void                                                                            \
679 STRBUF##_append(struct STRBUF *buf, const CHAR *s)                    \
680 {                                                                               \
681     STRBUF##_appendn(buf, s, STRLEN(s));                              \
682 }                                                                               \
683                                                                                 \
684 CHAR *                                                                          \
685 STRBUF##_finish(struct STRBUF *buf)                                   \
686 {                                                                               \
687     STRBUF##_append1(buf, 0);                                         \
688     return xrealloc(buf->s, buf->len * sizeof(*buf->s));    \
689 }                                                                               \
690                                                                                 \
691 void                                                                            \
692 STRBUF##_cleanup(void *xbuf)                                          \
693 {                                                                               \
694     struct STRBUF *buf;                                                         \
695                                                                                 \
696     buf = xbuf;                                                                 \
697     xfree(buf->s);                                                    \
698 }                                                                               \
699                                                                                 \
700 void                                                                            \
701 STRBUF##_free(void *xbuf)                                             \
702 {                                                                               \
703     STRBUF##_cleanup(xbuf);                                           \
704     xfree(xbuf);                                                      \
705 }                                                                               \
706                                                                                 \
707 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
708 
709 DO_STRBUF(strbuf, char, strlen);
710 DO_STRBUF(Strbuf, Char, Strlen);
711