1 /* $OpenBSD$ */
2
3 /*
4 * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/types.h>
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <wchar.h>
26
27 #include "compat.h"
28 #include "tmux.h"
29
/*
 * Code points that utf8_width() always reports as two cells wide, whatever
 * the width function says. The table is searched with bsearch() through
 * utf8_in_table(), so it must stay sorted in ascending order.
 */
static const wchar_t utf8_force_wide[] = {
	0x0261D, 0x026F9, 0x0270A, 0x0270B, 0x0270C, 0x0270D,

	0x1F1E6, 0x1F1E7, 0x1F1E8, 0x1F1E9, 0x1F1EA, 0x1F1EB,
	0x1F1EC, 0x1F1ED, 0x1F1EE, 0x1F1EF, 0x1F1F0, 0x1F1F1,
	0x1F1F2, 0x1F1F3, 0x1F1F4, 0x1F1F5, 0x1F1F6, 0x1F1F7,
	0x1F1F8, 0x1F1F9, 0x1F1FA, 0x1F1FB, 0x1F1FC, 0x1F1FD,
	0x1F1FE, 0x1F1FF,

	0x1F385,
	0x1F3C2, 0x1F3C3, 0x1F3C4, 0x1F3C7, 0x1F3CA, 0x1F3CB,
	0x1F3CC,
	0x1F3FB, 0x1F3FC, 0x1F3FD, 0x1F3FE, 0x1F3FF,

	0x1F442, 0x1F443, 0x1F446, 0x1F447, 0x1F448, 0x1F449,
	0x1F44A, 0x1F44B, 0x1F44C, 0x1F44D, 0x1F44E, 0x1F44F,
	0x1F450,
	0x1F466, 0x1F467, 0x1F468, 0x1F469, 0x1F46B, 0x1F46C,
	0x1F46D, 0x1F46E, 0x1F470, 0x1F471, 0x1F472, 0x1F473,
	0x1F474, 0x1F475, 0x1F476, 0x1F477, 0x1F478, 0x1F47C,
	0x1F481, 0x1F482, 0x1F483, 0x1F485, 0x1F486, 0x1F487,
	0x1F48F, 0x1F491, 0x1F4AA,

	0x1F574, 0x1F575, 0x1F57A, 0x1F590, 0x1F595, 0x1F596,

	0x1F645, 0x1F646, 0x1F647, 0x1F64B, 0x1F64C, 0x1F64D,
	0x1F64E, 0x1F64F,
	0x1F6A3, 0x1F6B4, 0x1F6B5, 0x1F6B6, 0x1F6C0, 0x1F6CC,

	0x1F90C, 0x1F90F, 0x1F918, 0x1F919, 0x1F91A, 0x1F91B,
	0x1F91C, 0x1F91D, 0x1F91E, 0x1F91F, 0x1F926, 0x1F930,
	0x1F931, 0x1F932, 0x1F933, 0x1F934, 0x1F935, 0x1F936,
	0x1F937, 0x1F938, 0x1F939, 0x1F93D, 0x1F93E, 0x1F977,

	0x1F9B5, 0x1F9B6, 0x1F9B8, 0x1F9B9, 0x1F9BB, 0x1F9CD,
	0x1F9CE, 0x1F9CF,
	0x1F9D1, 0x1F9D2, 0x1F9D3, 0x1F9D4, 0x1F9D5, 0x1F9D6,
	0x1F9D7, 0x1F9D8, 0x1F9D9, 0x1F9DA, 0x1F9DB, 0x1F9DC,
	0x1F9DD,

	0x1FAC3, 0x1FAC4, 0x1FAC5,
	0x1FAF0, 0x1FAF1, 0x1FAF2, 0x1FAF3, 0x1FAF4, 0x1FAF5,
	0x1FAF6, 0x1FAF7, 0x1FAF8
};
194
195 struct utf8_item {
196 RB_ENTRY(utf8_item) index_entry;
197 u_int index;
198
199 RB_ENTRY(utf8_item) data_entry;
200 char data[UTF8_SIZE];
201 u_char size;
202 };
203
204 static int
utf8_data_cmp(struct utf8_item * ui1,struct utf8_item * ui2)205 utf8_data_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
206 {
207 if (ui1->size < ui2->size)
208 return (-1);
209 if (ui1->size > ui2->size)
210 return (1);
211 return (memcmp(ui1->data, ui2->data, ui1->size));
212 }
213 RB_HEAD(utf8_data_tree, utf8_item);
214 RB_GENERATE_STATIC(utf8_data_tree, utf8_item, data_entry, utf8_data_cmp);
215 static struct utf8_data_tree utf8_data_tree = RB_INITIALIZER(utf8_data_tree);
216
217 static int
utf8_index_cmp(struct utf8_item * ui1,struct utf8_item * ui2)218 utf8_index_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
219 {
220 if (ui1->index < ui2->index)
221 return (-1);
222 if (ui1->index > ui2->index)
223 return (1);
224 return (0);
225 }
226 RB_HEAD(utf8_index_tree, utf8_item);
227 RB_GENERATE_STATIC(utf8_index_tree, utf8_item, index_entry, utf8_index_cmp);
228 static struct utf8_index_tree utf8_index_tree = RB_INITIALIZER(utf8_index_tree);
229
230 static u_int utf8_next_index;
231
232 #define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)
233 #define UTF8_GET_WIDTH(uc) (((uc) >> 29) - 1)
234
235 #define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)
236 #define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)
237
238 /* Get a UTF-8 item from data. */
239 static struct utf8_item *
utf8_item_by_data(const u_char * data,size_t size)240 utf8_item_by_data(const u_char *data, size_t size)
241 {
242 struct utf8_item ui;
243
244 memcpy(ui.data, data, size);
245 ui.size = size;
246
247 return (RB_FIND(utf8_data_tree, &utf8_data_tree, &ui));
248 }
249
250 /* Get a UTF-8 item from data. */
251 static struct utf8_item *
utf8_item_by_index(u_int index)252 utf8_item_by_index(u_int index)
253 {
254 struct utf8_item ui;
255
256 ui.index = index;
257
258 return (RB_FIND(utf8_index_tree, &utf8_index_tree, &ui));
259 }
260
261 /* Add a UTF-8 item. */
262 static int
utf8_put_item(const u_char * data,size_t size,u_int * index)263 utf8_put_item(const u_char *data, size_t size, u_int *index)
264 {
265 struct utf8_item *ui;
266
267 ui = utf8_item_by_data((const unsigned char *)data, size);
268 if (ui != NULL) {
269 *index = ui->index;
270 log_debug("%s: found %.*s = %u", __func__, (int)size, data,
271 *index);
272 return (0);
273 }
274
275 if (utf8_next_index == 0xffffff + 1)
276 return (-1);
277
278 ui = xcalloc(1, sizeof *ui);
279 ui->index = utf8_next_index++;
280 RB_INSERT(utf8_index_tree, &utf8_index_tree, ui);
281
282 memcpy(ui->data, data, size);
283 ui->size = size;
284 RB_INSERT(utf8_data_tree, &utf8_data_tree, ui);
285
286 *index = ui->index;
287 log_debug("%s: added %.*s = %u", __func__, (int)size, data, *index);
288 return (0);
289 }
290
/* bsearch() comparator for wchar_t values: returns -1, 0 or 1. */
static int
utf8_table_cmp(const void *vp1, const void *vp2)
{
	const wchar_t	lhs = *(const wchar_t *)vp1;
	const wchar_t	rhs = *(const wchar_t *)vp2;

	return ((lhs > rhs) - (lhs < rhs));
}
302
303 /* Check if character in table. */
304 int
utf8_in_table(wchar_t find,const wchar_t * table,u_int count)305 utf8_in_table(wchar_t find, const wchar_t *table, u_int count)
306 {
307 wchar_t *found;
308
309 found = bsearch(&find, table, count, sizeof *table, utf8_table_cmp);
310 return (found != NULL);
311 }
312
313 /* Get UTF-8 character from data. */
314 enum utf8_state
utf8_from_data(const struct utf8_data * ud,utf8_char * uc)315 utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
316 {
317 u_int index;
318
319 if (ud->width > 2)
320 fatalx("invalid UTF-8 width: %u", ud->width);
321
322 if (ud->size > UTF8_SIZE)
323 goto fail;
324 if (ud->size <= 3) {
325 index = (((utf8_char)ud->data[2] << 16)|
326 ((utf8_char)ud->data[1] << 8)|
327 ((utf8_char)ud->data[0]));
328 } else if (utf8_put_item(ud->data, ud->size, &index) != 0)
329 goto fail;
330 *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
331 log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,
332 (int)ud->size, ud->data, *uc);
333 return (UTF8_DONE);
334
335 fail:
336 if (ud->width == 0)
337 *uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0);
338 else if (ud->width == 1)
339 *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20;
340 else
341 *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x2020;
342 return (UTF8_ERROR);
343 }
344
345 /* Get UTF-8 data from character. */
346 void
utf8_to_data(utf8_char uc,struct utf8_data * ud)347 utf8_to_data(utf8_char uc, struct utf8_data *ud)
348 {
349 struct utf8_item *ui;
350 u_int index;
351
352 memset(ud, 0, sizeof *ud);
353 ud->size = ud->have = UTF8_GET_SIZE(uc);
354 ud->width = UTF8_GET_WIDTH(uc);
355
356 if (ud->size <= 3) {
357 ud->data[2] = (uc >> 16);
358 ud->data[1] = ((uc >> 8) & 0xff);
359 ud->data[0] = (uc & 0xff);
360 } else {
361 index = (uc & 0xffffff);
362 if ((ui = utf8_item_by_index(index)) == NULL)
363 memset(ud->data, ' ', ud->size);
364 else
365 memcpy(ud->data, ui->data, ud->size);
366 }
367
368 log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,
369 (int)ud->size, ud->data);
370 }
371
372 /* Get UTF-8 character from a single ASCII character. */
373 u_int
utf8_build_one(u_char ch)374 utf8_build_one(u_char ch)
375 {
376 return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch);
377 }
378
379 /* Set a single character. */
380 void
utf8_set(struct utf8_data * ud,u_char ch)381 utf8_set(struct utf8_data *ud, u_char ch)
382 {
383 static const struct utf8_data empty = { { 0 }, 1, 1, 1 };
384
385 memcpy(ud, &empty, sizeof *ud);
386 *ud->data = ch;
387 }
388
389 /* Copy UTF-8 character. */
390 void
utf8_copy(struct utf8_data * to,const struct utf8_data * from)391 utf8_copy(struct utf8_data *to, const struct utf8_data *from)
392 {
393 u_int i;
394
395 memcpy(to, from, sizeof *to);
396
397 for (i = to->size; i < sizeof to->data; i++)
398 to->data[i] = '\0';
399 }
400
401 /* Get width of Unicode character. */
402 static enum utf8_state
utf8_width(struct utf8_data * ud,int * width)403 utf8_width(struct utf8_data *ud, int *width)
404 {
405 wchar_t wc;
406
407 if (utf8_towc(ud, &wc) != UTF8_DONE)
408 return (UTF8_ERROR);
409 if (utf8_in_table(wc, utf8_force_wide, nitems(utf8_force_wide))) {
410 *width = 2;
411 return (UTF8_DONE);
412 }
413 #ifdef HAVE_UTF8PROC
414 *width = utf8proc_wcwidth(wc);
415 log_debug("utf8proc_wcwidth(%05X) returned %d", (u_int)wc, *width);
416 #else
417 *width = wcwidth(wc);
418 log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width);
419 if (*width < 0) {
420 /*
421 * C1 control characters are nonprintable, so they are always
422 * zero width.
423 */
424 *width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1;
425 }
426 #endif
427 if (*width >= 0 && *width <= 0xff)
428 return (UTF8_DONE);
429 return (UTF8_ERROR);
430 }
431
432 /* Convert UTF-8 character to wide character. */
433 enum utf8_state
utf8_towc(const struct utf8_data * ud,wchar_t * wc)434 utf8_towc(const struct utf8_data *ud, wchar_t *wc)
435 {
436 #ifdef HAVE_UTF8PROC
437 switch (utf8proc_mbtowc(wc, ud->data, ud->size)) {
438 #else
439 switch (mbtowc(wc, __UNCONST(ud->data), ud->size)) {
440 #endif
441 case -1:
442 log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
443 errno);
444 mbtowc(NULL, NULL, MB_CUR_MAX);
445 return (UTF8_ERROR);
446 case 0:
447 return (UTF8_ERROR);
448 }
449 log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)*wc);
450 return (UTF8_DONE);
451 }
452
453 /* Convert wide character to UTF-8 character. */
454 enum utf8_state
455 utf8_fromwc(wchar_t wc, struct utf8_data *ud)
456 {
457 int size, width;
458
459 #ifdef HAVE_UTF8PROC
460 size = utf8proc_wctomb(ud->data, wc);
461 #else
462 size = wctomb((char *)ud->data, wc);
463 #endif
464 if (size < 0) {
465 log_debug("UTF-8 %d, wctomb() %d", wc, errno);
466 wctomb(NULL, 0);
467 return (UTF8_ERROR);
468 }
469 if (size == 0)
470 return (UTF8_ERROR);
471 ud->size = ud->have = size;
472 if (utf8_width(ud, &width) == UTF8_DONE) {
473 ud->width = width;
474 return (UTF8_DONE);
475 }
476 return (UTF8_ERROR);
477 }
478
479 /*
480 * Open UTF-8 sequence.
481 *
482 * 11000010-11011111 C2-DF start of 2-byte sequence
483 * 11100000-11101111 E0-EF start of 3-byte sequence
484 * 11110000-11110100 F0-F4 start of 4-byte sequence
485 */
486 enum utf8_state
487 utf8_open(struct utf8_data *ud, u_char ch)
488 {
489 memset(ud, 0, sizeof *ud);
490 if (ch >= 0xc2 && ch <= 0xdf)
491 ud->size = 2;
492 else if (ch >= 0xe0 && ch <= 0xef)
493 ud->size = 3;
494 else if (ch >= 0xf0 && ch <= 0xf4)
495 ud->size = 4;
496 else
497 return (UTF8_ERROR);
498 utf8_append(ud, ch);
499 return (UTF8_MORE);
500 }
501
502 /* Append character to UTF-8, closing if finished. */
503 enum utf8_state
504 utf8_append(struct utf8_data *ud, u_char ch)
505 {
506 int width;
507
508 if (ud->have >= ud->size)
509 fatalx("UTF-8 character overflow");
510 if (ud->size > sizeof ud->data)
511 fatalx("UTF-8 character size too large");
512
513 if (ud->have != 0 && (ch & 0xc0) != 0x80)
514 ud->width = 0xff;
515
516 ud->data[ud->have++] = ch;
517 if (ud->have != ud->size)
518 return (UTF8_MORE);
519
520 if (ud->width == 0xff)
521 return (UTF8_ERROR);
522 if (utf8_width(ud, &width) != UTF8_DONE)
523 return (UTF8_ERROR);
524 ud->width = width;
525
526 return (UTF8_DONE);
527 }
528
529 /*
530 * Encode len characters from src into dst, which is guaranteed to have four
531 * bytes available for each character from src (for \abc or UTF-8) plus space
532 * for \0.
533 */
534 int
535 utf8_strvis(char *dst, const char *src, size_t len, int flag)
536 {
537 struct utf8_data ud;
538 const char *start = dst, *end = src + len;
539 enum utf8_state more;
540 size_t i;
541
542 while (src < end) {
543 if ((more = utf8_open(&ud, *src)) == UTF8_MORE) {
544 while (++src < end && more == UTF8_MORE)
545 more = utf8_append(&ud, *src);
546 if (more == UTF8_DONE) {
547 /* UTF-8 character finished. */
548 for (i = 0; i < ud.size; i++)
549 *dst++ = ud.data[i];
550 continue;
551 }
552 /* Not a complete, valid UTF-8 character. */
553 src -= ud.have;
554 }
555 if ((flag & VIS_DQ) && src[0] == '$' && src < end - 1) {
556 if (isalpha((u_char)src[1]) ||
557 src[1] == '_' ||
558 src[1] == '{')
559 *dst++ = '\\';
560 *dst++ = '$';
561 } else if (src < end - 1)
562 dst = vis(dst, src[0], flag, src[1]);
563 else if (src < end)
564 dst = vis(dst, src[0], flag, '\0');
565 src++;
566 }
567 *dst = '\0';
568 return (dst - start);
569 }
570
/*
 * Same as utf8_strvis but for a \0-terminated string and with the buffer
 * allocated here: *dst receives it, trimmed to fit. Returns the encoded
 * length.
 */
int
utf8_stravis(char **dst, const char *src, int flag)
{
	size_t	 srclen = strlen(src);
	char	*out;
	int	 outlen;

	/* Up to four output bytes per input byte, plus the terminator. */
	out = xreallocarray(NULL, 4, srclen + 1);
	outlen = utf8_strvis(out, src, srclen, flag);

	*dst = xrealloc(out, outlen + 1);
	return (outlen);
}
584
585 /* Same as utf8_strvis but allocate the buffer. */
586 int
587 utf8_stravisx(char **dst, const char *src, size_t srclen, int flag)
588 {
589 char *buf;
590 int len;
591
592 buf = xreallocarray(NULL, 4, srclen + 1);
593 len = utf8_strvis(buf, src, srclen, flag);
594
595 *dst = xrealloc(buf, len + 1);
596 return (len);
597 }
598
599 /* Does this string contain anything that isn't valid UTF-8? */
600 int
601 utf8_isvalid(const char *s)
602 {
603 struct utf8_data ud;
604 const char *end;
605 enum utf8_state more;
606
607 end = s + strlen(s);
608 while (s < end) {
609 if ((more = utf8_open(&ud, *s)) == UTF8_MORE) {
610 while (++s < end && more == UTF8_MORE)
611 more = utf8_append(&ud, *s);
612 if (more == UTF8_DONE)
613 continue;
614 return (0);
615 }
616 if (*s < 0x20 || *s > 0x7e)
617 return (0);
618 s++;
619 }
620 return (1);
621 }
622
623 /*
624 * Sanitize a string, changing any UTF-8 characters to '_'. Caller should free
625 * the returned string. Anything not valid printable ASCII or UTF-8 is
626 * stripped.
627 */
628 char *
629 utf8_sanitize(const char *src)
630 {
631 char *dst = NULL;
632 size_t n = 0;
633 enum utf8_state more;
634 struct utf8_data ud;
635 u_int i;
636
637 while (*src != '\0') {
638 dst = xreallocarray(dst, n + 1, sizeof *dst);
639 if ((more = utf8_open(&ud, *src)) == UTF8_MORE) {
640 while (*++src != '\0' && more == UTF8_MORE)
641 more = utf8_append(&ud, *src);
642 if (more == UTF8_DONE) {
643 dst = xreallocarray(dst, n + ud.width,
644 sizeof *dst);
645 for (i = 0; i < ud.width; i++)
646 dst[n++] = '_';
647 continue;
648 }
649 src -= ud.have;
650 }
651 if (*src > 0x1f && *src < 0x7f)
652 dst[n++] = *src;
653 else
654 dst[n++] = '_';
655 src++;
656 }
657 dst = xreallocarray(dst, n + 1, sizeof *dst);
658 dst[n] = '\0';
659 return (dst);
660 }
661
662 /* Get UTF-8 buffer length. */
663 size_t
664 utf8_strlen(const struct utf8_data *s)
665 {
666 size_t i;
667
668 for (i = 0; s[i].size != 0; i++)
669 /* nothing */;
670 return (i);
671 }
672
673 /* Get UTF-8 string width. */
674 u_int
675 utf8_strwidth(const struct utf8_data *s, ssize_t n)
676 {
677 ssize_t i;
678 u_int width = 0;
679
680 for (i = 0; s[i].size != 0; i++) {
681 if (n != -1 && n == i)
682 break;
683 width += s[i].width;
684 }
685 return (width);
686 }
687
688 /*
689 * Convert a string into a buffer of UTF-8 characters. Terminated by size == 0.
690 * Caller frees.
691 */
692 struct utf8_data *
693 utf8_fromcstr(const char *src)
694 {
695 struct utf8_data *dst = NULL;
696 size_t n = 0;
697 enum utf8_state more;
698
699 while (*src != '\0') {
700 dst = xreallocarray(dst, n + 1, sizeof *dst);
701 if ((more = utf8_open(&dst[n], *src)) == UTF8_MORE) {
702 while (*++src != '\0' && more == UTF8_MORE)
703 more = utf8_append(&dst[n], *src);
704 if (more == UTF8_DONE) {
705 n++;
706 continue;
707 }
708 src -= dst[n].have;
709 }
710 utf8_set(&dst[n], *src);
711 n++;
712 src++;
713 }
714 dst = xreallocarray(dst, n + 1, sizeof *dst);
715 dst[n].size = 0;
716 return (dst);
717 }
718
719 /* Convert from a buffer of UTF-8 characters into a string. Caller frees. */
720 char *
721 utf8_tocstr(struct utf8_data *src)
722 {
723 char *dst = NULL;
724 size_t n = 0;
725
726 for(; src->size != 0; src++) {
727 dst = xreallocarray(dst, n + src->size, 1);
728 memcpy(dst + n, src->data, src->size);
729 n += src->size;
730 }
731 dst = xreallocarray(dst, n + 1, 1);
732 dst[n] = '\0';
733 return (dst);
734 }
735
736 /* Get width of UTF-8 string. */
737 u_int
738 utf8_cstrwidth(const char *s)
739 {
740 struct utf8_data tmp;
741 u_int width;
742 enum utf8_state more;
743
744 width = 0;
745 while (*s != '\0') {
746 if ((more = utf8_open(&tmp, *s)) == UTF8_MORE) {
747 while (*++s != '\0' && more == UTF8_MORE)
748 more = utf8_append(&tmp, *s);
749 if (more == UTF8_DONE) {
750 width += tmp.width;
751 continue;
752 }
753 s -= tmp.have;
754 }
755 if (*s > 0x1f && *s != 0x7f)
756 width++;
757 s++;
758 }
759 return (width);
760 }
761
762 /* Pad UTF-8 string to width on the left. Caller frees. */
763 char *
764 utf8_padcstr(const char *s, u_int width)
765 {
766 size_t slen;
767 char *out;
768 u_int n, i;
769
770 n = utf8_cstrwidth(s);
771 if (n >= width)
772 return (xstrdup(s));
773
774 slen = strlen(s);
775 out = xmalloc(slen + 1 + (width - n));
776 memcpy(out, s, slen);
777 for (i = n; i < width; i++)
778 out[slen++] = ' ';
779 out[slen] = '\0';
780 return (out);
781 }
782
783 /* Pad UTF-8 string to width on the right. Caller frees. */
784 char *
785 utf8_rpadcstr(const char *s, u_int width)
786 {
787 size_t slen;
788 char *out;
789 u_int n, i;
790
791 n = utf8_cstrwidth(s);
792 if (n >= width)
793 return (xstrdup(s));
794
795 slen = strlen(s);
796 out = xmalloc(slen + 1 + (width - n));
797 for (i = 0; i < width - n; i++)
798 out[i] = ' ';
799 memcpy(out + i, s, slen);
800 out[i + slen] = '\0';
801 return (out);
802 }
803
804 int
805 utf8_cstrhas(const char *s, const struct utf8_data *ud)
806 {
807 struct utf8_data *copy, *loop;
808 int found = 0;
809
810 copy = utf8_fromcstr(s);
811 for (loop = copy; loop->size != 0; loop++) {
812 if (loop->size != ud->size)
813 continue;
814 if (memcmp(loop->data, ud->data, loop->size) == 0) {
815 found = 1;
816 break;
817 }
818 }
819 free(copy);
820
821 return (found);
822 }
823