xref: /dragonfly/usr.bin/localedef/charmap.c (revision 1b11ea06a427d663e12e6a7a5083df6209e4cda2)
1 /*
2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3  * Copyright 2015 John Marino <draco@marino.st>
4  *
5  * This source code is derived from the illumos localedef command, and
6  * provided under BSD-style license terms by Nexenta Systems, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * CHARMAP file handling for localedef.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/tree.h>
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <limits.h>
42 #include <unistd.h>
43 #include <stddef.h>
44 #include "localedef.h"
45 #include "parser.h"
46 
47 
48 typedef struct charmap {
49           const char *name;
50           wchar_t wc;
51           RB_ENTRY(charmap) rb_sym;
52           RB_ENTRY(charmap) rb_wc;
53 } charmap_t;
54 
55 static int cmap_compare_sym(const void *n1, const void *n2);
56 static int cmap_compare_wc(const void *n1, const void *n2);
57 
58 static RB_HEAD(cmap_sym, charmap) cmap_sym;
59 static RB_HEAD(cmap_wc, charmap) cmap_wc;
60 
61 RB_PROTOTYPE_STATIC(cmap_sym, charmap, rb_sym, cmap_compare_sym);
62 RB_PROTOTYPE_STATIC(cmap_wc, charmap, rb_wc, cmap_compare_wc);
63 
64 RB_GENERATE(cmap_sym, charmap, rb_sym, cmap_compare_sym);
65 RB_GENERATE(cmap_wc, charmap, rb_wc, cmap_compare_wc);
66 
/*
 * Symbolic names of the POSIX portable character set, each paired with
 * the character it denotes.  Several characters are listed under more
 * than one name (e.g. "hyphen" and "hyphen-minus").  The table ends with
 * an entry whose name is NULL.
 */

static const struct {
	const char *name;
	int	ch;
} portable_chars[] = {
	{ "NUL",		'\0' },
	{ "alert",		'\a' },
	{ "backspace",		'\b' },
	{ "tab",		'\t' },
	{ "carriage-return",	'\r' },
	{ "newline",		'\n' },
	{ "vertical-tab",	'\v' },
	{ "form-feed",		'\f' },
	{ "space",		' ' },
	{ "exclamation-mark",	'!' },
	{ "quotation-mark",	'"' },
	{ "number-sign",	'#' },
	{ "dollar-sign",	'$' },
	{ "percent-sign",	'%' },
	{ "ampersand",		'&' },
	{ "apostrophe",		'\'' },
	{ "left-parenthesis",	'(' },
	{ "right-parenthesis",	')' },
	{ "asterisk",		'*' },
	{ "plus-sign",		'+' },
	{ "comma",		',' },
	{ "hyphen-minus",	'-' },
	{ "hyphen",		'-' },
	{ "full-stop",		'.' },
	{ "period",		'.' },
	{ "slash",		'/' },
	{ "solidus",		'/' },
	{ "zero",		'0' },
	{ "one",		'1' },
	{ "two",		'2' },
	{ "three",		'3' },
	{ "four",		'4' },
	{ "five",		'5' },
	{ "six",		'6' },
	{ "seven",		'7' },
	{ "eight",		'8' },
	{ "nine",		'9' },
	{ "colon",		':' },
	{ "semicolon",		';' },
	{ "less-than-sign",	'<' },
	{ "equals-sign",	'=' },
	{ "greater-than-sign",	'>' },
	{ "question-mark",	'?' },
	{ "commercial-at",	'@' },
	{ "left-square-bracket", '[' },
	{ "backslash",		'\\' },
	{ "reverse-solidus",	'\\' },
	{ "right-square-bracket", ']' },
	{ "circumflex",		'^' },
	{ "circumflex-accent",	'^' },
	{ "low-line",		'_' },
	{ "underscore",		'_' },
	{ "grave-accent",	'`' },
	{ "left-brace",		'{' },
	{ "left-curly-bracket",	'{' },
	{ "vertical-line",	'|' },
	{ "right-brace",	'}' },
	{ "right-curly-bracket", '}' },
	{ "tilde",		'~' },
	{ "A", 'A' },
	{ "B", 'B' },
	{ "C", 'C' },
	{ "D", 'D' },
	{ "E", 'E' },
	{ "F", 'F' },
	{ "G", 'G' },
	{ "H", 'H' },
	{ "I", 'I' },
	{ "J", 'J' },
	{ "K", 'K' },
	{ "L", 'L' },
	{ "M", 'M' },
	{ "N", 'N' },
	{ "O", 'O' },
	{ "P", 'P' },
	{ "Q", 'Q' },
	{ "R", 'R' },
	{ "S", 'S' },
	{ "T", 'T' },
	{ "U", 'U' },
	{ "V", 'V' },
	{ "W", 'W' },
	{ "X", 'X' },
	{ "Y", 'Y' },
	{ "Z", 'Z' },
	{ "a", 'a' },
	{ "b", 'b' },
	{ "c", 'c' },
	{ "d", 'd' },
	{ "e", 'e' },
	{ "f", 'f' },
	{ "g", 'g' },
	{ "h", 'h' },
	{ "i", 'i' },
	{ "j", 'j' },
	{ "k", 'k' },
	{ "l", 'l' },
	{ "m", 'm' },
	{ "n", 'n' },
	{ "o", 'o' },
	{ "p", 'p' },
	{ "q", 'q' },
	{ "r", 'r' },
	{ "s", 's' },
	{ "t", 't' },
	{ "u", 'u' },
	{ "v", 'v' },
	{ "w", 'w' },
	{ "x", 'x' },
	{ "y", 'y' },
	{ "z", 'z' },
	{ NULL, 0 }
};
188 
189 static int
cmap_compare_sym(const void * n1,const void * n2)190 cmap_compare_sym(const void *n1, const void *n2)
191 {
192           const charmap_t *c1 = n1;
193           const charmap_t *c2 = n2;
194           int rv;
195 
196           rv = strcmp(c1->name, c2->name);
197           return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
198 }
199 
200 static int
cmap_compare_wc(const void * n1,const void * n2)201 cmap_compare_wc(const void *n1, const void *n2)
202 {
203           const charmap_t *c1 = n1;
204           const charmap_t *c2 = n2;
205 
206           return ((c1->wc < c2->wc) ? -1 : (c1->wc > c2->wc) ? 1 : 0);
207 }
208 
209 void
init_charmap(void)210 init_charmap(void)
211 {
212           RB_INIT(&cmap_sym);
213 
214           RB_INIT(&cmap_wc);
215 }
216 
217 static void
add_charmap_impl(const char * sym,wchar_t wc,int nodups)218 add_charmap_impl(const char *sym, wchar_t wc, int nodups)
219 {
220           charmap_t srch;
221           charmap_t *n = NULL;
222 
223           srch.wc = wc;
224           srch.name = sym;
225 
226           /*
227            * also possibly insert the wide mapping, although note that there
228            * can only be one of these per wide character code.
229            */
230           if ((wc != (wchar_t)-1) && ((RB_FIND(cmap_wc, &cmap_wc, &srch)) == NULL)) {
231                     if ((n = calloc(1, sizeof (*n))) == NULL) {
232                               errf("out of memory");
233                               return;
234                     }
235                     n->wc = wc;
236                     RB_INSERT(cmap_wc, &cmap_wc, n);
237           }
238 
239           if (sym) {
240                     if (RB_FIND(cmap_sym, &cmap_sym, &srch) != NULL) {
241                               if (nodups) {
242                                         errf("duplicate character definition");
243                               }
244                               return;
245                     }
246                     if ((n == NULL) && ((n = calloc(1, sizeof (*n))) == NULL)) {
247                               errf("out of memory");
248                               return;
249                     }
250                     n->wc = wc;
251                     n->name = sym;
252 
253                     RB_INSERT(cmap_sym, &cmap_sym, n);
254           }
255 }
256 
/*
 * Define symbol `sym` as character code `c`.  A name that is already
 * defined is reported as a duplicate-definition error.
 */
void
add_charmap(const char *sym, int c)
{
	const int report_dups = 1;

	add_charmap_impl(sym, c, report_dups);
}
262 
263 void
add_charmap_undefined(char * sym)264 add_charmap_undefined(char *sym)
265 {
266           charmap_t srch;
267           charmap_t *cm = NULL;
268 
269           srch.name = sym;
270           cm = RB_FIND(cmap_sym, &cmap_sym, &srch);
271 
272           if ((undefok == 0) && ((cm == NULL) || (cm->wc == (wchar_t)-1))) {
273                     warn("undefined symbol <%s>", sym);
274                     add_charmap_impl(sym, -1, 0);
275           } else {
276                     free(sym);
277           }
278 }
279 
/*
 * Define a run of symbols with consecutive character codes.
 *
 * `s` and `e` are the first and last symbol names of the range.  Both
 * must consist of the same non-empty, digit-free prefix followed only by
 * decimal digits (for example "x0010" and "x0020"), with the start
 * number not exceeding the end number.  One symbol is generated per
 * number, zero-padded to the digit width of `s`, and assigned the codes
 * wc, wc + 1, ...
 *
 * This function consumes `s` and `e`: both strings are freed on every
 * path, including malformed-range and out-of-memory errors, which are
 * reported through errf().
 */
void
add_charmap_range(char *s, char *e, int wc)
{
	static const char digits[] = "0123456789";
	size_t	ls, le, si;
	long	sn, en, i;

	ls = strlen(s);
	le = strlen(e);
	si = strcspn(s, digits);	/* length of the non-digit prefix */

	/*
	 * The strncmp() check must precede any use of e + si: it fails
	 * whenever `e` is shorter than the prefix.
	 */
	if ((si == 0) || (si == ls) ||
	    (strncmp(s, e, si) != 0) ||
	    (strspn(s + si, digits) != (ls - si)) ||
	    (strspn(e + si, digits) != (le - si))) {
		errf("malformed charmap range");
		goto out;
	}

	/*
	 * Both suffixes are digits only, so strtol() cannot stop early; on
	 * overflow it saturates to LONG_MAX.  Values at or above INT_MAX
	 * are rejected, which also keeps the loop counter from overflowing.
	 */
	sn = strtol(s + si, NULL, 10);
	en = strtol(e + si, NULL, 10);
	if ((sn > en) || (en >= INT_MAX)) {
		errf("malformed charmap range");
		goto out;
	}

	/* Cut `s` down to its prefix for use in the format below. */
	s[si] = '\0';

	for (i = sn; i <= en; i++) {
		char *nn = NULL;

		/* The output pointer is unspecified on failure; test the return. */
		if (asprintf(&nn, "%s%0*u", s, (int)(ls - si),
		    (unsigned int)i) < 0) {
			errf("out of memory");
			break;
		}

		add_charmap_impl(nn, wc, 1);
		wc++;
	}

out:
	free(s);
	free(e);
}
318 
/*
 * Define symbol `name` as character code `val`, unless the name is
 * already defined, in which case the existing definition is kept and no
 * error is reported.
 */
void
add_charmap_char(const char *name, int val)
{
	const int report_dups = 0;

	add_charmap_impl(name, val, report_dups);
}
324 
325 /*
326  * POSIX insists that certain entries be present, even when not in the
327  * orginal charmap file.
328  */
329 void
add_charmap_posix(void)330 add_charmap_posix(void)
331 {
332           int       i;
333 
334           for (i = 0; portable_chars[i].name; i++) {
335                     add_charmap_char(portable_chars[i].name, portable_chars[i].ch);
336           }
337 }
338 
339 int
lookup_charmap(const char * sym,wchar_t * wc)340 lookup_charmap(const char *sym, wchar_t *wc)
341 {
342           charmap_t srch;
343           charmap_t *n;
344 
345           srch.name = sym;
346           n = RB_FIND(cmap_sym, &cmap_sym, &srch);
347           if (n && n->wc != (wchar_t)-1) {
348                     if (wc)
349                               *wc = n->wc;
350                     return (0);
351           }
352           return (-1);
353 }
354 
355 int
check_charmap(wchar_t wc)356 check_charmap(wchar_t wc)
357 {
358           charmap_t srch;
359 
360           srch.wc = wc;
361           return (RB_FIND(cmap_wc, &cmap_wc, &srch) ? 0 : -1);
362 }
363