xref: /dragonfly/lib/libkiconv/xlat16_iconv.c (revision 8527a700756610d5fd01616a8a3665d22f5dc567)
1 /*-
2  * Copyright (c) 2003, 2005 Ryuichiro Imura
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD: head/lib/libkiconv/xlat16_iconv.c 281550 2015-04-15 09:09:20Z tijl $
27  */
28 
/*
 * kiconv(3) requires shared linking (PIC).  When statically linked, only
 * stub functions are built, which reduces module size.
 */
33 
34 #ifdef PIC
35 
36 #include <sys/types.h>
37 #include <sys/iconv.h>
38 #include <sys/sysctl.h>
39 
40 #include <ctype.h>
41 #include <dlfcn.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <iconv.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <wctype.h>
50 
51 #include "quirks.h"
52 
/*
 * Result of kiconv_xlat16_open(): a 0x200-entry row index plus the packed
 * rows themselves.
 */
struct xlat16_table {
	/*
	 * One slot per row.  A slot is NULL when the row holds no mapping;
	 * otherwise it is only a non-NULL marker and must not be dereferenced.
	 */
	uint32_t	*idx[0x200];
	/* malloc'ed buffer holding the non-empty rows back to back */
	void		*data;
	/* number of bytes of row data stored in 'data' (0 on failure) */
	size_t		 size;
};
58 
59 static struct xlat16_table kiconv_xlat16_open(const char *, const char *, int);
60 static int chklocale(int, const char *);
61 
62 #define my_iconv_init() 0
63 #define my_iconv_open iconv_open
64 #define my_iconv iconv
65 #define my_iconv_close iconv_close
66 static size_t my_iconv_char(iconv_t, u_char **, size_t *, u_char **, size_t *);
67 
68 int
kiconv_add_xlat16_cspair(const char * tocode,const char * fromcode,int flag)69 kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
70 {
71           int error;
72           size_t idxsize;
73           struct xlat16_table xt;
74           void *data;
75           char *p;
76           const char unicode[] = ENCODING_UNICODE;
77 
78           if ((flag & KICONV_WCTYPE) == 0 &&
79               strcmp(unicode, tocode) != 0 &&
80               strcmp(unicode, fromcode) != 0 &&
81               kiconv_lookupconv(unicode) == 0) {
82                     error = kiconv_add_xlat16_cspair(unicode, fromcode, flag);
83                     if (error)
84                               return (-1);
85                     error = kiconv_add_xlat16_cspair(tocode, unicode, flag);
86                     return (error);
87           }
88 
89           if (kiconv_lookupcs(tocode, fromcode) == 0)
90                     return (0);
91 
92           if (flag & KICONV_WCTYPE)
93                     xt = kiconv_xlat16_open(fromcode, fromcode, flag);
94           else
95                     xt = kiconv_xlat16_open(tocode, fromcode, flag);
96           if (xt.size == 0)
97                     return (-1);
98 
99           idxsize = sizeof(xt.idx);
100 
101           if ((idxsize + xt.size) > ICONV_CSMAXDATALEN) {
102                     errno = E2BIG;
103                     return (-1);
104           }
105 
106           if ((data = malloc(idxsize + xt.size)) != NULL) {
107                     p = data;
108                     memcpy(p, xt.idx, idxsize);
109                     p += idxsize;
110                     memcpy(p, xt.data, xt.size);
111                     error = kiconv_add_xlat16_table(tocode, fromcode, data,
112                         (int)(idxsize + xt.size));
113                     return (error);
114           }
115 
116           return (-1);
117 }
118 
119 int
kiconv_add_xlat16_cspairs(const char * foreigncode,const char * localcode)120 kiconv_add_xlat16_cspairs(const char *foreigncode, const char *localcode)
121 {
122           int error, locale;
123 
124           error = kiconv_add_xlat16_cspair(foreigncode, localcode,
125               KICONV_FROM_LOWER | KICONV_FROM_UPPER);
126           if (error)
127                     return (error);
128           error = kiconv_add_xlat16_cspair(localcode, foreigncode,
129               KICONV_LOWER | KICONV_UPPER);
130           if (error)
131                     return (error);
132           locale = chklocale(LC_CTYPE, localcode);
133           if (locale == 0) {
134                     error = kiconv_add_xlat16_cspair(KICONV_WCTYPE_NAME, localcode,
135                         KICONV_WCTYPE);
136                     if (error)
137                               return (error);
138           }
139 
140           return (0);
141 }
142 
143 static struct xlat16_table
kiconv_xlat16_open(const char * tocode,const char * fromcode,int lcase)144 kiconv_xlat16_open(const char *tocode, const char *fromcode, int lcase)
145 {
146           u_char src[3], dst[4], *srcp, *dstp, ud, ld;
147           int us, ls, ret;
148           uint16_t c;
149           uint32_t table[0x80];
150           size_t inbytesleft, outbytesleft, pre_q_size, post_q_size;
151           struct xlat16_table xt;
152           struct quirk_replace_list *pre_q_list, *post_q_list;
153           iconv_t cd;
154           char *p;
155 
156           xt.data = NULL;
157           xt.size = 0;
158 
159           src[2] = '\0';
160           dst[3] = '\0';
161 
162           ret = my_iconv_init();
163           if (ret)
164                     return (xt);
165 
166           cd = my_iconv_open(search_quirk(tocode, fromcode, &pre_q_list, &pre_q_size),
167               search_quirk(fromcode, tocode, &post_q_list, &post_q_size));
168           if (cd == (iconv_t) (-1))
169                     return (xt);
170 
171           if ((xt.data = malloc(0x200 * 0x80 * sizeof(uint32_t))) == NULL)
172                     return (xt);
173 
174           p = xt.data;
175 
176           for (ls = 0 ; ls < 0x200 ; ls++) {
177                     xt.idx[ls] = NULL;
178                     for (us = 0 ; us < 0x80 ; us++) {
179                               srcp = src;
180                               dstp = dst;
181 
182                               inbytesleft = 2;
183                               outbytesleft = 3;
184                               bzero(dst, outbytesleft);
185 
186                               c = ((ls & 0x100 ? us | 0x80 : us) << 8) | (u_char)ls;
187 
188                               if (lcase & KICONV_WCTYPE) {
189                                         if ((c & 0xff) == 0)
190                                                   c >>= 8;
191                                         if (iswupper(c)) {
192                                                   c = towlower(c);
193                                                   if ((c & 0xff00) == 0)
194                                                             c <<= 8;
195                                                   table[us] = c | XLAT16_HAS_LOWER_CASE;
196                                         } else if (iswlower(c)) {
197                                                   c = towupper(c);
198                                                   if ((c & 0xff00) == 0)
199                                                             c <<= 8;
200                                                   table[us] = c | XLAT16_HAS_UPPER_CASE;
201                                         } else
202                                                   table[us] = 0;
203                                         /*
204                                          * store not NULL
205                                          */
206                                         if (table[us])
207                                                   xt.idx[ls] = table;
208 
209                                         continue;
210                               }
211 
212                               c = quirk_vendor2unix(c, pre_q_list, pre_q_size);
213                               src[0] = (u_char)(c >> 8);
214                               src[1] = (u_char)c;
215 
216                               ret = my_iconv_char(cd, &srcp, &inbytesleft,
217                                         &dstp, &outbytesleft);
218                               if (ret == -1) {
219                                         table[us] = 0;
220                                         continue;
221                               }
222 
223                               ud = (u_char)dst[0];
224                               ld = (u_char)dst[1];
225 
226                               switch(outbytesleft) {
227                               case 0:
228 #ifdef XLAT16_ACCEPT_3BYTE_CHR
229                                         table[us] = (ud << 8) | ld;
230                                         table[us] |= (u_char)dst[2] << 16;
231                                         table[us] |= XLAT16_IS_3BYTE_CHR;
232 #else
233                                         table[us] = 0;
234                                         continue;
235 #endif
236                                         break;
237                               case 1:
238                                         table[us] = quirk_unix2vendor((ud << 8) | ld,
239                                             post_q_list, post_q_size);
240                                         if ((table[us] >> 8) == 0)
241                                                   table[us] |= XLAT16_ACCEPT_NULL_OUT;
242                                         break;
243                               case 2:
244                                         table[us] = ud;
245                                         if (lcase & KICONV_LOWER && ud != tolower(ud)) {
246                                                   table[us] |= (u_char)tolower(ud) << 16;
247                                                   table[us] |= XLAT16_HAS_LOWER_CASE;
248                                         }
249                                         if (lcase & KICONV_UPPER && ud != toupper(ud)) {
250                                                   table[us] |= (u_char)toupper(ud) << 16;
251                                                   table[us] |= XLAT16_HAS_UPPER_CASE;
252                                         }
253                                         break;
254                               }
255 
256                               switch(inbytesleft) {
257                               case 0:
258                                         if ((ls & 0xff) == 0)
259                                                   table[us] |= XLAT16_ACCEPT_NULL_IN;
260                                         break;
261                               case 1:
262                                         c = ls > 0xff ? us | 0x80 : us;
263                                         if (lcase & KICONV_FROM_LOWER && c != tolower(c)) {
264                                                   table[us] |= (u_char)tolower(c) << 16;
265                                                   table[us] |= XLAT16_HAS_FROM_LOWER_CASE;
266                                         }
267                                         if (lcase & KICONV_FROM_UPPER && c != toupper(c)) {
268                                                   table[us] |= (u_char)toupper(c) << 16;
269                                                   table[us] |= XLAT16_HAS_FROM_UPPER_CASE;
270                                         }
271                                         break;
272                               }
273 
274                               if (table[us] == 0)
275                                         continue;
276 
277                               /*
278                                * store not NULL
279                                */
280                               xt.idx[ls] = table;
281                     }
282                     if (xt.idx[ls]) {
283                               memcpy(p, table, sizeof(table));
284                               p += sizeof(table);
285                     }
286           }
287           my_iconv_close(cd);
288 
289           xt.size = p - (char *)xt.data;
290           xt.data = realloc(xt.data, xt.size);
291           return (xt);
292 }
293 
294 static int
chklocale(int category,const char * code)295 chklocale(int category, const char *code)
296 {
297           char *p;
298           int error = -1;
299 
300           p = strchr(setlocale(category, NULL), '.');
301           if (p++) {
302                     error = strcasecmp(code, p);
303                     if (error) {
304                               /* XXX - can't avoid calling quirk here... */
305                               error = strcasecmp(code, kiconv_quirkcs(p,
306                                   KICONV_VENDOR_MICSFT));
307                     }
308           }
309           return (error);
310 }
311 
312 static size_t
my_iconv_char(iconv_t cd,u_char ** ibuf,size_t * ilen,u_char ** obuf,size_t * olen)313 my_iconv_char(iconv_t cd, u_char **ibuf, size_t * ilen, u_char **obuf,
314           size_t * olen)
315 {
316           u_char *sp, *dp, ilocal[3], olocal[3];
317           u_char c1, c2;
318           int ret;
319           size_t ir, or;
320 
321           sp = *ibuf;
322           dp = *obuf;
323           ir = *ilen;
324 
325           bzero(*obuf, *olen);
326           ret = my_iconv(cd, (char **)&sp, ilen, (char **)&dp, olen);
327           c1 = (*obuf)[0];
328           c2 = (*obuf)[1];
329 
330           if (ret == -1) {
331                     if (*ilen == ir - 1 && (*ibuf)[1] == '\0' && (c1 || c2))
332                               return (0);
333                     else
334                               return (-1);
335           }
336 
337           /*
338            * We must judge if inbuf is a single byte char or double byte char.
339            * Here, to judge, try first byte(*sp) conversion and compare.
340            */
341           ir = 1;
342           or = 3;
343 
344           bzero(olocal, or);
345           memcpy(ilocal, *ibuf, sizeof(ilocal));
346           sp = ilocal;
347           dp = olocal;
348 
349           if ((my_iconv(cd, (char **)&sp, &ir, (char **)&dp, &or)) !=
350               (size_t)-1) {
351                     if (olocal[0] != c1)
352                               return (ret);
353 
354                     if (olocal[1] == c2 && (*ibuf)[1] == '\0') {
355                               /*
356                                * inbuf is a single byte char
357                                */
358                               *ilen = 1;
359                               *olen = or;
360                               return (ret);
361                     }
362 
363                     switch(or) {
364                     case 0:
365                     case 1:
366                               if (olocal[1] == c2) {
367                                         /*
368                                          * inbuf is a single byte char,
369                                          * so return false here.
370                                          */
371                                         return (-1);
372                               } else {
373                                         /*
374                                          * inbuf is a double byte char
375                                          */
376                                         return (ret);
377                               }
378                               break;
379                     case 2:
380                               /*
381                                * should compare second byte of inbuf
382                                */
383                               break;
384                     }
385           } else {
386                     /*
387                      * inbuf clould not be splitted, so inbuf is
388                      * a double byte char.
389                      */
390                     return (ret);
391           }
392 
393           /*
394            * try second byte(*(sp+1)) conversion, and compare
395            */
396           ir = 1;
397           or = 3;
398 
399           bzero(olocal, or);
400 
401           sp = ilocal + 1;
402           dp = olocal;
403 
404           if ((my_iconv(cd,(char **)&sp, &ir, (char **)&dp, &or)) !=
405               (size_t)-1) {
406                     if (olocal[0] == c2)
407                               /*
408                                * inbuf is a single byte char
409                                */
410                               return (-1);
411           }
412 
413           return (ret);
414 }
415 
416 #else /* statically linked */
417 
418 #include <sys/types.h>
419 #include <sys/iconv.h>
420 #include <errno.h>
421 
/*
 * Stub for the statically linked build: table generation is unavailable,
 * so every call fails with errno set to EINVAL.
 */
int
kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
{
	(void)tocode;
	(void)fromcode;
	(void)flag;

	errno = EINVAL;
	return (-1);
}
430 
/*
 * Stub for the statically linked build: always fails with errno set to
 * EINVAL.
 */
int
kiconv_add_xlat16_cspairs(const char *tocode, const char *fromcode)
{
	(void)tocode;
	(void)fromcode;

	errno = EINVAL;
	return (-1);
}
437 
438 #endif /* PIC */
439