1 /*-
2 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
5 * Copyright (c) 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Paul Borman at Krystal Technologies.
10 *
11 * Copyright (c) 2011 The FreeBSD Foundation
12 * All rights reserved.
13 * Portions of this software were developed by David Chisnall
14 * under sponsorship from the FreeBSD Foundation.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 */
40
41 #if defined(LIBC_SCCS) && !defined(lint)
42 static char sccsid[] = "@(#)euc.c 8.1 (Berkeley) 6/4/93";
43 #endif /* LIBC_SCCS and not lint */
44 #include <sys/param.h>
45 __FBSDID("$FreeBSD$");
46
47 #include <errno.h>
48 #include <limits.h>
49 #include <runetype.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <wchar.h>
53 #include "mblocal.h"
54
extern int __mb_sb_limit;

/* Shared conversion engines, parameterized by the CS2/CS3 lead bytes and widths. */
static size_t	_EUC_mbrtowc_impl(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps, uint8_t cs2, uint8_t cs2width,
		    uint8_t cs3, uint8_t cs3width);
static size_t	_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps, uint8_t cs2, uint8_t cs2width,
		    uint8_t cs3, uint8_t cs3width);

/* Single-character multibyte -> wide conversions, one per encoding. */
static size_t	_EUC_CN_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);

/* Single-character wide -> multibyte conversions, one per encoding. */
static size_t	_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);

/* String multibyte -> wide conversions, one per encoding. */
static size_t	_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);

/* String wide -> multibyte conversions, one per encoding. */
static size_t	_EUC_CN_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);

/* Conversion-state query shared by every EUC variant. */
static int	_EUC_mbsinit(const mbstate_t *ps);
107
/*
 * Conversion state kept between calls.  The routines in this file obtain it
 * by casting the caller's mbstate_t pointer to this type.
 */
typedef struct {
	wchar_t	ch;	/* bytes of a partial character gathered so far */
	int	set;	/* not referenced by the routines shown here */
	int	want;	/* bytes still needed; 0 means initial state */
} _EucState;
113
114 static int
_EUC_mbsinit(const mbstate_t * ps)115 _EUC_mbsinit(const mbstate_t *ps)
116 {
117
118 return (ps == NULL || ((const _EucState *)ps)->want == 0);
119 }
120
121 /*
122 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
123 */
124 int
_EUC_CN_init(struct xlocale_ctype * l,_RuneLocale * rl)125 _EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
126 {
127 l->__mbrtowc = _EUC_CN_mbrtowc;
128 l->__wcrtomb = _EUC_CN_wcrtomb;
129 l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
130 l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
131 l->__mbsinit = _EUC_mbsinit;
132
133 l->runes = rl;
134 l->__mb_cur_max = 4;
135 l->__mb_sb_limit = 256;
136 return (0);
137 }
138
139 static size_t
_EUC_CN_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)140 _EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
141 size_t n, mbstate_t * __restrict ps)
142 {
143 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
144 }
145
146 static size_t
_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)147 _EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
148 const char ** __restrict src,
149 size_t nms, size_t len, mbstate_t * __restrict ps)
150 {
151 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
152 }
153
154 static size_t
_EUC_CN_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)155 _EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
156 mbstate_t * __restrict ps)
157 {
158 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
159 }
160
161 static size_t
_EUC_CN_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)162 _EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
163 size_t nwc, size_t len, mbstate_t * __restrict ps)
164 {
165 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
166 }
167
168 /*
169 * EUC-KR uses only CS0 and CS1.
170 */
171 int
_EUC_KR_init(struct xlocale_ctype * l,_RuneLocale * rl)172 _EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
173 {
174 l->__mbrtowc = _EUC_KR_mbrtowc;
175 l->__wcrtomb = _EUC_KR_wcrtomb;
176 l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
177 l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
178 l->__mbsinit = _EUC_mbsinit;
179
180 l->runes = rl;
181 l->__mb_cur_max = 2;
182 l->__mb_sb_limit = 128;
183 return (0);
184 }
185
/*
 * EUC-KR multibyte-to-wide conversion of a single character.
 *
 * Delegates to the common decoder with both shift slots passed as 0/0,
 * so only ASCII and two-byte CS1 sequences are accepted.
 */
static size_t
_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
    size_t n, mbstate_t * __restrict ps)
{
	size_t rv;

	rv = _EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0);
	return (rv);
}
192
193 static size_t
_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)194 _EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
195 const char ** __restrict src,
196 size_t nms, size_t len, mbstate_t * __restrict ps)
197 {
198 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
199 }
200
/*
 * EUC-KR wide-to-multibyte conversion of a single character.
 *
 * Delegates to the common encoder with both shift slots passed as 0/0.
 */
static size_t
_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
{
	size_t rv;

	rv = _EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0);
	return (rv);
}
207
208 static size_t
_EUC_KR_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)209 _EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
210 size_t nwc, size_t len, mbstate_t * __restrict ps)
211 {
212 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
213 }
214
215 /*
216 * EUC-JP uses CS0, CS1, CS2, and CS3.
217 */
218 int
_EUC_JP_init(struct xlocale_ctype * l,_RuneLocale * rl)219 _EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
220 {
221 l->__mbrtowc = _EUC_JP_mbrtowc;
222 l->__wcrtomb = _EUC_JP_wcrtomb;
223 l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
224 l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
225 l->__mbsinit = _EUC_mbsinit;
226
227 l->runes = rl;
228 l->__mb_cur_max = 3;
229 l->__mb_sb_limit = 196;
230 return (0);
231 }
232
233 static size_t
_EUC_JP_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)234 _EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
235 size_t n, mbstate_t * __restrict ps)
236 {
237 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
238 }
239
240 static size_t
_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)241 _EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
242 const char ** __restrict src,
243 size_t nms, size_t len, mbstate_t * __restrict ps)
244 {
245 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
246 }
247
248 static size_t
_EUC_JP_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)249 _EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
250 mbstate_t * __restrict ps)
251 {
252 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
253 }
254
255 static size_t
_EUC_JP_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)256 _EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
257 size_t nwc, size_t len, mbstate_t * __restrict ps)
258 {
259 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
260 }
261
262 /*
263 * EUC-TW uses CS0, CS1, and CS2.
264 */
265 int
_EUC_TW_init(struct xlocale_ctype * l,_RuneLocale * rl)266 _EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
267 {
268 l->__mbrtowc = _EUC_TW_mbrtowc;
269 l->__wcrtomb = _EUC_TW_wcrtomb;
270 l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
271 l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
272 l->__mbsinit = _EUC_mbsinit;
273
274 l->runes = rl;
275 l->__mb_cur_max = 4;
276 l->__mb_sb_limit = 256;
277 return (0);
278 }
279
280 static size_t
_EUC_TW_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)281 _EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
282 size_t n, mbstate_t * __restrict ps)
283 {
284 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
285 }
286
287 static size_t
_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)288 _EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
289 const char ** __restrict src,
290 size_t nms, size_t len, mbstate_t * __restrict ps)
291 {
292 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
293 }
294
295 static size_t
_EUC_TW_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)296 _EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
297 mbstate_t * __restrict ps)
298 {
299 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
300 }
301
302 static size_t
_EUC_TW_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)303 _EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
304 size_t nwc, size_t len, mbstate_t * __restrict ps)
305 {
306 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
307 }
308
309 /*
310 * Common EUC code.
311 */
312
313 static size_t
_EUC_mbrtowc_impl(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)314 _EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
315 size_t n, mbstate_t * __restrict ps,
316 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
317 {
318 _EucState *es;
319 int i, want;
320 wchar_t wc = 0;
321 unsigned char ch, chs;
322
323 es = (_EucState *)ps;
324
325 if (es->want < 0 || es->want > MB_CUR_MAX) {
326 errno = EINVAL;
327 return ((size_t)-1);
328 }
329
330 if (s == NULL) {
331 s = "";
332 n = 1;
333 pwc = NULL;
334 }
335
336 if (n == 0)
337 /* Incomplete multibyte sequence */
338 return ((size_t)-2);
339
340 if (es->want == 0) {
341 /* Fast path for plain ASCII (CS0) */
342 if (((ch = (unsigned char)*s) & 0x80) == 0) {
343 if (pwc != NULL)
344 *pwc = ch;
345 return (ch != '\0' ? 1 : 0);
346 }
347
348 if (ch >= 0xa1) {
349 /* CS1 */
350 want = 2;
351 } else if (ch == cs2) {
352 want = cs2width;
353 } else if (ch == cs3) {
354 want = cs3width;
355 } else {
356 errno = EILSEQ;
357 return ((size_t)-1);
358 }
359
360
361 es->want = want;
362 es->ch = 0;
363 } else {
364 want = es->want;
365 wc = es->ch;
366 }
367
368 for (i = 0; i < MIN(want, n); i++) {
369 wc <<= 8;
370 chs = *s;
371 wc |= chs;
372 s++;
373 }
374 if (i < want) {
375 /* Incomplete multibyte sequence */
376 es->want = want - i;
377 es->ch = wc;
378 errno = EILSEQ;
379 return ((size_t)-2);
380 }
381 if (pwc != NULL)
382 *pwc = wc;
383 es->want = 0;
384 return (wc == L'\0' ? 0 : want);
385 }
386
387 static size_t
_EUC_wcrtomb_impl(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)388 _EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
389 mbstate_t * __restrict ps,
390 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
391 {
392 _EucState *es;
393 int i, len;
394 wchar_t nm;
395
396 es = (_EucState *)ps;
397
398 if (es->want != 0) {
399 errno = EINVAL;
400 return ((size_t)-1);
401 }
402
403 if (s == NULL)
404 /* Reset to initial shift state (no-op) */
405 return (1);
406
407 if ((wc & ~0x7f) == 0) {
408 /* Fast path for plain ASCII (CS0) */
409 *s = (char)wc;
410 return (1);
411 }
412
413 /* Determine the "length" */
414 if ((unsigned)wc > 0xffffff) {
415 len = 4;
416 } else if ((unsigned)wc > 0xffff) {
417 len = 3;
418 } else if ((unsigned)wc > 0xff) {
419 len = 2;
420 } else {
421 len = 1;
422 }
423
424 if (len > MB_CUR_MAX) {
425 errno = EILSEQ;
426 return ((size_t)-1);
427 }
428
429 /* This first check excludes CS1, which is implicitly valid. */
430 if ((wc < 0xa100) || (wc > 0xffff)) {
431 /* Check for valid CS2 or CS3 */
432 nm = (wc >> ((len - 1) * 8));
433 if (nm == cs2) {
434 if (len != cs2width) {
435 errno = EILSEQ;
436 return ((size_t)-1);
437 }
438 } else if (nm == cs3) {
439 if (len != cs3width) {
440 errno = EILSEQ;
441 return ((size_t)-1);
442 }
443 } else {
444 errno = EILSEQ;
445 return ((size_t)-1);
446 }
447 }
448
449 /* Stash the bytes, least significant last */
450 for (i = len - 1; i >= 0; i--) {
451 s[i] = (wc & 0xff);
452 wc >>= 8;
453 }
454 return (len);
455 }
456