/* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c)2003 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
30
#include <sys/cdefs.h>
#include <sys/types.h>

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include "citrus_namespace.h"
#include "citrus_types.h"
#include "citrus_bcs.h"
#include "citrus_module.h"
#include "citrus_stdenc.h"
#include "citrus_gbk2k.h"
50
51
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
55
/*
 * Per-conversion state: the bytes of a multibyte character that have been
 * collected so far but not yet converted.
 */
typedef struct _GBK2KState {
	int	 chlen;		/* number of bytes currently held in ch[] */
	char	 ch[4];		/* pending bytes, oldest first */
} _GBK2KState;
60
/*
 * Per-encoding configuration.  The converters in this file only accept the
 * four-byte forms when mb_cur_max is 4.
 */
typedef struct {
	int	 mb_cur_max;	/* longest multibyte sequence, in bytes */
} _GBK2KEncodingInfo;
64
/* Accessors into the template's combined encoding-info/state container. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

/* Names and properties of this encoding, as seen by the template code. */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _ENCODING_STATE			_GBK2KState
#define _ENCODING_MB_CUR_MAX(_ei_)	((_ei_)->mb_cur_max)
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
74
75 static __inline void
76 /*ARGSUSED*/
_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused,_GBK2KState * __restrict s)77 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused,
78 _GBK2KState * __restrict s)
79 {
80
81 memset(s, 0, sizeof(*s));
82 }
83
#if 0
/*
 * Disabled (compiled out): copy a state object into an opaque buffer.
 * The buffer must be able to hold sizeof(_GBK2KState) bytes.
 */
static __inline void
/*ARGSUSED*/
_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _GBK2KState * __restrict s)
{

	memcpy(pspriv, (const void *)s, sizeof(*s));
}

/*
 * Disabled (compiled out): restore a state object from an opaque buffer
 * previously filled by _citrus_GBK2K_pack_state().
 */
static __inline void
/*ARGSUSED*/
_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused,
    _GBK2KState * __restrict s, const void * __restrict pspriv)
{

	memcpy((void *)s, pspriv, sizeof(*s));
}
#endif
103
/*
 * True when the low 8 bits of c form a single-byte character (0x00..0x7f).
 * Only the low byte is examined, so a sign-extended char is handled too.
 */
static __inline bool
_mb_singlebyte(int c)
{

	return ((c & 0xff) <= 0x7f);
}
110
/*
 * True when the low 8 bits of c lie in 0x81..0xfe, the range accepted as
 * the first byte of a multibyte sequence (and as the third byte of a
 * four-byte sequence).
 */
static __inline bool
_mb_leadbyte(int c)
{

	c &= 0xff;
	return (0x81 <= c && c <= 0xfe);
}
118
/*
 * True when the low 8 bits of c lie in 0x40..0x7e or 0x80..0xfe, the ranges
 * accepted as the second byte of a two-byte sequence.
 */
static __inline bool
_mb_trailbyte(int c)
{

	c &= 0xff;
	return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe));
}
126
/*
 * True when the low 8 bits of c lie in 0x30..0x39 (ASCII digits), the range
 * accepted as the second and fourth byte of a four-byte sequence.
 */
static __inline bool
_mb_surrogate(int c)
{

	c &= 0xff;
	return (0x30 <= c && c <= 0x39);
}
134
/*
 * Number of bytes needed to store the wide character value v:
 * 1 if it fits in 8 bits, 2 if it fits in 16 bits, otherwise 4.
 * The value is inspected as an unsigned 32-bit quantity.
 */
static __inline int
_mb_count(wchar_t v)
{
	uint32_t c;

	c = (uint32_t)v; /* XXX */
	if (!(c & 0xffffff00))
		return (1);
	if (!(c & 0xffff0000))
		return (2);
	return (4);
}
147
/*
 * Both macros operate on a variable named `psenc' that must be in scope.
 * _PSENC is the most recently buffered byte (valid only when chlen > 0);
 * _PUSH_PSENC(c) appends c to the buffer and evaluates to the stored byte.
 */
#define _PSENC		(psenc->ch[psenc->chlen - 1])
#define _PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
150
151 static int
_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,wchar_t * __restrict pwc,char ** __restrict s,size_t n,_GBK2KState * __restrict psenc,size_t * __restrict nresult)152 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
153 wchar_t * __restrict pwc, char ** __restrict s, size_t n,
154 _GBK2KState * __restrict psenc, size_t * __restrict nresult)
155 {
156 char *s0, *s1;
157 wchar_t wc;
158 int chlenbak, len;
159
160 s0 = *s;
161
162 if (s0 == NULL) {
163 /* _citrus_GBK2K_init_state(ei, psenc); */
164 psenc->chlen = 0;
165 *nresult = 0;
166 return (0);
167 }
168
169 chlenbak = psenc->chlen;
170
171 switch (psenc->chlen) {
172 case 3:
173 if (!_mb_leadbyte (_PSENC))
174 goto invalid;
175 /* FALLTHROUGH */
176 case 2:
177 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
178 goto invalid;
179 /* FALLTHROUGH */
180 case 1:
181 if (!_mb_leadbyte (_PSENC))
182 goto invalid;
183 /* FALLTHOROUGH */
184 case 0:
185 break;
186 default:
187 goto invalid;
188 }
189
190 for (;;) {
191 if (n-- < 1)
192 goto restart;
193
194 _PUSH_PSENC(*s0++);
195
196 switch (psenc->chlen) {
197 case 1:
198 if (_mb_singlebyte(_PSENC))
199 goto convert;
200 if (_mb_leadbyte (_PSENC))
201 continue;
202 goto ilseq;
203 case 2:
204 if (_mb_trailbyte (_PSENC))
205 goto convert;
206 if (ei->mb_cur_max == 4 &&
207 _mb_surrogate (_PSENC))
208 continue;
209 goto ilseq;
210 case 3:
211 if (_mb_leadbyte (_PSENC))
212 continue;
213 goto ilseq;
214 case 4:
215 if (_mb_surrogate (_PSENC))
216 goto convert;
217 goto ilseq;
218 }
219 }
220
221 convert:
222 len = psenc->chlen;
223 s1 = &psenc->ch[0];
224 wc = 0;
225 while (len-- > 0)
226 wc = (wc << 8) | (*s1++ & 0xff);
227
228 if (pwc != NULL)
229 *pwc = wc;
230 *s = s0;
231 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
232 /* _citrus_GBK2K_init_state(ei, psenc); */
233 psenc->chlen = 0;
234
235 return (0);
236
237 restart:
238 *s = s0;
239 *nresult = (size_t)-2;
240
241 return (0);
242
243 invalid:
244 return (EINVAL);
245
246 ilseq:
247 *nresult = (size_t)-1;
248 return (EILSEQ);
249 }
250
251 static int
_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,char * __restrict s,size_t n,wchar_t wc,_GBK2KState * __restrict psenc,size_t * __restrict nresult)252 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
253 char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc,
254 size_t * __restrict nresult)
255 {
256 size_t len;
257 int ret;
258
259 if (psenc->chlen != 0) {
260 ret = EINVAL;
261 goto err;
262 }
263
264 len = _mb_count(wc);
265 if (n < len) {
266 ret = E2BIG;
267 goto err;
268 }
269
270 switch (len) {
271 case 1:
272 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) {
273 ret = EILSEQ;
274 goto err;
275 }
276 break;
277 case 2:
278 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) ||
279 !_mb_trailbyte (_PUSH_PSENC(wc))) {
280 ret = EILSEQ;
281 goto err;
282 }
283 break;
284 case 4:
285 if (ei->mb_cur_max != 4 ||
286 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) ||
287 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
288 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) ||
289 !_mb_surrogate (_PUSH_PSENC(wc))) {
290 ret = EILSEQ;
291 goto err;
292 }
293 break;
294 }
295
296 memcpy(s, psenc->ch, psenc->chlen);
297 *nresult = psenc->chlen;
298 /* _citrus_GBK2K_init_state(ei, psenc); */
299 psenc->chlen = 0;
300
301 return (0);
302
303 err:
304 *nresult = (size_t)-1;
305 return (ret);
306 }
307
308 static __inline int
309 /*ARGSUSED*/
_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)310 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused,
311 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
312 {
313 uint8_t ch, cl;
314
315 if ((uint32_t)wc < 0x80) {
316 /* ISO646 */
317 *csid = 0;
318 *idx = (_index_t)wc;
319 } else if ((uint32_t)wc >= 0x10000) {
320 /* GBKUCS : XXX */
321 *csid = 3;
322 *idx = (_index_t)wc;
323 } else {
324 ch = (uint8_t)(wc >> 8);
325 cl = (uint8_t)wc;
326 if (ch >= 0xA1 && cl >= 0xA1) {
327 /* EUC G1 */
328 *csid = 1;
329 *idx = (_index_t)wc & 0x7F7FU;
330 } else {
331 /* extended area (0x8140-) */
332 *csid = 2;
333 *idx = (_index_t)wc;
334 }
335 }
336
337 return (0);
338 }
339
340 static __inline int
341 /*ARGSUSED*/
_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,wchar_t * __restrict wc,_csid_t csid,_index_t idx)342 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
343 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
344 {
345
346 switch (csid) {
347 case 0:
348 /* ISO646 */
349 *wc = (wchar_t)idx;
350 break;
351 case 1:
352 /* EUC G1 */
353 *wc = (wchar_t)idx | 0x8080U;
354 break;
355 case 2:
356 /* extended area */
357 *wc = (wchar_t)idx;
358 break;
359 case 3:
360 /* GBKUCS : XXX */
361 if (ei->mb_cur_max != 4)
362 return (EINVAL);
363 *wc = (wchar_t)idx;
364 break;
365 default:
366 return (EILSEQ);
367 }
368
369 return (0);
370 }
371
372 static __inline int
373 /*ARGSUSED*/
_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused,_GBK2KState * __restrict psenc,int * __restrict rstate)374 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused,
375 _GBK2KState * __restrict psenc, int * __restrict rstate)
376 {
377
378 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
379 _STDENC_SDGEN_INCOMPLETE_CHAR;
380 return (0);
381 }
382
383 static int
384 /*ARGSUSED*/
_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)385 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
386 const void * __restrict var, size_t lenvar)
387 {
388 const char *p;
389
390 p = var;
391 memset((void *)ei, 0, sizeof(*ei));
392 ei->mb_cur_max = 4;
393 while (lenvar > 0) {
394 switch (_bcs_tolower(*p)) {
395 case '2':
396 MATCH("2byte", ei->mb_cur_max = 2);
397 break;
398 }
399 p++;
400 lenvar--;
401 }
402
403 return (0);
404 }
405
406 static void
407 /*ARGSUSED*/
_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo * ei __unused)408 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused)
409 {
410
411 }
412
/* ----------------------------------------------------------------------
 * public interface for stdenc
 */
416
417 _CITRUS_STDENC_DECLS(GBK2K);
418 _CITRUS_STDENC_DEF_OPS(GBK2K);
419
420 #include "citrus_stdenc_template.h"
421