1 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c)2004, 2006 Citrus Project,
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 */
31
32 #include <sys/cdefs.h>
33 #include <sys/types.h>
34
35 #include <assert.h>
36 #include <errno.h>
37 #include <limits.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44
45 #include "citrus_namespace.h"
46 #include "citrus_types.h"
47 #include "citrus_module.h"
48 #include "citrus_stdenc.h"
49 #include "citrus_zw.h"
50
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
54
/*
 * Per-encoding information.  This encoding has no configurable
 * parameters; the single member only keeps the struct non-empty.
 */
typedef struct {
	int	dummy;
} _ZWEncodingInfo;
58
/*
 * Shift state of the converter.
 */
typedef enum {
	NONE,		/* initial state: no mode selected yet */
	AMBIGIOUS,	/* a leading 'z' was read; next byte decides the mode */
	ASCII,		/* bytes are passed through as single characters */
	GB2312		/* "zW" was read; bytes are taken in pairs */
} _ZWCharset;
62
63 typedef struct {
64 _ZWCharset charset;
65 int chlen;
66 char ch[4];
67 } _ZWState;
68
/*
 * Accessors and configuration macros.  They bind the generic names to the
 * ZW-specific types and functions of this file (presumably for
 * citrus_stdenc_template.h, which is included at the end of the file).
 */
#define	_CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define	_CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

#define	_FUNCNAME(m)			_citrus_ZW_##m
#define	_ENCODING_INFO			_ZWEncodingInfo
#define	_ENCODING_STATE			_ZWState
#define	_ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define	_ENCODING_IS_STATE_DEPENDENT	1
/* A state needs explicit initialization once any mode has been selected. */
#define	_STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
78
79 static __inline void
80 /*ARGSUSED*/
_citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused,_ZWState * __restrict psenc)81 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused,
82 _ZWState * __restrict psenc)
83 {
84
85 psenc->chlen = 0;
86 psenc->charset = NONE;
87 }
88
#if 0
/*
 * Disabled helpers: copy a conversion state to / from an opaque buffer.
 * They are compiled out and kept for reference only.
 */
static __inline void
/*ARGSUSED*/
_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _ZWState * __restrict psenc)
{

	memcpy(pspriv, psenc, sizeof(*psenc));
}

static __inline void
/*ARGSUSED*/
_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused,
    _ZWState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy(psenc, pspriv, sizeof(*psenc));
}
#endif
108
109 static int
_citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,wchar_t * __restrict pwc,char ** __restrict s,size_t n,_ZWState * __restrict psenc,size_t * __restrict nresult)110 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
111 wchar_t * __restrict pwc, char **__restrict s, size_t n,
112 _ZWState * __restrict psenc, size_t * __restrict nresult)
113 {
114 char *s0;
115 wchar_t wc;
116 int ch, len;
117
118 if (*s == NULL) {
119 _citrus_ZW_init_state(ei, psenc);
120 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
121 return (0);
122 }
123 s0 = *s;
124 len = 0;
125
126 #define STORE \
127 do { \
128 if (n-- < 1) { \
129 *nresult = (size_t)-2; \
130 *s = s0; \
131 return (0); \
132 } \
133 ch = (unsigned char)*s0++; \
134 if (len++ > MB_LEN_MAX || ch > 0x7F)\
135 goto ilseq; \
136 psenc->ch[psenc->chlen++] = ch; \
137 } while (/*CONSTCOND*/0)
138
139 loop:
140 switch (psenc->charset) {
141 case ASCII:
142 switch (psenc->chlen) {
143 case 0:
144 STORE;
145 switch (psenc->ch[0]) {
146 case '\0': case '\n':
147 psenc->charset = NONE;
148 }
149 /*FALLTHROUGH*/
150 case 1:
151 break;
152 default:
153 return (EINVAL);
154 }
155 ch = (unsigned char)psenc->ch[0];
156 if (ch > 0x7F)
157 goto ilseq;
158 wc = (wchar_t)ch;
159 psenc->chlen = 0;
160 break;
161 case NONE:
162 if (psenc->chlen != 0)
163 return (EINVAL);
164 STORE;
165 ch = (unsigned char)psenc->ch[0];
166 if (ch != 'z') {
167 if (ch != '\n' && ch != '\0')
168 psenc->charset = ASCII;
169 wc = (wchar_t)ch;
170 psenc->chlen = 0;
171 break;
172 }
173 psenc->charset = AMBIGIOUS;
174 psenc->chlen = 0;
175 /* FALLTHROUGH */
176 case AMBIGIOUS:
177 if (psenc->chlen != 0)
178 return (EINVAL);
179 STORE;
180 if (psenc->ch[0] != 'W') {
181 psenc->charset = ASCII;
182 wc = L'z';
183 break;
184 }
185 psenc->charset = GB2312;
186 psenc->chlen = 0;
187 /* FALLTHROUGH */
188 case GB2312:
189 switch (psenc->chlen) {
190 case 0:
191 STORE;
192 ch = (unsigned char)psenc->ch[0];
193 if (ch == '\0') {
194 psenc->charset = NONE;
195 wc = (wchar_t)ch;
196 psenc->chlen = 0;
197 break;
198 } else if (ch == '\n') {
199 psenc->charset = NONE;
200 psenc->chlen = 0;
201 goto loop;
202 }
203 /*FALLTHROUGH*/
204 case 1:
205 STORE;
206 if (psenc->ch[0] == ' ') {
207 ch = (unsigned char)psenc->ch[1];
208 wc = (wchar_t)ch;
209 psenc->chlen = 0;
210 break;
211 } else if (psenc->ch[0] == '#') {
212 ch = (unsigned char)psenc->ch[1];
213 if (ch == '\n') {
214 psenc->charset = NONE;
215 wc = (wchar_t)ch;
216 psenc->chlen = 0;
217 break;
218 } else if (ch == ' ') {
219 wc = (wchar_t)ch;
220 psenc->chlen = 0;
221 break;
222 }
223 }
224 ch = (unsigned char)psenc->ch[0];
225 if (ch < 0x21 || ch > 0x7E)
226 goto ilseq;
227 wc = (wchar_t)(ch << 8);
228 ch = (unsigned char)psenc->ch[1];
229 if (ch < 0x21 || ch > 0x7E) {
230 ilseq:
231 *nresult = (size_t)-1;
232 return (EILSEQ);
233 }
234 wc |= (wchar_t)ch;
235 psenc->chlen = 0;
236 break;
237 default:
238 return (EINVAL);
239 }
240 break;
241 default:
242 return (EINVAL);
243 }
244 if (pwc != NULL)
245 *pwc = wc;
246
247 *nresult = (size_t)(wc == 0 ? 0 : len);
248 *s = s0;
249
250 return (0);
251 }
252
253 static int
254 /*ARGSUSED*/
_citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused,char * __restrict s,size_t n,wchar_t wc,_ZWState * __restrict psenc,size_t * __restrict nresult)255 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused,
256 char *__restrict s, size_t n, wchar_t wc,
257 _ZWState * __restrict psenc, size_t * __restrict nresult)
258 {
259 int ch;
260
261 if (psenc->chlen != 0)
262 return (EINVAL);
263 if ((uint32_t)wc <= 0x7F) {
264 ch = (unsigned char)wc;
265 switch (psenc->charset) {
266 case NONE:
267 if (ch == '\0' || ch == '\n')
268 psenc->ch[psenc->chlen++] = ch;
269 else {
270 if (n < 4)
271 return (E2BIG);
272 n -= 4;
273 psenc->ch[psenc->chlen++] = 'z';
274 psenc->ch[psenc->chlen++] = 'W';
275 psenc->ch[psenc->chlen++] = ' ';
276 psenc->ch[psenc->chlen++] = ch;
277 psenc->charset = GB2312;
278 }
279 break;
280 case GB2312:
281 if (n < 2)
282 return (E2BIG);
283 n -= 2;
284 if (ch == '\0') {
285 psenc->ch[psenc->chlen++] = '\n';
286 psenc->ch[psenc->chlen++] = '\0';
287 psenc->charset = NONE;
288 } else if (ch == '\n') {
289 psenc->ch[psenc->chlen++] = '#';
290 psenc->ch[psenc->chlen++] = '\n';
291 psenc->charset = NONE;
292 } else {
293 psenc->ch[psenc->chlen++] = ' ';
294 psenc->ch[psenc->chlen++] = ch;
295 }
296 break;
297 default:
298 return (EINVAL);
299 }
300 } else if ((uint32_t)wc <= 0x7E7E) {
301 switch (psenc->charset) {
302 case NONE:
303 if (n < 2)
304 return (E2BIG);
305 n -= 2;
306 psenc->ch[psenc->chlen++] = 'z';
307 psenc->ch[psenc->chlen++] = 'W';
308 psenc->charset = GB2312;
309 /* FALLTHROUGH*/
310 case GB2312:
311 if (n < 2)
312 return (E2BIG);
313 n -= 2;
314 ch = (wc >> 8) & 0xFF;
315 if (ch < 0x21 || ch > 0x7E)
316 goto ilseq;
317 psenc->ch[psenc->chlen++] = ch;
318 ch = wc & 0xFF;
319 if (ch < 0x21 || ch > 0x7E)
320 goto ilseq;
321 psenc->ch[psenc->chlen++] = ch;
322 break;
323 default:
324 return (EINVAL);
325 }
326 } else {
327 ilseq:
328 *nresult = (size_t)-1;
329 return (EILSEQ);
330 }
331 memcpy(s, psenc->ch, psenc->chlen);
332 *nresult = psenc->chlen;
333 psenc->chlen = 0;
334
335 return (0);
336 }
337
338 static int
339 /*ARGSUSED*/
_citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused,char * __restrict s,size_t n,_ZWState * __restrict psenc,size_t * __restrict nresult)340 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused,
341 char * __restrict s, size_t n, _ZWState * __restrict psenc,
342 size_t * __restrict nresult)
343 {
344
345 if (psenc->chlen != 0)
346 return (EINVAL);
347 switch (psenc->charset) {
348 case GB2312:
349 if (n-- < 1)
350 return (E2BIG);
351 psenc->ch[psenc->chlen++] = '\n';
352 psenc->charset = NONE;
353 /*FALLTHROUGH*/
354 case NONE:
355 *nresult = psenc->chlen;
356 if (psenc->chlen > 0) {
357 memcpy(s, psenc->ch, psenc->chlen);
358 psenc->chlen = 0;
359 }
360 break;
361 default:
362 return (EINVAL);
363 }
364
365 return (0);
366 }
367
368 static __inline int
369 /*ARGSUSED*/
_citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused,_ZWState * __restrict psenc,int * __restrict rstate)370 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused,
371 _ZWState * __restrict psenc, int * __restrict rstate)
372 {
373
374 switch (psenc->charset) {
375 case NONE:
376 if (psenc->chlen != 0)
377 return (EINVAL);
378 *rstate = _STDENC_SDGEN_INITIAL;
379 break;
380 case AMBIGIOUS:
381 if (psenc->chlen != 0)
382 return (EINVAL);
383 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
384 break;
385 case ASCII:
386 case GB2312:
387 switch (psenc->chlen) {
388 case 0:
389 *rstate = _STDENC_SDGEN_STABLE;
390 break;
391 case 1:
392 *rstate = (psenc->ch[0] == '#') ?
393 _STDENC_SDGEN_INCOMPLETE_SHIFT :
394 _STDENC_SDGEN_INCOMPLETE_CHAR;
395 break;
396 default:
397 return (EINVAL);
398 }
399 break;
400 default:
401 return (EINVAL);
402 }
403 return (0);
404 }
405
406 static __inline int
407 /*ARGSUSED*/
_citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)408 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused,
409 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
410 {
411
412 *csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1;
413 *idx = (_index_t)wc;
414
415 return (0);
416 }
417
418 static __inline int
419 /*ARGSUSED*/
_citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused,wchar_t * __restrict wc,_csid_t csid,_index_t idx)420 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused,
421 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
422 {
423
424 switch (csid) {
425 case 0: case 1:
426 break;
427 default:
428 return (EINVAL);
429 }
430 *wc = (wchar_t)idx;
431
432 return (0);
433 }
434
435 static void
436 /*ARGSUSED*/
_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo * ei __unused)437 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused)
438 {
439
440 }
441
442 static int
443 /*ARGSUSED*/
_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused,const void * __restrict var __unused,size_t lenvar __unused)444 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused,
445 const void *__restrict var __unused, size_t lenvar __unused)
446 {
447
448 return (0);
449 }
450
451 /* ----------------------------------------------------------------------
452 * public interface for stdenc
453 */
454
455 _CITRUS_STDENC_DECLS(ZW);
456 _CITRUS_STDENC_DEF_OPS(ZW);
457
458 #include "citrus_stdenc_template.h"
459