1 /*        $NetBSD: plural_parser.c,v 1.5 2025/02/26 04:49:45 andvar Exp $       */
2 
3 /*-
4  * Copyright (c) 2005 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __RCSID("$NetBSD: plural_parser.c,v 1.5 2025/02/26 04:49:45 andvar Exp $");
32 
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <citrus/citrus_namespace.h>
38 #include <citrus/citrus_region.h>
39 #include <citrus/citrus_memstream.h>
40 #include <citrus/citrus_bcs.h>
41 #include "plural_parser.h"
42 
/* The stand-alone test programs accept empty input and any identifier. */
#if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
#define ALLOW_EMPTY
#define ALLOW_ARBITRARY_IDENTIFIER
#endif

/* Limits. */
#define MAX_LEN_ATOM		10	/* max characters in one atom */
#define MAX_NUM_OPERANDS	3	/* '?' uses all three operand slots */

/*
 * Token codes.  Single-character tokens ('?', ':', '(', ')', '!') are
 * returned as the character itself, so the symbolic codes start above
 * the character range.
 */
#define T_EOF			EOF
#define T_NONE			0x100	/* no token */

/* operator classes; the concrete operator is kept in token_data.op */
#define T_LAND			0x101	/* && */
#define T_LOR			0x102	/* || */
#define T_EQUALITY		0x103	/* == or != */
#define T_RELATIONAL		0x104	/* <, >, <= or >= */
#define T_ADDITIVE		0x105	/* + or - */
#define T_MULTIPLICATIVE	0x106	/* *, / or % */

/* atoms */
#define T_IDENTIFIER		0x200
#define T_CONSTANT		0x201

/* errors */
#define T_ILCHAR		0x300
#define T_TOOLONG		0x301
#define T_ILTOKEN		0x302
#define T_ILEND			0x303
#define T_NOMEM			0x304
#define T_NOTFOUND		0x305
#define T_ILPLURAL		0x306

/*
 * Classification by numeric range.  Note that T_IS_OPERATOR() is also
 * true for T_EOF, T_NONE and every single-character token.
 */
#define T_IS_OPERATOR(t)	((t) < 0x200)
#define T_IS_ERROR(t)		((t) >= 0x300)

/*
 * Codes of the two-character comparison operators: the sum of both
 * characters, which is what the tokenizer stores in token_data.op.
 */
#define OP_EQ			('='+'=')
#define OP_NEQ			('!'+'=')
#define OP_LTEQ			('<'+'=')
#define OP_GTEQ			('>'+'=')

/* Symbols and their lengths (terminating NUL excluded). */
#define PLURAL_NUMBER_SYMBOL	"n"
#define NPLURALS_SYMBOL		"nplurals"
#define LEN_NPLURAL_SYMBOL	(sizeof(NPLURALS_SYMBOL) - 1)
#define PLURAL_SYMBOL		"plural"
#define LEN_PLURAL_SYMBOL	(sizeof(PLURAL_SYMBOL) - 1)
#define PLURAL_FORMS		"Plural-Forms:"
#define LEN_PLURAL_FORMS	(sizeof(PLURAL_FORMS) - 1)
83 
84 /* ----------------------------------------------------------------------
85  * tokenizer part
86  */
87 
/*
 * Payload that accompanies a token.  Which member is meaningful depends
 * on the token code returned by the tokenizer.
 */
union token_data {
	/* value of a T_CONSTANT */
	unsigned long constant;
#ifdef ALLOW_ARBITRARY_IDENTIFIER
	/* NUL-terminated name of a T_IDENTIFIER (test builds only) */
	char identifier[MAX_LEN_ATOM+1];
#endif
	/* operator character, or an OP_* code for two-character operators */
	char op;
};
96 
97 struct tokenizer_context
98 {
99           struct _memstream memstream;
100           struct {
101                     int token;
102                     union token_data token_data;
103           } token0;
104 };
105 
106 /* initialize a tokenizer context */
107 static void
init_tokenizer_context(struct tokenizer_context * tcx)108 init_tokenizer_context(struct tokenizer_context *tcx)
109 {
110           tcx->token0.token = T_NONE;
111 }
112 
113 /* get an atom (identifier or constant) */
114 static int
tokenize_atom(struct tokenizer_context * tcx,union token_data * token_data)115 tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
116 {
117           int ch, len;
118           char buf[MAX_LEN_ATOM+1];
119 
120           len = 0;
121           while (/*CONSTCOND*/1) {
122                     ch = _memstream_getc(&tcx->memstream);
123                     if (!(_bcs_isalnum(ch) || ch == '_')) {
124                               _memstream_ungetc(&tcx->memstream, ch);
125                               break;
126                     }
127                     if (len == MAX_LEN_ATOM)
128                               return T_TOOLONG;
129                     buf[len++] = ch;
130           }
131           buf[len] = '\0';
132           if (len == 0)
133                     return T_ILCHAR;
134 
135           if (_bcs_isdigit((int)(unsigned char)buf[0])) {
136                     unsigned long ul;
137                     char *post;
138                     ul = strtoul(buf, &post, 0);
139                     if (buf+len != post)
140                               return T_ILCHAR;
141                     token_data->constant = ul;
142                     return T_CONSTANT;
143           }
144 
145 #ifdef ALLOW_ARBITRARY_IDENTIFIER
146           strcpy(token_data->identifier, buf);
147           return T_IDENTIFIER;
148 #else
149           if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
150                     return T_IDENTIFIER;
151           return T_ILCHAR;
152 #endif
153 }
154 
155 /* tokenizer main routine */
156 static int
tokenize(struct tokenizer_context * tcx,union token_data * token_data)157 tokenize(struct tokenizer_context *tcx, union token_data *token_data)
158 {
159           int ch, prevch;
160 
161 retry:
162           ch = _memstream_getc(&tcx->memstream);
163           if (_bcs_isspace(ch))
164                     goto retry;
165 
166           switch (ch) {
167           case T_EOF:
168                     return ch;
169           case '+': case '-':
170                     token_data->op = ch;
171                     return T_ADDITIVE;
172           case '*': case '/': case '%':
173                     token_data->op = ch;
174                     return T_MULTIPLICATIVE;
175           case '?': case ':': case '(': case ')':
176                     token_data->op = ch;
177                     return ch;
178           case '&': case '|':
179                     prevch = ch;
180                     ch = _memstream_getc(&tcx->memstream);
181                     if (ch != prevch) {
182                               _memstream_ungetc(&tcx->memstream, ch);
183                               return T_ILCHAR;
184                     }
185                     token_data->op = ch;
186                     switch (ch) {
187                     case '&':
188                               return T_LAND;
189                     case '|':
190                               return T_LOR;
191                     default:
192                               return T_ILTOKEN;
193                     }
194           case '=': case '!': case '<': case '>':
195                     prevch = ch;
196                     ch = _memstream_getc(&tcx->memstream);
197                     if (ch != '=') {
198                               _memstream_ungetc(&tcx->memstream, ch);
199                               switch (prevch) {
200                               case '=':
201                                         return T_ILCHAR;
202                               case '!':
203                                         return '!';
204                               case '<':
205                               case '>':
206                                         token_data->op = prevch; /* OP_LT or OP_GT */
207                                         return T_RELATIONAL;
208                               }
209                     }
210                     /* '==', '!=', '<=' or '>=' */
211                     token_data->op = ch+prevch;
212                     switch (prevch) {
213                     case '=':
214                     case '!':
215                               return T_EQUALITY;
216                     case '<':
217                     case '>':
218                               return T_RELATIONAL;
219                     }
220                     /*NOTREACHED*/
221           }
222 
223           _memstream_ungetc(&tcx->memstream, ch);
224           return tokenize_atom(tcx, token_data);
225 }
226 
227 /* get the next token */
228 static int
get_token(struct tokenizer_context * tcx,union token_data * token_data)229 get_token(struct tokenizer_context *tcx, union token_data *token_data)
230 {
231           if (tcx->token0.token != T_NONE) {
232                     int token = tcx->token0.token;
233                     tcx->token0.token = T_NONE;
234                     *token_data = tcx->token0.token_data;
235                     return token;
236           }
237           return tokenize(tcx, token_data);
238 }
239 
240 /* push back the last token */
241 static void
unget_token(struct tokenizer_context * tcx,int token,union token_data * token_data)242 unget_token(struct tokenizer_context *tcx,
243               int token, union token_data *token_data)
244 {
245           tcx->token0.token = token;
246           tcx->token0.token_data = *token_data;
247 }
248 
#ifdef TEST_TOKENIZER

/*
 * Tokenizer test driver: tokenizes argv[1] and prints one line per
 * token until end of input or the first reported error.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	union token_data token_data;
	int token;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	for (;;) {
		token = get_token(&tcx, &token_data);
		switch (token) {
		case T_EOF:
			goto quit;
		case T_ILCHAR:
			printf("illegal character.\n");
			goto quit;
		case T_TOOLONG:
			printf("too long atom.\n");
			goto quit;
		case T_CONSTANT:
			printf("constant: %lu\n", token_data.constant);
			break;
		case T_IDENTIFIER:
			printf("symbol: %s\n", token_data.identifier);
			break;
		default:
			printf("operator: ");
			switch (token) {
			case T_LAND:
				printf("&&\n");
				break;
			case T_LOR:
				printf("||\n");
				break;
			case T_EQUALITY:
				/* op is <char> + '='; recover the first char */
				printf("%c=\n", token_data.op-'=');
				break;
			case T_RELATIONAL:
				switch (token_data.op) {
				case OP_LTEQ:
				case OP_GTEQ:
					printf("%c=\n", token_data.op-'=');
					break;
				default:
					printf("%c\n", token_data.op);
					break;
				}
				break;
			case T_ADDITIVE:
			case T_MULTIPLICATIVE:
				printf("%c\n", token_data.op);
				break;
			default:
				/*
				 * Single-character token.  The "operator: "
				 * prefix has already been printed above, so
				 * do not repeat it here.
				 */
				printf("%c\n", token);
				break;
			}
			break;
		}
	}
quit:
	return 0;
}
#endif /* TEST_TOKENIZER */
319 
320 
321 /* ----------------------------------------------------------------------
322  * parser part
323  *
324  * exp := cond
325  *
326  * cond := lor | lor '?' cond ':' cond
327  *
328  * lor := land ( '||' land )*
329  *
330  * land := equality ( '&&' equality )*
331  *
332  * equality := relational ( equalityops relational )*
333  * equalityops := '==' | '!='
334  *
335  * relational := additive ( relationalops additive )*
336  * relationalops := '<' | '>' | '<=' | '>='
337  *
338  * additive := multiplicative ( additiveops multiplicative )*
339  * additiveops := '+' | '-'
340  *
341  * multiplicative := lnot ( multiplicativeops lnot )*
342  * multiplicativeops := '*' | '/' | '%'
343  *
344  * lnot := '!' lnot | term
345  *
346  * term := literal | identifier | '(' exp ')'
347  *
348  */
349 
/*
 * T_ENSURE_OK(token, label):
 *	jump to label when token is an error code.
 */
#define T_ENSURE_OK(token, label)					\
do {									\
	if (T_IS_ERROR(token))						\
		goto label;						\
} while (0)

/*
 * T_ENSURE_SOMETHING(token, label):
 *	like T_ENSURE_OK, but end of input also counts as an error; in
 *	that case token (which must be an lvalue) is rewritten to T_ILEND
 *	before jumping.
 */
#define T_ENSURE_SOMETHING(token, label)				\
do {									\
	if ((token) == T_EOF)						\
		token = T_ILEND;					\
	if (T_IS_ERROR(token))						\
		goto label;						\
} while (0)
363 
364 #define parser_element        plural_element
365 
366 struct parser_element;
367 struct parser_op
368 {
369           char op;
370           struct parser_element *operands[MAX_NUM_OPERANDS];
371 };
372 struct parser_element
373 {
374           int kind;
375           union
376           {
377                     struct parser_op parser_op;
378                     union token_data token_data;
379           } u;
380 };
381 
/*
 * One precedence level of the two-operand operators: the token code
 * accepted at this level and the level that parses its operands.
 * next == NULL means the operands are parsed as lnot.
 */
struct parser_op2_transition {
	int kind;
	const struct parser_op2_transition *next;
};
387 
388 /* prototypes */
389 static int parse_cond(struct tokenizer_context *, struct parser_element *);
390 
391 
392 /* transition table for the 2-operand operators */
393 #define DEF_TR(t, k, n)                                                                         \
394 static struct parser_op2_transition exp_tr_##t = {                                    \
395           k, &exp_tr_##n                                                                        \
396 }
397 #define DEF_TR0(t, k)                                                                           \
398 static struct parser_op2_transition exp_tr_##t = {                                    \
399           k, NULL /* expect lnot */                                                   \
400 }
401 
402 DEF_TR0(multiplicative, T_MULTIPLICATIVE);
403 DEF_TR(additive, T_ADDITIVE, multiplicative);
404 DEF_TR(relational, T_RELATIONAL, additive);
405 DEF_TR(equality, T_EQUALITY, relational);
406 DEF_TR(land, T_LAND, equality);
407 DEF_TR(lor, T_LOR, land);
408 
409 /* init a parser element structure */
410 static void
init_parser_element(struct parser_element * pe)411 init_parser_element(struct parser_element *pe)
412 {
413           int i;
414 
415           pe->kind = T_NONE;
416           for (i=0; i<MAX_NUM_OPERANDS; i++)
417                     pe->u.parser_op.operands[i] = NULL;
418 }
419 
420 /* uninitialize a parser element structure with freeing children */
421 static void free_parser_element(struct parser_element *);
422 static void
uninit_parser_element(struct parser_element * pe)423 uninit_parser_element(struct parser_element *pe)
424 {
425           int i;
426 
427           if (T_IS_OPERATOR(pe->kind))
428                     for (i=0; i<MAX_NUM_OPERANDS; i++)
429                               if (pe->u.parser_op.operands[i])
430                                         free_parser_element(
431                                                   pe->u.parser_op.operands[i]);
432 }
433 
/*
 * Free a heap-allocated parser element together with its operand
 * subtrees.  A NULL pointer is accepted and ignored.
 */
static void
free_parser_element(struct parser_element *pe)
{

	if (pe == NULL)
		return;
	uninit_parser_element(pe);
	free(pe);
}
443 
444 
445 /* copy a parser element structure shallowly */
446 static void
copy_parser_element(struct parser_element * dpe,const struct parser_element * spe)447 copy_parser_element(struct parser_element *dpe,
448                         const struct parser_element *spe)
449 {
450           memcpy(dpe, spe, sizeof *dpe);
451 }
452 
453 /* duplicate a parser element structure shallowly */
454 static struct parser_element *
dup_parser_element(const struct parser_element * pe)455 dup_parser_element(const struct parser_element *pe)
456 {
457           struct parser_element *dpe = malloc(sizeof *dpe);
458           if (dpe)
459                     copy_parser_element(dpe, pe);
460           return dpe;
461 }
462 
463 /* term := identifier | constant | '(' exp ')' */
464 static int
parse_term(struct tokenizer_context * tcx,struct parser_element * pelem)465 parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
466 {
467           struct parser_element pe0;
468           int token;
469           union token_data token_data;
470 
471           token = get_token(tcx, &token_data);
472           switch (token) {
473           case '(':
474                     /* '(' exp ')' */
475                     init_parser_element(&pe0);
476                     /* expect exp */
477                     token = parse_cond(tcx, &pe0);
478                     T_ENSURE_OK(token, err);
479                     /* expect ')' */
480                     token = get_token(tcx, &token_data);
481                     T_ENSURE_SOMETHING(token, err);
482                     if (token != ')') {
483                               unget_token(tcx, token, &token_data);
484                               token = T_ILTOKEN;
485                               goto err;
486                     }
487                     copy_parser_element(pelem, &pe0);
488                     return token;
489 err:
490                     uninit_parser_element(&pe0);
491                     return token;
492           case T_IDENTIFIER:
493           case T_CONSTANT:
494                     pelem->kind = token;
495                     pelem->u.token_data = token_data;
496                     return token;
497           case T_EOF:
498                     return T_ILEND;
499           default:
500                     return T_ILTOKEN;
501           }
502 }
503 
504 /* lnot := '!' lnot | term */
505 static int
parse_lnot(struct tokenizer_context * tcx,struct parser_element * pelem)506 parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
507 {
508           struct parser_element pe0;
509           int token;
510           union token_data token_data;
511 
512           init_parser_element(&pe0);
513 
514           /* '!' or not */
515           token = get_token(tcx, &token_data);
516           if (token != '!') {
517                     /* stop: term */
518                     unget_token(tcx, token, &token_data);
519                     return parse_term(tcx, pelem);
520           }
521 
522           /* '!' term */
523           token = parse_lnot(tcx, &pe0);
524           T_ENSURE_OK(token, err);
525 
526           pelem->kind = '!';
527           pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
528           return pelem->kind;
529 err:
530           uninit_parser_element(&pe0);
531           return token;
532 }
533 
534 /* ext_op := ext_next ( op ext_next )* */
535 static int
parse_op2(struct tokenizer_context * tcx,struct parser_element * pelem,const struct parser_op2_transition * tr)536 parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
537             const struct parser_op2_transition *tr)
538 {
539           struct parser_element pe0, pe1, peop;
540           int token;
541           union token_data token_data;
542           char op;
543 
544           /* special case: expect lnot */
545           if (tr == NULL)
546                     return parse_lnot(tcx, pelem);
547 
548           init_parser_element(&pe0);
549           init_parser_element(&pe1);
550           token = parse_op2(tcx, &pe0, tr->next);
551           T_ENSURE_OK(token, err);
552 
553           while (/*CONSTCOND*/1) {
554                     /* expect op or empty */
555                     token = get_token(tcx, &token_data);
556                     if (token != tr->kind) {
557                               /* stop */
558                               unget_token(tcx, token, &token_data);
559                               copy_parser_element(pelem, &pe0);
560                               break;
561                     }
562                     op = token_data.op;
563                     /* right hand */
564                     token = parse_op2(tcx, &pe1, tr->next);
565                     T_ENSURE_OK(token, err);
566 
567                     init_parser_element(&peop);
568                     peop.kind = tr->kind;
569                     peop.u.parser_op.op = op;
570                     peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
571                     init_parser_element(&pe0);
572                     peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
573                     init_parser_element(&pe1);
574                     copy_parser_element(&pe0, &peop);
575           }
576           return pelem->kind;
577 err:
578           uninit_parser_element(&pe1);
579           uninit_parser_element(&pe0);
580           return token;
581 }
582 
583 /* cond := lor | lor '?' cond ':' cond */
584 static int
parse_cond(struct tokenizer_context * tcx,struct parser_element * pelem)585 parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
586 {
587           struct parser_element pe0, pe1, pe2;
588           int token;
589           union token_data token_data;
590 
591           init_parser_element(&pe0);
592           init_parser_element(&pe1);
593           init_parser_element(&pe2);
594 
595           /* expect lor or empty */
596           token = parse_op2(tcx, &pe0, &exp_tr_lor);
597           T_ENSURE_OK(token, err);
598 
599           /* '?' or not */
600           token = get_token(tcx, &token_data);
601           if (token != '?') {
602                     /* stop: lor */
603                     unget_token(tcx, token, &token_data);
604                     copy_parser_element(pelem, &pe0);
605                     return pe0.kind;
606           }
607 
608           /* lor '?' cond ':' cond */
609           /* expect cond */
610           token = parse_cond(tcx, &pe1);
611           T_ENSURE_OK(token, err);
612 
613           /* expect ':' */
614           token = get_token(tcx, &token_data);
615           T_ENSURE_OK(token, err);
616           if (token != ':') {
617                     unget_token(tcx, token, &token_data);
618                     token = T_ILTOKEN;
619                     goto err;
620           }
621 
622           /* expect cond */
623           token = parse_cond(tcx, &pe2);
624           T_ENSURE_OK(token, err);
625 
626           pelem->kind = '?';
627           pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
628           pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
629           pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
630           return pelem->kind;
631 err:
632           uninit_parser_element(&pe2);
633           uninit_parser_element(&pe1);
634           uninit_parser_element(&pe0);
635           return token;
636 }
637 
638 static int
parse_exp(struct tokenizer_context * tcx,struct parser_element * pelem)639 parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
640 {
641           int token, token1;
642           union token_data token_data;
643 
644 #ifdef ALLOW_EMPTY
645           /* empty check */
646           token = get_token(tcx, &token_data);
647           if (token == T_EOF)
648                     return token;
649           unget_token(tcx, token, &token_data);
650 #endif
651 
652           token = parse_cond(tcx, pelem);
653           if (!T_IS_ERROR(token)) {
654                     /* termination check */
655                     token1 = get_token(tcx, &token_data);
656                     if (token1 == T_EOF)
657                               return token;
658                     else if (!T_IS_ERROR(token))
659                                unget_token(tcx, token1, &token_data);
660                     return T_ILTOKEN;
661           }
662           return token;
663 }
664 
665 
666 #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
667 #include <stdio.h>
668 
669 static void dump_elem(struct parser_element *);
670 
671 static void
dump_op2(struct parser_element * pelem)672 dump_op2(struct parser_element *pelem)
673 {
674           dump_elem(pelem->u.parser_op.operands[0]);
675           printf(" ");
676           dump_elem(pelem->u.parser_op.operands[1]);
677           printf(")");
678 }
679 
680 static void
dump_op3(struct parser_element * pelem)681 dump_op3(struct parser_element *pelem)
682 {
683           dump_elem(pelem->u.parser_op.operands[0]);
684           printf(" ");
685           dump_elem(pelem->u.parser_op.operands[1]);
686           printf(" ");
687           dump_elem(pelem->u.parser_op.operands[2]);
688           printf(")");
689 }
690 
691 static void
dump_elem(struct parser_element * pelem)692 dump_elem(struct parser_element *pelem)
693 {
694           switch (pelem->kind) {
695           case T_LAND:
696                     printf("(&& ");
697                     dump_op2(pelem);
698                     break;
699           case T_LOR:
700                     printf("(|| ");
701                     dump_op2(pelem);
702                     break;
703           case T_EQUALITY:
704                     switch (pelem->u.parser_op.op) {
705                     case OP_EQ:
706                               printf("(== ");
707                               break;
708                     case OP_NEQ:
709                               printf("(!= ");
710                               break;
711                     }
712                     dump_op2(pelem);
713                     break;
714           case T_RELATIONAL:
715                     switch (pelem->u.parser_op.op) {
716                     case '<':
717                     case '>':
718                               printf("(%c ", pelem->u.parser_op.op);
719                               break;
720                     case OP_LTEQ:
721                     case OP_GTEQ:
722                               printf("(%c= ", pelem->u.parser_op.op-'=');
723                               break;
724                     }
725                     dump_op2(pelem);
726                     break;
727           case T_ADDITIVE:
728           case T_MULTIPLICATIVE:
729                     printf("(%c ", pelem->u.parser_op.op);
730                     dump_op2(pelem);
731                     break;
732           case '!':
733                     printf("(! ");
734                     dump_elem(pelem->u.parser_op.operands[0]);
735                     printf(")");
736                     break;
737           case '?':
738                     printf("(? ");
739                     dump_op3(pelem);
740                     break;
741           case T_CONSTANT:
742                     printf("%d", pelem->u.token_data.constant);
743                     break;
744           case T_IDENTIFIER:
745 #ifdef ALLOW_ARBITRARY_IDENTIFIER
746                     printf("%s", pelem->u.token_data.identifier);
747 #else
748                     printf(PLURAL_NUMBER_SYMBOL);
749 #endif
750                     break;
751           }
752 }
753 #endif
#ifdef TEST_PARSER
/*
 * Parser test driver: parses argv[1] and prints the resulting tree,
 * "none" for empty input, or the error code in hexadecimal.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	struct parser_element root;
	int result;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	init_parser_element(&root);
	result = parse_exp(&tcx, &root);

	if (T_IS_ERROR(result))
		printf("error: 0x%X", result);
	else if (result == T_EOF)
		printf("none");
	else
		dump_elem(&root);
	printf("\n");

	/* release whatever the parser attached to the root element */
	uninit_parser_element(&root);

	return EXIT_SUCCESS;
}
#endif /* TEST_PARSER */
786 
787 /* ----------------------------------------------------------------------
788  * calculate plural number
789  */
790 static unsigned long
calculate_plural(const struct parser_element * pe,unsigned long n)791 calculate_plural(const struct parser_element *pe, unsigned long n)
792 {
793           unsigned long val0, val1;
794           switch (pe->kind) {
795           case T_IDENTIFIER:
796                     return n;
797           case T_CONSTANT:
798                     return pe->u.token_data.constant;
799           case '?':
800                     val0 = calculate_plural(pe->u.parser_op.operands[0], n);
801                     if (val0)
802                               val1=calculate_plural(pe->u.parser_op.operands[1], n);
803                     else
804                               val1=calculate_plural(pe->u.parser_op.operands[2], n);
805                     return val1;
806           case '!':
807                     return !calculate_plural(pe->u.parser_op.operands[0], n);
808           case T_MULTIPLICATIVE:
809           case T_ADDITIVE:
810           case T_RELATIONAL:
811           case T_EQUALITY:
812           case T_LOR:
813           case T_LAND:
814                     val0 = calculate_plural(pe->u.parser_op.operands[0], n);
815                     val1 = calculate_plural(pe->u.parser_op.operands[1], n);
816                     switch (pe->u.parser_op.op) {
817                     case '*':
818                               return val0*val1;
819                     case '/':
820                               return val0/val1;
821                     case '%':
822                               return val0%val1;
823                     case '+':
824                               return val0+val1;
825                     case '-':
826                               return val0-val1;
827                     case '<':
828                               return val0<val1;
829                     case '>':
830                               return val0>val1;
831                     case OP_LTEQ:
832                               return val0<=val1;
833                     case OP_GTEQ:
834                               return val0>=val1;
835                     case OP_EQ:
836                               return val0==val1;
837                     case OP_NEQ:
838                               return val0!=val1;
839                     case '|':
840                               return val0||val1;
841                     case '&':
842                               return val0&&val1;
843                     }
844           }
845           return 0;
846 }
847 
848 #ifdef TEST_CALC_PLURAL
849 #include <stdio.h>
850 
851 int
main(int argc,char ** argv)852 main(int argc, char **argv)
853 {
854           struct tokenizer_context tcx;
855           struct parser_element pelem;
856           int token;
857 
858           if (argc != 3) {
859                     fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
860                     return EXIT_FAILURE;
861           }
862 
863           init_tokenizer_context(&tcx);
864           _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
865 
866           init_parser_element(&pelem);
867           token = parse_exp(&tcx, &pelem);
868 
869           if (token == T_EOF)
870                     printf("none");
871           else if (T_IS_ERROR(token))
872                     printf("error: 0x%X", token);
873           else {
874                     printf("plural = %lu",
875                            calculate_plural(&pelem, atoi(argv[2])));
876           }
877           printf("\n");
878 
879           uninit_parser_element(&pelem);
880 
881           return EXIT_SUCCESS;
882 }
883 #endif /* TEST_CALC_PLURAL */
884 
885 
886 /* ----------------------------------------------------------------------
887  * parse plural forms
888  */
889 
/*
 * Re-initialize `r' with the pointer and remaining length produced by
 * _bcs_skip_ws_len(), i.e. drop the whitespace at the front of the region.
 */
static void
region_skip_ws(struct _region *r)
{
	const char *head;
	size_t remain;

	head = _region_head(r);
	remain = _region_size(r);

	head = _bcs_skip_ws_len(head, &remain);
	_region_init(r, __UNCONST(head), remain);
}
899 
/*
 * Re-initialize `r' with its original start and the length adjusted by
 * _bcs_trunc_rws_len(), i.e. drop the whitespace at the end of the region.
 */
static void
region_trunc_rws(struct _region *r)
{
	const char *head;
	size_t remain;

	head = _region_head(r);
	remain = _region_size(r);

	_bcs_trunc_rws_len(head, &remain);
	_region_init(r, __UNCONST(head), remain);
}
909 
/*
 * Test whether region `r' begins with the `prelen' bytes at `pre'.
 * The comparison uses _bcs_strncasecmp() when `ignorecase' is non-zero
 * and memcmp() otherwise.
 *
 * Returns 0 on a match, -1 if the region is shorter than `prelen' or
 * the bytes differ.
 */
static int
region_check_prefix(struct _region *r, const char *pre, size_t prelen,
    int ignorecase)
{
	int differ;

	if (prelen > _region_size(r))
		return -1;

	if (ignorecase)
		differ = _bcs_strncasecmp(_region_head(r), pre, prelen) != 0;
	else
		differ = memcmp(_region_head(r), pre, prelen) != 0;

	return differ ? -1 : 0;
}
926 
/*
 * Strip trailing whitespace from `r' and then remove a final ';'.
 *
 * Returns 0 after shortening the region by that one byte, or -1 if the
 * trimmed region is empty or does not end in ';'.
 */
static int
cut_trailing_semicolon(struct _region *r)
{
	size_t len;

	region_trunc_rws(r);

	len = _region_size(r);
	if (len == 0)
		return -1;
	if (_region_peek8(r, len - 1) != ';')
		return -1;

	_region_get_subregion(r, r, 0, len - 1);
	return 0;
}
937 
938 static int
find_plural_forms(struct _region * r)939 find_plural_forms(struct _region *r)
940 {
941           struct _memstream ms;
942           struct _region rr;
943 
944           _memstream_bind(&ms, r);
945 
946           while (!_memstream_getln_region(&ms, &rr)) {
947                     if (!region_check_prefix(&rr,
948                                                    PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
949                               _region_get_subregion(
950                                         r, &rr, LEN_PLURAL_FORMS,
951                                         _region_size(&rr)-LEN_PLURAL_FORMS);
952                               region_skip_ws(r);
953                               region_trunc_rws(r);
954                               return 0;
955                     }
956           }
957           return -1;
958 }
959 
/*
 * Consume "<sym> =" from the front of `r', allowing whitespace before
 * the symbol, around the '=' and after it.  The symbol is compared
 * case-sensitively.
 *
 * Returns 0 with `r' advanced past the assignment, or -1 if the symbol
 * or the '=' is missing.
 */
static int
skip_assignment(struct _region *r, const char *sym, size_t symlen)
{
	/* the symbol itself */
	region_skip_ws(r);
	if (region_check_prefix(r, sym, symlen, 0) != 0)
		return -1;
	_region_get_subregion(r, r, symlen, _region_size(r) - symlen);

	/* the '=' sign */
	region_skip_ws(r);
	if (_region_size(r) == 0)
		return -1;
	if (_region_peek8(r, 0) != '=')
		return -1;
	_region_get_subregion(r, r, 1, _region_size(r) - 1);

	region_skip_ws(r);
	return 0;
}
974 
975 static int
skip_nplurals(struct _region * r,unsigned long * rnp)976 skip_nplurals(struct _region *r, unsigned long *rnp)
977 {
978           unsigned long np;
979           char buf[MAX_LEN_ATOM+2], *endptr;
980           const char *endptrconst;
981           size_t ofs;
982 
983           if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
984                     return -1;
985           if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
986                     return -1;
987           strlcpy(buf, _region_head(r), sizeof (buf));
988           np = strtoul(buf, &endptr, 0);
989           endptrconst = _bcs_skip_ws(endptr);
990           if (*endptrconst != ';')
991                     return -1;
992           ofs = endptrconst+1-buf;
993           if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
994                     return -1;
995           if (rnp)
996                     *rnp = np;
997           return 0;
998 }
999 
1000 static int
parse_plural_body(struct _region * r,struct parser_element ** rpe)1001 parse_plural_body(struct _region *r, struct parser_element **rpe)
1002 {
1003           int token;
1004           struct tokenizer_context tcx;
1005           struct parser_element pelem, *ppe;
1006 
1007           init_tokenizer_context(&tcx);
1008           _memstream_bind(&tcx.memstream, r);
1009 
1010           init_parser_element(&pelem);
1011           token = parse_exp(&tcx, &pelem);
1012           if (T_IS_ERROR(token))
1013                     return token;
1014 
1015           ppe = dup_parser_element(&pelem);
1016           if (ppe == NULL) {
1017                     uninit_parser_element(&pelem);
1018                     return T_NOMEM;
1019           }
1020 
1021           *rpe = ppe;
1022 
1023           return 0;
1024 }
1025 
1026 static int
parse_plural(struct parser_element ** rpe,unsigned long * rnp,const char * str,size_t len)1027 parse_plural(struct parser_element **rpe, unsigned long *rnp,
1028                const char *str, size_t len)
1029 {
1030           struct _region r;
1031 
1032           _region_init(&r, __UNCONST(str), len);
1033 
1034           if (find_plural_forms(&r))
1035                     return T_NOTFOUND;
1036           if (skip_nplurals(&r, rnp))
1037                     return T_ILPLURAL;
1038           if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1039                     return T_ILPLURAL;
1040           if (cut_trailing_semicolon(&r))
1041                     return T_ILPLURAL;
1042           return parse_plural_body(&r, rpe);
1043 }
1044 
1045 #ifdef TEST_PARSE_PLURAL
1046 int
main(int argc,char ** argv)1047 main(int argc, char **argv)
1048 {
1049           int ret;
1050           struct parser_element *pelem;
1051           unsigned long np;
1052 
1053           if (argc != 2 && argc != 3) {
1054                     fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
1055                     return EXIT_FAILURE;
1056           }
1057 
1058           ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1]));
1059 
1060           if (ret == T_EOF)
1061                     printf("none");
1062           else if (T_IS_ERROR(ret))
1063                     printf("error: 0x%X", ret);
1064           else {
1065                     printf("syntax tree: ");
1066                     dump_elem(pelem);
1067                     printf("\nnplurals = %lu", np);
1068                     if (argv[2])
1069                               printf(", plural = %lu",
1070                                      calculate_plural(pelem, atoi(argv[2])));
1071                     free_parser_element(pelem);
1072           }
1073           printf("\n");
1074 
1075 
1076           return EXIT_SUCCESS;
1077 }
1078 #endif /* TEST_PARSE_PLURAL */
1079 
1080 /*
1081  * external interface
1082  */
1083 
/*
 * Public entry point for parse_plural().  The struct gettext_plural
 * pointer is handed to the parser as a struct parser_element pointer;
 * the return value is that of parse_plural().
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
    const char *str, size_t len)
{
	struct parser_element **tree;

	tree = (struct parser_element **)rpe;
	return parse_plural(tree, rnp, str, len);
}
1090 
/*
 * Public entry point for calculate_plural(): evaluate the plural
 * expression `pe' for the count `n' and return the result.
 */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree;

	tree = (void *)__UNCONST(pe);
	return calculate_plural(tree, n);
}
1096 
/*
 * Public entry point for free_parser_element(): release the plural
 * expression `pe'.
 */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	struct parser_element *tree;

	tree = (void *)pe;
	free_parser_element(tree);
}
1102 
1103 #ifdef TEST_PLURAL
1104 #include <libintl.h>
1105 #include <locale.h>
1106 
1107 #define PR(n)       printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))
1108 
/*
 * Test driver: bind the "test" domain to the current directory and
 * print, via PR(), the dngettext() result for the counts 1 through 4.
 */
int
main(void)
{
	int count;

	bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */

	for (count = 1; count <= 4; count++)
		PR(count);

	return 0;
}
1120 #endif
1121