xref: /dragonfly/contrib/flex/src/parse.y (revision 388e4ddaf1c230f115961bdb4bad6a8d3e017c93)
1 /* parse.y - parser for flex input */
2 
/* Basic tokens returned by the scanner while reading a flex input file. */
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
/* Tokens for %option keywords.  Apart from TOK_OPTION itself, each is
 * consumed in the form "KEYWORD '=' NAME" by the "option" rule.
 */
4 %token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
5 %token TOK_TABLES_FILE
6 
/* Character class expression tokens (presumably the [:alnum:] family;
 * the rules that consume them are outside this part of the file).
 */
7 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
8 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9 
/* Negated counterparts of the CCE_* tokens above. */
10 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
11 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
12 
/* Character class set operators, declared left-associative. */
13 %left CCL_OP_DIFF CCL_OP_UNION
14 
15 /*
16  * POSIX and AT&T lex place the
17  * precedence of the repeat operator, {}, below that of concatenation.
18  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
19  * Regular Expression (ERE) precedence that has the repeat operator
20  * higher than concatenation.  This causes ab{3} to yield abbb.
21  *
22  * In order to support the POSIX and AT&T precedence and the flex
23  * precedence we define two token sets for the begin and end tokens of
24  * the repeat operator, '{' and '}'.  The lexical scanner chooses
25  * which tokens to return based on whether posix_compat or lex_compat
26  * are specified. Specifying either posix_compat or lex_compat will
27  * cause flex to parse scanner files as per the AT&T and
28  * POSIX-mandated behavior.
29  */
30 
/* The *_POSIX pair is used by the "series" rule (repeat applies to the
 * whole series); the *_FLEX pair is used by the "singleton" rule (repeat
 * applies to the preceding singleton only).
 */
31 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
32 
33 
33 
34 %{
35 /*  Copyright (c) 1990 The Regents of the University of California. */
36 /*  All rights reserved. */
37 
38 /*  This code is derived from software contributed to Berkeley by */
39 /*  Vern Paxson. */
40 
41 /*  The United States Government has rights in this work pursuant */
42 /*  to contract no. DE-AC03-76SF00098 between the United States */
43 /*  Department of Energy and the University of California. */
44 
45 /*  This file is part of flex. */
46 
47 /*  Redistribution and use in source and binary forms, with or without */
48 /*  modification, are permitted provided that the following conditions */
49 /*  are met: */
50 
51 /*  1. Redistributions of source code must retain the above copyright */
52 /*     notice, this list of conditions and the following disclaimer. */
53 /*  2. Redistributions in binary form must reproduce the above copyright */
54 /*     notice, this list of conditions and the following disclaimer in the */
55 /*     documentation and/or other materials provided with the distribution. */
56 
57 /*  Neither the name of the University nor the names of its contributors */
58 /*  may be used to endorse or promote products derived from this software */
59 /*  without specific prior written permission. */
60 
61 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
64 /*  PURPOSE. */
65 
66 #include "flexdef.h"
67 #include "tables.h"
68 
/* Working state shared by the grammar actions below.  Most of these are
 * reset per rule by "initforrule"; "i" is used as a scratch loop index
 * throughout the actions.
 */
69 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
70 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
71 
/* Stack of start-condition numbers that apply to the rule(s) being parsed.
 * Allocated in "sect1end" with lastsc + 1 entries.  scon_stk_ptr is the
 * index of the top entry; entries are stored from index 1, so 0 means
 * the stack is empty.
 */
72 int *scon_stk;
73 int scon_stk_ptr;
74 
75 static int madeany = false;  /* whether we've made the '.' character class */
/* Character classes built on first use of '.': ccldot excludes '\n',
 * cclany does not (it is used when sf_dot_all() is true).
 */
76 static int ccldot, cclany;
77 int previous_continued_action;          /* whether the previous rule's action was '|' */
78 
/* Format a warning that takes two arguments into a MAXLINE-sized local
 * buffer with snprintf() and hand the result to lwarn().
 */
79 #define format_warn3(fmt, a1, a2) \
80           do{ \
81         char fw3_msg[MAXLINE];\
82         snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
83         lwarn( fw3_msg );\
84           }while(0)
85 
86 /* Expand a POSIX character class expression.
 *
 * Adds to the class currccl every character c in [0, csize) for which both
 * isascii(c) and func(c) are true.  func is a ctype-style predicate taking
 * an int.  The macro declares its own local "c".
 */
87 #define CCL_EXPR(func) \
88           do{ \
89           int c; \
90           for ( c = 0; c < csize; ++c ) \
91                     if ( isascii(c) && func(c) ) \
92                               ccladd( currccl, c ); \
93           }while(0)
94 
95 /* negated class
 *
 * Adds to the class currccl every character c in [0, csize) for which
 * func(c) is false.  Unlike CCL_EXPR there is no isascii() filter here, so
 * characters outside the ASCII range are added whenever func rejects them.
 */
96 #define CCL_NEG_EXPR(func) \
97           do{ \
98           int c; \
99           for ( c = 0; c < csize; ++c ) \
100                     if ( !func(c) ) \
101                               ccladd( currccl, c ); \
102           }while(0)
103 
104 /* While POSIX defines isblank(), it's not ANSI C.
 *
 * Nonzero when c is a space or a horizontal tab.  The argument is
 * evaluated twice, so it must not have side effects.
 */
105 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
106 
107 /* On some over-ambitious machines, such as DEC Alpha's, the default
108  * token type is "long" instead of "int"; this leads to problems with
109  * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
110  * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
111  * following should ensure that the default token type is "int".
 *
 * Consequence for this grammar: every $$ / $n value in the actions below
 * is a plain int.
112  */
113 #define YYSTYPE int
114 
115 %}
116 
117 %%
/* goal: start symbol.  Parses section 1, the section separator and
 * section 2; once all user rules have been read, the action appends the
 * default rule and its action text.
 */
118 goal                :  initlex sect1 sect1end sect2 initforrule
119                               { /* add default rule */
120                               int def_rule;
121 
                              /* Build a one-state machine on the negation
                               * of a freshly initialised character class.
                               */
122                               pat = cclinit();
123                               cclnegate( pat );
124 
125                               def_rule = mkstate( -pat );
126 
127                               /* Remember the number of the default rule so we
128                                * don't generate "can't match" warnings for it.
129                                */
130                               default_rule = num_rules;
131 
132                               finish_rule( def_rule, false, 0, 0, 0);
133 
                              /* Add the default rule to every start condition. */
134                               for ( i = 1; i <= lastsc; ++i )
135                                         scset[i] = mkbranch( scset[i], def_rule );
136 
                              /* With spprdflt set the default action is a
                               * fatal error; otherwise it is ECHO.
                               */
137                               if ( spprdflt )
138                                         add_action(
139                                         "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
140                               else
141                                         add_action( "ECHO" );
142 
143                               add_action( ";\n\tYY_BREAK]]\n" );
144                               }
145                     ;
146 
/* initlex: empty rule reduced before anything else in "goal".  It installs
 * the "INITIAL" start condition; the second argument to scinstal() is the
 * exclusive flag (see "namelist1"), so INITIAL is non-exclusive.
 */
147 initlex             :
148                               { /* initialize for processing rules */
149 
150                               /* Create default DFA start condition. */
151                               scinstal( "INITIAL", false );
152                               }
153                     ;
154 
/* sect1: the definitions section.  A possibly empty sequence of
 * start-condition declarations and %option lines; a parse error anywhere
 * in it is reported through synerr().
 */
155 sect1               :  sect1 startconddecl namelist1
156                     |  sect1 options
157                     |
158                     |  error
159                               { synerr( _("unknown error processing section 1") ); }
160                     ;
161 
/* sect1end: the separator that ends section 1.  By this point all start
 * conditions have been declared, so the start-condition stack is sized
 * from lastsc (plus one, since entries are stored from index 1) and reset
 * to empty.
 */
162 sect1end  :  SECTEND
163                               {
164                               check_options();
165                               scon_stk = allocate_integer_array( lastsc + 1 );
166                               scon_stk_ptr = 0;
167                               }
168                     ;
169 
/* startconddecl: introduces a start-condition declaration.  Records in
 * xcluflg whether the names that follow are exclusive (XSCDECL) or not
 * (SCDECL); "namelist1" passes that flag to scinstal().
 */
170 startconddecl       :  SCDECL
171                               { xcluflg = false; }
172 
173                     |  XSCDECL
174                               { xcluflg = true; }
175                     ;
176 
/* namelist1: one or more start-condition names in a declaration.  Each
 * NAME (text in nmstr) is installed with the exclusivity chosen by the
 * preceding "startconddecl".
 */
177 namelist1 :  namelist1 NAME
178                               { scinstal( nmstr, xcluflg ); }
179 
180                     |  NAME
181                               { scinstal( nmstr, xcluflg ); }
182 
183                     |  error
184                               { synerr( _("bad start condition list") ); }
185                     ;
186 
/* options: a %option line, i.e. TOK_OPTION followed by zero or more
 * "option" items.
 */
187 options             :  TOK_OPTION optionlist
188                     ;
189 
/* optionlist: possibly empty, left-recursive list of "option" items. */
190 optionlist          :  optionlist option
191                     |
192                     ;
193 
/* option: a single "KEYWORD = NAME" option.  Each alternative stores a
 * copy of the NAME text (nmstr, duplicated with xstrdup()) in the
 * corresponding global.
 */
194 option              :  TOK_OUTFILE '=' NAME
195                               {
196                               outfilename = xstrdup(nmstr);
197                               did_outfilename = 1;
198                               }
199                     |  TOK_EXTRA_TYPE '=' NAME
200                               { extra_type = xstrdup(nmstr); }
201                     |  TOK_PREFIX '=' NAME
                              /* A prefix containing '[' or ']' is rejected
                               * via flexerror().
                               */
202                               { prefix = xstrdup(nmstr);
203                           if (strchr(prefix, '[') || strchr(prefix, ']'))
204                               flexerror(_("Prefix must not contain [ or ]")); }
205                     |  TOK_YYCLASS '=' NAME
206                               { yyclass = xstrdup(nmstr); }
207                     |  TOK_HEADER_FILE '=' NAME
208                               { headerfilename = xstrdup(nmstr); }
209               |  TOK_TABLES_FILE '=' NAME
            /* Naming a tables file also sets tablesext. */
210             { tablesext = true; tablesfilename = xstrdup(nmstr); }
211                     ;
212 
/* sect2: the rules section.  Either a single rule with an optional
 * start-condition prefix, or a start-condition prefix applied to a braced
 * group of rules.
 *
 * $2 is the value of "scon": the stack depth recorded before that scon
 * pushed anything.  Restoring scon_stk_ptr to it pops the start conditions
 * the scon added once the rule (or group) has been parsed.
 */
213 sect2               :  sect2 scon initforrule flexrule '\n'
214                               { scon_stk_ptr = $2; }
215                     |  sect2 scon '{' sect2 '}'
216                               { scon_stk_ptr = $2; }
217                     |
218                     ;
219 
/* initforrule: empty rule reduced just before each rule is parsed (and
 * once more in "goal" before the default rule is added).  Resets the
 * per-rule bookkeeping, snapshots continued_action into
 * previous_continued_action, and calls new_rule().
 */
220 initforrule         :
221                               {
222                               /* Initialize for a parse of one rule. */
223                               trlcontxt = variable_trail_rule = varlength = false;
224                               trailcnt = headcnt = rulelen = 0;
225                               current_state_type = STATE_NORMAL;
226                               previous_continued_action = continued_action;
227                               in_rule = true;
228 
229                               new_rule();
230                               }
231                     ;
232 
/* flexrule: one complete rule, in one of four forms:
 *   '^' rule  - pattern anchored at beginning of line; added to scbol[]
 *   rule      - unanchored pattern; added to scset[]
 *   EOF_OP    - an end-of-file rule
 *   error     - anything else, reported as an unrecognized rule
 *
 * For the two pattern forms the machine is added to the start conditions
 * currently on scon_stk if there are any, otherwise to every
 * non-exclusive start condition.
 */
233 flexrule  :  '^' rule
234                               {
235                               pat = $2;
236                               finish_rule( pat, variable_trail_rule,
237                                         headcnt, trailcnt , previous_continued_action);
238 
239                               if ( scon_stk_ptr > 0 )
240                                         {
241                                         for ( i = 1; i <= scon_stk_ptr; ++i )
242                                                   scbol[scon_stk[i]] =
243                                                             mkbranch( scbol[scon_stk[i]],
244                                                                                 pat );
245                                         }
246 
247                               else
248                                         {
249                                         /* Add to all non-exclusive start conditions,
250                                          * including the default (0) start condition.
251                                          */
252 
253                                         for ( i = 1; i <= lastsc; ++i )
254                                                   if ( ! scxclu[i] )
255                                                             scbol[i] = mkbranch( scbol[i],
256                                                                                           pat );
257                                         }
258 
                              /* Note the first use of '^' and, only if
                               * performance_report > 1, report its cost.
                               */
259                               if ( ! bol_needed )
260                                         {
261                                         bol_needed = true;
262 
263                                         if ( performance_report > 1 )
264                                                   pinpoint_message(
265                               "'^' operator results in sub-optimal performance" );
266                                         }
267                               }
268 
269                     |  rule
270                               {
271                               pat = $1;
272                               finish_rule( pat, variable_trail_rule,
273                                         headcnt, trailcnt , previous_continued_action);
274 
275                               if ( scon_stk_ptr > 0 )
276                                         {
277                                         for ( i = 1; i <= scon_stk_ptr; ++i )
278                                                   scset[scon_stk[i]] =
279                                                             mkbranch( scset[scon_stk[i]],
280                                                                                 pat );
281                                         }
282 
283                               else
284                                         {
                                        /* No explicit start conditions:
                                         * add to every non-exclusive one.
                                         */
285                                         for ( i = 1; i <= lastsc; ++i )
286                                                   if ( ! scxclu[i] )
287                                                             scset[i] =
288                                                                       mkbranch( scset[i],
289                                                                                 pat );
290                                         }
291                               }
292 
293                     |  EOF_OP
294                               {
295                               if ( scon_stk_ptr > 0 )
296                                         build_eof_action();
297 
298                               else
299                                         {
300                                         /* This EOF applies to all start conditions
301                                          * which don't already have EOF actions.
302                                          */
                                        /* The pushes below are undone by the
                                         * enclosing "sect2" action, which
                                         * restores scon_stk_ptr.
                                         */
303                                         for ( i = 1; i <= lastsc; ++i )
304                                                   if ( ! sceof[i] )
305                                                             scon_stk[++scon_stk_ptr] = i;
306 
307                                         if ( scon_stk_ptr == 0 )
308                                                   lwarn(
309                               "all start conditions already have <<EOF>> rules" );
310 
311                                         else
312                                                   build_eof_action();
313                                         }
314                               }
315 
316                     |  error
317                               { synerr( _("unrecognized rule") ); }
318                     ;
319 
/* scon_stk_ptr (grammar symbol, distinct from the C variable of the same
 * name): empty rule whose value is the current stack depth.  "scon" places
 * it right after '<' so the depth is captured before "namelist2" pushes
 * any start conditions.
 */
320 scon_stk_ptr        :
321                               { $$ = scon_stk_ptr; }
322                     ;
323 
/* scon: optional start-condition prefix of a rule: <name,...>, <*>, or
 * nothing.  In every case the value is the stack depth from before this
 * prefix pushed anything; "sect2" uses it to pop afterwards.
 */
324 scon                :  '<' scon_stk_ptr namelist2 '>'
325                               { $$ = $2; }
326 
327                     |  '<' '*' '>'
328                               {
329                               $$ = scon_stk_ptr;
330 
                              /* Push every start condition that is not
                               * already on the stack.
                               */
331                               for ( i = 1; i <= lastsc; ++i )
332                                         {
333                                         int j;
334 
335                                         for ( j = 1; j <= scon_stk_ptr; ++j )
336                                                   if ( scon_stk[j] == i )
337                                                             break;
338 
                                        /* j ran past the top: i not found. */
339                                         if ( j > scon_stk_ptr )
340                                                   scon_stk[++scon_stk_ptr] = i;
341                                         }
342                               }
343 
344                     |
345                               { $$ = scon_stk_ptr; }
346                     ;
347 
/* namelist2: comma-separated list of start-condition names inside <...>.
 * The work is done by "sconname"; only the error alternative has an
 * action here.
 */
348 namelist2 :  namelist2 ',' sconname
349 
350                     |  sconname
351 
352                     |  error
353                               { synerr( _("bad start condition list") ); }
354                     ;
355 
/* sconname: one start-condition name inside <...>.  Looks the name up with
 * sclookup(); a result of 0 is treated as "undeclared".  Otherwise the
 * condition is pushed on scon_stk unless it is already there, in which
 * case a warning is issued and nothing is pushed.
 */
356 sconname  :  NAME
357                               {
358                               if ( (scnum = sclookup( nmstr )) == 0 )
359                                         format_pinpoint_message(
360                                                   "undeclared start condition %s",
361                                                   nmstr );
362                               else
363                                         {
364                                         for ( i = 1; i <= scon_stk_ptr; ++i )
365                                                   if ( scon_stk[i] == scnum )
366                                                             {
367                                                             format_warn(
368                                                                       "<%s> specified twice",
369                                                                       scname[scnum] );
370                                                             break;
371                                                             }
372 
                                        /* Loop finished without a break:
                                         * scnum is not on the stack yet.
                                         */
373                                         if ( i > scon_stk_ptr )
374                                                   scon_stk[++scon_stk_ptr] = scnum;
375                                         }
376                               }
377                     ;
378 
/* rule: the pattern part of a rule.  Four forms:
 *   re2 re      - head '/' trailing context; $1 is the head machine (from
 *                 "re2"), $2 the trailing-context machine
 *   re2 re '$'  - error: trailing context given twice
 *   re '$'      - end-of-line anchor, built as trailing context of '\n'
 *   re          - plain pattern
 * The value is the machine for the whole pattern; headcnt, trailcnt and
 * variable_trail_rule are left set for finish_rule() in "flexrule".
 */
379 rule                :  re2 re
380                               {
381                               if ( transchar[lastst[$2]] != SYM_EPSILON )
382                                         /* Provide final transition \now/ so it
383                                          * will be marked as a trailing context
384                                          * state.
385                                          */
386                                         $2 = link_machines( $2,
387                                                             mkstate( SYM_EPSILON ) );
388 
389                               mark_beginning_as_normal( $2 );
390                               current_state_type = STATE_NORMAL;
391 
392                               if ( previous_continued_action )
393                                         {
394                                         /* We need to treat this as variable trailing
395                                          * context so that the backup does not happen
396                                          * in the action but before the action switch
397                                          * statement.  If the backup happens in the
398                                          * action, then the rules "falling into" this
399                                          * one's action will *also* do the backup,
400                                          * erroneously.
401                                          */
                                        /* Warn only when the rule was not
                                         * already going to be handled as
                                         * variable trailing context.
                                         */
402                                         if ( ! varlength || headcnt != 0 )
403                                                   lwarn(
404                     "trailing context made variable due to preceding '|' action" );
405 
406                                         /* Mark as variable. */
407                                         varlength = true;
408                                         headcnt = 0;
409 
410                                         }
411 
412                               if ( lex_compat || (varlength && headcnt == 0) )
413                                         { /* variable trailing context rule */
414                                         /* Mark the first part of the rule as the
415                                          * accepting "head" part of a trailing
416                                          * context rule.
417                                          *
418                                          * By the way, we didn't do this at the
419                                          * beginning of this production because back
420                                          * then current_state_type was set up for a
421                                          * trail rule, and add_accept() can create
422                                          * a new state ...
423                                          */
424                                         add_accept( $1,
425                                                   num_rules | YY_TRAILING_HEAD_MASK );
426                                         variable_trail_rule = true;
427                                         }
428 
429                               else
                                        /* rulelen was reset to 0 by "re2",
                                         * so here it counts only the
                                         * trailing part.
                                         */
430                                         trailcnt = rulelen;
431 
432                               $$ = link_machines( $1, $2 );
433                               }
434 
435                     |  re2 re '$'
436                               { synerr( _("trailing context used twice") ); }
437 
438                     |  re '$'
439                               {
                              /* '$' is treated as one character of trailing
                               * context (the '\n' state linked on below).
                               */
440                               headcnt = 0;
441                               trailcnt = 1;
442                               rulelen = 1;
443                               varlength = false;
444 
445                               current_state_type = STATE_TRAILING_CONTEXT;
446 
447                               if ( trlcontxt )
448                                         {
449                                         synerr( _("trailing context used twice") );
                                        /* NOTE(review): this value is
                                         * overwritten by the unconditional
                                         * assignment to $$ at the end of
                                         * this action.
                                         */
450                                         $$ = mkstate( SYM_EPSILON );
451                                         }
452 
453                               else if ( previous_continued_action )
454                                         {
455                                         /* See the comment in the rule for "re2 re"
456                                          * above.
457                                          */
458                                         lwarn(
459                     "trailing context made variable due to preceding '|' action" );
460 
461                                         varlength = true;
462                                         }
463 
464                               if ( lex_compat || varlength )
465                                         {
466                                         /* Again, see the comment in the rule for
467                                          * "re2 re" above.
468                                          */
469                                         add_accept( $1,
470                                                   num_rules | YY_TRAILING_HEAD_MASK );
471                                         variable_trail_rule = true;
472                                         }
473 
474                               trlcontxt = true;
475 
                              /* Result: re, then an epsilon state, then a
                               * state on '\n'.
                               */
476                               eps = mkstate( SYM_EPSILON );
477                               $$ = link_machines( $1,
478                                         link_machines( eps, mkstate( '\n' ) ) );
479                               }
480 
481                     |  re
482                               {
483                               $$ = $1;
484 
                              /* trlcontxt is set by the "re2" and "re '$'"
                               * actions; if it is set here, finish the
                               * trailing-context bookkeeping.
                               */
485                               if ( trlcontxt )
486                                         {
487                                         if ( lex_compat || (varlength && headcnt == 0) )
488                                                   /* Both head and trail are
489                                                    * variable-length.
490                                                    */
491                                                   variable_trail_rule = true;
492                                         else
493                                                   trailcnt = rulelen;
494                                         }
495                               }
496                     ;
497 
498 
/* re: alternation.  One or more "series" separated by '|'.  Any use of
 * '|' marks the pattern as variable-length; the alternatives are combined
 * with mkor().
 */
499 re                  :  re '|' series
500                               {
501                               varlength = true;
502                               $$ = mkor( $1, $3 );
503                               }
504 
505                     |  series
506                               { $$ = $1; }
507                     ;
508 
509 
/* re2: the head of a trailing-context pattern, i.e. "re" followed by '/'.
 * The action records what is known about the head's length and resets the
 * counters so that the trailing part is measured on its own.  The value
 * is the head machine, unchanged.
 */
510 re2                 :  re '/'
511                               {
512                               /* This rule is written separately so the
513                                * reduction will occur before the trailing
514                                * series is parsed.
515                                */
516 
517                               if ( trlcontxt )
518                                         synerr( _("trailing context used twice") );
519                               else
520                                         trlcontxt = true;
521 
                              /* Variable-length head: headcnt keeps the 0
                               * set by "initforrule", and varlength is
                               * cleared so it now tracks only the trail.
                               * Fixed-length head: record its length.
                               */
522                               if ( varlength )
523                                         /* We hope the trailing context is
524                                          * fixed-length.
525                                          */
526                                         varlength = false;
527                               else
528                                         headcnt = rulelen;
529 
530                               rulelen = 0;
531 
532                               current_state_type = STATE_TRAILING_CONTEXT;
533                               $$ = $1;
534                               }
535                     ;
536 
/* series: concatenation of singletons, plus the POSIX/lex-precedence
 * repeat forms {n,m}, {n,} and {n}.  With the *_POSIX tokens the repeat
 * applies to the entire series parsed so far ($1).
 *
 * In the repeat forms $3 is the lower bound and $5 (when present) the
 * upper bound.  Every repeat form marks the pattern variable-length, and
 * on bad bounds reports an error and yields $1 unchanged.
 */
537 series              :  series singleton
538                               {
539                               /* This is where concatenation of adjacent patterns
540                                * gets done.
541                                */
542                               $$ = link_machines( $1, $2 );
543                               }
544 
545                     |  singleton
546                               { $$ = $1; }
547 
548                     |  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
549                               {
550                               varlength = true;
551 
552                               if ( $3 > $5 || $3 < 0 )
553                                         {
554                                         synerr( _("bad iteration values") );
555                                         $$ = $1;
556                                         }
557                               else
558                                         {
559                                         if ( $3 == 0 )
560                                                   {
                                                  /* {0,0} is rejected;
                                                   * {0,m} is built as an
                                                   * optional {1,m}.
                                                   */
561                                                   if ( $5 <= 0 )
562                                                             {
563                                                             synerr(
564                                                             _("bad iteration values") );
565                                                             $$ = $1;
566                                                             }
567                                                   else
568                                                             $$ = mkopt(
569                                                                       mkrep( $1, 1, $5 ) );
570                                                   }
571                                         else
572                                                   $$ = mkrep( $1, $3, $5 );
573                                         }
574                               }
575 
576                     |  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
577                               {
578                               varlength = true;
579 
580                               if ( $3 <= 0 )
581                                         {
582                                         synerr( _("iteration value must be positive") );
583                                         $$ = $1;
584                                         }
585 
586                               else
587                                         $$ = mkrep( $1, $3, INFINITE_REPEAT );
588                               }
589 
590                     |  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
591                               {
592                               /* The series could be something like "(foo)",
593                                * in which case we have no idea what its length
594                                * is, so we punt here.
595                                */
596                               varlength = true;
597 
598                               if ( $3 <= 0 )
599                                         {
600                                           synerr( _("iteration value must be positive")
601                                                     );
602                                         $$ = $1;
603                                         }
604 
605                               else
                                        /* The original followed by the
                                         * result of copysingl( $1, n - 1 ).
                                         */
606                                         $$ = link_machines( $1,
607                                                             copysingl( $1, $3 - 1 ) );
608                               }
609 
610                     ;
611 
612 singleton :  singleton '*'
613                               {
614                               varlength = true;
615 
616                               $$ = mkclos( $1 );
617                               }
618 
619                     |  singleton '+'
620                               {
621                               varlength = true;
622                               $$ = mkposcl( $1 );
623                               }
624 
625                     |  singleton '?'
626                               {
627                               varlength = true;
628                               $$ = mkopt( $1 );
629                               }
630 
631                     |  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
632                               {
633                               varlength = true;
634 
635                               if ( $3 > $5 || $3 < 0 )
636                                         {
637                                         synerr( _("bad iteration values") );
638                                         $$ = $1;
639                                         }
640                               else
641                                         {
642                                         if ( $3 == 0 )
643                                                   {
644                                                   if ( $5 <= 0 )
645                                                             {
646                                                             synerr(
647                                                             _("bad iteration values") );
648                                                             $$ = $1;
649                                                             }
650                                                   else
651                                                             $$ = mkopt(
652                                                                       mkrep( $1, 1, $5 ) );
653                                                   }
654                                         else
655                                                   $$ = mkrep( $1, $3, $5 );
656                                         }
657                               }
658 
659                     |  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
660                               {
661                               varlength = true;
662 
663                               if ( $3 <= 0 )
664                                         {
665                                         synerr( _("iteration value must be positive") );
666                                         $$ = $1;
667                                         }
668 
669                               else
670                                         $$ = mkrep( $1, $3, INFINITE_REPEAT );
671                               }
672 
673                     |  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
674                               {
675                               /* The singleton could be something like "(foo)",
676                                * in which case we have no idea what its length
677                                * is, so we punt here.
678                                */
679                               varlength = true;
680 
681                               if ( $3 <= 0 )
682                                         {
683                                         synerr( _("iteration value must be positive") );
684                                         $$ = $1;
685                                         }
686 
687                               else
688                                         $$ = link_machines( $1,
689                                                             copysingl( $1, $3 - 1 ) );
690                               }
691 
692                     |  '.'
693                               {
694                               if ( ! madeany )
695                                         {
696                                         /* Create the '.' character class. */
697                     ccldot = cclinit();
698                     ccladd( ccldot, '\n' );
699                     cclnegate( ccldot );
700 
701                     if ( useecs )
702                         mkeccl( ccltbl + cclmap[ccldot],
703                             ccllen[ccldot], nextecm,
704                             ecgroup, csize, csize );
705 
706                                         /* Create the (?s:'.') character class. */
707                     cclany = cclinit();
708                     cclnegate( cclany );
709 
710                     if ( useecs )
711                         mkeccl( ccltbl + cclmap[cclany],
712                             ccllen[cclany], nextecm,
713                             ecgroup, csize, csize );
714 
715                                         madeany = true;
716                                         }
717 
718                               ++rulelen;
719 
720             if (sf_dot_all())
721                 $$ = mkstate( -cclany );
722             else
723                 $$ = mkstate( -ccldot );
724                               }
725 
726                     |  fullccl
727                               {
728                                         /* Sort characters for fast searching.
729                                          */
730                                         qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1], sizeof (*ccltbl), cclcmp );
731 
732                               if ( useecs )
733                                         mkeccl( ccltbl + cclmap[$1], ccllen[$1],
734                                                   nextecm, ecgroup, csize, csize );
735 
736                               ++rulelen;
737 
738                               if (ccl_has_nl[$1])
739                                         rule_has_nl[num_rules] = true;
740 
741                               $$ = mkstate( -$1 );
742                               }
743 
744                     |  PREVCCL
745                               {
746                               ++rulelen;
747 
748                               if (ccl_has_nl[$1])
749                                         rule_has_nl[num_rules] = true;
750 
751                               $$ = mkstate( -$1 );
752                               }
753 
754                     |  '"' string '"'
755                               { $$ = $2; }
756 
757                     |  '(' re ')'
758                               { $$ = $2; }
759 
760                     |  CHAR
761                               {
762                               ++rulelen;
763 
764                               if ($1 == nlch)
765                                         rule_has_nl[num_rules] = true;
766 
767             if (sf_case_ins() && has_case($1))
768                 /* create an alternation, as in (a|A) */
769                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
770             else
771                 $$ = mkstate( $1 );
772                               }
773                     ;
/* fullccl - a bracketed character class, optionally combined with further
 * bracketed classes through ccl_set_diff() (CCL_OP_DIFF) or
 * ccl_set_union() (CCL_OP_UNION).  The value is the resulting class.
 */
fullccl		:  fullccl CCL_OP_DIFF braceccl
			{
			$$ = ccl_set_diff( $1, $3 );
			}

		|  fullccl CCL_OP_UNION braceccl
			{
			$$ = ccl_set_union( $1, $3 );
			}

		|  braceccl
			{
			/* Spelled-out form of yacc's default action. */
			$$ = $1;
			}
		;
779 
/* braceccl - a single character class written between '[' and ']'.
 * A '^' directly after the opening bracket negates the class through
 * cclnegate().  The value is the class built by the ccl rule.
 */
braceccl	:  '[' ccl ']'
			{
			$$ = $2;
			}

		|  '[' '^' ccl ']'
			{
			$$ = $3;
			cclnegate( $$ );
			}
		;
790 
/* ccl - the contents of a bracketed character class, accumulated from left
 * to right.  The empty alternative starts a fresh class with cclinit();
 * every other alternative adds to the class in $1 and passes it on as $$.
 *
 * cclsorted and lastchar track whether the characters have been added in
 * strictly increasing order so far.
 */
ccl		:  ccl CHAR '-' CHAR
			{
			/* In a case-insensitive scanner a range is reported
			 * as ambiguous when
			 *  - one end has case and the other does not, or
			 *    both have case but differ in it, so we are
			 *    not sure what range the user means
			 *    (examples: [@-z] or [S-t]); or
			 *  - neither end has case and range_covers_case()
			 *    returns false for the range
			 *    (example: [@-_] spans [A-Z] but not [a-z]).
			 */
			if ( sf_case_ins() &&
			     ( has_case( $2 ) != has_case( $4 )
			       || ( has_case( $2 )
				    && ( b_islower( $2 ) != b_islower( $4 )
					 || b_isupper( $2 ) != b_isupper( $4 ) ) )
			       || ( ! has_case( $2 ) && ! has_case( $4 )
				    && ! range_covers_case( $2, $4 ) ) ) )
				format_warn3(
				_("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					$2, $4 );

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				/* Case-swapped ends of the range; only set
				 * and used by the second pass below.
				 */
				int lo, hi;

				i = $2;
				while ( i <= $4 )
					{
					ccladd( $1, i );
					++i;
					}

				/* The class stops being sorted as soon as a
				 * range starts at or below the last
				 * character added.
				 */
				if ( $2 <= lastchar )
					cclsorted = false;
				lastchar = $4;

				/* Do it again for upper/lowercase */
				if ( sf_case_ins() && has_case( $2 ) && has_case( $4 ) )
					{
					lo = reverse_case( $2 );
					hi = reverse_case( $4 );

					for ( i = lo; i <= hi; ++i )
						ccladd( $1, i );

					if ( lo <= lastchar )
						cclsorted = false;
					lastchar = hi;
					}
				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			if ( $2 <= lastchar )
				cclsorted = false;
			lastchar = $2;

			/* Do it again for upper/lowercase */
			if ( sf_case_ins() && has_case( $2 ) )
				{
				int swapped = reverse_case( $2 );

				ccladd( $1, swapped );
				if ( swapped <= lastchar )
					cclsorted = false;
				lastchar = swapped;
				}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			/* Start of a new class: reset the ordering state
			 * and remember the class as the current one.
			 */
			cclsorted = true;
			lastchar = 0;
			$$ = cclinit();
			currccl = $$;
			}
		;
883 
/* ccl_expr - a named character class expression inside a bracketed class.
 * Each CCE_* token invokes CCL_EXPR with the matching predicate; each
 * CCE_NEG_* token (written like [:^lower:], as the warnings below show)
 * invokes CCL_NEG_EXPR.
 *
 * In a case-insensitive scanner the lower and upper classes pull in each
 * other, and their negated forms are only warned about, not expanded.
 */
ccl_expr	:  CCE_ALNUM
			{ CCL_EXPR(isalnum); }

		|  CCE_ALPHA
			{ CCL_EXPR(isalpha); }

		|  CCE_BLANK
			{ CCL_EXPR(IS_BLANK); }

		|  CCE_CNTRL
			{ CCL_EXPR(iscntrl); }

		|  CCE_DIGIT
			{ CCL_EXPR(isdigit); }

		|  CCE_GRAPH
			{ CCL_EXPR(isgraph); }

		|  CCE_LOWER
			{
			CCL_EXPR(islower);
			if ( sf_case_ins() )
				{
				CCL_EXPR(isupper);
				}
			}

		|  CCE_PRINT
			{ CCL_EXPR(isprint); }

		|  CCE_PUNCT
			{ CCL_EXPR(ispunct); }

		|  CCE_SPACE
			{ CCL_EXPR(isspace); }

		|  CCE_XDIGIT
			{ CCL_EXPR(isxdigit); }

		|  CCE_UPPER
			{
			CCL_EXPR(isupper);
			if ( sf_case_ins() )
				{
				CCL_EXPR(islower);
				}
			}

		|  CCE_NEG_ALNUM
			{ CCL_NEG_EXPR(isalnum); }

		|  CCE_NEG_ALPHA
			{ CCL_NEG_EXPR(isalpha); }

		|  CCE_NEG_BLANK
			{ CCL_NEG_EXPR(IS_BLANK); }

		|  CCE_NEG_CNTRL
			{ CCL_NEG_EXPR(iscntrl); }

		|  CCE_NEG_DIGIT
			{ CCL_NEG_EXPR(isdigit); }

		|  CCE_NEG_GRAPH
			{ CCL_NEG_EXPR(isgraph); }

		|  CCE_NEG_PRINT
			{ CCL_NEG_EXPR(isprint); }

		|  CCE_NEG_PUNCT
			{ CCL_NEG_EXPR(ispunct); }

		|  CCE_NEG_SPACE
			{ CCL_NEG_EXPR(isspace); }

		|  CCE_NEG_XDIGIT
			{ CCL_NEG_EXPR(isxdigit); }

		|  CCE_NEG_LOWER
			{
			if ( ! sf_case_ins() )
				{
				CCL_NEG_EXPR(islower);
				}
			else
				{
				lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				}
			}

		|  CCE_NEG_UPPER
			{
			if ( ! sf_case_ins() )
				{
				CCL_NEG_EXPR(isupper);
				}
			else
				{
				lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
				}
			}
		;
929 
/* string - a sequence of CHAR tokens, built up one character at a time.
 * The empty alternative yields an epsilon state; every further character
 * gets its own machine, which is linked onto the end of what has been
 * built so far.
 */
string		:  string CHAR
			{
			/* Machine matching just this character. */
			int piece;

			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if ( ! ( sf_case_ins() && has_case( $2 ) ) )
				piece = mkstate( $2 );
			else
				/* Case-insensitive: match either case of
				 * the character, as in (a|A).
				 */
				piece = mkor (mkstate($2), mkstate(reverse_case($2)));

			$$ = link_machines( $1, piece );
			}

		|
			{
			$$ = mkstate( SYM_EPSILON );
			}
		;
948 
949 %%
950 
951 
952 /* build_eof_action - build the "<<EOF>>" action for the active start
953  *                    conditions
954  */
955 
956 void build_eof_action(void)
957           {
958           int i;
959           char action_text[MAXLINE];
960 
961           for ( i = 1; i <= scon_stk_ptr; ++i )
962                     {
963                     if ( sceof[scon_stk[i]] )
964                               format_pinpoint_message(
965                                         "multiple <<EOF>> rules for start condition %s",
966                                         scname[scon_stk[i]] );
967 
968                     else
969                               {
970                               sceof[scon_stk[i]] = true;
971 
972                               if (previous_continued_action /* && previous action was regular */)
973                                         add_action("YY_RULE_SETUP\n");
974 
975                               snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
976                                         scname[scon_stk[i]] );
977                               add_action( action_text );
978                               }
979                     }
980 
981           line_directive_out(NULL, 1);
982         add_action("[[");
983 
984           /* This isn't a normal rule after all - don't count it as
985            * such, so we don't have any holes in the rule numbering
986            * (which make generating "rule can never match" warnings
987            * more difficult.
988            */
989           --num_rules;
990           ++num_eof_rules;
991           }
992 
993 
994 /* format_synerr - write out formatted syntax error */
995 
format_synerr(const char * msg,const char arg[])996 void format_synerr( const char *msg, const char arg[] )
997           {
998           char errmsg[MAXLINE];
999 
1000           (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1001           synerr( errmsg );
1002           }
1003 
1004 
1005 /* synerr - report a syntax error */
1006 
synerr(const char * str)1007 void synerr( const char *str )
1008           {
1009           syntaxerror = true;
1010           pinpoint_message( str );
1011           }
1012 
1013 
1014 /* format_warn - write out formatted warning */
1015 
format_warn(const char * msg,const char arg[])1016 void format_warn( const char *msg, const char arg[] )
1017           {
1018           char warn_msg[MAXLINE];
1019 
1020           snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021           lwarn( warn_msg );
1022           }
1023 
1024 
1025 /* lwarn - report a warning, unless -w was given */
1026 
lwarn(const char * str)1027 void lwarn( const char *str )
1028           {
1029           line_warning( str, linenum );
1030           }
1031 
1032 /* format_pinpoint_message - write out a message formatted with one string,
1033  *                                 pinpointing its location
1034  */
1035 
format_pinpoint_message(const char * msg,const char arg[])1036 void format_pinpoint_message( const char *msg, const char arg[] )
1037           {
1038           char errmsg[MAXLINE];
1039 
1040           snprintf( errmsg, sizeof(errmsg), msg, arg );
1041           pinpoint_message( errmsg );
1042           }
1043 
1044 
1045 /* pinpoint_message - write out a message, pinpointing its location */
1046 
pinpoint_message(const char * str)1047 void pinpoint_message( const char *str )
1048           {
1049           line_pinpoint( str, linenum );
1050           }
1051 
1052 
1053 /* line_warning - report a warning at a given line, unless -w was given */
1054 
line_warning(const char * str,int line)1055 void line_warning( const char *str, int line )
1056           {
1057           char warning[MAXLINE];
1058 
1059           if ( ! nowarn )
1060                     {
1061                     snprintf( warning, sizeof(warning), "warning, %s", str );
1062                     line_pinpoint( warning, line );
1063                     }
1064           }
1065 
1066 
1067 /* line_pinpoint - write out a message, pinpointing it at the given line */
1068 
line_pinpoint(const char * str,int line)1069 void line_pinpoint( const char *str, int line )
1070           {
1071           fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1072           }
1073 
1074 
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 */

void yyerror( const char *msg )
	{
	/* Deliberately unused; the cast silences unused-parameter
	 * warnings.
	 */
	(void)msg;
	}
1083