1 /* $OpenBSD: parse.y,v 1.8 2003/06/04 17:34:44 millert Exp $ */
2
3 /* parse.y - parser for flex input */
4
5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
6 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
7
8 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
9 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
10
11 %{
12 /*-
13 * Copyright (c) 1990 The Regents of the University of California.
14 * All rights reserved.
15 *
16 * This code is derived from software contributed to Berkeley by
17 * Vern Paxson.
18 *
19 * The United States Government has rights in this work pursuant
20 * to contract no. DE-AC03-76SF00098 between the United States
21 * Department of Energy and the University of California.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 *
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 *
33 * Neither the name of the University nor the names of its contributors
34 * may be used to endorse or promote products derived from this software
35 * without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
38 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
39 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
40 * PURPOSE.
41 */
42
43 /* $Header: /cvs/src/usr.bin/lex/parse.y,v 1.8 2003/06/04 17:34:44 millert Exp $ */
44
45
46 /* Some versions of bison are broken in that they use alloca() but don't
47 * declare it properly. The following is the patented (just kidding!)
48 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
49 */
50 #ifdef YYBISON
51 /* AIX requires this to be the first thing in the file. What a piece. */
52 # ifdef _AIX
53 #pragma alloca
54 # endif
55 #endif
56
57 #include "flexdef.h"
58
59 /* The remainder of the alloca() cruft has to come after including flexdef.h,
60 * so HAVE_ALLOCA_H is (possibly) defined.
61 */
62 #ifdef YYBISON
63 # ifdef __GNUC__
64 # ifndef alloca
65 # define alloca __builtin_alloca
66 # endif
67 # else
68 # if HAVE_ALLOCA_H
69 # include <alloca.h>
70 # else
71 # ifdef __hpux
72 void *alloca ();
73 # else
74 # ifdef __TURBOC__
75 # include <malloc.h>
76 # else
77 char *alloca ();
78 # endif
79 # endif
80 # endif
81 # endif
82 #endif
83
84 /* Bletch, ^^^^ that was ugly! */
85
86
87 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
88 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
89
90 int *scon_stk;
91 int scon_stk_ptr;
92
93 static int madeany = false; /* whether we've made the '.' character class */
94 int previous_continued_action; /* whether the previous rule's action was '|' */
95
96 /* Expand a POSIX character class expression. */
97 #define CCL_EXPR(func) \
98 { \
99 int c; \
100 for ( c = 0; c < csize; ++c ) \
101 if ( isascii(c) && func(c) ) \
102 ccladd( currccl, c ); \
103 }
104
105 /* While POSIX defines isblank(), it's not ANSI C. */
106 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
107
108 /* On some over-ambitious machines, such as DEC Alpha's, the default
109 * token type is "long" instead of "int"; this leads to problems with
110 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
111 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
112 * following should ensure that the default token type is "int".
113 */
114 #define YYSTYPE int
115
116 %}
117
118 %%
119 goal : initlex sect1 sect1end sect2 initforrule
120 { /* add default rule */
121 int def_rule;
122
123 pat = cclinit();
124 cclnegate( pat );
125
126 def_rule = mkstate( -pat );
127
128 /* Remember the number of the default rule so we
129 * don't generate "can't match" warnings for it.
130 */
131 default_rule = num_rules;
132
133 finish_rule( def_rule, false, 0, 0 );
134
135 for ( i = 1; i <= lastsc; ++i )
136 scset[i] = mkbranch( scset[i], def_rule );
137
138 if ( spprdflt )
139 add_action(
140 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
141 else
142 add_action( "ECHO" );
143
144 add_action( ";\n\tYY_BREAK\n" );
145 }
146 ;
147
148 initlex :
149 { /* initialize for processing rules */
150
151 /* Create default DFA start condition. */
152 scinstal( "INITIAL", false );
153 }
154 ;
155
156 sect1 : sect1 startconddecl namelist1
157 | sect1 options
158 |
159 | error
160 { synerr( "unknown error processing section 1" ); }
161 ;
162
163 sect1end : SECTEND
164 {
165 check_options();
166 scon_stk = allocate_integer_array( lastsc + 1 );
167 scon_stk_ptr = 0;
168 }
169 ;
170
171 startconddecl : SCDECL
172 { xcluflg = false; }
173
174 | XSCDECL
175 { xcluflg = true; }
176 ;
177
178 namelist1 : namelist1 NAME
179 { scinstal( nmstr, xcluflg ); }
180
181 | NAME
182 { scinstal( nmstr, xcluflg ); }
183
184 | error
185 { synerr( "bad start condition list" ); }
186 ;
187
188 options : OPTION_OP optionlist
189 ;
190
191 optionlist : optionlist option
192 |
193 ;
194
195 option : OPT_OUTFILE '=' NAME
196 {
197 outfilename = copy_string( nmstr );
198 did_outfilename = 1;
199 }
200 | OPT_PREFIX '=' NAME
201 { prefix = copy_string( nmstr ); }
202 | OPT_YYCLASS '=' NAME
203 { yyclass = copy_string( nmstr ); }
204 ;
205
206 sect2 : sect2 scon initforrule flexrule '\n'
207 { scon_stk_ptr = $2; }
208 | sect2 scon '{' sect2 '}'
209 { scon_stk_ptr = $2; }
210 |
211 ;
212
213 initforrule :
214 {
215 /* Initialize for a parse of one rule. */
216 trlcontxt = variable_trail_rule = varlength = false;
217 trailcnt = headcnt = rulelen = 0;
218 current_state_type = STATE_NORMAL;
219 previous_continued_action = continued_action;
220 in_rule = true;
221
222 new_rule();
223 }
224 ;
225
226 flexrule : '^' rule
227 {
228 pat = $2;
229 finish_rule( pat, variable_trail_rule,
230 headcnt, trailcnt );
231
232 if ( scon_stk_ptr > 0 )
233 {
234 for ( i = 1; i <= scon_stk_ptr; ++i )
235 scbol[scon_stk[i]] =
236 mkbranch( scbol[scon_stk[i]],
237 pat );
238 }
239
240 else
241 {
242 /* Add to all non-exclusive start conditions,
243 * including the default (0) start condition.
244 */
245
246 for ( i = 1; i <= lastsc; ++i )
247 if ( ! scxclu[i] )
248 scbol[i] = mkbranch( scbol[i],
249 pat );
250 }
251
252 if ( ! bol_needed )
253 {
254 bol_needed = true;
255
256 if ( performance_report > 1 )
257 pinpoint_message(
258 "'^' operator results in sub-optimal performance" );
259 }
260 }
261
262 | rule
263 {
264 pat = $1;
265 finish_rule( pat, variable_trail_rule,
266 headcnt, trailcnt );
267
268 if ( scon_stk_ptr > 0 )
269 {
270 for ( i = 1; i <= scon_stk_ptr; ++i )
271 scset[scon_stk[i]] =
272 mkbranch( scset[scon_stk[i]],
273 pat );
274 }
275
276 else
277 {
278 for ( i = 1; i <= lastsc; ++i )
279 if ( ! scxclu[i] )
280 scset[i] =
281 mkbranch( scset[i],
282 pat );
283 }
284 }
285
286 | EOF_OP
287 {
288 if ( scon_stk_ptr > 0 )
289 build_eof_action();
290
291 else
292 {
293 /* This EOF applies to all start conditions
294 * which don't already have EOF actions.
295 */
296 for ( i = 1; i <= lastsc; ++i )
297 if ( ! sceof[i] )
298 scon_stk[++scon_stk_ptr] = i;
299
300 if ( scon_stk_ptr == 0 )
301 warn(
302 "all start conditions already have <<EOF>> rules" );
303
304 else
305 build_eof_action();
306 }
307 }
308
309 | error
310 { synerr( "unrecognized rule" ); }
311 ;
312
313 scon_stk_ptr :
314 { $$ = scon_stk_ptr; }
315 ;
316
317 scon : '<' scon_stk_ptr namelist2 '>'
318 { $$ = $2; }
319
320 | '<' '*' '>'
321 {
322 $$ = scon_stk_ptr;
323
324 for ( i = 1; i <= lastsc; ++i )
325 {
326 int j;
327
328 for ( j = 1; j <= scon_stk_ptr; ++j )
329 if ( scon_stk[j] == i )
330 break;
331
332 if ( j > scon_stk_ptr )
333 scon_stk[++scon_stk_ptr] = i;
334 }
335 }
336
337 |
338 { $$ = scon_stk_ptr; }
339 ;
340
341 namelist2 : namelist2 ',' sconname
342
343 | sconname
344
345 | error
346 { synerr( "bad start condition list" ); }
347 ;
348
349 sconname : NAME
350 {
351 if ( (scnum = sclookup( nmstr )) == 0 )
352 format_pinpoint_message(
353 "undeclared start condition %s",
354 nmstr );
355 else
356 {
357 for ( i = 1; i <= scon_stk_ptr; ++i )
358 if ( scon_stk[i] == scnum )
359 {
360 format_warn(
361 "<%s> specified twice",
362 scname[scnum] );
363 break;
364 }
365
366 if ( i > scon_stk_ptr )
367 scon_stk[++scon_stk_ptr] = scnum;
368 }
369 }
370 ;
371
372 rule : re2 re
373 {
374 if ( transchar[lastst[$2]] != SYM_EPSILON )
375 /* Provide final transition \now/ so it
376 * will be marked as a trailing context
377 * state.
378 */
379 $2 = link_machines( $2,
380 mkstate( SYM_EPSILON ) );
381
382 mark_beginning_as_normal( $2 );
383 current_state_type = STATE_NORMAL;
384
385 if ( previous_continued_action )
386 {
387 /* We need to treat this as variable trailing
388 * context so that the backup does not happen
389 * in the action but before the action switch
390 * statement. If the backup happens in the
391 * action, then the rules "falling into" this
392 * one's action will *also* do the backup,
393 * erroneously.
394 */
395 if ( ! varlength || headcnt != 0 )
396 warn(
397 "trailing context made variable due to preceding '|' action" );
398
399 /* Mark as variable. */
400 varlength = true;
401 headcnt = 0;
402 }
403
404 if ( lex_compat || (varlength && headcnt == 0) )
405 { /* variable trailing context rule */
406 /* Mark the first part of the rule as the
407 * accepting "head" part of a trailing
408 * context rule.
409 *
410 * By the way, we didn't do this at the
411 * beginning of this production because back
412 * then current_state_type was set up for a
413 * trail rule, and add_accept() can create
414 * a new state ...
415 */
416 add_accept( $1,
417 num_rules | YY_TRAILING_HEAD_MASK );
418 variable_trail_rule = true;
419 }
420
421 else
422 trailcnt = rulelen;
423
424 $$ = link_machines( $1, $2 );
425 }
426
427 | re2 re '$'
428 { synerr( "trailing context used twice" ); }
429
430 | re '$'
431 {
432 headcnt = 0;
433 trailcnt = 1;
434 rulelen = 1;
435 varlength = false;
436
437 current_state_type = STATE_TRAILING_CONTEXT;
438
439 if ( trlcontxt )
440 {
441 synerr( "trailing context used twice" );
442 $$ = mkstate( SYM_EPSILON );
443 }
444
445 else if ( previous_continued_action )
446 {
447 /* See the comment in the rule for "re2 re"
448 * above.
449 */
450 warn(
451 "trailing context made variable due to preceding '|' action" );
452
453 varlength = true;
454 }
455
456 if ( lex_compat || varlength )
457 {
458 /* Again, see the comment in the rule for
459 * "re2 re" above.
460 */
461 add_accept( $1,
462 num_rules | YY_TRAILING_HEAD_MASK );
463 variable_trail_rule = true;
464 }
465
466 trlcontxt = true;
467
468 eps = mkstate( SYM_EPSILON );
469 $$ = link_machines( $1,
470 link_machines( eps, mkstate( '\n' ) ) );
471 }
472
473 | re
474 {
475 $$ = $1;
476
477 if ( trlcontxt )
478 {
479 if ( lex_compat || (varlength && headcnt == 0) )
480 /* Both head and trail are
481 * variable-length.
482 */
483 variable_trail_rule = true;
484 else
485 trailcnt = rulelen;
486 }
487 }
488 ;
489
490
491 re : re '|' series
492 {
493 varlength = true;
494 $$ = mkor( $1, $3 );
495 }
496
497 | series
498 { $$ = $1; }
499 ;
500
501
502 re2 : re '/'
503 {
504 /* This rule is written separately so the
505 * reduction will occur before the trailing
506 * series is parsed.
507 */
508
509 if ( trlcontxt )
510 synerr( "trailing context used twice" );
511 else
512 trlcontxt = true;
513
514 if ( varlength )
515 /* We hope the trailing context is
516 * fixed-length.
517 */
518 varlength = false;
519 else
520 headcnt = rulelen;
521
522 rulelen = 0;
523
524 current_state_type = STATE_TRAILING_CONTEXT;
525 $$ = $1;
526 }
527 ;
528
529 series : series singleton
530 {
531 /* This is where concatenation of adjacent patterns
532 * gets done.
533 */
534 $$ = link_machines( $1, $2 );
535 }
536
537 | singleton
538 { $$ = $1; }
539 ;
540
541 singleton : singleton '*'
542 {
543 varlength = true;
544
545 $$ = mkclos( $1 );
546 }
547
548 | singleton '+'
549 {
550 varlength = true;
551 $$ = mkposcl( $1 );
552 }
553
554 | singleton '?'
555 {
556 varlength = true;
557 $$ = mkopt( $1 );
558 }
559
560 | singleton '{' NUMBER ',' NUMBER '}'
561 {
562 varlength = true;
563
564 if ( $3 > $5 || $3 < 0 )
565 {
566 synerr( "bad iteration values" );
567 $$ = $1;
568 }
569 else
570 {
571 if ( $3 == 0 )
572 {
573 if ( $5 <= 0 )
574 {
575 synerr(
576 "bad iteration values" );
577 $$ = $1;
578 }
579 else
580 $$ = mkopt(
581 mkrep( $1, 1, $5 ) );
582 }
583 else
584 $$ = mkrep( $1, $3, $5 );
585 }
586 }
587
588 | singleton '{' NUMBER ',' '}'
589 {
590 varlength = true;
591
592 if ( $3 <= 0 )
593 {
594 synerr( "iteration value must be positive" );
595 $$ = $1;
596 }
597
598 else
599 $$ = mkrep( $1, $3, INFINITY );
600 }
601
602 | singleton '{' NUMBER '}'
603 {
604 /* The singleton could be something like "(foo)",
605 * in which case we have no idea what its length
606 * is, so we punt here.
607 */
608 varlength = true;
609
610 if ( $3 <= 0 )
611 {
612 synerr( "iteration value must be positive" );
613 $$ = $1;
614 }
615
616 else
617 $$ = link_machines( $1,
618 copysingl( $1, $3 - 1 ) );
619 }
620
621 | '.'
622 {
623 if ( ! madeany )
624 {
625 /* Create the '.' character class. */
626 anyccl = cclinit();
627 ccladd( anyccl, '\n' );
628 cclnegate( anyccl );
629
630 if ( useecs )
631 mkeccl( ccltbl + cclmap[anyccl],
632 ccllen[anyccl], nextecm,
633 ecgroup, csize, csize );
634
635 madeany = true;
636 }
637
638 ++rulelen;
639
640 $$ = mkstate( -anyccl );
641 }
642
643 | fullccl
644 {
645 if ( ! cclsorted )
646 /* Sort characters for fast searching. We
647 * use a shell sort since this list could
648 * be large.
649 */
650 cshell( ccltbl + cclmap[$1], ccllen[$1], true );
651
652 if ( useecs )
653 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
654 nextecm, ecgroup, csize, csize );
655
656 ++rulelen;
657
658 $$ = mkstate( -$1 );
659 }
660
661 | PREVCCL
662 {
663 ++rulelen;
664
665 $$ = mkstate( -$1 );
666 }
667
668 | '"' string '"'
669 { $$ = $2; }
670
671 | '(' re ')'
672 { $$ = $2; }
673
674 | CHAR
675 {
676 ++rulelen;
677
678 if ( caseins && $1 >= 'A' && $1 <= 'Z' )
679 $1 = clower( $1 );
680
681 $$ = mkstate( $1 );
682 }
683 ;
684
685 fullccl : '[' ccl ']'
686 { $$ = $2; }
687
688 | '[' '^' ccl ']'
689 {
690 cclnegate( $3 );
691 $$ = $3;
692 }
693 ;
694
695 ccl : ccl CHAR '-' CHAR
696 {
697 if ( caseins )
698 {
699 if ( $2 >= 'A' && $2 <= 'Z' )
700 $2 = clower( $2 );
701 if ( $4 >= 'A' && $4 <= 'Z' )
702 $4 = clower( $4 );
703 }
704
705 if ( $2 > $4 )
706 synerr( "negative range in character class" );
707
708 else
709 {
710 for ( i = $2; i <= $4; ++i )
711 ccladd( $1, i );
712
713 /* Keep track if this ccl is staying in
714 * alphabetical order.
715 */
716 cclsorted = cclsorted && ($2 > lastchar);
717 lastchar = $4;
718 }
719
720 $$ = $1;
721 }
722
723 | ccl CHAR
724 {
725 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
726 $2 = clower( $2 );
727
728 ccladd( $1, $2 );
729 cclsorted = cclsorted && ($2 > lastchar);
730 lastchar = $2;
731 $$ = $1;
732 }
733
734 | ccl ccl_expr
735 {
736 /* Too hard to properly maintain cclsorted. */
737 cclsorted = false;
738 $$ = $1;
739 }
740
741 |
742 {
743 cclsorted = true;
744 lastchar = 0;
745 currccl = $$ = cclinit();
746 }
747 ;
748
749 ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) }
750 | CCE_ALPHA { CCL_EXPR(isalpha) }
751 | CCE_BLANK { CCL_EXPR(IS_BLANK) }
752 | CCE_CNTRL { CCL_EXPR(iscntrl) }
753 | CCE_DIGIT { CCL_EXPR(isdigit) }
754 | CCE_GRAPH { CCL_EXPR(isgraph) }
755 | CCE_LOWER { CCL_EXPR(islower) }
756 | CCE_PRINT { CCL_EXPR(isprint) }
757 | CCE_PUNCT { CCL_EXPR(ispunct) }
758 | CCE_SPACE { CCL_EXPR(isspace) }
759 | CCE_UPPER {
760 if ( caseins )
761 CCL_EXPR(islower)
762 else
763 CCL_EXPR(isupper)
764 }
765 | CCE_XDIGIT { CCL_EXPR(isxdigit) }
766 ;
767
768 string : string CHAR
769 {
770 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
771 $2 = clower( $2 );
772
773 ++rulelen;
774
775 $$ = link_machines( $1, mkstate( $2 ) );
776 }
777
778 |
779 { $$ = mkstate( SYM_EPSILON ); }
780 ;
781
782 %%
783
784
785 /* build_eof_action - build the "<<EOF>>" action for the active start
786 * conditions
787 */
788
789 void build_eof_action()
790 {
791 register int i;
792 char action_text[MAXLINE];
793
794 for ( i = 1; i <= scon_stk_ptr; ++i )
795 {
796 if ( sceof[scon_stk[i]] )
797 format_pinpoint_message(
798 "multiple <<EOF>> rules for start condition %s",
799 scname[scon_stk[i]] );
800
801 else
802 {
803 sceof[scon_stk[i]] = true;
804 snprintf( action_text, sizeof action_text,
805 "case YY_STATE_EOF(%s):\n",
806 scname[scon_stk[i]] );
807 add_action( action_text );
808 }
809 }
810
811 line_directive_out( (FILE *) 0, 1 );
812
813 /* This isn't a normal rule after all - don't count it as
814 * such, so we don't have any holes in the rule numbering
815 * (which make generating "rule can never match" warnings
816 * more difficult.
817 */
818 --num_rules;
819 ++num_eof_rules;
820 }
821
822
823 /* format_synerr - write out formatted syntax error */
824
format_synerr(msg,arg)825 void format_synerr( msg, arg )
826 char msg[], arg[];
827 {
828 char errmsg[MAXLINE];
829
830 (void) snprintf( errmsg, sizeof errmsg, msg, arg );
831 synerr( errmsg );
832 }
833
834
835 /* synerr - report a syntax error */
836
synerr(str)837 void synerr( str )
838 char str[];
839 {
840 syntaxerror = true;
841 pinpoint_message( str );
842 }
843
844
845 /* format_warn - write out formatted warning */
846
format_warn(msg,arg)847 void format_warn( msg, arg )
848 char msg[], arg[];
849 {
850 char warn_msg[MAXLINE];
851
852 (void) snprintf( warn_msg, sizeof warn_msg, msg, arg );
853 warn( warn_msg );
854 }
855
856
857 /* warn - report a warning, unless -w was given */
858
warn(str)859 void warn( str )
860 char str[];
861 {
862 line_warning( str, linenum );
863 }
864
865 /* format_pinpoint_message - write out a message formatted with one string,
866 * pinpointing its location
867 */
868
format_pinpoint_message(msg,arg)869 void format_pinpoint_message( msg, arg )
870 char msg[], arg[];
871 {
872 char errmsg[MAXLINE];
873
874 (void) snprintf( errmsg, sizeof errmsg, msg, arg );
875 pinpoint_message( errmsg );
876 }
877
878
879 /* pinpoint_message - write out a message, pinpointing its location */
880
pinpoint_message(str)881 void pinpoint_message( str )
882 char str[];
883 {
884 line_pinpoint( str, linenum );
885 }
886
887
888 /* line_warning - report a warning at a given line, unless -w was given */
889
line_warning(str,line)890 void line_warning( str, line )
891 char str[];
892 int line;
893 {
894 char warning[MAXLINE];
895
896 if ( ! nowarn )
897 {
898 snprintf( warning, sizeof warning, "warning, %s", str );
899 line_pinpoint( warning, line );
900 }
901 }
902
903
904 /* line_pinpoint - write out a message, pinpointing it at the given line */
905
line_pinpoint(str,line)906 void line_pinpoint( str, line )
907 char str[];
908 int line;
909 {
910 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
911 }
912
913
914 /* yyerror - eat up an error message from the parser;
915 * currently, messages are ignore
916 */
917
yyerror(msg)918 void yyerror( msg )
919 char msg[];
920 {
921 }
922