/*	$OpenBSD: scan.l,v 1.8 2003/06/04 17:34:44 millert Exp $	*/

/* scan.l - scanner for flex input */

%{
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Vern Paxson.
 *
 * The United States Government has rights in this work pursuant
 * to contract no. DE-AC03-76SF00098 between the United States
 * Department of Energy and the University of California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

#include "flexdef.h"
#include "parse.h"

#ifdef __IDSTRING
__IDSTRING(rcsid, "$MirOS: src/usr.bin/lex/scan.l,v 1.2 2008/06/13 15:07:36 tg Exp $");
#endif

/* Append the text of the current match to the action being collected. */
#define ACTION_ECHO add_action( yytext )

/* Emit a definition of `def' (with value 1) only when should_define is
 * non-zero.
 */
#define ACTION_IFDEF(def, should_define) \
	{ \
	if ( should_define ) \
		action_define( def, 1 ); \
	}

#define MARK_END_OF_PROLOG mark_prolog();

/* The generated scanning routine is named flexscan() instead of yylex(). */
#define YY_DECL \
	int flexscan()

/* Return the first character of the match as a CHAR token; the cast keeps
 * characters with the high bit set from becoming negative values.
 */
#define RETURNCHAR \
	yylval = (unsigned char) yytext[0]; \
	return CHAR;

/* Save the match in nmstr (truncating if it does not fit) and return NAME. */
#define RETURNNAME \
	strlcpy( nmstr, yytext, sizeof nmstr); \
	return NAME;

/* Push str[start..] back onto the input, last character first, so that it
 * is rescanned in its original order.  Uses the scanner-local index `i'.
 */
#define PUT_BACK_STRING(str, start) \
	for ( i = strlen( str ) - 1; i >= start; --i ) \
		unput((str)[i])

/* The scanner is caseless, so "reject" and "yymore" match in any case;
 * these checks pick out the spellings that actually count.
 */
#define CHECK_REJECT(str) \
	if ( all_upper( str ) ) \
		reject = true;

#define CHECK_YYMORE(str) \
	if ( all_lower( str ) ) \
		yymore_used = true;
%}

%option caseless nodefault outfile="scan.c" stack noyy_top_state
%option nostdinit

%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
%x OPTION LINEDIR

WS		[[:blank:]]+
OPTWS		[[:blank:]]*
NOT_WS		[^[:blank:]\n]

NL		\r?\n

NAME		([[:alpha:]_][[:alnum:]_-]*)
NOT_NAME	[^[:alpha:]_*\n]+

SCNAME		{NAME}

ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))

FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
CCL_CHAR	([^\\\n\]]|{ESCSEQ})
CCL_EXPR	("[:"[[:alpha:]]+":]")

LEXOPT		[aceknopr]

%%
	/* State that must survive between calls to flexscan(). */
	static int bracelevel, didadef, indented_code;
	static int doing_rule_action = false;
	static int option_sense;

	/* Per-call scratch variables; `i' is used by PUT_BACK_STRING. */
	int doing_codeblock = false;
	int i;
	Char nmdef[MAXLINE], myesc();


<INITIAL>{
	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
	^"%s"{NAME}?	return SCDECL;
	^"%x"{NAME}?	return XSCDECL;
	^"%{".*{NL}	{
			++linenum;
			line_directive_out( (FILE *) 0, 1 );
			indented_code = false;
			BEGIN(CODEBLOCK);
			}

	{WS}		/* discard */

	^"%%".*		{
			sectnum = 2;
			bracelevel = 0;
			mark_defs1();
			line_directive_out( (FILE *) 0, 1 );
			BEGIN(SECT2PROLOG);
			return SECTEND;
			}

	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
	^"%array".*{NL}		yytext_is_array = true; ++linenum;

	^"%option"	BEGIN(OPTION); return OPTION_OP;

	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */

	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );

	^{NAME}		{
			strlcpy( nmstr, yytext, sizeof nmstr );
			didadef = false;
			BEGIN(PICKUPDEF);
			}

	{SCNAME}	RETURNNAME;
	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
}


<COMMENT>{
	"*/"		ACTION_ECHO; yy_pop_state();
	"*"		ACTION_ECHO;
	[^*\n]+		ACTION_ECHO;
	[^*\n]*{NL}	++linenum; ACTION_ECHO;
}

<LINEDIR>{
	\n		yy_pop_state();
	[[:digit:]]+	linenum = myctoi( yytext );

	\"[^"\n]*\"	{
			flex_free( (void *) infilename );
			infilename = copy_string( yytext + 1 );
			infilename[strlen( infilename ) - 1] = '\0';
			}
	.		/* ignore spurious characters */
}

<CODEBLOCK>{
	^"%}".*{NL}	++linenum; BEGIN(INITIAL);

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;

	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( indented_code )
				BEGIN(INITIAL);
			}
}


<PICKUPDEF>{
	{WS}		/* separates name and definition */

	{NOT_WS}.*	{
			strlcpy( (char *) nmdef, yytext, sizeof nmdef);

			/* Skip trailing whitespace. */
			for ( i = strlen( (char *) nmdef ) - 1;
			      i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
			      --i )
				;

			nmdef[i + 1] = '\0';

			ndinstal( nmstr, nmdef );
			didadef = true;
			}

	{NL}		{
			if ( ! didadef )
				synerr( _( "incomplete name definition" ) );
			BEGIN(INITIAL);
			++linenum;
			}
}


<OPTION>{
	{NL}		++linenum; BEGIN(INITIAL);
	{WS}		option_sense = true;

	"="		return '=';

	no		option_sense = ! option_sense;

	7bit		csize = option_sense ? 128 : 256;
	8bit		csize = option_sense ? 256 : 128;

	align		long_align = option_sense;
	always-interactive	{
			action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
			}
	array		yytext_is_array = option_sense;
	backup		backing_up_report = option_sense;
	batch		interactive = ! option_sense;
	"c++"		C_plus_plus = option_sense;
	caseful|case-sensitive		caseins = ! option_sense;
	caseless|case-insensitive	caseins = option_sense;
	debug		ddebug = option_sense;
	default		spprdflt = ! option_sense;
	ecs		useecs = option_sense;
	fast		{
			useecs = usemecs = false;
			use_read = fullspd = true;
			}
	full		{
			useecs = usemecs = false;
			use_read = fulltbl = true;
			}
	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
	interactive	interactive = option_sense;
	lex-compat	lex_compat = option_sense;
	main		{
			action_define( "YY_MAIN", option_sense );
			do_yywrap = ! option_sense;
			}
	meta-ecs	usemecs = option_sense;
	never-interactive	{
			action_define( "YY_NEVER_INTERACTIVE", option_sense );
			}
	perf-report	performance_report += option_sense ? 1 : -1;
	pointer		yytext_is_array = ! option_sense;
	read		use_read = option_sense;
	reject		reject_really_used = option_sense;
	stack		action_define( "YY_STACK_USED", option_sense );
	stdinit		do_stdinit = option_sense;
	stdout		use_stdout = option_sense;
	unput		ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
	verbose		printstats = option_sense;
	warn		nowarn = ! option_sense;
	yylineno	do_yylineno = option_sense;
	yymore		yymore_really_used = option_sense;
	yywrap		do_yywrap = option_sense;

	yy_push_state	ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
	yy_pop_state	ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
	yy_top_state	ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);

	yy_scan_buffer	ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
	yy_scan_bytes	ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
	yy_scan_string	ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);

	outfile		return OPT_OUTFILE;
	prefix		return OPT_PREFIX;
	yyclass		return OPT_YYCLASS;

	\"[^"\n]*\"	{
			strlcpy( nmstr, yytext + 1, sizeof nmstr);
			nmstr[strlen( nmstr ) - 1] = '\0';
			return NAME;
			}

	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
			format_synerr( _( "unrecognized %%option: %s" ),
				yytext );
			BEGIN(RECOVER);
			}
}

<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);


<SECT2PROLOG>{
	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */

	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */

	^{NOT_WS}.*	{	/* non-indented code */
			if ( bracelevel <= 0 )
				{ /* not in %{ ... %} */
				yyless( 0 );	/* put it all back */
				yy_set_bol( 1 );
				mark_prolog();
				BEGIN(SECT2);
				}
			else
				ACTION_ECHO;
			}

	.*		ACTION_ECHO;
	{NL}	++linenum; ACTION_ECHO;

	<<EOF>>		{
			mark_prolog();
			sectnum = 0;
			yyterminate(); /* to stop the parser */
			}
}

<SECT2>{
	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */

	^{OPTWS}"%{"	{
			indented_code = false;
			doing_codeblock = true;
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			}

	^{OPTWS}"<"	BEGIN(SC); return '<';
	^{OPTWS}"^"	return '^';
	\"		BEGIN(QUOTE); return '"';
	"{"/[[:digit:]]	BEGIN(NUM); return '{';
	"$"/([[:blank:]]|{NL})	return '$';

	{WS}"%{"		{
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}
	{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';

	^{WS}"/*"	{
			yyless( yyleng - 2 );	/* put back '/', '*' */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			}

	^{WS}		/* allow indented rules */

	{WS}		{
			/* This rule is separate from the one below because
			 * otherwise we get variable trailing context, so
			 * we can't build the scanner using -{f,F}.
			 */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	{OPTWS}{NL}	{
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			unput( '\n' );	/* so <ACTION> sees it */

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	^{OPTWS}"<<EOF>>"	|
	"<<EOF>>"	return EOF_OP;

	^"%%".*		{
			sectnum = 3;
			BEGIN(SECT3);
			yyterminate(); /* to stop the parser */
			}

	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
			int cclval;

			strlcpy( nmstr, yytext, sizeof nmstr);

			/* Check to see if we've already encountered this
			 * ccl.
			 */
			if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
				{
				if ( input() != ']' )
					synerr( _( "bad character class" ) );

				yylval = cclval;
				++cclreuse;
				return PREVCCL;
				}
			else
				{
				/* We fudge a bit.  We know that this ccl will
				 * soon be numbered as lastccl + 1 by cclinit.
				 */
				cclinstal( (Char *) nmstr, lastccl + 1 );

				/* Push back everything but the leading bracket
				 * so the ccl can be rescanned.
				 */
				yyless( 1 );

				BEGIN(FIRSTCCL);
				return '[';
				}
			}

	"{"{NAME}"}"	{
			register Char *nmdefptr;
			Char *ndlookup();

			strlcpy( nmstr, yytext + 1, sizeof nmstr );

			/* Chop the trailing brace.  When the name did not
			 * fit, strlcpy() has already truncated and terminated
			 * nmstr, and indexing by the match length would store
			 * past the end of the buffer, so skip the store.
			 */
			if ( (size_t) (yyleng - 2) < sizeof nmstr )
				nmstr[yyleng - 2] = '\0';

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}

	[/|*+?.(){}]	return (unsigned char) yytext[0];
	.		RETURNCHAR;
}


<SC>{
	[,*]		return (unsigned char) yytext[0];
	">"		BEGIN(SECT2); return '>';
	">"/^		BEGIN(CARETISBOL); return '>';
	{SCNAME}	RETURNNAME;
	.		{
			format_synerr( _( "bad <start condition>: %s" ),
				yytext );
			}
}

<CARETISBOL>"^"		BEGIN(SECT2); return '^';


<QUOTE>{
	[^"\n]		RETURNCHAR;
	\"		BEGIN(SECT2); return '"';

	{NL}		{
			synerr( _( "missing quote" ) );
			BEGIN(SECT2);
			++linenum;
			return '"';
			}
}


<FIRSTCCL>{
	"^"/[^-\]\n]	BEGIN(CCL); return '^';
	"^"/("-"|"]")	return '^';
	.		BEGIN(CCL); RETURNCHAR;
}

<CCL>{
	-/[^\]\n]	return '-';
	[^\]\n]		RETURNCHAR;
	"]"		BEGIN(SECT2); return ']';
	.|{NL}		{
			synerr( _( "bad character class" ) );
			BEGIN(SECT2);
			return ']';
			}
}

<FIRSTCCL,CCL>{
	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
	{CCL_EXPR}	{
			format_synerr(
				_( "bad character class expression: %s" ),
					yytext );
			BEGIN(CCL); return CCE_ALNUM;
			}
}

<NUM>{
	[[:digit:]]+	{
			yylval = myctoi( yytext );
			return NUMBER;
			}

	","		return ',';
	"}"		BEGIN(SECT2); return '}';

	.		{
			synerr( _( "bad character inside {}'s" ) );
			BEGIN(SECT2);
			return '}';
			}

	{NL}		{
			synerr( _( "missing }" ) );
			BEGIN(SECT2);
			++linenum;
			return '}';
			}
}


<PERCENT_BRACE_ACTION>{
	{OPTWS}"%}".*		bracelevel = 0;

	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );

	<CODEBLOCK,ACTION>{
		"reject"	{
			ACTION_ECHO;
			CHECK_REJECT(yytext);
			}
		"yymore"	{
			ACTION_ECHO;
			CHECK_YYMORE(yytext);
			}
	}

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 ||
			     (doing_codeblock && indented_code) )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = doing_codeblock = false;
				BEGIN(SECT2);
				}
			}
}


	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>{
	"{"		ACTION_ECHO; ++bracelevel;
	"}"		ACTION_ECHO; --bracelevel;
	[^[:alpha:]_{}"'/\n]+	ACTION_ECHO;
	{NAME}		ACTION_ECHO;
	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = false;
				BEGIN(SECT2);
				}
			}
	.		ACTION_ECHO;
}

<ACTION_STRING>{
	[^"\\\n]+	ACTION_ECHO;
	\\.		ACTION_ECHO;
	{NL}		++linenum; ACTION_ECHO;
	\"		ACTION_ECHO; BEGIN(ACTION);
	.		ACTION_ECHO;
}

<COMMENT,ACTION,ACTION_STRING><<EOF>>	{
			synerr( _( "EOF encountered inside an action" ) );
			yyterminate();
			}


<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			yylval = myesc( (Char *) yytext );

			if ( YY_START == FIRSTCCL )
				BEGIN(CCL);

			return CHAR;
			}


<SECT3>{
	.*(\n?)		ECHO;
	<<EOF>>		sectnum = 0; yyterminate();
}

<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );

%%


/* yywrap - called at end of an input file
 *
 * Advances to the next file named on the command line, if any.  Returns 0
 * when a new input file has been set up and scanning should continue, 1
 * when there is no more input.
 */

int yywrap()
	{
	if ( --num_input_files > 0 )
		{
		set_input_file( *++input_files );
		return 0;
		}

	else
		return 1;
	}


/* set_input_file - open the given file (if NULL, stdin) for scanning
 *
 * A null pointer or the name "-" selects standard input, which is recorded
 * under the name "<stdin>".  Failure to open a named file is reported
 * through lerrsf().  The line counter is reset to 1.
 */

void set_input_file( file )
char *file;
	{
	/* Remember the previous name so it can be released once the new
	 * one is in place; the #line handling in the scanner frees
	 * infilename the same way, and flex_free() accepts a null pointer.
	 */
	char *old_infilename = infilename;

	if ( file && strcmp( file, "-" ) )
		{
		infilename = copy_string( file );
		yyin = fopen( infilename, "r" );

		if ( yyin == NULL )
			lerrsf( _( "can't open %s" ), file );
		}

	else
		{
		yyin = stdin;
		infilename = copy_string( "<stdin>" );
		}

	flex_free( (void *) old_infilename );

	linenum = 1;
	}


/* Wrapper routines for accessing the scanner's malloc routines. */

/* flex_alloc - allocate size bytes; returns whatever malloc() returns */

void *flex_alloc( size )
size_t size;
	{
	return (void *) malloc( size );
	}

/* flex_realloc - resize the block at ptr to size bytes via realloc() */

void *flex_realloc( ptr, size )
void *ptr;
size_t size;
	{
	return (void *) realloc( ptr, size );
	}

/* flex_free - release a block obtained from the routines above
 *
 * A null ptr is allowed; free() itself ignores it.
 */

void flex_free( ptr )
void *ptr;
	{
	free( ptr );
	}