1 /* $OpenBSD: scanner.c,v 1.6 2003/08/01 22:01:37 david Exp $ */
2 /* $NetBSD: scanner.c,v 1.3 1995/09/28 10:34:36 tls Exp $ */
3
4 /*
5 * Copyright (c) 1983, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Edward Wang at The University of California, Berkeley.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
/*
 * Version-control identification strings embedded in the object file.
 * They are never written to, so they are const-qualified.
 */
#ifndef lint
#if 0
static const char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 6/6/93";
#else
static const char rcsid[] = "$OpenBSD: scanner.c,v 1.6 2003/08/01 22:01:37 david Exp $";
#endif
#endif /* not lint */
43
44 #include "value.h"
45 #include "token.h"
46 #include "context.h"
47 #include "string.h"
48 #include <stdlib.h>
49
s_getc()50 s_getc()
51 {
52 int c;
53
54 switch (cx.x_type) {
55 case X_FILE:
56 c = getc(cx.x_fp);
57 if (cx.x_bol && c != EOF) {
58 cx.x_bol = 0;
59 cx.x_lineno++;
60 }
61 if (c == '\n')
62 cx.x_bol = 1;
63 return c;
64 case X_BUF:
65 if (*cx.x_bufp != 0)
66 return *cx.x_bufp++ & 0xff;
67 else
68 return EOF;
69 }
70 /*NOTREACHED*/
71 }
72
s_ungetc(c)73 s_ungetc(c)
74 {
75 if (c == EOF)
76 return EOF;
77 switch (cx.x_type) {
78 case X_FILE:
79 cx.x_bol = 0;
80 return ungetc(c, cx.x_fp);
81 case X_BUF:
82 if (cx.x_bufp > cx.x_buf)
83 return *--cx.x_bufp = c;
84 else
85 return EOF;
86 }
87 /*NOTREACHED*/
88 }
89
s_gettok()90 s_gettok()
91 {
92 char buf[100];
93 char *p = buf;
94 int c;
95 int state = 0;
96
97 loop:
98 c = s_getc();
99 switch (state) {
100 case 0:
101 switch (c) {
102 case ' ':
103 case '\t':
104 break;
105 case '\n':
106 case ';':
107 cx.x_token = T_EOL;
108 state = -1;
109 break;
110 case '#':
111 state = 1;
112 break;
113 case EOF:
114 cx.x_token = T_EOF;
115 state = -1;
116 break;
117 case 'a': case 'b': case 'c': case 'd': case 'e':
118 case 'f': case 'g': case 'h': case 'i': case 'j':
119 case 'k': case 'l': case 'm': case 'n': case 'o':
120 case 'p': case 'q': case 'r': case 's': case 't':
121 case 'u': case 'v': case 'w': case 'x': case 'y':
122 case 'z':
123 case 'A': case 'B': case 'C': case 'D': case 'E':
124 case 'F': case 'G': case 'H': case 'I': case 'J':
125 case 'K': case 'L': case 'M': case 'N': case 'O':
126 case 'P': case 'Q': case 'R': case 'S': case 'T':
127 case 'U': case 'V': case 'W': case 'X': case 'Y':
128 case 'Z':
129 case '_': case '.':
130 *p++ = c;
131 state = 2;
132 break;
133 case '"':
134 state = 3;
135 break;
136 case '\'':
137 state = 4;
138 break;
139 case '\\':
140 switch (c = s_gettok1()) {
141 case -1:
142 break;
143 case -2:
144 state = 0;
145 break;
146 default:
147 *p++ = c;
148 state = 2;
149 }
150 break;
151 case '0':
152 cx.x_val.v_num = 0;
153 state = 10;
154 break;
155 case '1': case '2': case '3': case '4':
156 case '5': case '6': case '7': case '8': case '9':
157 cx.x_val.v_num = c - '0';
158 state = 11;
159 break;
160 case '>':
161 state = 20;
162 break;
163 case '<':
164 state = 21;
165 break;
166 case '=':
167 state = 22;
168 break;
169 case '!':
170 state = 23;
171 break;
172 case '&':
173 state = 24;
174 break;
175 case '|':
176 state = 25;
177 break;
178 case '$':
179 state = 26;
180 break;
181 case '~':
182 cx.x_token = T_COMP;
183 state = -1;
184 break;
185 case '+':
186 cx.x_token = T_PLUS;
187 state = -1;
188 break;
189 case '-':
190 cx.x_token = T_MINUS;
191 state = -1;
192 break;
193 case '*':
194 cx.x_token = T_MUL;
195 state = -1;
196 break;
197 case '/':
198 cx.x_token = T_DIV;
199 state = -1;
200 break;
201 case '%':
202 cx.x_token = T_MOD;
203 state = -1;
204 break;
205 case '^':
206 cx.x_token = T_XOR;
207 state = -1;
208 break;
209 case '(':
210 cx.x_token = T_LP;
211 state = -1;
212 break;
213 case ')':
214 cx.x_token = T_RP;
215 state = -1;
216 break;
217 case ',':
218 cx.x_token = T_COMMA;
219 state = -1;
220 break;
221 case '?':
222 cx.x_token = T_QUEST;
223 state = -1;
224 break;
225 case ':':
226 cx.x_token = T_COLON;
227 state = -1;
228 break;
229 case '[':
230 cx.x_token = T_LB;
231 state = -1;
232 break;
233 case ']':
234 cx.x_token = T_RB;
235 state = -1;
236 break;
237 default:
238 cx.x_val.v_num = c;
239 cx.x_token = T_CHAR;
240 state = -1;
241 break;
242 }
243 break;
244 case 1: /* got # */
245 if (c == '\n' || c == EOF) {
246 (void) s_ungetc(c);
247 state = 0;
248 }
249 break;
250 case 2: /* unquoted string */
251 switch (c) {
252 case 'a': case 'b': case 'c': case 'd': case 'e':
253 case 'f': case 'g': case 'h': case 'i': case 'j':
254 case 'k': case 'l': case 'm': case 'n': case 'o':
255 case 'p': case 'q': case 'r': case 's': case 't':
256 case 'u': case 'v': case 'w': case 'x': case 'y':
257 case 'z':
258 case 'A': case 'B': case 'C': case 'D': case 'E':
259 case 'F': case 'G': case 'H': case 'I': case 'J':
260 case 'K': case 'L': case 'M': case 'N': case 'O':
261 case 'P': case 'Q': case 'R': case 'S': case 'T':
262 case 'U': case 'V': case 'W': case 'X': case 'Y':
263 case 'Z':
264 case '_': case '.':
265 case '0': case '1': case '2': case '3': case '4':
266 case '5': case '6': case '7': case '8': case '9':
267 if (p < buf + sizeof buf - 1)
268 *p++ = c;
269 break;
270 case '"':
271 state = 3;
272 break;
273 case '\'':
274 state = 4;
275 break;
276 case '\\':
277 switch (c = s_gettok1()) {
278 case -2:
279 (void) s_ungetc(' ');
280 case -1:
281 break;
282 default:
283 if (p < buf + sizeof buf - 1)
284 *p++ = c;
285 }
286 break;
287 default:
288 (void) s_ungetc(c);
289 case EOF:
290 *p = 0;
291 cx.x_token = T_STR;
292 switch (*buf) {
293 case 'i':
294 if (buf[1] == 'f' && buf[2] == 0)
295 cx.x_token = T_IF;
296 break;
297 case 't':
298 if (buf[1] == 'h' && buf[2] == 'e'
299 && buf[3] == 'n' && buf[4] == 0)
300 cx.x_token = T_THEN;
301 break;
302 case 'e':
303 if (buf[1] == 'n' && buf[2] == 'd'
304 && buf[3] == 'i' && buf[4] == 'f'
305 && buf[5] == 0)
306 cx.x_token = T_ENDIF;
307 else if (buf[1] == 'l' && buf[2] == 's')
308 if (buf[3] == 'i' && buf[4] == 'f'
309 && buf[5] == 0)
310 cx.x_token = T_ELSIF;
311 else if (buf[3] == 'e' && buf[4] == 0)
312 cx.x_token = T_ELSE;
313 break;
314 }
315 if (cx.x_token == T_STR
316 && (cx.x_val.v_str = str_cpy(buf)) == 0) {
317 p_memerror();
318 cx.x_token = T_EOF;
319 }
320 state = -1;
321 break;
322 }
323 break;
324 case 3: /* " quoted string */
325 switch (c) {
326 case '\n':
327 (void) s_ungetc(c);
328 case EOF:
329 case '"':
330 state = 2;
331 break;
332 case '\\':
333 switch (c = s_gettok1()) {
334 case -1:
335 case -2: /* newlines are invisible */
336 break;
337 default:
338 if (p < buf + sizeof buf - 1)
339 *p++ = c;
340 }
341 break;
342 default:
343 if (p < buf + sizeof buf - 1)
344 *p++ = c;
345 break;
346 }
347 break;
348 case 4: /* ' quoted string */
349 switch (c) {
350 case '\n':
351 (void) s_ungetc(c);
352 case EOF:
353 case '\'':
354 state = 2;
355 break;
356 case '\\':
357 switch (c = s_gettok1()) {
358 case -1:
359 case -2: /* newlines are invisible */
360 break;
361 default:
362 if (p < buf + sizeof buf - 1)
363 *p++ = c;
364 }
365 break;
366 default:
367 if (p < buf + sizeof buf - 1)
368 *p++ = c;
369 break;
370 }
371 break;
372 case 10: /* got 0 */
373 switch (c) {
374 case 'x':
375 case 'X':
376 cx.x_val.v_num = 0;
377 state = 12;
378 break;
379 case '0': case '1': case '2': case '3': case '4':
380 case '5': case '6': case '7':
381 cx.x_val.v_num = c - '0';
382 state = 13;
383 break;
384 case '8': case '9':
385 cx.x_val.v_num = c - '0';
386 state = 11;
387 break;
388 default:
389 (void) s_ungetc(c);
390 state = -1;
391 cx.x_token = T_NUM;
392 }
393 break;
394 case 11: /* decimal number */
395 switch (c) {
396 case '0': case '1': case '2': case '3': case '4':
397 case '5': case '6': case '7': case '8': case '9':
398 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
399 break;
400 default:
401 (void) s_ungetc(c);
402 state = -1;
403 cx.x_token = T_NUM;
404 }
405 break;
406 case 12: /* hex number */
407 switch (c) {
408 case '0': case '1': case '2': case '3': case '4':
409 case '5': case '6': case '7': case '8': case '9':
410 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
411 break;
412 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
413 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
414 break;
415 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
416 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
417 break;
418 default:
419 (void) s_ungetc(c);
420 state = -1;
421 cx.x_token = T_NUM;
422 }
423 break;
424 case 13: /* octal number */
425 switch (c) {
426 case '0': case '1': case '2': case '3': case '4':
427 case '5': case '6': case '7':
428 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
429 break;
430 default:
431 (void) s_ungetc(c);
432 state = -1;
433 cx.x_token = T_NUM;
434 }
435 break;
436 case 20: /* got > */
437 switch (c) {
438 case '=':
439 cx.x_token = T_GE;
440 state = -1;
441 break;
442 case '>':
443 cx.x_token = T_RS;
444 state = -1;
445 break;
446 default:
447 (void) s_ungetc(c);
448 cx.x_token = T_GT;
449 state = -1;
450 }
451 break;
452 case 21: /* got < */
453 switch (c) {
454 case '=':
455 cx.x_token = T_LE;
456 state = -1;
457 break;
458 case '<':
459 cx.x_token = T_LS;
460 state = -1;
461 break;
462 default:
463 (void) s_ungetc(c);
464 cx.x_token = T_LT;
465 state = -1;
466 }
467 break;
468 case 22: /* got = */
469 switch (c) {
470 case '=':
471 cx.x_token = T_EQ;
472 state = -1;
473 break;
474 default:
475 (void) s_ungetc(c);
476 cx.x_token = T_ASSIGN;
477 state = -1;
478 }
479 break;
480 case 23: /* got ! */
481 switch (c) {
482 case '=':
483 cx.x_token = T_NE;
484 state = -1;
485 break;
486 default:
487 (void) s_ungetc(c);
488 cx.x_token = T_NOT;
489 state = -1;
490 }
491 break;
492 case 24: /* got & */
493 switch (c) {
494 case '&':
495 cx.x_token = T_ANDAND;
496 state = -1;
497 break;
498 default:
499 (void) s_ungetc(c);
500 cx.x_token = T_AND;
501 state = -1;
502 }
503 break;
504 case 25: /* got | */
505 switch (c) {
506 case '|':
507 cx.x_token = T_OROR;
508 state = -1;
509 break;
510 default:
511 (void) s_ungetc(c);
512 cx.x_token = T_OR;
513 state = -1;
514 }
515 break;
516 case 26: /* got $ */
517 switch (c) {
518 case '?':
519 cx.x_token = T_DQ;
520 state = -1;
521 break;
522 default:
523 (void) s_ungetc(c);
524 cx.x_token = T_DOLLAR;
525 state = -1;
526 }
527 break;
528 default:
529 abort();
530 }
531 if (state >= 0)
532 goto loop;
533 return cx.x_token;
534 }
535
/*
 * Decode the rest of a backslash escape; the backslash itself has
 * already been consumed by the caller.
 *
 * Returns
 *   -1  if end of input follows the backslash,
 *   -2  if a newline follows the backslash,
 *   the control character for \b, \f, \n, \r and \t,
 *   the value of one to three octal digits for \[0-7]..., or
 *   the character itself for anything else.
 *
 * The octal value is not reduced to eight bits, so \400 through \777
 * yield 256..511.
 */
int
s_gettok1(void)
{
	int c;
	int n;
	int i;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;
	default:
		return c;
	}

	/* octal escape: the first digit is in hand, take up to two more */
	n = c - '0';
	for (i = 1; i < 3; i++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			/* not part of the escape (may be EOF): put it back */
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}
577