1 /* $OpenBSD: b.c,v 1.17 2011/09/28 19:27:18 millert Exp $ */
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25
26 /* lasciate ogne speranza, voi ch'intrate. */
27
28 #define DEBUG
29
30 #include <ctype.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include "awk.h"
35 #include "awkgram.h"
36
37 __RCSID("$MirOS: src/usr.bin/awk/b.c,v 1.3 2014/03/13 00:37:35 tg Exp $");
38
39 #define HAT (NCHARS+2) /* matches ^ in regular expr */
40 /* NCHARS is 2**n */
41 #define MAXLIN 22
42
43 #define type(v) (v)->nobj /* badly overloaded here */
44 #define info(v) (v)->ntype /* badly overloaded here */
45 #define left(v) (v)->narg[0]
46 #define right(v) (v)->narg[1]
47 #define parent(v) (v)->nnext
48
49 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
50 #define ELEAF case EMPTYRE: /* empty string in regexp */
51 #define UNARY case STAR: case PLUS: case QUEST:
52
53 /* encoding in tree Nodes:
54 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
55 left is index, right contains value or pointer to value
56 unary (STAR, PLUS, QUEST): left is child, right is null
57 binary (CAT, OR): left and right are children
58 parent contains pointer to parent
59 */
60
61
62 int *setvec;
63 int *tmpset;
64 int maxsetvec = 0;
65
66 int rtok; /* next token in current re */
67 int rlxval;
68 static uschar *rlxstr;
69 static uschar *prestr; /* current position in current re */
70 static uschar *lastre; /* origin of last re */
71
72 static int setcnt;
73 static int poscnt;
74
75 char *patbeg;
76 int patlen;
77
78 #define NFA 20 /* cache this many dynamic fa's */
79 fa *fatab[NFA];
80 int nfatab = 0; /* entries in fatab */
81
makedfa(const char * s,int anchor)82 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
83 {
84 int i, use, nuse;
85 fa *pfa;
86 static int now = 1;
87
88 if (setvec == 0) { /* first time through any RE */
89 maxsetvec = MAXLIN;
90 setvec = (int *) calloc(maxsetvec, sizeof(int));
91 tmpset = (int *) calloc(maxsetvec, sizeof(int));
92 if (setvec == 0 || tmpset == 0)
93 overflo("out of space initializing makedfa");
94 }
95
96 if (compile_time) /* a constant for sure */
97 return mkdfa(s, anchor);
98 for (i = 0; i < nfatab; i++) /* is it there already? */
99 if (fatab[i]->anchor == anchor
100 && strcmp((const char *) fatab[i]->restr, s) == 0) {
101 fatab[i]->use = now++;
102 return fatab[i];
103 }
104 pfa = mkdfa(s, anchor);
105 if (nfatab < NFA) { /* room for another */
106 fatab[nfatab] = pfa;
107 fatab[nfatab]->use = now++;
108 nfatab++;
109 return pfa;
110 }
111 use = fatab[0]->use; /* replace least-recently used */
112 nuse = 0;
113 for (i = 1; i < nfatab; i++)
114 if (fatab[i]->use < use) {
115 use = fatab[i]->use;
116 nuse = i;
117 }
118 freefa(fatab[nuse]);
119 fatab[nuse] = pfa;
120 pfa->use = now++;
121 return pfa;
122 }
123
mkdfa(const char * s,int anchor)124 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
125 /* anchor = 1 for anchored matches, else 0 */
126 {
127 Node *p, *p1;
128 fa *f;
129
130 p = reparse(s);
131 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
132 /* put ALL STAR in front of reg. exp. */
133 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
134 /* put FINAL after reg. exp. */
135
136 poscnt = 0;
137 penter(p1); /* enter parent pointers and leaf indices */
138 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
139 overflo("out of space for fa");
140 f->accept = poscnt-1; /* penter has computed number of positions in re */
141 cfoll(f, p1); /* set up follow sets */
142 freetr(p1);
143 if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
144 overflo("out of space in makedfa");
145 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
146 overflo("out of space in makedfa");
147 *f->posns[1] = 0;
148 f->initstat = makeinit(f, anchor);
149 f->anchor = anchor;
150 f->restr = (uschar *) tostring(s);
151 return f;
152 }
153
makeinit(fa * f,int anchor)154 int makeinit(fa *f, int anchor)
155 {
156 int i, k;
157
158 f->curstat = 2;
159 f->out[2] = 0;
160 f->reset = 0;
161 k = *(f->re[0].lfollow);
162 xfree(f->posns[2]);
163 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
164 overflo("out of space in makeinit");
165 for (i=0; i <= k; i++) {
166 (f->posns[2])[i] = (f->re[0].lfollow)[i];
167 }
168 if ((f->posns[2])[1] == f->accept)
169 f->out[2] = 1;
170 for (i=0; i < NCHARS; i++)
171 f->gototab[2][i] = 0;
172 f->curstat = cgoto(f, 2, HAT);
173 if (anchor) {
174 *f->posns[2] = k-1; /* leave out position 0 */
175 for (i=0; i < k; i++) {
176 (f->posns[0])[i] = (f->posns[2])[i];
177 }
178
179 f->out[0] = f->out[2];
180 if (f->curstat != 2)
181 --(*f->posns[f->curstat]);
182 }
183 return f->curstat;
184 }
185
penter(Node * p)186 void penter(Node *p) /* set up parent pointers and leaf indices */
187 {
188 switch (type(p)) {
189 ELEAF
190 LEAF
191 info(p) = poscnt;
192 poscnt++;
193 break;
194 UNARY
195 penter(left(p));
196 parent(left(p)) = p;
197 break;
198 case CAT:
199 case OR:
200 penter(left(p));
201 penter(right(p));
202 parent(left(p)) = p;
203 parent(right(p)) = p;
204 break;
205 default: /* can't happen */
206 FATAL("can't happen: unknown type %d in penter", type(p));
207 break;
208 }
209 }
210
freetr(Node * p)211 void freetr(Node *p) /* free parse tree */
212 {
213 switch (type(p)) {
214 ELEAF
215 LEAF
216 xfree(p);
217 break;
218 UNARY
219 freetr(left(p));
220 xfree(p);
221 break;
222 case CAT:
223 case OR:
224 freetr(left(p));
225 freetr(right(p));
226 xfree(p);
227 break;
228 default: /* can't happen */
229 FATAL("can't happen: unknown type %d in freetr", type(p));
230 break;
231 }
232 }
233
234 /* in the parsing of regular expressions, metacharacters like . have */
235 /* to be seen literally; \056 is not a metacharacter. */
236
hexstr(uschar ** pp)237 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
238 { /* only pick up one 8-bit byte (2 chars) */
239 uschar *p;
240 int n = 0;
241 int i;
242
243 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
244 if (isdigit(*p))
245 n = 16 * n + *p - '0';
246 else if (*p >= 'a' && *p <= 'f')
247 n = 16 * n + *p - 'a' + 10;
248 else if (*p >= 'A' && *p <= 'F')
249 n = 16 * n + *p - 'A' + 10;
250 }
251 *pp = (uschar *) p;
252 return n;
253 }
254
255 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
256
quoted(uschar ** pp)257 int quoted(uschar **pp) /* pick up next thing after a \\ */
258 /* and increment *pp */
259 {
260 uschar *p = *pp;
261 int c;
262
263 if ((c = *p++) == 't')
264 c = '\t';
265 else if (c == 'n')
266 c = '\n';
267 else if (c == 'f')
268 c = '\f';
269 else if (c == 'r')
270 c = '\r';
271 else if (c == 'b')
272 c = '\b';
273 else if (c == '\\')
274 c = '\\';
275 else if (c == 'x') { /* hexadecimal goo follows */
276 c = hexstr(&p); /* this adds a null if number is invalid */
277 } else if (isoctdigit(c)) { /* \d \dd \ddd */
278 int n = c - '0';
279 if (isoctdigit(*p)) {
280 n = 8 * n + *p++ - '0';
281 if (isoctdigit(*p))
282 n = 8 * n + *p++ - '0';
283 }
284 c = n;
285 } /* else */
286 /* c = c; */
287 *pp = p;
288 return c;
289 }
290
cclenter(const char * argp)291 char *cclenter(const char *argp) /* add a character class */
292 {
293 int i, c, c2;
294 uschar *p = (uschar *) argp;
295 uschar *op, *bp;
296 static uschar *buf = 0;
297 static int bufsz = 100;
298
299 op = p;
300 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
301 FATAL("out of space for character class [%.10s...] 1", p);
302 bp = buf;
303 for (i = 0; (c = *p++) != 0; ) {
304 if (c == '\\') {
305 c = quoted(&p);
306 } else if (c == '-' && i > 0 && bp[-1] != 0) {
307 if (*p != 0) {
308 c = bp[-1];
309 c2 = *p++;
310 if (c2 == '\\')
311 c2 = quoted(&p);
312 if (c > c2) { /* empty; ignore */
313 bp--;
314 i--;
315 continue;
316 }
317 while (c < c2) {
318 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
319 FATAL("out of space for character class [%.10s...] 2", p);
320 *bp++ = ++c;
321 i++;
322 }
323 continue;
324 }
325 }
326 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
327 FATAL("out of space for character class [%.10s...] 3", p);
328 *bp++ = c;
329 i++;
330 }
331 *bp = 0;
332 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
333 xfree(op);
334 return (char *) tostring((char *) buf);
335 }
336
overflo(const char * s)337 void overflo(const char *s)
338 {
339 FATAL("regular expression too big: %.30s...", s);
340 }
341
cfoll(fa * f,Node * v)342 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
343 {
344 int i;
345 int *p;
346
347 switch (type(v)) {
348 ELEAF
349 LEAF
350 f->re[info(v)].ltype = type(v);
351 f->re[info(v)].lval.np = right(v);
352 while (f->accept >= maxsetvec) { /* guessing here! */
353 maxsetvec *= 4;
354 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
355 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
356 if (setvec == 0 || tmpset == 0)
357 overflo("out of space in cfoll()");
358 }
359 for (i = 0; i <= f->accept; i++)
360 setvec[i] = 0;
361 setcnt = 0;
362 follow(v); /* computes setvec and setcnt */
363 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
364 overflo("out of space building follow set");
365 f->re[info(v)].lfollow = p;
366 *p = setcnt;
367 for (i = f->accept; i >= 0; i--)
368 if (setvec[i] == 1)
369 *++p = i;
370 break;
371 UNARY
372 cfoll(f,left(v));
373 break;
374 case CAT:
375 case OR:
376 cfoll(f,left(v));
377 cfoll(f,right(v));
378 break;
379 default: /* can't happen */
380 FATAL("can't happen: unknown type %d in cfoll", type(v));
381 }
382 }
383
first(Node * p)384 int first(Node *p) /* collects initially active leaves of p into setvec */
385 /* returns 0 if p matches empty string */
386 {
387 int b, lp;
388
389 switch (type(p)) {
390 ELEAF
391 LEAF
392 lp = info(p); /* look for high-water mark of subscripts */
393 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
394 maxsetvec *= 4;
395 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
396 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
397 if (setvec == 0 || tmpset == 0)
398 overflo("out of space in first()");
399 }
400 if (type(p) == EMPTYRE) {
401 setvec[lp] = 0;
402 return(0);
403 }
404 if (setvec[lp] != 1) {
405 setvec[lp] = 1;
406 setcnt++;
407 }
408 if (type(p) == CCL && (*(char *) right(p)) == '\0')
409 return(0); /* empty CCL */
410 else return(1);
411 case PLUS:
412 if (first(left(p)) == 0) return(0);
413 return(1);
414 case STAR:
415 case QUEST:
416 first(left(p));
417 return(0);
418 case CAT:
419 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
420 return(1);
421 case OR:
422 b = first(right(p));
423 if (first(left(p)) == 0 || b == 0) return(0);
424 return(1);
425 }
426 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
427 return(-1);
428 }
429
follow(Node * v)430 void follow(Node *v) /* collects leaves that can follow v into setvec */
431 {
432 Node *p;
433
434 if (type(v) == FINAL)
435 return;
436 p = parent(v);
437 switch (type(p)) {
438 case STAR:
439 case PLUS:
440 first(v);
441 follow(p);
442 return;
443
444 case OR:
445 case QUEST:
446 follow(p);
447 return;
448
449 case CAT:
450 if (v == left(p)) { /* v is left child of p */
451 if (first(right(p)) == 0) {
452 follow(p);
453 return;
454 }
455 } else /* v is right child */
456 follow(p);
457 return;
458 }
459 }
460
member(int c,const char * sarg)461 int member(int c, const char *sarg) /* is c in s? */
462 {
463 uschar *s = (uschar *) sarg;
464
465 while (*s)
466 if (c == *s++)
467 return(1);
468 return(0);
469 }
470
match(fa * f,const char * p0)471 int match(fa *f, const char *p0) /* shortest match ? */
472 {
473 int s, ns;
474 uschar *p = (uschar *) p0;
475
476 s = f->reset ? makeinit(f,0) : f->initstat;
477 if (f->out[s])
478 return(1);
479 do {
480 /* assert(*p < NCHARS); */
481 if ((ns = f->gototab[s][*p]) != 0)
482 s = ns;
483 else
484 s = cgoto(f, s, *p);
485 if (f->out[s])
486 return(1);
487 } while (*p++ != 0);
488 return(0);
489 }
490
pmatch(fa * f,const char * p0)491 int pmatch(fa *f, const char *p0) /* longest match, for sub */
492 {
493 int s, ns;
494 uschar *p = (uschar *) p0;
495 uschar *q;
496 int i, k;
497
498 /* s = f->reset ? makeinit(f,1) : f->initstat; */
499 if (f->reset) {
500 f->initstat = s = makeinit(f,1);
501 } else {
502 s = f->initstat;
503 }
504 patbeg = (char *) p;
505 patlen = -1;
506 do {
507 q = p;
508 do {
509 if (f->out[s]) /* final state */
510 patlen = q-p;
511 /* assert(*q < NCHARS); */
512 if ((ns = f->gototab[s][*q]) != 0)
513 s = ns;
514 else
515 s = cgoto(f, s, *q);
516 if (s == 1) { /* no transition */
517 if (patlen >= 0) {
518 patbeg = (char *) p;
519 return(1);
520 }
521 else
522 goto nextin; /* no match */
523 }
524 } while (*q++ != 0);
525 if (f->out[s])
526 patlen = q-p-1; /* don't count $ */
527 if (patlen >= 0) {
528 patbeg = (char *) p;
529 return(1);
530 }
531 nextin:
532 s = 2;
533 if (f->reset) {
534 for (i = 2; i <= f->curstat; i++)
535 xfree(f->posns[i]);
536 k = *f->posns[0];
537 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
538 overflo("out of space in pmatch");
539 for (i = 0; i <= k; i++)
540 (f->posns[2])[i] = (f->posns[0])[i];
541 f->initstat = f->curstat = 2;
542 f->out[2] = f->out[0];
543 for (i = 0; i < NCHARS; i++)
544 f->gototab[2][i] = 0;
545 }
546 } while (*p++ != 0);
547 return (0);
548 }
549
nematch(fa * f,const char * p0)550 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
551 {
552 int s, ns;
553 uschar *p = (uschar *) p0;
554 uschar *q;
555 int i, k;
556
557 /* s = f->reset ? makeinit(f,1) : f->initstat; */
558 if (f->reset) {
559 f->initstat = s = makeinit(f,1);
560 } else {
561 s = f->initstat;
562 }
563 patlen = -1;
564 while (*p) {
565 q = p;
566 do {
567 if (f->out[s]) /* final state */
568 patlen = q-p;
569 /* assert(*q < NCHARS); */
570 if ((ns = f->gototab[s][*q]) != 0)
571 s = ns;
572 else
573 s = cgoto(f, s, *q);
574 if (s == 1) { /* no transition */
575 if (patlen > 0) {
576 patbeg = (char *) p;
577 return(1);
578 } else
579 goto nnextin; /* no nonempty match */
580 }
581 } while (*q++ != 0);
582 if (f->out[s])
583 patlen = q-p-1; /* don't count $ */
584 if (patlen > 0 ) {
585 patbeg = (char *) p;
586 return(1);
587 }
588 nnextin:
589 s = 2;
590 if (f->reset) {
591 for (i = 2; i <= f->curstat; i++)
592 xfree(f->posns[i]);
593 k = *f->posns[0];
594 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
595 overflo("out of state space");
596 for (i = 0; i <= k; i++)
597 (f->posns[2])[i] = (f->posns[0])[i];
598 f->initstat = f->curstat = 2;
599 f->out[2] = f->out[0];
600 for (i = 0; i < NCHARS; i++)
601 f->gototab[2][i] = 0;
602 }
603 p++;
604 }
605 return (0);
606 }
607
reparse(const char * p)608 Node *reparse(const char *p) /* parses regular expression pointed to by p */
609 { /* uses relex() to scan regular expression */
610 Node *np;
611
612 dprintf( ("reparse <%s>\n", p) );
613 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
614 rtok = relex();
615 /* GNU compatibility: an empty regexp matches anything */
616 if (rtok == '\0') {
617 /* FATAL("empty regular expression"); previous */
618 return(op2(EMPTYRE, NIL, NIL));
619 }
620 np = regexp();
621 if (rtok != '\0')
622 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
623 return(np);
624 }
625
regexp(void)626 Node *regexp(void) /* top-level parse of reg expr */
627 {
628 return (alt(concat(primary())));
629 }
630
primary(void)631 Node *primary(void)
632 {
633 Node *np;
634
635 switch (rtok) {
636 case CHAR:
637 np = op2(CHAR, NIL, itonp(rlxval));
638 rtok = relex();
639 return (unary(np));
640 case ALL:
641 rtok = relex();
642 return (unary(op2(ALL, NIL, NIL)));
643 case EMPTYRE:
644 rtok = relex();
645 return (unary(op2(ALL, NIL, NIL)));
646 case DOT:
647 rtok = relex();
648 return (unary(op2(DOT, NIL, NIL)));
649 case CCL:
650 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
651 rtok = relex();
652 return (unary(np));
653 case NCCL:
654 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
655 rtok = relex();
656 return (unary(np));
657 case '^':
658 rtok = relex();
659 return (unary(op2(CHAR, NIL, itonp(HAT))));
660 case '$':
661 rtok = relex();
662 return (unary(op2(CHAR, NIL, NIL)));
663 case '(':
664 rtok = relex();
665 if (rtok == ')') { /* special pleading for () */
666 rtok = relex();
667 return unary(op2(CCL, NIL, (Node *) tostring("")));
668 }
669 np = regexp();
670 if (rtok == ')') {
671 rtok = relex();
672 return (unary(np));
673 }
674 else
675 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
676 default:
677 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
678 }
679 return 0; /*NOTREACHED*/
680 }
681
concat(Node * np)682 Node *concat(Node *np)
683 {
684 switch (rtok) {
685 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
686 return (concat(op2(CAT, np, primary())));
687 }
688 return (np);
689 }
690
alt(Node * np)691 Node *alt(Node *np)
692 {
693 if (rtok == OR) {
694 rtok = relex();
695 return (alt(op2(OR, np, concat(primary()))));
696 }
697 return (np);
698 }
699
unary(Node * np)700 Node *unary(Node *np)
701 {
702 switch (rtok) {
703 case STAR:
704 rtok = relex();
705 return (unary(op2(STAR, np, NIL)));
706 case PLUS:
707 rtok = relex();
708 return (unary(op2(PLUS, np, NIL)));
709 case QUEST:
710 rtok = relex();
711 return (unary(op2(QUEST, np, NIL)));
712 default:
713 return (np);
714 }
715 }
716
717 /*
718 * Character class definitions conformant to the POSIX locale as
719 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
720 * and operating character sets are both ASCII (ISO646) or supersets
721 * thereof.
722 *
723 * Note that to avoid overflowing the temporary buffer used in
724 * relex(), the expanded character class (prior to range expansion)
725 * must be less than twice the size of their full name.
726 */
727
728 /* Because isblank doesn't show up in any of the header files on any
729 * system i use, it's defined here. if some other locale has a richer
730 * definition of "blank", define HAS_ISBLANK and provide your own
731 * version.
732 * the parentheses here are an attempt to find a path through the maze
733 * of macro definition and/or function and/or version provided. thanks
734 * to nelson beebe for the suggestion; let's see if it works everywhere.
735 */
736
737 #ifndef HAS_ISBLANK
738
739 int (xisblank)(int c)
740 {
741 return c==' ' || c=='\t';
742 }
743
744 #endif
745
746 struct charclass {
747 const char *cc_name;
748 int cc_namelen;
749 int (*cc_func)(int);
750 } charclasses[] = {
751 { "alnum", 5, isalnum },
752 { "alpha", 5, isalpha },
753 #ifndef HAS_ISBLANK
754 { "blank", 5, isspace }, /* was isblank */
755 #else
756 { "blank", 5, isblank },
757 #endif
758 { "cntrl", 5, iscntrl },
759 { "digit", 5, isdigit },
760 { "graph", 5, isgraph },
761 { "lower", 5, islower },
762 { "print", 5, isprint },
763 { "punct", 5, ispunct },
764 { "space", 5, isspace },
765 { "upper", 5, isupper },
766 { "xdigit", 6, isxdigit },
767 { NULL, 0, NULL },
768 };
769
770
relex(void)771 int relex(void) /* lexical analyzer for reparse */
772 {
773 int c, n;
774 int cflag;
775 static uschar *buf = 0;
776 static int bufsz = 100;
777 uschar *bp;
778 struct charclass *cc;
779 int i;
780
781 switch (c = *prestr++) {
782 case '|': return OR;
783 case '*': return STAR;
784 case '+': return PLUS;
785 case '?': return QUEST;
786 case '.': return DOT;
787 case '\0': prestr--; return '\0';
788 case '^':
789 case '$':
790 case '(':
791 case ')':
792 return c;
793 case '\\':
794 rlxval = quoted(&prestr);
795 return CHAR;
796 default:
797 rlxval = c;
798 return CHAR;
799 case '[':
800 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
801 FATAL("out of space in reg expr %.10s..", lastre);
802 bp = buf;
803 if (*prestr == '^') {
804 cflag = 1;
805 prestr++;
806 }
807 else
808 cflag = 0;
809 n = 2 * strlen((const char *) prestr)+1;
810 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
811 FATAL("out of space for reg expr %.10s...", lastre);
812 for (; ; ) {
813 if ((c = *prestr++) == '\\') {
814 *bp++ = '\\';
815 if ((c = *prestr++) == '\0')
816 FATAL("nonterminated character class %.20s...", lastre);
817 *bp++ = c;
818 /* } else if (c == '\n') { */
819 /* FATAL("newline in character class %.20s...", lastre); */
820 } else if (c == '[' && *prestr == ':') {
821 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
822 for (cc = charclasses; cc->cc_name; cc++)
823 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
824 break;
825 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
826 prestr[2 + cc->cc_namelen] == ']') {
827 prestr += cc->cc_namelen + 3;
828 for (i = 0; i < NCHARS; i++) {
829 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
830 FATAL("out of space for reg expr %.10s...", lastre);
831 if (cc->cc_func(i)) {
832 *bp++ = i;
833 n++;
834 }
835 }
836 } else
837 *bp++ = c;
838 } else if (c == '\0') {
839 FATAL("nonterminated character class %.20s", lastre);
840 } else if (bp == buf) { /* 1st char is special */
841 *bp++ = c;
842 } else if (c == ']') {
843 *bp++ = 0;
844 rlxstr = (uschar *) tostring((char *) buf);
845 if (cflag == 0)
846 return CCL;
847 else
848 return NCCL;
849 } else
850 *bp++ = c;
851 }
852 }
853 }
854
cgoto(fa * f,int s,int c)855 int cgoto(fa *f, int s, int c)
856 {
857 int i, j, k;
858 int *p, *q;
859
860 assert(c == HAT || c < NCHARS);
861 while (f->accept >= maxsetvec) { /* guessing here! */
862 maxsetvec *= 4;
863 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
864 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
865 if (setvec == 0 || tmpset == 0)
866 overflo("out of space in cgoto()");
867 }
868 for (i = 0; i <= f->accept; i++)
869 setvec[i] = 0;
870 setcnt = 0;
871 /* compute positions of gototab[s,c] into setvec */
872 p = f->posns[s];
873 for (i = 1; i <= *p; i++) {
874 if ((k = f->re[p[i]].ltype) != FINAL) {
875 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
876 || (k == DOT && c != 0 && c != HAT)
877 || (k == ALL && c != 0)
878 || (k == EMPTYRE && c != 0)
879 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
880 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
881 q = f->re[p[i]].lfollow;
882 for (j = 1; j <= *q; j++) {
883 if (q[j] >= maxsetvec) {
884 maxsetvec *= 4;
885 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
886 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
887 if (setvec == 0 || tmpset == 0)
888 overflo("cgoto overflow");
889 }
890 if (setvec[q[j]] == 0) {
891 setcnt++;
892 setvec[q[j]] = 1;
893 }
894 }
895 }
896 }
897 }
898 /* determine if setvec is a previous state */
899 tmpset[0] = setcnt;
900 j = 1;
901 for (i = f->accept; i >= 0; i--)
902 if (setvec[i]) {
903 tmpset[j++] = i;
904 }
905 /* tmpset == previous state? */
906 for (i = 1; i <= f->curstat; i++) {
907 p = f->posns[i];
908 if ((k = tmpset[0]) != p[0])
909 goto different;
910 for (j = 1; j <= k; j++)
911 if (tmpset[j] != p[j])
912 goto different;
913 /* setvec is state i */
914 f->gototab[s][c] = i;
915 return i;
916 different:;
917 }
918
919 /* add tmpset to current set of states */
920 if (f->curstat >= NSTATES-1) {
921 f->curstat = 2;
922 f->reset = 1;
923 for (i = 2; i < NSTATES; i++)
924 xfree(f->posns[i]);
925 } else
926 ++(f->curstat);
927 for (i = 0; i < NCHARS; i++)
928 f->gototab[f->curstat][i] = 0;
929 xfree(f->posns[f->curstat]);
930 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
931 overflo("out of space in cgoto");
932
933 f->posns[f->curstat] = p;
934 f->gototab[s][c] = f->curstat;
935 for (i = 0; i <= setcnt; i++)
936 p[i] = tmpset[i];
937 if (setvec[f->accept])
938 f->out[f->curstat] = 1;
939 else
940 f->out[f->curstat] = 0;
941 return f->curstat;
942 }
943
944
freefa(fa * f)945 void freefa(fa *f) /* free a finite automaton */
946 {
947 int i;
948
949 if (f == NULL)
950 return;
951 for (i = 0; i <= f->curstat; i++)
952 xfree(f->posns[i]);
953 for (i = 0; i <= f->accept; i++) {
954 xfree(f->re[i].lfollow);
955 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
956 xfree((f->re[i].lval.np));
957 }
958 xfree(f->restr);
959 xfree(f);
960 }
961