1 /*	$OpenBSD: b.c,v 1.17 2011/09/28 19:27:18 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 /* lasciate ogne speranza, voi ch'intrate. */
27 
28 #define	DEBUG
29 
30 #include <ctype.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include "awk.h"
35 #include "awkgram.h"
36 
37 __RCSID("$MirOS: src/usr.bin/awk/b.c,v 1.3 2014/03/13 00:37:35 tg Exp $");
38 
39 #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
40 				/* NCHARS is 2**n */
41 #define MAXLIN 22
42 
43 #define type(v)		(v)->nobj	/* badly overloaded here */
44 #define info(v)		(v)->ntype	/* badly overloaded here */
45 #define left(v)		(v)->narg[0]
46 #define right(v)	(v)->narg[1]
47 #define parent(v)	(v)->nnext
48 
49 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
50 #define ELEAF	case EMPTYRE:		/* empty string in regexp */
51 #define UNARY	case STAR: case PLUS: case QUEST:
52 
53 /* encoding in tree Nodes:
54 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
55 		left is index, right contains value or pointer to value
56 	unary (STAR, PLUS, QUEST): left is child, right is null
57 	binary (CAT, OR): left and right are children
58 	parent contains pointer to parent
59 */
60 
61 
62 int	*setvec;
63 int	*tmpset;
64 int	maxsetvec = 0;
65 
66 int	rtok;		/* next token in current re */
67 int	rlxval;
68 static uschar	*rlxstr;
69 static uschar	*prestr;	/* current position in current re */
70 static uschar	*lastre;	/* origin of last re */
71 
72 static	int setcnt;
73 static	int poscnt;
74 
75 char	*patbeg;
76 int	patlen;
77 
78 #define	NFA	20	/* cache this many dynamic fa's */
79 fa	*fatab[NFA];
80 int	nfatab	= 0;	/* entries in fatab */
81 
makedfa(const char * s,int anchor)82 fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
83 {
84 	int i, use, nuse;
85 	fa *pfa;
86 	static int now = 1;
87 
88 	if (setvec == 0) {	/* first time through any RE */
89 		maxsetvec = MAXLIN;
90 		setvec = (int *) calloc(maxsetvec, sizeof(int));
91 		tmpset = (int *) calloc(maxsetvec, sizeof(int));
92 		if (setvec == 0 || tmpset == 0)
93 			overflo("out of space initializing makedfa");
94 	}
95 
96 	if (compile_time)	/* a constant for sure */
97 		return mkdfa(s, anchor);
98 	for (i = 0; i < nfatab; i++)	/* is it there already? */
99 		if (fatab[i]->anchor == anchor
100 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
101 			fatab[i]->use = now++;
102 			return fatab[i];
103 		}
104 	pfa = mkdfa(s, anchor);
105 	if (nfatab < NFA) {	/* room for another */
106 		fatab[nfatab] = pfa;
107 		fatab[nfatab]->use = now++;
108 		nfatab++;
109 		return pfa;
110 	}
111 	use = fatab[0]->use;	/* replace least-recently used */
112 	nuse = 0;
113 	for (i = 1; i < nfatab; i++)
114 		if (fatab[i]->use < use) {
115 			use = fatab[i]->use;
116 			nuse = i;
117 		}
118 	freefa(fatab[nuse]);
119 	fatab[nuse] = pfa;
120 	pfa->use = now++;
121 	return pfa;
122 }
123 
mkdfa(const char * s,int anchor)124 fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
125 				/* anchor = 1 for anchored matches, else 0 */
126 {
127 	Node *p, *p1;
128 	fa *f;
129 
130 	p = reparse(s);
131 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
132 		/* put ALL STAR in front of reg.  exp. */
133 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
134 		/* put FINAL after reg.  exp. */
135 
136 	poscnt = 0;
137 	penter(p1);	/* enter parent pointers and leaf indices */
138 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
139 		overflo("out of space for fa");
140 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
141 	cfoll(f, p1);	/* set up follow sets */
142 	freetr(p1);
143 	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
144 			overflo("out of space in makedfa");
145 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
146 		overflo("out of space in makedfa");
147 	*f->posns[1] = 0;
148 	f->initstat = makeinit(f, anchor);
149 	f->anchor = anchor;
150 	f->restr = (uschar *) tostring(s);
151 	return f;
152 }
153 
makeinit(fa * f,int anchor)154 int makeinit(fa *f, int anchor)
155 {
156 	int i, k;
157 
158 	f->curstat = 2;
159 	f->out[2] = 0;
160 	f->reset = 0;
161 	k = *(f->re[0].lfollow);
162 	xfree(f->posns[2]);
163 	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
164 		overflo("out of space in makeinit");
165 	for (i=0; i <= k; i++) {
166 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
167 	}
168 	if ((f->posns[2])[1] == f->accept)
169 		f->out[2] = 1;
170 	for (i=0; i < NCHARS; i++)
171 		f->gototab[2][i] = 0;
172 	f->curstat = cgoto(f, 2, HAT);
173 	if (anchor) {
174 		*f->posns[2] = k-1;	/* leave out position 0 */
175 		for (i=0; i < k; i++) {
176 			(f->posns[0])[i] = (f->posns[2])[i];
177 		}
178 
179 		f->out[0] = f->out[2];
180 		if (f->curstat != 2)
181 			--(*f->posns[f->curstat]);
182 	}
183 	return f->curstat;
184 }
185 
penter(Node * p)186 void penter(Node *p)	/* set up parent pointers and leaf indices */
187 {
188 	switch (type(p)) {
189 	ELEAF
190 	LEAF
191 		info(p) = poscnt;
192 		poscnt++;
193 		break;
194 	UNARY
195 		penter(left(p));
196 		parent(left(p)) = p;
197 		break;
198 	case CAT:
199 	case OR:
200 		penter(left(p));
201 		penter(right(p));
202 		parent(left(p)) = p;
203 		parent(right(p)) = p;
204 		break;
205 	default:	/* can't happen */
206 		FATAL("can't happen: unknown type %d in penter", type(p));
207 		break;
208 	}
209 }
210 
freetr(Node * p)211 void freetr(Node *p)	/* free parse tree */
212 {
213 	switch (type(p)) {
214 	ELEAF
215 	LEAF
216 		xfree(p);
217 		break;
218 	UNARY
219 		freetr(left(p));
220 		xfree(p);
221 		break;
222 	case CAT:
223 	case OR:
224 		freetr(left(p));
225 		freetr(right(p));
226 		xfree(p);
227 		break;
228 	default:	/* can't happen */
229 		FATAL("can't happen: unknown type %d in freetr", type(p));
230 		break;
231 	}
232 }
233 
234 /* in the parsing of regular expressions, metacharacters like . have */
235 /* to be seen literally;  \056 is not a metacharacter. */
236 
hexstr(uschar ** pp)237 int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
238 {			/* only pick up one 8-bit byte (2 chars) */
239 	uschar *p;
240 	int n = 0;
241 	int i;
242 
243 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
244 		if (isdigit(*p))
245 			n = 16 * n + *p - '0';
246 		else if (*p >= 'a' && *p <= 'f')
247 			n = 16 * n + *p - 'a' + 10;
248 		else if (*p >= 'A' && *p <= 'F')
249 			n = 16 * n + *p - 'A' + 10;
250 	}
251 	*pp = (uschar *) p;
252 	return n;
253 }
254 
255 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
256 
quoted(uschar ** pp)257 int quoted(uschar **pp)	/* pick up next thing after a \\ */
258 			/* and increment *pp */
259 {
260 	uschar *p = *pp;
261 	int c;
262 
263 	if ((c = *p++) == 't')
264 		c = '\t';
265 	else if (c == 'n')
266 		c = '\n';
267 	else if (c == 'f')
268 		c = '\f';
269 	else if (c == 'r')
270 		c = '\r';
271 	else if (c == 'b')
272 		c = '\b';
273 	else if (c == '\\')
274 		c = '\\';
275 	else if (c == 'x') {	/* hexadecimal goo follows */
276 		c = hexstr(&p);	/* this adds a null if number is invalid */
277 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
278 		int n = c - '0';
279 		if (isoctdigit(*p)) {
280 			n = 8 * n + *p++ - '0';
281 			if (isoctdigit(*p))
282 				n = 8 * n + *p++ - '0';
283 		}
284 		c = n;
285 	} /* else */
286 		/* c = c; */
287 	*pp = p;
288 	return c;
289 }
290 
cclenter(const char * argp)291 char *cclenter(const char *argp)	/* add a character class */
292 {
293 	int i, c, c2;
294 	uschar *p = (uschar *) argp;
295 	uschar *op, *bp;
296 	static uschar *buf = 0;
297 	static int bufsz = 100;
298 
299 	op = p;
300 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
301 		FATAL("out of space for character class [%.10s...] 1", p);
302 	bp = buf;
303 	for (i = 0; (c = *p++) != 0; ) {
304 		if (c == '\\') {
305 			c = quoted(&p);
306 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
307 			if (*p != 0) {
308 				c = bp[-1];
309 				c2 = *p++;
310 				if (c2 == '\\')
311 					c2 = quoted(&p);
312 				if (c > c2) {	/* empty; ignore */
313 					bp--;
314 					i--;
315 					continue;
316 				}
317 				while (c < c2) {
318 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
319 						FATAL("out of space for character class [%.10s...] 2", p);
320 					*bp++ = ++c;
321 					i++;
322 				}
323 				continue;
324 			}
325 		}
326 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
327 			FATAL("out of space for character class [%.10s...] 3", p);
328 		*bp++ = c;
329 		i++;
330 	}
331 	*bp = 0;
332 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
333 	xfree(op);
334 	return (char *) tostring((char *) buf);
335 }
336 
overflo(const char * s)337 void overflo(const char *s)
338 {
339 	FATAL("regular expression too big: %.30s...", s);
340 }
341 
cfoll(fa * f,Node * v)342 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
343 {
344 	int i;
345 	int *p;
346 
347 	switch (type(v)) {
348 	ELEAF
349 	LEAF
350 		f->re[info(v)].ltype = type(v);
351 		f->re[info(v)].lval.np = right(v);
352 		while (f->accept >= maxsetvec) {	/* guessing here! */
353 			maxsetvec *= 4;
354 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
355 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
356 			if (setvec == 0 || tmpset == 0)
357 				overflo("out of space in cfoll()");
358 		}
359 		for (i = 0; i <= f->accept; i++)
360 			setvec[i] = 0;
361 		setcnt = 0;
362 		follow(v);	/* computes setvec and setcnt */
363 		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
364 			overflo("out of space building follow set");
365 		f->re[info(v)].lfollow = p;
366 		*p = setcnt;
367 		for (i = f->accept; i >= 0; i--)
368 			if (setvec[i] == 1)
369 				*++p = i;
370 		break;
371 	UNARY
372 		cfoll(f,left(v));
373 		break;
374 	case CAT:
375 	case OR:
376 		cfoll(f,left(v));
377 		cfoll(f,right(v));
378 		break;
379 	default:	/* can't happen */
380 		FATAL("can't happen: unknown type %d in cfoll", type(v));
381 	}
382 }
383 
first(Node * p)384 int first(Node *p)	/* collects initially active leaves of p into setvec */
385 			/* returns 0 if p matches empty string */
386 {
387 	int b, lp;
388 
389 	switch (type(p)) {
390 	ELEAF
391 	LEAF
392 		lp = info(p);	/* look for high-water mark of subscripts */
393 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
394 			maxsetvec *= 4;
395 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
396 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
397 			if (setvec == 0 || tmpset == 0)
398 				overflo("out of space in first()");
399 		}
400 		if (type(p) == EMPTYRE) {
401 			setvec[lp] = 0;
402 			return(0);
403 		}
404 		if (setvec[lp] != 1) {
405 			setvec[lp] = 1;
406 			setcnt++;
407 		}
408 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
409 			return(0);		/* empty CCL */
410 		else return(1);
411 	case PLUS:
412 		if (first(left(p)) == 0) return(0);
413 		return(1);
414 	case STAR:
415 	case QUEST:
416 		first(left(p));
417 		return(0);
418 	case CAT:
419 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
420 		return(1);
421 	case OR:
422 		b = first(right(p));
423 		if (first(left(p)) == 0 || b == 0) return(0);
424 		return(1);
425 	}
426 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
427 	return(-1);
428 }
429 
follow(Node * v)430 void follow(Node *v)	/* collects leaves that can follow v into setvec */
431 {
432 	Node *p;
433 
434 	if (type(v) == FINAL)
435 		return;
436 	p = parent(v);
437 	switch (type(p)) {
438 	case STAR:
439 	case PLUS:
440 		first(v);
441 		follow(p);
442 		return;
443 
444 	case OR:
445 	case QUEST:
446 		follow(p);
447 		return;
448 
449 	case CAT:
450 		if (v == left(p)) {	/* v is left child of p */
451 			if (first(right(p)) == 0) {
452 				follow(p);
453 				return;
454 			}
455 		} else		/* v is right child */
456 			follow(p);
457 		return;
458 	}
459 }
460 
member(int c,const char * sarg)461 int member(int c, const char *sarg)	/* is c in s? */
462 {
463 	uschar *s = (uschar *) sarg;
464 
465 	while (*s)
466 		if (c == *s++)
467 			return(1);
468 	return(0);
469 }
470 
match(fa * f,const char * p0)471 int match(fa *f, const char *p0)	/* shortest match ? */
472 {
473 	int s, ns;
474 	uschar *p = (uschar *) p0;
475 
476 	s = f->reset ? makeinit(f,0) : f->initstat;
477 	if (f->out[s])
478 		return(1);
479 	do {
480 		/* assert(*p < NCHARS); */
481 		if ((ns = f->gototab[s][*p]) != 0)
482 			s = ns;
483 		else
484 			s = cgoto(f, s, *p);
485 		if (f->out[s])
486 			return(1);
487 	} while (*p++ != 0);
488 	return(0);
489 }
490 
pmatch(fa * f,const char * p0)491 int pmatch(fa *f, const char *p0)	/* longest match, for sub */
492 {
493 	int s, ns;
494 	uschar *p = (uschar *) p0;
495 	uschar *q;
496 	int i, k;
497 
498 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
499 	if (f->reset) {
500 		f->initstat = s = makeinit(f,1);
501 	} else {
502 		s = f->initstat;
503 	}
504 	patbeg = (char *) p;
505 	patlen = -1;
506 	do {
507 		q = p;
508 		do {
509 			if (f->out[s])		/* final state */
510 				patlen = q-p;
511 			/* assert(*q < NCHARS); */
512 			if ((ns = f->gototab[s][*q]) != 0)
513 				s = ns;
514 			else
515 				s = cgoto(f, s, *q);
516 			if (s == 1) {	/* no transition */
517 				if (patlen >= 0) {
518 					patbeg = (char *) p;
519 					return(1);
520 				}
521 				else
522 					goto nextin;	/* no match */
523 			}
524 		} while (*q++ != 0);
525 		if (f->out[s])
526 			patlen = q-p-1;	/* don't count $ */
527 		if (patlen >= 0) {
528 			patbeg = (char *) p;
529 			return(1);
530 		}
531 	nextin:
532 		s = 2;
533 		if (f->reset) {
534 			for (i = 2; i <= f->curstat; i++)
535 				xfree(f->posns[i]);
536 			k = *f->posns[0];
537 			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
538 				overflo("out of space in pmatch");
539 			for (i = 0; i <= k; i++)
540 				(f->posns[2])[i] = (f->posns[0])[i];
541 			f->initstat = f->curstat = 2;
542 			f->out[2] = f->out[0];
543 			for (i = 0; i < NCHARS; i++)
544 				f->gototab[2][i] = 0;
545 		}
546 	} while (*p++ != 0);
547 	return (0);
548 }
549 
nematch(fa * f,const char * p0)550 int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
551 {
552 	int s, ns;
553 	uschar *p = (uschar *) p0;
554 	uschar *q;
555 	int i, k;
556 
557 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
558 	if (f->reset) {
559 		f->initstat = s = makeinit(f,1);
560 	} else {
561 		s = f->initstat;
562 	}
563 	patlen = -1;
564 	while (*p) {
565 		q = p;
566 		do {
567 			if (f->out[s])		/* final state */
568 				patlen = q-p;
569 			/* assert(*q < NCHARS); */
570 			if ((ns = f->gototab[s][*q]) != 0)
571 				s = ns;
572 			else
573 				s = cgoto(f, s, *q);
574 			if (s == 1) {	/* no transition */
575 				if (patlen > 0) {
576 					patbeg = (char *) p;
577 					return(1);
578 				} else
579 					goto nnextin;	/* no nonempty match */
580 			}
581 		} while (*q++ != 0);
582 		if (f->out[s])
583 			patlen = q-p-1;	/* don't count $ */
584 		if (patlen > 0 ) {
585 			patbeg = (char *) p;
586 			return(1);
587 		}
588 	nnextin:
589 		s = 2;
590 		if (f->reset) {
591 			for (i = 2; i <= f->curstat; i++)
592 				xfree(f->posns[i]);
593 			k = *f->posns[0];
594 			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
595 				overflo("out of state space");
596 			for (i = 0; i <= k; i++)
597 				(f->posns[2])[i] = (f->posns[0])[i];
598 			f->initstat = f->curstat = 2;
599 			f->out[2] = f->out[0];
600 			for (i = 0; i < NCHARS; i++)
601 				f->gototab[2][i] = 0;
602 		}
603 		p++;
604 	}
605 	return (0);
606 }
607 
reparse(const char * p)608 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
609 {			/* uses relex() to scan regular expression */
610 	Node *np;
611 
612 	dprintf( ("reparse <%s>\n", p) );
613 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
614 	rtok = relex();
615 	/* GNU compatibility: an empty regexp matches anything */
616 	if (rtok == '\0') {
617 		/* FATAL("empty regular expression"); previous */
618 		return(op2(EMPTYRE, NIL, NIL));
619 	}
620 	np = regexp();
621 	if (rtok != '\0')
622 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
623 	return(np);
624 }
625 
regexp(void)626 Node *regexp(void)	/* top-level parse of reg expr */
627 {
628 	return (alt(concat(primary())));
629 }
630 
primary(void)631 Node *primary(void)
632 {
633 	Node *np;
634 
635 	switch (rtok) {
636 	case CHAR:
637 		np = op2(CHAR, NIL, itonp(rlxval));
638 		rtok = relex();
639 		return (unary(np));
640 	case ALL:
641 		rtok = relex();
642 		return (unary(op2(ALL, NIL, NIL)));
643 	case EMPTYRE:
644 		rtok = relex();
645 		return (unary(op2(ALL, NIL, NIL)));
646 	case DOT:
647 		rtok = relex();
648 		return (unary(op2(DOT, NIL, NIL)));
649 	case CCL:
650 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
651 		rtok = relex();
652 		return (unary(np));
653 	case NCCL:
654 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
655 		rtok = relex();
656 		return (unary(np));
657 	case '^':
658 		rtok = relex();
659 		return (unary(op2(CHAR, NIL, itonp(HAT))));
660 	case '$':
661 		rtok = relex();
662 		return (unary(op2(CHAR, NIL, NIL)));
663 	case '(':
664 		rtok = relex();
665 		if (rtok == ')') {	/* special pleading for () */
666 			rtok = relex();
667 			return unary(op2(CCL, NIL, (Node *) tostring("")));
668 		}
669 		np = regexp();
670 		if (rtok == ')') {
671 			rtok = relex();
672 			return (unary(np));
673 		}
674 		else
675 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
676 	default:
677 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
678 	}
679 	return 0;	/*NOTREACHED*/
680 }
681 
concat(Node * np)682 Node *concat(Node *np)
683 {
684 	switch (rtok) {
685 	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
686 		return (concat(op2(CAT, np, primary())));
687 	}
688 	return (np);
689 }
690 
alt(Node * np)691 Node *alt(Node *np)
692 {
693 	if (rtok == OR) {
694 		rtok = relex();
695 		return (alt(op2(OR, np, concat(primary()))));
696 	}
697 	return (np);
698 }
699 
unary(Node * np)700 Node *unary(Node *np)
701 {
702 	switch (rtok) {
703 	case STAR:
704 		rtok = relex();
705 		return (unary(op2(STAR, np, NIL)));
706 	case PLUS:
707 		rtok = relex();
708 		return (unary(op2(PLUS, np, NIL)));
709 	case QUEST:
710 		rtok = relex();
711 		return (unary(op2(QUEST, np, NIL)));
712 	default:
713 		return (np);
714 	}
715 }
716 
717 /*
718  * Character class definitions conformant to the POSIX locale as
719  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
720  * and operating character sets are both ASCII (ISO646) or supersets
721  * thereof.
722  *
723  * Note that to avoid overflowing the temporary buffer used in
724  * relex(), the expanded character class (prior to range expansion)
725  * must be less than twice the size of their full name.
726  */
727 
728 /* Because isblank doesn't show up in any of the header files on any
729  * system i use, it's defined here.  if some other locale has a richer
730  * definition of "blank", define HAS_ISBLANK and provide your own
731  * version.
732  * the parentheses here are an attempt to find a path through the maze
733  * of macro definition and/or function and/or version provided.  thanks
734  * to nelson beebe for the suggestion; let's see if it works everywhere.
735  */
736 
737 #ifndef HAS_ISBLANK
738 
739 int (xisblank)(int c)
740 {
741 	return c==' ' || c=='\t';
742 }
743 
744 #endif
745 
746 struct charclass {
747 	const char *cc_name;
748 	int cc_namelen;
749 	int (*cc_func)(int);
750 } charclasses[] = {
751 	{ "alnum",	5,	isalnum },
752 	{ "alpha",	5,	isalpha },
753 #ifndef HAS_ISBLANK
754 	{ "blank",	5,	isspace }, /* was isblank */
755 #else
756 	{ "blank",	5,	isblank },
757 #endif
758 	{ "cntrl",	5,	iscntrl },
759 	{ "digit",	5,	isdigit },
760 	{ "graph",	5,	isgraph },
761 	{ "lower",	5,	islower },
762 	{ "print",	5,	isprint },
763 	{ "punct",	5,	ispunct },
764 	{ "space",	5,	isspace },
765 	{ "upper",	5,	isupper },
766 	{ "xdigit",	6,	isxdigit },
767 	{ NULL,		0,	NULL },
768 };
769 
770 
relex(void)771 int relex(void)		/* lexical analyzer for reparse */
772 {
773 	int c, n;
774 	int cflag;
775 	static uschar *buf = 0;
776 	static int bufsz = 100;
777 	uschar *bp;
778 	struct charclass *cc;
779 	int i;
780 
781 	switch (c = *prestr++) {
782 	case '|': return OR;
783 	case '*': return STAR;
784 	case '+': return PLUS;
785 	case '?': return QUEST;
786 	case '.': return DOT;
787 	case '\0': prestr--; return '\0';
788 	case '^':
789 	case '$':
790 	case '(':
791 	case ')':
792 		return c;
793 	case '\\':
794 		rlxval = quoted(&prestr);
795 		return CHAR;
796 	default:
797 		rlxval = c;
798 		return CHAR;
799 	case '[':
800 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
801 			FATAL("out of space in reg expr %.10s..", lastre);
802 		bp = buf;
803 		if (*prestr == '^') {
804 			cflag = 1;
805 			prestr++;
806 		}
807 		else
808 			cflag = 0;
809 		n = 2 * strlen((const char *) prestr)+1;
810 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
811 			FATAL("out of space for reg expr %.10s...", lastre);
812 		for (; ; ) {
813 			if ((c = *prestr++) == '\\') {
814 				*bp++ = '\\';
815 				if ((c = *prestr++) == '\0')
816 					FATAL("nonterminated character class %.20s...", lastre);
817 				*bp++ = c;
818 			/* } else if (c == '\n') { */
819 			/* 	FATAL("newline in character class %.20s...", lastre); */
820 			} else if (c == '[' && *prestr == ':') {
821 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
822 				for (cc = charclasses; cc->cc_name; cc++)
823 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
824 						break;
825 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
826 				    prestr[2 + cc->cc_namelen] == ']') {
827 					prestr += cc->cc_namelen + 3;
828 					for (i = 0; i < NCHARS; i++) {
829 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
830 						    FATAL("out of space for reg expr %.10s...", lastre);
831 						if (cc->cc_func(i)) {
832 							*bp++ = i;
833 							n++;
834 						}
835 					}
836 				} else
837 					*bp++ = c;
838 			} else if (c == '\0') {
839 				FATAL("nonterminated character class %.20s", lastre);
840 			} else if (bp == buf) {	/* 1st char is special */
841 				*bp++ = c;
842 			} else if (c == ']') {
843 				*bp++ = 0;
844 				rlxstr = (uschar *) tostring((char *) buf);
845 				if (cflag == 0)
846 					return CCL;
847 				else
848 					return NCCL;
849 			} else
850 				*bp++ = c;
851 		}
852 	}
853 }
854 
cgoto(fa * f,int s,int c)855 int cgoto(fa *f, int s, int c)
856 {
857 	int i, j, k;
858 	int *p, *q;
859 
860 	assert(c == HAT || c < NCHARS);
861 	while (f->accept >= maxsetvec) {	/* guessing here! */
862 		maxsetvec *= 4;
863 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
864 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
865 		if (setvec == 0 || tmpset == 0)
866 			overflo("out of space in cgoto()");
867 	}
868 	for (i = 0; i <= f->accept; i++)
869 		setvec[i] = 0;
870 	setcnt = 0;
871 	/* compute positions of gototab[s,c] into setvec */
872 	p = f->posns[s];
873 	for (i = 1; i <= *p; i++) {
874 		if ((k = f->re[p[i]].ltype) != FINAL) {
875 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
876 			 || (k == DOT && c != 0 && c != HAT)
877 			 || (k == ALL && c != 0)
878 			 || (k == EMPTYRE && c != 0)
879 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
880 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
881 				q = f->re[p[i]].lfollow;
882 				for (j = 1; j <= *q; j++) {
883 					if (q[j] >= maxsetvec) {
884 						maxsetvec *= 4;
885 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
886 						tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
887 						if (setvec == 0 || tmpset == 0)
888 							overflo("cgoto overflow");
889 					}
890 					if (setvec[q[j]] == 0) {
891 						setcnt++;
892 						setvec[q[j]] = 1;
893 					}
894 				}
895 			}
896 		}
897 	}
898 	/* determine if setvec is a previous state */
899 	tmpset[0] = setcnt;
900 	j = 1;
901 	for (i = f->accept; i >= 0; i--)
902 		if (setvec[i]) {
903 			tmpset[j++] = i;
904 		}
905 	/* tmpset == previous state? */
906 	for (i = 1; i <= f->curstat; i++) {
907 		p = f->posns[i];
908 		if ((k = tmpset[0]) != p[0])
909 			goto different;
910 		for (j = 1; j <= k; j++)
911 			if (tmpset[j] != p[j])
912 				goto different;
913 		/* setvec is state i */
914 		f->gototab[s][c] = i;
915 		return i;
916 	  different:;
917 	}
918 
919 	/* add tmpset to current set of states */
920 	if (f->curstat >= NSTATES-1) {
921 		f->curstat = 2;
922 		f->reset = 1;
923 		for (i = 2; i < NSTATES; i++)
924 			xfree(f->posns[i]);
925 	} else
926 		++(f->curstat);
927 	for (i = 0; i < NCHARS; i++)
928 		f->gototab[f->curstat][i] = 0;
929 	xfree(f->posns[f->curstat]);
930 	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
931 		overflo("out of space in cgoto");
932 
933 	f->posns[f->curstat] = p;
934 	f->gototab[s][c] = f->curstat;
935 	for (i = 0; i <= setcnt; i++)
936 		p[i] = tmpset[i];
937 	if (setvec[f->accept])
938 		f->out[f->curstat] = 1;
939 	else
940 		f->out[f->curstat] = 0;
941 	return f->curstat;
942 }
943 
944 
freefa(fa * f)945 void freefa(fa *f)	/* free a finite automaton */
946 {
947 	int i;
948 
949 	if (f == NULL)
950 		return;
951 	for (i = 0; i <= f->curstat; i++)
952 		xfree(f->posns[i]);
953 	for (i = 0; i <= f->accept; i++) {
954 		xfree(f->re[i].lfollow);
955 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
956 			xfree((f->re[i].lval.np));
957 	}
958 	xfree(f->restr);
959 	xfree(f);
960 }
961