1 /*	$OpenBSD: tran.c,v 1.15 2011/09/28 19:27:18 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define	DEBUG
27 #include <stdio.h>
28 #include <math.h>
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include "awk.h"
33 #include "awkgram.h"
34 
35 __RCSID("$MirOS: src/usr.bin/awk/tran.c,v 1.3 2014/03/13 00:37:36 tg Exp $");
36 
#define	FULLTAB	2	/* rehash when nelem exceeds this multiple of the table size */
#define	GROWTAB 4	/* rehash multiplies the table size by this factor */

Array	*symtab;	/* main symbol table */

/*
 * Pointers into the value slots of the built-in variable cells.
 * Most are set in syminit(); ARGC is set in arginit().
 */
char	**FS;		/* initial field sep */
char	**RS;		/* initial record sep */
char	**OFS;		/* output field sep */
char	**ORS;		/* output record sep */
char	**OFMT;		/* output format for numbers */
char	**CONVFMT;	/* format for conversions in getsval */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
char	**FILENAME;	/* current filename argument */
Awkfloat *ARGC;		/* number of arguments from command line */
char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

/* The cells themselves, for code that needs more than the value slot. */
Cell	*fsloc;		/* FS */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fnrloc;	/* FNR */
Array	*ARGVtab;	/* symbol table containing ARGV[...] */
Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*symtabloc;	/* SYMTAB */

Cell	*nullloc;	/* a guaranteed empty cell */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */
Cell	*literal0;	/* the constant cell named "0", created in syminit() */

extern Cell **fldtab;	/* defined outside this section of the file */
72 
syminit(void)73 void syminit(void)	/* initialize symbol table with builtin vars */
74 {
75 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
76 	/* this is used for if(x)... tests: */
77 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
78 	nullnode = celltonode(nullloc, CCON);
79 
80 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
81 	FS = &fsloc->sval;
82 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
83 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
84 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
85 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
86 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
87 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
88 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
89 	NF = &nfloc->fval;
90 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
91 	NR = &nrloc->fval;
92 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
93 	FNR = &fnrloc->fval;
94 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
95 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
96 	RSTART = &rstartloc->fval;
97 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
98 	RLENGTH = &rlengthloc->fval;
99 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
100 	symtabloc->sval = (char *) symtab;
101 }
102 
arginit(int ac,char ** av)103 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
104 {
105 	Cell *cp;
106 	int i;
107 	char temp[50];
108 
109 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
110 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
111 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
112 	cp->sval = (char *) ARGVtab;
113 	for (i = 0; i < ac; i++) {
114 		snprintf(temp, sizeof temp, "%d", i);
115 		if (is_number(*av))
116 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
117 		else
118 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
119 		av++;
120 	}
121 }
122 
envinit(char ** envp)123 void envinit(char **envp)	/* set up ENVIRON variable */
124 {
125 	Cell *cp;
126 	char *p;
127 
128 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
129 	ENVtab = makesymtab(NSYMTAB);
130 	cp->sval = (char *) ENVtab;
131 	for ( ; *envp; envp++) {
132 		if ((p = strchr(*envp, '=')) == NULL)
133 			continue;
134 		if( p == *envp ) /* no left hand side name in env string */
135 			continue;
136 		*p++ = 0;	/* split into two strings at = */
137 		if (is_number(p))
138 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
139 		else
140 			setsymtab(*envp, p, 0.0, STR, ENVtab);
141 		p[-1] = '=';	/* restore in case env is passed down to a shell */
142 	}
143 }
144 
makesymtab(int n)145 Array *makesymtab(int n)	/* make a new symbol table */
146 {
147 	Array *ap;
148 	Cell **tp;
149 
150 	ap = (Array *) malloc(sizeof(Array));
151 	tp = (Cell **) calloc(n, sizeof(Cell *));
152 	if (ap == NULL || tp == NULL)
153 		FATAL("out of space in makesymtab");
154 	ap->nelem = 0;
155 	ap->size = n;
156 	ap->tab = tp;
157 	return(ap);
158 }
159 
/*
 * freesymtab - release the symbol table hanging off array cell ap.
 *
 * Does nothing if ap is not an array or has no table attached.
 * Frees every cell (name, string value when freeable, and the cell),
 * then the bucket vector and the Array header.  ap->sval itself is not
 * modified here.
 */
void freesymtab(Cell *ap)
{
	Array *tp;
	Cell *cp, *next;
	int i;

	if (!isarr(ap))
		return;
	if ((tp = (Array *) ap->sval) == NULL)
		return;

	for (i = 0; i < tp->size; i++) {
		cp = tp->tab[i];
		while (cp != NULL) {
			next = cp->cnext;	/* grab the link before the cell goes away */
			xfree(cp->nval);
			if (freeable(cp)) {
				xfree(cp->sval);
			}
			free(cp);
			tp->nelem--;
			cp = next;
		}
		tp->tab[i] = 0;
	}

	/* every cell freed above was counted, so the tally should be zero */
	if (tp->nelem != 0)
		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
	free(tp->tab);
	free(tp);
}
187 
/*
 * freeelem - free element s from ap (i.e., ap["s"]).
 *
 * Unlinks the first cell named s from its hash chain and frees it.
 * Does nothing if no such element exists.
 */
void freeelem(Cell *ap, const char *s)
{
	Array *tp = (Array *) ap->sval;
	Cell **link, *victim;
	int h;

	h = hash(s, tp->size);
	/* link always addresses the pointer that leads to the current cell */
	for (link = &tp->tab[h]; (victim = *link) != NULL; link = &victim->cnext) {
		if (strcmp(s, victim->nval) != 0)
			continue;
		*link = victim->cnext;		/* splice it out of the chain */
		if (freeable(victim)) {
			xfree(victim->sval);
		}
		free(victim->nval);
		free(victim);
		tp->nelem--;
		return;
	}
}
210 
setsymtab(const char * n,const char * s,Awkfloat f,unsigned t,Array * tp)211 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
212 {
213 	int h;
214 	Cell *p;
215 
216 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
217 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
218 			(void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
219 		return(p);
220 	}
221 	p = (Cell *) malloc(sizeof(Cell));
222 	if (p == NULL)
223 		FATAL("out of space for symbol table at %s", n);
224 	p->nval = tostring(n);
225 	p->sval = s ? tostring(s) : tostring("");
226 	p->fval = f;
227 	p->tval = t;
228 	p->csub = CUNK;
229 	p->ctype = OCELL;
230 	tp->nelem++;
231 	if (tp->nelem > FULLTAB * tp->size)
232 		rehash(tp);
233 	h = hash(n, tp->size);
234 	p->cnext = tp->tab[h];
235 	tp->tab[h] = p;
236 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
237 		(void*)p, p->nval, p->sval, p->fval, p->tval) );
238 	return(p);
239 }
240 
/*
 * hash - form the hash value for string s, reduced to the range [0, n).
 *
 * Folds the characters left to right as acc = 31 * acc + ch in unsigned
 * arithmetic and returns the result modulo n.  n must be non-zero.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *cp;

	for (cp = s; *cp != '\0'; cp++)
		acc = 31 * acc + *cp;
	return acc % n;
}
249 
/*
 * rehash - move every cell of tp into a bucket vector GROWTAB times larger.
 *
 * If the larger vector cannot be allocated the table is left as it was and
 * the function returns quietly.
 */
void rehash(Array *tp)
{
	int newsize = GROWTAB * tp->size;
	Cell **newtab = (Cell **) calloc(newsize, sizeof(Cell *));
	int i;

	if (newtab == NULL)	/* can't do it, but can keep running. */
		return;		/* someone else will run out later. */

	for (i = 0; i < tp->size; i++) {
		Cell *cp = tp->tab[i];

		while (cp != NULL) {
			Cell *next = cp->cnext;	/* save before relinking */
			int slot = hash(cp->nval, newsize);

			cp->cnext = newtab[slot];
			newtab[slot] = cp;
			cp = next;
		}
	}

	free(tp->tab);
	tp->tab = newtab;
	tp->size = newsize;
}
271 
lookup(const char * s,Array * tp)272 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
273 {
274 	Cell *p;
275 	int h;
276 
277 	h = hash(s, tp->size);
278 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
279 		if (strcmp(s, p->nval) == 0)
280 			return(p);	/* found it */
281 	return(NULL);			/* not found */
282 }
283 
/*
 * setfval - set the numeric value of cell vp to f and return it.
 *
 * Reports via funnyvar() if vp is neither NUM nor STR.  Assigning to a
 * field invalidates $0 (and extends the fields if the field number exceeds
 * NF); assigning to the record invalidates the fields.  Any freeable
 * string value is released and the cell is left NUM and not STR.
 */
Awkfloat setfval(Cell *vp, Awkfloat f)
{
	int fldno;

	if (!(vp->tval & (NUM | STR)))
		funnyvar(vp, "assign to");

	if (isfld(vp)) {
		donerec = 0;	/* $0 no longer matches the fields */
		fldno = atoi(vp->nval);
		if (fldno > *NF)
			newfld(fldno);
		dprintf( ("setting field %d to %g\n", fldno, f) );
	} else if (isrec(vp)) {
		donefld = 0;	/* $1... no longer match the record */
		donerec = 1;
	}

	if (freeable(vp)) {
		xfree(vp->sval);	/* drop any previous string */
	}
	vp->tval = (vp->tval & ~STR) | NUM;	/* string invalid, number ok */
	dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
	vp->fval = f;
	return vp->fval;
}
307 
/*
 * funnyvar - report an attempt to use a cell that is neither NUM nor STR.
 *
 * vp: the offending cell.
 * rw: verb phrase describing the attempted access, spliced into the
 *     message (callers in this file pass "assign to" or "read value of").
 *
 * Array names and functions get a FATAL diagnostic; anything else gets a
 * WARNING that dumps the cell.
 */
void funnyvar(Cell *vp, const char *rw)
{
	if (isarr(vp))
		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
	if (vp->tval & FCN)
		FATAL("can't %s %s; it's a function.", rw, vp->nval);
	/*
	 * %p takes a void *, so cast as the dprintf calls in this file do.
	 * nval/sval may be NULL (xfree NULLs sval), so guard them with NN().
	 */
	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
		(void *)vp, NN(vp->nval), NN(vp->sval), vp->fval, vp->tval);
}
317 
/*
 * setsval - set the string value of cell vp to a copy of s and return
 * the copy.
 *
 * Reports via funnyvar() if vp is neither NUM nor STR.  Assigning to a
 * field invalidates $0 (and extends the fields if the field number exceeds
 * NF); assigning to the record invalidates the fields.  The cell is left
 * STR, not NUM, and with DONTFREE cleared.
 */
char *setsval(Cell *vp, const char *s)
{
	char *copy;
	int fldno;

	dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
	if (!(vp->tval & (NUM | STR)))
		funnyvar(vp, "assign to");

	if (isfld(vp)) {
		donerec = 0;	/* $0 no longer matches the fields */
		fldno = atoi(vp->nval);
		if (fldno > *NF)
			newfld(fldno);
		dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
	} else if (isrec(vp)) {
		donefld = 0;	/* $1... no longer match the record */
		donerec = 1;
	}

	/* copy before freeing: s may be vp's own string (self-assignment) */
	copy = tostring(s);
	if (freeable(vp)) {
		xfree(vp->sval);
	}
	vp->tval = ((vp->tval & ~NUM) | STR) & ~DONTFREE;
	dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
		(void*)vp, NN(vp->nval), copy,copy, vp->tval, donerec, donefld) );
	vp->sval = copy;
	return copy;
}
347 
getfval(Cell * vp)348 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
349 {
350 	if ((vp->tval & (NUM | STR)) == 0)
351 		funnyvar(vp, "read value of");
352 	if (isfld(vp) && donefld == 0)
353 		fldbld();
354 	else if (isrec(vp) && donerec == 0)
355 		recbld();
356 	if (!isnum(vp)) {	/* not a number */
357 		vp->fval = atof(vp->sval);	/* best guess */
358 		if (is_number(vp->sval) && !(vp->tval&CON))
359 			vp->tval |= NUM;	/* make NUM only sparingly */
360 	}
361 	   dprintf( ("getfval %p: %s = %g, t=%o\n",
362 		(void*)vp, NN(vp->nval), vp->fval, vp->tval) );
363 	return(vp->fval);
364 }
365 
/*
 * get_str_val - return the string value of cell vp, formatting its number
 * if it has no valid string yet.
 *
 * vp:  the cell.
 * fmt: points at the printf format used for non-integral values.
 *
 * Reports via funnyvar() if vp is neither NUM nor STR.  Rebuilds the
 * fields or the record first when vp is a field/record that is out of
 * date.  Integral values are formatted with "%.30g", everything else with
 * *fmt.  The freshly allocated string replaces any freeable old one and
 * the cell is marked STR with DONTFREE cleared.
 */
static char *get_str_val(Cell *vp, char **fmt)
{
	double intpart;
	const char *numfmt;
	int rc;

	if (!(vp->tval & (NUM | STR)))
		funnyvar(vp, "read value of");

	if (isfld(vp) && donefld == 0)
		fldbld();
	else if (isrec(vp) && donerec == 0)
		recbld();

	if (!isstr(vp)) {
		if (freeable(vp)) {
			xfree(vp->sval);
		}
		/* a zero fractional part means the value is integral */
		numfmt = (modf(vp->fval, &intpart) == 0) ? "%.30g" : *fmt;
		rc = asprintf(&vp->sval, numfmt, vp->fval);
		if (rc == -1)
			FATAL("out of space in get_str_val");
		vp->tval &= ~DONTFREE;
		vp->tval |= STR;
	}
	dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
		(void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
	return vp->sval;
}
393 
/*
 * getsval - return the string value of cell vp, using CONVFMT to format
 * non-integral numbers.
 */
char *getsval(Cell *vp)
{
	char **fmt = CONVFMT;

	return get_str_val(vp, fmt);
}
398 
/*
 * getpssval - return the string value of cell vp for print, using OFMT to
 * format non-integral numbers.
 */
char *getpssval(Cell *vp)
{
	char **fmt = OFMT;

	return get_str_val(vp, fmt);
}
403 
404 
/*
 * tostring - return a freshly allocated copy of string s.
 *
 * Calls FATAL if the copy cannot be allocated.
 */
char *tostring(const char *s)
{
	char *copy = strdup(s);

	if (copy != NULL)
		return copy;
	FATAL("out of space in tostring on %s", s);
	return copy;
}
414 
qstring(const char * is,int delim)415 char *qstring(const char *is, int delim)	/* collect string up to next delim */
416 {
417 	const char *os = is;
418 	int c, n;
419 	const uschar *s = (const uschar *)is;
420 	uschar *buf, *bp;
421 
422 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
423 		FATAL( "out of space in qstring(%s)", s);
424 	for (bp = buf; (c = *s) != delim; s++) {
425 		if (c == '\n')
426 			SYNTAX( "newline in string %.20s...", os );
427 		else if (c != '\\')
428 			*bp++ = c;
429 		else {	/* \something */
430 			c = *++s;
431 			if (c == 0) {	/* \ at end */
432 				*bp++ = '\\';
433 				break;	/* for loop */
434 			}
435 			switch (c) {
436 			case '\\':	*bp++ = '\\'; break;
437 			case 'n':	*bp++ = '\n'; break;
438 			case 't':	*bp++ = '\t'; break;
439 			case 'b':	*bp++ = '\b'; break;
440 			case 'f':	*bp++ = '\f'; break;
441 			case 'r':	*bp++ = '\r'; break;
442 			default:
443 				if (!isdigit(c)) {
444 					*bp++ = c;
445 					break;
446 				}
447 				n = c - '0';
448 				if (isdigit(s[1])) {
449 					n = 8 * n + *++s - '0';
450 					if (isdigit(s[1]))
451 						n = 8 * n + *++s - '0';
452 				}
453 				*bp++ = n;
454 				break;
455 			}
456 		}
457 	}
458 	*bp++ = 0;
459 	return (char *) buf;
460 }
461