1 /*	$OpenBSD: fields.c,v 1.10 2004/07/20 03:50:27 deraadt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Peter McIlroy.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #ifndef lint
36 #if 0
37 static char sccsid[] = "@(#)fields.c	8.1 (Berkeley) 6/6/93";
38 #else
39 static char rcsid[] = "$OpenBSD: fields.c,v 1.10 2004/07/20 03:50:27 deraadt Exp $";
40 #endif
41 #endif /* not lint */
42 
43 /* Subroutines to generate sort keys. */
44 
45 #include "sort.h"
46 
47 #define blancmange(ptr) {					\
48 	if (BLANK & d_mask[*(ptr)])				\
49 		while (BLANK & d_mask[*(++(ptr))]);		\
50 }
51 
52 #define NEXTCOL(pos) {						\
53 	if (!SEP_FLAG)						\
54 		while (pos < lineend && BLANK & l_d_mask[*(++pos)]);		\
55 	while (pos < lineend && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));	\
56 }
57 
58 extern u_char *enterfield(u_char *, u_char *, struct field *, int);
59 
60 extern u_char *number(u_char *, u_char *, u_char *, u_char *, int);
61 
62 extern struct coldesc *clist;
63 extern int ncols;
64 
65 #define DECIMAL '.'
66 #define OFFSET 128
67 
68 u_char TENS[10];	/* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
69 u_char NEGTENS[10];	/* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
70 u_char *OFF_TENS, *OFF_NTENS;	/* TENS - '0', NEGTENS - '0' */
71 u_char fnum[NBINS], rnum[NBINS];
72 
73 /*
74  * constructs sort key with leading recheader, followed by the key,
75  * followed by the original line.
76  */
77 length_t
enterkey(RECHEADER * keybuf,DBT * line,int size,struct field fieldtable[])78 enterkey(RECHEADER *keybuf,	/* pointer to start of key */
79     DBT *line, int size, struct field fieldtable[])
80 {
81 	int i;
82 	u_char *l_d_mask;
83 	u_char *lineend, *pos;
84 	u_char *endkey, *keypos;
85 	struct coldesc *clpos;
86 	int col = 1;
87 	struct field *ftpos;
88 	l_d_mask = d_mask;
89 	pos = (u_char *) line->data - 1;
90 	lineend = (u_char *) line->data + line->size-1;
91 				/* don't include rec_delimiter */
92 	keypos = keybuf->data;
93 
94 	for (i = 0; i < ncols; i++) {
95 		clpos = clist + i;
96 		for (; (col < clpos->num) && (pos < lineend); col++) {
97 			NEXTCOL(pos);
98 		}
99 		if (pos >= lineend)
100 			break;
101 		clpos->start = SEP_FLAG ? pos + 1 : pos;
102 		NEXTCOL(pos);
103 		clpos->end = pos;
104 		col++;
105 		if (pos >= lineend) {
106 			clpos->end = lineend;
107 			i++;
108 			break;
109 		}
110 	}
111 	for (; i <= ncols; i++)
112 		clist[i].start = clist[i].end = lineend;
113 	if (clist[0].start < (u_char *) line->data)
114 		clist[0].start++;
115 	endkey = (u_char *) keybuf + size - line->size;
116 	for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
117 		if ((keypos = enterfield(keypos, endkey, ftpos,
118 		    fieldtable->flags)) == NULL)
119 			return (1);
120 
121 	if (UNIQUE)
122 		*(keypos-1) = REC_D;
123 	keybuf->offset = keypos - keybuf->data;
124 	keybuf->length = keybuf->offset + line->size;
125 	if (keybuf->length + sizeof(TRECHEADER) > size)
126 		return (1);		/* line too long for buffer */
127 	memcpy(keybuf->data + keybuf->offset, line->data, line->size);
128 	return (0);
129 }
130 
131 /*
132  * constructs a field (as defined by -k) within a key
133  */
134 u_char *
enterfield(u_char * tablepos,u_char * endkey,struct field * cur_fld,int gflags)135 enterfield(u_char *tablepos, u_char *endkey, struct field *cur_fld, int gflags)
136 {
137 	u_char *start, *end, *lineend, *mask, *lweight;
138 	struct column icol, tcol;
139 	u_int flags;
140 	u_int Rflag;
141 
142 	icol = cur_fld->icol;
143 	tcol = cur_fld->tcol;
144 	flags = cur_fld->flags;
145 	start = icol.p->start;
146 	lineend = clist[ncols].end;
147 	if (flags & BI)
148 		blancmange(start);
149 	start += icol.indent;
150 	start = min(start, lineend);
151 
152 	if (!tcol.num)
153 		end = lineend;
154 	else {
155 		if (tcol.indent) {
156 			end = tcol.p->start;
157 			if (flags & BT)
158 				blancmange(end);
159 			end += tcol.indent;
160 			end = min(end, lineend);
161 		} else
162 			end = tcol.p->end;
163 	}
164 	if (flags & N) {
165 		Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
166 		tablepos = number(tablepos, endkey, start, end, Rflag);
167 		return (tablepos);
168 	}
169 	mask = alltable;
170 	mask = cur_fld->mask;
171 	lweight = cur_fld->weights;
172 	for (; start < end; start++)
173 		if (mask[*start]) {
174 			if (*start <= 1) {
175 				if (tablepos+2 >= endkey)
176 					return (NULL);
177 				*tablepos++ = lweight[1];
178 				*tablepos++ = lweight[*start ? 2 : 1];
179 			} else {
180 				*tablepos++ = lweight[*start];
181 				if (tablepos == endkey)
182 					return (NULL);
183 			}
184 		}
185 	*tablepos++ = lweight[0];
186 	return (tablepos == endkey ? NULL : tablepos);
187 }
188 
189 /* Uses the first bin to assign sign, expsign, 0, and the first
190  * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
191  *   When sorting in forward order:
192  * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
193  * else use (0-99)->(2-102).
194  * If the exponent is >=61, use another byte for each additional 253
195  * in the exponent. Cutoff is at 567.
196  * To avoid confusing the exponent and the mantissa, use a field delimiter
197  * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
198  * only time a field delimiter can come in that position.
199  * Reverse order is done analagously.
200  */
201 
202 u_char *
number(u_char * pos,u_char * bufend,u_char * line,u_char * lineend,int Rflag)203 number(u_char *pos, u_char *bufend, u_char *line, u_char *lineend, int Rflag)
204 {
205 	int or_sign, parity = 0;
206 	int expincr = 1, exponent = -1;
207 	int bite, expsign = 1, sign = 1;
208 	u_char lastvalue, *nonzero, *tline, *C_TENS;
209 	u_char *nweights;
210 
211 	if (Rflag)
212 		nweights = rnum;
213 	else
214 		nweights = fnum;
215 	if (pos > bufend - 8)
216 		return (NULL);
217 	/*
218 	 * or_sign sets the sort direction:
219 	 *	(-r: +/-)(sign: +/-)(expsign: +/-)
220 	 */
221 	or_sign = sign ^ expsign ^ Rflag;
222 	blancmange(line);
223 	if (*line == '-') {	/* set the sign */
224 		or_sign ^= 1;
225 		sign = 0;
226 		line++;
227 	}
228 	/* eat initial zeroes */
229 	for (; *line == '0' && line < lineend; line++)
230 		;
231 	/* calculate exponents < 0 */
232 	if (*line == DECIMAL) {
233 		exponent = 1;
234 		while (*++line == '0' && line < lineend)
235 			exponent++;
236 		expincr = 0;
237 		expsign = 0;
238 	}
239 	/* next character better be a digit */
240 	if (*line < '1' || *line > '9' || line >= lineend) {
241 		*pos++ = nweights[127];
242 		return (pos);
243 	}
244 	if (expincr) {
245 		for (tline = line-1; *++tline >= '0' &&
246 		    *tline <= '9' && tline < lineend;)
247 			exponent++;
248 	}
249 	if (exponent > 567) {
250 		*pos++ = nweights[sign ? (expsign ? 254 : 128)
251 					: (expsign ? 0 : 126)];
252 		warnx("exponent out of bounds");
253 		return (pos);
254 	}
255 	bite = min(exponent, 61);
256 	*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
257 				: (expsign ? 64-bite : 64+bite)];
258 	if (bite >= 61) {
259 		do {
260 			exponent -= bite;
261 			bite = min(exponent, 254);
262 			*pos++ = nweights[or_sign ? 254-bite : bite];
263 		} while (bite == 254);
264 	}
265 	C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
266 	for (; line < lineend; line++) {
267 		if (*line >= '0' && *line <= '9') {
268 			if (parity) {
269 				*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
270 						: *line);
271 				if (pos == bufend)
272 					return (NULL);
273 				if (*line != '0' || lastvalue != '0')
274 					nonzero = pos;
275 			} else
276 				lastvalue = *line;
277 			parity ^= 1;
278 		} else if(*line == DECIMAL) {
279 			if(!expincr)	/* a decimal already occurred once */
280 				break;
281 			expincr = 0;
282 		} else
283 			break;
284 	}
285 	if (parity && lastvalue != '0') {
286 		*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
287 					OFF_TENS[lastvalue] + '0';
288 	} else
289 		pos = nonzero;
290 	if (pos > bufend-1)
291 		return (NULL);
292 	*pos++ = or_sign ? nweights[254] : nweights[0];
293 	return (pos);
294 }
295 
296 /* This forces a gap around the record delimiter
297  * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
298  * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
299  */
300 void
num_init(void)301 num_init(void)
302 {
303 	int i;
304 	TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
305 	NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
306 	OFF_TENS = TENS - '0';
307 	OFF_NTENS = NEGTENS - '0';
308 	for (i = 1; i < 10; i++) {
309 		TENS[i] = TENS[i - 1] + 10;
310 		NEGTENS[i] = NEGTENS[i - 1] - 10;
311 	}
312 	for (i = 0; i < REC_D; i++) {
313 		fnum[i] = i;
314 		rnum[255 - i] = i;
315 	}
316 	for (i = REC_D; i <255; i++) {
317 		fnum[i] = i + 1;
318 		rnum[255 - i] = i - 1;
319 	}
320 }
321