1 /*	$OpenBSD: files.c,v 1.11 2004/07/20 03:50:27 deraadt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Peter McIlroy.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #ifndef lint
36 #if 0
37 static char sccsid[] = "@(#)files.c	8.1 (Berkeley) 6/6/93";
38 #else
39 static char rcsid[] = "$OpenBSD: files.c,v 1.11 2004/07/20 03:50:27 deraadt Exp $";
40 #endif
41 #endif /* not lint */
42 
43 #include "sort.h"
44 #include "fsort.h"
45 
46 #include <string.h>
47 
48 static int	seq(FILE *, DBT *, DBT *);
49 
50 /*
51  * this is the subroutine for file management for fsort().
52  * It keeps the buffers for all temporary files.
53  */
54 int
getnext(int binno,union f_handle infl0,int nfiles,RECHEADER * pos,u_char * end,struct field * dummy)55 getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char *end,
56     struct field *dummy)
57 {
58 	int i;
59 	u_char *hp;
60 	static size_t nleft = 0;
61 	static int cnt = 0, flag = -1;
62 	static u_char maxb = 0;
63 	static FILE *fp;
64 
65 	if (nleft == 0) {
66 		if (binno < 0)	/* reset files. */ {
67 			for (i = 0; i < nfiles; i++) {
68 				rewind(fstack[infl0.top + i].fp);
69 				fstack[infl0.top + i].max_o = 0;
70 			}
71 			flag = -1;
72 			nleft = cnt = 0;
73 			return (-1);
74 		}
75 		maxb = fstack[infl0.top].maxb;
76 		for (; nleft == 0; cnt++) {
77 			if (cnt >= nfiles) {
78 				cnt = 0;
79 				return (EOF);
80 			}
81 			fp = fstack[infl0.top + cnt].fp;
82 			fread(&nleft, sizeof(nleft), 1, fp);
83 			if (binno < maxb)
84 				fstack[infl0.top+cnt].max_o
85 					+= sizeof(nleft) + nleft;
86 			else if (binno == maxb) {
87 				if (binno != fstack[infl0.top].lastb) {
88 					fseek(fp, fstack[infl0.top+
89 						cnt].max_o, SEEK_SET);
90 					fread(&nleft, sizeof(nleft), 1, fp);
91 				}
92 				if (nleft == 0)
93 					fclose(fp);
94 			} else if (binno == maxb + 1) {		/* skip a bin */
95 				fseek(fp, nleft, SEEK_CUR);
96 				fread(&nleft, sizeof(nleft), 1, fp);
97 				flag = cnt;
98 			}
99 		}
100 	}
101 	if ((u_char *) pos > end - sizeof(TRECHEADER))
102 		return (BUFFEND);
103 	fread(pos, sizeof(TRECHEADER), 1, fp);
104 	if (end - pos->data < pos->length) {
105 		hp = ((u_char *)pos) + sizeof(TRECHEADER);
106 		for (i = sizeof(TRECHEADER); i ;  i--)
107 			ungetc(*--hp, fp);
108 		return (BUFFEND);
109 	}
110 	fread(pos->data, pos->length, 1, fp);
111 	nleft -= pos->length + sizeof(TRECHEADER);
112 	if (nleft == 0 && binno == fstack[infl0.top].maxb)
113 		fclose(fp);
114 	return (0);
115 }
116 
117 /*
118  * this is called when there is no special key. It's only called
119  * in the first fsort pass.
120  */
121 int
makeline(int flno,union f_handle filelist,int nfiles,RECHEADER * buffer,u_char * bufend,struct field * dummy2)122 makeline(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
123     u_char *bufend, struct field *dummy2)
124 {
125 	static u_char *obufend;
126 	static size_t osz;
127 	char *pos;
128 	static int fileno = 0, overflow = 0;
129 	static FILE *fp = 0;
130 	int c;
131 
132 	pos = (char *) buffer->data;
133 	if (overflow) {
134 		/*
135 		 * Buffer shortage is solved by either of two ways:
136 	 	 * * Flush previous buffered data and start using the
137 		 *   buffer from start (see fsort())
138 		 * * realloc buffer and bump bufend
139 		 *
140 		 * The former is preferred, realloc is only done when
141 		 * there is exactly one item in buffer which does not fit.
142 		 */
143 		if (bufend == obufend)
144 			memmove(pos, bufend - osz, osz);
145 		pos+=osz;
146 		overflow = 0;
147 	}
148 	for (;;) {
149 		if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
150 			return (EOF);
151 		else if (fp == 0) {
152 			if (fileno  >= nfiles)
153 				return (EOF);
154 			if (!(fp = fopen(filelist.names[fileno], "r")))
155 				err(2, "%s", filelist.names[fileno]);
156 			fileno++;
157 		}
158 		while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
159 			if ((*pos++ = c) == REC_D) {
160 				buffer->offset = 0;
161 				buffer->length = pos - (char *) buffer->data;
162 				return (0);
163 			}
164 		}
165 		if (pos >= (char *)bufend) {
166 			if (buffer->data < bufend) {
167 				overflow = 1;
168 				obufend = bufend;
169 				osz = (pos - (char *)buffer->data);
170 			}
171 			return (BUFFEND);
172 		} else if (c == EOF) {
173 			if (buffer->data != (u_char *) pos) {
174 				*pos++ = REC_D;
175 				buffer->offset = 0;
176 				buffer->length = pos - (char *) buffer->data;
177 				return (0);
178 			}
179 			FCLOSE(fp);
180 			fp = 0;
181 			if (flno >= 0)
182 				fstack[flno].fp = 0;
183 		} else {
184 			warnx("line too long: ignoring %100s...", buffer->data);
185 
186 			/* Consume the rest of the line from input */
187 			while((c = getc(fp)) != REC_D && c != EOF)
188 				;
189 
190 			buffer->offset = 0;
191 			buffer->length = 0;
192 
193 			return (BUFFEND);
194 		}
195 	}
196 }
197 
198 /*
199  * This generates keys. It's only called in the first fsort pass
200  */
201 int
makekey(int flno,union f_handle filelist,int nfiles,RECHEADER * buffer,u_char * bufend,struct field * ftbl)202 makekey(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
203     u_char *bufend, struct field *ftbl)
204 {
205 	static int fileno = 0;
206 	static FILE *dbdesc = 0;
207 	static DBT dbkey[1], line[1];
208 	static int overflow = 0;
209 	static int c;
210 
211 	if (overflow) {
212 		overflow = enterkey(buffer, line, bufend - (u_char *)buffer,
213 									ftbl);
214 		if (overflow)
215 			return (BUFFEND);
216 		else
217 			return (0);
218 	}
219 
220 	for (;;) {
221 		if (flno >= 0) {
222 			if (!(dbdesc = fstack[flno].fp))
223 				return (EOF);
224 		} else if (!dbdesc) {
225 			if (fileno  >= nfiles)
226 				return (EOF);
227 			dbdesc = fopen(filelist.names[fileno], "r");
228 			if (!dbdesc)
229 				err(2, "%s", filelist.names[fileno]);
230 			fileno++;
231 		}
232 		if (!(c = seq(dbdesc, line, dbkey))) {
233 			if ((signed)line->size > bufend - buffer->data) {
234 				overflow = 1;
235 			} else {
236 				overflow = enterkey(buffer, line,
237 				    bufend - (u_char *) buffer, ftbl);
238 			}
239 			if (overflow)
240 				return (BUFFEND);
241 			else
242 				return (0);
243 		}
244 		if (c == EOF) {
245 			FCLOSE(dbdesc);
246 			dbdesc = 0;
247 			if (flno >= 0)
248 				fstack[flno].fp = 0;
249 		} else {
250 			((char *) line->data)[60] = '\000';
251 			warnx("line too long: ignoring %.100s...",
252 			    (char *)line->data);
253 		}
254 	}
255 }
256 
257 /*
258  * get a key/line pair from fp
259  */
260 static int
seq(FILE * fp,DBT * line,DBT * key)261 seq(FILE *fp, DBT *line, DBT *key)
262 {
263 	static char *buf, flag = 1;
264 	char *end, *pos;
265 	int c;
266 
267 	if (flag) {
268 		flag = 0;
269 		buf = (char *) linebuf;
270 		end = buf + linebuf_size;
271 		line->data = buf;
272 	}
273 	pos = buf;
274 	while ((c = getc(fp)) != EOF) {
275 		if ((*pos++ = c) == REC_D) {
276 			line->size = pos - buf;
277 			return (0);
278 		}
279 		if (pos == end) {
280 			linebuf_size *= 2;
281 			linebuf = realloc(linebuf, linebuf_size);
282 			if (!linebuf)
283 				err(2, "realloc of linebuf to %lu bytes failed",
284 					(unsigned long)linebuf_size);
285 			end = linebuf + linebuf_size;
286 			pos = linebuf + (pos - buf);
287 			line->data = buf = (char *)linebuf;
288 			continue;
289 		}
290 	}
291 	if (pos != buf) {
292 		*pos++ = REC_D;
293 		line->size = pos - buf;
294 		return (0);
295 	} else
296 		return (EOF);
297 }
298 
299 /*
300  * write a key/line pair to a temporary file
301  */
302 void
putrec(RECHEADER * rec,FILE * fp)303 putrec(RECHEADER *rec, FILE *fp)
304 {
305 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
306 }
307 
308 /*
309  * write a line to output
310  */
311 void
putline(RECHEADER * rec,FILE * fp)312 putline(RECHEADER *rec, FILE *fp)
313 {
314 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
315 }
316 
317 /*
318  * get a record from a temporary file. (Used by merge sort.)
319  */
320 int
geteasy(int flno,union f_handle filelist,int nfiles,RECHEADER * rec,u_char * end,struct field * dummy2)321 geteasy(int flno, union f_handle filelist, int nfiles, RECHEADER *rec,
322     u_char *end, struct field *dummy2)
323 {
324 	int i;
325 	FILE *fp;
326 
327 	fp = fstack[flno].fp;
328 	if ((u_char *) rec > end - sizeof(TRECHEADER))
329 		return (BUFFEND);
330 	if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
331 		fclose(fp);
332 		fstack[flno].fp = 0;
333 		return (EOF);
334 	}
335 	if (end - rec->data < rec->length) {
336 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
337 			ungetc(*((char *) rec + i), fp);
338 		return (BUFFEND);
339 	}
340 	fread(rec->data, rec->length, 1, fp);
341 	return (0);
342 }
343