1 /* $OpenBSD: files.c,v 1.11 2004/07/20 03:50:27 deraadt Exp $ */
2
3 /*-
4 * Copyright (c) 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Peter McIlroy.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #ifndef lint
36 #if 0
37 static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93";
38 #else
39 static char rcsid[] = "$OpenBSD: files.c,v 1.11 2004/07/20 03:50:27 deraadt Exp $";
40 #endif
41 #endif /* not lint */
42
43 #include "sort.h"
44 #include "fsort.h"
45
46 #include <string.h>
47
48 static int seq(FILE *, DBT *, DBT *);
49
50 /*
51 * this is the subroutine for file management for fsort().
52 * It keeps the buffers for all temporary files.
53 */
54 int
getnext(int binno,union f_handle infl0,int nfiles,RECHEADER * pos,u_char * end,struct field * dummy)55 getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char *end,
56 struct field *dummy)
57 {
58 int i;
59 u_char *hp;
60 static size_t nleft = 0;
61 static int cnt = 0, flag = -1;
62 static u_char maxb = 0;
63 static FILE *fp;
64
65 if (nleft == 0) {
66 if (binno < 0) /* reset files. */ {
67 for (i = 0; i < nfiles; i++) {
68 rewind(fstack[infl0.top + i].fp);
69 fstack[infl0.top + i].max_o = 0;
70 }
71 flag = -1;
72 nleft = cnt = 0;
73 return (-1);
74 }
75 maxb = fstack[infl0.top].maxb;
76 for (; nleft == 0; cnt++) {
77 if (cnt >= nfiles) {
78 cnt = 0;
79 return (EOF);
80 }
81 fp = fstack[infl0.top + cnt].fp;
82 fread(&nleft, sizeof(nleft), 1, fp);
83 if (binno < maxb)
84 fstack[infl0.top+cnt].max_o
85 += sizeof(nleft) + nleft;
86 else if (binno == maxb) {
87 if (binno != fstack[infl0.top].lastb) {
88 fseek(fp, fstack[infl0.top+
89 cnt].max_o, SEEK_SET);
90 fread(&nleft, sizeof(nleft), 1, fp);
91 }
92 if (nleft == 0)
93 fclose(fp);
94 } else if (binno == maxb + 1) { /* skip a bin */
95 fseek(fp, nleft, SEEK_CUR);
96 fread(&nleft, sizeof(nleft), 1, fp);
97 flag = cnt;
98 }
99 }
100 }
101 if ((u_char *) pos > end - sizeof(TRECHEADER))
102 return (BUFFEND);
103 fread(pos, sizeof(TRECHEADER), 1, fp);
104 if (end - pos->data < pos->length) {
105 hp = ((u_char *)pos) + sizeof(TRECHEADER);
106 for (i = sizeof(TRECHEADER); i ; i--)
107 ungetc(*--hp, fp);
108 return (BUFFEND);
109 }
110 fread(pos->data, pos->length, 1, fp);
111 nleft -= pos->length + sizeof(TRECHEADER);
112 if (nleft == 0 && binno == fstack[infl0.top].maxb)
113 fclose(fp);
114 return (0);
115 }
116
117 /*
118 * this is called when there is no special key. It's only called
119 * in the first fsort pass.
120 */
121 int
makeline(int flno,union f_handle filelist,int nfiles,RECHEADER * buffer,u_char * bufend,struct field * dummy2)122 makeline(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
123 u_char *bufend, struct field *dummy2)
124 {
125 static u_char *obufend;
126 static size_t osz;
127 char *pos;
128 static int fileno = 0, overflow = 0;
129 static FILE *fp = 0;
130 int c;
131
132 pos = (char *) buffer->data;
133 if (overflow) {
134 /*
135 * Buffer shortage is solved by either of two ways:
136 * * Flush previous buffered data and start using the
137 * buffer from start (see fsort())
138 * * realloc buffer and bump bufend
139 *
140 * The former is preferred, realloc is only done when
141 * there is exactly one item in buffer which does not fit.
142 */
143 if (bufend == obufend)
144 memmove(pos, bufend - osz, osz);
145 pos+=osz;
146 overflow = 0;
147 }
148 for (;;) {
149 if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
150 return (EOF);
151 else if (fp == 0) {
152 if (fileno >= nfiles)
153 return (EOF);
154 if (!(fp = fopen(filelist.names[fileno], "r")))
155 err(2, "%s", filelist.names[fileno]);
156 fileno++;
157 }
158 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
159 if ((*pos++ = c) == REC_D) {
160 buffer->offset = 0;
161 buffer->length = pos - (char *) buffer->data;
162 return (0);
163 }
164 }
165 if (pos >= (char *)bufend) {
166 if (buffer->data < bufend) {
167 overflow = 1;
168 obufend = bufend;
169 osz = (pos - (char *)buffer->data);
170 }
171 return (BUFFEND);
172 } else if (c == EOF) {
173 if (buffer->data != (u_char *) pos) {
174 *pos++ = REC_D;
175 buffer->offset = 0;
176 buffer->length = pos - (char *) buffer->data;
177 return (0);
178 }
179 FCLOSE(fp);
180 fp = 0;
181 if (flno >= 0)
182 fstack[flno].fp = 0;
183 } else {
184 warnx("line too long: ignoring %100s...", buffer->data);
185
186 /* Consume the rest of the line from input */
187 while((c = getc(fp)) != REC_D && c != EOF)
188 ;
189
190 buffer->offset = 0;
191 buffer->length = 0;
192
193 return (BUFFEND);
194 }
195 }
196 }
197
198 /*
199 * This generates keys. It's only called in the first fsort pass
200 */
201 int
makekey(int flno,union f_handle filelist,int nfiles,RECHEADER * buffer,u_char * bufend,struct field * ftbl)202 makekey(int flno, union f_handle filelist, int nfiles, RECHEADER *buffer,
203 u_char *bufend, struct field *ftbl)
204 {
205 static int fileno = 0;
206 static FILE *dbdesc = 0;
207 static DBT dbkey[1], line[1];
208 static int overflow = 0;
209 static int c;
210
211 if (overflow) {
212 overflow = enterkey(buffer, line, bufend - (u_char *)buffer,
213 ftbl);
214 if (overflow)
215 return (BUFFEND);
216 else
217 return (0);
218 }
219
220 for (;;) {
221 if (flno >= 0) {
222 if (!(dbdesc = fstack[flno].fp))
223 return (EOF);
224 } else if (!dbdesc) {
225 if (fileno >= nfiles)
226 return (EOF);
227 dbdesc = fopen(filelist.names[fileno], "r");
228 if (!dbdesc)
229 err(2, "%s", filelist.names[fileno]);
230 fileno++;
231 }
232 if (!(c = seq(dbdesc, line, dbkey))) {
233 if ((signed)line->size > bufend - buffer->data) {
234 overflow = 1;
235 } else {
236 overflow = enterkey(buffer, line,
237 bufend - (u_char *) buffer, ftbl);
238 }
239 if (overflow)
240 return (BUFFEND);
241 else
242 return (0);
243 }
244 if (c == EOF) {
245 FCLOSE(dbdesc);
246 dbdesc = 0;
247 if (flno >= 0)
248 fstack[flno].fp = 0;
249 } else {
250 ((char *) line->data)[60] = '\000';
251 warnx("line too long: ignoring %.100s...",
252 (char *)line->data);
253 }
254 }
255 }
256
257 /*
258 * get a key/line pair from fp
259 */
260 static int
seq(FILE * fp,DBT * line,DBT * key)261 seq(FILE *fp, DBT *line, DBT *key)
262 {
263 static char *buf, flag = 1;
264 char *end, *pos;
265 int c;
266
267 if (flag) {
268 flag = 0;
269 buf = (char *) linebuf;
270 end = buf + linebuf_size;
271 line->data = buf;
272 }
273 pos = buf;
274 while ((c = getc(fp)) != EOF) {
275 if ((*pos++ = c) == REC_D) {
276 line->size = pos - buf;
277 return (0);
278 }
279 if (pos == end) {
280 linebuf_size *= 2;
281 linebuf = realloc(linebuf, linebuf_size);
282 if (!linebuf)
283 err(2, "realloc of linebuf to %lu bytes failed",
284 (unsigned long)linebuf_size);
285 end = linebuf + linebuf_size;
286 pos = linebuf + (pos - buf);
287 line->data = buf = (char *)linebuf;
288 continue;
289 }
290 }
291 if (pos != buf) {
292 *pos++ = REC_D;
293 line->size = pos - buf;
294 return (0);
295 } else
296 return (EOF);
297 }
298
299 /*
300 * write a key/line pair to a temporary file
301 */
302 void
putrec(RECHEADER * rec,FILE * fp)303 putrec(RECHEADER *rec, FILE *fp)
304 {
305 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
306 }
307
308 /*
309 * write a line to output
310 */
311 void
putline(RECHEADER * rec,FILE * fp)312 putline(RECHEADER *rec, FILE *fp)
313 {
314 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
315 }
316
317 /*
318 * get a record from a temporary file. (Used by merge sort.)
319 */
320 int
geteasy(int flno,union f_handle filelist,int nfiles,RECHEADER * rec,u_char * end,struct field * dummy2)321 geteasy(int flno, union f_handle filelist, int nfiles, RECHEADER *rec,
322 u_char *end, struct field *dummy2)
323 {
324 int i;
325 FILE *fp;
326
327 fp = fstack[flno].fp;
328 if ((u_char *) rec > end - sizeof(TRECHEADER))
329 return (BUFFEND);
330 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
331 fclose(fp);
332 fstack[flno].fp = 0;
333 return (EOF);
334 }
335 if (end - rec->data < rec->length) {
336 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
337 ungetc(*((char *) rec + i), fp);
338 return (BUFFEND);
339 }
340 fread(rec->data, rec->length, 1, fp);
341 return (0);
342 }
343