1 /* $OpenBSD: grep.c,v 1.33 2005/04/03 19:18:33 jaredy Exp $ */
2
3 /*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/types.h>
30 #include <sys/limits.h>
31 #include <sys/stat.h>
32 #include <sys/queue.h>
33
34 #include <ctype.h>
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <regex.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43
44 #include "grep.h"
45
/* Flags passed to regcomp() and regexec() */
int cflags;	/* regcomp() flags; main() ORs in REG_ICASE (-i/-y) and REG_EXTENDED (-E) */
int eflags = REG_STARTEND;	/* not modified anywhere in this file */

/*
 * Set by add_pattern() when an empty pattern is supplied and by
 * read_patterns() when a blank line is followed by further content.
 * Once set, add_pattern() stores no more patterns.
 */
int matchall; /* shortcut */
/*
 * patterns is the number of slots of pattern[] in use; pattern_sz is the
 * number of slots allocated (grown by add_pattern()).
 */
int patterns, pattern_sz;
char **pattern;		/* NUL-terminated pattern strings built by add_pattern() */
/*
 * Per-pattern compiled forms, allocated in main() with one element per
 * entry of pattern[].  fg_pattern[i] is filled by fgrepcomp() (with -F) or
 * fastcomp(); r_pattern[i] is filled by regcomp() only when fastcomp()
 * returns non-zero.
 */
regex_t *r_pattern;
fastgrep_t *fg_pattern;

/* For regex errors: receives the regerror() message printed by main() */
char re_error[RE_ERROR_BUF + 1];
58
/*
 * Command-line flags, set by main() while parsing options.
 * Eflag, Fflag, Gflag and Zflag may also be set from the first letters of
 * the program name before any option is parsed.
 */
int Aflag;	/* -A x: print x lines trailing each match (also set by -C and -NUM) */
int Bflag;	/* -B x: print x lines leading each match (also set by -C and -NUM) */
int Eflag;	/* -E: interpret pattern as extended regexp */
int Fflag;	/* -F: interpret pattern as list of fixed strings */
int Gflag;	/* -G: interpret pattern as basic regexp */
int Hflag;	/* -H: if -R, follow explicitly listed symlinks */
int Lflag;	/* -L: only show names of files with no matches */
int Pflag;	/* -P: if -R, no symlinks are followed */
int Rflag;	/* -R: recursively search directory trees (-r is a synonym) */
int Sflag;	/* -S: if -R, follow all symlinks */
int Vflag;	/* -V: display version information */
#ifndef NOZ
int Zflag;	/* -Z: decompress input before processing */
#endif
/* NOTE(review): the long form of -b is --byte-offset; "block numbers" may be stale wording */
int bflag;	/* -b: show block numbers for each match */
int cflag;	/* -c: only show a count of matching lines */
int hflag;	/* -h: don't print filename headers (clears oflag) */
int iflag;	/* -i: ignore case (-y is a synonym) */
int lflag;	/* -l: only show names of files with matches */
int nflag;	/* -n: show line numbers in front of matching lines */
int oflag;	/* -o: always print file name (also set by -R/-r; clears hflag) */
int qflag;	/* -q: quiet mode (don't output anything); also set by -l and -L */
int sflag;	/* -s: silent mode (ignore errors) */
int vflag;	/* -v: only show non-matching lines */
int wflag;	/* -w: pattern must start and end on word boundaries */
int xflag;	/* -x: pattern must match entire line */
int lbflag;	/* --line-buffered: main() calls setlinebuf(stdout) */

/*
 * Binary-file handling mode.  BIN_FILE_BIN is the default and is also
 * selected by -U or --binary-files=binary; BIN_FILE_SKIP by -I or
 * --binary-files=without-match; BIN_FILE_TEXT by -a or --binary-files=text.
 */
int binbehave = BIN_FILE_BIN;
89
/*
 * Return codes for long options that have no single-letter equivalent.
 * Numbering starts at CHAR_MAX + 1 so that none of them can collide with a
 * character option code returned by getopt_long().
 */
enum {
	BIN_OPT = CHAR_MAX + 1,	/* --binary-files=value */
	HELP_OPT,		/* --help */
	MMAP_OPT,		/* --mmap */
	LINEBUF_OPT		/* --line-buffered */
};
96
/*
 * Housekeeping.
 * None of these three is referenced in this file; presumably they are
 * maintained by the output code elsewhere -- confirm there.
 */
int first;	/* flag whether or not this is our first match */
int tail;	/* lines left to print */
int lead;	/* number of lines in leading context queue */

/*
 * One node per -f option.  pf_file points at the option argument itself
 * (optarg); the string is not copied.
 */
struct patfile {
	const char *pf_file;
	SLIST_ENTRY(patfile) pf_next;
};
/*
 * Pattern files collected during option parsing.  main() inserts each new
 * node at the head, so the files are read in reverse command-line order.
 */
SLIST_HEAD(, patfile) patfilelh;

/* Program name, defined outside this file; used by usage() and by main() to pick a default mode */
extern char *__progname;
109
110 static __dead void
usage(void)111 usage(void)
112 {
113 fprintf(stderr,
114 #ifdef NOZ
115 "usage: %s [-abcEFGHhIiLlnoPqRSsUVvwx] [-A num] [-B num] [-C[num]]\n"
116 #else
117 "usage: %s [-abcEFGHhIiLlnoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
118 #endif
119 "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
120 "\t[--line-buffered] [pattern] [file ...]\n", __progname);
121 exit(2);
122 }
123
/*
 * Short-option string handed to getopt_long() by main().
 *
 * The digits 0-9 are listed as options so that "-NUM" can be used to set
 * the context size.  A, B, e and f take an argument.  C is listed without
 * one, so a numeric argument to the context option is only taken through
 * the long form --context=num (declared optional_argument in long_options).
 * Z is only accepted when the program is built without NOZ.
 */
#ifdef NOZ
static const char *optstr = "0123456789A:B:CEFGHILPSRUVabce:f:hilnoqrsuvwxy";
#else
static const char *optstr = "0123456789A:B:CEFGHILPSRUVZabce:f:hilnoqrsuvwxy";
#endif
129
/*
 * Long-option table for getopt_long().  Every entry has a NULL flag
 * pointer, so getopt_long() returns the value in the last column and
 * main() handles it in the same switch as the short options.
 *
 * Notes:
 *  - the first four entries return the long-only codes from the enum above;
 *  - "quiet" and "silent" both map to 'q';
 *  - "mmap" and "unix-byte-offsets" are accepted and ignored by main();
 *  - "devices" returns 'D', for which main() has no case, so it ends up in
 *    the default branch and prints the usage message.
 */
struct option long_options[] =
{
	{"binary-files",	required_argument,	NULL, BIN_OPT},
	{"help",		no_argument,		NULL, HELP_OPT},
	{"mmap",		no_argument,		NULL, MMAP_OPT},
	{"line-buffered",	no_argument,		NULL, LINEBUF_OPT},
	{"after-context",	required_argument,	NULL, 'A'},
	{"before-context",	required_argument,	NULL, 'B'},
	{"context",		optional_argument,	NULL, 'C'},
	{"devices",		required_argument,	NULL, 'D'},
	{"extended-regexp",	no_argument,		NULL, 'E'},
	{"fixed-strings",	no_argument,		NULL, 'F'},
	{"basic-regexp",	no_argument,		NULL, 'G'},
	{"binary",		no_argument,		NULL, 'U'},
	{"version",		no_argument,		NULL, 'V'},
	{"text",		no_argument,		NULL, 'a'},
	{"byte-offset",		no_argument,		NULL, 'b'},
	{"count",		no_argument,		NULL, 'c'},
	{"regexp",		required_argument,	NULL, 'e'},
	{"file",		required_argument,	NULL, 'f'},
	{"no-filename",		no_argument,		NULL, 'h'},
	{"ignore-case",		no_argument,		NULL, 'i'},
	{"files-without-match",	no_argument,		NULL, 'L'},
	{"files-with-matches",	no_argument,		NULL, 'l'},
	{"line-number",		no_argument,		NULL, 'n'},
	{"quiet",		no_argument,		NULL, 'q'},
	{"silent",		no_argument,		NULL, 'q'},
	{"recursive",		no_argument,		NULL, 'r'},
	{"no-messages",		no_argument,		NULL, 's'},
	{"revert-match",	no_argument,		NULL, 'v'},
	{"word-regexp",		no_argument,		NULL, 'w'},
	{"line-regexp",		no_argument,		NULL, 'x'},
	{"unix-byte-offsets",	no_argument,		NULL, 'u'},
#ifndef NOZ
	{"decompress",		no_argument,		NULL, 'Z'},
#endif
	{NULL,			no_argument,		NULL, 0}
};
168
169
170 static void
add_pattern(char * pat,size_t len)171 add_pattern(char *pat, size_t len)
172 {
173 if (len == 0 || matchall) {
174 matchall = 1;
175 return;
176 }
177 if (patterns == pattern_sz) {
178 pattern_sz *= 2;
179 pattern = grep_realloc(pattern, ++pattern_sz * sizeof(*pattern));
180 }
181 if (pat[len - 1] == '\n')
182 --len;
183 /* pat may not be NUL-terminated */
184 if (wflag && !Fflag) {
185 int bol = 0, eol = 0, extra;
186 if (pat[0] == '^')
187 bol = 1;
188 if (pat[len - 1] == '$')
189 eol = 1;
190 extra = Eflag ? 2 : 4;
191 pattern[patterns] = grep_malloc(len + 15 + extra);
192 snprintf(pattern[patterns], len + 15 + extra,
193 "%s[[:<:]]%s%.*s%s[[:>:]]%s",
194 bol ? "^" : "",
195 Eflag ? "(" : "\\(",
196 (int)len - bol - eol, pat + bol,
197 Eflag ? ")" : "\\)",
198 eol ? "$" : "");
199 len += 14 + extra;
200 } else {
201 pattern[patterns] = grep_malloc(len + 1);
202 memcpy(pattern[patterns], pat, len);
203 pattern[patterns][len] = '\0';
204 }
205 ++patterns;
206 }
207
208 static void
read_patterns(const char * fn)209 read_patterns(const char *fn)
210 {
211 FILE *f;
212 char *line;
213 size_t len;
214 int nl;
215
216 if ((f = fopen(fn, "r")) == NULL)
217 err(2, "%s", fn);
218 nl = 0;
219 while ((line = fgetln(f, &len)) != NULL) {
220 if (*line == '\n') {
221 ++nl;
222 continue;
223 }
224 if (nl) {
225 matchall = 1;
226 break;
227 }
228 nl = 0;
229 add_pattern(line, len);
230 }
231 if (ferror(f))
232 err(2, "%s", fn);
233 fclose(f);
234 }
235
236 int
main(int argc,char * argv[])237 main(int argc, char *argv[])
238 {
239 int c, lastc, prevoptind, newarg, i;
240 struct patfile *patfile, *pf_next;
241 long l;
242 char *ep;
243
244 SLIST_INIT(&patfilelh);
245 switch (__progname[0]) {
246 case 'e':
247 Eflag++;
248 break;
249 case 'f':
250 Fflag++;
251 break;
252 case 'g':
253 Gflag++;
254 break;
255 #ifndef NOZ
256 case 'z':
257 Zflag++;
258 switch(__progname[1]) {
259 case 'e':
260 Eflag++;
261 break;
262 case 'f':
263 Fflag++;
264 break;
265 case 'g':
266 Gflag++;
267 break;
268 }
269 break;
270 #endif
271 }
272
273 lastc = '\0';
274 newarg = 1;
275 prevoptind = 1;
276 while ((c = getopt_long(argc, argv, optstr,
277 long_options, NULL)) != -1) {
278 switch (c) {
279 case '0': case '1': case '2': case '3': case '4':
280 case '5': case '6': case '7': case '8': case '9':
281 if (newarg || !isdigit(lastc))
282 Aflag = 0;
283 else if (Aflag > INT_MAX / 10)
284 errx(2, "context out of range");
285 Aflag = Bflag = (Aflag * 10) + (c - '0');
286 break;
287 case 'A':
288 case 'B':
289 l = strtol(optarg, &ep, 10);
290 if (ep == optarg || *ep != '\0' ||
291 l <= 0 || l >= INT_MAX)
292 errx(2, "context out of range");
293 if (c == 'A')
294 Aflag = (int)l;
295 else
296 Bflag = (int)l;
297 break;
298 case 'C':
299 if (optarg == NULL)
300 Aflag = Bflag = 2;
301 else {
302 l = strtol(optarg, &ep, 10);
303 if (ep == optarg || *ep != '\0' ||
304 l <= 0 || l >= INT_MAX)
305 errx(2, "context out of range");
306 Aflag = Bflag = (int)l;
307 }
308 break;
309 case 'E':
310 Fflag = Gflag = 0;
311 Eflag++;
312 break;
313 case 'F':
314 Eflag = Gflag = 0;
315 Fflag++;
316 break;
317 case 'G':
318 Eflag = Fflag = 0;
319 Gflag++;
320 break;
321 case 'H':
322 Hflag++;
323 break;
324 case 'I':
325 binbehave = BIN_FILE_SKIP;
326 break;
327 case 'L':
328 lflag = 0;
329 Lflag = qflag = 1;
330 break;
331 case 'P':
332 Pflag++;
333 break;
334 case 'S':
335 Sflag++;
336 break;
337 case 'R':
338 case 'r':
339 Rflag++;
340 oflag++;
341 break;
342 case 'U':
343 binbehave = BIN_FILE_BIN;
344 break;
345 case 'V':
346 fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN);
347 exit(0);
348 break;
349 #ifndef NOZ
350 case 'Z':
351 Zflag++;
352 break;
353 #endif
354 case 'a':
355 binbehave = BIN_FILE_TEXT;
356 break;
357 case 'b':
358 bflag = 1;
359 break;
360 case 'c':
361 cflag = 1;
362 break;
363 case 'e':
364 add_pattern(optarg, strlen(optarg));
365 break;
366 case 'f':
367 patfile = grep_malloc(sizeof(*patfile));
368 patfile->pf_file = optarg;
369 SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next);
370 break;
371 case 'h':
372 oflag = 0;
373 hflag = 1;
374 break;
375 case 'i':
376 case 'y':
377 iflag = 1;
378 cflags |= REG_ICASE;
379 break;
380 case 'l':
381 Lflag = 0;
382 lflag = qflag = 1;
383 break;
384 case 'n':
385 nflag = 1;
386 break;
387 case 'o':
388 hflag = 0;
389 oflag = 1;
390 break;
391 case 'q':
392 qflag = 1;
393 break;
394 case 's':
395 sflag = 1;
396 break;
397 case 'v':
398 vflag = 1;
399 break;
400 case 'w':
401 wflag = 1;
402 break;
403 case 'x':
404 xflag = 1;
405 break;
406 case BIN_OPT:
407 if (strcmp("binary", optarg) == 0)
408 binbehave = BIN_FILE_BIN;
409 else if (strcmp("without-match", optarg) == 0)
410 binbehave = BIN_FILE_SKIP;
411 else if (strcmp("text", optarg) == 0)
412 binbehave = BIN_FILE_TEXT;
413 else
414 errx(2, "Unknown binary-files option");
415 break;
416 case 'u':
417 case MMAP_OPT:
418 /* default, compatibility */
419 break;
420 case LINEBUF_OPT:
421 lbflag = 1;
422 break;
423 case HELP_OPT:
424 default:
425 usage();
426 }
427 lastc = c;
428 newarg = optind != prevoptind;
429 prevoptind = optind;
430 }
431 argc -= optind;
432 argv += optind;
433
434 for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL;
435 patfile = pf_next) {
436 pf_next = SLIST_NEXT(patfile, pf_next);
437 read_patterns(patfile->pf_file);
438 free(patfile);
439 }
440
441 if (argc == 0 && patterns == 0)
442 usage();
443
444 if (patterns == 0) {
445 add_pattern(*argv, strlen(*argv));
446 --argc;
447 ++argv;
448 }
449
450 if (Eflag)
451 cflags |= REG_EXTENDED;
452 fg_pattern = grep_malloc(patterns * sizeof(*fg_pattern));
453 r_pattern = grep_malloc(patterns * sizeof(*r_pattern));
454 for (i = 0; i < patterns; ++i) {
455 /* Check if cheating is allowed (always is for fgrep). */
456 if (Fflag) {
457 fgrepcomp(&fg_pattern[i], pattern[i]);
458 } else {
459 if (fastcomp(&fg_pattern[i], pattern[i])) {
460 /* Fall back to full regex library */
461 c = regcomp(&r_pattern[i], pattern[i], cflags);
462 if (c != 0) {
463 regerror(c, &r_pattern[i], re_error,
464 RE_ERROR_BUF);
465 errx(2, "%s", re_error);
466 }
467 }
468 }
469 }
470
471 if (lbflag)
472 setlinebuf(stdout);
473
474 if ((argc == 0 || argc == 1) && !oflag)
475 hflag = 1;
476
477 if (argc == 0)
478 exit(!procfile(NULL));
479
480 if (Rflag)
481 c = grep_tree(argv);
482 else
483 for (c = 0; argc--; ++argv)
484 c += procfile(*argv);
485
486 exit(!c);
487 }
488