1 /*-
2 * Copyright (c) 2007 S.Sam Arun Raj
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
#include <sys/stat.h>
#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <libelf.h>
#include <libelftc.h>
#include <gelf.h>

#include "_elftc.h"
47
48 ELFTC_VCSID("$Id: strings.c 3174 2015-03-27 17:13:41Z emaste $");
49
/* Status codes returned by the file handlers and used as the exit status. */
enum return_code {
	RETURN_OK = 0,		/* processing succeeded */
	RETURN_NOINPUT = 1,	/* the input could not be opened */
	RETURN_SOFTWARE = 2	/* an internal step (stat, ELF, memory) failed */
};
55
/* Number base used when string offsets are printed (-o, -t). */
enum radix_style {
	RADIX_DECIMAL = 0,	/* -t d */
	RADIX_HEX = 1,		/* -t x */
	RADIX_OCTAL = 2		/* -t o, -o */
};
61
/* Character encodings selectable with -e. */
enum encoding_style {
	ENCODING_7BIT = 0,		/* -e s: one byte per character */
	ENCODING_8BIT = 1,		/* -e S: one byte, values above 127 allowed */
	ENCODING_16BIT_BIG = 2,		/* -e b: two bytes, big-endian */
	ENCODING_16BIT_LITTLE = 3,	/* -e l: two bytes, little-endian */
	ENCODING_32BIT_BIG = 4,		/* -e B: four bytes, big-endian */
	ENCODING_32BIT_LITTLE = 5	/* -e L: four bytes, little-endian */
};
70
/*
 * True when 'c' is in the range 0..255 and is a tab, a character accepted
 * by isprint(), or - only when the 8-bit encoding is selected - a value
 * above 127.  The argument is evaluated several times, so pass a plain
 * variable.
 */
#define	PRINTABLE(c)						\
	(((c) >= 0 && (c) <= 255) &&				\
	    (isprint((c)) || (c) == '\t' ||			\
	    ((c) > 127 && encoding == ENCODING_8BIT)))
75
76
77 static int encoding_size, entire_file, min_len, show_filename, show_loc;
78 static enum encoding_style encoding;
79 static enum radix_style radix;
80
/* Long options; each one maps onto the short option handled in main(). */
static const struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument, .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument, .flag = NULL,
	    .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument, .flag = NULL,
	    .val = 'e' },
	{ .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument, .flag = NULL,
	    .val = 'f' },
	{ .name = "radix", .has_arg = required_argument, .flag = NULL,
	    .val = 't' },
	{ .name = "version", .has_arg = no_argument, .flag = NULL,
	    .val = 'v' },
	/* Terminator required by getopt_long(). */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
91
/* Forward declarations for the routines defined below. */
long	getcharacter(void);
int	handle_file(const char *name);
int	handle_elf(const char *name, int fd);
int	handle_binary(const char *name, int fd);
int	find_strings(const char *name, off_t offset, off_t size);
void	show_version(void);
void	usage(void);
99
100 /*
101 * strings(1) extracts text(contiguous printable characters)
102 * from elf and binary files.
103 */
104 int
main(int argc,char ** argv)105 main(int argc, char **argv)
106 {
107 int ch, rc;
108
109 rc = RETURN_OK;
110 min_len = 0;
111 encoding_size = 1;
112 if (elf_version(EV_CURRENT) == EV_NONE)
113 errx(EXIT_FAILURE, "ELF library initialization failed: %s",
114 elf_errmsg(-1));
115
116 while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
117 strings_longopts, NULL)) != -1)
118 switch((char)ch) {
119 case 'a':
120 entire_file = 1;
121 break;
122 case 'e':
123 if (*optarg == 's') {
124 encoding = ENCODING_7BIT;
125 } else if (*optarg == 'S') {
126 encoding = ENCODING_8BIT;
127 } else if (*optarg == 'b') {
128 encoding = ENCODING_16BIT_BIG;
129 encoding_size = 2;
130 } else if (*optarg == 'B') {
131 encoding = ENCODING_32BIT_BIG;
132 encoding_size = 4;
133 } else if (*optarg == 'l') {
134 encoding = ENCODING_16BIT_LITTLE;
135 encoding_size = 2;
136 } else if (*optarg == 'L') {
137 encoding = ENCODING_32BIT_LITTLE;
138 encoding_size = 4;
139 } else
140 usage();
141 /* NOTREACHED */
142 break;
143 case 'f':
144 show_filename = 1;
145 break;
146 case 'n':
147 min_len = (int)strtoimax(optarg, (char**)NULL, 10);
148 break;
149 case 'o':
150 show_loc = 1;
151 radix = RADIX_OCTAL;
152 break;
153 case 't':
154 show_loc = 1;
155 if (*optarg == 'd')
156 radix = RADIX_DECIMAL;
157 else if (*optarg == 'o')
158 radix = RADIX_OCTAL;
159 else if (*optarg == 'x')
160 radix = RADIX_HEX;
161 else
162 usage();
163 /* NOTREACHED */
164 break;
165 case 'v':
166 case 'V':
167 show_version();
168 /* NOTREACHED */
169 case '0':
170 case '1':
171 case '2':
172 case '3':
173 case '4':
174 case '5':
175 case '6':
176 case '7':
177 case '8':
178 case '9':
179 min_len *= 10;
180 min_len += ch - '0';
181 break;
182 case 'h':
183 case '?':
184 default:
185 usage();
186 /* NOTREACHED */
187 }
188 argc -= optind;
189 argv += optind;
190
191 if (!min_len)
192 min_len = 4;
193 if (!*argv)
194 rc = handle_file("{standard input}");
195 else while (*argv) {
196 rc = handle_file(*argv);
197 argv++;
198 }
199 return (rc);
200 }
201
202 int
handle_file(const char * name)203 handle_file(const char *name)
204 {
205 int fd, rt;
206
207 if (name == NULL)
208 return (RETURN_NOINPUT);
209 if (strcmp("{standard input}", name) != 0) {
210 if (freopen(name, "rb", stdin) == NULL) {
211 warnx("'%s': %s", name, strerror(errno));
212 return (RETURN_NOINPUT);
213 }
214 } else {
215 return (find_strings(name, (off_t)0, (off_t)0));
216 }
217
218 fd = fileno(stdin);
219 if (fd < 0)
220 return (RETURN_NOINPUT);
221 rt = handle_elf(name, fd);
222 return (rt);
223 }
224
225 /*
226 * Files not understood by handle_elf, will be passed off here and will
227 * treated as a binary file. This would include text file, core dumps ...
228 */
229 int
handle_binary(const char * name,int fd)230 handle_binary(const char *name, int fd)
231 {
232 struct stat buf;
233
234 memset(&buf, 0, sizeof(struct stat));
235 (void) lseek(fd, (off_t)0, SEEK_SET);
236 if (!fstat(fd, &buf))
237 return (find_strings(name, (off_t)0, buf.st_size));
238 return (RETURN_SOFTWARE);
239 }
240
241 /*
242 * Will analyse a file to see if it ELF, other files including ar(1),
243 * core dumps are passed off and treated as flat binary files. Unlike
244 * GNU size in FreeBSD this routine will not treat ELF object from
245 * different archs as flat binary files(has to overridden using -a).
246 */
247 int
handle_elf(const char * name,int fd)248 handle_elf(const char *name, int fd)
249 {
250 GElf_Ehdr elfhdr;
251 GElf_Shdr shdr;
252 Elf *elf;
253 Elf_Scn *scn;
254 int rc;
255
256 rc = RETURN_OK;
257 /* If entire file is choosen, treat it as a binary file */
258 if (entire_file)
259 return (handle_binary(name, fd));
260
261 (void) lseek(fd, (off_t)0, SEEK_SET);
262 elf = elf_begin(fd, ELF_C_READ, NULL);
263 if (elf_kind(elf) != ELF_K_ELF) {
264 (void) elf_end(elf);
265 return (handle_binary(name, fd));
266 }
267
268 if (gelf_getehdr(elf, &elfhdr) == NULL) {
269 (void) elf_end(elf);
270 warnx("%s: ELF file could not be processed", name);
271 return (RETURN_SOFTWARE);
272 }
273
274 if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
275 (void) elf_end(elf);
276 return (handle_binary(name, fd));
277 } else {
278 scn = NULL;
279 while ((scn = elf_nextscn(elf, scn)) != NULL) {
280 if (gelf_getshdr(scn, &shdr) == NULL)
281 continue;
282 if (shdr.sh_type != SHT_NOBITS &&
283 (shdr.sh_flags & SHF_ALLOC) != 0) {
284 rc = find_strings(name, shdr.sh_offset,
285 shdr.sh_size);
286 }
287 }
288 }
289 (void) elf_end(elf);
290 return (rc);
291 }
292
293 /*
294 * Retrieves a character from input stream based on the encoding
295 * type requested.
296 */
297 long
getcharacter(void)298 getcharacter(void)
299 {
300 long rt;
301 int i;
302 char buf[4], c;
303
304 rt = EOF;
305 for(i = 0; i < encoding_size; i++) {
306 c = getc(stdin);
307 if (feof(stdin))
308 return (EOF);
309 buf[i] = c;
310 }
311
312 switch(encoding) {
313 case ENCODING_7BIT:
314 case ENCODING_8BIT:
315 rt = buf[0];
316 break;
317 case ENCODING_16BIT_BIG:
318 rt = (buf[0] << 8) | buf[1];
319 break;
320 case ENCODING_16BIT_LITTLE:
321 rt = buf[0] | (buf[1] << 8);
322 break;
323 case ENCODING_32BIT_BIG:
324 rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
325 ((long) buf[2] << 8) | buf[3];
326 break;
327 case ENCODING_32BIT_LITTLE:
328 rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
329 ((long) buf[3] << 24);
330 break;
331 }
332 return (rt);
333 }
334
335 /*
336 * Input stream stdin is read until the end of file is reached or until
337 * the section size is reached in case of ELF files. Contiguous
338 * characters of >= min_size(default 4) will be displayed.
339 */
340 int
find_strings(const char * name,off_t offset,off_t size)341 find_strings(const char *name, off_t offset, off_t size)
342 {
343 off_t cur_off, start_off;
344 char *obuf;
345 long c;
346 int i;
347
348 if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
349 (void) fprintf(stderr, "Unable to allocate memory: %s\n",
350 strerror(errno));
351 return (RETURN_SOFTWARE);
352 }
353
354 (void) fseeko(stdin, offset, SEEK_SET);
355 cur_off = offset;
356 start_off = 0;
357 while(1) {
358 if ((offset + size) && (cur_off >= offset + size))
359 break;
360 start_off = cur_off;
361 memset(obuf, 0, min_len+1);
362 for(i = 0; i < min_len; i++) {
363 c = getcharacter();
364 if (c == EOF && feof(stdin))
365 goto _exit1;
366 if (PRINTABLE(c)) {
367 obuf[i] = c;
368 obuf[i+1] = 0;
369 cur_off += encoding_size;
370 } else {
371 if (encoding == ENCODING_8BIT &&
372 (uint8_t)c > 127) {
373 obuf[i] = c;
374 obuf[i+1] = 0;
375 cur_off += encoding_size;
376 continue;
377 }
378 cur_off += encoding_size;
379 break;
380 }
381 }
382
383 if (i >= min_len && ((cur_off <= offset + size) ||
384 !(offset + size))) {
385 if (show_filename)
386 printf ("%s: ", name);
387 if (show_loc) {
388 switch(radix) {
389 case RADIX_DECIMAL:
390 (void) printf("%7ju ",
391 (uintmax_t)start_off);
392 break;
393 case RADIX_HEX:
394 (void) printf("%7jx ",
395 (uintmax_t)start_off);
396 break;
397 case RADIX_OCTAL:
398 (void) printf("%7jo ",
399 (uintmax_t)start_off);
400 break;
401 }
402 }
403 printf("%s", obuf);
404
405 while(1) {
406 if ((offset + size) &&
407 (cur_off >= offset + size))
408 break;
409 c = getcharacter();
410 cur_off += encoding_size;
411 if (encoding == ENCODING_8BIT &&
412 (uint8_t)c > 127) {
413 putchar(c);
414 continue;
415 }
416 if (!PRINTABLE(c) || c == EOF)
417 break;
418 putchar(c);
419 }
420 putchar('\n');
421 }
422 }
423 _exit1:
424 free(obuf);
425 return (RETURN_OK);
426 }
427
/*
 * Help text written by usage().  The single %s conversion is filled with
 * the program name.
 */
428 #define USAGE_MESSAGE "\
429 Usage: %s [options] [file...]\n\
430 Print contiguous sequences of printable characters.\n\n\
431 Options:\n\
432 -a | --all Scan the entire file for strings.\n\
433 -e ENC | --encoding=ENC Select the character encoding to use.\n\
434 -f | --print-file-name Print the file name before each string.\n\
435 -h | --help Print a help message and exit.\n\
436 -n N | --bytes=N | -N Print sequences with 'N' or more characters.\n\
437 -o Print offsets in octal.\n\
438 -t R | --radix=R Print offsets using the radix named by 'R'.\n\
439 -v | --version Print a version identifier and exit.\n"
440
441 void
usage(void)442 usage(void)
443 {
444 (void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
445 exit(EXIT_FAILURE);
446 }
447
/*
 * Prints the program name followed by the elftc_version() string in
 * parentheses, then exits with EXIT_SUCCESS.
 */
void
show_version(void)
{
	const char *progname, *version;

	progname = ELFTC_GETPROGNAME();
	version = elftc_version();
	(void) printf("%s (%s)\n", progname, version);
	exit(EXIT_SUCCESS);
}
454