xref: /NextBSD/contrib/binutils/binutils/strings.c (revision eb1a5f8de9f7ea602c373a710f531abbf81141c4)
1 /* strings -- print the strings of printable characters in files
2    Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3    2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
18    02110-1301, USA.  */
19 
20 /* Usage: strings [options] file...
21 
22    Options:
23    --all
24    -a
25    -		Do not scan only the initialized data section of object files.
26 
27    --print-file-name
28    -f		Print the name of the file before each string.
29 
30    --bytes=min-len
31    -n min-len
32    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
33 		that are followed by a NUL or a newline.  Default is 4.
34 
35    --radix={o,x,d}
36    -t {o,x,d}	Print the offset within the file before each string,
37 		in octal/hex/decimal.
38 
39    -o		Like -to.  (Some other implementations have -o like -to,
40 		others like -td.  We chose one arbitrarily.)
41 
42    --encoding={s,S,b,l,B,L}
43    -e {s,S,b,l,B,L}
44 		Select character encoding: 7-bit-character, 8-bit-character,
45 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
46 		littleendian 32-bit.
47 
48    --target=BFDNAME
49    -T {bfdname}
50 		Specify a non-default object file format.
51 
52    --help
53    -h		Print the usage message on the standard output.
54 
55    --version
56    -v		Print the program version number.
57 
58    Written by Richard Stallman <rms@gnu.ai.mit.edu>
59    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
60 
61 #include "sysdep.h"
62 #include "bfd.h"
63 #include "getopt.h"
64 #include "libiberty.h"
65 #include "safe-ctype.h"
66 #include <sys/stat.h>
67 #include "bucomm.h"
68 
/* Some platforms need to put stdin into binary mode, to read
    binary files.  SET_BINARY is only defined when HAVE_SETMODE is
    set and O_BINARY ends up non-zero; callers must test for it with
    #ifdef before using it.  */
#ifdef HAVE_SETMODE
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#define setmode _setmode
#else
#define O_BINARY 0
#endif
#endif
#if O_BINARY
#include <io.h>
#define SET_BINARY(f) do { if (!isatty (f)) setmode (f,O_BINARY); } while (0)
#endif
#endif

/* Non-zero when C should be treated as part of a string: C must lie
   in the range 0..255 and be a tab, satisfy ISPRINT, or -- only when
   the selected encoding is 'S' -- be greater than 127.  C is
   evaluated several times, so it must not have side effects.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127)))

#ifndef errno
extern int errno;
#endif

/* The BFD section flags that identify an initialized data section.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* File offset type and open routine: the 64-bit variants are used
   when HAVE_FOPEN64 is defined.  */
#ifdef HAVE_FOPEN64
typedef off64_t file_off;
#define file_open(s,m) fopen64(s, m)
#else
typedef off_t file_off;
#define file_open(s,m) fopen(s, m)
#endif
/* Stat buffer type and stat routine: the 64-bit variants are used
   when HAVE_STAT64 is defined.  */
#ifdef HAVE_STAT64
typedef struct stat64 statbuf;
#define file_stat(f,s) stat64(f, s)
#else
typedef struct stat statbuf;
#define file_stat(f,s) stat(f, s)
#endif
112 
/* Radix for printing addresses (must be 8, 10 or 16).  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.  */
static int string_min;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.  */
static bfd_boolean got_a_section;

/* The BFD object file format.  */
static char *target;

/* The character encoding format: one of the letters s, S, b, l, B
   or L.  ENCODING_BYTES is the number of bytes that make up one
   character in that encoding (1, 2 or 4); main derives it from
   ENCODING.  */
static char encoding;
static int encoding_bytes;

/* Long options accepted by getopt_long.  Each entry maps onto the
   short option letter that main handles.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};

/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.  A FILESIZE of zero
   means that the size has not been looked up yet.  */

typedef struct
{
  const char *  filename;
  bfd_size_type filesize;
} filename_and_size_t;

static void strings_a_section (bfd *, asection *, void *);
static bfd_boolean strings_object_file (const char *);
static bfd_boolean strings_file (char *file);
static int integer_arg (char *s);
static void print_strings (const char *, FILE *, file_off, int, int, char *);
static void usage (FILE *, int);
static long get_char (FILE *, file_off *, int *, char **);

int main (int, char **);
169 
170 int
main(int argc,char ** argv)171 main (int argc, char **argv)
172 {
173   int optc;
174   int exit_status = 0;
175   bfd_boolean files_given = FALSE;
176 
177 #if defined (HAVE_SETLOCALE)
178   setlocale (LC_ALL, "");
179 #endif
180   bindtextdomain (PACKAGE, LOCALEDIR);
181   textdomain (PACKAGE);
182 
183   program_name = argv[0];
184   xmalloc_set_program_name (program_name);
185 
186   expandargv (&argc, &argv);
187 
188   string_min = -1;
189   print_addresses = FALSE;
190   print_filenames = FALSE;
191   datasection_only = TRUE;
192   target = NULL;
193   encoding = 's';
194 
195   while ((optc = getopt_long (argc, argv, "afhHn:ot:e:T:Vv0123456789",
196 			      long_options, (int *) 0)) != EOF)
197     {
198       switch (optc)
199 	{
200 	case 'a':
201 	  datasection_only = FALSE;
202 	  break;
203 
204 	case 'f':
205 	  print_filenames = TRUE;
206 	  break;
207 
208 	case 'H':
209 	case 'h':
210 	  usage (stdout, 0);
211 
212 	case 'n':
213 	  string_min = integer_arg (optarg);
214 	  if (string_min < 1)
215 	    fatal (_("invalid number %s"), optarg);
216 	  break;
217 
218 	case 'o':
219 	  print_addresses = TRUE;
220 	  address_radix = 8;
221 	  break;
222 
223 	case 't':
224 	  print_addresses = TRUE;
225 	  if (optarg[1] != '\0')
226 	    usage (stderr, 1);
227 	  switch (optarg[0])
228 	    {
229 	    case 'o':
230 	      address_radix = 8;
231 	      break;
232 
233 	    case 'd':
234 	      address_radix = 10;
235 	      break;
236 
237 	    case 'x':
238 	      address_radix = 16;
239 	      break;
240 
241 	    default:
242 	      usage (stderr, 1);
243 	    }
244 	  break;
245 
246 	case 'T':
247 	  target = optarg;
248 	  break;
249 
250 	case 'e':
251 	  if (optarg[1] != '\0')
252 	    usage (stderr, 1);
253 	  encoding = optarg[0];
254 	  break;
255 
256 	case 'V':
257 	case 'v':
258 	  print_version ("strings");
259 	  break;
260 
261 	case '?':
262 	  usage (stderr, 1);
263 
264 	default:
265 	  if (string_min < 0)
266 	    string_min = optc - '0';
267 	  else
268 	    string_min = string_min * 10 + optc - '0';
269 	  break;
270 	}
271     }
272 
273   if (string_min < 0)
274     string_min = 4;
275 
276   switch (encoding)
277     {
278     case 'S':
279     case 's':
280       encoding_bytes = 1;
281       break;
282     case 'b':
283     case 'l':
284       encoding_bytes = 2;
285       break;
286     case 'B':
287     case 'L':
288       encoding_bytes = 4;
289       break;
290     default:
291       usage (stderr, 1);
292     }
293 
294   bfd_init ();
295   set_default_bfd_target ();
296 
297   if (optind >= argc)
298     {
299       datasection_only = FALSE;
300 #ifdef SET_BINARY
301       SET_BINARY (fileno (stdin));
302 #endif
303       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
304       files_given = TRUE;
305     }
306   else
307     {
308       for (; optind < argc; ++optind)
309 	{
310 	  if (strcmp (argv[optind], "-") == 0)
311 	    datasection_only = FALSE;
312 	  else
313 	    {
314 	      files_given = TRUE;
315 	      exit_status |= strings_file (argv[optind]) == FALSE;
316 	    }
317 	}
318     }
319 
320   if (!files_given)
321     usage (stderr, 1);
322 
323   return (exit_status);
324 }
325 
326 /* Scan section SECT of the file ABFD, whose printable name is in
327    ARG->filename and whose size might be in ARG->filesize.  If it
328    contains initialized data set `got_a_section' and print the
329    strings in it.
330 
331    FIXME: We ought to be able to return error codes/messages for
332    certain conditions.  */
333 
334 static void
strings_a_section(bfd * abfd,asection * sect,void * arg)335 strings_a_section (bfd *abfd, asection *sect, void *arg)
336 {
337   filename_and_size_t * filename_and_sizep;
338   bfd_size_type *filesizep;
339   bfd_size_type sectsize;
340   void *mem;
341 
342   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
343     return;
344 
345   sectsize = bfd_get_section_size (sect);
346 
347   if (sectsize <= 0)
348     return;
349 
350   /* Get the size of the file.  This might have been cached for us.  */
351   filename_and_sizep = (filename_and_size_t *) arg;
352   filesizep = & filename_and_sizep->filesize;
353 
354   if (*filesizep == 0)
355     {
356       struct stat st;
357 
358       if (bfd_stat (abfd, &st))
359 	return;
360 
361       /* Cache the result so that we do not repeatedly stat this file.  */
362       *filesizep = st.st_size;
363     }
364 
365   /* Compare the size of the section against the size of the file.
366      If the section is bigger then the file must be corrupt and
367      we should not try dumping it.  */
368   if (sectsize >= *filesizep)
369     return;
370 
371   mem = xmalloc (sectsize);
372 
373   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
374     {
375       got_a_section = TRUE;
376 
377       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
378 		     0, sectsize, mem);
379     }
380 
381   free (mem);
382 }
383 
384 /* Scan all of the sections in FILE, and print the strings
385    in the initialized data section(s).
386 
387    Return TRUE if successful,
388    FALSE if not (such as if FILE is not an object file).  */
389 
390 static bfd_boolean
strings_object_file(const char * file)391 strings_object_file (const char *file)
392 {
393   filename_and_size_t filename_and_size;
394   bfd *abfd;
395 
396   abfd = bfd_openr (file, target);
397 
398   if (abfd == NULL)
399     /* Treat the file as a non-object file.  */
400     return FALSE;
401 
402   /* This call is mainly for its side effect of reading in the sections.
403      We follow the traditional behavior of `strings' in that we don't
404      complain if we don't recognize a file to be an object file.  */
405   if (!bfd_check_format (abfd, bfd_object))
406     {
407       bfd_close (abfd);
408       return FALSE;
409     }
410 
411   got_a_section = FALSE;
412   filename_and_size.filename = file;
413   filename_and_size.filesize = 0;
414   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
415 
416   if (!bfd_close (abfd))
417     {
418       bfd_nonfatal (file);
419       return FALSE;
420     }
421 
422   return got_a_section;
423 }
424 
425 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
426 
427 static bfd_boolean
strings_file(char * file)428 strings_file (char *file)
429 {
430   statbuf st;
431 
432   if (file_stat (file, &st) < 0)
433     {
434       if (errno == ENOENT)
435 	non_fatal (_("'%s': No such file"), file);
436       else
437 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
438 		   file, strerror (errno));
439       return FALSE;
440     }
441 
442   /* If we weren't told to scan the whole file,
443      try to open it as an object file and only look at
444      initialized data sections.  If that fails, fall back to the
445      whole file.  */
446   if (!datasection_only || !strings_object_file (file))
447     {
448       FILE *stream;
449 
450       stream = file_open (file, FOPEN_RB);
451       if (stream == NULL)
452 	{
453 	  fprintf (stderr, "%s: ", program_name);
454 	  perror (file);
455 	  return FALSE;
456 	}
457 
458       print_strings (file, stream, (file_off) 0, 0, 0, (char *) 0);
459 
460       if (fclose (stream) == EOF)
461 	{
462 	  fprintf (stderr, "%s: ", program_name);
463 	  perror (file);
464 	  return FALSE;
465 	}
466     }
467 
468   return TRUE;
469 }
470 
471 /* Read the next character, return EOF if none available.
472    Assume that STREAM is positioned so that the next byte read
473    is at address ADDRESS in the file.
474 
475    If STREAM is NULL, do not read from it.
476    The caller can supply a buffer of characters
477    to be processed before the data in STREAM.
478    MAGIC is the address of the buffer and
479    MAGICCOUNT is how many characters are in it.  */
480 
481 static long
get_char(FILE * stream,file_off * address,int * magiccount,char ** magic)482 get_char (FILE *stream, file_off *address, int *magiccount, char **magic)
483 {
484   int c, i;
485   long r = EOF;
486   unsigned char buf[4];
487 
488   for (i = 0; i < encoding_bytes; i++)
489     {
490       if (*magiccount)
491 	{
492 	  (*magiccount)--;
493 	  c = *(*magic)++;
494 	}
495       else
496 	{
497 	  if (stream == NULL)
498 	    return EOF;
499 
500 	  /* Only use getc_unlocked if we found a declaration for it.
501 	     Otherwise, libc is not thread safe by default, and we
502 	     should not use it.  */
503 
504 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
505 	  c = getc_unlocked (stream);
506 #else
507 	  c = getc (stream);
508 #endif
509 	  if (c == EOF)
510 	    return EOF;
511 	}
512 
513       (*address)++;
514       buf[i] = c;
515     }
516 
517   switch (encoding)
518     {
519     case 'S':
520     case 's':
521       r = buf[0];
522       break;
523     case 'b':
524       r = (buf[0] << 8) | buf[1];
525       break;
526     case 'l':
527       r = buf[0] | (buf[1] << 8);
528       break;
529     case 'B':
530       r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
531 	((long) buf[2] << 8) | buf[3];
532       break;
533     case 'L':
534       r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
535 	((long) buf[3] << 24);
536       break;
537     }
538 
539   if (r == EOF)
540     return 0;
541 
542   return r;
543 }
544 
545 /* Find the strings in file FILENAME, read from STREAM.
546    Assume that STREAM is positioned so that the next byte read
547    is at address ADDRESS in the file.
548    Stop reading at address STOP_POINT in the file, if nonzero.
549 
550    If STREAM is NULL, do not read from it.
551    The caller can supply a buffer of characters
552    to be processed before the data in STREAM.
553    MAGIC is the address of the buffer and
554    MAGICCOUNT is how many characters are in it.
555    Those characters come at address ADDRESS and the data in STREAM follow.  */
556 
557 static void
print_strings(const char * filename,FILE * stream,file_off address,int stop_point,int magiccount,char * magic)558 print_strings (const char *filename, FILE *stream, file_off address,
559 	       int stop_point, int magiccount, char *magic)
560 {
561   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
562 
563   while (1)
564     {
565       file_off start;
566       int i;
567       long c;
568 
569       /* See if the next `string_min' chars are all graphic chars.  */
570     tryline:
571       if (stop_point && address >= stop_point)
572 	break;
573       start = address;
574       for (i = 0; i < string_min; i++)
575 	{
576 	  c = get_char (stream, &address, &magiccount, &magic);
577 	  if (c == EOF)
578 	    return;
579 	  if (! STRING_ISGRAPHIC (c))
580 	    /* Found a non-graphic.  Try again starting with next char.  */
581 	    goto tryline;
582 	  buf[i] = c;
583 	}
584 
585       /* We found a run of `string_min' graphic characters.  Print up
586 	 to the next non-graphic character.  */
587 
588       if (print_filenames)
589 	printf ("%s: ", filename);
590       if (print_addresses)
591 	switch (address_radix)
592 	  {
593 	  case 8:
594 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
595 	    if (sizeof (start) > sizeof (long))
596 	      printf ("%7llo ", (unsigned long long) start);
597 	    else
598 #else
599 # if !BFD_HOST_64BIT_LONG
600 	    if (start != (unsigned long) start)
601 	      printf ("++%7lo ", (unsigned long) start);
602 	    else
603 # endif
604 #endif
605 	      printf ("%7lo ", (unsigned long) start);
606 	    break;
607 
608 	  case 10:
609 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
610 	    if (sizeof (start) > sizeof (long))
611 	      printf ("%7lld ", (unsigned long long) start);
612 	    else
613 #else
614 # if !BFD_HOST_64BIT_LONG
615 	    if (start != (unsigned long) start)
616 	      printf ("++%7ld ", (unsigned long) start);
617 	    else
618 # endif
619 #endif
620 	      printf ("%7ld ", (long) start);
621 	    break;
622 
623 	  case 16:
624 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
625 	    if (sizeof (start) > sizeof (long))
626 	      printf ("%7llx ", (unsigned long long) start);
627 	    else
628 #else
629 # if !BFD_HOST_64BIT_LONG
630 	    if (start != (unsigned long) start)
631 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
632 		      (unsigned long) (start & 0xffffffff));
633 	    else
634 # endif
635 #endif
636 	      printf ("%7lx ", (unsigned long) start);
637 	    break;
638 	  }
639 
640       buf[i] = '\0';
641       fputs (buf, stdout);
642 
643       while (1)
644 	{
645 	  c = get_char (stream, &address, &magiccount, &magic);
646 	  if (c == EOF)
647 	    break;
648 	  if (! STRING_ISGRAPHIC (c))
649 	    break;
650 	  putchar (c);
651 	}
652 
653       putchar ('\n');
654     }
655 }
656 
/* Parse string S as a non-negative integer, using decimal radix by
   default, but allowing octal (leading `0') and hex (leading `0x' or
   `0X') numbers as in C.  Only digits valid in the selected radix are
   accepted.

   The number may be followed by a single `b' (multiply by 512) or
   `B' (multiply by 1024).  In a hex number those two letters are
   always taken as digits, never as suffixes.

   Return the value.  Any other trailing text is a fatal error.  */

static int
integer_arg (char *s)
{
  int value = 0;
  int radix = 10;
  char *p = s;
  int c;

  if (*p == '0')
    {
      p++;
      if (*p == 'x' || *p == 'X')
	{
	  radix = 16;
	  p++;
	}
      else
	radix = 8;
    }

  for (;;)
    {
      int digit;

      c = *p++;
      if (c >= '0' && c <= '9')
	digit = c - '0';
      else if (radix == 16 && c >= 'a' && c <= 'f')
	digit = c - 'a' + 10;
      else if (radix == 16 && c >= 'A' && c <= 'F')
	digit = c - 'A' + 10;
      else
	break;

      /* Rejects `8' and `9' in an octal number; the character is then
	 reported as trailing garbage below.  */
      if (digit >= radix)
	break;

      value = value * radix + digit;
    }

  /* C is the first character that is not part of the number and P
     points just past it.  */
  if (c == 'b')
    value *= 512;
  else if (c == 'B')
    value *= 1024;
  else
    p--;

  if (*p)
    fatal (_("invalid integer argument %s"), s);

  return value;
}
701 
702 static void
usage(FILE * stream,int status)703 usage (FILE *stream, int status)
704 {
705   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
706   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
707   fprintf (stream, _(" The options are:\n\
708   -a - --all                Scan the entire file, not just the data section\n\
709   -f --print-file-name      Print the name of the file before each string\n\
710   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
711   -<number>                 least [number] characters (default 4).\n\
712   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
713   -o                        An alias for --radix=o\n\
714   -T --target=<BFDNAME>     Specify the binary file format\n\
715   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
716                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
717   @<file>                   Read options from <file>\n\
718   -h --help                 Display this information\n\
719   -v --version              Print the program's version number\n"));
720   list_supported_targets (program_name, stream);
721   if (REPORT_BUGS_TO[0] && status == 0)
722     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
723   exit (status);
724 }
725