1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14 
15 #include "private.h"
16 
17 #include "getopt.h"
18 #include <ctype.h>
19 
20 
/// Write the output to standard output. Set by -c/--stdout, by being
/// invoked under a name containing "xzcat" or "lzcat", and forced on
/// in test mode.
bool opt_stdout = false;

/// Set by -f/--force.
bool opt_force = false;

/// Keep the input file. Set by -k/--keep and forced on whenever
/// opt_stdout ends up being true.
bool opt_keep_original = false;

/// Set by --robot.
bool opt_robot = false;

/// Set by --ignore-check.
bool opt_ignore_check = false;

/// Name used in place of a real filename for standard input, for example
/// when --files or --files0 is given without an argument. It is never
/// modified or free()d, but it needs to be assigned to some non-const
/// pointers.
const char stdin_filename[] = "(stdin)";
30 
31 
/// Parse a memory usage limit and set it for compression and/or
/// decompression.
///
/// \param      name             Option name given to str_to_uint64() when
///                              the value is a plain number
/// \param      name_percentage  Option name given to str_to_uint64() when
///                              the value is a percentage
/// \param      str              The option argument. If it ends with '%',
///                              that character is overwritten with '\0'.
/// \param      set_compress     Passed through to hardware_memlimit_set()
/// \param      set_decompress   Passed through to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	const size_t length = strlen(str);
	const bool percent = length > 0 && str[length - 1] == '%';

	// Drop the trailing '%' so that only the number is left to parse.
	if (percent)
		str[length - 1] = '\0';

	// A percentage has to be in the range [1, 100]. A plain value may
	// be anything up to UINT64_MAX: on 32-bit systems SIZE_MAX would
	// make more sense, but UINT64_MAX is used still so that scripts
	// that assume > 4 GiB values don't break.
	const uint64_t limit = percent
			? str_to_uint64(name_percentage, str, 1, 100)
			: str_to_uint64(name, str, 0, UINT64_MAX);

	hardware_memlimit_set(limit, set_compress, set_decompress, percent);
}
56 
57 
58 static void
parse_block_list(char * str)59 parse_block_list(char *str)
60 {
61           // It must be non-empty and not begin with a comma.
62           if (str[0] == '\0' || str[0] == ',')
63                     message_fatal(_("%s: Invalid argument to --block-list"), str);
64 
65           // Count the number of comma-separated strings.
66           size_t count = 1;
67           for (size_t i = 0; str[i] != '\0'; ++i)
68                     if (str[i] == ',')
69                               ++count;
70 
71           // Prevent an unlikely integer overflow.
72           if (count > SIZE_MAX / sizeof(uint64_t) - 1)
73                     message_fatal(_("%s: Too many arguments to --block-list"),
74                                         str);
75 
76           // Allocate memory to hold all the sizes specified.
77           // If --block-list was specified already, its value is forgotten.
78           free(opt_block_list);
79           opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
80 
81           for (size_t i = 0; i < count; ++i) {
82                     // Locate the next comma and replace it with \0.
83                     char *p = strchr(str, ',');
84                     if (p != NULL)
85                               *p = '\0';
86 
87                     if (str[0] == '\0') {
88                               // There is no string, that is, a comma follows
89                               // another comma. Use the previous value.
90                               //
91                               // NOTE: We checked earler that the first char
92                               // of the whole list cannot be a comma.
93                               assert(i > 0);
94                               opt_block_list[i] = opt_block_list[i - 1];
95                     } else {
96                               opt_block_list[i] = str_to_uint64("block-list", str,
97                                                   0, UINT64_MAX);
98 
99                               // Zero indicates no more new Blocks.
100                               if (opt_block_list[i] == 0) {
101                                         if (i + 1 != count)
102                                                   message_fatal(_("0 can only be used "
103                                                                       "as the last element "
104                                                                       "in --block-list"));
105 
106                                         opt_block_list[i] = UINT64_MAX;
107                               }
108                     }
109 
110                     str = p + 1;
111           }
112 
113           // Terminate the array.
114           opt_block_list[count] = 0;
115           return;
116 }
117 
118 
119 static void
parse_real(args_info * args,int argc,char ** argv)120 parse_real(args_info *args, int argc, char **argv)
121 {
122           enum {
123                     OPT_X86 = INT_MIN,
124                     OPT_POWERPC,
125                     OPT_IA64,
126                     OPT_ARM,
127                     OPT_ARMTHUMB,
128                     OPT_SPARC,
129                     OPT_DELTA,
130                     OPT_LZMA1,
131                     OPT_LZMA2,
132 
133                     OPT_SINGLE_STREAM,
134                     OPT_NO_SPARSE,
135                     OPT_FILES,
136                     OPT_FILES0,
137                     OPT_BLOCK_SIZE,
138                     OPT_BLOCK_LIST,
139                     OPT_MEM_COMPRESS,
140                     OPT_MEM_DECOMPRESS,
141                     OPT_NO_ADJUST,
142                     OPT_INFO_MEMORY,
143                     OPT_ROBOT,
144                     OPT_FLUSH_TIMEOUT,
145                     OPT_IGNORE_CHECK,
146           };
147 
148           static const char short_opts[]
149                               = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
150 
151           static const struct option long_opts[] = {
152                     // Operation mode
153                     { "compress",     no_argument,       NULL,  'z' },
154                     { "decompress",   no_argument,       NULL,  'd' },
155                     { "uncompress",   no_argument,       NULL,  'd' },
156                     { "test",         no_argument,       NULL,  't' },
157                     { "list",         no_argument,       NULL,  'l' },
158 
159                     // Operation modifiers
160                     { "keep",         no_argument,       NULL,  'k' },
161                     { "force",        no_argument,       NULL,  'f' },
162                     { "stdout",       no_argument,       NULL,  'c' },
163                     { "to-stdout",    no_argument,       NULL,  'c' },
164                     { "single-stream", no_argument,      NULL,  OPT_SINGLE_STREAM },
165                     { "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
166                     { "suffix",       required_argument, NULL,  'S' },
167                     // { "recursive",      no_argument,       NULL,  'r' }, // TODO
168                     { "files",        optional_argument, NULL,  OPT_FILES },
169                     { "files0",       optional_argument, NULL,  OPT_FILES0 },
170 
171                     // Basic compression settings
172                     { "format",       required_argument, NULL,  'F' },
173                     { "check",        required_argument, NULL,  'C' },
174                     { "ignore-check", no_argument,       NULL,  OPT_IGNORE_CHECK },
175                     { "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
176                     { "block-list",  required_argument, NULL,  OPT_BLOCK_LIST },
177                     { "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
178                     { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
179                     { "memlimit",     required_argument, NULL,  'M' },
180                     { "memory",       required_argument, NULL,  'M' }, // Old alias
181                     { "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
182                     { "threads",      required_argument, NULL,  'T' },
183                     { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
184 
185                     { "extreme",      no_argument,       NULL,  'e' },
186                     { "fast",         no_argument,       NULL,  '0' },
187                     { "best",         no_argument,       NULL,  '9' },
188 
189                     // Filters
190                     { "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
191                     { "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
192                     { "x86",          optional_argument, NULL,  OPT_X86 },
193                     { "powerpc",      optional_argument, NULL,  OPT_POWERPC },
194                     { "ia64",         optional_argument, NULL,  OPT_IA64 },
195                     { "arm",          optional_argument, NULL,  OPT_ARM },
196                     { "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
197                     { "sparc",        optional_argument, NULL,  OPT_SPARC },
198                     { "delta",        optional_argument, NULL,  OPT_DELTA },
199 
200                     // Other options
201                     { "quiet",        no_argument,       NULL,  'q' },
202                     { "verbose",      no_argument,       NULL,  'v' },
203                     { "no-warn",      no_argument,       NULL,  'Q' },
204                     { "robot",        no_argument,       NULL,  OPT_ROBOT },
205                     { "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
206                     { "help",         no_argument,       NULL,  'h' },
207                     { "long-help",    no_argument,       NULL,  'H' },
208                     { "version",      no_argument,       NULL,  'V' },
209 
210                     { NULL,           0,                 NULL,   0 }
211           };
212 
213           int c;
214 
215           while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
216                               != -1) {
217                     switch (c) {
218                     // Compression preset (also for decompression if --format=raw)
219                     case '0': case '1': case '2': case '3': case '4':
220                     case '5': case '6': case '7': case '8': case '9':
221                               coder_set_preset(c - '0');
222                               break;
223 
224                     // --memlimit-compress
225                     case OPT_MEM_COMPRESS:
226                               parse_memlimit("memlimit-compress",
227                                                   "memlimit-compress%", optarg,
228                                                   true, false);
229                               break;
230 
231                     // --memlimit-decompress
232                     case OPT_MEM_DECOMPRESS:
233                               parse_memlimit("memlimit-decompress",
234                                                   "memlimit-decompress%", optarg,
235                                                   false, true);
236                               break;
237 
238                     // --memlimit
239                     case 'M':
240                               parse_memlimit("memlimit", "memlimit%", optarg,
241                                                   true, true);
242                               break;
243 
244                     // --suffix
245                     case 'S':
246                               suffix_set(optarg);
247                               break;
248 
249                     case 'T':
250                               // The max is from src/liblzma/common/common.h.
251                               hardware_threads_set(str_to_uint64("threads",
252                                                   optarg, 0, 16384));
253                               break;
254 
255                     // --version
256                     case 'V':
257                               // This doesn't return.
258                               message_version();
259 
260                     // --stdout
261                     case 'c':
262                               opt_stdout = true;
263                               break;
264 
265                     // --decompress
266                     case 'd':
267                               opt_mode = MODE_DECOMPRESS;
268                               break;
269 
270                     // --extreme
271                     case 'e':
272                               coder_set_extreme();
273                               break;
274 
275                     // --force
276                     case 'f':
277                               opt_force = true;
278                               break;
279 
280                     // --info-memory
281                     case OPT_INFO_MEMORY:
282                               // This doesn't return.
283                               hardware_memlimit_show();
284 
285                     // --help
286                     case 'h':
287                               // This doesn't return.
288                               message_help(false);
289 
290                     // --long-help
291                     case 'H':
292                               // This doesn't return.
293                               message_help(true);
294 
295                     // --list
296                     case 'l':
297                               opt_mode = MODE_LIST;
298                               break;
299 
300                     // --keep
301                     case 'k':
302                               opt_keep_original = true;
303                               break;
304 
305                     // --quiet
306                     case 'q':
307                               message_verbosity_decrease();
308                               break;
309 
310                     case 'Q':
311                               set_exit_no_warn();
312                               break;
313 
314                     case 't':
315                               opt_mode = MODE_TEST;
316                               break;
317 
318                     // --verbose
319                     case 'v':
320                               message_verbosity_increase();
321                               break;
322 
323                     // --robot
324                     case OPT_ROBOT:
325                               opt_robot = true;
326 
327                               // This is to make sure that floating point numbers
328                               // always have a dot as decimal separator.
329                               setlocale(LC_NUMERIC, "C");
330                               break;
331 
332                     case 'z':
333                               opt_mode = MODE_COMPRESS;
334                               break;
335 
336                     // Filter setup
337 
338                     case OPT_X86:
339                               coder_add_filter(LZMA_FILTER_X86,
340                                                   options_bcj(optarg));
341                               break;
342 
343                     case OPT_POWERPC:
344                               coder_add_filter(LZMA_FILTER_POWERPC,
345                                                   options_bcj(optarg));
346                               break;
347 
348                     case OPT_IA64:
349                               coder_add_filter(LZMA_FILTER_IA64,
350                                                   options_bcj(optarg));
351                               break;
352 
353                     case OPT_ARM:
354                               coder_add_filter(LZMA_FILTER_ARM,
355                                                   options_bcj(optarg));
356                               break;
357 
358                     case OPT_ARMTHUMB:
359                               coder_add_filter(LZMA_FILTER_ARMTHUMB,
360                                                   options_bcj(optarg));
361                               break;
362 
363                     case OPT_SPARC:
364                               coder_add_filter(LZMA_FILTER_SPARC,
365                                                   options_bcj(optarg));
366                               break;
367 
368                     case OPT_DELTA:
369                               coder_add_filter(LZMA_FILTER_DELTA,
370                                                   options_delta(optarg));
371                               break;
372 
373                     case OPT_LZMA1:
374                               coder_add_filter(LZMA_FILTER_LZMA1,
375                                                   options_lzma(optarg));
376                               break;
377 
378                     case OPT_LZMA2:
379                               coder_add_filter(LZMA_FILTER_LZMA2,
380                                                   options_lzma(optarg));
381                               break;
382 
383                     // Other
384 
385                     // --format
386                     case 'F': {
387                               // Just in case, support both "lzma" and "alone" since
388                               // the latter was used for forward compatibility in
389                               // LZMA Utils 4.32.x.
390                               static const struct {
391                                         char str[8];
392                                         enum format_type format;
393                               } types[] = {
394                                         { "auto",   FORMAT_AUTO },
395                                         { "xz",     FORMAT_XZ },
396                                         { "lzma",   FORMAT_LZMA },
397                                         { "alone",  FORMAT_LZMA },
398                                         // { "gzip",   FORMAT_GZIP },
399                                         // { "gz",     FORMAT_GZIP },
400                                         { "raw",    FORMAT_RAW },
401                               };
402 
403                               size_t i = 0;
404                               while (strcmp(types[i].str, optarg) != 0)
405                                         if (++i == ARRAY_SIZE(types))
406                                                   message_fatal(_("%s: Unknown file "
407                                                                       "format type"),
408                                                                       optarg);
409 
410                               opt_format = types[i].format;
411                               break;
412                     }
413 
414                     // --check
415                     case 'C': {
416                               static const struct {
417                                         char str[8];
418                                         lzma_check check;
419                               } types[] = {
420                                         { "none",   LZMA_CHECK_NONE },
421                                         { "crc32",  LZMA_CHECK_CRC32 },
422                                         { "crc64",  LZMA_CHECK_CRC64 },
423                                         { "sha256", LZMA_CHECK_SHA256 },
424                               };
425 
426                               size_t i = 0;
427                               while (strcmp(types[i].str, optarg) != 0) {
428                                         if (++i == ARRAY_SIZE(types))
429                                                   message_fatal(_("%s: Unsupported "
430                                                                       "integrity "
431                                                                       "check type"), optarg);
432                               }
433 
434                               // Use a separate check in case we are using different
435                               // liblzma than what was used to compile us.
436                               if (!lzma_check_is_supported(types[i].check))
437                                         message_fatal(_("%s: Unsupported integrity "
438                                                             "check type"), optarg);
439 
440                               coder_set_check(types[i].check);
441                               break;
442                     }
443 
444                     case OPT_IGNORE_CHECK:
445                               opt_ignore_check = true;
446                               break;
447 
448                     case OPT_BLOCK_SIZE:
449                               opt_block_size = str_to_uint64("block-size", optarg,
450                                                   0, LZMA_VLI_MAX);
451                               break;
452 
453                     case OPT_BLOCK_LIST: {
454                               parse_block_list(optarg);
455                               break;
456                     }
457 
458                     case OPT_SINGLE_STREAM:
459                               opt_single_stream = true;
460                               break;
461 
462                     case OPT_NO_SPARSE:
463                               io_no_sparse();
464                               break;
465 
466                     case OPT_FILES:
467                               args->files_delim = '\n';
468 
469                     // Fall through
470 
471                     case OPT_FILES0:
472                               if (args->files_name != NULL)
473                                         message_fatal(_("Only one file can be "
474                                                             "specified with `--files' "
475                                                             "or `--files0'."));
476 
477                               if (optarg == NULL) {
478                                         args->files_name = (char *)stdin_filename;
479                                         args->files_file = stdin;
480                               } else {
481                                         args->files_name = optarg;
482                                         args->files_file = fopen(optarg,
483                                                             c == OPT_FILES ? "r" : "rb");
484                                         if (args->files_file == NULL)
485                                                   message_fatal("%s: %s", optarg,
486                                                                       strerror(errno));
487                               }
488 
489                               break;
490 
491                     case OPT_NO_ADJUST:
492                               opt_auto_adjust = false;
493                               break;
494 
495                     case OPT_FLUSH_TIMEOUT:
496                               opt_flush_timeout = str_to_uint64("flush-timeout",
497                                                   optarg, 0, UINT64_MAX);
498                               break;
499 
500                     default:
501                               message_try_help();
502                               tuklib_exit(E_ERROR, E_ERROR, false);
503                     }
504           }
505 
506           return;
507 }
508 
509 
510 static void
parse_environment(args_info * args,char * argv0,const char * varname)511 parse_environment(args_info *args, char *argv0, const char *varname)
512 {
513           char *env = getenv(varname);
514           if (env == NULL)
515                     return;
516 
517           // We modify the string, so make a copy of it.
518           env = xstrdup(env);
519 
520           // Calculate the number of arguments in env. argc stats at one
521           // to include space for the program name.
522           int argc = 1;
523           bool prev_was_space = true;
524           for (size_t i = 0; env[i] != '\0'; ++i) {
525                     // NOTE: Cast to unsigned char is needed so that correct
526                     // value gets passed to isspace(), which expects
527                     // unsigned char cast to int. Casting to int is done
528                     // automatically due to integer promotion, but we need to
529                     // force char to unsigned char manually. Otherwise 8-bit
530                     // characters would get promoted to wrong value if
531                     // char is signed.
532                     if (isspace((unsigned char)env[i])) {
533                               prev_was_space = true;
534                     } else if (prev_was_space) {
535                               prev_was_space = false;
536 
537                               // Keep argc small enough to fit into a signed int
538                               // and to keep it usable for memory allocation.
539                               if (++argc == my_min(
540                                                   INT_MAX, SIZE_MAX / sizeof(char *)))
541                                         message_fatal(_("The environment variable "
542                                                             "%s contains too many "
543                                                             "arguments"), varname);
544                     }
545           }
546 
547           // Allocate memory to hold pointers to the arguments. Add one to get
548           // space for the terminating NULL (if some systems happen to need it).
549           char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
550           argv[0] = argv0;
551           argv[argc] = NULL;
552 
553           // Go through the string again. Split the arguments using '\0'
554           // characters and add pointers to the resulting strings to argv.
555           argc = 1;
556           prev_was_space = true;
557           for (size_t i = 0; env[i] != '\0'; ++i) {
558                     if (isspace((unsigned char)env[i])) {
559                               prev_was_space = true;
560                               env[i] = '\0';
561                     } else if (prev_was_space) {
562                               prev_was_space = false;
563                               argv[argc++] = env + i;
564                     }
565           }
566 
567           // Parse the argument list we got from the environment. All non-option
568           // arguments i.e. filenames are ignored.
569           parse_real(args, argc, argv);
570 
571           // Reset the state of the getopt_long() so that we can parse the
572           // command line options too. There are two incompatible ways to
573           // do it.
574 #ifdef HAVE_OPTRESET
575           // BSD
576           optind = 1;
577           optreset = 1;
578 #else
579           // GNU, Solaris
580           optind = 0;
581 #endif
582 
583           // We don't need the argument list from environment anymore.
584           free(argv);
585           free(env);
586 
587           return;
588 }
589 
590 
591 extern void
args_parse(args_info * args,int argc,char ** argv)592 args_parse(args_info *args, int argc, char **argv)
593 {
594           // Initialize those parts of *args that we need later.
595           args->files_name = NULL;
596           args->files_file = NULL;
597           args->files_delim = '\0';
598 
599           // Check how we were called.
600           {
601                     // Remove the leading path name, if any.
602                     const char *name = strrchr(argv[0], '/');
603                     if (name == NULL)
604                               name = argv[0];
605                     else
606                               ++name;
607 
608                     // NOTE: It's possible that name[0] is now '\0' if argv[0]
609                     // is weird, but it doesn't matter here.
610 
611                     // Look for full command names instead of substrings like
612                     // "un", "cat", and "lz" to reduce possibility of false
613                     // positives when the programs have been renamed.
614                     if (strstr(name, "xzcat") != NULL) {
615                               opt_mode = MODE_DECOMPRESS;
616                               opt_stdout = true;
617                     } else if (strstr(name, "unxz") != NULL) {
618                               opt_mode = MODE_DECOMPRESS;
619                     } else if (strstr(name, "lzcat") != NULL) {
620                               opt_format = FORMAT_LZMA;
621                               opt_mode = MODE_DECOMPRESS;
622                               opt_stdout = true;
623                     } else if (strstr(name, "unlzma") != NULL) {
624                               opt_format = FORMAT_LZMA;
625                               opt_mode = MODE_DECOMPRESS;
626                     } else if (strstr(name, "lzma") != NULL) {
627                               opt_format = FORMAT_LZMA;
628                     }
629           }
630 
631           // First the flags from the environment
632           parse_environment(args, argv[0], "XZ_DEFAULTS");
633           parse_environment(args, argv[0], "XZ_OPT");
634 
635           // Then from the command line
636           parse_real(args, argc, argv);
637 
638           // If encoder or decoder support was omitted at build time,
639           // show an error now so that the rest of the code can rely on
640           // that whatever is in opt_mode is also supported.
641 #ifndef HAVE_ENCODERS
642           if (opt_mode == MODE_COMPRESS)
643                     message_fatal(_("Compression support was disabled "
644                                         "at build time"));
645 #endif
646 #ifndef HAVE_DECODERS
647           // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS
648           // is the only valid choice.
649           if (opt_mode != MODE_COMPRESS)
650                     message_fatal(_("Decompression support was disabled "
651                                         "at build time"));
652 #endif
653 
654           // Never remove the source file when the destination is not on disk.
655           // In test mode the data is written nowhere, but setting opt_stdout
656           // will make the rest of the code behave well.
657           if (opt_stdout || opt_mode == MODE_TEST) {
658                     opt_keep_original = true;
659                     opt_stdout = true;
660           }
661 
662           // When compressing, if no --format flag was used, or it
663           // was --format=auto, we compress to the .xz format.
664           if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
665                     opt_format = FORMAT_XZ;
666 
667           // Compression settings need to be validated (options themselves and
668           // their memory usage) when compressing to any file format. It has to
669           // be done also when uncompressing raw data, since for raw decoding
670           // the options given on the command line are used to know what kind
671           // of raw data we are supposed to decode.
672           if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
673                     coder_set_compression_settings();
674 
675           // If no filenames are given, use stdin.
676           if (argv[optind] == NULL && args->files_name == NULL) {
677                     // We don't modify or free() the "-" constant. The caller
678                     // modifies this so don't make the struct itself const.
679                     static char *names_stdin[2] = { (char *)"-", NULL };
680                     args->arg_names = names_stdin;
681                     args->arg_count = 1;
682           } else {
683                     // We got at least one filename from the command line, or
684                     // --files or --files0 was specified.
685                     args->arg_names = argv + optind;
686                     args->arg_count = argc - optind;
687           }
688 
689           return;
690 }
691 
692 
693 #ifndef NDEBUG
694 extern void
args_free(void)695 args_free(void)
696 {
697           free(opt_block_list);
698           return;
699 }
700 #endif
701