1 /* quotearg.c - quote arguments for output
2 
3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005 Free Software
4    Foundation, Inc.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 #include <sys/cdefs.h>
20 __RCSID("$NetBSD: quotearg.c,v 1.3 2016/05/17 14:00:09 christos Exp $");
21 
22 
23 /* Written by Paul Eggert <eggert@twinsun.com> */
24 
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28 
29 #include "quotearg.h"
30 
31 #include "xalloc.h"
32 
33 #include <ctype.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stdbool.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #include "gettext.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
43 
44 #if HAVE_WCHAR_H
45 
46 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared.  */
47 # include <stdio.h>
48 # include <time.h>
49 
50 # include <wchar.h>
51 #endif
52 
#if !HAVE_MBRTOWC
/* No usable mbrtowc: switch multibyte processing off completely.
   Because MB_CUR_MAX is forced to 1, the replacement macros below
   serve only as documentation and to keep the code syntactically
   valid.  */
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
# define iswprint(wc) isprint ((unsigned char) (wc))
# undef HAVE_MBSINIT
#endif

/* Without mbsinit, report every conversion state as the initial one.  */
#if !defined mbsinit && !HAVE_MBSINIT
# define mbsinit(ps) 1
#endif

/* Obtain iswprint from <wctype.h> when available; failing that,
   consider every wide character printable.  */
#if !defined iswprint
# if HAVE_WCTYPE_H
#  include <wctype.h>
# endif
# if !defined iswprint && !HAVE_ISWPRINT
#  define iswprint(wc) 1
# endif
#endif

/* Fallback for systems whose headers do not provide SIZE_MAX.  */
#if !defined SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* Number of bits in one int; the unit of the quote_these_too bit
   vector.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)
82 
83 struct quoting_options
84 {
85   /* Basic quoting style.  */
86   enum quoting_style style;
87 
88   /* Quote the characters indicated by this bit vector even if the
89      quoting style would not normally require them to be quoted.  */
90   unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
91 };
92 
/* User-visible names of the quoting styles, terminated by a null
   pointer.  Entry N names the style stored in quoting_style_vals[N].  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  NULL
};
105 
106 /* Correspondences to quoting style names.  */
107 enum quoting_style const quoting_style_vals[] =
108 {
109   literal_quoting_style,
110   shell_quoting_style,
111   shell_always_quoting_style,
112   c_quoting_style,
113   escape_quoting_style,
114   locale_quoting_style,
115   clocale_quoting_style
116 };
117 
118 /* The default quoting options.  */
119 static struct quoting_options default_quoting_options;
120 
121 /* Allocate a new set of quoting options, with contents initially identical
122    to O if O is not null, or to the default if O is null.
123    It is the caller's responsibility to free the result.  */
124 struct quoting_options *
clone_quoting_options(struct quoting_options * o)125 clone_quoting_options (struct quoting_options *o)
126 {
127   int e = errno;
128   struct quoting_options *p = xmalloc (sizeof *p);
129   *p = *(o ? o : &default_quoting_options);
130   errno = e;
131   return p;
132 }
133 
134 /* Get the value of O's quoting style.  If O is null, use the default.  */
135 enum quoting_style
get_quoting_style(struct quoting_options * o)136 get_quoting_style (struct quoting_options *o)
137 {
138   return (o ? o : &default_quoting_options)->style;
139 }
140 
141 /* In O (or in the default if O is null),
142    set the value of the quoting style to S.  */
143 void
set_quoting_style(struct quoting_options * o,enum quoting_style s)144 set_quoting_style (struct quoting_options *o, enum quoting_style s)
145 {
146   (o ? o : &default_quoting_options)->style = s;
147 }
148 
149 /* In O (or in the default if O is null),
150    set the value of the quoting options for character C to I.
151    Return the old value.  Currently, the only values defined for I are
152    0 (the default) and 1 (which means to quote the character even if
153    it would not otherwise be quoted).  */
154 int
set_char_quoting(struct quoting_options * o,char c,int i)155 set_char_quoting (struct quoting_options *o, char c, int i)
156 {
157   unsigned char uc = c;
158   unsigned int *p =
159     (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
160   int shift = uc % INT_BITS;
161   int r = (*p >> shift) & 1;
162   *p ^= ((i & 1) ^ r) << shift;
163   return r;
164 }
165 
166 /* MSGID approximates a quotation mark.  Return its translation if it
167    has one; otherwise, return either it or "\"", depending on S.  */
168 static char const *
gettext_quote(char const * msgid,enum quoting_style s)169 gettext_quote (char const *msgid, enum quoting_style s)
170 {
171   char const *translation = _(msgid);
172   if (translation == msgid && s == clocale_quoting_style)
173     translation = "\"";
174   return translation;
175 }
176 
177 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
178    argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
179    non-quoting-style part of O to control quoting.
180    Terminate the output with a null character, and return the written
181    size of the output, not counting the terminating null.
182    If BUFFERSIZE is too small to store the output string, return the
183    value that would have been returned had BUFFERSIZE been large enough.
184    If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
185 
186    This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
187    ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
188    style specified by O, and O may not be null.  */
189 
190 static size_t
quotearg_buffer_restyled(char * buffer,size_t buffersize,char const * arg,size_t argsize,enum quoting_style quoting_style,struct quoting_options const * o)191 quotearg_buffer_restyled (char *buffer, size_t buffersize,
192                                 char const *arg, size_t argsize,
193                                 enum quoting_style quoting_style,
194                                 struct quoting_options const *o)
195 {
196   size_t i;
197   size_t len = 0;
198   char const *quote_string = 0;
199   size_t quote_string_len = 0;
200   bool backslash_escapes = false;
201   bool unibyte_locale = MB_CUR_MAX == 1;
202 
203 #define STORE(c) \
204     do \
205       { \
206           if (len < buffersize) \
207             buffer[len] = (c); \
208           len++; \
209       } \
210     while (0)
211 
212   switch (quoting_style)
213     {
214     case c_quoting_style:
215       STORE ('"');
216       backslash_escapes = true;
217       quote_string = "\"";
218       quote_string_len = 1;
219       break;
220 
221     case escape_quoting_style:
222       backslash_escapes = true;
223       break;
224 
225     case locale_quoting_style:
226     case clocale_quoting_style:
227       {
228           /* TRANSLATORS:
229              Get translations for open and closing quotation marks.
230 
231              The message catalog should translate "`" to a left
232              quotation mark suitable for the locale, and similarly for
233              "'".  If the catalog has no translation,
234              locale_quoting_style quotes `like this', and
235              clocale_quoting_style quotes "like this".
236 
237              For example, an American English Unicode locale should
238              translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
239              should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
240              MARK).  A British English Unicode locale should instead
241              translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
242              U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
243 
244              If you don't know what to put here, please see
245              <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
246              and use glyphs suitable for your language.  */
247 
248           char const *left = gettext_quote (N_("`"), quoting_style);
249           char const *right = gettext_quote (N_("'"), quoting_style);
250           for (quote_string = left; *quote_string; quote_string++)
251             STORE (*quote_string);
252           backslash_escapes = true;
253           quote_string = right;
254           quote_string_len = strlen (quote_string);
255       }
256       break;
257 
258     case shell_always_quoting_style:
259       STORE ('\'');
260       quote_string = "'";
261       quote_string_len = 1;
262       break;
263 
264     default:
265       break;
266     }
267 
268   for (i = 0;  ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize);  i++)
269     {
270       unsigned char c;
271       unsigned char esc;
272 
273       if (backslash_escapes
274             && quote_string_len
275             && i + quote_string_len <= argsize
276             && memcmp (arg + i, quote_string, quote_string_len) == 0)
277           STORE ('\\');
278 
279       c = arg[i];
280       switch (c)
281           {
282           case '\0':
283             if (backslash_escapes)
284               {
285                 STORE ('\\');
286                 STORE ('0');
287                 STORE ('0');
288                 c = '0';
289               }
290             break;
291 
292           case '?':
293             switch (quoting_style)
294               {
295               case shell_quoting_style:
296                 goto use_shell_always_quoting_style;
297 
298               case c_quoting_style:
299                 if (i + 2 < argsize && arg[i + 1] == '?')
300                     switch (arg[i + 2])
301                       {
302                       case '!': case '\'':
303                       case '(': case ')': case '-': case '/':
304                       case '<': case '=': case '>':
305                         /* Escape the second '?' in what would otherwise be
306                            a trigraph.  */
307                         c = arg[i + 2];
308                         i += 2;
309                         STORE ('?');
310                         STORE ('\\');
311                         STORE ('?');
312                         break;
313                       }
314                 break;
315 
316               default:
317                 break;
318               }
319             break;
320 
321           case '\a': esc = 'a'; goto c_escape;
322           case '\b': esc = 'b'; goto c_escape;
323           case '\f': esc = 'f'; goto c_escape;
324           case '\n': esc = 'n'; goto c_and_shell_escape;
325           case '\r': esc = 'r'; goto c_and_shell_escape;
326           case '\t': esc = 't'; goto c_and_shell_escape;
327           case '\v': esc = 'v'; goto c_escape;
328           case '\\': esc = c; goto c_and_shell_escape;
329 
330           c_and_shell_escape:
331             if (quoting_style == shell_quoting_style)
332               goto use_shell_always_quoting_style;
333           c_escape:
334             if (backslash_escapes)
335               {
336                 c = esc;
337                 goto store_escape;
338               }
339             break;
340 
341           case '{': case '}': /* sometimes special if isolated */
342             if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
343               break;
344             /* Fall through.  */
345           case '#': case '~':
346             if (i != 0)
347               break;
348             /* Fall through.  */
349           case ' ':
350           case '!': /* special in bash */
351           case '"': case '$': case '&':
352           case '(': case ')': case '*': case ';':
353           case '<':
354           case '=': /* sometimes special in 0th or (with "set -k") later args */
355           case '>': case '[':
356           case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
357           case '`': case '|':
358             /* A shell special character.  In theory, '$' and '`' could
359                be the first bytes of multibyte characters, which means
360                we should check them with mbrtowc, but in practice this
361                doesn't happen so it's not worth worrying about.  */
362             if (quoting_style == shell_quoting_style)
363               goto use_shell_always_quoting_style;
364             break;
365 
366           case '\'':
367             switch (quoting_style)
368               {
369               case shell_quoting_style:
370                 goto use_shell_always_quoting_style;
371 
372               case shell_always_quoting_style:
373                 STORE ('\'');
374                 STORE ('\\');
375                 STORE ('\'');
376                 break;
377 
378               default:
379                 break;
380               }
381             break;
382 
383           case '%': case '+': case ',': case '-': case '.': case '/':
384           case '0': case '1': case '2': case '3': case '4': case '5':
385           case '6': case '7': case '8': case '9': case ':':
386           case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
387           case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
388           case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
389           case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
390           case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
391           case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
392           case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
393           case 'o': case 'p': case 'q': case 'r': case 's': case 't':
394           case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
395             /* These characters don't cause problems, no matter what the
396                quoting style is.  They cannot start multibyte sequences.  */
397             break;
398 
399           default:
400             /* If we have a multibyte sequence, copy it until we reach
401                its end, find an error, or come back to the initial shift
402                state.  For C-like styles, if the sequence has
403                unprintable characters, escape the whole sequence, since
404                we can't easily escape single characters within it.  */
405             {
406               /* Length of multibyte sequence found so far.  */
407               size_t m;
408 
409               bool printable;
410 
411               if (unibyte_locale)
412                 {
413                     m = 1;
414                     printable = isprint (c) != 0;
415                 }
416               else
417                 {
418                     mbstate_t mbstate;
419                     memset (&mbstate, 0, sizeof mbstate);
420 
421                     m = 0;
422                     printable = true;
423                     if (argsize == SIZE_MAX)
424                       argsize = strlen (arg);
425 
426                     do
427                       {
428                         wchar_t w;
429                         size_t bytes = mbrtowc (&w, &arg[i + m],
430                                                       argsize - (i + m), &mbstate);
431                         if (bytes == 0)
432                           break;
433                         else if (bytes == (size_t) -1)
434                           {
435                               printable = false;
436                               break;
437                           }
438                         else if (bytes == (size_t) -2)
439                           {
440                               printable = false;
441                               while (i + m < argsize && arg[i + m])
442                                 m++;
443                               break;
444                           }
445                         else
446                           {
447                               /* Work around a bug with older shells that "see" a '\'
448                                  that is really the 2nd byte of a multibyte character.
449                                  In practice the problem is limited to ASCII
450                                  chars >= '@' that are shell special chars.  */
451                               if ('[' == 0x5b && quoting_style == shell_quoting_style)
452                                 {
453                                   size_t j;
454                                   for (j = 1; j < bytes; j++)
455                                     switch (arg[i + m + j])
456                                         {
457                                         case '[': case '\\': case '^':
458                                         case '`': case '|':
459                                           goto use_shell_always_quoting_style;
460                                         }
461                                 }
462 
463                               if (! iswprint (w))
464                                 printable = false;
465                               m += bytes;
466                           }
467                       }
468                     while (! mbsinit (&mbstate));
469                 }
470 
471               if (1 < m || (backslash_escapes && ! printable))
472                 {
473                     /* Output a multibyte sequence, or an escaped
474                        unprintable unibyte character.  */
475                     size_t ilim = i + m;
476 
477                     for (;;)
478                       {
479                         if (backslash_escapes && ! printable)
480                           {
481                               STORE ('\\');
482                               STORE ('0' + (c >> 6));
483                               STORE ('0' + ((c >> 3) & 7));
484                               c = '0' + (c & 7);
485                           }
486                         if (ilim <= i + 1)
487                           break;
488                         STORE (c);
489                         c = arg[++i];
490                       }
491 
492                     goto store_c;
493                 }
494             }
495           }
496 
497       if (! (backslash_escapes
498                && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
499           goto store_c;
500 
501     store_escape:
502       STORE ('\\');
503 
504     store_c:
505       STORE (c);
506     }
507 
508   if (i == 0 && quoting_style == shell_quoting_style)
509     goto use_shell_always_quoting_style;
510 
511   if (quote_string)
512     for (; *quote_string; quote_string++)
513       STORE (*quote_string);
514 
515   if (len < buffersize)
516     buffer[len] = '\0';
517   return len;
518 
519  use_shell_always_quoting_style:
520   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
521                                            shell_always_quoting_style, o);
522 }
523 
524 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
525    argument ARG (of size ARGSIZE), using O to control quoting.
526    If O is null, use the default.
527    Terminate the output with a null character, and return the written
528    size of the output, not counting the terminating null.
529    If BUFFERSIZE is too small to store the output string, return the
530    value that would have been returned had BUFFERSIZE been large enough.
531    If ARGSIZE is SIZE_MAX, use the string length of the argument for
532    ARGSIZE.  */
533 size_t
quotearg_buffer(char * buffer,size_t buffersize,char const * arg,size_t argsize,struct quoting_options const * o)534 quotearg_buffer (char *buffer, size_t buffersize,
535                      char const *arg, size_t argsize,
536                      struct quoting_options const *o)
537 {
538   struct quoting_options const *p = o ? o : &default_quoting_options;
539   int e = errno;
540   size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
541                                                p->style, p);
542   errno = e;
543   return r;
544 }
545 
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), but return the quoted
   string in newly allocated storage.  A first pass with an empty
   buffer measures the result; a second pass fills the allocation.
   errno is left unchanged.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;
  size_t capacity = 1 + quotearg_buffer (NULL, 0, arg, argsize, o);
  char *result = xmalloc (capacity);

  quotearg_buffer (result, capacity, arg, argsize, o);

  errno = saved_errno;
  return result;
}
559 
/* Return a quoted version of ARG, stored in slot N.
   ARG is ARGSIZE bytes long, or null-terminated when ARGSIZE is
   SIZE_MAX.  OPTIONS gives the quoting options.
   The result lives in static storage that the next call with the same
   N may overwrite or free.
   N must be nonnegative (the function aborts otherwise); it is an
   "int" on purpose, leaving negative values for future extensions.
   errno is left unchanged.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  struct slotvec
    {
      size_t size;
      char *val;
    };

  /* Slot 0 starts with a preallocated buffer, so that one small part
     of a "memory exhausted" message can always be quoted there.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  int saved_errno = errno;
  unsigned int n0 = n;

  if (n < 0)
    abort ();

  if (n0 >= nslots)
    {
      /* Grow the slot table so that slot N exists.  */
      unsigned int wanted = n0 + 1;

      /* XXX: wrong int cast to avoid gcc warning */
      if (xalloc_oversized ((int)wanted, sizeof *slotvec))
        xalloc_die ();

      /* The initial one-entry table is static; move it to the heap
         before the first resize.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, wanted * sizeof *slotvec);
      memset (slotvec + nslots, 0, (wanted - nslots) * sizeof *slotvec);
      nslots = wanted;
    }

  {
    struct slotvec *slot = &slotvec[n];
    size_t capacity = slot->size;
    char *result = slot->val;
    size_t needed = quotearg_buffer (result, capacity, arg, argsize, options);

    if (needed >= capacity)
      {
        /* The slot's buffer was too small: replace it with one that
           fits exactly and quote again.  */
        capacity = needed + 1;
        slot->size = capacity;
        if (result != slot0)
          free (result);
        result = xmalloc (capacity);
        slot->val = result;
        quotearg_buffer (result, capacity, arg, argsize, options);
      }

    errno = saved_errno;
    return result;
  }
}
626 
627 char *
quotearg_n(int n,char const * arg)628 quotearg_n (int n, char const *arg)
629 {
630   return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
631 }
632 
/* Quote the null-terminated string ARG with the default quoting
   options, returning the result in storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
638 
639 /* Return quoting options for STYLE, with no extra quoting.  */
640 static struct quoting_options
quoting_options_from_style(enum quoting_style style)641 quoting_options_from_style (enum quoting_style style)
642 {
643   struct quoting_options o;
644   o.style = style;
645   memset (o.quote_these_too, 0, sizeof o.quote_these_too);
646   return o;
647 }
648 
649 char *
quotearg_n_style(int n,enum quoting_style s,char const * arg)650 quotearg_n_style (int n, enum quoting_style s, char const *arg)
651 {
652   struct quoting_options const o = quoting_options_from_style (s);
653   return quotearg_n_options (n, arg, SIZE_MAX, &o);
654 }
655 
656 char *
quotearg_n_style_mem(int n,enum quoting_style s,char const * arg,size_t argsize)657 quotearg_n_style_mem (int n, enum quoting_style s,
658                           char const *arg, size_t argsize)
659 {
660   struct quoting_options const o = quoting_options_from_style (s);
661   return quotearg_n_options (n, arg, argsize, &o);
662 }
663 
664 char *
quotearg_style(enum quoting_style s,char const * arg)665 quotearg_style (enum quoting_style s, char const *arg)
666 {
667   return quotearg_n_style (0, s, arg);
668 }
669 
670 char *
quotearg_char(char const * arg,char ch)671 quotearg_char (char const *arg, char ch)
672 {
673   struct quoting_options options;
674   options = default_quoting_options;
675   set_char_quoting (&options, ch, 1);
676   return quotearg_n_options (0, arg, SIZE_MAX, &options);
677 }
678 
/* Quote the null-terminated string ARG as quotearg_char does, with
   ':' as the additionally quoted character.  */
char *
quotearg_colon (char const *arg)
{
  char const extra = ':';

  return quotearg_char (arg, extra);
}
684