xref: /trueos/gnu/usr.bin/grep/quotearg.c (revision d4c611919468bb0fb6a564c4b8cada55792c3fc4)
1 /* quotearg.c - quote arguments for output
2    Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software Foundation,
16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17 
18 /* Written by Paul Eggert <eggert@twinsun.com> */
19 
20 #if HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23 
24 #if HAVE_STDDEF_H
25 # include <stddef.h>  /* For the definition of size_t on windows w/MSVC.  */
26 #endif
27 #include <sys/types.h>
28 #include <quotearg.h>
29 #include <xalloc.h>
30 
31 #include <ctype.h>
32 
33 #if ENABLE_NLS
34 # include <libintl.h>
35 # define _(text) gettext (text)
36 #else
37 # define _(text) text
38 #endif
39 #define N_(text) text
40 
41 #if HAVE_LIMITS_H
42 # include <limits.h>
43 #endif
44 #ifndef CHAR_BIT
45 # define CHAR_BIT 8
46 #endif
47 #ifndef UCHAR_MAX
48 # define UCHAR_MAX ((unsigned char) -1)
49 #endif
50 
51 #if HAVE_C_BACKSLASH_A
52 # define ALERT_CHAR '\a'
53 #else
54 # define ALERT_CHAR '\7'
55 #endif
56 
57 #if HAVE_STDLIB_H
58 # include <stdlib.h>
59 #endif
60 
61 #if HAVE_STRING_H
62 # include <string.h>
63 #endif
64 
65 #if HAVE_WCHAR_H
66 # include <wchar.h>
67 #endif
68 
#if !HAVE_MBRTOWC
/* Without mbrtowc, multibyte processing is switched off completely.
   MB_CUR_MAX is forced to 1; the remaining macros are present only as
   documentation and so that the code mentioning them stays valid C.  */
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
# define mbsinit(ps) 1
# define iswprint(wc) ISPRINT ((unsigned char) (wc))
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
#endif
79 
/* Make sure iswprint is usable: pull in <wctype.h> when it exists, and
   if nothing provides iswprint, treat every wide character as
   printable.  */
#ifndef iswprint
# if HAVE_WCTYPE_H
#  include <wctype.h>
# endif
# if !(defined iswprint || HAVE_ISWPRINT)
#  define iswprint(wc) 1
# endif
#endif
88 
/* Number of bits in an int; used to index the per-character bit vector.  */
#define INT_BITS (CHAR_BIT * sizeof (int))
90 
/* IN_CTYPE_DOMAIN (C) is nonzero when C may be passed to the <ctype.h>
   classification macros.  It is always 1 when STDC_HEADERS is defined
   or when no isascii is known; otherwise it defers to isascii.  */
#if defined STDC_HEADERS || !(defined isascii || defined HAVE_ISASCII)
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif

/* Drop any earlier ISPRINT (wctype.h on solaris2.6 defines one) before
   installing ours.  */
#undef ISPRINT
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
100 
101 struct quoting_options
102 {
103   /* Basic quoting style.  */
104   enum quoting_style style;
105 
106   /* Quote the characters indicated by this bit vector even if the
107      quoting style would not normally require them to be quoted.  */
108   int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
109 };
110 
/* Textual names of the quoting styles, ending with a null pointer.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  0
};
123 
124 /* Correspondences to quoting style names.  */
125 enum quoting_style const quoting_style_vals[] =
126 {
127   literal_quoting_style,
128   shell_quoting_style,
129   shell_always_quoting_style,
130   c_quoting_style,
131   escape_quoting_style,
132   locale_quoting_style,
133   clocale_quoting_style
134 };
135 
136 /* The default quoting options.  */
137 static struct quoting_options default_quoting_options;
138 
139 /* Allocate a new set of quoting options, with contents initially identical
140    to O if O is not null, or to the default if O is null.
141    It is the caller's responsibility to free the result.  */
142 struct quoting_options *
clone_quoting_options(struct quoting_options * o)143 clone_quoting_options (struct quoting_options *o)
144 {
145   struct quoting_options *p
146     = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
147   *p = *(o ? o : &default_quoting_options);
148   return p;
149 }
150 
151 /* Get the value of O's quoting style.  If O is null, use the default.  */
152 enum quoting_style
get_quoting_style(struct quoting_options * o)153 get_quoting_style (struct quoting_options *o)
154 {
155   return (o ? o : &default_quoting_options)->style;
156 }
157 
158 /* In O (or in the default if O is null),
159    set the value of the quoting style to S.  */
160 void
set_quoting_style(struct quoting_options * o,enum quoting_style s)161 set_quoting_style (struct quoting_options *o, enum quoting_style s)
162 {
163   (o ? o : &default_quoting_options)->style = s;
164 }
165 
166 /* In O (or in the default if O is null),
167    set the value of the quoting options for character C to I.
168    Return the old value.  Currently, the only values defined for I are
169    0 (the default) and 1 (which means to quote the character even if
170    it would not otherwise be quoted).  */
171 int
set_char_quoting(struct quoting_options * o,char c,int i)172 set_char_quoting (struct quoting_options *o, char c, int i)
173 {
174   unsigned char uc = c;
175   int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
176   int shift = uc % INT_BITS;
177   int r = (*p >> shift) & 1;
178   *p ^= ((i & 1) ^ r) << shift;
179   return r;
180 }
181 
182 /* MSGID approximates a quotation mark.  Return its translation if it
183    has one; otherwise, return either it or "\"", depending on S.  */
184 static char const *
gettext_quote(char const * msgid,enum quoting_style s)185 gettext_quote (char const *msgid, enum quoting_style s)
186 {
187   char const *translation = _(msgid);
188   if (translation == msgid && s == clocale_quoting_style)
189     translation = "\"";
190   return translation;
191 }
192 
193 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
194    argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
195    non-quoting-style part of O to control quoting.
196    Terminate the output with a null character, and return the written
197    size of the output, not counting the terminating null.
198    If BUFFERSIZE is too small to store the output string, return the
199    value that would have been returned had BUFFERSIZE been large enough.
200    If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
201 
202    This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
203    ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
204    style specified by O, and O may not be null.  */
205 
206 static size_t
quotearg_buffer_restyled(char * buffer,size_t buffersize,char const * arg,size_t argsize,enum quoting_style quoting_style,struct quoting_options const * o)207 quotearg_buffer_restyled (char *buffer, size_t buffersize,
208 			  char const *arg, size_t argsize,
209 			  enum quoting_style quoting_style,
210 			  struct quoting_options const *o)
211 {
212   size_t i;
213   size_t len = 0;
214   char const *quote_string = 0;
215   size_t quote_string_len = 0;
216   int backslash_escapes = 0;
217   int unibyte_locale = MB_CUR_MAX == 1;
218 
219 #define STORE(c) \
220     do \
221       { \
222 	if (len < buffersize) \
223 	  buffer[len] = (c); \
224 	len++; \
225       } \
226     while (0)
227 
228   switch (quoting_style)
229     {
230     case c_quoting_style:
231       STORE ('"');
232       backslash_escapes = 1;
233       quote_string = "\"";
234       quote_string_len = 1;
235       break;
236 
237     case escape_quoting_style:
238       backslash_escapes = 1;
239       break;
240 
241     case locale_quoting_style:
242     case clocale_quoting_style:
243       {
244 	/* Get translations for open and closing quotation marks.
245 
246 	   The message catalog should translate "`" to a left
247 	   quotation mark suitable for the locale, and similarly for
248 	   "'".  If the catalog has no translation,
249 	   locale_quoting_style quotes `like this', and
250 	   clocale_quoting_style quotes "like this".
251 
252 	   For example, an American English Unicode locale should
253 	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
254 	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
255 	   MARK).  A British English Unicode locale should instead
256 	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
257 	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.  */
258 
259 	char const *left = gettext_quote (N_("`"), quoting_style);
260 	char const *right = gettext_quote (N_("'"), quoting_style);
261 	for (quote_string = left; *quote_string; quote_string++)
262 	  STORE (*quote_string);
263 	backslash_escapes = 1;
264 	quote_string = right;
265 	quote_string_len = strlen (quote_string);
266       }
267       break;
268 
269     case shell_always_quoting_style:
270       STORE ('\'');
271       quote_string = "'";
272       quote_string_len = 1;
273       break;
274 
275     default:
276       break;
277     }
278 
279   for (i = 0;  ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize);  i++)
280     {
281       unsigned char c;
282       unsigned char esc;
283 
284       if (backslash_escapes
285 	  && quote_string_len
286 	  && i + quote_string_len <= argsize
287 	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
288 	STORE ('\\');
289 
290       c = arg[i];
291       switch (c)
292 	{
293 	case '?':
294 	  switch (quoting_style)
295 	    {
296 	    case shell_quoting_style:
297 	      goto use_shell_always_quoting_style;
298 
299 	    case c_quoting_style:
300 	      if (i + 2 < argsize && arg[i + 1] == '?')
301 		switch (arg[i + 2])
302 		  {
303 		  case '!': case '\'':
304 		  case '(': case ')': case '-': case '/':
305 		  case '<': case '=': case '>':
306 		    /* Escape the second '?' in what would otherwise be
307 		       a trigraph.  */
308 		    i += 2;
309 		    c = arg[i + 2];
310 		    STORE ('?');
311 		    STORE ('\\');
312 		    STORE ('?');
313 		    break;
314 		  }
315 	      break;
316 
317 	    default:
318 	      break;
319 	    }
320 	  break;
321 
322 	case ALERT_CHAR: esc = 'a'; goto c_escape;
323 	case '\b': esc = 'b'; goto c_escape;
324 	case '\f': esc = 'f'; goto c_escape;
325 	case '\n': esc = 'n'; goto c_and_shell_escape;
326 	case '\r': esc = 'r'; goto c_and_shell_escape;
327 	case '\t': esc = 't'; goto c_and_shell_escape;
328 	case '\v': esc = 'v'; goto c_escape;
329 	case '\\': esc = c; goto c_and_shell_escape;
330 
331 	c_and_shell_escape:
332 	  if (quoting_style == shell_quoting_style)
333 	    goto use_shell_always_quoting_style;
334 	c_escape:
335 	  if (backslash_escapes)
336 	    {
337 	      c = esc;
338 	      goto store_escape;
339 	    }
340 	  break;
341 
342 	case '#': case '~':
343 	  if (i != 0)
344 	    break;
345 	  /* Fall through.  */
346 	case ' ':
347 	case '!': /* special in bash */
348 	case '"': case '$': case '&':
349 	case '(': case ')': case '*': case ';':
350 	case '<': case '>': case '[':
351 	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
352 	case '`': case '|':
353 	  /* A shell special character.  In theory, '$' and '`' could
354 	     be the first bytes of multibyte characters, which means
355 	     we should check them with mbrtowc, but in practice this
356 	     doesn't happen so it's not worth worrying about.  */
357 	  if (quoting_style == shell_quoting_style)
358 	    goto use_shell_always_quoting_style;
359 	  break;
360 
361 	case '\'':
362 	  switch (quoting_style)
363 	    {
364 	    case shell_quoting_style:
365 	      goto use_shell_always_quoting_style;
366 
367 	    case shell_always_quoting_style:
368 	      STORE ('\'');
369 	      STORE ('\\');
370 	      STORE ('\'');
371 	      break;
372 
373 	    default:
374 	      break;
375 	    }
376 	  break;
377 
378 	case '%': case '+': case ',': case '-': case '.': case '/':
379 	case '0': case '1': case '2': case '3': case '4': case '5':
380 	case '6': case '7': case '8': case '9': case ':': case '=':
381 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
382 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
383 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
384 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
385 	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
386 	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
387 	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
388 	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
389 	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
390 	case '{': case '}':
391 	  /* These characters don't cause problems, no matter what the
392 	     quoting style is.  They cannot start multibyte sequences.  */
393 	  break;
394 
395 	default:
396 	  /* If we have a multibyte sequence, copy it until we reach
397 	     its end, find an error, or come back to the initial shift
398 	     state.  For C-like styles, if the sequence has
399 	     unprintable characters, escape the whole sequence, since
400 	     we can't easily escape single characters within it.  */
401 	  {
402 	    /* Length of multibyte sequence found so far.  */
403 	    size_t m;
404 
405 	    int printable;
406 
407 	    if (unibyte_locale)
408 	      {
409 		m = 1;
410 		printable = ISPRINT (c);
411 	      }
412 	    else
413 	      {
414 		mbstate_t mbstate;
415 		memset (&mbstate, 0, sizeof mbstate);
416 
417 		m = 0;
418 		printable = 1;
419 		if (argsize == (size_t) -1)
420 		  argsize = strlen (arg);
421 
422 		do
423 		  {
424 		    wchar_t w;
425 		    size_t bytes = mbrtowc (&w, &arg[i + m],
426 					    argsize - (i + m), &mbstate);
427 		    if (bytes == 0)
428 		      break;
429 		    else if (bytes == (size_t) -1)
430 		      {
431 			printable = 0;
432 			break;
433 		      }
434 		    else if (bytes == (size_t) -2)
435 		      {
436 			printable = 0;
437 			while (i + m < argsize && arg[i + m])
438 			  m++;
439 			break;
440 		      }
441 		    else
442 		      {
443 			if (! iswprint (w))
444 			  printable = 0;
445 			m += bytes;
446 		      }
447 		  }
448 		while (! mbsinit (&mbstate));
449 	      }
450 
451 	    if (1 < m || (backslash_escapes && ! printable))
452 	      {
453 		/* Output a multibyte sequence, or an escaped
454 		   unprintable unibyte character.  */
455 		size_t ilim = i + m;
456 
457 		for (;;)
458 		  {
459 		    if (backslash_escapes && ! printable)
460 		      {
461 			STORE ('\\');
462 			STORE ('0' + (c >> 6));
463 			STORE ('0' + ((c >> 3) & 7));
464 			c = '0' + (c & 7);
465 		      }
466 		    if (ilim <= i + 1)
467 		      break;
468 		    STORE (c);
469 		    c = arg[++i];
470 		  }
471 
472 		goto store_c;
473 	      }
474 	  }
475 	}
476 
477       if (! (backslash_escapes
478 	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
479 	goto store_c;
480 
481     store_escape:
482       STORE ('\\');
483 
484     store_c:
485       STORE (c);
486     }
487 
488   if (quote_string)
489     for (; *quote_string; quote_string++)
490       STORE (*quote_string);
491 
492   if (len < buffersize)
493     buffer[len] = '\0';
494   return len;
495 
496  use_shell_always_quoting_style:
497   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
498 				   shell_always_quoting_style, o);
499 }
500 
501 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
502    argument ARG (of size ARGSIZE), using O to control quoting.
503    If O is null, use the default.
504    Terminate the output with a null character, and return the written
505    size of the output, not counting the terminating null.
506    If BUFFERSIZE is too small to store the output string, return the
507    value that would have been returned had BUFFERSIZE been large enough.
508    If ARGSIZE is -1, use the string length of the argument for ARGSIZE.  */
509 size_t
quotearg_buffer(char * buffer,size_t buffersize,char const * arg,size_t argsize,struct quoting_options const * o)510 quotearg_buffer (char *buffer, size_t buffersize,
511 		 char const *arg, size_t argsize,
512 		 struct quoting_options const *o)
513 {
514   struct quoting_options const *p = o ? o : &default_quoting_options;
515   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
516 				   p->style, p);
517 }
518 
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; a negative N aborts the program.  N is
   deliberately declared with type "int" to allow for future
   extensions (using negative values).  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  if (n < 0)
    abort ();

  if (nslots <= (unsigned int) n)
    {
      /* Grow the slot vector to hold slots 0..N.  The count is computed
         in unsigned arithmetic so that N + 1 cannot overflow, and the
         byte size is checked for wraparound.  */
      unsigned int n1 = (unsigned int) n + 1;
      size_t s = n1 * sizeof (struct slotvec);
      if (n1 != s / sizeof (struct slotvec))
        abort ();

      if (slotvec == &slotvec0)
        {
          /* First growth: move from the static one-element vector to
             heap storage, carrying slot 0 over.  */
          slotvec = (struct slotvec *) xmalloc (s);
          *slotvec = slotvec0;
        }
      else
        slotvec = (struct slotvec *) xrealloc (slotvec, s);

      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));

      /* Record the new slot count N1, not N.  Recording N would make
         the next call with the same N take this path again and wipe
         slot N, losing the buffer it owns.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The slot is too small: enlarge it to fit exactly and quote
           again.  slot0 is static storage, so it is replaced by a
           fresh allocation rather than being reallocated.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
572 
573 char *
quotearg_n(unsigned int n,char const * arg)574 quotearg_n (unsigned int n, char const *arg)
575 {
576   return quotearg_n_options (n, arg, &default_quoting_options);
577 }
578 
/* Quote ARG into storage slot 0 using the default quoting options.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
584 
585 char *
quotearg_n_style(unsigned int n,enum quoting_style s,char const * arg)586 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
587 {
588   struct quoting_options o;
589   o.style = s;
590   memset (o.quote_these_too, 0, sizeof o.quote_these_too);
591   return quotearg_n_options (n, arg, &o);
592 }
593 
594 char *
quotearg_style(enum quoting_style s,char const * arg)595 quotearg_style (enum quoting_style s, char const *arg)
596 {
597   return quotearg_n_style (0, s, arg);
598 }
599 
600 char *
quotearg_char(char const * arg,char ch)601 quotearg_char (char const *arg, char ch)
602 {
603   struct quoting_options options;
604   options = default_quoting_options;
605   set_char_quoting (&options, ch, 1);
606   return quotearg_n_options (0, arg, &options);
607 }
608 
/* Quote ARG like quotearg_char does, with ':' as the additionally
   quoted character.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}
614