xref: /dragonfly/sys/kern/subr_scanf.c (revision eb68dd758f2657b7ba96fc0138e539ecc2672f2f)
1 /*-
2  * Copyright (c) 1990, 1993
3  *        The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Chris Torek.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $FreeBSD: src/sys/kern/subr_scanf.c,v 1.13 1999/11/24 01:03:01 archie Exp $
33  * $DragonFly: src/sys/kern/subr_scanf.c,v 1.4 2006/12/13 21:58:50 dillon Exp $
34  * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
35  * From: static char sccsid[] = "@(#)strtol.c     8.1 (Berkeley) 6/4/93";
36  * From: static char sccsid[] = "@(#)strtoul.c    8.1 (Berkeley) 6/4/93";
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/ctype.h>
42 #include <machine/limits.h>
43 
44 /*
45  * Note that stdarg.h and the ANSI style va_start macro is used for both
46  * ANSI and traditional C compilers.
47  */
48 #include <machine/stdarg.h>
49 
#define	BUF		32	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x01	/* l: long or double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */
#define	QUAD		0x400	/* q: quad_t */
#define	SHORTSHORT	0x4000	/* hh: char */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * The floating point and integral sets deliberately share bit values
 * (DPTOK/PFXOK and EXPOK/NZDIGITS); a single conversion only ever
 * uses one of the two sets.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */
84 typedef u_quad_t (*ccfntype)(const char *, char **, int);
85 
86 static const u_char *__sccl(char *, const u_char *);
87 
88 int
ksscanf(const char * ibuf,const char * fmt,...)89 ksscanf(const char *ibuf, const char *fmt, ...)
90 {
91           __va_list ap;
92           int ret;
93 
94           __va_start(ap, fmt);
95           ret = kvsscanf(ibuf, fmt, ap);
96           __va_end(ap);
97           return(ret);
98 }
99 
100 int
kvsscanf(const char * inp,char const * fmt0,__va_list ap)101 kvsscanf(const char *inp, char const *fmt0, __va_list ap)
102 {
103           int inr;
104           const u_char *fmt = (const u_char *)fmt0;
105           int c;                        /* character from format, or conversion */
106           size_t width;                 /* field width, or 0 */
107           char *p;            /* points into all kinds of strings */
108           int n;                        /* handy integer */
109           int flags;                    /* flags as defined above */
110           char *p0;           /* saves original value of p when necessary */
111           int nassigned;                /* number of fields assigned */
112           int nconversions;   /* number of conversions */
113           int nread;                    /* number of characters consumed from fp */
114           int base;           /* base argument to strtoq/strtouq */
115           ccfntype ccfn;                /* conversion function (strtoq/strtouq) */
116           char ccltab[256];   /* character class table for %[...] */
117           char buf[BUF];                /* buffer for numeric conversions */
118 
119           /* `basefix' is used to avoid `if' tests in the integer scanner */
120           static short basefix[17] =
121                     { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
122 
123           inr = strlen(inp);
124 
125           nassigned = 0;
126           nconversions = 0;
127           nread = 0;
128           base = 0;           /* XXX just to keep gcc happy */
129           ccfn = NULL;                  /* XXX just to keep gcc happy */
130           for (;;) {
131                     c = *fmt++;
132                     if (c == 0)
133                               return (nassigned);
134                     if (isspace(c)) {
135                               while (inr > 0 && isspace(*inp))
136                                         nread++, inr--, inp++;
137                               continue;
138                     }
139                     if (c != '%')
140                               goto literal;
141                     width = 0;
142                     flags = 0;
143                     /*
144                      * switch on the format.  continue if done;
145                      * break once format type is derived.
146                      */
147 again:              c = *fmt++;
148                     switch (c) {
149                     case '%':
150 literal:
151                               if (inr <= 0)
152                                         goto input_failure;
153                               if (*inp != c)
154                                         goto match_failure;
155                               inr--, inp++;
156                               nread++;
157                               continue;
158 
159                     case '*':
160                               flags |= SUPPRESS;
161                               goto again;
162                     case 'l':
163                               flags |= LONG;
164                               goto again;
165                     case 'q':
166                               flags |= QUAD;
167                               goto again;
168                     case 'h':
169                               if (flags & SHORT) {
170                                         flags &= ~SHORT;
171                                         flags |= SHORTSHORT;
172                               } else
173                                         flags |= SHORT;
174                               goto again;
175 
176                     case '0': case '1': case '2': case '3': case '4':
177                     case '5': case '6': case '7': case '8': case '9':
178                               width = width * 10 + c - '0';
179                               goto again;
180 
181                     /*
182                      * Conversions.
183                      *
184                      */
185                     case 'd':
186                               c = CT_INT;
187                               ccfn = (ccfntype)strtoq;
188                               base = 10;
189                               break;
190 
191                     case 'i':
192                               c = CT_INT;
193                               ccfn = (ccfntype)strtoq;
194                               base = 0;
195                               break;
196 
197                     case 'o':
198                               c = CT_INT;
199                               ccfn = strtouq;
200                               base = 8;
201                               break;
202 
203                     case 'u':
204                               c = CT_INT;
205                               ccfn = strtouq;
206                               base = 10;
207                               break;
208 
209                     case 'x':
210                               flags |= PFXOK;     /* enable 0x prefixing */
211                               c = CT_INT;
212                               ccfn = strtouq;
213                               base = 16;
214                               break;
215 
216                     case 's':
217                               c = CT_STRING;
218                               break;
219 
220                     case '[':
221                               fmt = __sccl(ccltab, fmt);
222                               flags |= NOSKIP;
223                               c = CT_CCL;
224                               break;
225 
226                     case 'c':
227                               flags |= NOSKIP;
228                               c = CT_CHAR;
229                               break;
230 
231                     case 'p': /* pointer format is like hex */
232                               flags |= POINTER | PFXOK;
233                               c = CT_INT;
234                               ccfn = strtouq;
235                               base = 16;
236                               break;
237 
238                     case 'n':
239                               nconversions++;
240                               if (flags & SUPPRESS)         /* ??? */
241                                         continue;
242                               if (flags & SHORTSHORT)
243                                         *__va_arg(ap, char *) = nread;
244                               else if (flags & SHORT)
245                                         *__va_arg(ap, short *) = nread;
246                               else if (flags & LONG)
247                                         *__va_arg(ap, long *) = nread;
248                               else if (flags & QUAD)
249                                         *__va_arg(ap, quad_t *) = nread;
250                               else
251                                         *__va_arg(ap, int *) = nread;
252                               continue;
253                     }
254 
255                     /*
256                      * We have a conversion that requires input.
257                      */
258                     if (inr <= 0)
259                               goto input_failure;
260 
261                     /*
262                      * Consume leading white space, except for formats
263                      * that suppress this.
264                      */
265                     if ((flags & NOSKIP) == 0) {
266                               while (isspace(*inp)) {
267                                         nread++;
268                                         if (--inr > 0)
269                                                   inp++;
270                                         else
271                                                   goto input_failure;
272                               }
273                               /*
274                                * Note that there is at least one character in
275                                * the buffer, so conversions that do not set NOSKIP
276                                * can no longer result in an input failure.
277                                */
278                     }
279 
280                     /*
281                      * Do the conversion.
282                      */
283                     switch (c) {
284 
285                     case CT_CHAR:
286                               /* scan arbitrary characters (sets NOSKIP) */
287                               if (width == 0)
288                                         width = 1;
289                               if (flags & SUPPRESS) {
290                                         size_t sum = 0;
291                                         for (;;) {
292                                                   if ((n = inr) < width) {
293                                                             sum += n;
294                                                             width -= n;
295                                                             inp += n;
296                                                             if (sum == 0)
297                                                                       goto input_failure;
298                                                             break;
299                                                   } else {
300                                                             sum += width;
301                                                             inr -= width;
302                                                             inp += width;
303                                                             break;
304                                                   }
305                                         }
306                                         nread += sum;
307                               } else {
308                                         bcopy(inp, __va_arg(ap, char *), width);
309                                         inr -= width;
310                                         inp += width;
311                                         nread += width;
312                                         nassigned++;
313                               }
314                               nconversions++;
315                               break;
316 
317                     case CT_CCL:
318                               /* scan a (nonempty) character class (sets NOSKIP) */
319                               if (width == 0)
320                                         width = (size_t)~0; /* `infinity' */
321                               /* take only those things in the class */
322                               if (flags & SUPPRESS) {
323                                         n = 0;
324                                         while (ccltab[(unsigned char)*inp]) {
325                                                   n++, inr--, inp++;
326                                                   if (--width == 0)
327                                                             break;
328                                                   if (inr <= 0) {
329                                                             if (n == 0)
330                                                                       goto input_failure;
331                                                             break;
332                                                   }
333                                         }
334                                         if (n == 0)
335                                                   goto match_failure;
336                               } else {
337                                         p0 = p = __va_arg(ap, char *);
338                                         while (ccltab[(unsigned char)*inp]) {
339                                                   inr--;
340                                                   *p++ = *inp++;
341                                                   if (--width == 0)
342                                                             break;
343                                                   if (inr <= 0) {
344                                                             if (p == p0)
345                                                                       goto input_failure;
346                                                             break;
347                                                   }
348                                         }
349                                         n = p - p0;
350                                         if (n == 0)
351                                                   goto match_failure;
352                                         *p = 0;
353                                         nassigned++;
354                               }
355                               nread += n;
356                               nconversions++;
357                               break;
358 
359                     case CT_STRING:
360                               /* like CCL, but zero-length string OK, & no NOSKIP */
361                               if (width == 0)
362                                         width = (size_t)~0;
363                               if (flags & SUPPRESS) {
364                                         n = 0;
365                                         while (!isspace(*inp)) {
366                                                   n++, inr--, inp++;
367                                                   if (--width == 0)
368                                                             break;
369                                                   if (inr <= 0)
370                                                             break;
371                                         }
372                                         nread += n;
373                               } else {
374                                         p0 = p = __va_arg(ap, char *);
375                                         while (!isspace(*inp)) {
376                                                   inr--;
377                                                   *p++ = *inp++;
378                                                   if (--width == 0)
379                                                             break;
380                                                   if (inr <= 0)
381                                                             break;
382                                         }
383                                         *p = 0;
384                                         nread += p - p0;
385                                         nassigned++;
386                               }
387                               nconversions++;
388                               continue;
389 
390                     case CT_INT:
391                               /* scan an integer as if by strtoq/strtouq */
392 #ifdef hardway
393                               if (width == 0 || width > sizeof(buf) - 1)
394                                         width = sizeof(buf) - 1;
395 #else
396                               /* size_t is unsigned, hence this optimisation */
397                               if (--width > sizeof(buf) - 2)
398                                         width = sizeof(buf) - 2;
399                               width++;
400 #endif
401                               flags |= SIGNOK | NDIGITS | NZDIGITS;
402                               for (p = buf; width; width--) {
403                                         c = *inp;
404                                         /*
405                                          * Switch on the character; `goto ok'
406                                          * if we accept it as a part of number.
407                                          */
408                                         switch (c) {
409 
410                                         /*
411                                          * The digit 0 is always legal, but is
412                                          * special.  For %i conversions, if no
413                                          * digits (zero or nonzero) have been
414                                          * scanned (only signs), we will have
415                                          * base==0.  In that case, we should set
416                                          * it to 8 and enable 0x prefixing.
417                                          * Also, if we have not scanned zero digits
418                                          * before this, do not turn off prefixing
419                                          * (someone else will turn it off if we
420                                          * have scanned any nonzero digits).
421                                          */
422                                         case '0':
423                                                   if (base == 0) {
424                                                             base = 8;
425                                                             flags |= PFXOK;
426                                                   }
427                                                   if (flags & NZDIGITS)
428                                                       flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
429                                                   else
430                                                       flags &= ~(SIGNOK|PFXOK|NDIGITS);
431                                                   goto ok;
432 
433                                         /* 1 through 7 always legal */
434                                         case '1': case '2': case '3':
435                                         case '4': case '5': case '6': case '7':
436                                                   base = basefix[base];
437                                                   flags &= ~(SIGNOK | PFXOK | NDIGITS);
438                                                   goto ok;
439 
440                                         /* digits 8 and 9 ok iff decimal or hex */
441                                         case '8': case '9':
442                                                   base = basefix[base];
443                                                   if (base <= 8)
444                                                             break;    /* not legal here */
445                                                   flags &= ~(SIGNOK | PFXOK | NDIGITS);
446                                                   goto ok;
447 
448                                         /* letters ok iff hex */
449                                         case 'A': case 'B': case 'C':
450                                         case 'D': case 'E': case 'F':
451                                         case 'a': case 'b': case 'c':
452                                         case 'd': case 'e': case 'f':
453                                                   /* no need to fix base here */
454                                                   if (base <= 10)
455                                                             break;    /* not legal here */
456                                                   flags &= ~(SIGNOK | PFXOK | NDIGITS);
457                                                   goto ok;
458 
459                                         /* sign ok only as first character */
460                                         case '+': case '-':
461                                                   if (flags & SIGNOK) {
462                                                             flags &= ~SIGNOK;
463                                                             goto ok;
464                                                   }
465                                                   break;
466 
467                                         /* x ok iff flag still set & 2nd char */
468                                         case 'x': case 'X':
469                                                   if (flags & PFXOK && p == buf + 1) {
470                                                             base = 16;          /* if %i */
471                                                             flags &= ~PFXOK;
472                                                             goto ok;
473                                                   }
474                                                   break;
475                                         }
476 
477                                         /*
478                                          * If we got here, c is not a legal character
479                                          * for a number.  Stop accumulating digits.
480                                          */
481                                         break;
482                     ok:
483                                         /*
484                                          * c is legal: store it and look at the next.
485                                          */
486                                         *p++ = c;
487                                         if (--inr > 0)
488                                                   inp++;
489                                         else
490                                                   break;              /* end of input */
491                               }
492                               /*
493                                * If we had only a sign, it is no good; push
494                                * back the sign.  If the number ends in `x',
495                                * it was [sign] '0' 'x', so push back the x
496                                * and treat it as [sign] '0'.
497                                */
498                               if (flags & NDIGITS) {
499                                         if (p > buf) {
500                                                   inp--;
501                                                   inr++;
502                                         }
503                                         goto match_failure;
504                               }
505                               c = ((u_char *)p)[-1];
506                               if (c == 'x' || c == 'X') {
507                                         --p;
508                                         inp--;
509                                         inr++;
510                               }
511                               if ((flags & SUPPRESS) == 0) {
512                                         u_quad_t res;
513 
514                                         *p = 0;
515                                         res = (*ccfn)(buf, NULL, base);
516                                         if (flags & POINTER)
517                                                   *__va_arg(ap, void **) =
518                                                             (void *)(uintptr_t)res;
519                                         else if (flags & SHORTSHORT)
520                                                   *__va_arg(ap, char *) = res;
521                                         else if (flags & SHORT)
522                                                   *__va_arg(ap, short *) = res;
523                                         else if (flags & LONG)
524                                                   *__va_arg(ap, long *) = res;
525                                         else if (flags & QUAD)
526                                                   *__va_arg(ap, quad_t *) = res;
527                                         else
528                                                   *__va_arg(ap, int *) = res;
529                                         nassigned++;
530                               }
531                               nread += p - buf;
532                               nconversions++;
533                               break;
534 
535                     }
536           }
537 input_failure:
538           return (nconversions != 0 ? nassigned : -1);
539 match_failure:
540           return (nassigned);
541 }
542 
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 * `tab' must have room for 256 entries, one per unsigned char value.
 * If the format ends before the closing `]', the returned pointer
 * refers to the terminating NUL of the format.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* equivalent to memset(tab, v, 256), as tab has char elements */
	for (n = 0; n < 256; n++)
		tab[n] = v;

	if (c == 0)
		return (fmt - 1);	/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': with n == c the fill
			 * loop below would still store tab[c + 1], marking
			 * an unrelated character and, for c == 255, writing
			 * one element past the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; n > c, so this ends at tab[n] */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
634 
635