1 /*        $NetBSD: apprentice.c,v 1.29 2024/04/05 16:56:58 christos Exp $       */
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*
31  * apprentice - make one pass through /etc/magic, learning its secrets.
32  */
33 
34 #include "file.h"
35 
36 #ifndef   lint
37 #if 0
38 FILE_RCSID("@(#)$File: apprentice.c,v 1.342 2023/07/17 14:38:35 christos Exp $")
39 #else
40 __RCSID("$NetBSD: apprentice.c,v 1.29 2024/04/05 16:56:58 christos Exp $");
41 #endif
42 #endif    /* lint */
43 
44 #include "magic.h"
45 #include <stdlib.h>
46 #ifdef HAVE_UNISTD_H
47 #include <unistd.h>
48 #endif
49 #include <stddef.h>
50 #include <string.h>
51 #include <assert.h>
52 #include <ctype.h>
53 #include <fcntl.h>
54 #ifdef QUICK
55 #include <sys/mman.h>
56 #endif
57 #include <dirent.h>
58 #include <limits.h>
59 #ifdef HAVE_BYTESWAP_H
60 #include <byteswap.h>
61 #endif
62 #ifdef HAVE_SYS_BSWAP_H
63 #include <sys/bswap.h>
64 #endif
65 
66 
/*
 * EATAB: advance the pointer named `l' at the expansion site past any
 * ASCII whitespace.  Expands to a braced statement and relies on a
 * variable literally called `l' being in scope.
 */
#define	EATAB {while (isascii(CAST(unsigned char, *l)) && \
		      isspace(CAST(unsigned char, *l)))  ++l;}
/*
 * LOWCASE: yield the lower-case form of an upper-case character and the
 * argument itself otherwise.  The argument is evaluated more than once,
 * so it must not have side effects.
 */
#define LOWCASE(l) (isupper(CAST(unsigned char, l)) ? \
			tolower(CAST(unsigned char, l)) : (l))
/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
#if defined(__osf__) && defined(__DECC)
#ifdef MAP_FAILED
#undef MAP_FAILED
#endif
#endif

/*
 * Fallback for systems whose headers do not provide MAP_FAILED.  The
 * replacement list is fully parenthesized so that the macro behaves as a
 * single primary expression (e.g. under sizeof or next to a postfix
 * operator).
 */
#ifndef MAP_FAILED
#define MAP_FAILED ((void *)-1)
#endif

/* Fallback for systems whose headers do not provide MAP_FILE. */
#ifndef MAP_FILE
#define MAP_FILE 0
#endif

/* Allocation sizing constants (presumably growth steps; users are outside this view). */
#define ALLOC_CHUNK CAST(size_t, 10)
#define ALLOC_INCR  CAST(size_t, 200)

/*
 * Values of magic_map.type; apprentice_unmap() uses them to decide how
 * map->p is released: USER buffers are left alone, MALLOC buffers are
 * free()d and MMAP buffers are munmap()ed.
 */
#define MAP_TYPE_USER	0
#define MAP_TYPE_MALLOC	1
#define MAP_TYPE_MMAP	2
95 
/*
 * One magic entry under construction.
 * NOTE(review): the code that fills these fields is outside this view;
 * the field descriptions below are inferred from the names - confirm.
 */
struct magic_entry {
	struct magic *mp;	/* array of magic records for this entry */
	uint32_t cont_count;	/* presumably number of records in use */
	uint32_t max_count;	/* presumably number of records allocated */
};
101 
/*
 * A growable collection of magic_entry records.
 * NOTE(review): users of this structure are outside this view; the
 * meaning of count/max is inferred from the names - confirm.
 */
struct magic_entry_set {
	struct magic_entry *me;	/* array of entries */
	uint32_t count;		/* presumably entries in use */
	uint32_t max;		/* presumably entries allocated */
};
107 
/*
 * A loaded magic database.  apprentice_unmap() releases `p' according
 * to `type' (one of the MAP_TYPE_* values) and, for MAP_TYPE_MALLOC,
 * also frees any magic[] array that lies outside [p, p + len].
 */
struct magic_map {
	void *p;				/* backing buffer, may be NULL */
	size_t len;				/* size of the backing buffer */
	int type;				/* MAP_TYPE_USER/MALLOC/MMAP */
	struct magic *magic[MAGIC_SETS];	/* per-set magic arrays */
	uint32_t nmagic[MAGIC_SETS];		/* per-set element counts */
};
115 
/*
 * Lookup tables indexed by FILE_* type value; both are populated from
 * type_tbl by init_file_tables().
 */
static int file_formats[FILE_NAMES_SIZE];	/* FILE_FMT_* per type */
static const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];	/* type name per type */
const size_t file_nnames = FILE_NAMES_SIZE;
120 
121 file_private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
122 file_private int hextoint(int);
123 file_private const char *getstr(struct magic_set *, struct magic *, const char *,
124     int);
125 file_private int parse(struct magic_set *, struct magic_entry *, const char *,
126     size_t, int);
127 file_private void eatsize(const char **);
128 file_private int apprentice_1(struct magic_set *, const char *, int);
129 file_private ssize_t apprentice_magic_strength_1(const struct magic *);
130 file_private int apprentice_sort(const void *, const void *);
131 file_private void apprentice_list(struct mlist *, int );
132 file_private struct magic_map *apprentice_load(struct magic_set *,
133     const char *, int);
134 file_private struct mlist *mlist_alloc(void);
135 file_private void mlist_free_all(struct magic_set *);
136 file_private void mlist_free(struct mlist *);
137 file_private void byteswap(struct magic *, uint32_t);
138 file_private void bs1(struct magic *);
139 
140 #if defined(HAVE_BYTESWAP_H)
141 #define swap2(x)    bswap_16(x)
142 #define swap4(x)    bswap_32(x)
143 #define swap8(x)    bswap_64(x)
144 #elif defined(HAVE_SYS_BSWAP_H)
145 #define swap2(x)    bswap16(x)
146 #define swap4(x)    bswap32(x)
147 #define swap8(x)    bswap64(x)
148 #else
149 file_private uint16_t swap2(uint16_t);
150 file_private uint32_t swap4(uint32_t);
151 file_private uint64_t swap8(uint64_t);
152 #endif
153 
154 file_private char *mkdbname(struct magic_set *, const char *, int);
155 file_private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
156     size_t);
157 file_private struct magic_map *apprentice_map(struct magic_set *, const char *);
158 file_private int check_buffer(struct magic_set *, struct magic_map *, const char *);
159 file_private void apprentice_unmap(struct magic_map *);
160 file_private int apprentice_compile(struct magic_set *, struct magic_map *,
161     const char *);
162 file_private int check_format_type(const char *, int, const char **);
163 file_private int check_format(struct magic_set *, struct magic *);
164 file_private int get_op(char);
165 file_private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
166     size_t);
167 file_private int parse_strength(struct magic_set *, struct magic_entry *,
168     const char *, size_t);
169 file_private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
170     size_t);
171 file_private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
172     size_t);
173 
174 
/* Size of struct magic; apprentice_1() checks it against FILE_MAGICSIZE. */
file_private size_t magicsize = sizeof(struct magic);

/* Tab-separated column header (its users are outside this view). */
file_private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";

/*
 * Table of named fields and their parsers.  Each row holds the field
 * name, its length without the terminating NUL, and the matching
 * parse_<name>() handler; the table ends with an all-NULL/0 row.
 * NOTE(review): the code that consults this table is outside this view.
 */
file_private struct {
	const char *name;
	size_t len;
	int (*fun)(struct magic_set *, struct magic_entry *, const char *,
	    size_t);
} bang[] = {
/* DECLARE_FIELD(x) expands to { "x", strlen("x"), parse_x }. */
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(ext),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};
193 
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone entry point, built only when COMPILE_ONLY is defined:
 * compiles the magic file named by the single command-line argument.
 * Returns 0 on success and 1 on a usage error, when the magic set
 * cannot be opened, or when compilation fails.
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	char *progname;
	int ret = 0;

	/* Report errors under the basename of argv[0]. */
	progname = strrchr(argv[0], '/');
	progname = (progname != NULL) ? progname + 1 : argv[0];

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	if (magic_compile(ms, argv[1]) == -1) {
		ret = 1;
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	}
	magic_close(ms);
	return ret;
}
#endif /* COMPILE_ONLY */
226 
/*
 * One row of a type-name table (type_tbl, special_tbl).  A row with
 * len == 0 terminates the table.
 */
struct type_tbl_s {
	const char name[16];	/* type name as written in a magic file */
	const size_t len;	/* length of name, without the NUL */
	const int type;		/* FILE_* type value */
	const int format;	/* FILE_FMT_* class stored in file_formats[] */
};
233 
/*
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
 *
 * Table of all type names.  get_type() matches names by prefix, in
 * table order, and init_file_tables() walks the table to fill
 * file_names[] and file_formats[], asserting that the number of rows
 * before the terminator equals FILE_NAMES_SIZE.
 */
static const struct type_tbl_s type_tbl[] = {
/* XX(s) supplies both the name and its length; XX_NULL is the terminator. */
# define XX(s)		s, (sizeof(s) - 1)
# define XX_NULL	"", 0
	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_NONE },
	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
	{ XX("guid"),		FILE_GUID,		FILE_FMT_STR },
	{ XX("offset"),		FILE_OFFSET,		FILE_FMT_QUAD },
	{ XX("bevarint"),	FILE_BEVARINT,		FILE_FMT_STR },
	{ XX("levarint"),	FILE_LEVARINT,		FILE_FMT_STR },
	{ XX("msdosdate"),	FILE_MSDOSDATE,		FILE_FMT_STR },
	{ XX("lemsdosdate"),	FILE_LEMSDOSDATE,	FILE_FMT_STR },
	{ XX("bemsdosdate"),	FILE_BEMSDOSDATE,	FILE_FMT_STR },
	{ XX("msdostime"),	FILE_MSDOSTIME,		FILE_FMT_STR },
	{ XX("lemsdostime"),	FILE_LEMSDOSTIME,	FILE_FMT_STR },
	{ XX("bemsdostime"),	FILE_BEMSDOSTIME,	FILE_FMT_STR },
	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};

/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.
 */
static const struct type_tbl_s special_tbl[] = {
	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};
# undef XX
# undef XX_NULL
320 
321 file_private int
get_type(const struct type_tbl_s * tbl,const char * l,const char ** t)322 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
323 {
324           const struct type_tbl_s *p;
325 
326           for (p = tbl; p->len; p++) {
327                     if (strncmp(l, p->name, p->len) == 0) {
328                               if (t)
329                                         *t = l + p->len;
330                               break;
331                     }
332           }
333           return p->type;
334 }
335 
336 file_private off_t
maxoff_t(void)337 maxoff_t(void) {
338           if (/*CONSTCOND*/sizeof(off_t) == sizeof(int))
339                     return CAST(off_t, INT_MAX);
340           if (/*CONSTCOND*/sizeof(off_t) == sizeof(long))
341                     return CAST(off_t, LONG_MAX);
342           return 0x7fffffff;
343 }
344 
345 file_private int
get_standard_integer_type(const char * l,const char ** t)346 get_standard_integer_type(const char *l, const char **t)
347 {
348           int type;
349 
350           if (isalpha(CAST(unsigned char, l[1]))) {
351                     switch (l[1]) {
352                     case 'C':
353                               /* "dC" and "uC" */
354                               type = FILE_BYTE;
355                               break;
356                     case 'S':
357                               /* "dS" and "uS" */
358                               type = FILE_SHORT;
359                               break;
360                     case 'I':
361                     case 'L':
362                               /*
363                                * "dI", "dL", "uI", and "uL".
364                                *
365                                * XXX - the actual Single UNIX Specification says
366                                * that "L" means "long", as in the C data type,
367                                * but we treat it as meaning "4-byte integer".
368                                * Given that the OS X version of file 5.04 did
369                                * the same, I guess that passes the actual SUS
370                                * validation suite; having "dL" be dependent on
371                                * how big a "long" is on the machine running
372                                * "file" is silly.
373                                */
374                               type = FILE_LONG;
375                               break;
376                     case 'Q':
377                               /* "dQ" and "uQ" */
378                               type = FILE_QUAD;
379                               break;
380                     default:
381                               /* "d{anything else}", "u{anything else}" */
382                               return FILE_INVALID;
383                     }
384                     l += 2;
385           } else if (isdigit(CAST(unsigned char, l[1]))) {
386                     /*
387                      * "d{num}" and "u{num}"; we only support {num} values
388                      * of 1, 2, 4, and 8 - the Single UNIX Specification
389                      * doesn't say anything about whether arbitrary
390                      * values should be supported, but both the Solaris 10
391                      * and OS X Mountain Lion versions of file passed the
392                      * Single UNIX Specification validation suite, and
393                      * neither of them support values bigger than 8 or
394                      * non-power-of-2 values.
395                      */
396                     if (isdigit(CAST(unsigned char, l[2]))) {
397                               /* Multi-digit, so > 9 */
398                               return FILE_INVALID;
399                     }
400                     switch (l[1]) {
401                     case '1':
402                               type = FILE_BYTE;
403                               break;
404                     case '2':
405                               type = FILE_SHORT;
406                               break;
407                     case '4':
408                               type = FILE_LONG;
409                               break;
410                     case '8':
411                               type = FILE_QUAD;
412                               break;
413                     default:
414                               /* XXX - what about 3, 5, 6, or 7? */
415                               return FILE_INVALID;
416                     }
417                     l += 2;
418           } else {
419                     /*
420                      * "d" or "u" by itself.
421                      */
422                     type = FILE_LONG;
423                     ++l;
424           }
425           if (t)
426                     *t = l;
427           return type;
428 }
429 
430 file_private void
init_file_tables(void)431 init_file_tables(void)
432 {
433           static int done = 0;
434           const struct type_tbl_s *p;
435 
436           if (done)
437                     return;
438           done++;
439 
440           for (p = type_tbl; p->len; p++) {
441                     assert(p->type < FILE_NAMES_SIZE);
442                     file_names[p->type] = p->name;
443                     file_formats[p->type] = p->format;
444           }
445           assert(p - type_tbl == FILE_NAMES_SIZE);
446 }
447 
448 file_private int
add_mlist(struct mlist * mlp,struct magic_map * map,size_t idx)449 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
450 {
451           struct mlist *ml;
452 
453           mlp->map = NULL;
454           if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL)
455                     return -1;
456 
457           ml->map = idx == 0 ? map : NULL;
458           ml->magic = map->magic[idx];
459           ml->nmagic = map->nmagic[idx];
460           if (ml->nmagic) {
461                     ml->magic_rxcomp = CAST(file_regex_t **,
462                         calloc(ml->nmagic, sizeof(*ml->magic_rxcomp)));
463                     if (ml->magic_rxcomp == NULL) {
464                               free(ml);
465                               return -1;
466                     }
467           } else
468                     ml->magic_rxcomp = NULL;
469           mlp->prev->next = ml;
470           ml->prev = mlp->prev;
471           ml->next = mlp;
472           mlp->prev = ml;
473           return 0;
474 }
475 
476 /*
477  * Handle one file or directory.
478  */
479 file_private int
apprentice_1(struct magic_set * ms,const char * fn,int action)480 apprentice_1(struct magic_set *ms, const char *fn, int action)
481 {
482           struct magic_map *map;
483 #ifndef COMPILE_ONLY
484           size_t i;
485 #endif
486 
487           if (magicsize != FILE_MAGICSIZE) {
488                     file_error(ms, 0, "magic element size %lu != %lu",
489                         CAST(unsigned long, sizeof(*map->magic[0])),
490                         CAST(unsigned long, FILE_MAGICSIZE));
491                     return -1;
492           }
493 
494           if (action == FILE_COMPILE) {
495                     map = apprentice_load(ms, fn, action);
496                     if (map == NULL)
497                               return -1;
498                     return apprentice_compile(ms, map, fn);
499           }
500 
501 #ifndef COMPILE_ONLY
502           map = apprentice_map(ms, fn);
503           if (map == NULL) {
504                     if (ms->flags & MAGIC_CHECK)
505                               file_magwarn(NULL, "using regular magic file `%s'", fn);
506                     map = apprentice_load(ms, fn, action);
507                     if (map == NULL)
508                               return -1;
509           }
510 
511           for (i = 0; i < MAGIC_SETS; i++) {
512                     if (add_mlist(ms->mlist[i], map, i) == -1) {
513                               /* failed to add to any list, free explicitly */
514                               if (i == 0)
515                                         apprentice_unmap(map);
516                               else
517                                         mlist_free_all(ms);
518                               file_oomem(ms, sizeof(*ms->mlist[0]));
519                               return -1;
520                     }
521           }
522 
523           if (action == FILE_LIST) {
524                     for (i = 0; i < MAGIC_SETS; i++) {
525                               printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
526                                   i);
527                               apprentice_list(ms->mlist[i], BINTEST);
528                               printf("Text patterns:\n");
529                               apprentice_list(ms->mlist[i], TEXTTEST);
530                     }
531           }
532           return 0;
533 #else
534           return 0;
535 #endif /* COMPILE_ONLY */
536 }
537 
538 file_protected void
file_ms_free(struct magic_set * ms)539 file_ms_free(struct magic_set *ms)
540 {
541           size_t i;
542           if (ms == NULL)
543                     return;
544           for (i = 0; i < MAGIC_SETS; i++)
545                     mlist_free(ms->mlist[i]);
546           free(ms->o.pbuf);
547           free(ms->o.buf);
548           free(ms->c.li);
549 #ifdef USE_C_LOCALE
550           freelocale(ms->c_lc_ctype);
551 #endif
552           free(ms);
553 }
554 
555 file_protected struct magic_set *
file_ms_alloc(int flags)556 file_ms_alloc(int flags)
557 {
558           struct magic_set *ms;
559           size_t i, len;
560 
561           if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u),
562               sizeof(*ms)))) == NULL)
563                     return NULL;
564 
565           if (magic_setflags(ms, flags) == -1) {
566                     errno = EINVAL;
567                     goto free;
568           }
569 
570           ms->o.buf = ms->o.pbuf = NULL;
571           ms->o.blen = 0;
572           len = (ms->c.len = 10) * sizeof(*ms->c.li);
573 
574           if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
575                     goto free;
576 
577           ms->event_flags = 0;
578           ms->error = -1;
579           for (i = 0; i < MAGIC_SETS; i++)
580                     ms->mlist[i] = NULL;
581           ms->file = "unknown";
582           ms->line = 0;
583           ms->indir_max = FILE_INDIR_MAX;
584           ms->name_max = FILE_NAME_MAX;
585           ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
586           ms->elf_shsize_max = FILE_ELF_SHSIZE_MAX;
587           ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
588           ms->elf_notes_max = FILE_ELF_NOTES_MAX;
589           ms->regex_max = FILE_REGEX_MAX;
590           ms->bytes_max = FILE_BYTES_MAX;
591           ms->encoding_max = FILE_ENCODING_MAX;
592 #ifdef USE_C_LOCALE
593           ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
594           assert(ms->c_lc_ctype != NULL);
595 #endif
596           return ms;
597 free:
598           free(ms);
599           return NULL;
600 }
601 
602 file_private void
apprentice_unmap(struct magic_map * map)603 apprentice_unmap(struct magic_map *map)
604 {
605           size_t i;
606           char *p;
607           if (map == NULL)
608                     return;
609 
610           switch (map->type) {
611           case MAP_TYPE_USER:
612                     break;
613           case MAP_TYPE_MALLOC:
614                     p = CAST(char *, map->p);
615                     for (i = 0; i < MAGIC_SETS; i++) {
616                               char *b = RCAST(char *, map->magic[i]);
617                               if (p != NULL && b >= p && b <= p + map->len)
618                                         continue;
619                               free(b);
620                     }
621                     free(p);
622                     break;
623 #ifdef QUICK
624           case MAP_TYPE_MMAP:
625                     if (map->p && map->p != MAP_FAILED)
626                               (void)munmap(map->p, map->len);
627                     break;
628 #endif
629           default:
630                     fprintf(stderr, "Bad map type %d", map->type);
631                     abort();
632           }
633           free(map);
634 }
635 
636 file_private struct mlist *
mlist_alloc(void)637 mlist_alloc(void)
638 {
639           struct mlist *mlist;
640           if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) {
641                     return NULL;
642           }
643           mlist->next = mlist->prev = mlist;
644           return mlist;
645 }
646 
647 file_private void
mlist_free_all(struct magic_set * ms)648 mlist_free_all(struct magic_set *ms)
649 {
650           size_t i;
651 
652           for (i = 0; i < MAGIC_SETS; i++) {
653                     mlist_free(ms->mlist[i]);
654                     ms->mlist[i] = NULL;
655           }
656 }
657 
658 file_private void
mlist_free_one(struct mlist * ml)659 mlist_free_one(struct mlist *ml)
660 {
661           size_t i;
662 
663           if (ml->map)
664                     apprentice_unmap(CAST(struct magic_map *, ml->map));
665 
666           for (i = 0; i < ml->nmagic; ++i) {
667                     if (ml->magic_rxcomp[i]) {
668                               file_regfree(ml->magic_rxcomp[i]);
669                               free(ml->magic_rxcomp[i]);
670                               ml->magic_rxcomp[i] = NULL;
671                     }
672           }
673           free(ml->magic_rxcomp);
674           ml->magic_rxcomp = NULL;
675           free(ml);
676 }
677 
678 file_private void
mlist_free(struct mlist * mlist)679 mlist_free(struct mlist *mlist)
680 {
681           struct mlist *ml, *next;
682 
683           if (mlist == NULL)
684                     return;
685 
686           for (ml = mlist->next; ml != mlist;) {
687                     next = ml->next;
688                     mlist_free_one(ml);
689                     ml = next;
690           }
691           mlist_free_one(mlist);
692 }
693 
694 #ifndef COMPILE_ONLY
695 /* void **bufs: an array of compiled magic files */
696 file_protected int
buffer_apprentice(struct magic_set * ms,struct magic ** bufs,size_t * sizes,size_t nbufs)697 buffer_apprentice(struct magic_set *ms, struct magic **bufs,
698     size_t *sizes, size_t nbufs)
699 {
700           size_t i, j;
701           struct magic_map *map;
702 
703           if (nbufs == 0)
704                     return -1;
705 
706           (void)file_reset(ms, 0);
707 
708           init_file_tables();
709 
710           for (i = 0; i < MAGIC_SETS; i++) {
711                     mlist_free(ms->mlist[i]);
712                     if ((ms->mlist[i] = mlist_alloc()) == NULL) {
713                               file_oomem(ms, sizeof(*ms->mlist[0]));
714                               goto fail;
715                     }
716           }
717 
718           for (i = 0; i < nbufs; i++) {
719                     map = apprentice_buf(ms, bufs[i], sizes[i]);
720                     if (map == NULL)
721                               goto fail;
722 
723                     for (j = 0; j < MAGIC_SETS; j++) {
724                               if (add_mlist(ms->mlist[j], map, j) == -1) {
725                                         file_oomem(ms, sizeof(*ms->mlist[0]));
726                                         goto fail;
727                               }
728                     }
729           }
730 
731           return 0;
732 fail:
733           mlist_free_all(ms);
734           return -1;
735 }
736 #endif
737 
738 /* const char *fn: list of magic files and directories */
739 file_protected int
file_apprentice(struct magic_set * ms,const char * fn,int action)740 file_apprentice(struct magic_set *ms, const char *fn, int action)
741 {
742           char *p, *mfn;
743           int fileerr, errs = -1;
744           size_t i, j;
745 
746           (void)file_reset(ms, 0);
747 
748           if ((fn = magic_getpath(fn, action)) == NULL)
749                     return -1;
750 
751           init_file_tables();
752 
753           if ((mfn = strdup(fn)) == NULL) {
754                     file_oomem(ms, strlen(fn));
755                     return -1;
756           }
757 
758           for (i = 0; i < MAGIC_SETS; i++) {
759                     mlist_free(ms->mlist[i]);
760                     if ((ms->mlist[i] = mlist_alloc()) == NULL) {
761                               file_oomem(ms, sizeof(*ms->mlist[0]));
762                               for (j = 0; j < i; j++) {
763                                         mlist_free(ms->mlist[j]);
764                                         ms->mlist[j] = NULL;
765                               }
766                               free(mfn);
767                               return -1;
768                     }
769           }
770           fn = mfn;
771 
772           while (fn) {
773                     p = CCAST(char *, strchr(fn, PATHSEP));
774                     if (p)
775                               *p++ = '\0';
776                     if (*fn == '\0')
777                               break;
778                     fileerr = apprentice_1(ms, fn, action);
779                     errs = MAX(errs, fileerr);
780                     fn = p;
781           }
782 
783           free(mfn);
784 
785           if (errs == -1) {
786                     for (i = 0; i < MAGIC_SETS; i++) {
787                               mlist_free(ms->mlist[i]);
788                               ms->mlist[i] = NULL;
789                     }
790                     file_error(ms, 0, "could not find any valid magic files!");
791                     return -1;
792           }
793 
794 #if 0
795           /*
796            * Always leave the database loaded
797            */
798           if (action == FILE_LOAD)
799                     return 0;
800 
801           for (i = 0; i < MAGIC_SETS; i++) {
802                     mlist_free(ms->mlist[i]);
803                     ms->mlist[i] = NULL;
804           }
805 #endif
806 
807           switch (action) {
808           case FILE_LOAD:
809           case FILE_COMPILE:
810           case FILE_CHECK:
811           case FILE_LIST:
812                     return 0;
813           default:
814                     file_error(ms, 0, "Invalid action %d", action);
815                     return -1;
816           }
817 }
818 
/*
 * Estimate how many literal characters a regular expression contains.
 * The result feeds the "strength" heuristic, which approximates how
 * specific a pattern's matches are.  Scoring rules:
 *	- a backslash together with the character it escapes scores 1
 *	- the bare metacharacters ? * . + ^ $ score 0
 *	- a [...] set scores 1 (earned by its closing ']')
 *	- a {...} repetition scores 0
 *	- every other character scores 1
 *	- a total of 0 is reported as 1
 */
file_private size_t
nonmagic(const char *str)
{
	size_t literals = 0;
	const char *cur = str;

	while (*cur != '\0') {
		switch (*cur) {
		case '\\':
			/* Consume the escaped character too, unless the
			   string ends right after the backslash. */
			cur += (cur[1] != '\0') ? 2 : 1;
			literals++;
			break;
		case '?':
		case '*':
		case '.':
		case '+':
		case '^':
		case '$':
			cur++;
			break;
		case '[':
			/* Stop ON the ']' so the next pass scores it as a
			   literal; an unterminated set stops on the NUL
			   and scores nothing. */
			while (*cur != '\0' && *cur != ']')
				cur++;
			break;
		case '{':
			/* Skip the whole repetition, closing brace
			   included. */
			while (*cur != '\0' && *cur != '}')
				cur++;
			if (*cur == '}')
				cur++;
			break;
		default:
			literals++;
			cur++;
			break;
		}
	}

	return literals != 0 ? literals : 1;	/* never report 0 */
}
867 
868 
869 file_private size_t
typesize(int type)870 typesize(int type)
871 {
872           switch (type) {
873           case FILE_BYTE:
874                     return 1;
875 
876           case FILE_SHORT:
877           case FILE_LESHORT:
878           case FILE_BESHORT:
879           case FILE_MSDOSDATE:
880           case FILE_BEMSDOSDATE:
881           case FILE_LEMSDOSDATE:
882           case FILE_MSDOSTIME:
883           case FILE_BEMSDOSTIME:
884           case FILE_LEMSDOSTIME:
885                     return 2;
886 
887           case FILE_LONG:
888           case FILE_LELONG:
889           case FILE_BELONG:
890           case FILE_MELONG:
891                     return 4;
892 
893           case FILE_DATE:
894           case FILE_LEDATE:
895           case FILE_BEDATE:
896           case FILE_MEDATE:
897           case FILE_LDATE:
898           case FILE_LELDATE:
899           case FILE_BELDATE:
900           case FILE_MELDATE:
901           case FILE_FLOAT:
902           case FILE_BEFLOAT:
903           case FILE_LEFLOAT:
904           case FILE_BEID3:
905           case FILE_LEID3:
906                     return 4;
907 
908           case FILE_QUAD:
909           case FILE_BEQUAD:
910           case FILE_LEQUAD:
911           case FILE_QDATE:
912           case FILE_LEQDATE:
913           case FILE_BEQDATE:
914           case FILE_QLDATE:
915           case FILE_LEQLDATE:
916           case FILE_BEQLDATE:
917           case FILE_QWDATE:
918           case FILE_LEQWDATE:
919           case FILE_BEQWDATE:
920           case FILE_DOUBLE:
921           case FILE_BEDOUBLE:
922           case FILE_LEDOUBLE:
923           case FILE_OFFSET:
924           case FILE_BEVARINT:
925           case FILE_LEVARINT:
926                     return 8;
927 
928           case FILE_GUID:
929                     return 16;
930 
931           default:
932                     return FILE_BADSIZE;
933           }
934 }
935 
936 /*
937  * Get weight of this magic entry, for sorting purposes.
938  */
939 file_private ssize_t
apprentice_magic_strength_1(const struct magic * m)940 apprentice_magic_strength_1(const struct magic *m)
941 {
942 #define MULT 10U
943           size_t ts, v;
944           ssize_t val = 2 * MULT;       /* baseline strength */
945 
946           switch (m->type) {
947           case FILE_DEFAULT:  /* make sure this sorts last */
948                     if (m->factor_op != FILE_FACTOR_OP_NONE) {
949                               file_magwarn(NULL, "Usupported factor_op in default %d",
950                                   m->factor_op);
951                     }
952                     return 0;
953 
954           case FILE_BYTE:
955           case FILE_SHORT:
956           case FILE_LESHORT:
957           case FILE_BESHORT:
958           case FILE_LONG:
959           case FILE_LELONG:
960           case FILE_BELONG:
961           case FILE_MELONG:
962           case FILE_DATE:
963           case FILE_LEDATE:
964           case FILE_BEDATE:
965           case FILE_MEDATE:
966           case FILE_LDATE:
967           case FILE_LELDATE:
968           case FILE_BELDATE:
969           case FILE_MELDATE:
970           case FILE_FLOAT:
971           case FILE_BEFLOAT:
972           case FILE_LEFLOAT:
973           case FILE_QUAD:
974           case FILE_BEQUAD:
975           case FILE_LEQUAD:
976           case FILE_QDATE:
977           case FILE_LEQDATE:
978           case FILE_BEQDATE:
979           case FILE_QLDATE:
980           case FILE_LEQLDATE:
981           case FILE_BEQLDATE:
982           case FILE_QWDATE:
983           case FILE_LEQWDATE:
984           case FILE_BEQWDATE:
985           case FILE_DOUBLE:
986           case FILE_BEDOUBLE:
987           case FILE_LEDOUBLE:
988           case FILE_BEVARINT:
989           case FILE_LEVARINT:
990           case FILE_GUID:
991           case FILE_BEID3:
992           case FILE_LEID3:
993           case FILE_OFFSET:
994           case FILE_MSDOSDATE:
995           case FILE_BEMSDOSDATE:
996           case FILE_LEMSDOSDATE:
997           case FILE_MSDOSTIME:
998           case FILE_BEMSDOSTIME:
999           case FILE_LEMSDOSTIME:
1000                     ts = typesize(m->type);
1001                     if (ts == FILE_BADSIZE) {
1002                               (void)fprintf(stderr, "Bad size for type %d\n",
1003                                   m->type);
1004                               abort();
1005                     }
1006                     val += ts * MULT;
1007                     break;
1008 
1009           case FILE_PSTRING:
1010           case FILE_STRING:
1011           case FILE_OCTAL:
1012                     val += m->vallen * MULT;
1013                     break;
1014 
1015           case FILE_BESTRING16:
1016           case FILE_LESTRING16:
1017                     val += m->vallen * MULT / 2;
1018                     break;
1019 
1020           case FILE_SEARCH:
1021                     if (m->vallen == 0)
1022                               break;
1023                     val += m->vallen * MAX(MULT / m->vallen, 1);
1024                     break;
1025 
1026           case FILE_REGEX:
1027                     v = nonmagic(m->value.s);
1028                     val += v * MAX(MULT / v, 1);
1029                     break;
1030 
1031           case FILE_INDIRECT:
1032           case FILE_NAME:
1033           case FILE_USE:
1034           case FILE_CLEAR:
1035                     break;
1036 
1037           case FILE_DER:
1038                     val += MULT;
1039                     break;
1040 
1041           default:
1042                     (void)fprintf(stderr, "Bad type %d\n", m->type);
1043                     abort();
1044           }
1045 
1046           switch (m->reln) {
1047           case 'x': /* matches anything penalize */
1048           case '!':       /* matches almost anything penalize */
1049                     val = 0;
1050                     break;
1051 
1052           case '=': /* Exact match, prefer */
1053                     val += MULT;
1054                     break;
1055 
1056           case '>':
1057           case '<': /* comparison match reduce strength */
1058                     val -= 2 * MULT;
1059                     break;
1060 
1061           case '^':
1062           case '&': /* masking bits, we could count them too */
1063                     val -= MULT;
1064                     break;
1065 
1066           default:
1067                     (void)fprintf(stderr, "Bad relation %c\n", m->reln);
1068                     abort();
1069           }
1070 
1071           return val;
1072 }
1073 
1074 
1075 /*ARGSUSED*/
1076 file_protected size_t
file_magic_strength(const struct magic * m,size_t nmagic)1077 file_magic_strength(const struct magic *m,
1078     size_t nmagic __attribute__((__unused__)))
1079 {
1080           ssize_t val = apprentice_magic_strength_1(m);
1081 
1082 #ifdef notyet
1083           if (m->desc[0] == '\0') {
1084                     size_t i;
1085                     /*
1086                      * Magic entries with no description get their continuations
1087                      * added
1088                      */
1089                     for (i = 1; m[i].cont_level != 0 && i < MIN(nmagic, 3); i++) {
1090                               ssize_t v = apprentice_magic_strength_1(&m[i]) >>
1091                                   (i + 1);
1092                               val += v;
1093                               if (m[i].desc[0] != '\0')
1094                                         break;
1095                     }
1096           }
1097 #endif
1098 
1099           switch (m->factor_op) {
1100           case FILE_FACTOR_OP_NONE:
1101                     break;
1102           case FILE_FACTOR_OP_PLUS:
1103                     val += m->factor;
1104                     break;
1105           case FILE_FACTOR_OP_MINUS:
1106                     val -= m->factor;
1107                     break;
1108           case FILE_FACTOR_OP_TIMES:
1109                     val *= m->factor;
1110                     break;
1111           case FILE_FACTOR_OP_DIV:
1112                     val /= m->factor;
1113                     break;
1114           default:
1115                     (void)fprintf(stderr, "Bad factor_op %u\n", m->factor_op);
1116                     abort();
1117           }
1118 
1119           if (val <= 0)       /* ensure we only return 0 for FILE_DEFAULT */
1120                     val = 1;
1121 
1122 #ifndef notyet
1123           /*
1124            * Magic entries with no description get a bonus because they depend
1125            * on subsequent magic entries to print something.
1126            */
1127           if (m->desc[0] == '\0')
1128                     val++;
1129 #endif
1130 
1131           return val;
1132 }
1133 
1134 /*
1135  * Sort callback for sorting entries by "strength" (basically length)
1136  */
1137 file_private int
apprentice_sort(const void * a,const void * b)1138 apprentice_sort(const void *a, const void *b)
1139 {
1140           const struct magic_entry *ma = CAST(const struct magic_entry *, a);
1141           const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1142           size_t sa = file_magic_strength(ma->mp, ma->cont_count);
1143           size_t sb = file_magic_strength(mb->mp, mb->cont_count);
1144           if (sa == sb) {
1145                     int x = memcmp(ma->mp, mb->mp, sizeof(*ma->mp));
1146                     if (x == 0)
1147                               abort();
1148                     return x > 0 ? -1 : 1;
1149           }
1150           else if (sa > sb)
1151                     return -1;
1152           else
1153                     return 1;
1154 }
1155 
1156 /*
1157  * Shows sorted patterns list in the order which is used for the matching
1158  */
1159 file_private void
apprentice_list(struct mlist * mlist,int mode)1160 apprentice_list(struct mlist *mlist, int mode)
1161 {
1162           uint32_t magindex, descindex, mimeindex, lineindex;
1163           struct mlist *ml;
1164           for (ml = mlist->next; ml != mlist; ml = ml->next) {
1165                     for (magindex = 0; magindex < ml->nmagic; magindex++) {
1166                               struct magic *m = &ml->magic[magindex];
1167                               if ((m->flag & mode) != mode) {
1168                                         /* Skip sub-tests */
1169                                         while (magindex + 1 < ml->nmagic &&
1170                                                ml->magic[magindex + 1].cont_level != 0)
1171                                                   ++magindex;
1172                                         continue; /* Skip to next top-level test*/
1173                               }
1174 
1175                               /*
1176                                * Try to iterate over the tree until we find item with
1177                                * description/mimetype.
1178                                */
1179                               lineindex = descindex = mimeindex = magindex;
1180                               for (; magindex + 1 < ml->nmagic &&
1181                                  ml->magic[magindex + 1].cont_level != 0;
1182                                  magindex++) {
1183                                         uint32_t mi = magindex + 1;
1184                                         if (*ml->magic[descindex].desc == '\0'
1185                                             && *ml->magic[mi].desc)
1186                                                   descindex = mi;
1187                                         if (*ml->magic[mimeindex].mimetype == '\0'
1188                                             && *ml->magic[mi].mimetype)
1189                                                   mimeindex = mi;
1190                               }
1191 
1192                               printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
1193                                   file_magic_strength(m, ml->nmagic - magindex),
1194                                   ml->magic[lineindex].lineno,
1195                                   ml->magic[descindex].desc,
1196                                   ml->magic[mimeindex].mimetype);
1197                     }
1198           }
1199 }
1200 
1201 file_private void
set_test_type(struct magic * mstart,struct magic * m)1202 set_test_type(struct magic *mstart, struct magic *m)
1203 {
1204           switch (m->type) {
1205           case FILE_BYTE:
1206           case FILE_SHORT:
1207           case FILE_LONG:
1208           case FILE_DATE:
1209           case FILE_BESHORT:
1210           case FILE_BELONG:
1211           case FILE_BEDATE:
1212           case FILE_LESHORT:
1213           case FILE_LELONG:
1214           case FILE_LEDATE:
1215           case FILE_LDATE:
1216           case FILE_BELDATE:
1217           case FILE_LELDATE:
1218           case FILE_MEDATE:
1219           case FILE_MELDATE:
1220           case FILE_MELONG:
1221           case FILE_QUAD:
1222           case FILE_LEQUAD:
1223           case FILE_BEQUAD:
1224           case FILE_QDATE:
1225           case FILE_LEQDATE:
1226           case FILE_BEQDATE:
1227           case FILE_QLDATE:
1228           case FILE_LEQLDATE:
1229           case FILE_BEQLDATE:
1230           case FILE_QWDATE:
1231           case FILE_LEQWDATE:
1232           case FILE_BEQWDATE:
1233           case FILE_FLOAT:
1234           case FILE_BEFLOAT:
1235           case FILE_LEFLOAT:
1236           case FILE_DOUBLE:
1237           case FILE_BEDOUBLE:
1238           case FILE_LEDOUBLE:
1239           case FILE_BEVARINT:
1240           case FILE_LEVARINT:
1241           case FILE_DER:
1242           case FILE_GUID:
1243           case FILE_OFFSET:
1244           case FILE_MSDOSDATE:
1245           case FILE_BEMSDOSDATE:
1246           case FILE_LEMSDOSDATE:
1247           case FILE_MSDOSTIME:
1248           case FILE_BEMSDOSTIME:
1249           case FILE_LEMSDOSTIME:
1250           case FILE_OCTAL:
1251                     mstart->flag |= BINTEST;
1252                     break;
1253           case FILE_STRING:
1254           case FILE_PSTRING:
1255           case FILE_BESTRING16:
1256           case FILE_LESTRING16:
1257                     /* Allow text overrides */
1258                     if (mstart->str_flags & STRING_TEXTTEST)
1259                               mstart->flag |= TEXTTEST;
1260                     else
1261                               mstart->flag |= BINTEST;
1262                     break;
1263           case FILE_REGEX:
1264           case FILE_SEARCH:
1265                     /* Check for override */
1266                     if (mstart->str_flags & STRING_BINTEST)
1267                               mstart->flag |= BINTEST;
1268                     if (mstart->str_flags & STRING_TEXTTEST)
1269                               mstart->flag |= TEXTTEST;
1270 
1271                     if (mstart->flag & (TEXTTEST|BINTEST))
1272                               break;
1273 
1274                     /* binary test if pattern is not text */
1275                     if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL,
1276                         NULL) <= 0)
1277                               mstart->flag |= BINTEST;
1278                     else
1279                               mstart->flag |= TEXTTEST;
1280                     break;
1281           case FILE_DEFAULT:
1282                     /* can't deduce anything; we shouldn't see this at the
1283                        top level anyway */
1284                     break;
1285           case FILE_INVALID:
1286           default:
1287                     /* invalid search type, but no need to complain here */
1288                     break;
1289           }
1290 }
1291 
1292 file_private int
addentry(struct magic_set * ms,struct magic_entry * me,struct magic_entry_set * mset)1293 addentry(struct magic_set *ms, struct magic_entry *me,
1294    struct magic_entry_set *mset)
1295 {
1296           size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1297           if (mset[i].me == NULL || mset[i].count == mset[i].max) {
1298                     struct magic_entry *mp;
1299 
1300                     size_t incr = mset[i].max + ALLOC_INCR;
1301                     if ((mp = CAST(struct magic_entry *,
1302                         realloc(mset[i].me, sizeof(*mp) * incr))) ==
1303                         NULL) {
1304                               file_oomem(ms, sizeof(*mp) * incr);
1305                               return -1;
1306                     }
1307                     (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1308                         ALLOC_INCR);
1309                     mset[i].me = mp;
1310                     mset[i].max = CAST(uint32_t, incr);
1311                     assert(mset[i].max == incr);
1312           }
1313           mset[i].me[mset[i].count++] = *me;
1314           memset(me, 0, sizeof(*me));
1315           return 0;
1316 }
1317 
1318 /*
1319  * Load and parse one file.
1320  */
1321 file_private void
load_1(struct magic_set * ms,int action,const char * fn,int * errs,struct magic_entry_set * mset)1322 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1323    struct magic_entry_set *mset)
1324 {
1325           size_t lineno = 0, llen = 0;
1326           char *line = NULL;
1327           ssize_t len;
1328           struct magic_entry me;
1329 
1330           FILE *f = fopen(ms->file = fn, "r");
1331           if (f == NULL) {
1332                     if (errno != ENOENT)
1333                               file_error(ms, errno, "cannot read magic file `%s'",
1334                                            fn);
1335                     (*errs)++;
1336                     return;
1337           }
1338 
1339           memset(&me, 0, sizeof(me));
1340           /* read and parse this file */
1341           for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
1342               ms->line++) {
1343                     if (len == 0) /* null line, garbage, etc */
1344                               continue;
1345                     if (line[len - 1] == '\n') {
1346                               lineno++;
1347                               line[len - 1] = '\0'; /* delete newline */
1348                     }
1349                     switch (line[0]) {
1350                     case '\0':          /* empty, do not parse */
1351                     case '#': /* comment, do not parse */
1352                               continue;
1353                     case '!':
1354                               if (line[1] == ':') {
1355                                         size_t i;
1356 
1357                                         for (i = 0; bang[i].name != NULL; i++) {
1358                                                   if (CAST(size_t, len - 2) > bang[i].len &&
1359                                                       memcmp(bang[i].name, line + 2,
1360                                                       bang[i].len) == 0)
1361                                                             break;
1362                                         }
1363                                         if (bang[i].name == NULL) {
1364                                                   file_error(ms, 0,
1365                                                       "Unknown !: entry `%s'", line);
1366                                                   (*errs)++;
1367                                                   continue;
1368                                         }
1369                                         if (me.mp == NULL) {
1370                                                   file_error(ms, 0,
1371                                                       "No current entry for :!%s type",
1372                                                             bang[i].name);
1373                                                   (*errs)++;
1374                                                   continue;
1375                                         }
1376                                         if ((*bang[i].fun)(ms, &me,
1377                                             line + bang[i].len + 2,
1378                                             len - bang[i].len - 2) != 0) {
1379                                                   (*errs)++;
1380                                                   continue;
1381                                         }
1382                                         continue;
1383                               }
1384                               /*FALLTHROUGH*/
1385                     default:
1386                     again:
1387                               switch (parse(ms, &me, line, lineno, action)) {
1388                               case 0:
1389                                         continue;
1390                               case 1:
1391                                         (void)addentry(ms, &me, mset);
1392                                         goto again;
1393                               default:
1394                                         (*errs)++;
1395                                         break;
1396                               }
1397                     }
1398           }
1399           if (me.mp)
1400                     (void)addentry(ms, &me, mset);
1401           free(line);
1402           (void)fclose(f);
1403 }
1404 
/*
 * qsort(3) callback: p1 and p2 each point at a string pointer; the two
 * strings are ordered with strcmp().  Used to sort the names of the
 * magic files found in a directory before they are loaded.
 */
file_private int
cmpstrp(const void *p1, const void *p2)
{
	char *const *lhs = RCAST(char *const *, p1);
	char *const *rhs = RCAST(char *const *, p2);

	return strcmp(*lhs, *rhs);
}
1414 
1415 
1416 file_private uint32_t
set_text_binary(struct magic_set * ms,struct magic_entry * me,uint32_t nme,uint32_t starttest)1417 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1418     uint32_t starttest)
1419 {
1420           static const char text[] = "text";
1421           static const char binary[] = "binary";
1422           static const size_t len = sizeof(text);
1423 
1424           uint32_t i = starttest;
1425 
1426           do {
1427                     set_test_type(me[starttest].mp, me[i].mp);
1428                     if ((ms->flags & MAGIC_DEBUG) == 0)
1429                               continue;
1430                     (void)fprintf(stderr, "%s%s%s: %s\n",
1431                         me[i].mp->mimetype,
1432                         me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1433                         me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1434                         me[i].mp->flag & BINTEST ? binary : text);
1435                     if (me[i].mp->flag & BINTEST) {
1436                               char *p = strstr(me[i].mp->desc, text);
1437                               if (p && (p == me[i].mp->desc ||
1438                                   isspace(CAST(unsigned char, p[-1]))) &&
1439                                   (p + len - me[i].mp->desc == MAXstring
1440                                   || (p[len] == '\0' ||
1441                                   isspace(CAST(unsigned char, p[len])))))
1442                                         (void)fprintf(stderr, "*** Possible "
1443                                             "binary test for text type\n");
1444                     }
1445           } while (++i < nme && me[i].mp->cont_level != 0);
1446           return i;
1447 }
1448 
1449 file_private void
set_last_default(struct magic_set * ms,struct magic_entry * me,uint32_t nme)1450 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1451 {
1452           uint32_t i;
1453           for (i = 0; i < nme; i++) {
1454                     if (me[i].mp->cont_level == 0 &&
1455                         me[i].mp->type == FILE_DEFAULT) {
1456                               while (++i < nme)
1457                                         if (me[i].mp->cont_level == 0)
1458                                                   break;
1459                               if (i != nme) {
1460                                         /* XXX - Ugh! */
1461                                         ms->line = me[i].mp->lineno;
1462                                         file_magwarn(ms,
1463                                             "level 0 \"default\" did not sort last");
1464                               }
1465                               return;
1466                     }
1467           }
1468 }
1469 
1470 file_private int
coalesce_entries(struct magic_set * ms,struct magic_entry * me,uint32_t nme,struct magic ** ma,uint32_t * nma)1471 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1472     struct magic **ma, uint32_t *nma)
1473 {
1474           uint32_t i, mentrycount = 0;
1475           size_t slen;
1476 
1477           for (i = 0; i < nme; i++)
1478                     mentrycount += me[i].cont_count;
1479 
1480           if (mentrycount == 0) {
1481                     *ma = NULL;
1482                     *nma = 0;
1483                     return 0;
1484           }
1485 
1486           slen = sizeof(**ma) * mentrycount;
1487           if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) {
1488                     file_oomem(ms, slen);
1489                     return -1;
1490           }
1491 
1492           mentrycount = 0;
1493           for (i = 0; i < nme; i++) {
1494                     (void)memcpy(*ma + mentrycount, me[i].mp,
1495                         me[i].cont_count * sizeof(**ma));
1496                     mentrycount += me[i].cont_count;
1497           }
1498           *nma = mentrycount;
1499           return 0;
1500 }
1501 
1502 file_private void
magic_entry_free(struct magic_entry * me,uint32_t nme)1503 magic_entry_free(struct magic_entry *me, uint32_t nme)
1504 {
1505           uint32_t i;
1506           if (me == NULL)
1507                     return;
1508           for (i = 0; i < nme; i++)
1509                     free(me[i].mp);
1510           free(me);
1511 }
1512 
1513 file_private struct magic_map *
apprentice_load(struct magic_set * ms,const char * fn,int action)1514 apprentice_load(struct magic_set *ms, const char *fn, int action)
1515 {
1516           int errs = 0;
1517           uint32_t i, j;
1518           size_t files = 0, maxfiles = 0;
1519           char **filearr = NULL, *mfn;
1520           struct stat st;
1521           struct magic_map *map;
1522           struct magic_entry_set mset[MAGIC_SETS];
1523           DIR *dir;
1524           struct dirent *d;
1525 
1526           memset(mset, 0, sizeof(mset));
1527           ms->flags |= MAGIC_CHECK;     /* Enable checks for parsed files */
1528 
1529 
1530           if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL)
1531           {
1532                     file_oomem(ms, sizeof(*map));
1533                     return NULL;
1534           }
1535           map->type = MAP_TYPE_MALLOC;
1536 
1537           /* print silly verbose header for USG compat. */
1538           if (action == FILE_CHECK)
1539                     (void)fprintf(stderr, "%s\n", usg_hdr);
1540 
1541           /* load directory or file */
1542           if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1543                     dir = opendir(fn);
1544                     if (!dir) {
1545                               errs++;
1546                               goto out;
1547                     }
1548                     while ((d = readdir(dir)) != NULL) {
1549                               if (d->d_name[0] == '.')
1550                                         continue;
1551                               if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
1552                                         file_oomem(ms,
1553                                             strlen(fn) + strlen(d->d_name) + 2);
1554                                         errs++;
1555                                         closedir(dir);
1556                                         goto out;
1557                               }
1558                               if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1559                                         free(mfn);
1560                                         continue;
1561                               }
1562                               if (files >= maxfiles) {
1563                                         size_t mlen;
1564                                         char **nfilearr;
1565                                         maxfiles = (maxfiles + 1) * 2;
1566                                         mlen = maxfiles * sizeof(*filearr);
1567                                         if ((nfilearr = CAST(char **,
1568                                             realloc(filearr, mlen))) == NULL) {
1569                                                   file_oomem(ms, mlen);
1570                                                   free(mfn);
1571                                                   closedir(dir);
1572                                                   errs++;
1573                                                   goto out;
1574                                         }
1575                                         filearr = nfilearr;
1576                               }
1577                               filearr[files++] = mfn;
1578                     }
1579                     closedir(dir);
1580                     if (filearr) {
1581                               qsort(filearr, files, sizeof(*filearr), cmpstrp);
1582                               for (i = 0; i < files; i++) {
1583                                         load_1(ms, action, filearr[i], &errs, mset);
1584                                         free(filearr[i]);
1585                               }
1586                               free(filearr);
1587                               filearr = NULL;
1588                     }
1589           } else
1590                     load_1(ms, action, fn, &errs, mset);
1591           if (errs)
1592                     goto out;
1593 
1594           for (j = 0; j < MAGIC_SETS; j++) {
1595                     /* Set types of tests */
1596                     for (i = 0; i < mset[j].count; ) {
1597                               if (mset[j].me[i].mp->cont_level != 0) {
1598                                         i++;
1599                                         continue;
1600                               }
1601                               i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1602                     }
1603                     if (mset[j].me)
1604                               qsort(mset[j].me, mset[j].count, sizeof(*mset[0].me),
1605                                   apprentice_sort);
1606 
1607                     /*
1608                      * Make sure that any level 0 "default" line is last
1609                      * (if one exists).
1610                      */
1611                     set_last_default(ms, mset[j].me, mset[j].count);
1612 
1613                     /* coalesce per file arrays into a single one, if needed */
1614                     if (mset[j].count == 0)
1615                               continue;
1616 
1617                     if (coalesce_entries(ms, mset[j].me, mset[j].count,
1618                         &map->magic[j], &map->nmagic[j]) == -1) {
1619                               errs++;
1620                               goto out;
1621                     }
1622           }
1623 
1624 out:
1625           free(filearr);
1626           for (j = 0; j < MAGIC_SETS; j++)
1627                     magic_entry_free(mset[j].me, mset[j].count);
1628 
1629           if (errs) {
1630                     apprentice_unmap(map);
1631                     return NULL;
1632           }
1633           return map;
1634 }
1635 
1636 /*
1637  * extend the sign bit if the comparison is to be signed
1638  */
1639 file_protected uint64_t
file_signextend(struct magic_set * ms,struct magic * m,uint64_t v)1640 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1641 {
1642           if (!(m->flag & UNSIGNED)) {
1643                     switch(m->type) {
1644                     /*
1645                      * Do not remove the casts below.  They are
1646                      * vital.  When later compared with the data,
1647                      * the sign extension must have happened.
1648                      */
1649                     case FILE_BYTE:
1650                               v = CAST(signed char,  v);
1651                               break;
1652                     case FILE_SHORT:
1653                     case FILE_BESHORT:
1654                     case FILE_LESHORT:
1655                               v = CAST(short, v);
1656                               break;
1657                     case FILE_DATE:
1658                     case FILE_BEDATE:
1659                     case FILE_LEDATE:
1660                     case FILE_MEDATE:
1661                     case FILE_LDATE:
1662                     case FILE_BELDATE:
1663                     case FILE_LELDATE:
1664                     case FILE_MELDATE:
1665                     case FILE_LONG:
1666                     case FILE_BELONG:
1667                     case FILE_LELONG:
1668                     case FILE_MELONG:
1669                     case FILE_FLOAT:
1670                     case FILE_BEFLOAT:
1671                     case FILE_LEFLOAT:
1672                     case FILE_MSDOSDATE:
1673                     case FILE_BEMSDOSDATE:
1674                     case FILE_LEMSDOSDATE:
1675                     case FILE_MSDOSTIME:
1676                     case FILE_BEMSDOSTIME:
1677                     case FILE_LEMSDOSTIME:
1678                               v = CAST(int32_t, v);
1679                               break;
1680                     case FILE_QUAD:
1681                     case FILE_BEQUAD:
1682                     case FILE_LEQUAD:
1683                     case FILE_QDATE:
1684                     case FILE_QLDATE:
1685                     case FILE_QWDATE:
1686                     case FILE_BEQDATE:
1687                     case FILE_BEQLDATE:
1688                     case FILE_BEQWDATE:
1689                     case FILE_LEQDATE:
1690                     case FILE_LEQLDATE:
1691                     case FILE_LEQWDATE:
1692                     case FILE_DOUBLE:
1693                     case FILE_BEDOUBLE:
1694                     case FILE_LEDOUBLE:
1695                     case FILE_OFFSET:
1696                     case FILE_BEVARINT:
1697                     case FILE_LEVARINT:
1698                               v = CAST(int64_t, v);
1699                               break;
1700                     case FILE_STRING:
1701                     case FILE_PSTRING:
1702                     case FILE_BESTRING16:
1703                     case FILE_LESTRING16:
1704                     case FILE_REGEX:
1705                     case FILE_SEARCH:
1706                     case FILE_DEFAULT:
1707                     case FILE_INDIRECT:
1708                     case FILE_NAME:
1709                     case FILE_USE:
1710                     case FILE_CLEAR:
1711                     case FILE_DER:
1712                     case FILE_GUID:
1713                     case FILE_OCTAL:
1714                               break;
1715                     default:
1716                               if (ms->flags & MAGIC_CHECK)
1717                                   file_magwarn(ms, "cannot happen: m->type=%d\n",
1718                                             m->type);
1719                               return FILE_BADSIZE;
1720                     }
1721           }
1722           return v;
1723 }
1724 
1725 file_private int
string_modifier_check(struct magic_set * ms,struct magic * m)1726 string_modifier_check(struct magic_set *ms, struct magic *m)
1727 {
1728           if ((ms->flags & MAGIC_CHECK) == 0)
1729                     return 0;
1730 
1731           if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1732               (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1733                     file_magwarn(ms,
1734                         "'/BHhLl' modifiers are only allowed for pascal strings\n");
1735                     return -1;
1736           }
1737           switch (m->type) {
1738           case FILE_BESTRING16:
1739           case FILE_LESTRING16:
1740                     if (m->str_flags != 0) {
1741                               file_magwarn(ms,
1742                                   "no modifiers allowed for 16-bit strings\n");
1743                               return -1;
1744                     }
1745                     break;
1746           case FILE_STRING:
1747           case FILE_PSTRING:
1748                     if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1749                               file_magwarn(ms,
1750                                   "'/%c' only allowed on regex and search\n",
1751                                   CHAR_REGEX_OFFSET_START);
1752                               return -1;
1753                     }
1754                     break;
1755           case FILE_SEARCH:
1756                     if (m->str_range == 0) {
1757                               file_magwarn(ms,
1758                                   "missing range; defaulting to %d\n",
1759                             STRING_DEFAULT_RANGE);
1760                               m->str_range = STRING_DEFAULT_RANGE;
1761                               return -1;
1762                     }
1763                     break;
1764           case FILE_REGEX:
1765                     if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1766                               file_magwarn(ms, "'/%c' not allowed on regex\n",
1767                                   CHAR_COMPACT_WHITESPACE);
1768                               return -1;
1769                     }
1770                     if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1771                               file_magwarn(ms, "'/%c' not allowed on regex\n",
1772                                   CHAR_COMPACT_OPTIONAL_WHITESPACE);
1773                               return -1;
1774                     }
1775                     break;
1776           default:
1777                     file_magwarn(ms, "coding error: m->type=%d\n",
1778                         m->type);
1779                     return -1;
1780           }
1781           return 0;
1782 }
1783 
1784 file_private int
get_op(char c)1785 get_op(char c)
1786 {
1787           switch (c) {
1788           case '&':
1789                     return FILE_OPAND;
1790           case '|':
1791                     return FILE_OPOR;
1792           case '^':
1793                     return FILE_OPXOR;
1794           case '+':
1795                     return FILE_OPADD;
1796           case '-':
1797                     return FILE_OPMINUS;
1798           case '*':
1799                     return FILE_OPMULTIPLY;
1800           case '/':
1801                     return FILE_OPDIVIDE;
1802           case '%':
1803                     return FILE_OPMODULO;
1804           default:
1805                     return -1;
1806           }
1807 }
1808 
1809 #ifdef ENABLE_CONDITIONALS
1810 file_private int
get_cond(const char * l,const char ** t)1811 get_cond(const char *l, const char **t)
1812 {
1813           static const struct cond_tbl_s {
1814                     char name[8];
1815                     size_t len;
1816                     int cond;
1817           } cond_tbl[] = {
1818                     { "if",             2,        COND_IF },
1819                     { "elif", 4,        COND_ELIF },
1820                     { "else", 4,        COND_ELSE },
1821                     { "",               0,        COND_NONE },
1822           };
1823           const struct cond_tbl_s *p;
1824 
1825           for (p = cond_tbl; p->len; p++) {
1826                     if (strncmp(l, p->name, p->len) == 0 &&
1827                         isspace(CAST(unsigned char, l[p->len]))) {
1828                               if (t)
1829                                         *t = l + p->len;
1830                               break;
1831                     }
1832           }
1833           return p->cond;
1834 }
1835 
1836 file_private int
check_cond(struct magic_set * ms,int cond,uint32_t cont_level)1837 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1838 {
1839           int last_cond;
1840           last_cond = ms->c.li[cont_level].last_cond;
1841 
1842           switch (cond) {
1843           case COND_IF:
1844                     if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1845                               if (ms->flags & MAGIC_CHECK)
1846                                         file_magwarn(ms, "syntax error: `if'");
1847                               return -1;
1848                     }
1849                     last_cond = COND_IF;
1850                     break;
1851 
1852           case COND_ELIF:
1853                     if (last_cond != COND_IF && last_cond != COND_ELIF) {
1854                               if (ms->flags & MAGIC_CHECK)
1855                                         file_magwarn(ms, "syntax error: `elif'");
1856                               return -1;
1857                     }
1858                     last_cond = COND_ELIF;
1859                     break;
1860 
1861           case COND_ELSE:
1862                     if (last_cond != COND_IF && last_cond != COND_ELIF) {
1863                               if (ms->flags & MAGIC_CHECK)
1864                                         file_magwarn(ms, "syntax error: `else'");
1865                               return -1;
1866                     }
1867                     last_cond = COND_NONE;
1868                     break;
1869 
1870           case COND_NONE:
1871                     last_cond = COND_NONE;
1872                     break;
1873           }
1874 
1875           ms->c.li[cont_level].last_cond = last_cond;
1876           return 0;
1877 }
1878 #endif /* ENABLE_CONDITIONALS */
1879 
1880 file_private int
parse_indirect_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1881 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1882 {
1883           const char *l = *lp;
1884 
1885           while (!isspace(CAST(unsigned char, *++l)))
1886                     switch (*l) {
1887                     case CHAR_INDIRECT_RELATIVE:
1888                               m->str_flags |= INDIRECT_RELATIVE;
1889                               break;
1890                     default:
1891                               if (ms->flags & MAGIC_CHECK)
1892                                         file_magwarn(ms, "indirect modifier `%c' "
1893                                                   "invalid", *l);
1894                               *lp = l;
1895                               return -1;
1896                     }
1897           *lp = l;
1898           return 0;
1899 }
1900 
1901 file_private void
parse_op_modifier(struct magic_set * ms,struct magic * m,const char ** lp,int op)1902 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1903     int op)
1904 {
1905           const char *l = *lp;
1906           char *t;
1907           uint64_t val;
1908 
1909           ++l;
1910           m->mask_op |= op;
1911           val = CAST(uint64_t, strtoull(l, &t, 0));
1912           l = t;
1913           m->num_mask = file_signextend(ms, m, val);
1914           eatsize(&l);
1915           *lp = l;
1916 }
1917 
1918 file_private int
parse_string_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1919 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1920 {
1921           const char *l = *lp;
1922           char *t;
1923           int have_range = 0;
1924 
1925           while (!isspace(CAST(unsigned char, *++l))) {
1926                     switch (*l) {
1927                     case '0':  case '1':  case '2':
1928                     case '3':  case '4':  case '5':
1929                     case '6':  case '7':  case '8':
1930                     case '9':
1931                               if (have_range && (ms->flags & MAGIC_CHECK))
1932                                         file_magwarn(ms, "multiple ranges");
1933                               have_range = 1;
1934                               m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1935                               if (m->str_range == 0)
1936                                         file_magwarn(ms, "zero range");
1937                               l = t - 1;
1938                               break;
1939                     case CHAR_COMPACT_WHITESPACE:
1940                               m->str_flags |= STRING_COMPACT_WHITESPACE;
1941                               break;
1942                     case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1943                               m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1944                               break;
1945                     case CHAR_IGNORE_LOWERCASE:
1946                               m->str_flags |= STRING_IGNORE_LOWERCASE;
1947                               break;
1948                     case CHAR_IGNORE_UPPERCASE:
1949                               m->str_flags |= STRING_IGNORE_UPPERCASE;
1950                               break;
1951                     case CHAR_REGEX_OFFSET_START:
1952                               m->str_flags |= REGEX_OFFSET_START;
1953                               break;
1954                     case CHAR_BINTEST:
1955                               m->str_flags |= STRING_BINTEST;
1956                               break;
1957                     case CHAR_TEXTTEST:
1958                               m->str_flags |= STRING_TEXTTEST;
1959                               break;
1960                     case CHAR_TRIM:
1961                               m->str_flags |= STRING_TRIM;
1962                               break;
1963                     case CHAR_FULL_WORD:
1964                               m->str_flags |= STRING_FULL_WORD;
1965                               break;
1966                     case CHAR_PSTRING_1_LE:
1967 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1968                               if (m->type != FILE_PSTRING)
1969                                         goto bad;
1970                               SET_LENGTH(PSTRING_1_LE);
1971                               break;
1972                     case CHAR_PSTRING_2_BE:
1973                               if (m->type != FILE_PSTRING)
1974                                         goto bad;
1975                               SET_LENGTH(PSTRING_2_BE);
1976                               break;
1977                     case CHAR_PSTRING_2_LE:
1978                               if (m->type != FILE_PSTRING)
1979                                         goto bad;
1980                               SET_LENGTH(PSTRING_2_LE);
1981                               break;
1982                     case CHAR_PSTRING_4_BE:
1983                               if (m->type != FILE_PSTRING)
1984                                         goto bad;
1985                               SET_LENGTH(PSTRING_4_BE);
1986                               break;
1987                     case CHAR_PSTRING_4_LE:
1988                               switch (m->type) {
1989                               case FILE_PSTRING:
1990                               case FILE_REGEX:
1991                                         break;
1992                               default:
1993                                         goto bad;
1994                               }
1995                               SET_LENGTH(PSTRING_4_LE);
1996                               break;
1997                     case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1998                               if (m->type != FILE_PSTRING)
1999                                         goto bad;
2000                               m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
2001                               break;
2002                     default:
2003                     bad:
2004                               if (ms->flags & MAGIC_CHECK)
2005                                         file_magwarn(ms, "string modifier `%c' "
2006                                                   "invalid", *l);
2007                               goto out;
2008                     }
2009                     /* allow multiple '/' for readability */
2010                     if (l[1] == '/' && !isspace(CAST(unsigned char, l[2])))
2011                               l++;
2012           }
2013           if (string_modifier_check(ms, m) == -1)
2014                     goto out;
2015           *lp = l;
2016           return 0;
2017 out:
2018           *lp = l;
2019           return -1;
2020 }
2021 
2022 /*
2023  * parse one line from magic file, put into magic[index++] if valid
2024  */
2025 file_private int
parse(struct magic_set * ms,struct magic_entry * me,const char * line,size_t lineno,int action)2026 parse(struct magic_set *ms, struct magic_entry *me, const char *line,
2027     size_t lineno, int action)
2028 {
2029 #ifdef ENABLE_CONDITIONALS
2030           static uint32_t last_cont_level = 0;
2031 #endif
2032           size_t i;
2033           struct magic *m;
2034           const char *l = line;
2035           char *t;
2036           int op;
2037           uint32_t cont_level;
2038           int32_t diff;
2039 
2040           cont_level = 0;
2041 
2042           /*
2043            * Parse the offset.
2044            */
2045           while (*l == '>') {
2046                     ++l;                /* step over */
2047                     cont_level++;
2048           }
2049 #ifdef ENABLE_CONDITIONALS
2050           if (cont_level == 0 || cont_level > last_cont_level)
2051                     if (file_check_mem(ms, cont_level) == -1)
2052                               return -1;
2053           last_cont_level = cont_level;
2054 #endif
2055           if (cont_level != 0) {
2056                     if (me->mp == NULL) {
2057                               file_magerror(ms, "No current entry for continuation");
2058                               return -1;
2059                     }
2060                     if (me->cont_count == 0) {
2061                               file_magerror(ms, "Continuations present with 0 count");
2062                               return -1;
2063                     }
2064                     m = &me->mp[me->cont_count - 1];
2065                     diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level);
2066                     if (diff > 1)
2067                               file_magwarn(ms, "New continuation level %u is more "
2068                                   "than one larger than current level %u", cont_level,
2069                                   m->cont_level);
2070                     if (me->cont_count == me->max_count) {
2071                               struct magic *nm;
2072                               size_t cnt = me->max_count + ALLOC_CHUNK;
2073                               if ((nm = CAST(struct magic *, realloc(me->mp,
2074                                   sizeof(*nm) * cnt))) == NULL) {
2075                                         file_oomem(ms, sizeof(*nm) * cnt);
2076                                         return -1;
2077                               }
2078                               me->mp = nm;
2079                               me->max_count = CAST(uint32_t, cnt);
2080                     }
2081                     m = &me->mp[me->cont_count++];
2082                     (void)memset(m, 0, sizeof(*m));
2083                     m->cont_level = cont_level;
2084           } else {
2085                     static const size_t len = sizeof(*m) * ALLOC_CHUNK;
2086                     if (me->mp != NULL)
2087                               return 1;
2088                     if ((m = CAST(struct magic *, malloc(len))) == NULL) {
2089                               file_oomem(ms, len);
2090                               return -1;
2091                     }
2092                     me->mp = m;
2093                     me->max_count = ALLOC_CHUNK;
2094                     (void)memset(m, 0, sizeof(*m));
2095                     m->factor_op = FILE_FACTOR_OP_NONE;
2096                     m->cont_level = 0;
2097                     me->cont_count = 1;
2098           }
2099           m->lineno = CAST(uint32_t, lineno);
2100 
2101           if (*l == '&') {  /* m->cont_level == 0 checked below. */
2102                 ++l;            /* step over */
2103                 m->flag |= OFFADD;
2104         }
2105           if (*l == '(') {
2106                     ++l;                /* step over */
2107                     m->flag |= INDIR;
2108                     if (m->flag & OFFADD)
2109                               m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
2110 
2111                     if (*l == '&') {  /* m->cont_level == 0 checked below */
2112                               ++l;            /* step over */
2113                               m->flag |= OFFADD;
2114                     }
2115           }
2116           /* Indirect offsets are not valid at level 0. */
2117           if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
2118                     if (ms->flags & MAGIC_CHECK)
2119                               file_magwarn(ms, "relative offset at level 0");
2120                     return -1;
2121           }
2122 
2123           /* get offset, then skip over it */
2124           if (*l == '-') {
2125                     ++l;            /* step over */
2126                     m->flag |= OFFNEGATIVE;
2127           }
2128           m->offset = CAST(int32_t, strtol(l, &t, 0));
2129         if (l == t) {
2130                     if (ms->flags & MAGIC_CHECK)
2131                               file_magwarn(ms, "offset `%s' invalid", l);
2132                     return -1;
2133           }
2134 
2135         l = t;
2136 
2137           if (m->flag & INDIR) {
2138                     m->in_type = FILE_LONG;
2139                     m->in_offset = 0;
2140                     m->in_op = 0;
2141                     /*
2142                      * read [.,lbs][+-]nnnnn)
2143                      */
2144                     if (*l == '.' || *l == ',') {
2145                               if (*l == ',')
2146                                         m->in_op |= FILE_OPSIGNED;
2147                               l++;
2148                               switch (*l) {
2149                               case 'l':
2150                                         m->in_type = FILE_LELONG;
2151                                         break;
2152                               case 'L':
2153                                         m->in_type = FILE_BELONG;
2154                                         break;
2155                               case 'm':
2156                                         m->in_type = FILE_MELONG;
2157                                         break;
2158                               case 'h':
2159                               case 's':
2160                                         m->in_type = FILE_LESHORT;
2161                                         break;
2162                               case 'H':
2163                               case 'S':
2164                                         m->in_type = FILE_BESHORT;
2165                                         break;
2166                               case 'c':
2167                               case 'b':
2168                               case 'C':
2169                               case 'B':
2170                                         m->in_type = FILE_BYTE;
2171                                         break;
2172                               case 'e':
2173                               case 'f':
2174                               case 'g':
2175                                         m->in_type = FILE_LEDOUBLE;
2176                                         break;
2177                               case 'E':
2178                               case 'F':
2179                               case 'G':
2180                                         m->in_type = FILE_BEDOUBLE;
2181                                         break;
2182                               case 'i':
2183                                         m->in_type = FILE_LEID3;
2184                                         break;
2185                               case 'I':
2186                                         m->in_type = FILE_BEID3;
2187                                         break;
2188                               case 'o':
2189                                         m->in_type = FILE_OCTAL;
2190                                         break;
2191                               case 'q':
2192                                         m->in_type = FILE_LEQUAD;
2193                                         break;
2194                               case 'Q':
2195                                         m->in_type = FILE_BEQUAD;
2196                                         break;
2197                               default:
2198                                         if (ms->flags & MAGIC_CHECK)
2199                                                   file_magwarn(ms,
2200                                                       "indirect offset type `%c' invalid",
2201                                                       *l);
2202                                         return -1;
2203                               }
2204                               l++;
2205                     }
2206 
2207                     if (*l == '~') {
2208                               m->in_op |= FILE_OPINVERSE;
2209                               l++;
2210                     }
2211                     if ((op = get_op(*l)) != -1) {
2212                               m->in_op |= op;
2213                               l++;
2214                     }
2215                     if (*l == '(') {
2216                               m->in_op |= FILE_OPINDIRECT;
2217                               l++;
2218                     }
2219                     if (isdigit(CAST(unsigned char, *l)) || *l == '-') {
2220                               m->in_offset = CAST(int32_t, strtol(l, &t, 0));
2221                               if (l == t) {
2222                                         if (ms->flags & MAGIC_CHECK)
2223                                                   file_magwarn(ms,
2224                                                       "in_offset `%s' invalid", l);
2225                                         return -1;
2226                               }
2227                               l = t;
2228                     }
2229                     if (*l++ != ')' ||
2230                         ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
2231                               if (ms->flags & MAGIC_CHECK)
2232                                         file_magwarn(ms,
2233                                             "missing ')' in indirect offset");
2234                               return -1;
2235                     }
2236           }
2237           EATAB;
2238 
2239 #ifdef ENABLE_CONDITIONALS
2240           m->cond = get_cond(l, &l);
2241           if (check_cond(ms, m->cond, cont_level) == -1)
2242                     return -1;
2243 
2244           EATAB;
2245 #endif
2246 
2247           /*
2248            * Parse the type.
2249            */
2250           if (*l == 'u') {
2251                     /*
2252                      * Try it as a keyword type prefixed by "u"; match what
2253                      * follows the "u".  If that fails, try it as an SUS
2254                      * integer type.
2255                      */
2256                     m->type = get_type(type_tbl, l + 1, &l);
2257                     if (m->type == FILE_INVALID) {
2258                               /*
2259                                * Not a keyword type; parse it as an SUS type,
2260                                * 'u' possibly followed by a number or C/S/L.
2261                                */
2262                               m->type = get_standard_integer_type(l, &l);
2263                     }
2264                     /* It's unsigned. */
2265                     if (m->type != FILE_INVALID)
2266                               m->flag |= UNSIGNED;
2267           } else {
2268                     /*
2269                      * Try it as a keyword type.  If that fails, try it as
2270                      * an SUS integer type if it begins with "d" or as an
2271                      * SUS string type if it begins with "s".  In any case,
2272                      * it's not unsigned.
2273                      */
2274                     m->type = get_type(type_tbl, l, &l);
2275                     if (m->type == FILE_INVALID) {
2276                               /*
2277                                * Not a keyword type; parse it as an SUS type,
2278                                * either 'd' possibly followed by a number or
2279                                * C/S/L, or just 's'.
2280                                */
2281                               if (*l == 'd')
2282                                         m->type = get_standard_integer_type(l, &l);
2283                               else if (*l == 's'
2284                                   && !isalpha(CAST(unsigned char, l[1]))) {
2285                                         m->type = FILE_STRING;
2286                                         ++l;
2287                               }
2288                     }
2289           }
2290 
2291           if (m->type == FILE_INVALID) {
2292                     /* Not found - try it as a special keyword. */
2293                     m->type = get_type(special_tbl, l, &l);
2294           }
2295 
2296           if (m->type == FILE_INVALID) {
2297                     if (ms->flags & MAGIC_CHECK)
2298                               file_magwarn(ms, "type `%s' invalid", l);
2299                     return -1;
2300           }
2301 
2302           if (m->type == FILE_NAME && cont_level != 0) {
2303                     if (ms->flags & MAGIC_CHECK)
2304                               file_magwarn(ms, "`name%s' entries can only be "
2305                                   "declared at top level", l);
2306                     return -1;
2307           }
2308 
2309           /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2310           /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2311 
2312           m->mask_op = 0;
2313           if (*l == '~') {
2314                     if (!IS_STRING(m->type))
2315                               m->mask_op |= FILE_OPINVERSE;
2316                     else if (ms->flags & MAGIC_CHECK)
2317                               file_magwarn(ms, "'~' invalid for string types");
2318                     ++l;
2319           }
2320           m->str_range = 0;
2321           m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2322           if ((op = get_op(*l)) != -1) {
2323                     if (IS_STRING(m->type)) {
2324                               int r;
2325 
2326                               if (op != FILE_OPDIVIDE) {
2327                                         if (ms->flags & MAGIC_CHECK)
2328                                                   file_magwarn(ms,
2329                                                       "invalid string/indirect op: "
2330                                                       "`%c'", *t);
2331                                         return -1;
2332                               }
2333 
2334                               if (m->type == FILE_INDIRECT)
2335                                         r = parse_indirect_modifier(ms, m, &l);
2336                               else
2337                                         r = parse_string_modifier(ms, m, &l);
2338                               if (r == -1)
2339                                         return -1;
2340                     } else
2341                               parse_op_modifier(ms, m, &l, op);
2342           }
2343 
2344           /*
2345            * We used to set mask to all 1's here, instead let's just not do
2346            * anything if mask = 0 (unless you have a better idea)
2347            */
2348           EATAB;
2349 
2350           switch (*l) {
2351           case '>':
2352           case '<':
2353                     m->reln = *l;
2354                     ++l;
2355                     if (*l == '=') {
2356                               if (ms->flags & MAGIC_CHECK) {
2357                                         file_magwarn(ms, "%c= not supported",
2358                                             m->reln);
2359                                         return -1;
2360                               }
2361                        ++l;
2362                     }
2363                     break;
2364           /* Old-style anding: "0 byte &0x80 dynamically linked" */
2365           case '&':
2366           case '^':
2367           case '=':
2368                     m->reln = *l;
2369                     ++l;
2370                     if (*l == '=') {
2371                        /* HP compat: ignore &= etc. */
2372                        ++l;
2373                     }
2374                     break;
2375           case '!':
2376                     m->reln = *l;
2377                     ++l;
2378                     break;
2379           default:
2380                     m->reln = '=';      /* the default relation */
2381                     if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) &&
2382                         isspace(CAST(unsigned char, l[1]))) || !l[1])) {
2383                               m->reln = *l;
2384                               ++l;
2385                     }
2386                     break;
2387           }
2388           /*
2389            * Grab the value part, except for an 'x' reln.
2390            */
2391           if (m->reln != 'x' && getvalue(ms, m, &l, action))
2392                     return -1;
2393 
2394           /*
2395            * TODO finish this macro and start using it!
2396            * #define offsetcheck {if (offset > ms->bytes_max -1)
2397            *        magwarn("offset too big"); }
2398            */
2399 
2400           /*
2401            * Now get last part - the description
2402            */
2403           EATAB;
2404           if (l[0] == '\b') {
2405                     ++l;
2406                     m->flag |= NOSPACE;
2407           } else if ((l[0] == '\\') && (l[1] == 'b')) {
2408                     ++l;
2409                     ++l;
2410                     m->flag |= NOSPACE;
2411           }
2412           for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2413                     continue;
2414           if (i == sizeof(m->desc)) {
2415                     m->desc[sizeof(m->desc) - 1] = '\0';
2416                     if (ms->flags & MAGIC_CHECK)
2417                               file_magwarn(ms, "description `%s' truncated", m->desc);
2418           }
2419 
2420         /*
2421            * We only do this check while compiling, or if any of the magic
2422            * files were not compiled.
2423          */
2424         if (ms->flags & MAGIC_CHECK) {
2425                     if (check_format(ms, m) == -1)
2426                               return -1;
2427           }
2428 #ifndef COMPILE_ONLY
2429           if (action == FILE_CHECK) {
2430                     file_mdump(m);
2431           }
2432 #endif
2433           m->mimetype[0] = '\0';                  /* initialise MIME type to none */
2434           return 0;
2435 }
2436 
2437 /*
2438  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2439  * if valid
2440  */
2441 /*ARGSUSED*/
2442 file_private int
parse_strength(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2443 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
2444     size_t len __attribute__((__unused__)))
2445 {
2446           const char *l = line;
2447           char *el;
2448           unsigned long factor;
2449           char sbuf[512];
2450           struct magic *m = &me->mp[0];
2451 
2452           if (m->factor_op != FILE_FACTOR_OP_NONE) {
2453                     file_magwarn(ms,
2454                         "Current entry already has a strength type: %c %d",
2455                         m->factor_op, m->factor);
2456                     return -1;
2457           }
2458           if (m->type == FILE_NAME) {
2459                     file_magwarn(ms, "%s: Strength setting is not supported in "
2460                         "\"name\" magic entries",
2461                         file_printable(ms, sbuf, sizeof(sbuf), m->value.s,
2462                         sizeof(m->value.s)));
2463                     return -1;
2464           }
2465           EATAB;
2466           switch (*l) {
2467           case FILE_FACTOR_OP_NONE:
2468                     break;
2469           case FILE_FACTOR_OP_PLUS:
2470           case FILE_FACTOR_OP_MINUS:
2471           case FILE_FACTOR_OP_TIMES:
2472           case FILE_FACTOR_OP_DIV:
2473                     m->factor_op = *l++;
2474                     break;
2475           default:
2476                     file_magwarn(ms, "Unknown factor op `%c'", *l);
2477                     return -1;
2478           }
2479           EATAB;
2480           factor = strtoul(l, &el, 0);
2481           if (factor > 255) {
2482                     file_magwarn(ms, "Too large factor `%lu'", factor);
2483                     goto out;
2484           }
2485           if (*el && !isspace(CAST(unsigned char, *el))) {
2486                     file_magwarn(ms, "Bad factor `%s'", l);
2487                     goto out;
2488           }
2489           m->factor = CAST(uint8_t, factor);
2490           if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2491                     file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2492                         m->factor_op, m->factor);
2493                     goto out;
2494           }
2495           return 0;
2496 out:
2497           m->factor_op = FILE_FACTOR_OP_NONE;
2498           m->factor = 0;
2499           return -1;
2500 }
2501 
/*
 * Return 1 if x may appear in an annotation value: an ASCII letter or
 * digit, or any character listed in `extra'.  Return 0 otherwise.
 *
 * Note that the terminating NUL of `extra' counts as a match, so
 * goodchar('\0', ...) is 1; parse_extra() depends on this so that the end
 * of the input line is not reported as a bad character.
 */
file_private int
goodchar(unsigned char x, const char *extra)
{
	/* ASCII alphanumerics are always acceptable. */
	if (x < 0x80 && isalnum(x))
		return 1;

	return strchr(extra, x) != NULL;
}
2507 
2508 file_private int
parse_extra(struct magic_set * ms,struct magic_entry * me,const char * line,size_t llen,off_t off,size_t len,const char * name,const char * extra,int nt)2509 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2510     size_t llen, off_t off, size_t len, const char *name, const char *extra,
2511     int nt)
2512 {
2513           size_t i;
2514           const char *l = line;
2515           struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2516           char *buf = CAST(char *, CAST(void *, m)) + off;
2517 
2518           if (buf[0] != '\0') {
2519                     len = nt ? strlen(buf) : len;
2520                     file_magwarn(ms, "Current entry already has a %s type "
2521                         "`%.*s', new type `%s'", name, CAST(int, len), buf, l);
2522                     return -1;
2523           }
2524 
2525           if (*m->desc == '\0') {
2526                     file_magwarn(ms, "Current entry does not yet have a "
2527                         "description for adding a %s type", name);
2528                     return -1;
2529           }
2530 
2531           EATAB;
2532           for (i = 0; *l && i < llen && i < len && goodchar(*l, extra);
2533               buf[i++] = *l++)
2534                     continue;
2535 
2536           if (i == len && *l) {
2537                     if (nt)
2538                               buf[len - 1] = '\0';
2539                     if (ms->flags & MAGIC_CHECK)
2540                               file_magwarn(ms, "%s type `%s' truncated %"
2541                                   SIZE_T_FORMAT "u", name, line, i);
2542           } else {
2543                     if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra))
2544                               file_magwarn(ms, "%s type `%s' has bad char '%c'",
2545                                   name, line, *l);
2546                     if (nt)
2547                               buf[i] = '\0';
2548           }
2549 
2550           if (i > 0)
2551                     return 0;
2552 
2553           file_magerror(ms, "Bad magic entry '%s'", line);
2554           return -1;
2555 }
2556 
2557 /*
2558  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2559  * magic[index - 1]
2560  */
2561 file_private int
parse_apple(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2562 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line,
2563     size_t len)
2564 {
2565           return parse_extra(ms, me, line, len,
2566               CAST(off_t, offsetof(struct magic, apple)),
2567               sizeof(me->mp[0].apple), "APPLE", "!+-./?", 0);
2568 }
2569 
2570 /*
2571  * Parse a comma-separated list of extensions
2572  */
2573 file_private int
parse_ext(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2574 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line,
2575     size_t len)
2576 {
2577           return parse_extra(ms, me, line, len,
2578               CAST(off_t, offsetof(struct magic, ext)),
2579               sizeof(me->mp[0].ext), "EXTENSION", ",!+-/@?_$&~", 0);
2580               /* & for b&w */
2581               /* ~ for journal~ */
2582 }
2583 
2584 /*
2585  * parse a MIME annotation line from magic file, put into magic[index - 1]
2586  * if valid
2587  */
2588 file_private int
parse_mime(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2589 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line,
2590     size_t len)
2591 {
2592           return parse_extra(ms, me, line, len,
2593               CAST(off_t, offsetof(struct magic, mimetype)),
2594               sizeof(me->mp[0].mimetype), "MIME", "+-/.$?:{}", 1);
2595 }
2596 
2597 file_private int
check_format_type(const char * ptr,int type,const char ** estr)2598 check_format_type(const char *ptr, int type, const char **estr)
2599 {
2600           int quad = 0, h;
2601           size_t len, cnt;
2602           if (*ptr == '\0') {
2603                     /* Missing format string; bad */
2604                     *estr = "missing format spec";
2605                     return -1;
2606           }
2607 
2608           switch (file_formats[type]) {
2609           case FILE_FMT_QUAD:
2610                     quad = 1;
2611                     /*FALLTHROUGH*/
2612           case FILE_FMT_NUM:
2613                     if (quad == 0) {
2614                               switch (type) {
2615                               case FILE_BYTE:
2616                                         h = 2;
2617                                         break;
2618                               case FILE_SHORT:
2619                               case FILE_BESHORT:
2620                               case FILE_LESHORT:
2621                                         h = 1;
2622                                         break;
2623                               case FILE_LONG:
2624                               case FILE_BELONG:
2625                               case FILE_LELONG:
2626                               case FILE_MELONG:
2627                               case FILE_LEID3:
2628                               case FILE_BEID3:
2629                               case FILE_INDIRECT:
2630                                         h = 0;
2631                                         break;
2632                               default:
2633                                         fprintf(stderr, "Bad number format %d", type);
2634                                         abort();
2635                               }
2636                     } else
2637                               h = 0;
2638                     while (*ptr && strchr("-.#", *ptr) != NULL)
2639                               ptr++;
2640 #define CHECKLEN() do { \
2641           for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \
2642                     len = len * 10 + (*ptr - '0'); \
2643           if (cnt > 5 || len > 1024) \
2644                     goto toolong; \
2645 } while (/*CONSTCOND*/0)
2646 
2647                     CHECKLEN();
2648                     if (*ptr == '.')
2649                               ptr++;
2650                     CHECKLEN();
2651                     if (quad) {
2652                               if (*ptr++ != 'l')
2653                                         goto invalid;
2654                               if (*ptr++ != 'l')
2655                                         goto invalid;
2656                     }
2657 
2658                     switch (*ptr++) {
2659 #ifdef STRICT_FORMAT          /* "long" formats are int formats for us */
2660                     /* so don't accept the 'l' modifier */
2661                     case 'l':
2662                               switch (*ptr++) {
2663                               case 'i':
2664                               case 'd':
2665                               case 'u':
2666                               case 'o':
2667                               case 'x':
2668                               case 'X':
2669                                         if (h == 0)
2670                                                   return 0;
2671                                         /*FALLTHROUGH*/
2672                               default:
2673                                         goto invalid;
2674                               }
2675 
2676                     /*
2677                      * Don't accept h and hh modifiers. They make writing
2678                      * magic entries more complicated, for very little benefit
2679                      */
2680                     case 'h':
2681                               if (h-- <= 0)
2682                                         goto invalid;
2683                               switch (*ptr++) {
2684                               case 'h':
2685                                         if (h-- <= 0)
2686                                                   goto invalid;
2687                                         switch (*ptr++) {
2688                                         case 'i':
2689                                         case 'd':
2690                                         case 'u':
2691                                         case 'o':
2692                                         case 'x':
2693                                         case 'X':
2694                                                   return 0;
2695                                         default:
2696                                                   goto invalid;
2697                                         }
2698                               case 'i':
2699                               case 'd':
2700                               case 'u':
2701                               case 'o':
2702                               case 'x':
2703                               case 'X':
2704                                         if (h == 0)
2705                                                   return 0;
2706                                         /*FALLTHROUGH*/
2707                               default:
2708                                         goto invalid;
2709                               }
2710 #endif
2711                     case 'c':
2712                               if (h == 2)
2713                                         return 0;
2714                               goto invalid;
2715                     case 'i':
2716                     case 'd':
2717                     case 'u':
2718                     case 'o':
2719                     case 'x':
2720                     case 'X':
2721 #ifdef STRICT_FORMAT
2722                               if (h == 0)
2723                                         return 0;
2724                               /*FALLTHROUGH*/
2725 #else
2726                               return 0;
2727 #endif
2728                     default:
2729                               goto invalid;
2730                     }
2731 
2732           case FILE_FMT_FLOAT:
2733           case FILE_FMT_DOUBLE:
2734                     if (*ptr == '-')
2735                               ptr++;
2736                     if (*ptr == '.')
2737                               ptr++;
2738                     CHECKLEN();
2739                     if (*ptr == '.')
2740                               ptr++;
2741                     CHECKLEN();
2742                     switch (*ptr++) {
2743                     case 'e':
2744                     case 'E':
2745                     case 'f':
2746                     case 'F':
2747                     case 'g':
2748                     case 'G':
2749                               return 0;
2750 
2751                     default:
2752                               goto invalid;
2753                     }
2754 
2755 
2756           case FILE_FMT_STR:
2757                     if (*ptr == '-')
2758                               ptr++;
2759                     while (isdigit(CAST(unsigned char, *ptr)))
2760                               ptr++;
2761                     if (*ptr == '.') {
2762                               ptr++;
2763                               while (isdigit(CAST(unsigned char , *ptr)))
2764                                         ptr++;
2765                     }
2766 
2767                     switch (*ptr++) {
2768                     case 's':
2769                               return 0;
2770                     default:
2771                               goto invalid;
2772                     }
2773 
2774           default:
2775                     /* internal error */
2776                     fprintf(stderr, "Bad file format %d", type);
2777                     abort();
2778           }
2779 invalid:
2780           *estr = "not valid";
2781           return -1;
2782 toolong:
2783           *estr = "too long";
2784           return -1;
2785 }
2786 
2787 /*
2788  * Check that the optional printf format in description matches
2789  * the type of the magic.
2790  */
2791 file_private int
check_format(struct magic_set * ms,struct magic * m)2792 check_format(struct magic_set *ms, struct magic *m)
2793 {
2794           char *ptr;
2795           const char *estr;
2796 
2797           for (ptr = m->desc; *ptr; ptr++)
2798                     if (*ptr == '%')
2799                               break;
2800           if (*ptr == '\0') {
2801                     /* No format string; ok */
2802                     return 1;
2803           }
2804 
2805           assert(file_nformats == file_nnames);
2806 
2807           if (m->type >= file_nformats) {
2808                     file_magwarn(ms, "Internal error inconsistency between "
2809                         "m->type and format strings");
2810                     return -1;
2811           }
2812           if (file_formats[m->type] == FILE_FMT_NONE) {
2813                     file_magwarn(ms, "No format string for `%s' with description "
2814                         "`%s'", m->desc, file_names[m->type]);
2815                     return -1;
2816           }
2817 
2818           ptr++;
2819           if (check_format_type(ptr, m->type, &estr) == -1) {
2820                     /*
2821                      * TODO: this error message is unhelpful if the format
2822                      * string is not one character long
2823                      */
2824                     file_magwarn(ms, "Printf format is %s for type "
2825                         "`%s' in description `%s'", estr,
2826                         file_names[m->type], m->desc);
2827                     return -1;
2828           }
2829 
2830           for (; *ptr; ptr++) {
2831                     if (*ptr == '%') {
2832                               file_magwarn(ms,
2833                                   "Too many format strings (should have at most one) "
2834                                   "for `%s' with description `%s'",
2835                                   file_names[m->type], m->desc);
2836                               return -1;
2837                     }
2838           }
2839           return 0;
2840 }
2841 
2842 /*
2843  * Read a numeric value from a pointer, into the value union of a magic
2844  * pointer, according to the magic type.  Update the string pointer to point
2845  * just after the number read.  Return 0 for success, non-zero for failure.
2846  */
2847 file_private int
getvalue(struct magic_set * ms,struct magic * m,const char ** p,int action)2848 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2849 {
2850           char *ep;
2851           uint64_t ull;
2852           int y;
2853 
2854           switch (m->type) {
2855           case FILE_BESTRING16:
2856           case FILE_LESTRING16:
2857           case FILE_STRING:
2858           case FILE_PSTRING:
2859           case FILE_REGEX:
2860           case FILE_SEARCH:
2861           case FILE_NAME:
2862           case FILE_USE:
2863           case FILE_DER:
2864           case FILE_OCTAL:
2865                     *p = getstr(ms, m, *p, action == FILE_COMPILE);
2866                     if (*p == NULL) {
2867                               if (ms->flags & MAGIC_CHECK)
2868                                         file_magwarn(ms, "cannot get string from `%s'",
2869                                             m->value.s);
2870                               return -1;
2871                     }
2872                     if (m->type == FILE_REGEX) {
2873                               file_regex_t rx;
2874                               int rc =
2875                                   file_regcomp(ms, &rx, m->value.s, REG_EXTENDED);
2876                               if (rc == 0) {
2877                                         file_regfree(&rx);
2878                               }
2879                               return rc ? -1 : 0;
2880                     }
2881                     return 0;
2882           default:
2883                     if (m->reln == 'x')
2884                               return 0;
2885                     break;
2886           }
2887 
2888           switch (m->type) {
2889           case FILE_FLOAT:
2890           case FILE_BEFLOAT:
2891           case FILE_LEFLOAT:
2892                     errno = 0;
2893 #ifdef HAVE_STRTOF
2894                     m->value.f = strtof(*p, &ep);
2895 #else
2896                     m->value.f = (float)strtod(*p, &ep);
2897 #endif
2898                     if (errno == 0)
2899                               *p = ep;
2900                     return 0;
2901           case FILE_DOUBLE:
2902           case FILE_BEDOUBLE:
2903           case FILE_LEDOUBLE:
2904                     errno = 0;
2905                     m->value.d = strtod(*p, &ep);
2906                     if (errno == 0)
2907                               *p = ep;
2908                     return 0;
2909           case FILE_GUID:
2910                     if (file_parse_guid(*p, m->value.guid) == -1)
2911                               return -1;
2912                     *p += FILE_GUID_SIZE - 1;
2913                     return 0;
2914           default:
2915                     errno = 0;
2916                     ull = CAST(uint64_t, strtoull(*p, &ep, 0));
2917                     m->value.q = file_signextend(ms, m, ull);
2918                     if (*p == ep) {
2919                               file_magwarn(ms, "Unparsable number `%s'", *p);
2920                               return -1;
2921                     } else {
2922                               size_t ts = typesize(m->type);
2923                               uint64_t x;
2924                               const char *q;
2925 
2926                               if (ts == FILE_BADSIZE) {
2927                                         file_magwarn(ms,
2928                                             "Expected numeric type got `%s'",
2929                                             type_tbl[m->type].name);
2930                                         return -1;
2931                               }
2932                               for (q = *p; isspace(CAST(unsigned char, *q)); q++)
2933                                         continue;
2934                               if (*q == '-' && ull != UINT64_MAX)
2935                                         ull = -CAST(int64_t, ull);
2936                               switch (ts) {
2937                               case 1:
2938                                         x = CAST(uint64_t, ull & ~0xffULL);
2939                                         y = (x & ~0xffULL) != ~0xffULL;
2940                                         break;
2941                               case 2:
2942                                         x = CAST(uint64_t, ull & ~0xffffULL);
2943                                         y = (x & ~0xffffULL) != ~0xffffULL;
2944                                         break;
2945                               case 4:
2946                                         x = CAST(uint64_t, ull & ~0xffffffffULL);
2947                                         y = (x & ~0xffffffffULL) != ~0xffffffffULL;
2948                                         break;
2949                               case 8:
2950                                         x = 0;
2951                                         y = 0;
2952                                         break;
2953                               default:
2954                                         fprintf(stderr, "Bad width %zu", ts);
2955                                         abort();
2956                               }
2957                               if (x && y) {
2958                                         file_magwarn(ms, "Overflow for numeric"
2959                                             " type `%s' value %#" PRIx64,
2960                                             type_tbl[m->type].name, ull);
2961                                         return -1;
2962                               }
2963                     }
2964                     if (errno == 0) {
2965                               *p = ep;
2966                               eatsize(p);
2967                     }
2968                     return 0;
2969           }
2970 }
2971 
2972 /*
2973  * Convert a string containing C character escapes.  Stop at an unescaped
2974  * space or tab.
2975  * Copy the converted version to "m->value.s", and the length in m->vallen.
2976  * Return updated scan pointer as function result. Warn if set.
2977  */
2978 file_private const char *
getstr(struct magic_set * ms,struct magic * m,const char * s,int warn)2979 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2980 {
2981           const char *origs = s;
2982           char      *p = m->value.s;
2983           size_t  plen = sizeof(m->value.s);
2984           char      *origp = p;
2985           char      *pmax = p + plen - 1;
2986           int       c;
2987           int       val;
2988           size_t    bracket_nesting = 0;
2989 
2990           while ((c = *s++) != '\0') {
2991                     if (isspace(CAST(unsigned char, c)))
2992                               break;
2993                     if (p >= pmax) {
2994                               file_error(ms, 0, "string too long: `%s'", origs);
2995                               return NULL;
2996                     }
2997                     if (c != '\\') {
2998                         if (c == '[') {
2999                                   bracket_nesting++;
3000                         }
3001                         if (c == ']' && bracket_nesting > 0) {
3002                                   bracket_nesting--;
3003                         }
3004                         *p++ = CAST(char, c);
3005                         continue;
3006                     }
3007                     switch(c = *s++) {
3008 
3009                     case '\0':
3010                               if (warn)
3011                                         file_magwarn(ms, "incomplete escape");
3012                               s--;
3013                               goto out;
3014                     case '.':
3015                               if (m->type == FILE_REGEX &&
3016                                   bracket_nesting == 0 && warn) {
3017                                         file_magwarn(ms, "escaped dot ('.') found, "
3018                                             "use \\\\. instead");
3019                               }
3020                               warn = 0; /* already did */
3021                               /*FALLTHROUGH*/
3022                     case '\t':
3023                               if (warn) {
3024                                         file_magwarn(ms,
3025                                             "escaped tab found, use \\\\t instead");
3026                                         warn = 0; /* already did */
3027                               }
3028                               /*FALLTHROUGH*/
3029                     default:
3030                               if (warn) {
3031                                         if (isprint(CAST(unsigned char, c))) {
3032                                                   /* Allow escaping of
3033                                                    * ``relations'' */
3034                                                   if (strchr("<>&^=!", c) == NULL
3035                                                       && (m->type != FILE_REGEX ||
3036                                                       strchr("[]().*?^$|{}", c)
3037                                                       == NULL)) {
3038                                                             file_magwarn(ms, "no "
3039                                                                 "need to escape "
3040                                                                 "`%c'", c);
3041                                                   }
3042                                         } else {
3043                                                   file_magwarn(ms,
3044                                                       "unknown escape sequence: "
3045                                                       "\\%03o", c);
3046                                         }
3047                               }
3048                               /*FALLTHROUGH*/
3049                     /* space, perhaps force people to use \040? */
3050                     case ' ':
3051 #if 0
3052                     /*
3053                      * Other things people escape, but shouldn't need to,
3054                      * so we disallow them
3055                      */
3056                     case '\'':
3057                     case '"':
3058                     case '?':
3059 #endif
3060                     /* Relations */
3061                     case '>':
3062                     case '<':
3063                     case '&':
3064                     case '^':
3065                     case '=':
3066                     case '!':
3067                     /* and backslash itself */
3068                     case '\\':
3069                               *p++ = CAST(char, c);
3070                               break;
3071 
3072                     case 'a':
3073                               *p++ = '\a';
3074                               break;
3075 
3076                     case 'b':
3077                               *p++ = '\b';
3078                               break;
3079 
3080                     case 'f':
3081                               *p++ = '\f';
3082                               break;
3083 
3084                     case 'n':
3085                               *p++ = '\n';
3086                               break;
3087 
3088                     case 'r':
3089                               *p++ = '\r';
3090                               break;
3091 
3092                     case 't':
3093                               *p++ = '\t';
3094                               break;
3095 
3096                     case 'v':
3097                               *p++ = '\v';
3098                               break;
3099 
3100                     /* \ and up to 3 octal digits */
3101                     case '0':
3102                     case '1':
3103                     case '2':
3104                     case '3':
3105                     case '4':
3106                     case '5':
3107                     case '6':
3108                     case '7':
3109                               val = c - '0';
3110                               c = *s++;  /* try for 2 */
3111                               if (c >= '0' && c <= '7') {
3112                                         val = (val << 3) | (c - '0');
3113                                         c = *s++;  /* try for 3 */
3114                                         if (c >= '0' && c <= '7')
3115                                                   val = (val << 3) | (c-'0');
3116                                         else
3117                                                   --s;
3118                               }
3119                               else
3120                                         --s;
3121                               *p++ = CAST(char, val);
3122                               break;
3123 
3124                     /* \x and up to 2 hex digits */
3125                     case 'x':
3126                               val = 'x';          /* Default if no digits */
3127                               c = hextoint(*s++); /* Get next char */
3128                               if (c >= 0) {
3129                                         val = c;
3130                                         c = hextoint(*s++);
3131                                         if (c >= 0)
3132                                                   val = (val << 4) + c;
3133                                         else
3134                                                   --s;
3135                               } else
3136                                         --s;
3137                               *p++ = CAST(char, val);
3138                               break;
3139                     }
3140           }
3141           --s;
3142 out:
3143           *p = '\0';
3144           m->vallen = CAST(unsigned char, (p - origp));
3145           if (m->type == FILE_PSTRING) {
3146                     size_t l =  file_pstring_length_size(ms, m);
3147                     if (l == FILE_BADSIZE)
3148                               return NULL;
3149                     m->vallen += CAST(unsigned char, l);
3150           }
3151           return s;
3152 }
3153 
3154 
/*
 * Value of a single hexadecimal digit character ('0'-'9', 'a'-'f',
 * 'A'-'F'); -1 if c is not a hex digit.
 */
file_private int
hextoint(int c)
{
	const unsigned char uc = CAST(unsigned char, c);
	int digit = -1;

	if (isascii(uc)) {
		if (isdigit(uc))
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			digit = c - 'A' + 10;
	}
	return digit;
}
3169 
3170 
/*
 * Write a string to fp, rendering non-printable bytes as C escapes.
 * When len is FILE_BADSIZE the string is taken to be NUL-terminated,
 * otherwise exactly len bytes are written (embedded NULs included).
 * Bytes in the range 040..0176 are written unchanged; the control
 * characters \a \b \f \n \r \t \v use their letter form and every other
 * byte is written as a backslash followed by three octal digits.
 */
file_protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	const int nul_terminated = (len == FILE_BADSIZE);

	for (;; s++) {
		char c;
		char letter;

		if (nul_terminated) {
			if (*s == '\0')
				break;
		} else if (len-- == 0) {
			break;
		}
		c = *s;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		/* Pick the letter of a named escape, if there is one. */
		switch (c) {
		case '\a':
			letter = 'a';
			break;
		case '\b':
			letter = 'b';
			break;
		case '\f':
			letter = 'f';
			break;
		case '\n':
			letter = 'n';
			break;
		case '\r':
			letter = 'r';
			break;
		case '\t':
			letter = 't';
			break;
		case '\v':
			letter = 'v';
			break;
		default:
			letter = '\0';
			break;
		}

		(void) fputc('\\', fp);
		if (letter != '\0')
			(void) fputc(letter, fp);
		else
			(void) fprintf(fp, "%.3o", c & 0377);
	}
}
3230 
/*
 * eatsize(): Skip the size suffix that may follow a number [eg. 10UL].
 * An optional 'u' followed by at most one of 'l', 's', 'h', 'b' or 'c'
 * (any case) is consumed; *p is advanced past what was consumed.
 */
file_private void
eatsize(const char **p)
{
	const char *cur = *p;

	/* optional unsigned marker */
	if (LOWCASE(*cur) == 'u')
		cur++;

	/* optional width: long, short (s/h), char/byte (b/c) */
	if (*cur != '\0' && strchr("lshbc", LOWCASE(*cur)) != NULL)
		cur++;

	*p = cur;
}
3256 
3257 /*
3258  * handle a buffer containing a compiled file.
3259  */
3260 file_private struct magic_map *
apprentice_buf(struct magic_set * ms,struct magic * buf,size_t len)3261 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
3262 {
3263           struct magic_map *map;
3264 
3265           if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3266                     file_oomem(ms, sizeof(*map));
3267                     return NULL;
3268           }
3269           map->len = len;
3270           map->p = buf;
3271           map->type = MAP_TYPE_USER;
3272           if (check_buffer(ms, map, "buffer") != 0) {
3273                     apprentice_unmap(map);
3274                     return NULL;
3275           }
3276           return map;
3277 }
3278 
3279 /*
3280  * handle a compiled file.
3281  */
3282 
3283 file_private struct magic_map *
apprentice_map(struct magic_set * ms,const char * fn)3284 apprentice_map(struct magic_set *ms, const char *fn)
3285 {
3286           int fd;
3287           struct stat st;
3288           char *dbname = NULL;
3289           struct magic_map *map;
3290           struct magic_map *rv = NULL;
3291 
3292           fd = -1;
3293           if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3294                     file_oomem(ms, sizeof(*map));
3295                     goto error;
3296           }
3297           map->type = MAP_TYPE_USER;    /* unspecified */
3298 
3299           dbname = mkdbname(ms, fn, 0);
3300           if (dbname == NULL)
3301                     goto error;
3302 
3303           if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
3304                     goto error;
3305 
3306           if (fstat(fd, &st) == -1) {
3307                     file_error(ms, errno, "cannot stat `%s'", dbname);
3308                     goto error;
3309           }
3310           if (st.st_size < 8 || st.st_size > maxoff_t()) {
3311                     file_error(ms, 0, "file `%s' is too %s", dbname,
3312                         st.st_size < 8 ? "small" : "large");
3313                     goto error;
3314           }
3315 
3316           map->len = CAST(size_t, st.st_size);
3317 #ifdef QUICK
3318           map->type = MAP_TYPE_MMAP;
3319           if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE,
3320               MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) {
3321                     file_error(ms, errno, "cannot map `%s'", dbname);
3322                     goto error;
3323           }
3324 #else
3325           map->type = MAP_TYPE_MALLOC;
3326           if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
3327                     file_oomem(ms, map->len);
3328                     goto error;
3329           }
3330           if (read(fd, map->p, map->len) != (ssize_t)map->len) {
3331                     file_badread(ms);
3332                     goto error;
3333           }
3334 #endif
3335           (void)close(fd);
3336           fd = -1;
3337 
3338           if (check_buffer(ms, map, dbname) != 0) {
3339                     goto error;
3340           }
3341 #ifdef QUICK
3342           if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) {
3343                     file_error(ms, errno, "cannot mprotect `%s'", dbname);
3344                     goto error;
3345           }
3346 #endif
3347 
3348           free(dbname);
3349           return map;
3350 
3351 error:
3352           if (fd != -1)
3353                     (void)close(fd);
3354           apprentice_unmap(map);
3355           free(dbname);
3356           return rv;
3357 }
3358 
3359 file_private int
check_buffer(struct magic_set * ms,struct magic_map * map,const char * dbname)3360 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
3361 {
3362           uint32_t *ptr;
3363           uint32_t entries, nentries;
3364           uint32_t version;
3365           int i, needsbyteswap;
3366 
3367           ptr = CAST(uint32_t *, map->p);
3368           if (*ptr != MAGICNO) {
3369                     if (swap4(*ptr) != MAGICNO) {
3370                               file_error(ms, 0, "bad magic in `%s'", dbname);
3371                               return -1;
3372                     }
3373                     needsbyteswap = 1;
3374           } else
3375                     needsbyteswap = 0;
3376           if (needsbyteswap)
3377                     version = swap4(ptr[1]);
3378           else
3379                     version = ptr[1];
3380           if (version != VERSIONNO) {
3381                     file_error(ms, 0, "File %s supports only version %d magic "
3382                         "files. `%s' is version %d", VERSION,
3383                         VERSIONNO, dbname, version);
3384                     return -1;
3385           }
3386           entries = CAST(uint32_t, map->len / sizeof(struct magic));
3387           if ((entries * sizeof(struct magic)) != map->len) {
3388                     file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
3389                         "a multiple of %" SIZE_T_FORMAT "u",
3390                         dbname, map->len, sizeof(struct magic));
3391                     return -1;
3392           }
3393           map->magic[0] = CAST(struct magic *, map->p) + 1;
3394           nentries = 0;
3395           for (i = 0; i < MAGIC_SETS; i++) {
3396                     if (needsbyteswap)
3397                               map->nmagic[i] = swap4(ptr[i + 2]);
3398                     else
3399                               map->nmagic[i] = ptr[i + 2];
3400                     if (i != MAGIC_SETS - 1)
3401                               map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3402                     nentries += map->nmagic[i];
3403           }
3404           if (entries != nentries + 1) {
3405                     file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3406                         dbname, entries, nentries + 1);
3407                     return -1;
3408           }
3409           if (needsbyteswap)
3410                     for (i = 0; i < MAGIC_SETS; i++)
3411                               byteswap(map->magic[i], map->nmagic[i]);
3412           return 0;
3413 }
3414 
3415 /*
3416  * handle an mmaped file.
3417  */
3418 file_private int
apprentice_compile(struct magic_set * ms,struct magic_map * map,const char * fn)3419 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3420 {
3421           static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3422           static const size_t m = sizeof(**map->magic);
3423           int fd = -1;
3424           size_t len;
3425           char *dbname;
3426           int rv = -1;
3427           uint32_t i;
3428           union {
3429                     struct magic m;
3430                     uint32_t h[2 + MAGIC_SETS];
3431           } hdr;
3432 
3433           dbname = mkdbname(ms, fn, 1);
3434 
3435           if (dbname == NULL)
3436                     goto out;
3437 
3438           if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1)
3439           {
3440                     file_error(ms, errno, "cannot open `%s'", dbname);
3441                     goto out;
3442           }
3443           memset(&hdr, 0, sizeof(hdr));
3444           hdr.h[0] = MAGICNO;
3445           hdr.h[1] = VERSIONNO;
3446           memcpy(hdr.h + 2, map->nmagic, nm);
3447 
3448           if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) {
3449                     file_error(ms, errno, "error writing `%s'", dbname);
3450                     goto out2;
3451           }
3452 
3453           for (i = 0; i < MAGIC_SETS; i++) {
3454                     len = m * map->nmagic[i];
3455                     if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) {
3456                               file_error(ms, errno, "error writing `%s'", dbname);
3457                               goto out2;
3458                     }
3459           }
3460 
3461           rv = 0;
3462 out2:
3463           if (fd != -1)
3464                     (void)close(fd);
3465 out:
3466           apprentice_unmap(map);
3467           free(dbname);
3468           return rv;
3469 }
3470 
3471 file_private const char ext[] = ".mgc";
3472 /*
3473  * make a dbname
3474  */
3475 file_private char *
mkdbname(struct magic_set * ms,const char * fn,int strip)3476 mkdbname(struct magic_set *ms, const char *fn, int strip)
3477 {
3478           const char *p, *q;
3479           char *buf;
3480 
3481           if (strip) {
3482                     if ((p = strrchr(fn, '/')) != NULL)
3483                               fn = ++p;
3484           }
3485 
3486           for (q = fn; *q; q++)
3487                     continue;
3488           /* Look for .mgc */
3489           for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3490                     if (*p != *q)
3491                               break;
3492 
3493           /* Did not find .mgc, restore q */
3494           if (p >= ext)
3495                     while (*q)
3496                               q++;
3497 
3498           q++;
3499           /* Compatibility with old code that looked in .mime */
3500           if (ms->flags & MAGIC_MIME) {
3501                     if (asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext)
3502                         < 0)
3503                               return NULL;
3504                     if (access(buf, R_OK) != -1) {
3505                               ms->flags &= MAGIC_MIME_TYPE;
3506                               return buf;
3507                     }
3508                     free(buf);
3509           }
3510           if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0)
3511                     return NULL;
3512 
3513           /* Compatibility with old code that looked in .mime */
3514           if (strstr(fn, ".mime") != NULL)
3515                     ms->flags &= MAGIC_MIME_TYPE;
3516           return buf;
3517 }
3518 
3519 /*
3520  * Byteswap an mmap'ed file if needed
3521  */
3522 file_private void
byteswap(struct magic * magic,uint32_t nmagic)3523 byteswap(struct magic *magic, uint32_t nmagic)
3524 {
3525           uint32_t i;
3526           for (i = 0; i < nmagic; i++)
3527                     bs1(&magic[i]);
3528 }
3529 
3530 #if !defined(HAVE_BYTESWAP_H) && !defined(HAVE_SYS_BSWAP_H)
/*
 * Reverse the byte order of a 16-bit value.
 */
file_private uint16_t
swap2(uint16_t sv)
{
	/* sv is promoted to int; the cast drops the bits shifted past 16. */
	return CAST(uint16_t, (sv << 8) | (sv >> 8));
}
3544 
/*
 * Reverse the byte order of a 32-bit value.
 */
file_private uint32_t
swap4(uint32_t sv)
{
	uint32_t rv = 0;
	int i;

	/* Move the bytes over one by one, lowest byte first. */
	for (i = 0; i < 4; i++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
3560 
/*
 * Reverse the byte order of a 64-bit value (all eight bytes, not the
 * two 32-bit halves separately).
 */
file_private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	int i;

	/* Move the bytes over one by one, lowest byte first. */
	for (i = 0; i < 8; i++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
3591 #endif
3592 
/*
 * Decode a variable-length integer starting at us.
 *
 * Each byte carries 7 bits of the value; a set high bit (0x80) means
 * that another byte follows.  For FILE_LEVARINT the first byte holds
 * the least significant 7 bits, for any other t the first byte holds
 * the most significant 7 bits.
 *
 * If l is not NULL, *l receives the number of bytes the encoding
 * occupies.  Returns the decoded value.
 * There is no length argument: the bytes at us must contain a byte with
 * a clear high bit that ends the encoding.
 */
file_protected uintmax_t
file_varint2uintmax_t(const unsigned char *us, int t, size_t *l)
{
	uintmax_t x = 0;
	const unsigned char *c;

	if (t == FILE_LEVARINT) {
		/* Find the last byte of the encoding. */
		for (c = us; (*c & 0x80) != 0; c++)
			continue;
		if (l)
			*l = c - us + 1;
		/*
		 * Accumulate from the most significant group (last byte)
		 * down to the first byte.  Shift before merging each group
		 * so the result is not left shifted by 7 extra bits, and
		 * stop at us without stepping in front of it.
		 */
		for (;;) {
			x = (x << 7) | (*c & 0x7f);
			if (c == us)
				break;
			c--;
		}
	} else {
		/* Most significant group first: shift, merge, go on. */
		for (c = us; ; c++) {
			x = (x << 7) | (*c & 0x7f);
			if ((*c & 0x80) == 0)
				break;
		}
		if (l)
			*l = c - us + 1;
	}
	return x;
}
3621 
3622 
3623 /*
3624  * byteswap a single magic entry
3625  */
3626 file_private void
bs1(struct magic * m)3627 bs1(struct magic *m)
3628 {
3629           m->cont_level = swap2(m->cont_level);
3630           m->offset = swap4(CAST(uint32_t, m->offset));
3631           m->in_offset = swap4(CAST(uint32_t, m->in_offset));
3632           m->lineno = swap4(CAST(uint32_t, m->lineno));
3633           if (IS_STRING(m->type)) {
3634                     m->str_range = swap4(m->str_range);
3635                     m->str_flags = swap4(m->str_flags);
3636           }
3637           else {
3638                     m->value.q = swap8(m->value.q);
3639                     m->num_mask = swap8(m->num_mask);
3640           }
3641 }
3642 
3643 file_protected size_t
file_pstring_length_size(struct magic_set * ms,const struct magic * m)3644 file_pstring_length_size(struct magic_set *ms, const struct magic *m)
3645 {
3646           switch (m->str_flags & PSTRING_LEN) {
3647           case PSTRING_1_LE:
3648                     return 1;
3649           case PSTRING_2_LE:
3650           case PSTRING_2_BE:
3651                     return 2;
3652           case PSTRING_4_LE:
3653           case PSTRING_4_BE:
3654                     return 4;
3655           default:
3656                     file_error(ms, 0, "corrupt magic file "
3657                         "(bad pascal string length %d)",
3658                         m->str_flags & PSTRING_LEN);
3659                     return FILE_BADSIZE;
3660           }
3661 }
3662 file_protected size_t
file_pstring_get_length(struct magic_set * ms,const struct magic * m,const char * ss)3663 file_pstring_get_length(struct magic_set *ms, const struct magic *m,
3664     const char *ss)
3665 {
3666           size_t len = 0;
3667           const unsigned char *s = RCAST(const unsigned char *, ss);
3668           unsigned int s3, s2, s1, s0;
3669 
3670           switch (m->str_flags & PSTRING_LEN) {
3671           case PSTRING_1_LE:
3672                     len = *s;
3673                     break;
3674           case PSTRING_2_LE:
3675                     s0 = s[0];
3676                     s1 = s[1];
3677                     len = (s1 << 8) | s0;
3678                     break;
3679           case PSTRING_2_BE:
3680                     s0 = s[0];
3681                     s1 = s[1];
3682                     len = (s0 << 8) | s1;
3683                     break;
3684           case PSTRING_4_LE:
3685                     s0 = s[0];
3686                     s1 = s[1];
3687                     s2 = s[2];
3688                     s3 = s[3];
3689                     len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3690                     break;
3691           case PSTRING_4_BE:
3692                     s0 = s[0];
3693                     s1 = s[1];
3694                     s2 = s[2];
3695                     s3 = s[3];
3696                     len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3697                     break;
3698           default:
3699                     file_error(ms, 0, "corrupt magic file "
3700                         "(bad pascal string length %d)",
3701                         m->str_flags & PSTRING_LEN);
3702                     return FILE_BADSIZE;
3703           }
3704 
3705           if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) {
3706                     size_t l = file_pstring_length_size(ms, m);
3707                     if (l == FILE_BADSIZE)
3708                               return l;
3709                     len -= l;
3710           }
3711 
3712           return len;
3713 }
3714 
3715 file_protected int
file_magicfind(struct magic_set * ms,const char * name,struct mlist * v)3716 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3717 {
3718           uint32_t i, j;
3719           struct mlist *mlist, *ml;
3720 
3721           mlist = ms->mlist[1];
3722 
3723           for (ml = mlist->next; ml != mlist; ml = ml->next) {
3724                     struct magic *ma = ml->magic;
3725                     for (i = 0; i < ml->nmagic; i++) {
3726                               if (ma[i].type != FILE_NAME)
3727                                         continue;
3728                               if (strcmp(ma[i].value.s, name) == 0) {
3729                                         v->magic = &ma[i];
3730                                         v->magic_rxcomp = &(ml->magic_rxcomp[i]);
3731                                         for (j = i + 1; j < ml->nmagic; j++)
3732                                             if (ma[j].cont_level == 0)
3733                                                       break;
3734                                         v->nmagic = j - i;
3735                                         return 0;
3736                               }
3737                     }
3738           }
3739           return -1;
3740 }
3741