1 /* $OpenBSD: apprentice.c,v 1.23 2006/10/31 18:06:27 ray Exp $ */
2 /*
3 * Copyright (c) Ian F. Darwin 1986-1995.
4 * Software written by Ian F. Darwin and others;
5 * maintained 1995-present by Christos Zoulas and others.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice immediately at the beginning of the file, without modification,
12 * this list of conditions, and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 /*
30 * apprentice - make one pass through /etc/magic, learning its secrets.
31 */
32
33 #include "file.h"
34 #include "magic.h"
35 #include <stdlib.h>
36 #ifdef HAVE_UNISTD_H
37 #include <unistd.h>
38 #endif
39 #include <string.h>
40 #include <ctype.h>
41 #include <fcntl.h>
42 #include <sys/stat.h>
43 #include <sys/param.h>
44 #include <sys/mman.h>
45
46 __RCSID("$MirOS: src/usr.bin/file/apprentice.c,v 1.2 2007/07/10 14:22:35 tg Exp $");
47
/*
 * EATAB: advance the scan pointer past a run of ASCII whitespace.
 * The macro operates on a char pointer named `l' that must be in scope
 * at the point of use.
 */
#define	EATAB	{							\
		for (; isascii((unsigned char) *l) &&			\
		    isspace((unsigned char) *l); ++l)			\
			continue;					\
	}

/*
 * LOWCASE: yield the lower-case form of an upper-case character and any
 * other character unchanged.  The argument is evaluated more than once.
 */
#define	LOWCASE(l)	(!isupper((unsigned char) (l)) ? (l) :		\
			    tolower((unsigned char) (l)))
52 /*
53 * Work around a bug in headers on Digital Unix.
54 * At least confirmed for: OSF1 V4.0 878
55 */
56 #if defined(__osf__) && defined(__DECC)
57 #ifdef MAP_FAILED
58 #undef MAP_FAILED
59 #endif
60 #endif
61
62 #ifndef MAP_FAILED
63 #define MAP_FAILED (void *) -1
64 #endif
65
66 #ifndef MAP_FILE
67 #define MAP_FILE 0
68 #endif
69
70 #ifndef MAXPATHLEN
71 #define MAXPATHLEN 1024
72 #endif
73
74 private int getvalue(struct magic_set *ms, struct magic *, char **);
75 private int hextoint(int);
76 private char *getstr(struct magic_set *, char *, char *, int, int *);
77 private int parse(struct magic_set *, struct magic **, uint32_t *, char *, int);
78 private void eatsize(char **);
79 private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
80 private int apprentice_file(struct magic_set *, struct magic **, uint32_t *,
81 const char *, int);
82 private void byteswap(struct magic *, uint32_t);
83 private void bs1(struct magic *);
84 private uint16_t swap2(uint16_t);
85 private uint32_t swap4(uint32_t);
86 private char *mkdbname(const char *, char *, size_t);
87 private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
88 const char *);
89 private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
90 const char *);
91
92 private size_t maxmagic = 0;
93 private size_t magicsize = sizeof(struct magic);
94
95 #ifdef COMPILE_ONLY
96 const char *magicfile;
97 char *progname;
98 int lineno;
99
100 int main(int, char *[]);
101
102 int
main(int argc,char * argv[])103 main(int argc, char *argv[])
104 {
105 int ret;
106
107 if ((progname = strrchr(argv[0], '/')) != NULL)
108 progname++;
109 else
110 progname = argv[0];
111
112 if (argc != 2) {
113 (void)fprintf(stderr, "usage: %s file\n", progname);
114 exit(1);
115 }
116 magicfile = argv[1];
117
118 exit(file_apprentice(magicfile, COMPILE, MAGIC_CHECK) == -1 ? 1 : 0);
119 }
120 #endif /* COMPILE_ONLY */
121
122
123 /*
124 * Handle one file.
125 */
126 private int
apprentice_1(struct magic_set * ms,const char * fn,int action,struct mlist * mlist)127 apprentice_1(struct magic_set *ms, const char *fn, int action,
128 struct mlist *mlist)
129 {
130 struct magic *magic = NULL;
131 uint32_t nmagic = 0;
132 struct mlist *ml;
133 int rv = -1;
134 int mapped;
135
136 if (magicsize != FILE_MAGICSIZE) {
137 file_error(ms, 0, "magic element size %lu != %lu",
138 (unsigned long)sizeof(*magic),
139 (unsigned long)FILE_MAGICSIZE);
140 return -1;
141 }
142
143 if (action == FILE_COMPILE) {
144 rv = apprentice_file(ms, &magic, &nmagic, fn, action);
145 if (rv != 0)
146 return -1;
147 rv = apprentice_compile(ms, &magic, &nmagic, fn);
148 free(magic);
149 return rv;
150 }
151 #ifndef COMPILE_ONLY
152 if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
153 if (ms->flags & MAGIC_CHECK)
154 file_magwarn("using regular magic file `%s'", fn);
155 rv = apprentice_file(ms, &magic, &nmagic, fn, action);
156 if (rv != 0)
157 return -1;
158 mapped = 0;
159 }
160
161 if (rv == -1)
162 return rv;
163 mapped = rv;
164
165 if (magic == NULL || nmagic == 0) {
166 file_delmagic(magic, mapped, nmagic);
167 return -1;
168 }
169
170 if ((ml = malloc(sizeof(*ml))) == NULL) {
171 file_delmagic(magic, mapped, nmagic);
172 file_oomem(ms);
173 return -1;
174 }
175
176 ml->magic = magic;
177 ml->nmagic = nmagic;
178 ml->mapped = mapped;
179
180 mlist->prev->next = ml;
181 ml->prev = mlist->prev;
182 ml->next = mlist;
183 mlist->prev = ml;
184
185 return 0;
186 #endif /* COMPILE_ONLY */
187 }
188
189 protected void
file_delmagic(struct magic * p,int type,size_t entries)190 file_delmagic(struct magic *p, int type, size_t entries)
191 {
192 if (p == NULL)
193 return;
194 switch (type) {
195 case 2:
196 p--;
197 (void)munmap((void *)p, sizeof(*p) * (entries + 1));
198 break;
199 case 1:
200 p--;
201 case 0:
202 free(p);
203 break;
204 default:
205 abort();
206 }
207 }
208
209
210 /* const char *fn: list of magic files */
211 protected struct mlist *
file_apprentice(struct magic_set * ms,const char * fn,int action)212 file_apprentice(struct magic_set *ms, const char *fn, int action)
213 {
214 char *p, *mfn, *afn = NULL;
215 int file_err, errs = -1;
216 struct mlist *mlist;
217
218 if (fn == NULL)
219 fn = getenv("MAGIC");
220 if (fn == NULL)
221 fn = MAGIC;
222
223 if ((fn = mfn = strdup(fn)) == NULL) {
224 file_oomem(ms);
225 return NULL;
226 }
227
228 if ((mlist = malloc(sizeof(*mlist))) == NULL) {
229 free(mfn);
230 file_oomem(ms);
231 return NULL;
232 }
233 mlist->next = mlist->prev = mlist;
234
235 while (fn) {
236 p = strchr(fn, PATHSEP);
237 if (p)
238 *p++ = '\0';
239 if (*fn == '\0')
240 break;
241 if (ms->flags & MAGIC_MIME) {
242 size_t len = strlen(fn) + 5 + 1;
243 if ((afn = malloc(len)) == NULL) {
244 free(mfn);
245 free(mlist);
246 file_oomem(ms);
247 return NULL;
248 }
249 (void)strlcpy(afn, fn, len);
250 (void)strlcat(afn, ".mime", len);
251 fn = afn;
252 }
253 file_err = apprentice_1(ms, fn, action, mlist);
254 if (file_err > errs)
255 errs = file_err;
256 if (afn) {
257 free(afn);
258 afn = NULL;
259 }
260 fn = p;
261 }
262 if (errs == -1) {
263 free(mfn);
264 free(mlist);
265 mlist = NULL;
266 file_error(ms, 0, "could not find any magic files!");
267 return NULL;
268 }
269 free(mfn);
270 return mlist;
271 }
272
273 /*
274 * parse from a file
275 * const char *fn: name of magic file
276 */
277 private int
apprentice_file(struct magic_set * ms,struct magic ** magicp,uint32_t * nmagicp,const char * fn,int action)278 apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
279 const char *fn, int action)
280 {
281 private const char hdr[] =
282 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
283 FILE *f;
284 char line[BUFSIZ];
285 int lineno;
286 int errs = 0;
287
288 f = fopen(fn, "r");
289 if (f == NULL) {
290 if (errno != ENOENT)
291 file_error(ms, errno, "cannot read magic file `%s'",
292 fn);
293 return -1;
294 }
295
296 maxmagic = MAXMAGIS;
297 *magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
298 if (*magicp == NULL) {
299 (void)fclose(f);
300 file_oomem(ms);
301 return -1;
302 }
303
304 /* print silly verbose header for USG compat. */
305 if (action == FILE_CHECK)
306 (void)fprintf(stderr, "%s\n", hdr);
307
308 /* parse it */
309 for (lineno = 1; fgets(line, sizeof(line), f) != NULL; lineno++) {
310 char *p;
311
312 if (line[0]=='#') /* comment, do not parse */
313 continue;
314 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
315 continue;
316 /* delete newline */
317 if ((p = strchr(line, '\n')) != NULL)
318 *p = '\0';
319 if (parse(ms, magicp, nmagicp, line, action) != 0)
320 errs = 1;
321 }
322
323 (void)fclose(f);
324 if (errs) {
325 free(*magicp);
326 *magicp = NULL;
327 *nmagicp = 0;
328 }
329 return errs;
330 }
331
332 /*
333 * extend the sign bit if the comparison is to be signed
334 */
335 protected uint32_t
file_signextend(struct magic_set * ms,struct magic * m,uint32_t v)336 file_signextend(struct magic_set *ms, struct magic *m, uint32_t v)
337 {
338 if (!(m->flag & UNSIGNED))
339 switch(m->type) {
340 /*
341 * Do not remove the casts below. They are
342 * vital. When later compared with the data,
343 * the sign extension must have happened.
344 */
345 case FILE_BYTE:
346 v = (char) v;
347 break;
348 case FILE_SHORT:
349 case FILE_BESHORT:
350 case FILE_LESHORT:
351 v = (short) v;
352 break;
353 case FILE_DATE:
354 case FILE_BEDATE:
355 case FILE_LEDATE:
356 case FILE_LDATE:
357 case FILE_BELDATE:
358 case FILE_LELDATE:
359 case FILE_LONG:
360 case FILE_BELONG:
361 case FILE_LELONG:
362 v = (int32_t) v;
363 break;
364 case FILE_STRING:
365 case FILE_PSTRING:
366 break;
367 case FILE_REGEX:
368 break;
369 default:
370 if (ms->flags & MAGIC_CHECK)
371 file_magwarn("cannot happen: m->type=%d\n",
372 m->type);
373 return ~0U;
374 }
375 return v;
376 }
377
378 /*
379 * parse one line from magic file, put into magic[index++] if valid
380 */
381 private int
parse(struct magic_set * ms,struct magic ** magicp,uint32_t * nmagicp,char * l,int action)382 parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l,
383 int action)
384 {
385 struct magic *m;
386 char *t;
387 private const char *fops = FILE_OPS;
388 uint32_t val;
389
390 #define ALLOC_INCR 200
391 if (*nmagicp + 1 >= maxmagic){
392 maxmagic += ALLOC_INCR;
393 if ((m = (struct magic *) realloc(*magicp,
394 sizeof(struct magic) * maxmagic)) == NULL) {
395 file_oomem(ms);
396 if (*magicp)
397 free(*magicp);
398 return -1;
399 }
400 *magicp = m;
401 memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
402 * ALLOC_INCR);
403 }
404 m = &(*magicp)[*nmagicp];
405 m->flag = 0;
406 m->cont_level = 0;
407
408 while (*l == '>') {
409 ++l; /* step over */
410 m->cont_level++;
411 }
412
413 if (m->cont_level != 0 && *l == '(') {
414 ++l; /* step over */
415 m->flag |= INDIR;
416 }
417 if (m->cont_level != 0 && *l == '&') {
418 ++l; /* step over */
419 m->flag |= OFFADD;
420 }
421
422 /* get offset, then skip over it */
423 m->offset = (uint32_t)strtoul(l, &t, 0);
424 if (l == t)
425 if (ms->flags & MAGIC_CHECK)
426 file_magwarn("offset %s invalid", l);
427 l = t;
428
429 if (m->flag & INDIR) {
430 m->in_type = FILE_LONG;
431 m->in_offset = 0;
432 /*
433 * read [.lbs][+-]nnnnn)
434 */
435 if (*l == '.') {
436 l++;
437 switch (*l) {
438 case 'l':
439 m->in_type = FILE_LELONG;
440 break;
441 case 'L':
442 m->in_type = FILE_BELONG;
443 break;
444 case 'h':
445 case 's':
446 m->in_type = FILE_LESHORT;
447 break;
448 case 'H':
449 case 'S':
450 m->in_type = FILE_BESHORT;
451 break;
452 case 'c':
453 case 'b':
454 case 'C':
455 case 'B':
456 m->in_type = FILE_BYTE;
457 break;
458 default:
459 if (ms->flags & MAGIC_CHECK)
460 file_magwarn(
461 "indirect offset type %c invalid",
462 *l);
463 break;
464 }
465 l++;
466 }
467 if (*l == '~') {
468 m->in_op = FILE_OPINVERSE;
469 l++;
470 }
471 switch (*l) {
472 case '&':
473 m->in_op |= FILE_OPAND;
474 l++;
475 break;
476 case '|':
477 m->in_op |= FILE_OPOR;
478 l++;
479 break;
480 case '^':
481 m->in_op |= FILE_OPXOR;
482 l++;
483 break;
484 case '+':
485 m->in_op |= FILE_OPADD;
486 l++;
487 break;
488 case '-':
489 m->in_op |= FILE_OPMINUS;
490 l++;
491 break;
492 case '*':
493 m->in_op |= FILE_OPMULTIPLY;
494 l++;
495 break;
496 case '/':
497 m->in_op |= FILE_OPDIVIDE;
498 l++;
499 break;
500 case '%':
501 m->in_op |= FILE_OPMODULO;
502 l++;
503 break;
504 }
505 if (isdigit((unsigned char)*l))
506 m->in_offset = (uint32_t)strtoul(l, &t, 0);
507 else
508 t = l;
509 if (*t++ != ')')
510 if (ms->flags & MAGIC_CHECK)
511 file_magwarn("missing ')' in indirect offset");
512 l = t;
513 }
514
515
516 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
517 ++l;
518 EATAB;
519
520 #define NBYTE 4
521 #define NSHORT 5
522 #define NLONG 4
523 #define NSTRING 6
524 #define NDATE 4
525 #define NBESHORT 7
526 #define NBELONG 6
527 #define NBEDATE 6
528 #define NLESHORT 7
529 #define NLELONG 6
530 #define NLEDATE 6
531 #define NPSTRING 7
532 #define NLDATE 5
533 #define NBELDATE 7
534 #define NLELDATE 7
535 #define NREGEX 5
536
537 if (*l == 'u') {
538 ++l;
539 m->flag |= UNSIGNED;
540 }
541
542 /* get type, skip it */
543 if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
544 m->type = FILE_BYTE;
545 l += NBYTE;
546 } else if (strncmp(l, "byte", NBYTE)==0) {
547 m->type = FILE_BYTE;
548 l += NBYTE;
549 } else if (strncmp(l, "short", NSHORT)==0) {
550 m->type = FILE_SHORT;
551 l += NSHORT;
552 } else if (strncmp(l, "long", NLONG)==0) {
553 m->type = FILE_LONG;
554 l += NLONG;
555 } else if (strncmp(l, "string", NSTRING)==0) {
556 m->type = FILE_STRING;
557 l += NSTRING;
558 } else if (strncmp(l, "date", NDATE)==0) {
559 m->type = FILE_DATE;
560 l += NDATE;
561 } else if (strncmp(l, "beshort", NBESHORT)==0) {
562 m->type = FILE_BESHORT;
563 l += NBESHORT;
564 } else if (strncmp(l, "belong", NBELONG)==0) {
565 m->type = FILE_BELONG;
566 l += NBELONG;
567 } else if (strncmp(l, "bedate", NBEDATE)==0) {
568 m->type = FILE_BEDATE;
569 l += NBEDATE;
570 } else if (strncmp(l, "leshort", NLESHORT)==0) {
571 m->type = FILE_LESHORT;
572 l += NLESHORT;
573 } else if (strncmp(l, "lelong", NLELONG)==0) {
574 m->type = FILE_LELONG;
575 l += NLELONG;
576 } else if (strncmp(l, "ledate", NLEDATE)==0) {
577 m->type = FILE_LEDATE;
578 l += NLEDATE;
579 } else if (strncmp(l, "pstring", NPSTRING)==0) {
580 m->type = FILE_PSTRING;
581 l += NPSTRING;
582 } else if (strncmp(l, "ldate", NLDATE)==0) {
583 m->type = FILE_LDATE;
584 l += NLDATE;
585 } else if (strncmp(l, "beldate", NBELDATE)==0) {
586 m->type = FILE_BELDATE;
587 l += NBELDATE;
588 } else if (strncmp(l, "leldate", NLELDATE)==0) {
589 m->type = FILE_LELDATE;
590 l += NLELDATE;
591 } else if (strncmp(l, "regex", NREGEX)==0) {
592 m->type = FILE_REGEX;
593 l += sizeof("regex");
594 } else {
595 if (ms->flags & MAGIC_CHECK)
596 file_magwarn("type %s invalid", l);
597 return -1;
598 }
599 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
600 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
601 if (*l == '~') {
602 if (FILE_STRING != m->type && FILE_PSTRING != m->type)
603 m->mask_op = FILE_OPINVERSE;
604 ++l;
605 }
606 if ((t = strchr(fops, *l)) != NULL) {
607 uint32_t op = (uint32_t)(t - fops);
608 if (op != FILE_OPDIVIDE ||
609 (FILE_STRING != m->type && FILE_PSTRING != m->type)) {
610 ++l;
611 m->mask_op |= op;
612 val = (uint32_t)strtoul(l, &l, 0);
613 m->mask = file_signextend(ms, m, val);
614 eatsize(&l);
615 } else {
616 m->mask = 0L;
617 while (!isspace((unsigned char)*++l)) {
618 switch (*l) {
619 case CHAR_IGNORE_LOWERCASE:
620 m->mask |= STRING_IGNORE_LOWERCASE;
621 break;
622 case CHAR_COMPACT_BLANK:
623 m->mask |= STRING_COMPACT_BLANK;
624 break;
625 case CHAR_COMPACT_OPTIONAL_BLANK:
626 m->mask |=
627 STRING_COMPACT_OPTIONAL_BLANK;
628 break;
629 default:
630 if (ms->flags & MAGIC_CHECK)
631 file_magwarn(
632 "string extension %c invalid",
633 *l);
634 return -1;
635 }
636 }
637 }
638 }
639 /*
640 * We used to set mask to all 1's here, instead let's just not do
641 * anything if mask = 0 (unless you have a better idea)
642 */
643 EATAB;
644
645 switch (*l) {
646 case '>':
647 case '<':
648 /* Old-style anding: "0 byte &0x80 dynamically linked" */
649 case '&':
650 case '^':
651 case '=':
652 m->reln = *l;
653 ++l;
654 if (*l == '=') {
655 /* HP compat: ignore &= etc. */
656 ++l;
657 }
658 break;
659 case '!':
660 if (m->type != FILE_STRING && m->type != FILE_PSTRING) {
661 m->reln = *l;
662 ++l;
663 break;
664 }
665 /*FALLTHROUGH*/
666 default:
667 if (*l == 'x' && isascii((unsigned char)l[1]) &&
668 isspace((unsigned char)l[1])) {
669 m->reln = *l;
670 ++l;
671 goto GetDesc; /* Bill The Cat */
672 }
673 m->reln = '=';
674 break;
675 }
676 EATAB;
677
678 if (getvalue(ms, m, &l))
679 return -1;
680 /*
681 * TODO finish this macro and start using it!
682 * #define offsetcheck {if (offset > HOWMANY-1)
683 * magwarn("offset too big"); }
684 */
685
686 /*
687 * now get last part - the description
688 */
689 GetDesc:
690 EATAB;
691 if (l[0] == '\b') {
692 ++l;
693 m->nospflag = 1;
694 } else if ((l[0] == '\\') && (l[1] == 'b')) {
695 ++l;
696 ++l;
697 m->nospflag = 1;
698 } else
699 m->nospflag = 0;
700
701 strlcpy(m->desc, l, sizeof(m->desc));
702
703 #ifndef COMPILE_ONLY
704 if (action == FILE_CHECK) {
705 file_mdump(m);
706 }
707 #endif
708 ++(*nmagicp); /* make room for next */
709 return 0;
710 }
711
712 /*
713 * Read a numeric value from a pointer, into the value union of a magic
714 * pointer, according to the magic type. Update the string pointer to point
715 * just after the number read. Return 0 for success, non-zero for failure.
716 */
717 private int
getvalue(struct magic_set * ms,struct magic * m,char ** p)718 getvalue(struct magic_set *ms, struct magic *m, char **p)
719 {
720 int slen;
721
722 switch (m->type) {
723 case FILE_STRING:
724 case FILE_PSTRING:
725 case FILE_REGEX:
726 *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen);
727 if (*p == NULL) {
728 if (ms->flags & MAGIC_CHECK)
729 file_magwarn("cannot get string from `%s'",
730 m->value.s);
731 return -1;
732 }
733 m->vallen = slen;
734 return 0;
735 default:
736 if (m->reln != 'x') {
737 m->value.l = file_signextend(ms, m,
738 (uint32_t)strtoul(*p, p, 0));
739 eatsize(p);
740 }
741 return 0;
742 }
743 }
744
745 /*
746 * Convert a string containing C character escapes. Stop at an unescaped
747 * space or tab.
748 * Copy the converted version to "p", returning its length in *slen.
749 * Return updated scan pointer as function result.
750 */
751 private char *
getstr(struct magic_set * ms,char * s,char * p,int plen,int * slen)752 getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen)
753 {
754 char *origs = s, *origp = p;
755 char *pmax = p + plen - 1;
756 int c;
757 int val;
758
759 while ((c = *s++) != '\0') {
760 if (isspace((unsigned char) c))
761 break;
762 if (p >= pmax) {
763 file_error(ms, 0, "string too long: `%s'", origs);
764 return NULL;
765 }
766 if(c == '\\') {
767 switch(c = *s++) {
768
769 case '\0':
770 goto out;
771
772 default:
773 *p++ = (char) c;
774 break;
775
776 case 'n':
777 *p++ = '\n';
778 break;
779
780 case 'r':
781 *p++ = '\r';
782 break;
783
784 case 'b':
785 *p++ = '\b';
786 break;
787
788 case 't':
789 *p++ = '\t';
790 break;
791
792 case 'f':
793 *p++ = '\f';
794 break;
795
796 case 'v':
797 *p++ = '\v';
798 break;
799
800 /* \ and up to 3 octal digits */
801 case '0':
802 case '1':
803 case '2':
804 case '3':
805 case '4':
806 case '5':
807 case '6':
808 case '7':
809 val = c - '0';
810 c = *s++; /* try for 2 */
811 if(c >= '0' && c <= '7') {
812 val = (val<<3) | (c - '0');
813 c = *s++; /* try for 3 */
814 if(c >= '0' && c <= '7')
815 val = (val<<3) | (c-'0');
816 else
817 --s;
818 }
819 else
820 --s;
821 *p++ = (char)val;
822 break;
823
824 /* \x and up to 2 hex digits */
825 case 'x':
826 val = 'x'; /* Default if no digits */
827 c = hextoint(*s++); /* Get next char */
828 if (c >= 0) {
829 val = c;
830 c = hextoint(*s++);
831 if (c >= 0)
832 val = (val << 4) + c;
833 else
834 --s;
835 } else
836 --s;
837 *p++ = (char)val;
838 break;
839 }
840 } else
841 *p++ = (char)c;
842 }
843 out:
844 *p = '\0';
845 *slen = p - origp;
846 return s;
847 }
848
849
/*
 * Map one hexadecimal digit character to its value (0-15).
 * Returns -1 when the character is not ASCII or not a hex digit.
 */
private int
hextoint(int c)
{
	const unsigned char uc = (unsigned char) c;

	/* Same test as isascii(): only the low seven bits may be set. */
	if ((uc & ~0x7f) != 0)
		return -1;
	if (isdigit(uc))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
864
865
/*
 * Print a string containing C character escapes.
 *
 * Characters in the range 040..0176 are written as they are; \n \r \b \t
 * \f \v are written as those two-character escapes and everything else as
 * a backslash followed by three octal digits.
 *
 * When `len' equals ~0U the string is taken to be NUL-terminated;
 * otherwise exactly `len' bytes are printed and nothing at or beyond
 * s[len] is read.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	/* Decide the mode once so it cannot change while len counts down. */
	const int nul_terminated = (len == ~0U);
	char c;

	for (;;) {
		if (nul_terminated) {
			if ((c = *s++) == '\0')
				break;
		} else {
			/* Check the budget before touching the next byte. */
			if (len-- == 0)
				break;
			c = *s++;
		}

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			(void) fprintf(fp, "%.3o", c & 0377);
			break;
		}
	}
}
921
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Advances *p past an optional 'u' followed by an optional one of
 * 'l', 's', 'h', 'b' or 'c', each in either case.
 */
private void
eatsize(char **p)
{
	char *cur = *p;

	/* optional unsigned marker */
	if (tolower((unsigned char)*cur) == 'u')
		cur++;

	/* optional width marker */
	switch (tolower((unsigned char)*cur)) {
	case 'l':	/* long */
	case 's':	/* short */
	case 'h':	/* short */
	case 'b':	/* char/byte */
	case 'c':	/* char/byte */
		cur++;
		break;
	default:
		break;
	}

	*p = cur;
}
947
948 /*
949 * handle a compiled file.
950 */
951 private int
apprentice_map(struct magic_set * ms,struct magic ** magicp,uint32_t * nmagicp,const char * fn)952 apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
953 const char *fn)
954 {
955 int fd;
956 struct stat st;
957 uint32_t *ptr;
958 uint32_t version;
959 int needsbyteswap;
960 char buf[MAXPATHLEN];
961 char *dbname = mkdbname(fn, buf, sizeof(buf));
962 void *mm = NULL;
963
964 if (dbname == NULL)
965 return -1;
966
967 if ((fd = open(dbname, O_RDONLY)) == -1)
968 return -1;
969
970 if (fstat(fd, &st) == -1) {
971 file_error(ms, errno, "cannot stat `%s'", dbname);
972 goto error;
973 }
974 if (st.st_size < 16) {
975 file_error(ms, 0, "file `%s' is too small", dbname);
976 goto error;
977 }
978
979 #ifdef QUICK
980 if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
981 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
982 file_error(ms, errno, "cannot map `%s'", dbname);
983 goto error;
984 }
985 #define RET 2
986 #else
987 if ((mm = malloc((size_t)st.st_size)) == NULL) {
988 file_oomem(ms);
989 goto error;
990 }
991 if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) {
992 file_badread(ms);
993 goto error;
994 }
995 #define RET 1
996 #endif
997 *magicp = mm;
998 (void)close(fd);
999 fd = -1;
1000 ptr = (uint32_t *)(void *)*magicp;
1001 if (*ptr != MAGICNO) {
1002 if (swap4(*ptr) != MAGICNO) {
1003 file_error(ms, 0, "bad magic in `%s'", fn);
1004 goto error;
1005 }
1006 needsbyteswap = 1;
1007 } else
1008 needsbyteswap = 0;
1009 if (needsbyteswap)
1010 version = swap4(ptr[1]);
1011 else
1012 version = ptr[1];
1013 if (version != VERSIONNO) {
1014 file_error(ms, 0, "version mismatch (%d != %d) in `%s'",
1015 version, VERSIONNO, dbname);
1016 goto error;
1017 }
1018 *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
1019 (*magicp)++;
1020 if (needsbyteswap)
1021 byteswap(*magicp, *nmagicp);
1022 return RET;
1023
1024 error:
1025 if (fd != -1)
1026 (void)close(fd);
1027 if (mm) {
1028 #ifdef QUICK
1029 (void)munmap((void *)mm, (size_t)st.st_size);
1030 #else
1031 free(mm);
1032 #endif
1033 } else {
1034 *magicp = NULL;
1035 *nmagicp = 0;
1036 }
1037 return -1;
1038 }
1039
1040 private const uint32_t ar[] = {
1041 MAGICNO, VERSIONNO
1042 };
1043 /*
1044 * handle an mmaped file.
1045 */
1046 private int
apprentice_compile(struct magic_set * ms,struct magic ** magicp,uint32_t * nmagicp,const char * fn)1047 apprentice_compile(struct magic_set *ms, struct magic **magicp,
1048 uint32_t *nmagicp, const char *fn)
1049 {
1050 int fd;
1051 char buf[MAXPATHLEN];
1052 char *dbname = mkdbname(fn, buf, sizeof(buf));
1053
1054 if (dbname == NULL)
1055 return -1;
1056
1057 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
1058 file_error(ms, errno, "cannot open `%s'", dbname);
1059 return -1;
1060 }
1061
1062 if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
1063 file_error(ms, errno, "error writing `%s'", dbname);
1064 return -1;
1065 }
1066
1067 if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
1068 != sizeof(struct magic)) {
1069 file_error(ms, errno, "error seeking `%s'", dbname);
1070 return -1;
1071 }
1072
1073 if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
1074 != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
1075 file_error(ms, errno, "error writing `%s'", dbname);
1076 return -1;
1077 }
1078
1079 (void)close(fd);
1080 return 0;
1081 }
1082
/* Suffix appended to a magic file name to form its compiled database name. */
private const char ext[] = ".mgc";

/*
 * make a dbname
 *
 * Writes `fn' followed by ".mgc" into `buf' (capacity `bufsiz') and
 * returns `buf'.  Returns NULL when the name does not fit, so callers
 * never go on to open a truncated path.
 */
private char *
mkdbname(const char *fn, char *buf, size_t bufsiz)
{
	int n;
#ifdef notdef
	const char *p;
	if ((p = strrchr(fn, '/')) != NULL)
		fn = ++p;
#endif
	n = snprintf(buf, bufsiz, "%s%s", fn, ext);
	/* snprintf() reports the length it wanted; >= bufsiz is truncation. */
	if (n < 0 || (size_t)n >= bufsiz)
		return NULL;
	return buf;
}
1098
1099 /*
1100 * Byteswap an mmap'ed file if needed
1101 */
1102 private void
byteswap(struct magic * magic,uint32_t nmagic)1103 byteswap(struct magic *magic, uint32_t nmagic)
1104 {
1105 uint32_t i;
1106 for (i = 0; i < nmagic; i++)
1107 bs1(&magic[i]);
1108 }
1109
/*
 * swap a short
 *
 * Returns `sv' with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	const unsigned int lo = sv & 0x00ffU;
	const unsigned int hi = (sv & 0xff00U) >> 8;

	return (uint16_t)((lo << 8) | hi);
}
1123
/*
 * swap an int
 *
 * Returns `sv' with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv & 0x00ff0000U) >> 8) |
	    ((sv & 0xff000000U) >> 24);
}
1139
1140 /*
1141 * byteswap a single magic entry
1142 */
1143 private void
bs1(struct magic * m)1144 bs1(struct magic *m)
1145 {
1146 m->cont_level = swap2(m->cont_level);
1147 m->offset = swap4((uint32_t)m->offset);
1148 m->in_offset = swap4((uint32_t)m->in_offset);
1149 if (m->type != FILE_STRING)
1150 m->value.l = swap4(m->value.l);
1151 m->mask = swap4(m->mask);
1152 }
1153