1 /*	$OpenBSD: apprentice.c,v 1.23 2006/10/31 18:06:27 ray Exp $ */
2 /*
3  * Copyright (c) Ian F. Darwin 1986-1995.
4  * Software written by Ian F. Darwin and others;
5  * maintained 1995-present by Christos Zoulas and others.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * apprentice - make one pass through /etc/magic, learning its secrets.
31  */
32 
33 #include "file.h"
34 #include "magic.h"
35 #include <stdlib.h>
36 #ifdef HAVE_UNISTD_H
37 #include <unistd.h>
38 #endif
39 #include <string.h>
40 #include <ctype.h>
41 #include <fcntl.h>
42 #include <sys/stat.h>
43 #include <sys/param.h>
44 #include <sys/mman.h>
45 
46 __RCSID("$MirOS: src/usr.bin/file/apprentice.c,v 1.2 2007/07/10 14:22:35 tg Exp $");
47 
/*
 * EATAB: advance the in-scope `char *l' past any run of ASCII whitespace.
 * Wrapped in do { } while (0) so that `EATAB;' is exactly one statement
 * and stays correct as the body of an unbraced if/else.
 */
#define	EATAB do { \
		while (isascii((unsigned char) *l) && \
		    isspace((unsigned char) *l)) \
			++l; \
	} while (0)
/*
 * LOWCASE: map an upper-case character to lower case, leave anything
 * else untouched.  The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
52 /*
53  * Work around a bug in headers on Digital Unix.
54  * At least confirmed for: OSF1 V4.0 878
55  */
56 #if defined(__osf__) && defined(__DECC)
57 #ifdef MAP_FAILED
58 #undef MAP_FAILED
59 #endif
60 #endif
61 
62 #ifndef MAP_FAILED
63 #define MAP_FAILED (void *) -1
64 #endif
65 
66 #ifndef MAP_FILE
67 #define MAP_FILE 0
68 #endif
69 
70 #ifndef MAXPATHLEN
71 #define MAXPATHLEN	1024
72 #endif
73 
74 private int getvalue(struct magic_set *ms, struct magic *, char **);
75 private int hextoint(int);
76 private char *getstr(struct magic_set *, char *, char *, int, int *);
77 private int parse(struct magic_set *, struct magic **, uint32_t *, char *, int);
78 private void eatsize(char **);
79 private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
80 private int apprentice_file(struct magic_set *, struct magic **, uint32_t *,
81     const char *, int);
82 private void byteswap(struct magic *, uint32_t);
83 private void bs1(struct magic *);
84 private uint16_t swap2(uint16_t);
85 private uint32_t swap4(uint32_t);
86 private char *mkdbname(const char *, char *, size_t);
87 private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
88     const char *);
89 private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
90     const char *);
91 
92 private size_t maxmagic = 0;
93 private size_t magicsize = sizeof(struct magic);
94 
95 #ifdef COMPILE_ONLY
96 const char *magicfile;
97 char *progname;
98 int lineno;
99 
100 int main(int, char *[]);
101 
102 int
main(int argc,char * argv[])103 main(int argc, char *argv[])
104 {
105 	int ret;
106 
107 	if ((progname = strrchr(argv[0], '/')) != NULL)
108 		progname++;
109 	else
110 		progname = argv[0];
111 
112 	if (argc != 2) {
113 		(void)fprintf(stderr, "usage: %s file\n", progname);
114 		exit(1);
115 	}
116 	magicfile = argv[1];
117 
118 	exit(file_apprentice(magicfile, COMPILE, MAGIC_CHECK) == -1 ? 1 : 0);
119 }
120 #endif /* COMPILE_ONLY */
121 
122 
123 /*
124  * Handle one file.
125  */
126 private int
apprentice_1(struct magic_set * ms,const char * fn,int action,struct mlist * mlist)127 apprentice_1(struct magic_set *ms, const char *fn, int action,
128     struct mlist *mlist)
129 {
130 	struct magic *magic = NULL;
131 	uint32_t nmagic = 0;
132 	struct mlist *ml;
133 	int rv = -1;
134 	int mapped;
135 
136 	if (magicsize != FILE_MAGICSIZE) {
137 		file_error(ms, 0, "magic element size %lu != %lu",
138 		    (unsigned long)sizeof(*magic),
139 		    (unsigned long)FILE_MAGICSIZE);
140 		return -1;
141 	}
142 
143 	if (action == FILE_COMPILE) {
144 		rv = apprentice_file(ms, &magic, &nmagic, fn, action);
145 		if (rv != 0)
146 			return -1;
147 		rv = apprentice_compile(ms, &magic, &nmagic, fn);
148 		free(magic);
149 		return rv;
150 	}
151 #ifndef COMPILE_ONLY
152 	if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
153 		if (ms->flags & MAGIC_CHECK)
154 			file_magwarn("using regular magic file `%s'", fn);
155 		rv = apprentice_file(ms, &magic, &nmagic, fn, action);
156 		if (rv != 0)
157 			return -1;
158 		mapped = 0;
159 	}
160 
161 	if (rv == -1)
162 		return rv;
163 	mapped = rv;
164 
165 	if (magic == NULL || nmagic == 0) {
166 		file_delmagic(magic, mapped, nmagic);
167 		return -1;
168 	}
169 
170 	if ((ml = malloc(sizeof(*ml))) == NULL) {
171 		file_delmagic(magic, mapped, nmagic);
172 		file_oomem(ms);
173 		return -1;
174 	}
175 
176 	ml->magic = magic;
177 	ml->nmagic = nmagic;
178 	ml->mapped = mapped;
179 
180 	mlist->prev->next = ml;
181 	ml->prev = mlist->prev;
182 	ml->next = mlist;
183 	mlist->prev = ml;
184 
185 	return 0;
186 #endif /* COMPILE_ONLY */
187 }
188 
189 protected void
file_delmagic(struct magic * p,int type,size_t entries)190 file_delmagic(struct magic *p, int type, size_t entries)
191 {
192 	if (p == NULL)
193 		return;
194 	switch (type) {
195 	case 2:
196 		p--;
197 		(void)munmap((void *)p, sizeof(*p) * (entries + 1));
198 		break;
199 	case 1:
200 		p--;
201 	case 0:
202 		free(p);
203 		break;
204 	default:
205 		abort();
206 	}
207 }
208 
209 
210 /* const char *fn: list of magic files */
211 protected struct mlist *
file_apprentice(struct magic_set * ms,const char * fn,int action)212 file_apprentice(struct magic_set *ms, const char *fn, int action)
213 {
214 	char *p, *mfn, *afn = NULL;
215 	int file_err, errs = -1;
216 	struct mlist *mlist;
217 
218 	if (fn == NULL)
219 		fn = getenv("MAGIC");
220 	if (fn == NULL)
221 		fn = MAGIC;
222 
223 	if ((fn = mfn = strdup(fn)) == NULL) {
224 		file_oomem(ms);
225 		return NULL;
226 	}
227 
228 	if ((mlist = malloc(sizeof(*mlist))) == NULL) {
229 		free(mfn);
230 		file_oomem(ms);
231 		return NULL;
232 	}
233 	mlist->next = mlist->prev = mlist;
234 
235 	while (fn) {
236 		p = strchr(fn, PATHSEP);
237 		if (p)
238 			*p++ = '\0';
239 		if (*fn == '\0')
240 			break;
241 		if (ms->flags & MAGIC_MIME) {
242 			size_t len = strlen(fn) + 5 + 1;
243 			if ((afn = malloc(len)) == NULL) {
244 				free(mfn);
245 				free(mlist);
246 				file_oomem(ms);
247 				return NULL;
248 			}
249 			(void)strlcpy(afn, fn, len);
250 			(void)strlcat(afn, ".mime", len);
251 			fn = afn;
252 		}
253 		file_err = apprentice_1(ms, fn, action, mlist);
254 		if (file_err > errs)
255 			errs = file_err;
256 		if (afn) {
257 			free(afn);
258 			afn = NULL;
259 		}
260 		fn = p;
261 	}
262 	if (errs == -1) {
263 		free(mfn);
264 		free(mlist);
265 		mlist = NULL;
266 		file_error(ms, 0, "could not find any magic files!");
267 		return NULL;
268 	}
269 	free(mfn);
270 	return mlist;
271 }
272 
273 /*
274  * parse from a file
275  * const char *fn: name of magic file
276  */
277 private int
apprentice_file(struct magic_set * ms,struct magic ** magicp,uint32_t * nmagicp,const char * fn,int action)278 apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
279     const char *fn, int action)
280 {
281 	private const char hdr[] =
282 		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
283 	FILE *f;
284 	char line[BUFSIZ];
285 	int lineno;
286 	int errs = 0;
287 
288 	f = fopen(fn, "r");
289 	if (f == NULL) {
290 		if (errno != ENOENT)
291 			file_error(ms, errno, "cannot read magic file `%s'",
292 			    fn);
293 		return -1;
294 	}
295 
296         maxmagic = MAXMAGIS;
297 	*magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
298 	if (*magicp == NULL) {
299 		(void)fclose(f);
300 		file_oomem(ms);
301 		return -1;
302 	}
303 
304 	/* print silly verbose header for USG compat. */
305 	if (action == FILE_CHECK)
306 		(void)fprintf(stderr, "%s\n", hdr);
307 
308 	/* parse it */
309 	for (lineno = 1; fgets(line, sizeof(line), f) != NULL; lineno++) {
310 		char *p;
311 
312 		if (line[0]=='#')	/* comment, do not parse */
313 			continue;
314 		if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
315 			continue;
316 		/* delete newline */
317 		if ((p = strchr(line, '\n')) != NULL)
318 			*p = '\0';
319 		if (parse(ms, magicp, nmagicp, line, action) != 0)
320 			errs = 1;
321 	}
322 
323 	(void)fclose(f);
324 	if (errs) {
325 		free(*magicp);
326 		*magicp = NULL;
327 		*nmagicp = 0;
328 	}
329 	return errs;
330 }
331 
332 /*
333  * extend the sign bit if the comparison is to be signed
334  */
335 protected uint32_t
file_signextend(struct magic_set * ms,struct magic * m,uint32_t v)336 file_signextend(struct magic_set *ms, struct magic *m, uint32_t v)
337 {
338 	if (!(m->flag & UNSIGNED))
339 		switch(m->type) {
340 		/*
341 		 * Do not remove the casts below.  They are
342 		 * vital.  When later compared with the data,
343 		 * the sign extension must have happened.
344 		 */
345 		case FILE_BYTE:
346 			v = (char) v;
347 			break;
348 		case FILE_SHORT:
349 		case FILE_BESHORT:
350 		case FILE_LESHORT:
351 			v = (short) v;
352 			break;
353 		case FILE_DATE:
354 		case FILE_BEDATE:
355 		case FILE_LEDATE:
356 		case FILE_LDATE:
357 		case FILE_BELDATE:
358 		case FILE_LELDATE:
359 		case FILE_LONG:
360 		case FILE_BELONG:
361 		case FILE_LELONG:
362 			v = (int32_t) v;
363 			break;
364 		case FILE_STRING:
365 		case FILE_PSTRING:
366 			break;
367 		case FILE_REGEX:
368 			break;
369 		default:
370 			if (ms->flags & MAGIC_CHECK)
371 			    file_magwarn("cannot happen: m->type=%d\n",
372 				    m->type);
373 			return ~0U;
374 		}
375 	return v;
376 }
377 
378 /*
379  * parse one line from magic file, put into magic[index++] if valid
380  */
381 private int
parse(struct magic_set * ms,struct magic ** magicp,uint32_t * nmagicp,char * l,int action)382 parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l,
383     int action)
384 {
385 	struct magic *m;
386 	char *t;
387 	private const char *fops = FILE_OPS;
388 	uint32_t val;
389 
390 #define ALLOC_INCR	200
391 	if (*nmagicp + 1 >= maxmagic){
392 		maxmagic += ALLOC_INCR;
393 		if ((m = (struct magic *) realloc(*magicp,
394 		    sizeof(struct magic) * maxmagic)) == NULL) {
395 			file_oomem(ms);
396 			if (*magicp)
397 				free(*magicp);
398 			return -1;
399 		}
400 		*magicp = m;
401 		memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
402 		    * ALLOC_INCR);
403 	}
404 	m = &(*magicp)[*nmagicp];
405 	m->flag = 0;
406 	m->cont_level = 0;
407 
408 	while (*l == '>') {
409 		++l;		/* step over */
410 		m->cont_level++;
411 	}
412 
413 	if (m->cont_level != 0 && *l == '(') {
414 		++l;		/* step over */
415 		m->flag |= INDIR;
416 	}
417 	if (m->cont_level != 0 && *l == '&') {
418                 ++l;            /* step over */
419                 m->flag |= OFFADD;
420         }
421 
422 	/* get offset, then skip over it */
423 	m->offset = (uint32_t)strtoul(l, &t, 0);
424         if (l == t)
425 		if (ms->flags & MAGIC_CHECK)
426 			file_magwarn("offset %s invalid", l);
427         l = t;
428 
429 	if (m->flag & INDIR) {
430 		m->in_type = FILE_LONG;
431 		m->in_offset = 0;
432 		/*
433 		 * read [.lbs][+-]nnnnn)
434 		 */
435 		if (*l == '.') {
436 			l++;
437 			switch (*l) {
438 			case 'l':
439 				m->in_type = FILE_LELONG;
440 				break;
441 			case 'L':
442 				m->in_type = FILE_BELONG;
443 				break;
444 			case 'h':
445 			case 's':
446 				m->in_type = FILE_LESHORT;
447 				break;
448 			case 'H':
449 			case 'S':
450 				m->in_type = FILE_BESHORT;
451 				break;
452 			case 'c':
453 			case 'b':
454 			case 'C':
455 			case 'B':
456 				m->in_type = FILE_BYTE;
457 				break;
458 			default:
459 				if (ms->flags & MAGIC_CHECK)
460 					file_magwarn(
461 					    "indirect offset type %c invalid",
462 					    *l);
463 				break;
464 			}
465 			l++;
466 		}
467 		if (*l == '~') {
468 			m->in_op = FILE_OPINVERSE;
469 			l++;
470 		}
471 		switch (*l) {
472 		case '&':
473 			m->in_op |= FILE_OPAND;
474 			l++;
475 			break;
476 		case '|':
477 			m->in_op |= FILE_OPOR;
478 			l++;
479 			break;
480 		case '^':
481 			m->in_op |= FILE_OPXOR;
482 			l++;
483 			break;
484 		case '+':
485 			m->in_op |= FILE_OPADD;
486 			l++;
487 			break;
488 		case '-':
489 			m->in_op |= FILE_OPMINUS;
490 			l++;
491 			break;
492 		case '*':
493 			m->in_op |= FILE_OPMULTIPLY;
494 			l++;
495 			break;
496 		case '/':
497 			m->in_op |= FILE_OPDIVIDE;
498 			l++;
499 			break;
500 		case '%':
501 			m->in_op |= FILE_OPMODULO;
502 			l++;
503 			break;
504 		}
505 		if (isdigit((unsigned char)*l))
506 			m->in_offset = (uint32_t)strtoul(l, &t, 0);
507 		else
508 			t = l;
509 		if (*t++ != ')')
510 			if (ms->flags & MAGIC_CHECK)
511 				file_magwarn("missing ')' in indirect offset");
512 		l = t;
513 	}
514 
515 
516 	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
517 		++l;
518 	EATAB;
519 
520 #define NBYTE		4
521 #define NSHORT		5
522 #define NLONG		4
523 #define NSTRING 	6
524 #define NDATE		4
525 #define NBESHORT	7
526 #define NBELONG		6
527 #define NBEDATE		6
528 #define NLESHORT	7
529 #define NLELONG		6
530 #define NLEDATE		6
531 #define NPSTRING	7
532 #define NLDATE		5
533 #define NBELDATE	7
534 #define NLELDATE	7
535 #define NREGEX		5
536 
537 	if (*l == 'u') {
538 		++l;
539 		m->flag |= UNSIGNED;
540 	}
541 
542 	/* get type, skip it */
543 	if (strncmp(l, "char", NBYTE)==0) {	/* HP/UX compat */
544 		m->type = FILE_BYTE;
545 		l += NBYTE;
546 	} else if (strncmp(l, "byte", NBYTE)==0) {
547 		m->type = FILE_BYTE;
548 		l += NBYTE;
549 	} else if (strncmp(l, "short", NSHORT)==0) {
550 		m->type = FILE_SHORT;
551 		l += NSHORT;
552 	} else if (strncmp(l, "long", NLONG)==0) {
553 		m->type = FILE_LONG;
554 		l += NLONG;
555 	} else if (strncmp(l, "string", NSTRING)==0) {
556 		m->type = FILE_STRING;
557 		l += NSTRING;
558 	} else if (strncmp(l, "date", NDATE)==0) {
559 		m->type = FILE_DATE;
560 		l += NDATE;
561 	} else if (strncmp(l, "beshort", NBESHORT)==0) {
562 		m->type = FILE_BESHORT;
563 		l += NBESHORT;
564 	} else if (strncmp(l, "belong", NBELONG)==0) {
565 		m->type = FILE_BELONG;
566 		l += NBELONG;
567 	} else if (strncmp(l, "bedate", NBEDATE)==0) {
568 		m->type = FILE_BEDATE;
569 		l += NBEDATE;
570 	} else if (strncmp(l, "leshort", NLESHORT)==0) {
571 		m->type = FILE_LESHORT;
572 		l += NLESHORT;
573 	} else if (strncmp(l, "lelong", NLELONG)==0) {
574 		m->type = FILE_LELONG;
575 		l += NLELONG;
576 	} else if (strncmp(l, "ledate", NLEDATE)==0) {
577 		m->type = FILE_LEDATE;
578 		l += NLEDATE;
579 	} else if (strncmp(l, "pstring", NPSTRING)==0) {
580 		m->type = FILE_PSTRING;
581 		l += NPSTRING;
582 	} else if (strncmp(l, "ldate", NLDATE)==0) {
583 		m->type = FILE_LDATE;
584 		l += NLDATE;
585 	} else if (strncmp(l, "beldate", NBELDATE)==0) {
586 		m->type = FILE_BELDATE;
587 		l += NBELDATE;
588 	} else if (strncmp(l, "leldate", NLELDATE)==0) {
589 		m->type = FILE_LELDATE;
590 		l += NLELDATE;
591 	} else if (strncmp(l, "regex", NREGEX)==0) {
592 		m->type = FILE_REGEX;
593 		l += sizeof("regex");
594 	} else {
595 		if (ms->flags & MAGIC_CHECK)
596 			file_magwarn("type %s invalid", l);
597 		return -1;
598 	}
599 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
600 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
601 	if (*l == '~') {
602 		if (FILE_STRING != m->type && FILE_PSTRING != m->type)
603 			m->mask_op = FILE_OPINVERSE;
604 		++l;
605 	}
606 	if ((t = strchr(fops,  *l)) != NULL) {
607 		uint32_t op = (uint32_t)(t - fops);
608 		if (op != FILE_OPDIVIDE ||
609 		    (FILE_STRING != m->type && FILE_PSTRING != m->type)) {
610 			++l;
611 			m->mask_op |= op;
612 			val = (uint32_t)strtoul(l, &l, 0);
613 			m->mask = file_signextend(ms, m, val);
614 			eatsize(&l);
615 		} else {
616 			m->mask = 0L;
617 			while (!isspace((unsigned char)*++l)) {
618 				switch (*l) {
619 				case CHAR_IGNORE_LOWERCASE:
620 					m->mask |= STRING_IGNORE_LOWERCASE;
621 					break;
622 				case CHAR_COMPACT_BLANK:
623 					m->mask |= STRING_COMPACT_BLANK;
624 					break;
625 				case CHAR_COMPACT_OPTIONAL_BLANK:
626 					m->mask |=
627 					    STRING_COMPACT_OPTIONAL_BLANK;
628 					break;
629 				default:
630 					if (ms->flags & MAGIC_CHECK)
631 						file_magwarn(
632 						"string extension %c invalid",
633 						*l);
634 					return -1;
635 				}
636 			}
637 		}
638 	}
639 	/*
640 	 * We used to set mask to all 1's here, instead let's just not do
641 	 * anything if mask = 0 (unless you have a better idea)
642 	 */
643 	EATAB;
644 
645 	switch (*l) {
646 	case '>':
647 	case '<':
648 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
649 	case '&':
650 	case '^':
651 	case '=':
652   		m->reln = *l;
653   		++l;
654 		if (*l == '=') {
655 		   /* HP compat: ignore &= etc. */
656 		   ++l;
657 		}
658 		break;
659 	case '!':
660 		if (m->type != FILE_STRING && m->type != FILE_PSTRING) {
661 			m->reln = *l;
662 			++l;
663 			break;
664 		}
665 		/*FALLTHROUGH*/
666 	default:
667 		if (*l == 'x' && isascii((unsigned char)l[1]) &&
668 		    isspace((unsigned char)l[1])) {
669 			m->reln = *l;
670 			++l;
671 			goto GetDesc;	/* Bill The Cat */
672 		}
673   		m->reln = '=';
674 		break;
675 	}
676   	EATAB;
677 
678 	if (getvalue(ms, m, &l))
679 		return -1;
680 	/*
681 	 * TODO finish this macro and start using it!
682 	 * #define offsetcheck {if (offset > HOWMANY-1)
683 	 *	magwarn("offset too big"); }
684 	 */
685 
686 	/*
687 	 * now get last part - the description
688 	 */
689 GetDesc:
690 	EATAB;
691 	if (l[0] == '\b') {
692 		++l;
693 		m->nospflag = 1;
694 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
695 		++l;
696 		++l;
697 		m->nospflag = 1;
698 	} else
699 		m->nospflag = 0;
700 
701 	strlcpy(m->desc, l, sizeof(m->desc));
702 
703 #ifndef COMPILE_ONLY
704 	if (action == FILE_CHECK) {
705 		file_mdump(m);
706 	}
707 #endif
708 	++(*nmagicp);		/* make room for next */
709 	return 0;
710 }
711 
712 /*
713  * Read a numeric value from a pointer, into the value union of a magic
714  * pointer, according to the magic type.  Update the string pointer to point
715  * just after the number read.  Return 0 for success, non-zero for failure.
716  */
717 private int
getvalue(struct magic_set * ms,struct magic * m,char ** p)718 getvalue(struct magic_set *ms, struct magic *m, char **p)
719 {
720 	int slen;
721 
722 	switch (m->type) {
723 	case FILE_STRING:
724 	case FILE_PSTRING:
725 	case FILE_REGEX:
726 		*p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen);
727 		if (*p == NULL) {
728 			if (ms->flags & MAGIC_CHECK)
729 				file_magwarn("cannot get string from `%s'",
730 				    m->value.s);
731 			return -1;
732 		}
733 		m->vallen = slen;
734 		return 0;
735 	default:
736 		if (m->reln != 'x') {
737 			m->value.l = file_signextend(ms, m,
738 			    (uint32_t)strtoul(*p, p, 0));
739 			eatsize(p);
740 		}
741 		return 0;
742 	}
743 }
744 
745 /*
746  * Convert a string containing C character escapes.  Stop at an unescaped
747  * space or tab.
748  * Copy the converted version to "p", returning its length in *slen.
749  * Return updated scan pointer as function result.
750  */
751 private char *
getstr(struct magic_set * ms,char * s,char * p,int plen,int * slen)752 getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen)
753 {
754 	char	*origs = s, *origp = p;
755 	char	*pmax = p + plen - 1;
756 	int	c;
757 	int	val;
758 
759 	while ((c = *s++) != '\0') {
760 		if (isspace((unsigned char) c))
761 			break;
762 		if (p >= pmax) {
763 			file_error(ms, 0, "string too long: `%s'", origs);
764 			return NULL;
765 		}
766 		if(c == '\\') {
767 			switch(c = *s++) {
768 
769 			case '\0':
770 				goto out;
771 
772 			default:
773 				*p++ = (char) c;
774 				break;
775 
776 			case 'n':
777 				*p++ = '\n';
778 				break;
779 
780 			case 'r':
781 				*p++ = '\r';
782 				break;
783 
784 			case 'b':
785 				*p++ = '\b';
786 				break;
787 
788 			case 't':
789 				*p++ = '\t';
790 				break;
791 
792 			case 'f':
793 				*p++ = '\f';
794 				break;
795 
796 			case 'v':
797 				*p++ = '\v';
798 				break;
799 
800 			/* \ and up to 3 octal digits */
801 			case '0':
802 			case '1':
803 			case '2':
804 			case '3':
805 			case '4':
806 			case '5':
807 			case '6':
808 			case '7':
809 				val = c - '0';
810 				c = *s++;  /* try for 2 */
811 				if(c >= '0' && c <= '7') {
812 					val = (val<<3) | (c - '0');
813 					c = *s++;  /* try for 3 */
814 					if(c >= '0' && c <= '7')
815 						val = (val<<3) | (c-'0');
816 					else
817 						--s;
818 				}
819 				else
820 					--s;
821 				*p++ = (char)val;
822 				break;
823 
824 			/* \x and up to 2 hex digits */
825 			case 'x':
826 				val = 'x';	/* Default if no digits */
827 				c = hextoint(*s++);	/* Get next char */
828 				if (c >= 0) {
829 					val = c;
830 					c = hextoint(*s++);
831 					if (c >= 0)
832 						val = (val << 4) + c;
833 					else
834 						--s;
835 				} else
836 					--s;
837 				*p++ = (char)val;
838 				break;
839 			}
840 		} else
841 			*p++ = (char)c;
842 	}
843 out:
844 	*p = '\0';
845 	*slen = p - origp;
846 	return s;
847 }
848 
849 
/*
 * Value of a single hexadecimal digit character (either case), or -1 if
 * `c' is not a hex digit.
 */
private int
hextoint(int c)
{
	const unsigned char uc = (unsigned char) c;

	if (isascii(uc)) {
		if (isdigit(uc))
			return c - '0';
		if (c >= 'a' && c <= 'f')
			return c + 10 - 'a';
		if (c >= 'A' && c <= 'F')
			return c + 10 - 'A';
	}
	return -1;
}
864 
865 
/*
 * Print a string to `fp' using C character escapes.
 *
 * If `len' equals ~0U the string is taken to be NUL-terminated;
 * otherwise exactly `len' bytes are printed.  Bytes in the range 040
 * through 0176 are written as-is, the usual control characters as
 * \n \r \b \t \f \v, and anything else as a three-digit octal escape.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	char	c;

	for (;;) {
		/*
		 * Check the count before fetching, so that no byte beyond
		 * the `len' requested ones is ever read.
		 */
		if (len == ~0U) {
			c = *s++;
			if (c == '\0')
				break;
		}
		else  {
			if (len-- == 0)
				break;
			c = *s++;
		}
		if(c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				break;

			case '\r':
				(void) fputc('r', fp);
				break;

			case '\b':
				(void) fputc('b', fp);
				break;

			case '\t':
				(void) fputc('t', fp);
				break;

			case '\f':
				(void) fputc('f', fp);
				break;

			case '\v':
				(void) fputc('v', fp);
				break;

			default:
				(void) fprintf(fp, "%.3o", c & 0377);
				break;
			}
		}
	}
}
921 
/*
 * eatsize(): step *p over the size suffix of a number [eg. 10UL].
 * An optional 'u' may be followed by at most one of l, s, h, b or c;
 * all letters are accepted in either case.
 */
private void
eatsize(char **p)
{
	char *cur = *p;
	int ch = tolower((unsigned char)*cur);

	if (ch == 'u') {
		cur++;
		ch = tolower((unsigned char)*cur);
	}

	/* long, short, short, char/byte, char/byte */
	if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
		cur++;

	*p = cur;
}
947 
948 /*
949  * handle a compiled file.
950  */
951 private int
apprentice_map(struct magic_set * ms,struct magic ** magicp,uint32_t * nmagicp,const char * fn)952 apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
953     const char *fn)
954 {
955 	int fd;
956 	struct stat st;
957 	uint32_t *ptr;
958 	uint32_t version;
959 	int needsbyteswap;
960 	char buf[MAXPATHLEN];
961 	char *dbname = mkdbname(fn, buf, sizeof(buf));
962 	void *mm = NULL;
963 
964 	if (dbname == NULL)
965 		return -1;
966 
967 	if ((fd = open(dbname, O_RDONLY)) == -1)
968 		return -1;
969 
970 	if (fstat(fd, &st) == -1) {
971 		file_error(ms, errno, "cannot stat `%s'", dbname);
972 		goto error;
973 	}
974 	if (st.st_size < 16) {
975 		file_error(ms, 0, "file `%s' is too small", dbname);
976 		goto error;
977 	}
978 
979 #ifdef QUICK
980 	if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
981 	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
982 		file_error(ms, errno, "cannot map `%s'", dbname);
983 		goto error;
984 	}
985 #define RET	2
986 #else
987 	if ((mm = malloc((size_t)st.st_size)) == NULL) {
988 		file_oomem(ms);
989 		goto error;
990 	}
991 	if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) {
992 		file_badread(ms);
993 		goto error;
994 	}
995 #define RET	1
996 #endif
997 	*magicp = mm;
998 	(void)close(fd);
999 	fd = -1;
1000 	ptr = (uint32_t *)(void *)*magicp;
1001 	if (*ptr != MAGICNO) {
1002 		if (swap4(*ptr) != MAGICNO) {
1003 			file_error(ms, 0, "bad magic in `%s'", fn);
1004 			goto error;
1005 		}
1006 		needsbyteswap = 1;
1007 	} else
1008 		needsbyteswap = 0;
1009 	if (needsbyteswap)
1010 		version = swap4(ptr[1]);
1011 	else
1012 		version = ptr[1];
1013 	if (version != VERSIONNO) {
1014 		file_error(ms, 0, "version mismatch (%d != %d) in `%s'",
1015 		    version, VERSIONNO, dbname);
1016 		goto error;
1017 	}
1018 	*nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
1019 	(*magicp)++;
1020 	if (needsbyteswap)
1021 		byteswap(*magicp, *nmagicp);
1022 	return RET;
1023 
1024 error:
1025 	if (fd != -1)
1026 		(void)close(fd);
1027 	if (mm) {
1028 #ifdef QUICK
1029 		(void)munmap((void *)mm, (size_t)st.st_size);
1030 #else
1031 		free(mm);
1032 #endif
1033 	} else {
1034 		*magicp = NULL;
1035 		*nmagicp = 0;
1036 	}
1037 	return -1;
1038 }
1039 
1040 private const uint32_t ar[] = {
1041     MAGICNO, VERSIONNO
1042 };
1043 /*
1044  * handle an mmaped file.
1045  */
1046 private int
apprentice_compile(struct magic_set * ms,struct magic ** magicp,uint32_t * nmagicp,const char * fn)1047 apprentice_compile(struct magic_set *ms, struct magic **magicp,
1048     uint32_t *nmagicp, const char *fn)
1049 {
1050 	int fd;
1051 	char buf[MAXPATHLEN];
1052 	char *dbname = mkdbname(fn, buf, sizeof(buf));
1053 
1054 	if (dbname == NULL)
1055 		return -1;
1056 
1057 	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
1058 		file_error(ms, errno, "cannot open `%s'", dbname);
1059 		return -1;
1060 	}
1061 
1062 	if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
1063 		file_error(ms, errno, "error writing `%s'", dbname);
1064 		return -1;
1065 	}
1066 
1067 	if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
1068 	    != sizeof(struct magic)) {
1069 		file_error(ms, errno, "error seeking `%s'", dbname);
1070 		return -1;
1071 	}
1072 
1073 	if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
1074 	    != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
1075 		file_error(ms, errno, "error writing `%s'", dbname);
1076 		return -1;
1077 	}
1078 
1079 	(void)close(fd);
1080 	return 0;
1081 }
1082 
private const char ext[] = ".mgc";
/*
 * Build the name of the compiled database for magic file `fn' by
 * appending ".mgc" to it (any directory part of `fn' is kept).
 *
 * The result is written to `buf', which holds `bufsiz' bytes.  Returns
 * `buf', or NULL if the name does not fit, so that callers never open a
 * truncated path.
 */
private char *
mkdbname(const char *fn, char *buf, size_t bufsiz)
{
	int n = snprintf(buf, bufsiz, "%s%s", fn, ext);

	/* snprintf reports the length it wanted; >= bufsiz means cut off */
	if (n < 0 || (size_t)n >= bufsiz)
		return NULL;
	return buf;
}
1098 
1099 /*
1100  * Byteswap an mmap'ed file if needed
1101  */
1102 private void
byteswap(struct magic * magic,uint32_t nmagic)1103 byteswap(struct magic *magic, uint32_t nmagic)
1104 {
1105 	uint32_t i;
1106 	for (i = 0; i < nmagic; i++)
1107 		bs1(&magic[i]);
1108 }
1109 
/*
 * Return `sv' with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	return (uint16_t)((sv >> 8) | (sv << 8));
}
1123 
/*
 * Return `sv' with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv & 0x00ff0000U) >> 8) |
	    ((sv & 0xff000000U) >> 24);
}
1139 
1140 /*
1141  * byteswap a single magic entry
1142  */
1143 private void
bs1(struct magic * m)1144 bs1(struct magic *m)
1145 {
1146 	m->cont_level = swap2(m->cont_level);
1147 	m->offset = swap4((uint32_t)m->offset);
1148 	m->in_offset = swap4((uint32_t)m->in_offset);
1149 	if (m->type != FILE_STRING)
1150 		m->value.l = swap4(m->value.l);
1151 	m->mask = swap4(m->mask);
1152 }
1153