1 /*-
2 * Copyright (c) 2008 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26 #include "file.h"
27
28 #ifndef lint
29 FILE_RCSID("@(#)$File: readcdf.c,v 1.53 2015/04/09 20:01:41 christos Exp $")
30 #endif
31
32 #include <assert.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <string.h>
36 #include <time.h>
37 #include <ctype.h>
38
39 #include "cdf.h"
40 #include "magic.h"
41
/*
 * Number of elements in a true array (not a pointer).  Only defined here
 * when the platform headers have not already provided it.  The argument is
 * parenthesized so that expression arguments index correctly.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* True when the caller did not request any MAGIC_MIME output flag. */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
47
/*
 * Pattern -> value pair.  Tables of these are scanned in order by
 * cdf_app_to_mime() and must end with an entry whose pattern is NULL.
 */
struct nv {
	const char *pattern;
	const char *mime;
};

/* Patterns looked up in the application-name property -> MIME subtype. */
static const struct nv app2mime[] = {
	{ "Word",			"msword" },
	{ "Excel",			"vnd.ms-excel" },
	{ "Powerpoint",			"vnd.ms-powerpoint" },
	{ "Crystal Reports",		"x-rpt" },
	{ "Advanced Installer",		"vnd.ms-msi" },
	{ "InstallShield",		"vnd.ms-msi" },
	{ "Microsoft Patch Compiler",	"vnd.ms-msi" },
	{ "NAnt",			"vnd.ms-msi" },
	{ "Windows Installer",		"vnd.ms-msi" },
	{ NULL,				NULL }
};

/* Patterns looked up in directory entry names -> MIME subtype. */
static const struct nv name2mime[] = {
	{ "WordDocument",		"msword" },
	{ "PowerPoint",			"vnd.ms-powerpoint" },
	{ "DigitalSignature",		"vnd.ms-msi" },
	{ NULL,				NULL }
};

/* Patterns looked up in directory entry names -> description text. */
static const struct nv name2desc[] = {
	{ "WordDocument",		"Microsoft Office Word" },
	{ "PowerPoint",			"Microsoft PowerPoint" },
	{ "DigitalSignature",		"Microsoft Installer" },
	{ NULL,				NULL }
};
73
/*
 * Class id (two 64-bit halves) -> value pair.  Tables of these are scanned
 * by cdf_clsid_to_mime() and must end with an entry whose value is NULL.
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Root storage class id -> MIME subtype. */
static const struct cv clsid2mime[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi" },
	{ { 0, 0 }, NULL }
};

/* Root storage class id -> description text. */
static const struct cv clsid2desc[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer" },
	{ { 0, 0 }, NULL }
};
94
95 private const char *
cdf_clsid_to_mime(const uint64_t clsid[2],const struct cv * cv)96 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
97 {
98 size_t i;
99 for (i = 0; cv[i].mime != NULL; i++) {
100 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
101 return cv[i].mime;
102 }
103 #ifdef CDF_DEBUG
104 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0],
105 clsid[1]);
106 #endif
107 return NULL;
108 }
109
110 private const char *
cdf_app_to_mime(const char * vbuf,const struct nv * nv)111 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
112 {
113 size_t i;
114 const char *rv = NULL;
115 #ifdef USE_C_LOCALE
116 locale_t old_lc_ctype, c_lc_ctype;
117
118 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
119 assert(c_lc_ctype != NULL);
120 old_lc_ctype = uselocale(c_lc_ctype);
121 assert(old_lc_ctype != NULL);
122 #endif
123 for (i = 0; nv[i].pattern != NULL; i++)
124 if (strcasestr(vbuf, nv[i].pattern) != NULL) {
125 rv = nv[i].mime;
126 break;
127 }
128 #ifdef CDF_DEBUG
129 fprintf(stderr, "unknown app %s\n", vbuf);
130 #endif
131 #ifdef USE_C_LOCALE
132 (void)uselocale(old_lc_ctype);
133 freelocale(c_lc_ctype);
134 #endif
135 return rv;
136 }
137
138 private int
cdf_file_property_info(struct magic_set * ms,const cdf_property_info_t * info,size_t count,const cdf_directory_t * root_storage)139 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
140 size_t count, const cdf_directory_t *root_storage)
141 {
142 size_t i;
143 cdf_timestamp_t tp;
144 struct timespec ts;
145 char buf[64];
146 const char *str = NULL;
147 const char *s;
148 int len;
149
150 if (!NOTMIME(ms) && root_storage)
151 str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
152 clsid2mime);
153
154 for (i = 0; i < count; i++) {
155 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
156 switch (info[i].pi_type) {
157 case CDF_NULL:
158 break;
159 case CDF_SIGNED16:
160 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
161 info[i].pi_s16) == -1)
162 return -1;
163 break;
164 case CDF_SIGNED32:
165 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
166 info[i].pi_s32) == -1)
167 return -1;
168 break;
169 case CDF_UNSIGNED32:
170 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
171 info[i].pi_u32) == -1)
172 return -1;
173 break;
174 case CDF_FLOAT:
175 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
176 info[i].pi_f) == -1)
177 return -1;
178 break;
179 case CDF_DOUBLE:
180 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
181 info[i].pi_d) == -1)
182 return -1;
183 break;
184 case CDF_LENGTH32_STRING:
185 case CDF_LENGTH32_WSTRING:
186 len = info[i].pi_str.s_len;
187 if (len > 1) {
188 char vbuf[1024];
189 size_t j, k = 1;
190
191 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
192 k++;
193 s = info[i].pi_str.s_buf;
194 for (j = 0; j < sizeof(vbuf) && len--; s += k) {
195 if (*s == '\0')
196 break;
197 if (isprint((unsigned char)*s))
198 vbuf[j++] = *s;
199 }
200 if (j == sizeof(vbuf))
201 --j;
202 vbuf[j] = '\0';
203 if (NOTMIME(ms)) {
204 if (vbuf[0]) {
205 if (file_printf(ms, ", %s: %s",
206 buf, vbuf) == -1)
207 return -1;
208 }
209 } else if (str == NULL && info[i].pi_id ==
210 CDF_PROPERTY_NAME_OF_APPLICATION) {
211 str = cdf_app_to_mime(vbuf, app2mime);
212 }
213 }
214 break;
215 case CDF_FILETIME:
216 tp = info[i].pi_tp;
217 if (tp != 0) {
218 char tbuf[64];
219 if (tp < 1000000000000000LL) {
220 cdf_print_elapsed_time(tbuf,
221 sizeof(tbuf), tp);
222 if (NOTMIME(ms) && file_printf(ms,
223 ", %s: %s", buf, tbuf) == -1)
224 return -1;
225 } else {
226 char *c, *ec;
227 cdf_timestamp_to_timespec(&ts, tp);
228 c = cdf_ctime(&ts.tv_sec, tbuf);
229 if (c != NULL &&
230 (ec = strchr(c, '\n')) != NULL)
231 *ec = '\0';
232
233 if (NOTMIME(ms) && file_printf(ms,
234 ", %s: %s", buf, c) == -1)
235 return -1;
236 }
237 }
238 break;
239 case CDF_CLIPBOARD:
240 break;
241 default:
242 return -1;
243 }
244 }
245 if (!NOTMIME(ms)) {
246 if (str == NULL)
247 return 0;
248 if (file_printf(ms, "application/%s", str) == -1)
249 return -1;
250 }
251 return 1;
252 }
253
254 private int
cdf_file_catalog(struct magic_set * ms,const cdf_header_t * h,const cdf_stream_t * sst)255 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
256 const cdf_stream_t *sst)
257 {
258 cdf_catalog_t *cat;
259 size_t i;
260 char buf[256];
261 cdf_catalog_entry_t *ce;
262
263 if (NOTMIME(ms)) {
264 if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
265 return -1;
266 if (cdf_unpack_catalog(h, sst, &cat) == -1)
267 return -1;
268 ce = cat->cat_e;
269 /* skip first entry since it has a , or paren */
270 for (i = 1; i < cat->cat_num; i++)
271 if (file_printf(ms, "%s%s",
272 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
273 i == cat->cat_num - 1 ? "]" : ", ") == -1) {
274 free(cat);
275 return -1;
276 }
277 free(cat);
278 } else {
279 if (file_printf(ms, "application/CDFV2") == -1)
280 return -1;
281 }
282 return 1;
283 }
284
285 private int
cdf_file_summary_info(struct magic_set * ms,const cdf_header_t * h,const cdf_stream_t * sst,const cdf_directory_t * root_storage)286 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
287 const cdf_stream_t *sst, const cdf_directory_t *root_storage)
288 {
289 cdf_summary_info_header_t si;
290 cdf_property_info_t *info;
291 size_t count;
292 int m;
293
294 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
295 return -1;
296
297 if (NOTMIME(ms)) {
298 const char *str;
299
300 if (file_printf(ms, "Composite Document File V2 Document")
301 == -1)
302 return -1;
303
304 if (file_printf(ms, ", %s Endian",
305 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1)
306 return -2;
307 switch (si.si_os) {
308 case 2:
309 if (file_printf(ms, ", Os: Windows, Version %d.%d",
310 si.si_os_version & 0xff,
311 (uint32_t)si.si_os_version >> 8) == -1)
312 return -2;
313 break;
314 case 1:
315 if (file_printf(ms, ", Os: MacOS, Version %d.%d",
316 (uint32_t)si.si_os_version >> 8,
317 si.si_os_version & 0xff) == -1)
318 return -2;
319 break;
320 default:
321 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
322 si.si_os_version & 0xff,
323 (uint32_t)si.si_os_version >> 8) == -1)
324 return -2;
325 break;
326 }
327 if (root_storage) {
328 str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
329 clsid2desc);
330 if (str) {
331 if (file_printf(ms, ", %s", str) == -1)
332 return -2;
333 }
334 }
335 }
336
337 m = cdf_file_property_info(ms, info, count, root_storage);
338 free(info);
339
340 return m == -1 ? -2 : m;
341 }
342
#ifdef notdef
/*
 * Format a class id held as two 64-bit halves into buf (at most len
 * bytes, via snprintf) as five dash-separated hexadecimal groups:
 * bits 63..32, 31..16 and 15..0 of uuid[0], then bits 63..48 and 47..0
 * of uuid[1].  Returns buf.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2])
{
	const uint64_t hi = uuid[0];
	const uint64_t lo = uuid[1];
	const uint64_t group1 = (hi >> 32) & (uint64_t)0xffffffffULL;
	const uint64_t group2 = (hi >> 16) & (uint64_t)0xffffULL;
	const uint64_t group3 = hi & (uint64_t)0xffffULL;
	const uint64_t group4 = (lo >> 48) & (uint64_t)0xffffULL;
	const uint64_t group5 = lo & (uint64_t)0xffffffffffffULL;

	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
	    PRIx64 "-%.12" PRIx64,
	    group1, group2, group3, group4, group5);
	return buf;
}
#endif
356
357 private int
cdf_file_catalog_info(struct magic_set * ms,const cdf_info_t * info,const cdf_header_t * h,const cdf_sat_t * sat,const cdf_sat_t * ssat,const cdf_stream_t * sst,const cdf_dir_t * dir,cdf_stream_t * scn)358 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
359 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
360 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
361 {
362 int i;
363
364 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
365 dir, "Catalog", scn)) == -1)
366 return i;
367 #ifdef CDF_DEBUG
368 cdf_dump_catalog(&h, &scn);
369 #endif
370 if ((i = cdf_file_catalog(ms, h, scn)) == -1)
371 return -1;
372 return i;
373 }
374
375 private struct sinfo {
376 const char *name;
377 const char *mime;
378 const char *sections[5];
379 const int types[5];
380 } sectioninfo[] = {
381 { "Encrypted", "encrypted",
382 {
383 "EncryptedPackage", NULL, NULL, NULL, NULL,
384 },
385 {
386 CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0,
387
388 },
389 },
390 { "QuickBooks", "quickbooks",
391 {
392 #if 0
393 "TaxForms", "PDFTaxForms", "modulesInBackup",
394 #endif
395 "mfbu_header", NULL, NULL, NULL, NULL,
396 },
397 {
398 #if 0
399 CDF_DIR_TYPE_USER_STORAGE,
400 CDF_DIR_TYPE_USER_STORAGE,
401 CDF_DIR_TYPE_USER_STREAM,
402 #endif
403 CDF_DIR_TYPE_USER_STREAM,
404 0, 0, 0, 0
405 },
406 },
407 };
408
409 private int
cdf_file_dir_info(struct magic_set * ms,const cdf_dir_t * dir)410 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
411 {
412 size_t sd, j;
413
414 for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
415 const struct sinfo *si = §ioninfo[sd];
416 for (j = 0; si->sections[j]; j++) {
417 if (cdf_find_stream(dir, si->sections[j], si->types[j])
418 <= 0) {
419 #ifdef CDF_DEBUG
420 fprintf(stderr, "Can't read %s\n",
421 si->sections[j]);
422 #endif
423 break;
424 }
425 }
426 if (si->sections[j] != NULL)
427 continue;
428 if (NOTMIME(ms)) {
429 if (file_printf(ms, "CDFV2 %s", si->name) == -1)
430 return -1;
431 } else {
432 if (file_printf(ms, "application/CDFV2-%s",
433 si->mime) == -1)
434 return -1;
435 }
436 return 1;
437 }
438 return -1;
439 }
440
441 protected int
file_trycdf(struct magic_set * ms,int fd,const unsigned char * buf,size_t nbytes)442 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
443 size_t nbytes)
444 {
445 cdf_info_t info;
446 cdf_header_t h;
447 cdf_sat_t sat, ssat;
448 cdf_stream_t sst, scn;
449 cdf_dir_t dir;
450 int i;
451 const char *expn = "";
452 const cdf_directory_t *root_storage;
453
454 info.i_fd = fd;
455 info.i_buf = buf;
456 info.i_len = nbytes;
457 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
458 return 0;
459 if (cdf_read_header(&info, &h) == -1)
460 return 0;
461 #ifdef CDF_DEBUG
462 cdf_dump_header(&h);
463 #endif
464
465 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
466 expn = "Can't read SAT";
467 goto out0;
468 }
469 #ifdef CDF_DEBUG
470 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
471 #endif
472
473 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
474 expn = "Can't read SSAT";
475 goto out1;
476 }
477 #ifdef CDF_DEBUG
478 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
479 #endif
480
481 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
482 expn = "Can't read directory";
483 goto out2;
484 }
485
486 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
487 &root_storage)) == -1) {
488 expn = "Cannot read short stream";
489 goto out3;
490 }
491 #ifdef CDF_DEBUG
492 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
493 #endif
494 #ifdef notdef
495 if (root_storage) {
496 if (NOTMIME(ms)) {
497 char clsbuf[128];
498 if (file_printf(ms, "CLSID %s, ",
499 format_clsid(clsbuf, sizeof(clsbuf),
500 root_storage->d_storage_uuid)) == -1)
501 return -1;
502 }
503 }
504 #endif
505
506 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
507 "FileHeader", &scn)) != -1) {
508 #define HWP5_SIGNATURE "HWP Document File"
509 if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
510 && memcmp(scn.sst_tab, HWP5_SIGNATURE,
511 sizeof(HWP5_SIGNATURE) - 1) == 0) {
512 if (NOTMIME(ms)) {
513 if (file_printf(ms,
514 "Hangul (Korean) Word Processor File 5.x") == -1)
515 return -1;
516 } else {
517 if (file_printf(ms, "application/x-hwp") == -1)
518 return -1;
519 }
520 i = 1;
521 goto out5;
522 } else {
523 free(scn.sst_tab);
524 scn.sst_tab = NULL;
525 scn.sst_len = 0;
526 scn.sst_dirlen = 0;
527 }
528 }
529
530 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
531 &scn)) == -1) {
532 if (errno != ESRCH) {
533 expn = "Cannot read summary info";
534 goto out4;
535 }
536 i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst,
537 &dir, &scn);
538 if (i > 0)
539 goto out4;
540 i = cdf_file_dir_info(ms, &dir);
541 if (i < 0)
542 expn = "Cannot read section info";
543 goto out4;
544 }
545
546
547 #ifdef CDF_DEBUG
548 cdf_dump_summary_info(&h, &scn);
549 #endif
550 if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
551 expn = "Can't expand summary_info";
552
553 if (i == 0) {
554 const char *str = NULL;
555 cdf_directory_t *d;
556 char name[__arraycount(d->d_name)];
557 size_t j, k;
558
559 for (j = 0; str == NULL && j < dir.dir_len; j++) {
560 d = &dir.dir_tab[j];
561 for (k = 0; k < sizeof(name); k++)
562 name[k] = (char)cdf_tole2(d->d_name[k]);
563 str = cdf_app_to_mime(name,
564 NOTMIME(ms) ? name2desc : name2mime);
565 }
566 if (NOTMIME(ms)) {
567 if (str != NULL) {
568 if (file_printf(ms, "%s", str) == -1)
569 return -1;
570 i = 1;
571 }
572 } else {
573 if (str == NULL)
574 str = "vnd.ms-office";
575 if (file_printf(ms, "application/%s", str) == -1)
576 return -1;
577 i = 1;
578 }
579 }
580 out5:
581 free(scn.sst_tab);
582 out4:
583 free(sst.sst_tab);
584 out3:
585 free(dir.dir_tab);
586 out2:
587 free(ssat.sat_tab);
588 out1:
589 free(sat.sat_tab);
590 out0:
591 if (i == -1) {
592 if (NOTMIME(ms)) {
593 if (file_printf(ms,
594 "Composite Document File V2 Document") == -1)
595 return -1;
596 if (*expn)
597 if (file_printf(ms, ", %s", expn) == -1)
598 return -1;
599 } else {
600 if (file_printf(ms, "application/CDFV2-unknown") == -1)
601 return -1;
602 }
603 i = 1;
604 }
605 return i;
606 }
607