1 /*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27
28 __FBSDID("$FreeBSD$");
29
30
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 #ifdef HAVE_LIMITS_H
41 #include <limits.h>
42 #endif
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_ZLIB_H
47 #include <zlib.h>
48 #endif
49
50 #include "archive.h"
51 #include "archive_entry.h"
52 #include "archive_endian.h"
53 #include "archive_private.h"
54 #include "archive_read_private.h"
55
56 #ifdef HAVE_ZLIB_H
57 struct private_data {
58 z_stream stream;
59 char in_stream;
60 unsigned char *out_block;
61 size_t out_block_size;
62 int64_t total_out;
63 unsigned long crc;
64 uint32_t mtime;
65 char *name;
66 char eof; /* True = found end of compressed data. */
67 };
68
69 /* Gzip Filter. */
70 static ssize_t gzip_filter_read(struct archive_read_filter *, const void **);
71 static int gzip_filter_close(struct archive_read_filter *);
72 #endif
73
/*
 * Note that we can detect gzip archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if zlib is unavailable.
 *
 * When zlib is unavailable, gzip_bidder_init() does not decompress
 * in-process; it hands the stream to an external "gzip -d" program
 * through __archive_read_program().
 */
84 static int gzip_bidder_bid(struct archive_read_filter_bidder *,
85 struct archive_read_filter *);
86 static int gzip_bidder_init(struct archive_read_filter *);
87
88 #if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated; remove in libarchive 4.0.
 *
 * Legacy name kept for source compatibility: it simply forwards to
 * archive_read_support_filter_gzip() and returns that call's status.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	int status;

	status = archive_read_support_filter_gzip(a);
	return status;
}
95 #endif
96
97 int
archive_read_support_filter_gzip(struct archive * _a)98 archive_read_support_filter_gzip(struct archive *_a)
99 {
100 struct archive_read *a = (struct archive_read *)_a;
101 struct archive_read_filter_bidder *bidder;
102
103 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
104 ARCHIVE_STATE_NEW, "archive_read_support_filter_gzip");
105
106 if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
107 return (ARCHIVE_FATAL);
108
109 bidder->data = NULL;
110 bidder->name = "gzip";
111 bidder->bid = gzip_bidder_bid;
112 bidder->init = gzip_bidder_init;
113 bidder->options = NULL;
114 bidder->free = NULL; /* No data, so no cleanup necessary. */
115 /* Signal the extent of gzip support with the return value here. */
116 #if HAVE_ZLIB_H
117 return (ARCHIVE_OK);
118 #else
119 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
120 "Using external gzip program");
121 return (ARCHIVE_WARN);
122 #endif
123 }
124
/*
 * Upper bounds on the NUL-terminated name and comment header fields.
 * The gzip format itself imposes no limit, but the header comes from
 * untrusted input; without a cap, a header that never supplies the
 * terminating NUL would make us look ahead through the entire stream.
 */
enum {
	GZIP_MAX_FILENAME_LENGTH = 1024 * 1024,
	GZIP_MAX_COMMENT_LENGTH = 1024 * 1024
};

/*
 * Read and verify the header without consuming it.
 *
 * Returns zero if the header couldn't be validated, else returns
 * number of bytes in header.  If pbits is non-NULL, it receives a
 * count of bits verified, suitable for use by bidder.  If state is
 * non-NULL (zlib builds only), the header's modification time and
 * optional file name are recorded in it; the name is a heap copy
 * that replaces (and frees) any previously stored name.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	const unsigned char *p;
	ssize_t avail, len;
	ssize_t field_start;
	int bits = 0;
	int header_flags;
#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	/* Start by looking at the first ten bytes of the header, which
	 * is all fixed layout. */
	len = 10;
	p = __archive_read_filter_ahead(filter, len, &avail);
	if (p == NULL || avail == 0)
		return (0);
	/* We only support deflation- third byte must be 0x08. */
	if (memcmp(p, "\x1F\x8B\x08", 3) != 0)
		return (0);
	bits += 24;
	if ((p[3] & 0xE0) != 0)	/* No reserved flags set. */
		return (0);
	bits += 3;
	header_flags = p[3];
	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state)
		state->mtime = archive_le32dec(p + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data: 2 byte length plus variable body. */
	if (header_flags & 4) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		len += ((int)p[len + 1] << 8) | (int)p[len];
		len += 2;
	}

	/* Null-terminated optional filename. */
	if (header_flags & 8) {
		field_start = len;
		do {
			++len;
			/* Give up on implausibly long names. */
			if (len - field_start > GZIP_MAX_FILENAME_LENGTH)
				return (0);
			if (avail < len)
				p = __archive_read_filter_ahead(filter,
				    len, &avail);
			if (p == NULL)
				return (0);
		} while (p[len - 1] != 0);

#ifdef HAVE_ZLIB_H
		if (state) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			/* A failed strdup() leaves name NULL, which
			 * gzip_read_header() treats as "no name". */
			state->name = strdup((const char *)&p[field_start]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (header_flags & 16) {
		field_start = len;
		do {
			++len;
			/* Give up on implausibly long comments. */
			if (len - field_start > GZIP_MAX_COMMENT_LENGTH)
				return (0);
			if (avail < len)
				p = __archive_read_filter_ahead(filter,
				    len, &avail);
			if (p == NULL)
				return (0);
		} while (p[len - 1] != 0);
	}

	/* Optional header CRC */
	if ((header_flags & 2)) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
#if 0
	int hcrc = ((int)p[len + 1] << 8) | (int)p[len];
	int crc = /* XXX TODO: Compute header CRC. */;
	if (crc != hcrc)
		return (0);
	bits += 16;
#endif
		len += 2;
	}

	if (pbits != NULL)
		*pbits = bits;
	return (len);
}
236
/*
 * Bid on a stream: peek at (but do not consume) the gzip header and
 * report how many header bits were verified, or 0 if the header did
 * not validate.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	ssize_t header_len;
	int verified_bits;

	(void)self; /* UNUSED */

	/* verified_bits is only written when the header validates. */
	header_len = peek_at_header(filter, &verified_bits, NULL);
	return (header_len != 0 ? verified_bits : 0);
}
252
253 #ifndef HAVE_ZLIB_H
254
255 /*
256 * If we don't have the library on this system, we can't do the
257 * decompression directly. We can, however, try to run "gzip -d"
258 * in case that's available.
259 */
260 static int
gzip_bidder_init(struct archive_read_filter * self)261 gzip_bidder_init(struct archive_read_filter *self)
262 {
263 int r;
264
265 r = __archive_read_program(self, "gzip -d");
266 /* Note: We set the format here even if __archive_read_program()
267 * above fails. We do, after all, know what the format is
268 * even if we weren't able to read it. */
269 self->code = ARCHIVE_FILTER_GZIP;
270 self->name = "gzip";
271 return (r);
272 }
273
274 #else
275
276 static int
gzip_read_header(struct archive_read_filter * self,struct archive_entry * entry)277 gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
278 {
279 struct private_data *state;
280
281 state = (struct private_data *)self->data;
282
283 /* A mtime of 0 is considered invalid/missing. */
284 if (state->mtime != 0)
285 archive_entry_set_mtime(entry, state->mtime, 0);
286
287 /* If the name is available, extract it. */
288 if (state->name)
289 archive_entry_set_pathname(entry, state->name);
290
291 return (ARCHIVE_OK);
292 }
293
294 /*
295 * Initialize the filter object.
296 */
297 static int
gzip_bidder_init(struct archive_read_filter * self)298 gzip_bidder_init(struct archive_read_filter *self)
299 {
300 struct private_data *state;
301 static const size_t out_block_size = 64 * 1024;
302 void *out_block;
303
304 self->code = ARCHIVE_FILTER_GZIP;
305 self->name = "gzip";
306
307 state = (struct private_data *)calloc(sizeof(*state), 1);
308 out_block = (unsigned char *)malloc(out_block_size);
309 if (state == NULL || out_block == NULL) {
310 free(out_block);
311 free(state);
312 archive_set_error(&self->archive->archive, ENOMEM,
313 "Can't allocate data for gzip decompression");
314 return (ARCHIVE_FATAL);
315 }
316
317 self->data = state;
318 state->out_block_size = out_block_size;
319 state->out_block = out_block;
320 self->read = gzip_filter_read;
321 self->skip = NULL; /* not supported */
322 self->close = gzip_filter_close;
323 #ifdef HAVE_ZLIB_H
324 self->read_header = gzip_read_header;
325 #endif
326
327 state->in_stream = 0; /* We're not actually within a stream yet. */
328
329 return (ARCHIVE_OK);
330 }
331
332 static int
consume_header(struct archive_read_filter * self)333 consume_header(struct archive_read_filter *self)
334 {
335 struct private_data *state;
336 ssize_t avail;
337 size_t len;
338 int ret;
339
340 state = (struct private_data *)self->data;
341
342 /* If this is a real header, consume it. */
343 len = peek_at_header(self->upstream, NULL, state);
344 if (len == 0)
345 return (ARCHIVE_EOF);
346 __archive_read_filter_consume(self->upstream, len);
347
348 /* Initialize CRC accumulator. */
349 state->crc = crc32(0L, NULL, 0);
350
351 /* Initialize compression library. */
352 state->stream.next_in = (unsigned char *)(uintptr_t)
353 __archive_read_filter_ahead(self->upstream, 1, &avail);
354 state->stream.avail_in = (uInt)avail;
355 ret = inflateInit2(&(state->stream),
356 -15 /* Don't check for zlib header */);
357
358 /* Decipher the error code. */
359 switch (ret) {
360 case Z_OK:
361 state->in_stream = 1;
362 return (ARCHIVE_OK);
363 case Z_STREAM_ERROR:
364 archive_set_error(&self->archive->archive,
365 ARCHIVE_ERRNO_MISC,
366 "Internal error initializing compression library: "
367 "invalid setup parameter");
368 break;
369 case Z_MEM_ERROR:
370 archive_set_error(&self->archive->archive, ENOMEM,
371 "Internal error initializing compression library: "
372 "out of memory");
373 break;
374 case Z_VERSION_ERROR:
375 archive_set_error(&self->archive->archive,
376 ARCHIVE_ERRNO_MISC,
377 "Internal error initializing compression library: "
378 "invalid library version");
379 break;
380 default:
381 archive_set_error(&self->archive->archive,
382 ARCHIVE_ERRNO_MISC,
383 "Internal error initializing compression library: "
384 " Zlib error %d", ret);
385 break;
386 }
387 return (ARCHIVE_FATAL);
388 }
389
390 static int
consume_trailer(struct archive_read_filter * self)391 consume_trailer(struct archive_read_filter *self)
392 {
393 struct private_data *state;
394 const unsigned char *p;
395 ssize_t avail;
396
397 state = (struct private_data *)self->data;
398
399 state->in_stream = 0;
400 switch (inflateEnd(&(state->stream))) {
401 case Z_OK:
402 break;
403 default:
404 archive_set_error(&self->archive->archive,
405 ARCHIVE_ERRNO_MISC,
406 "Failed to clean up gzip decompressor");
407 return (ARCHIVE_FATAL);
408 }
409
410 /* GZip trailer is a fixed 8 byte structure. */
411 p = __archive_read_filter_ahead(self->upstream, 8, &avail);
412 if (p == NULL || avail == 0)
413 return (ARCHIVE_FATAL);
414
415 /* XXX TODO: Verify the length and CRC. */
416
417 /* We've verified the trailer, so consume it now. */
418 __archive_read_filter_consume(self->upstream, 8);
419
420 return (ARCHIVE_OK);
421 }
422
423 static ssize_t
gzip_filter_read(struct archive_read_filter * self,const void ** p)424 gzip_filter_read(struct archive_read_filter *self, const void **p)
425 {
426 struct private_data *state;
427 size_t decompressed;
428 ssize_t avail_in, max_in;
429 int ret;
430
431 state = (struct private_data *)self->data;
432
433 /* Empty our output buffer. */
434 state->stream.next_out = state->out_block;
435 state->stream.avail_out = (uInt)state->out_block_size;
436
437 /* Try to fill the output buffer. */
438 while (state->stream.avail_out > 0 && !state->eof) {
439 /* If we're not in a stream, read a header
440 * and initialize the decompression library. */
441 if (!state->in_stream) {
442 ret = consume_header(self);
443 if (ret == ARCHIVE_EOF) {
444 state->eof = 1;
445 break;
446 }
447 if (ret < ARCHIVE_OK)
448 return (ret);
449 }
450
451 /* Peek at the next available data. */
452 /* ZLib treats stream.next_in as const but doesn't declare
453 * it so, hence this ugly cast. */
454 state->stream.next_in = (unsigned char *)(uintptr_t)
455 __archive_read_filter_ahead(self->upstream, 1, &avail_in);
456 if (state->stream.next_in == NULL) {
457 archive_set_error(&self->archive->archive,
458 ARCHIVE_ERRNO_MISC,
459 "truncated gzip input");
460 return (ARCHIVE_FATAL);
461 }
462 if (UINT_MAX >= SSIZE_MAX)
463 max_in = SSIZE_MAX;
464 else
465 max_in = UINT_MAX;
466 if (avail_in > max_in)
467 avail_in = max_in;
468 state->stream.avail_in = (uInt)avail_in;
469
470 /* Decompress and consume some of that data. */
471 ret = inflate(&(state->stream), 0);
472 switch (ret) {
473 case Z_OK: /* Decompressor made some progress. */
474 __archive_read_filter_consume(self->upstream,
475 avail_in - state->stream.avail_in);
476 break;
477 case Z_STREAM_END: /* Found end of stream. */
478 __archive_read_filter_consume(self->upstream,
479 avail_in - state->stream.avail_in);
480 /* Consume the stream trailer; release the
481 * decompression library. */
482 ret = consume_trailer(self);
483 if (ret < ARCHIVE_OK)
484 return (ret);
485 break;
486 default:
487 /* Return an error. */
488 archive_set_error(&self->archive->archive,
489 ARCHIVE_ERRNO_MISC,
490 "gzip decompression failed");
491 return (ARCHIVE_FATAL);
492 }
493 }
494
495 /* We've read as much as we can. */
496 decompressed = state->stream.next_out - state->out_block;
497 state->total_out += decompressed;
498 if (decompressed == 0)
499 *p = NULL;
500 else
501 *p = state->out_block;
502 return (decompressed);
503 }
504
505 /*
506 * Clean up the decompressor.
507 */
508 static int
gzip_filter_close(struct archive_read_filter * self)509 gzip_filter_close(struct archive_read_filter *self)
510 {
511 struct private_data *state;
512 int ret;
513
514 state = (struct private_data *)self->data;
515 ret = ARCHIVE_OK;
516
517 if (state->in_stream) {
518 switch (inflateEnd(&(state->stream))) {
519 case Z_OK:
520 break;
521 default:
522 archive_set_error(&(self->archive->archive),
523 ARCHIVE_ERRNO_MISC,
524 "Failed to clean up gzip compressor");
525 ret = ARCHIVE_FATAL;
526 }
527 }
528
529 free(state->name);
530 free(state->out_block);
531 free(state);
532 return (ret);
533 }
534
535 #endif /* HAVE_ZLIB_H */
536