1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_STDLIB_H
32 #include <stdlib.h>
33 #endif
34 #ifdef HAVE_STRING_H
35 #include <string.h>
36 #endif
37 #ifdef HAVE_LIMITS_H
38 #include <limits.h>
39 #endif
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #ifdef HAVE_ZLIB_H
44 #include <zlib.h>
45 #endif
46 
47 #include "archive.h"
48 #include "archive_entry.h"
49 #include "archive_endian.h"
50 #include "archive_private.h"
51 #include "archive_read_private.h"
52 
53 #ifdef HAVE_ZLIB_H
54 struct private_data {
55           z_stream   stream;
56           char                 in_stream;
57           unsigned char       *out_block;
58           size_t               out_block_size;
59           int64_t              total_out;
60           unsigned long        crc;
61           uint32_t   mtime;
62           char                *name;
63           char                 eof; /* True = found end of compressed data. */
64 };
65 
/* Read-side callbacks of the gzip filter; both are defined further down. */
static ssize_t	gzip_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	gzip_filter_close(struct archive_read_filter *self);
69 #endif
70 
/*
 * Note that we can detect gzip archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if zlib is unavailable.
 *
 * If zlib is unavailable, gzip_bidder_init() uses the external-program
 * framework to try to run "gzip -d" instead.
 */
/* Bidder callbacks: sniff the header, then set up the filter. */
static int	gzip_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	gzip_bidder_init(struct archive_read_filter *self);
84 
#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated spelling of archive_read_support_filter_gzip(); it simply
 * forwards to the new name.  Remove in libarchive 4.0.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	return (archive_read_support_filter_gzip(a));
}
#endif
93 
94 static const struct archive_read_filter_bidder_vtable
95 gzip_bidder_vtable = {
96           .bid = gzip_bidder_bid,
97           .init = gzip_bidder_init,
98 };
99 
100 int
archive_read_support_filter_gzip(struct archive * _a)101 archive_read_support_filter_gzip(struct archive *_a)
102 {
103           struct archive_read *a = (struct archive_read *)_a;
104 
105           if (__archive_read_register_bidder(a, NULL, "gzip",
106                                         &gzip_bidder_vtable) != ARCHIVE_OK)
107                     return (ARCHIVE_FATAL);
108 
109           /* Signal the extent of gzip support with the return value here. */
110 #if HAVE_ZLIB_H
111           return (ARCHIVE_OK);
112 #else
113           archive_set_error(_a, ARCHIVE_ERRNO_MISC,
114               "Using external gzip program");
115           return (ARCHIVE_WARN);
116 #endif
117 }
118 
#define MAX_FILENAME_LENGTH (1024 * 1024L)
#define MAX_COMMENT_LENGTH (1024 * 1024L)

/* Bits of the gzip header FLG byte (header byte 3) tested below. */
enum {
	GZIP_FLAG_FHCRC		= 2,	/* 2-byte header CRC follows. */
	GZIP_FLAG_FEXTRA	= 4,	/* Length-prefixed extra field. */
	GZIP_FLAG_FNAME		= 8,	/* NUL-terminated file name. */
	GZIP_FLAG_FCOMMENT	= 16	/* NUL-terminated comment. */
};

/*
 * Step over one NUL-terminated header field that begins at offset *len.
 *
 * Grows the read-ahead window one byte at a time until the terminating
 * NUL has been seen.  On success *len is the offset just past the NUL
 * and the (possibly refreshed) buffer pointer is returned.  Returns NULL
 * if more data is needed but cannot be obtained, or if more data is
 * needed while more than max_buffered bytes are already buffered.
 * *avail is updated whenever the window is refreshed.
 */
static const unsigned char *
gzip_skip_header_string(struct archive_read_filter *filter,
    const unsigned char *p, ssize_t *avail, ssize_t *len, long max_buffered)
{
	do {
		++(*len);
		if (*avail < *len) {
			if (*avail > max_buffered)
				return (NULL);
			p = __archive_read_filter_ahead(filter, *len, avail);
		}
		if (p == NULL)
			return (NULL);
	} while (p[*len - 1] != 0);
	return (p);
}

/*
 * Read and verify the header.
 *
 * Returns zero if the header couldn't be validated, else returns
 * number of bytes in header.  If pbits is non-NULL, it receives a
 * count of bits verified, suitable for use by bidder; it is only
 * written when the header validates.
 *
 * In zlib builds a non-NULL state additionally receives the header
 * mtime and a heap copy of the stored file name (replacing any earlier
 * name).  The name may end up NULL if strdup() fails.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	const unsigned char *p;
	ssize_t avail, len;
	int bits = 0;
	int header_flags;
#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	/* Start by looking at the first ten bytes of the header, which
	 * is all fixed layout. */
	len = 10;
	p = __archive_read_filter_ahead(filter, len, &avail);
	if (p == NULL || avail == 0)
		return (0);
	/* We only support deflation- third byte must be 0x08. */
	if (memcmp(p, "\x1F\x8B\x08", 3) != 0)
		return (0);
	bits += 24;
	if ((p[3] & 0xE0) != 0)	/* No reserved flags set. */
		return (0);
	bits += 3;
	header_flags = p[3];
	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state)
		state->mtime = archive_le32dec(p + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data:  2 byte length plus variable body. */
	if (header_flags & GZIP_FLAG_FEXTRA) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		len += ((int)p[len + 1] << 8) | (int)p[len];
		len += 2;
	}

	/* Null-terminated optional filename. */
	if (header_flags & GZIP_FLAG_FNAME) {
#ifdef HAVE_ZLIB_H
		ssize_t file_start = len;
#endif
		p = gzip_skip_header_string(filter, p, &avail, &len,
		    MAX_FILENAME_LENGTH);
		if (p == NULL)
			return (0);

#ifdef HAVE_ZLIB_H
		if (state) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			state->name = strdup((const char *)&p[file_start]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (header_flags & GZIP_FLAG_FCOMMENT) {
		p = gzip_skip_header_string(filter, p, &avail, &len,
		    MAX_COMMENT_LENGTH);
		if (p == NULL)
			return (0);
	}

	/* Optional header CRC */
	if (header_flags & GZIP_FLAG_FHCRC) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		/*
		 * XXX TODO: Compute the header CRC, compare it with the
		 * stored little-endian value at p[len], reject the header
		 * on mismatch, and count 16 more verified bits.
		 */
		len += 2;
	}

	if (pbits != NULL)
		*pbits = bits;
	return (len);
}
240 
/*
 * Bid on the input: the bid is the number of header bits that
 * peek_at_header() was able to verify, or zero if the data does not
 * start with a usable gzip header.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	int verified_bits = 0;

	(void)self; /* UNUSED */

	if (peek_at_header(filter, &verified_bits, NULL) == 0)
		return (0);
	return (verified_bits);
}
256 
257 #ifndef HAVE_ZLIB_H
258 
259 /*
260  * If we don't have the library on this system, we can't do the
261  * decompression directly.  We can, however, try to run "gzip -d"
262  * in case that's available.
263  */
264 static int
gzip_bidder_init(struct archive_read_filter * self)265 gzip_bidder_init(struct archive_read_filter *self)
266 {
267           int r;
268 
269           r = __archive_read_program(self, "gzip -d");
270           /* Note: We set the format here even if __archive_read_program()
271            * above fails.  We do, after all, know what the format is
272            * even if we weren't able to read it. */
273           self->code = ARCHIVE_FILTER_GZIP;
274           self->name = "gzip";
275           return (r);
276 }
277 
278 #else
279 
280 static int
gzip_read_header(struct archive_read_filter * self,struct archive_entry * entry)281 gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
282 {
283           struct private_data *state;
284 
285           state = (struct private_data *)self->data;
286 
287           /* A mtime of 0 is considered invalid/missing. */
288           if (state->mtime != 0)
289                     archive_entry_set_mtime(entry, state->mtime, 0);
290 
291           /* If the name is available, extract it. */
292           if (state->name)
293                     archive_entry_set_pathname(entry, state->name);
294 
295           return (ARCHIVE_OK);
296 }
297 
298 static const struct archive_read_filter_vtable
299 gzip_reader_vtable = {
300           .read = gzip_filter_read,
301           .close = gzip_filter_close,
302 #ifdef HAVE_ZLIB_H
303           .read_header = gzip_read_header,
304 #endif
305 };
306 
307 /*
308  * Initialize the filter object.
309  */
310 static int
gzip_bidder_init(struct archive_read_filter * self)311 gzip_bidder_init(struct archive_read_filter *self)
312 {
313           struct private_data *state;
314           static const size_t out_block_size = 64 * 1024;
315           void *out_block;
316 
317           self->code = ARCHIVE_FILTER_GZIP;
318           self->name = "gzip";
319 
320           state = calloc(1, sizeof(*state));
321           out_block = malloc(out_block_size);
322           if (state == NULL || out_block == NULL) {
323                     free(out_block);
324                     free(state);
325                     archive_set_error(&self->archive->archive, ENOMEM,
326                         "Can't allocate data for gzip decompression");
327                     return (ARCHIVE_FATAL);
328           }
329 
330           self->data = state;
331           state->out_block_size = out_block_size;
332           state->out_block = out_block;
333           self->vtable = &gzip_reader_vtable;
334 
335           state->in_stream = 0; /* We're not actually within a stream yet. */
336 
337           return (ARCHIVE_OK);
338 }
339 
340 static int
consume_header(struct archive_read_filter * self)341 consume_header(struct archive_read_filter *self)
342 {
343           struct private_data *state;
344           ssize_t avail;
345           size_t len;
346           int ret;
347 
348           state = (struct private_data *)self->data;
349 
350           /* If this is a real header, consume it. */
351           len = peek_at_header(self->upstream, NULL, state);
352           if (len == 0)
353                     return (ARCHIVE_EOF);
354           __archive_read_filter_consume(self->upstream, len);
355 
356           /* Initialize CRC accumulator. */
357           state->crc = crc32(0L, NULL, 0);
358 
359           /* Initialize compression library. */
360           state->stream.next_in = (unsigned char *)(uintptr_t)
361               __archive_read_filter_ahead(self->upstream, 1, &avail);
362           state->stream.avail_in = (uInt)avail;
363           ret = inflateInit2(&(state->stream),
364               -15 /* Don't check for zlib header */);
365 
366           /* Decipher the error code. */
367           switch (ret) {
368           case Z_OK:
369                     state->in_stream = 1;
370                     return (ARCHIVE_OK);
371           case Z_STREAM_ERROR:
372                     archive_set_error(&self->archive->archive,
373                         ARCHIVE_ERRNO_MISC,
374                         "Internal error initializing compression library: "
375                         "invalid setup parameter");
376                     break;
377           case Z_MEM_ERROR:
378                     archive_set_error(&self->archive->archive, ENOMEM,
379                         "Internal error initializing compression library: "
380                         "out of memory");
381                     break;
382           case Z_VERSION_ERROR:
383                     archive_set_error(&self->archive->archive,
384                         ARCHIVE_ERRNO_MISC,
385                         "Internal error initializing compression library: "
386                         "invalid library version");
387                     break;
388           default:
389                     archive_set_error(&self->archive->archive,
390                         ARCHIVE_ERRNO_MISC,
391                         "Internal error initializing compression library: "
392                         " Zlib error %d", ret);
393                     break;
394           }
395           return (ARCHIVE_FATAL);
396 }
397 
398 static int
consume_trailer(struct archive_read_filter * self)399 consume_trailer(struct archive_read_filter *self)
400 {
401           struct private_data *state;
402           const unsigned char *p;
403           ssize_t avail;
404 
405           state = (struct private_data *)self->data;
406 
407           state->in_stream = 0;
408           switch (inflateEnd(&(state->stream))) {
409           case Z_OK:
410                     break;
411           default:
412                     archive_set_error(&self->archive->archive,
413                         ARCHIVE_ERRNO_MISC,
414                         "Failed to clean up gzip decompressor");
415                     return (ARCHIVE_FATAL);
416           }
417 
418           /* GZip trailer is a fixed 8 byte structure. */
419           p = __archive_read_filter_ahead(self->upstream, 8, &avail);
420           if (p == NULL || avail == 0)
421                     return (ARCHIVE_FATAL);
422 
423           /* XXX TODO: Verify the length and CRC. */
424 
425           /* We've verified the trailer, so consume it now. */
426           __archive_read_filter_consume(self->upstream, 8);
427 
428           return (ARCHIVE_OK);
429 }
430 
431 static ssize_t
gzip_filter_read(struct archive_read_filter * self,const void ** p)432 gzip_filter_read(struct archive_read_filter *self, const void **p)
433 {
434           struct private_data *state;
435           size_t decompressed;
436           ssize_t avail_in, max_in;
437           int ret;
438 
439           state = (struct private_data *)self->data;
440 
441           /* Empty our output buffer. */
442           state->stream.next_out = state->out_block;
443           state->stream.avail_out = (uInt)state->out_block_size;
444 
445           /* Try to fill the output buffer. */
446           while (state->stream.avail_out > 0 && !state->eof) {
447                     /* If we're not in a stream, read a header
448                      * and initialize the decompression library. */
449                     if (!state->in_stream) {
450                               ret = consume_header(self);
451                               if (ret == ARCHIVE_EOF) {
452                                         state->eof = 1;
453                                         break;
454                               }
455                               if (ret < ARCHIVE_OK)
456                                         return (ret);
457                     }
458 
459                     /* Peek at the next available data. */
460                     /* ZLib treats stream.next_in as const but doesn't declare
461                      * it so, hence this ugly cast. */
462                     state->stream.next_in = (unsigned char *)(uintptr_t)
463                         __archive_read_filter_ahead(self->upstream, 1, &avail_in);
464                     if (state->stream.next_in == NULL) {
465                               archive_set_error(&self->archive->archive,
466                                   ARCHIVE_ERRNO_MISC,
467                                   "truncated gzip input");
468                               return (ARCHIVE_FATAL);
469                     }
470                     if (UINT_MAX >= SSIZE_MAX)
471                               max_in = SSIZE_MAX;
472                     else
473                               max_in = UINT_MAX;
474                     if (avail_in > max_in)
475                               avail_in = max_in;
476                     state->stream.avail_in = (uInt)avail_in;
477 
478                     /* Decompress and consume some of that data. */
479                     ret = inflate(&(state->stream), 0);
480                     switch (ret) {
481                     case Z_OK: /* Decompressor made some progress. */
482                               __archive_read_filter_consume(self->upstream,
483                                   avail_in - state->stream.avail_in);
484                               break;
485                     case Z_STREAM_END: /* Found end of stream. */
486                               __archive_read_filter_consume(self->upstream,
487                                   avail_in - state->stream.avail_in);
488                               /* Consume the stream trailer; release the
489                                * decompression library. */
490                               ret = consume_trailer(self);
491                               if (ret < ARCHIVE_OK)
492                                         return (ret);
493                               break;
494                     default:
495                               /* Return an error. */
496                               archive_set_error(&self->archive->archive,
497                                   ARCHIVE_ERRNO_MISC,
498                                   "gzip decompression failed");
499                               return (ARCHIVE_FATAL);
500                     }
501           }
502 
503           /* We've read as much as we can. */
504           decompressed = state->stream.next_out - state->out_block;
505           state->total_out += decompressed;
506           if (decompressed == 0)
507                     *p = NULL;
508           else
509                     *p = state->out_block;
510           return (decompressed);
511 }
512 
513 /*
514  * Clean up the decompressor.
515  */
516 static int
gzip_filter_close(struct archive_read_filter * self)517 gzip_filter_close(struct archive_read_filter *self)
518 {
519           struct private_data *state;
520           int ret;
521 
522           state = (struct private_data *)self->data;
523           ret = ARCHIVE_OK;
524 
525           if (state->in_stream) {
526                     switch (inflateEnd(&(state->stream))) {
527                     case Z_OK:
528                               break;
529                     default:
530                               archive_set_error(&(self->archive->archive),
531                                   ARCHIVE_ERRNO_MISC,
532                                   "Failed to clean up gzip compressor");
533                               ret = ARCHIVE_FATAL;
534                     }
535           }
536 
537           free(state->name);
538           free(state->out_block);
539           free(state);
540           return (ret);
541 }
542 
543 #endif /* HAVE_ZLIB_H */
544