1 /*-
2  * Copyright (c) 2003-2010 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_SYS_IOCTL_H
29 #include <sys/ioctl.h>
30 #endif
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_FCNTL_H
38 #include <fcntl.h>
39 #endif
40 #ifdef HAVE_IO_H
41 #include <io.h>
42 #endif
43 #ifdef HAVE_STDLIB_H
44 #include <stdlib.h>
45 #endif
46 #ifdef HAVE_STRING_H
47 #include <string.h>
48 #endif
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
53 #include <sys/disk.h>
54 #elif defined(__NetBSD__) || defined(__OpenBSD__)
55 #include <sys/disklabel.h>
56 #include <sys/dkio.h>
57 #elif defined(__DragonFly__)
58 #include <sys/diskslice.h>
59 #endif
60 
61 #include "archive.h"
62 #include "archive_private.h"
63 #include "archive_string.h"
64 
/*
 * Fallbacks for open() flags that are not defined on every platform.
 * A missing flag is defined as 0 so that OR-ing it into the flag set
 * has no effect.
 */
#if !defined(O_BINARY)
#define	O_BINARY	0
#endif
#if !defined(O_CLOEXEC)
#define	O_CLOEXEC	0
#endif
71 
/*
 * Client data shared by the callbacks in this file.  One instance is
 * allocated per filename by the open functions, with the filename stored
 * inline at the end of the allocation.
 */
struct read_file_data {
	/* Descriptor being read; -1 while no file is open. */
	int	 fd;
	/* Size of each read() request and of the allocation in 'buffer'. */
	size_t	 block_size;
	/* Read buffer; allocated by file_open(), released by file_close2(). */
	void	*buffer;
	/* Mode bits for opened file. */
	mode_t	 st_mode;
	/* Nonzero while skip requests may be served with lseek(). */
	char	 use_lseek;
	/* Selects stdin or the valid member of 'filename'. */
	enum fnt_e {
		FNT_STDIN,
		FNT_MBS,
		FNT_WCS
	} filename_type;
	/*
	 * Must be last!  Declared with a single element; the open functions
	 * over-allocate the structure so the full name fits here.
	 */
	union {
		char	 m[1];	/* MBS filename. */
		wchar_t	 w[1];	/* WCS filename. */
	} filename;
};
84 
/* Forward declarations for the client callbacks and their helpers. */
static int	file_open(struct archive *a, void *client_data);
static int	file_close(struct archive *a, void *client_data);
static int	file_close2(struct archive *a, void *client_data);
static int	file_switch(struct archive *a, void *client_data1,
		    void *client_data2);
static ssize_t	file_read(struct archive *a, void *client_data,
		    const void **buff);
static int64_t	file_seek(struct archive *a, void *client_data,
		    int64_t request, int whence);
static int64_t	file_skip(struct archive *a, void *client_data,
		    int64_t request);
static int64_t	file_skip_lseek(struct archive *a, void *client_data,
		    int64_t request);
93 
/*
 * Alternate name for archive_read_open_filename(); forwards its arguments
 * unchanged and returns that function's result.
 */
int
archive_read_open_file(struct archive *a, const char *filename,
    size_t block_size)
{
	int status;

	status = archive_read_open_filename(a, filename, block_size);
	return (status);
}
100 
/*
 * Open a single file by wrapping 'filename' in a two-element,
 * NULL-terminated list and handing it to archive_read_open_filenames().
 */
int
archive_read_open_filename(struct archive *a, const char *filename,
    size_t block_size)
{
	const char *single[2];

	single[1] = NULL;
	single[0] = filename;
	return (archive_read_open_filenames(a, single, block_size));
}
110 
111 int
archive_read_open_filenames(struct archive * a,const char ** filenames,size_t block_size)112 archive_read_open_filenames(struct archive *a, const char **filenames,
113     size_t block_size)
114 {
115           struct read_file_data *mine;
116           const char *filename = NULL;
117           if (filenames)
118                     filename = *(filenames++);
119 
120           archive_clear_error(a);
121           do
122           {
123                     if (filename == NULL)
124                               filename = "";
125                     mine = calloc(1,
126                               sizeof(*mine) + strlen(filename));
127                     if (mine == NULL)
128                               goto no_memory;
129                     strcpy(mine->filename.m, filename);
130                     mine->block_size = block_size;
131                     mine->fd = -1;
132                     mine->buffer = NULL;
133                     mine->st_mode = mine->use_lseek = 0;
134                     if (filename == NULL || filename[0] == '\0') {
135                               mine->filename_type = FNT_STDIN;
136                     } else
137                               mine->filename_type = FNT_MBS;
138                     if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
139                               return (ARCHIVE_FATAL);
140                     if (filenames == NULL)
141                               break;
142                     filename = *(filenames++);
143           } while (filename != NULL && filename[0] != '\0');
144           archive_read_set_open_callback(a, file_open);
145           archive_read_set_read_callback(a, file_read);
146           archive_read_set_skip_callback(a, file_skip);
147           archive_read_set_close_callback(a, file_close);
148           archive_read_set_switch_callback(a, file_switch);
149           archive_read_set_seek_callback(a, file_seek);
150 
151           return (archive_read_open1(a));
152 no_memory:
153           archive_set_error(a, ENOMEM, "No memory");
154           return (ARCHIVE_FATAL);
155 }
156 
157 /*
158  * This function is an implementation detail of archive_read_open_filename_w,
159  * which is exposed as a separate API on Windows.
160  */
161 #if !defined(_WIN32) || defined(__CYGWIN__)
162 static
163 #endif
164 int
archive_read_open_filenames_w(struct archive * a,const wchar_t ** wfilenames,size_t block_size)165 archive_read_open_filenames_w(struct archive *a, const wchar_t **wfilenames,
166     size_t block_size)
167 {
168           struct read_file_data *mine;
169           const wchar_t *wfilename = NULL;
170           if (wfilenames)
171                     wfilename = *(wfilenames++);
172 
173           archive_clear_error(a);
174           do
175           {
176                     if (wfilename == NULL)
177                               wfilename = L"";
178                     mine = calloc(1,
179                               sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t));
180                     if (mine == NULL)
181                               goto no_memory;
182                     mine->block_size = block_size;
183                     mine->fd = -1;
184 
185                     if (wfilename == NULL || wfilename[0] == L'\0') {
186                               mine->filename_type = FNT_STDIN;
187                     } else {
188 #if defined(_WIN32) && !defined(__CYGWIN__)
189                               mine->filename_type = FNT_WCS;
190                               wcscpy(mine->filename.w, wfilename);
191 #else
192                               /*
193                                * POSIX system does not support a wchar_t interface for
194                                * open() system call, so we have to translate a wchar_t
195                                * filename to multi-byte one and use it.
196                                */
197                               struct archive_string fn;
198 
199                               archive_string_init(&fn);
200                               if (archive_string_append_from_wcs(&fn, wfilename,
201                                   wcslen(wfilename)) != 0) {
202                                         if (errno == ENOMEM)
203                                                   archive_set_error(a, errno,
204                                                       "Can't allocate memory");
205                                         else
206                                                   archive_set_error(a, EINVAL,
207                                                       "Failed to convert a wide-character"
208                                                       " filename to a multi-byte filename");
209                                         archive_string_free(&fn);
210                                         free(mine);
211                                         return (ARCHIVE_FATAL);
212                               }
213                               mine->filename_type = FNT_MBS;
214                               strcpy(mine->filename.m, fn.s);
215                               archive_string_free(&fn);
216 #endif
217                     }
218                     if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
219                               return (ARCHIVE_FATAL);
220                     if (wfilenames == NULL)
221                               break;
222                     wfilename = *(wfilenames++);
223           } while (wfilename != NULL && wfilename[0] != '\0');
224           archive_read_set_open_callback(a, file_open);
225           archive_read_set_read_callback(a, file_read);
226           archive_read_set_skip_callback(a, file_skip);
227           archive_read_set_close_callback(a, file_close);
228           archive_read_set_switch_callback(a, file_switch);
229           archive_read_set_seek_callback(a, file_seek);
230 
231           return (archive_read_open1(a));
232 no_memory:
233           archive_set_error(a, ENOMEM, "No memory");
234           return (ARCHIVE_FATAL);
235 }
236 
/*
 * Wide-character counterpart of archive_read_open_filename(): wraps
 * 'wfilename' in a two-element, NULL-terminated list and hands it to
 * archive_read_open_filenames_w().
 */
int
archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename,
    size_t block_size)
{
	const wchar_t *single[2];

	single[1] = NULL;
	single[0] = wfilename;
	return (archive_read_open_filenames_w(a, single, block_size));
}
246 
247 static int
file_open(struct archive * a,void * client_data)248 file_open(struct archive *a, void *client_data)
249 {
250           struct stat st;
251           struct read_file_data *mine = (struct read_file_data *)client_data;
252           void *buffer;
253           const char *filename = NULL;
254 #if defined(_WIN32) && !defined(__CYGWIN__)
255           const wchar_t *wfilename = NULL;
256 #endif
257           int fd = -1;
258           int is_disk_like = 0;
259 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
260           off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */
261 #elif defined(__NetBSD__) || defined(__OpenBSD__)
262           struct disklabel dl;
263 #elif defined(__DragonFly__)
264           struct partinfo pi;
265 #endif
266 
267           archive_clear_error(a);
268           if (mine->filename_type == FNT_STDIN) {
269                     /* We used to delegate stdin support by
270                      * directly calling archive_read_open_fd(a,0,block_size)
271                      * here, but that doesn't (and shouldn't) handle the
272                      * end-of-file flush when reading stdout from a pipe.
273                      * Basically, read_open_fd() is intended for folks who
274                      * are willing to handle such details themselves.  This
275                      * API is intended to be a little smarter for folks who
276                      * want easy handling of the common case.
277                      */
278                     fd = 0;
279 #if defined(__CYGWIN__) || defined(_WIN32)
280                     setmode(0, O_BINARY);
281 #endif
282                     filename = "";
283           } else if (mine->filename_type == FNT_MBS) {
284                     filename = mine->filename.m;
285                     fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC);
286                     __archive_ensure_cloexec_flag(fd);
287                     if (fd < 0) {
288                               archive_set_error(a, errno,
289                                   "Failed to open '%s'", filename);
290                               return (ARCHIVE_FATAL);
291                     }
292           } else {
293 #if defined(_WIN32) && !defined(__CYGWIN__)
294                     wfilename = mine->filename.w;
295                     fd = _wopen(wfilename, O_RDONLY | O_BINARY);
296                     if (fd < 0 && errno == ENOENT) {
297                               wchar_t *fullpath;
298                               fullpath = __la_win_permissive_name_w(wfilename);
299                               if (fullpath != NULL) {
300                                         fd = _wopen(fullpath, O_RDONLY | O_BINARY);
301                                         free(fullpath);
302                               }
303                     }
304                     if (fd < 0) {
305                               archive_set_error(a, errno,
306                                   "Failed to open '%S'", wfilename);
307                               return (ARCHIVE_FATAL);
308                     }
309 #else
310                     archive_set_error(a, ARCHIVE_ERRNO_MISC,
311                         "Unexpedted operation in archive_read_open_filename");
312                     goto fail;
313 #endif
314           }
315           if (fstat(fd, &st) != 0) {
316 #if defined(_WIN32) && !defined(__CYGWIN__)
317                     if (mine->filename_type == FNT_WCS)
318                               archive_set_error(a, errno, "Can't stat '%S'",
319                                   wfilename);
320                     else
321 #endif
322                               archive_set_error(a, errno, "Can't stat '%s'",
323                                   filename);
324                     goto fail;
325           }
326 
327           /*
328            * Determine whether the input looks like a disk device or a
329            * tape device.  The results are used below to select an I/O
330            * strategy:
331            *  = "disk-like" devices support arbitrary lseek() and will
332            *    support I/O requests of any size.  So we get easy skipping
333            *    and can cheat on block sizes to get better performance.
334            *  = "tape-like" devices require strict blocking and use
335            *    specialized ioctls for seeking.
336            *  = "socket-like" devices cannot seek at all but can improve
337            *    performance by using nonblocking I/O to read "whatever is
338            *    available right now".
339            *
340            * Right now, we only specially recognize disk-like devices,
341            * but it should be straightforward to add probes and strategy
342            * here for tape-like and socket-like devices.
343            */
344           if (S_ISREG(st.st_mode)) {
345                     /* Safety:  Tell the extractor not to overwrite the input. */
346                     archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
347                     /* Regular files act like disks. */
348                     is_disk_like = 1;
349           }
350 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
351           /* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */
352           else if (S_ISCHR(st.st_mode) &&
353               ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 &&
354               mediasize > 0) {
355                     is_disk_like = 1;
356           }
357 #elif defined(__NetBSD__) || defined(__OpenBSD__)
358           /* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */
359           else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) &&
360               ioctl(fd, DIOCGDINFO, &dl) == 0 &&
361               dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) {
362                     is_disk_like = 1;
363           }
364 #elif defined(__DragonFly__)
365           /* DragonFly BSD:  if it supports DIOCGPART ioctl, it's disk-like. */
366           else if (S_ISCHR(st.st_mode) &&
367               ioctl(fd, DIOCGPART, &pi) == 0 &&
368               pi.media_size > 0) {
369                     is_disk_like = 1;
370           }
371 #elif defined(__linux__)
372           /* Linux:  All block devices are disk-like. */
373           else if (S_ISBLK(st.st_mode) &&
374               lseek(fd, 0, SEEK_CUR) == 0 &&
375               lseek(fd, 0, SEEK_SET) == 0 &&
376               lseek(fd, 0, SEEK_END) > 0 &&
377               lseek(fd, 0, SEEK_SET) == 0) {
378                     is_disk_like = 1;
379           }
380 #endif
381           /* TODO: Add an "is_tape_like" variable and appropriate tests. */
382 
383           /* Disk-like devices prefer power-of-two block sizes.  */
384           /* Use provided block_size as a guide so users have some control. */
385           if (is_disk_like) {
386                     size_t new_block_size = 64 * 1024;
387                     while (new_block_size < mine->block_size
388                         && new_block_size < 64 * 1024 * 1024)
389                               new_block_size *= 2;
390                     mine->block_size = new_block_size;
391           }
392           buffer = malloc(mine->block_size);
393           if (buffer == NULL) {
394                     archive_set_error(a, ENOMEM, "No memory");
395                     goto fail;
396           }
397           mine->buffer = buffer;
398           mine->fd = fd;
399           /* Remember mode so close can decide whether to flush. */
400           mine->st_mode = st.st_mode;
401 
402           /* Disk-like inputs can use lseek(). */
403           if (is_disk_like)
404                     mine->use_lseek = 1;
405 
406           return (ARCHIVE_OK);
407 fail:
408           /*
409            * Don't close file descriptors not opened or ones pointing referring
410            * to `FNT_STDIN`.
411            */
412           if (fd != -1 && fd != 0)
413                     close(fd);
414           return (ARCHIVE_FATAL);
415 }
416 
417 static ssize_t
file_read(struct archive * a,void * client_data,const void ** buff)418 file_read(struct archive *a, void *client_data, const void **buff)
419 {
420           struct read_file_data *mine = (struct read_file_data *)client_data;
421           ssize_t bytes_read;
422 
423           /* TODO: If a recent lseek() operation has left us
424            * mis-aligned, read and return a short block to try to get
425            * us back in alignment. */
426 
427           /* TODO: Someday, try mmap() here; if that succeeds, give
428            * the entire file to libarchive as a single block.  That
429            * could be a lot faster than block-by-block manual I/O. */
430 
431           /* TODO: We might be able to improve performance on pipes and
432            * sockets by setting non-blocking I/O and just accepting
433            * whatever we get here instead of waiting for a full block
434            * worth of data. */
435 
436           *buff = mine->buffer;
437           for (;;) {
438                     bytes_read = read(mine->fd, mine->buffer, mine->block_size);
439                     if (bytes_read < 0) {
440                               if (errno == EINTR)
441                                         continue;
442                               else if (mine->filename_type == FNT_STDIN)
443                                         archive_set_error(a, errno,
444                                             "Error reading stdin");
445                               else if (mine->filename_type == FNT_MBS)
446                                         archive_set_error(a, errno,
447                                             "Error reading '%s'", mine->filename.m);
448                               else
449                                         archive_set_error(a, errno,
450                                             "Error reading '%S'", mine->filename.w);
451                     }
452                     return (bytes_read);
453           }
454 }
455 
456 /*
457  * Regular files and disk-like block devices can use simple lseek
458  * without needing to round the request to the block size.
459  *
460  * TODO: This can leave future reads mis-aligned.  Since we know the
461  * offset here, we should store it and use it in file_read() above
462  * to determine whether we should perform a short read to get back
463  * into alignment.  Long series of mis-aligned reads can negatively
464  * impact disk throughput.  (Of course, the performance impact should
465  * be carefully tested; extra code complexity is only worthwhile if
466  * it does provide measurable improvement.)
467  *
468  * TODO: Be lazy about the actual seek.  There are a few pathological
469  * cases where libarchive makes a bunch of seek requests in a row
470  * without any intervening reads.  This isn't a huge performance
471  * problem, since the kernel handles seeks lazily already, but
472  * it would be very slightly faster if we simply remembered the
473  * seek request here and then actually performed the seek at the
474  * top of the read callback above.
475  */
476 static int64_t
file_skip_lseek(struct archive * a,void * client_data,int64_t request)477 file_skip_lseek(struct archive *a, void *client_data, int64_t request)
478 {
479           struct read_file_data *mine = (struct read_file_data *)client_data;
480 #if defined(_WIN32) && !defined(__CYGWIN__)
481           /* We use _lseeki64() on Windows. */
482           int64_t old_offset, new_offset;
483 #else
484           off_t old_offset, new_offset;
485 #endif
486 
487           /* We use off_t here because lseek() is declared that way. */
488 
489           /* TODO: Deal with case where off_t isn't 64 bits.
490            * This shouldn't be a problem on Linux or other POSIX
491            * systems, since the configuration logic for libarchive
492            * tries to obtain a 64-bit off_t.
493            */
494           if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 &&
495               (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0)
496                     return (new_offset - old_offset);
497 
498           /* If lseek() fails, don't bother trying again. */
499           mine->use_lseek = 0;
500 
501           /* Let libarchive recover with read+discard */
502           if (errno == ESPIPE)
503                     return (0);
504 
505           /* If the input is corrupted or truncated, fail. */
506           if (mine->filename_type == FNT_STDIN)
507                     archive_set_error(a, errno, "Error seeking in stdin");
508           else if (mine->filename_type == FNT_MBS)
509                     archive_set_error(a, errno, "Error seeking in '%s'",
510                         mine->filename.m);
511           else
512                     archive_set_error(a, errno, "Error seeking in '%S'",
513                         mine->filename.w);
514           return (-1);
515 }
516 
517 
518 /*
519  * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to
520  * accelerate operation on tape drives.
521  */
522 
523 static int64_t
file_skip(struct archive * a,void * client_data,int64_t request)524 file_skip(struct archive *a, void *client_data, int64_t request)
525 {
526           struct read_file_data *mine = (struct read_file_data *)client_data;
527 
528           /* Delegate skip requests. */
529           if (mine->use_lseek)
530                     return (file_skip_lseek(a, client_data, request));
531 
532           /* If we can't skip, return 0; libarchive will read+discard instead. */
533           return (0);
534 }
535 
536 /*
537  * TODO: Store the offset and use it in the read callback.
538  */
539 static int64_t
file_seek(struct archive * a,void * client_data,int64_t request,int whence)540 file_seek(struct archive *a, void *client_data, int64_t request, int whence)
541 {
542           struct read_file_data *mine = (struct read_file_data *)client_data;
543           int64_t r;
544 
545           /* We use off_t here because lseek() is declared that way. */
546           /* See above for notes about when off_t is less than 64 bits. */
547           r = lseek(mine->fd, request, whence);
548           if (r >= 0)
549                     return r;
550 
551           /* If the input is corrupted or truncated, fail. */
552           if (mine->filename_type == FNT_STDIN)
553                     archive_set_error(a, errno, "Error seeking in stdin");
554           else if (mine->filename_type == FNT_MBS)
555                     archive_set_error(a, errno, "Error seeking in '%s'",
556                         mine->filename.m);
557           else
558                     archive_set_error(a, errno, "Error seeking in '%S'",
559                         mine->filename.w);
560           return (ARCHIVE_FATAL);
561 }
562 
563 static int
file_close2(struct archive * a,void * client_data)564 file_close2(struct archive *a, void *client_data)
565 {
566           struct read_file_data *mine = (struct read_file_data *)client_data;
567 
568           (void)a; /* UNUSED */
569 
570           /* Only flush and close if open succeeded. */
571           if (mine->fd >= 0) {
572                     /*
573                      * Sometimes, we should flush the input before closing.
574                      *   Regular files: faster to just close without flush.
575                      *   Disk-like devices:  Ditto.
576                      *   Tapes: must not flush (user might need to
577                      *      read the "next" item on a non-rewind device).
578                      *   Pipes and sockets:  must flush (otherwise, the
579                      *      program feeding the pipe or socket may complain).
580                      * Here, I flush everything except for regular files and
581                      * device nodes.
582                      */
583                     if (!S_ISREG(mine->st_mode)
584                         && !S_ISCHR(mine->st_mode)
585                         && !S_ISBLK(mine->st_mode)) {
586                               ssize_t bytesRead;
587                               do {
588                                         bytesRead = read(mine->fd, mine->buffer,
589                                             mine->block_size);
590                               } while (bytesRead > 0);
591                     }
592                     /* If a named file was opened, then it needs to be closed. */
593                     if (mine->filename_type != FNT_STDIN)
594                               close(mine->fd);
595           }
596           free(mine->buffer);
597           mine->buffer = NULL;
598           mine->fd = -1;
599           return (ARCHIVE_OK);
600 }
601 
602 static int
file_close(struct archive * a,void * client_data)603 file_close(struct archive *a, void *client_data)
604 {
605           struct read_file_data *mine = (struct read_file_data *)client_data;
606           file_close2(a, client_data);
607           free(mine);
608           return (ARCHIVE_OK);
609 }
610 
/*
 * Switch callback.  Shuts down the input described by 'client_data1'
 * (keeping its record) and opens the one described by 'client_data2',
 * returning the result of that open.
 */
static int
file_switch(struct archive *a, void *client_data1, void *client_data2)
{
	int status;

	file_close2(a, client_data1);
	status = file_open(a, client_data2);
	return status;
}
617