1 /*-
2 * Copyright (c) 2003-2010 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD$");
28
29 #ifdef HAVE_SYS_IOCTL_H
30 #include <sys/ioctl.h>
31 #endif
32 #ifdef HAVE_SYS_STAT_H
33 #include <sys/stat.h>
34 #endif
35 #ifdef HAVE_ERRNO_H
36 #include <errno.h>
37 #endif
38 #ifdef HAVE_FCNTL_H
39 #include <fcntl.h>
40 #endif
41 #ifdef HAVE_IO_H
42 #include <io.h>
43 #endif
44 #ifdef HAVE_STDLIB_H
45 #include <stdlib.h>
46 #endif
47 #ifdef HAVE_STRING_H
48 #include <string.h>
49 #endif
50 #ifdef HAVE_UNISTD_H
51 #include <unistd.h>
52 #endif
53 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
54 #include <sys/disk.h>
55 #elif defined(__NetBSD__) || defined(__OpenBSD__)
56 #include <sys/disklabel.h>
57 #include <sys/dkio.h>
58 #elif defined(__DragonFly__)
59 #include <sys/diskslice.h>
60 #endif
61
62 #include "archive.h"
63 #include "archive_private.h"
64 #include "archive_string.h"
65
66 #ifndef O_BINARY
67 #define O_BINARY 0
68 #endif
69 #ifndef O_CLOEXEC
70 #define O_CLOEXEC 0
71 #endif
72
/* Says how the filename stored in a read_file_data record is encoded. */
enum fnt_e {
	FNT_STDIN,	/* No name stored: the input is standard input. */
	FNT_MBS,	/* filename.m holds a multi-byte string. */
	FNT_WCS		/* filename.w holds a wide-character string. */
};

/*
 * Client data for one input file.  A record is allocated with extra
 * trailing space so that the filename can be stored in-line after the
 * fixed-size fields.
 */
struct read_file_data {
	int		 fd;		/* Descriptor; -1 while not open. */
	size_t		 block_size;	/* Bytes requested per read(). */
	void		*buffer;	/* Read buffer of block_size bytes. */
	mode_t		 st_mode;	/* Mode bits for opened file. */
	char		 use_lseek;	/* Nonzero: skip via lseek(). */
	enum fnt_e	 filename_type;	/* Which union member is valid. */
	union {
		char	 m[1];		/* MBS filename. */
		wchar_t	 w[1];		/* WCS filename. */
	} filename;			/* Must be last! */
};
85
86 static int file_open(struct archive *, void *);
87 static int file_close(struct archive *, void *);
88 static int file_close2(struct archive *, void *);
89 static int file_switch(struct archive *, void *, void *);
90 static ssize_t file_read(struct archive *, void *, const void **buff);
91 static int64_t file_seek(struct archive *, void *, int64_t request, int);
92 static int64_t file_skip(struct archive *, void *, int64_t request);
93 static int64_t file_skip_lseek(struct archive *, void *, int64_t request);
94
/*
 * Alternate name for archive_read_open_filename(): every argument is
 * forwarded unchanged and the callee's result is returned as-is.
 */
int
archive_read_open_file(struct archive *a, const char *filename,
    size_t block_size)
{
	int status;

	status = archive_read_open_filename(a, filename, block_size);
	return (status);
}
101
/*
 * Open a single named file by wrapping the name in a NULL-terminated,
 * one-element list and handing it to archive_read_open_filenames().
 */
int
archive_read_open_filename(struct archive *a, const char *filename,
    size_t block_size)
{
	const char *single[2];

	single[0] = filename;
	single[1] = NULL;
	return (archive_read_open_filenames(a, single, block_size));
}
109
110 int
archive_read_open_filenames(struct archive * a,const char ** filenames,size_t block_size)111 archive_read_open_filenames(struct archive *a, const char **filenames,
112 size_t block_size)
113 {
114 struct read_file_data *mine;
115 const char *filename = NULL;
116 if (filenames)
117 filename = *(filenames++);
118
119 archive_clear_error(a);
120 do
121 {
122 if (filename == NULL)
123 filename = "";
124 mine = (struct read_file_data *)calloc(1,
125 sizeof(*mine) + strlen(filename));
126 if (mine == NULL)
127 goto no_memory;
128 strcpy(mine->filename.m, filename);
129 mine->block_size = block_size;
130 mine->fd = -1;
131 mine->buffer = NULL;
132 mine->st_mode = mine->use_lseek = 0;
133 if (filename == NULL || filename[0] == '\0') {
134 mine->filename_type = FNT_STDIN;
135 } else
136 mine->filename_type = FNT_MBS;
137 if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
138 return (ARCHIVE_FATAL);
139 if (filenames == NULL)
140 break;
141 filename = *(filenames++);
142 } while (filename != NULL && filename[0] != '\0');
143 archive_read_set_open_callback(a, file_open);
144 archive_read_set_read_callback(a, file_read);
145 archive_read_set_skip_callback(a, file_skip);
146 archive_read_set_close_callback(a, file_close);
147 archive_read_set_switch_callback(a, file_switch);
148 archive_read_set_seek_callback(a, file_seek);
149
150 return (archive_read_open1(a));
151 no_memory:
152 archive_set_error(a, ENOMEM, "No memory");
153 return (ARCHIVE_FATAL);
154 }
155
156 int
archive_read_open_filename_w(struct archive * a,const wchar_t * wfilename,size_t block_size)157 archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename,
158 size_t block_size)
159 {
160 struct read_file_data *mine = (struct read_file_data *)calloc(1,
161 sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t));
162 if (!mine)
163 {
164 archive_set_error(a, ENOMEM, "No memory");
165 return (ARCHIVE_FATAL);
166 }
167 mine->fd = -1;
168 mine->block_size = block_size;
169
170 if (wfilename == NULL || wfilename[0] == L'\0') {
171 mine->filename_type = FNT_STDIN;
172 } else {
173 #if defined(_WIN32) && !defined(__CYGWIN__)
174 mine->filename_type = FNT_WCS;
175 wcscpy(mine->filename.w, wfilename);
176 #else
177 /*
178 * POSIX system does not support a wchar_t interface for
179 * open() system call, so we have to translate a whcar_t
180 * filename to multi-byte one and use it.
181 */
182 struct archive_string fn;
183
184 archive_string_init(&fn);
185 if (archive_string_append_from_wcs(&fn, wfilename,
186 wcslen(wfilename)) != 0) {
187 if (errno == ENOMEM)
188 archive_set_error(a, errno,
189 "Can't allocate memory");
190 else
191 archive_set_error(a, EINVAL,
192 "Failed to convert a wide-character"
193 " filename to a multi-byte filename");
194 archive_string_free(&fn);
195 free(mine);
196 return (ARCHIVE_FATAL);
197 }
198 mine->filename_type = FNT_MBS;
199 strcpy(mine->filename.m, fn.s);
200 archive_string_free(&fn);
201 #endif
202 }
203 if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
204 return (ARCHIVE_FATAL);
205 archive_read_set_open_callback(a, file_open);
206 archive_read_set_read_callback(a, file_read);
207 archive_read_set_skip_callback(a, file_skip);
208 archive_read_set_close_callback(a, file_close);
209 archive_read_set_switch_callback(a, file_switch);
210 archive_read_set_seek_callback(a, file_seek);
211
212 return (archive_read_open1(a));
213 }
214
215 static int
file_open(struct archive * a,void * client_data)216 file_open(struct archive *a, void *client_data)
217 {
218 struct stat st;
219 struct read_file_data *mine = (struct read_file_data *)client_data;
220 void *buffer;
221 const char *filename = NULL;
222 const wchar_t *wfilename = NULL;
223 int fd;
224 int is_disk_like = 0;
225 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
226 off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */
227 #elif defined(__NetBSD__) || defined(__OpenBSD__)
228 struct disklabel dl;
229 #elif defined(__DragonFly__)
230 struct partinfo pi;
231 #endif
232
233 archive_clear_error(a);
234 if (mine->filename_type == FNT_STDIN) {
235 /* We used to delegate stdin support by
236 * directly calling archive_read_open_fd(a,0,block_size)
237 * here, but that doesn't (and shouldn't) handle the
238 * end-of-file flush when reading stdout from a pipe.
239 * Basically, read_open_fd() is intended for folks who
240 * are willing to handle such details themselves. This
241 * API is intended to be a little smarter for folks who
242 * want easy handling of the common case.
243 */
244 fd = 0;
245 #if defined(__CYGWIN__) || defined(_WIN32)
246 setmode(0, O_BINARY);
247 #endif
248 filename = "";
249 } else if (mine->filename_type == FNT_MBS) {
250 filename = mine->filename.m;
251 fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC);
252 __archive_ensure_cloexec_flag(fd);
253 if (fd < 0) {
254 archive_set_error(a, errno,
255 "Failed to open '%s'", filename);
256 return (ARCHIVE_FATAL);
257 }
258 } else {
259 #if defined(_WIN32) && !defined(__CYGWIN__)
260 wfilename = mine->filename.w;
261 fd = _wopen(wfilename, O_RDONLY | O_BINARY);
262 if (fd < 0 && errno == ENOENT) {
263 wchar_t *fullpath;
264 fullpath = __la_win_permissive_name_w(wfilename);
265 if (fullpath != NULL) {
266 fd = _wopen(fullpath, O_RDONLY | O_BINARY);
267 free(fullpath);
268 }
269 }
270 if (fd < 0) {
271 archive_set_error(a, errno,
272 "Failed to open '%S'", wfilename);
273 return (ARCHIVE_FATAL);
274 }
275 #else
276 archive_set_error(a, ARCHIVE_ERRNO_MISC,
277 "Unexpedted operation in archive_read_open_filename");
278 return (ARCHIVE_FATAL);
279 #endif
280 }
281 if (fstat(fd, &st) != 0) {
282 if (mine->filename_type == FNT_WCS)
283 archive_set_error(a, errno, "Can't stat '%S'",
284 wfilename);
285 else
286 archive_set_error(a, errno, "Can't stat '%s'",
287 filename);
288 return (ARCHIVE_FATAL);
289 }
290
291 /*
292 * Determine whether the input looks like a disk device or a
293 * tape device. The results are used below to select an I/O
294 * strategy:
295 * = "disk-like" devices support arbitrary lseek() and will
296 * support I/O requests of any size. So we get easy skipping
297 * and can cheat on block sizes to get better performance.
298 * = "tape-like" devices require strict blocking and use
299 * specialized ioctls for seeking.
300 * = "socket-like" devices cannot seek at all but can improve
301 * performance by using nonblocking I/O to read "whatever is
302 * available right now".
303 *
304 * Right now, we only specially recognize disk-like devices,
305 * but it should be straightforward to add probes and strategy
306 * here for tape-like and socket-like devices.
307 */
308 if (S_ISREG(st.st_mode)) {
309 /* Safety: Tell the extractor not to overwrite the input. */
310 archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
311 /* Regular files act like disks. */
312 is_disk_like = 1;
313 }
314 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
315 /* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */
316 else if (S_ISCHR(st.st_mode) &&
317 ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 &&
318 mediasize > 0) {
319 is_disk_like = 1;
320 }
321 #elif defined(__NetBSD__) || defined(__OpenBSD__)
322 /* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */
323 else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) &&
324 ioctl(fd, DIOCGDINFO, &dl) == 0 &&
325 dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) {
326 is_disk_like = 1;
327 }
328 #elif defined(__DragonFly__)
329 /* DragonFly BSD: if it supports DIOCGPART ioctl, it's disk-like. */
330 else if (S_ISCHR(st.st_mode) &&
331 ioctl(fd, DIOCGPART, &pi) == 0 &&
332 pi.media_size > 0) {
333 is_disk_like = 1;
334 }
335 #elif defined(__linux__)
336 /* Linux: All block devices are disk-like. */
337 else if (S_ISBLK(st.st_mode) &&
338 lseek(fd, 0, SEEK_CUR) == 0 &&
339 lseek(fd, 0, SEEK_SET) == 0 &&
340 lseek(fd, 0, SEEK_END) > 0 &&
341 lseek(fd, 0, SEEK_SET) == 0) {
342 is_disk_like = 1;
343 }
344 #endif
345 /* TODO: Add an "is_tape_like" variable and appropriate tests. */
346
347 /* Disk-like devices prefer power-of-two block sizes. */
348 /* Use provided block_size as a guide so users have some control. */
349 if (is_disk_like) {
350 size_t new_block_size = 64 * 1024;
351 while (new_block_size < mine->block_size
352 && new_block_size < 64 * 1024 * 1024)
353 new_block_size *= 2;
354 mine->block_size = new_block_size;
355 }
356 buffer = malloc(mine->block_size);
357 if (mine == NULL || buffer == NULL) {
358 archive_set_error(a, ENOMEM, "No memory");
359 free(mine);
360 free(buffer);
361 return (ARCHIVE_FATAL);
362 }
363 mine->buffer = buffer;
364 mine->fd = fd;
365 /* Remember mode so close can decide whether to flush. */
366 mine->st_mode = st.st_mode;
367
368 /* Disk-like inputs can use lseek(). */
369 if (is_disk_like)
370 mine->use_lseek = 1;
371
372 return (ARCHIVE_OK);
373 }
374
375 static ssize_t
file_read(struct archive * a,void * client_data,const void ** buff)376 file_read(struct archive *a, void *client_data, const void **buff)
377 {
378 struct read_file_data *mine = (struct read_file_data *)client_data;
379 ssize_t bytes_read;
380
381 /* TODO: If a recent lseek() operation has left us
382 * mis-aligned, read and return a short block to try to get
383 * us back in alignment. */
384
385 /* TODO: Someday, try mmap() here; if that succeeds, give
386 * the entire file to libarchive as a single block. That
387 * could be a lot faster than block-by-block manual I/O. */
388
389 /* TODO: We might be able to improve performance on pipes and
390 * sockets by setting non-blocking I/O and just accepting
391 * whatever we get here instead of waiting for a full block
392 * worth of data. */
393
394 *buff = mine->buffer;
395 for (;;) {
396 bytes_read = read(mine->fd, mine->buffer, mine->block_size);
397 if (bytes_read < 0) {
398 if (errno == EINTR)
399 continue;
400 else if (mine->filename_type == FNT_STDIN)
401 archive_set_error(a, errno,
402 "Error reading stdin");
403 else if (mine->filename_type == FNT_MBS)
404 archive_set_error(a, errno,
405 "Error reading '%s'", mine->filename.m);
406 else
407 archive_set_error(a, errno,
408 "Error reading '%S'", mine->filename.w);
409 }
410 return (bytes_read);
411 }
412 }
413
414 /*
415 * Regular files and disk-like block devices can use simple lseek
416 * without needing to round the request to the block size.
417 *
418 * TODO: This can leave future reads mis-aligned. Since we know the
419 * offset here, we should store it and use it in file_read() above
420 * to determine whether we should perform a short read to get back
421 * into alignment. Long series of mis-aligned reads can negatively
422 * impact disk throughput. (Of course, the performance impact should
423 * be carefully tested; extra code complexity is only worthwhile if
424 * it does provide measurable improvement.)
425 *
426 * TODO: Be lazy about the actual seek. There are a few pathological
427 * cases where libarchive makes a bunch of seek requests in a row
428 * without any intervening reads. This isn't a huge performance
429 * problem, since the kernel handles seeks lazily already, but
430 * it would be very slightly faster if we simply remembered the
431 * seek request here and then actually performed the seek at the
432 * top of the read callback above.
433 */
434 static int64_t
file_skip_lseek(struct archive * a,void * client_data,int64_t request)435 file_skip_lseek(struct archive *a, void *client_data, int64_t request)
436 {
437 struct read_file_data *mine = (struct read_file_data *)client_data;
438 #if defined(_WIN32) && !defined(__CYGWIN__)
439 /* We use _lseeki64() on Windows. */
440 int64_t old_offset, new_offset;
441 #else
442 off_t old_offset, new_offset;
443 #endif
444
445 /* We use off_t here because lseek() is declared that way. */
446
447 /* TODO: Deal with case where off_t isn't 64 bits.
448 * This shouldn't be a problem on Linux or other POSIX
449 * systems, since the configuration logic for libarchive
450 * tries to obtain a 64-bit off_t.
451 */
452 if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 &&
453 (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0)
454 return (new_offset - old_offset);
455
456 /* If lseek() fails, don't bother trying again. */
457 mine->use_lseek = 0;
458
459 /* Let libarchive recover with read+discard */
460 if (errno == ESPIPE)
461 return (0);
462
463 /* If the input is corrupted or truncated, fail. */
464 if (mine->filename_type == FNT_STDIN)
465 archive_set_error(a, errno, "Error seeking in stdin");
466 else if (mine->filename_type == FNT_MBS)
467 archive_set_error(a, errno, "Error seeking in '%s'",
468 mine->filename.m);
469 else
470 archive_set_error(a, errno, "Error seeking in '%S'",
471 mine->filename.w);
472 return (-1);
473 }
474
475
476 /*
477 * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to
478 * accelerate operation on tape drives.
479 */
480
481 static int64_t
file_skip(struct archive * a,void * client_data,int64_t request)482 file_skip(struct archive *a, void *client_data, int64_t request)
483 {
484 struct read_file_data *mine = (struct read_file_data *)client_data;
485
486 /* Delegate skip requests. */
487 if (mine->use_lseek)
488 return (file_skip_lseek(a, client_data, request));
489
490 /* If we can't skip, return 0; libarchive will read+discard instead. */
491 return (0);
492 }
493
494 /*
495 * TODO: Store the offset and use it in the read callback.
496 */
497 static int64_t
file_seek(struct archive * a,void * client_data,int64_t request,int whence)498 file_seek(struct archive *a, void *client_data, int64_t request, int whence)
499 {
500 struct read_file_data *mine = (struct read_file_data *)client_data;
501 int64_t r;
502
503 /* We use off_t here because lseek() is declared that way. */
504 /* See above for notes about when off_t is less than 64 bits. */
505 r = lseek(mine->fd, request, whence);
506 if (r >= 0)
507 return r;
508
509 /* If the input is corrupted or truncated, fail. */
510 if (mine->filename_type == FNT_STDIN)
511 archive_set_error(a, errno, "Error seeking in stdin");
512 else if (mine->filename_type == FNT_MBS)
513 archive_set_error(a, errno, "Error seeking in '%s'",
514 mine->filename.m);
515 else
516 archive_set_error(a, errno, "Error seeking in '%S'",
517 mine->filename.w);
518 return (ARCHIVE_FATAL);
519 }
520
521 static int
file_close2(struct archive * a,void * client_data)522 file_close2(struct archive *a, void *client_data)
523 {
524 struct read_file_data *mine = (struct read_file_data *)client_data;
525
526 (void)a; /* UNUSED */
527
528 /* Only flush and close if open succeeded. */
529 if (mine->fd >= 0) {
530 /*
531 * Sometimes, we should flush the input before closing.
532 * Regular files: faster to just close without flush.
533 * Disk-like devices: Ditto.
534 * Tapes: must not flush (user might need to
535 * read the "next" item on a non-rewind device).
536 * Pipes and sockets: must flush (otherwise, the
537 * program feeding the pipe or socket may complain).
538 * Here, I flush everything except for regular files and
539 * device nodes.
540 */
541 if (!S_ISREG(mine->st_mode)
542 && !S_ISCHR(mine->st_mode)
543 && !S_ISBLK(mine->st_mode)) {
544 ssize_t bytesRead;
545 do {
546 bytesRead = read(mine->fd, mine->buffer,
547 mine->block_size);
548 } while (bytesRead > 0);
549 }
550 /* If a named file was opened, then it needs to be closed. */
551 if (mine->filename_type != FNT_STDIN)
552 close(mine->fd);
553 }
554 free(mine->buffer);
555 mine->buffer = NULL;
556 mine->fd = -1;
557 return (ARCHIVE_OK);
558 }
559
560 static int
file_close(struct archive * a,void * client_data)561 file_close(struct archive *a, void *client_data)
562 {
563 struct read_file_data *mine = (struct read_file_data *)client_data;
564 file_close2(a, client_data);
565 free(mine);
566 return (ARCHIVE_OK);
567 }
568
/*
 * Switch callback: shut down the current input (client_data1) without
 * freeing its record, then open the next one (client_data2).  Returns
 * whatever file_open() returns for the new input.
 */
static int
file_switch(struct archive *a, void *client_data1, void *client_data2)
{
	int opened;

	(void)file_close2(a, client_data1);
	opened = file_open(a, client_data2);
	return opened;
}
575