1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       file_io.c
4 /// \brief      File opening, unlinking, and closing
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "private.h"
14 
15 #include <fcntl.h>
16 
#ifdef TUKLIB_DOSLIKE
#         include <io.h>
#else
#         include <poll.h>
/// If true, io_copy_attrs() warns when fchown() fails to set the file owner.
/// io_init() sets this to whether the effective user ID is 0 (root).
static bool warn_fchown;
#endif
23 
24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
25 #         include <sys/time.h>
26 #elif defined(HAVE__FUTIME)
27 #         include <sys/utime.h>
28 #elif defined(HAVE_UTIME)
29 #         include <utime.h>
30 #endif
31 
32 #ifdef HAVE_CAPSICUM
33 #         ifdef HAVE_SYS_CAPSICUM_H
34 #                   include <sys/capsicum.h>
35 #         else
36 #                   include <sys/capability.h>
37 #         endif
38 #endif
39 
40 #include "tuklib_open_stdxxx.h"
41 
// If the system headers don't provide O_BINARY, define it as zero so that
// it can be OR'ed into the open() flags unconditionally.
#ifndef O_BINARY
#         define O_BINARY 0
#endif

// Same fallback for O_NOCTTY.
#ifndef O_NOCTTY
#         define O_NOCTTY 0
#endif

// Using this macro to silence a warning from gcc -Wlogical-op.
// When EAGAIN and EWOULDBLOCK have the same value, the macro expands to
// a single comparison instead of two identical ones.
#if EAGAIN == EWOULDBLOCK
#         define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
#else
#         define IS_EAGAIN_OR_EWOULDBLOCK(e) \
                    ((e) == EAGAIN || (e) == EWOULDBLOCK)
#endif
57 
58 
/// Return values of io_wait()
typedef enum {
          IO_WAIT_MORE,    // Reading or writing is possible.
          IO_WAIT_ERROR,   // Error or user_abort
          IO_WAIT_TIMEOUT, // poll() timed out
} io_wait_ret;
64 
65 
/// If true, try to create sparse files when decompressing.
/// io_no_sparse() sets this to false.
static bool try_sparse = true;

#ifdef ENABLE_SANDBOX
/// True if the conditions for sandboxing (described in main()) have been met.
/// io_allow_sandbox() sets this to true; io_sandbox_enter() checks it.
static bool sandbox_allowed = false;
#endif

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input. This is used by io_open_src()
/// and io_close_src().
static int stdin_flags;

/// True when O_NONBLOCK was successfully added to the stdin flags, in which
/// case io_close_src() restores the flags saved in stdin_flags.
static bool restore_stdin_flags = false;

/// Original file status flags of standard output. This is used by
/// io_open_dest() and io_close_dest() to save and restore the flags.
static int stdout_flags;
static bool restore_stdout_flags = false;

/// Self-pipe used together with the user_abort variable to avoid
/// race conditions with signal handling. io_wait() polls the read end
/// ([0]) and io_write_to_user_abort_pipe() writes to the write end ([1]).
static int user_abort_pipe[2];
#endif


// Forward declaration; the definition is not in this part of the file.
static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
92 
93 
94 extern void
io_init(void)95 io_init(void)
96 {
97           // Make sure that stdin, stdout, and stderr are connected to
98           // a valid file descriptor. Exit immediately with exit code ERROR
99           // if we cannot make the file descriptors valid. Maybe we should
100           // print an error message, but our stderr could be screwed anyway.
101           tuklib_open_stdxxx(E_ERROR);
102 
103 #ifndef TUKLIB_DOSLIKE
104           // If fchown() fails setting the owner, we warn about it only if
105           // we are root.
106           warn_fchown = geteuid() == 0;
107 
108           // Create a pipe for the self-pipe trick.
109           if (pipe(user_abort_pipe))
110                     message_fatal(_("Error creating a pipe: %s"),
111                                         strerror(errno));
112 
113           // Make both ends of the pipe non-blocking.
114           for (unsigned i = 0; i < 2; ++i) {
115                     int flags = fcntl(user_abort_pipe[i], F_GETFL);
116                     if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL,
117                                         flags | O_NONBLOCK) == -1)
118                               message_fatal(_("Error creating a pipe: %s"),
119                                                   strerror(errno));
120           }
121 #endif
122 
123 #ifdef __DJGPP__
124           // Avoid doing useless things when statting files.
125           // This isn't important but doesn't hurt.
126           _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
127 #endif
128 
129           return;
130 }
131 
132 
133 #ifndef TUKLIB_DOSLIKE
134 extern void
io_write_to_user_abort_pipe(void)135 io_write_to_user_abort_pipe(void)
136 {
137           // If the write() fails, it's probably due to the pipe being full.
138           // Failing in that case is fine. If the reason is something else,
139           // there's not much we can do since this is called in a signal
140           // handler. So ignore the errors and try to avoid warnings with
141           // GCC and glibc when _FORTIFY_SOURCE=2 is used.
142           uint8_t b = '\0';
143           const int ret = write(user_abort_pipe[1], &b, 1);
144           (void)ret;
145           return;
146 }
147 #endif
148 
149 
150 extern void
io_no_sparse(void)151 io_no_sparse(void)
152 {
153           try_sparse = false;
154           return;
155 }
156 
157 
158 #ifdef ENABLE_SANDBOX
159 extern void
io_allow_sandbox(void)160 io_allow_sandbox(void)
161 {
162           sandbox_allowed = true;
163           return;
164 }
165 
166 
167 /// Enables operating-system-specific sandbox if it is possible.
168 /// src_fd is the file descriptor of the input file.
169 static void
io_sandbox_enter(int src_fd)170 io_sandbox_enter(int src_fd)
171 {
172           if (!sandbox_allowed) {
173                     message(V_DEBUG, _("Sandbox is disabled due "
174                                         "to incompatible command line arguments"));
175                     return;
176           }
177 
178           const char dummy_str[] = "x";
179 
180           // Try to ensure that both libc and xz locale files have been
181           // loaded when NLS is enabled.
182           snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL));
183 
184           // Try to ensure that iconv data files needed for handling multibyte
185           // characters have been loaded. This is needed at least with glibc.
186           tuklib_mbstr_width(dummy_str, NULL);
187 
188 #ifdef HAVE_CAPSICUM
189           // Capsicum needs FreeBSD 10.0 or later.
190           cap_rights_t rights;
191 
192           if (cap_rights_limit(src_fd, cap_rights_init(&rights,
193                               CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK)))
194                     goto error;
195 
196           if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights,
197                               CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP,
198                               CAP_WRITE, CAP_SEEK)))
199                     goto error;
200 
201           if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights,
202                               CAP_EVENT)))
203                     goto error;
204 
205           if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights,
206                               CAP_WRITE)))
207                     goto error;
208 
209           if (cap_enter())
210                     goto error;
211 
212 #else
213 #         error ENABLE_SANDBOX is defined but no sandboxing method was found.
214 #endif
215 
216           message(V_DEBUG, _("Sandbox was successfully enabled"));
217           return;
218 
219 error:
220           message(V_DEBUG, _("Failed to enable the sandbox"));
221 }
222 #endif // ENABLE_SANDBOX
223 
224 
225 #ifndef TUKLIB_DOSLIKE
226 /// \brief      Waits for input or output to become available or for a signal
227 ///
228 /// This uses the self-pipe trick to avoid a race condition that can occur
229 /// if a signal is caught after user_abort has been checked but before e.g.
230 /// read() has been called. In that situation read() could block unless
231 /// non-blocking I/O is used. With non-blocking I/O something like select()
232 /// or poll() is needed to avoid a busy-wait loop, and the same race condition
233 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
234 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is
235 /// old and very portable.
236 static io_wait_ret
io_wait(file_pair * pair,int timeout,bool is_reading)237 io_wait(file_pair *pair, int timeout, bool is_reading)
238 {
239           struct pollfd pfd[2];
240 
241           if (is_reading) {
242                     pfd[0].fd = pair->src_fd;
243                     pfd[0].events = POLLIN;
244           } else {
245                     pfd[0].fd = pair->dest_fd;
246                     pfd[0].events = POLLOUT;
247           }
248 
249           pfd[1].fd = user_abort_pipe[0];
250           pfd[1].events = POLLIN;
251 
252           while (true) {
253                     const int ret = poll(pfd, 2, timeout);
254 
255                     if (user_abort)
256                               return IO_WAIT_ERROR;
257 
258                     if (ret == -1) {
259                               if (errno == EINTR || errno == EAGAIN)
260                                         continue;
261 
262                               message_error(_("%s: poll() failed: %s"),
263                                                   is_reading ? pair->src_name
264                                                             : pair->dest_name,
265                                                   strerror(errno));
266                               return IO_WAIT_ERROR;
267                     }
268 
269                     if (ret == 0) {
270                               assert(opt_flush_timeout != 0);
271                               flush_needed = true;
272                               return IO_WAIT_TIMEOUT;
273                     }
274 
275                     if (pfd[0].revents != 0)
276                               return IO_WAIT_MORE;
277           }
278 }
279 #endif
280 
281 
282 /// \brief      Unlink a file
283 ///
284 /// This tries to verify that the file being unlinked really is the file that
285 /// we want to unlink by verifying device and inode numbers. There's still
286 /// a small unavoidable race, but this is much better than nothing (the file
287 /// could have been moved/replaced even hours earlier).
288 static void
io_unlink(const char * name,const struct stat * known_st)289 io_unlink(const char *name, const struct stat *known_st)
290 {
291 #if defined(TUKLIB_DOSLIKE)
292           // On DOS-like systems, st_ino is meaningless, so don't bother
293           // testing it. Just silence a compiler warning.
294           (void)known_st;
295 #else
296           struct stat new_st;
297 
298           // If --force was used, use stat() instead of lstat(). This way
299           // (de)compressing symlinks works correctly. However, it also means
300           // that xz cannot detect if a regular file foo is renamed to bar
301           // and then a symlink foo -> bar is created. Because of stat()
302           // instead of lstat(), xz will think that foo hasn't been replaced
303           // with another file. Thus, xz will remove foo even though it no
304           // longer is the same file that xz used when it started compressing.
305           // Probably it's not too bad though, so this doesn't need a more
306           // complex fix.
307           const int stat_ret = opt_force
308                               ? stat(name, &new_st) : lstat(name, &new_st);
309 
310           if (stat_ret
311 #         ifdef __VMS
312                               // st_ino is an array, and we don't want to
313                               // compare st_dev at all.
314                               || memcmp(&new_st.st_ino, &known_st->st_ino,
315                                         sizeof(new_st.st_ino)) != 0
316 #         else
317                               // Typical POSIX-like system
318                               || new_st.st_dev != known_st->st_dev
319                               || new_st.st_ino != known_st->st_ino
320 #         endif
321                               )
322                     // TRANSLATORS: When compression or decompression finishes,
323                     // and xz is going to remove the source file, xz first checks
324                     // if the source file still exists, and if it does, does its
325                     // device and inode numbers match what xz saw when it opened
326                     // the source file. If these checks fail, this message is
327                     // shown, %s being the filename, and the file is not deleted.
328                     // The check for device and inode numbers is there, because
329                     // it is possible that the user has put a new file in place
330                     // of the original file, and in that case it obviously
331                     // shouldn't be removed.
332                     message_error(_("%s: File seems to have been moved, "
333                                         "not removing"), name);
334           else
335 #endif
336                     // There's a race condition between lstat() and unlink()
337                     // but at least we have tried to avoid removing wrong file.
338                     if (unlink(name))
339                               message_error(_("%s: Cannot remove: %s"),
340                                                   name, strerror(errno));
341 
342           return;
343 }
344 
345 
/// \brief      Copies owner/group, permissions, and timestamps
///
/// The attributes are read from pair->src_st and applied to the destination
/// file. Owner, group, and permissions are skipped on DOS-like systems.
/// Failing to set the group or the permissions gives a warning, failing to
/// set the owner gives a warning only if warn_fchown is true, and failing
/// to set the timestamps is ignored.
///
/// \param      pair    File pair; src_st, dest_fd, and dest_name are used.
///
/// \todo       ACL and EA support
///
static void
io_copy_attrs(const file_pair *pair)
{
          // Skip chown and chmod on Windows.
#ifndef TUKLIB_DOSLIKE
          // This function is more tricky than you may think at first.
          // Blindly copying permissions may permit users to access the
          // destination file who didn't have permission to access the
          // source file.

          // Try changing the owner of the file. If we aren't root or the owner
          // isn't already us, fchown() probably doesn't succeed. We warn
          // about failing fchown() only if we are root.
          if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown)
                    message_warning(_("%s: Cannot set the file owner: %s"),
                                        pair->dest_name, strerror(errno));

          // Permission bits to set on the destination; which bits are safe
          // to copy depends on whether setting the group succeeds.
          mode_t mode;

          if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
                    message_warning(_("%s: Cannot set the file group: %s"),
                                        pair->dest_name, strerror(errno));
                    // We can still safely copy some additional permissions:
                    // `group' must be at least as strict as `other' and
                    // also vice versa.
                    //
                    // NOTE: After this, the owner of the source file may
                    // get additional permissions. This shouldn't be too bad,
                    // because the owner would have had permission to chmod
                    // the original file anyway.
                    //
                    // First take the bits that are set for both `group' and
                    // `other' (in the low three bits), then use them for
                    // both `group' and `other' while keeping the owner bits.
                    mode = ((pair->src_st.st_mode & 0070) >> 3)
                                        & (pair->src_st.st_mode & 0007);
                    mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
          } else {
                    // Drop the setuid, setgid, and sticky bits.
                    mode = pair->src_st.st_mode & 0777;
          }

          if (fchmod(pair->dest_fd, mode))
                    message_warning(_("%s: Cannot set the file permissions: %s"),
                                        pair->dest_name, strerror(errno));
#endif

          // Copy the timestamps. We have several possible ways to do this, of
          // which some are better in both security and precision.
          //
          // First, get the nanosecond part of the timestamps. As of writing,
          // it's not standardized by POSIX, and there are several names for
          // the same thing in struct stat.
          long atime_nsec;
          long mtime_nsec;

#         if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
          // GNU and Solaris
          atime_nsec = pair->src_st.st_atim.tv_nsec;
          mtime_nsec = pair->src_st.st_mtim.tv_nsec;

#         elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
          // BSD
          atime_nsec = pair->src_st.st_atimespec.tv_nsec;
          mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;

#         elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
          // GNU and BSD without extensions
          atime_nsec = pair->src_st.st_atimensec;
          mtime_nsec = pair->src_st.st_mtimensec;

#         elif defined(HAVE_STRUCT_STAT_ST_UATIME)
          // Tru64
          // The value is multiplied by 1000 to get nanoseconds.
          atime_nsec = pair->src_st.st_uatime * 1000;
          mtime_nsec = pair->src_st.st_umtime * 1000;

#         elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
          // UnixWare
          atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
          mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;

#         else
          // Safe fallback
          atime_nsec = 0;
          mtime_nsec = 0;
#         endif

          // Construct a structure to hold the timestamps and call appropriate
          // function to set the timestamps. In every variant, element 0
          // (or actime) is the access time and element 1 (or modtime) is
          // the modification time. The return values are deliberately
          // ignored.
#if defined(HAVE_FUTIMENS)
          // Use nanosecond precision.
          struct timespec tv[2];
          tv[0].tv_sec = pair->src_st.st_atime;
          tv[0].tv_nsec = atime_nsec;
          tv[1].tv_sec = pair->src_st.st_mtime;
          tv[1].tv_nsec = mtime_nsec;

          (void)futimens(pair->dest_fd, tv);

#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
          // Use microsecond precision.
          struct timeval tv[2];
          tv[0].tv_sec = pair->src_st.st_atime;
          tv[0].tv_usec = atime_nsec / 1000;
          tv[1].tv_sec = pair->src_st.st_mtime;
          tv[1].tv_usec = mtime_nsec / 1000;

#         if defined(HAVE_FUTIMES)
          (void)futimes(pair->dest_fd, tv);
#         elif defined(HAVE_FUTIMESAT)
          (void)futimesat(pair->dest_fd, NULL, tv);
#         else
          // Argh, no function to use a file descriptor to set the timestamp.
          (void)utimes(pair->dest_name, tv);
#         endif

#elif defined(HAVE__FUTIME)
          // Use one-second precision with Windows-specific _futime().
          // We could use utime() too except that for some reason the
          // timestamp will get reset at close(). With _futime() it works.
          // This struct cannot be const as _futime() takes a non-const pointer.
          struct _utimbuf buf = {
                    .actime = pair->src_st.st_atime,
                    .modtime = pair->src_st.st_mtime,
          };

          // Avoid warnings.
          (void)atime_nsec;
          (void)mtime_nsec;

          (void)_futime(pair->dest_fd, &buf);

#elif defined(HAVE_UTIME)
          // Use one-second precision. utime() doesn't support using file
          // descriptor either. Some systems have broken utime() prototype
          // so don't make this const.
          struct utimbuf buf = {
                    .actime = pair->src_st.st_atime,
                    .modtime = pair->src_st.st_mtime,
          };

          // Avoid warnings.
          (void)atime_nsec;
          (void)mtime_nsec;

          (void)utime(pair->dest_name, &buf);
#endif

          return;
}
496 
497 
/// \brief      Opens the source file
///
/// For standard input no file is opened; stdin is only prepared (binary
/// mode on DOS-like systems, non-blocking mode elsewhere). For other files,
/// the file is opened, pair->src_st is filled, and the file is rejected with
/// a message if it isn't acceptable input (symlink, directory, non-regular
/// file, setuid/setgid/sticky bit, multiple hard links, depending on the
/// options). If the file is rejected after open() has succeeded,
/// pair->src_fd is closed.
///
/// io_open_src() calls this with signals blocked.
///
/// \param      pair    File pair; src_name must have been set. On success
///                     src_fd is set, and for files other than stdin
///                     src_st is filled too.
///
/// \return     Returns false on success, true on error.
static bool
io_open_src_real(file_pair *pair)
{
          // There's nothing to open when reading from stdin.
          // Note that the pointers are compared, not the string contents.
          if (pair->src_name == stdin_filename) {
                    pair->src_fd = STDIN_FILENO;
#ifdef TUKLIB_DOSLIKE
                    setmode(STDIN_FILENO, O_BINARY);
#else
                    // Try to set stdin to non-blocking mode. It won't work
                    // e.g. on OpenBSD if stdout is e.g. /dev/null. In such
                    // case we proceed as if stdin were non-blocking anyway
                    // (in case of /dev/null it will be in practice). The
                    // same applies to stdout in io_open_dest_real().
                    stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
                    if (stdin_flags == -1) {
                              message_error(_("Error getting the file status flags "
                                                  "from standard input: %s"),
                                                  strerror(errno));
                              return true;
                    }

                    // Remember to restore the original flags only if
                    // O_NONBLOCK wasn't already set and setting it succeeded.
                    if ((stdin_flags & O_NONBLOCK) == 0
                                        && fcntl(STDIN_FILENO, F_SETFL,
                                                  stdin_flags | O_NONBLOCK) != -1)
                              restore_stdin_flags = true;
#endif
#ifdef HAVE_POSIX_FADVISE
                    // It will fail if stdin is a pipe and that's fine.
                    (void)posix_fadvise(STDIN_FILENO, 0, 0,
                                        opt_mode == MODE_LIST
                                                  ? POSIX_FADV_RANDOM
                                                  : POSIX_FADV_SEQUENTIAL);
#endif
                    return false;
          }

          // Symlinks are not followed unless writing to stdout or --force
          // was used.
          const bool follow_symlinks = opt_stdout || opt_force;

          // We accept only regular files if we are writing the output
          // to disk too. bzip2 allows overriding this with --force but
          // gzip and xz don't.
          const bool reg_files_only = !opt_stdout;

          // Flags for open()
          int flags = O_RDONLY | O_BINARY | O_NOCTTY;

#ifndef TUKLIB_DOSLIKE
          // Use non-blocking I/O:
          //   - It prevents blocking when opening FIFOs and some other
          //     special files, which is good if we want to accept only
          //     regular files.
          //   - It can help avoiding some race conditions with signal handling.
          flags |= O_NONBLOCK;
#endif

#if defined(O_NOFOLLOW)
          if (!follow_symlinks)
                    flags |= O_NOFOLLOW;
#elif !defined(TUKLIB_DOSLIKE)
          // Some POSIX-like systems lack O_NOFOLLOW (it's not required
          // by POSIX). Check for symlinks with a separate lstat() on
          // these systems.
          if (!follow_symlinks) {
                    struct stat st;
                    if (lstat(pair->src_name, &st)) {
                              message_error("%s: %s", pair->src_name,
                                                  strerror(errno));
                              return true;

                    } else if (S_ISLNK(st.st_mode)) {
                              message_warning(_("%s: Is a symbolic link, "
                                                  "skipping"), pair->src_name);
                              return true;
                    }
          }
#else
          // Avoid warnings.
          (void)follow_symlinks;
#endif

          // Try to open the file. Signals have been blocked so EINTR shouldn't
          // be possible.
          pair->src_fd = open(pair->src_name, flags);

          if (pair->src_fd == -1) {
                    // Signals (that have a signal handler) have been blocked.
                    assert(errno != EINTR);

#ifdef O_NOFOLLOW
                    // Give an understandable error message if the reason
                    // for failing was that the file was a symbolic link.
                    //
                    // Note that at least Linux, OpenBSD, Solaris, and Darwin
                    // use ELOOP to indicate that O_NOFOLLOW was the reason
                    // that open() failed. Because there may be
                    // directories in the pathname, ELOOP may occur also
                    // because of a symlink loop in the directory part.
                    // So ELOOP doesn't tell us what actually went wrong,
                    // and this stupidity went into POSIX-1.2008 too.
                    //
                    // FreeBSD associates EMLINK with O_NOFOLLOW and
                    // Tru64 uses ENOTSUP. We use these directly here
                    // and skip the lstat() call and the associated race.
                    // I want to hear if there are other kernels that
                    // fail with something else than ELOOP with O_NOFOLLOW.
                    bool was_symlink = false;

#         if defined(__FreeBSD__) || defined(__DragonFly__)
                    if (errno == EMLINK)
                              was_symlink = true;

#         elif defined(__digital__) && defined(__unix__)
                    if (errno == ENOTSUP)
                              was_symlink = true;

#         elif defined(__NetBSD__)
                    if (errno == EFTYPE)
                              was_symlink = true;

#         else
                    if (errno == ELOOP && !follow_symlinks) {
                              // lstat() may change errno, so keep the value
                              // from open() for the error message below.
                              const int saved_errno = errno;
                              struct stat st;
                              if (lstat(pair->src_name, &st) == 0
                                                  && S_ISLNK(st.st_mode))
                                        was_symlink = true;

                              errno = saved_errno;
                    }
#         endif

                    // NOTE: The "else" below pairs with the message_error()
                    // call after the #endif. Without O_NOFOLLOW only that
                    // message_error() call is compiled.
                    if (was_symlink)
                              message_warning(_("%s: Is a symbolic link, "
                                                  "skipping"), pair->src_name);
                    else
#endif
                              // Something else than O_NOFOLLOW failing
                              // (assuming that the race conditions didn't
                              // confuse us).
                              message_error("%s: %s", pair->src_name,
                                                  strerror(errno));

                    return true;
          }

          // Stat the source file. We need the result also when we copy
          // the permissions, and when unlinking.
          //
          // NOTE: Use stat() instead of fstat() with DJGPP, because
          // then we have a better chance to get st_ino value that can
          // be used in io_open_dest_real() to prevent overwriting the
          // source file.
#ifdef __DJGPP__
          if (stat(pair->src_name, &pair->src_st))
                    goto error_msg;
#else
          if (fstat(pair->src_fd, &pair->src_st))
                    goto error_msg;
#endif

          if (S_ISDIR(pair->src_st.st_mode)) {
                    message_warning(_("%s: Is a directory, skipping"),
                                        pair->src_name);
                    goto error;
          }

          if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
                    message_warning(_("%s: Not a regular file, skipping"),
                                        pair->src_name);
                    goto error;
          }

#ifndef TUKLIB_DOSLIKE
          if (reg_files_only && !opt_force) {
                    if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
                              // gzip rejects setuid and setgid files even
                              // when --force was used. bzip2 doesn't check
                              // for them, but calls fchown() after fchmod(),
                              // and many systems automatically drop setuid
                              // and setgid bits there.
                              //
                              // We accept setuid and setgid files if
                              // --force was used. We drop these bits
                              // explicitly in io_copy_attrs().
                              message_warning(_("%s: File has setuid or "
                                                  "setgid bit set, skipping"),
                                                  pair->src_name);
                              goto error;
                    }

                    if (pair->src_st.st_mode & S_ISVTX) {
                              message_warning(_("%s: File has sticky bit "
                                                  "set, skipping"),
                                                  pair->src_name);
                              goto error;
                    }

                    if (pair->src_st.st_nlink > 1) {
                              message_warning(_("%s: Input file has more "
                                                  "than one hard link, "
                                                  "skipping"), pair->src_name);
                              goto error;
                    }
          }

          // If it is something else than a regular file, wait until
          // there is input available. This way reading from FIFOs
          // will work when open() is used with O_NONBLOCK.
          if (!S_ISREG(pair->src_st.st_mode)) {
                    // Signals are unblocked only for the duration of the
                    // wait. The timeout of -1 is passed on to poll().
                    signals_unblock();
                    const io_wait_ret ret = io_wait(pair, -1, true);
                    signals_block();

                    if (ret != IO_WAIT_MORE)
                              goto error;
          }
#endif

#ifdef HAVE_POSIX_FADVISE
          // It will fail with some special files like FIFOs but that is fine.
          (void)posix_fadvise(pair->src_fd, 0, 0,
                              opt_mode == MODE_LIST
                                        ? POSIX_FADV_RANDOM
                                        : POSIX_FADV_SEQUENTIAL);
#endif

          return false;

          // error_msg prints a message based on errno and falls through to
          // error, which closes the file. Jump directly to error when
          // a message has already been shown.
error_msg:
          message_error("%s: %s", pair->src_name, strerror(errno));
error:
          (void)close(pair->src_fd);
          return true;
}
736 
737 
738 extern file_pair *
io_open_src(const char * src_name)739 io_open_src(const char *src_name)
740 {
741           if (is_empty_filename(src_name))
742                     return NULL;
743 
744           // Since we have only one file open at a time, we can use
745           // a statically allocated structure.
746           static file_pair pair;
747 
748           pair = (file_pair){
749                     .src_name = src_name,
750                     .dest_name = NULL,
751                     .src_fd = -1,
752                     .dest_fd = -1,
753                     .src_eof = false,
754                     .dest_try_sparse = false,
755                     .dest_pending_sparse = 0,
756           };
757 
758           // Block the signals, for which we have a custom signal handler, so
759           // that we don't need to worry about EINTR.
760           signals_block();
761           const bool error = io_open_src_real(&pair);
762           signals_unblock();
763 
764 #ifdef ENABLE_SANDBOX
765           if (!error)
766                     io_sandbox_enter(pair.src_fd);
767 #endif
768 
769           return error ? NULL : &pair;
770 }
771 
772 
773 /// \brief      Closes source file of the file_pair structure
774 ///
775 /// \param      pair    File whose src_fd should be closed
776 /// \param      success If true, the file will be removed from the disk if
777 ///                     closing succeeds and --keep hasn't been used.
778 static void
io_close_src(file_pair * pair,bool success)779 io_close_src(file_pair *pair, bool success)
780 {
781 #ifndef TUKLIB_DOSLIKE
782           if (restore_stdin_flags) {
783                     assert(pair->src_fd == STDIN_FILENO);
784 
785                     restore_stdin_flags = false;
786 
787                     if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
788                               message_error(_("Error restoring the status flags "
789                                                   "to standard input: %s"),
790                                                   strerror(errno));
791           }
792 #endif
793 
794           if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
795                     // Close the file before possibly unlinking it. On DOS-like
796                     // systems this is always required since unlinking will fail
797                     // if the file is open. On POSIX systems it usually works
798                     // to unlink open files, but in some cases it doesn't and
799                     // one gets EBUSY in errno.
800                     //
801                     // xz 5.2.2 and older unlinked the file before closing it
802                     // (except on DOS-like systems). The old code didn't handle
803                     // EBUSY and could fail e.g. on some CIFS shares. The
804                     // advantage of unlinking before closing is negligible
805                     // (avoids a race between close() and stat()/lstat() and
806                     // unlink()), so let's keep this simple.
807                     (void)close(pair->src_fd);
808 
809                     if (success && !opt_keep_original)
810                               io_unlink(pair->src_name, &pair->src_st);
811           }
812 
813           return;
814 }
815 
816 
817 static bool
io_open_dest_real(file_pair * pair)818 io_open_dest_real(file_pair *pair)
819 {
820           if (opt_stdout || pair->src_fd == STDIN_FILENO) {
821                     // We don't modify or free() this.
822                     pair->dest_name = (char *)"(stdout)";
823                     pair->dest_fd = STDOUT_FILENO;
824 #ifdef TUKLIB_DOSLIKE
825                     setmode(STDOUT_FILENO, O_BINARY);
826 #else
827                     // Try to set O_NONBLOCK if it isn't already set.
828                     // If it fails, we assume that stdout is non-blocking
829                     // in practice. See the comments in io_open_src_real()
830                     // for similar situation with stdin.
831                     //
832                     // NOTE: O_APPEND may be unset later in this function
833                     // and it relies on stdout_flags being set here.
834                     stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
835                     if (stdout_flags == -1) {
836                               message_error(_("Error getting the file status flags "
837                                                   "from standard output: %s"),
838                                                   strerror(errno));
839                               return true;
840                     }
841 
842                     if ((stdout_flags & O_NONBLOCK) == 0
843                                         && fcntl(STDOUT_FILENO, F_SETFL,
844                                                   stdout_flags | O_NONBLOCK) != -1)
845                                         restore_stdout_flags = true;
846 #endif
847           } else {
848                     pair->dest_name = suffix_get_dest_name(pair->src_name);
849                     if (pair->dest_name == NULL)
850                               return true;
851 
852 #ifdef __DJGPP__
853                     struct stat st;
854                     if (stat(pair->dest_name, &st) == 0) {
855                               // Check that it isn't a special file like "prn".
856                               if (st.st_dev == -1) {
857                                         message_error("%s: Refusing to write to "
858                                                             "a DOS special file",
859                                                             pair->dest_name);
860                                         free(pair->dest_name);
861                                         return true;
862                               }
863 
864                               // Check that we aren't overwriting the source file.
865                               if (st.st_dev == pair->src_st.st_dev
866                                                   && st.st_ino == pair->src_st.st_ino) {
867                                         message_error("%s: Output file is the same "
868                                                             "as the input file",
869                                                             pair->dest_name);
870                                         free(pair->dest_name);
871                                         return true;
872                               }
873                     }
874 #endif
875 
876                     // If --force was used, unlink the target file first.
877                     if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
878                               message_error(_("%s: Cannot remove: %s"),
879                                                   pair->dest_name, strerror(errno));
880                               free(pair->dest_name);
881                               return true;
882                     }
883 
884                     // Open the file.
885                     int flags = O_WRONLY | O_BINARY | O_NOCTTY
886                                         | O_CREAT | O_EXCL;
887 #ifndef TUKLIB_DOSLIKE
888                     flags |= O_NONBLOCK;
889 #endif
890                     const mode_t mode = S_IRUSR | S_IWUSR;
891                     pair->dest_fd = open(pair->dest_name, flags, mode);
892 
893                     if (pair->dest_fd == -1) {
894                               message_error("%s: %s", pair->dest_name,
895                                                   strerror(errno));
896                               free(pair->dest_name);
897                               return true;
898                     }
899           }
900 
901 #ifndef TUKLIB_DOSLIKE
902           // dest_st isn't used on DOS-like systems except as a dummy
903           // argument to io_unlink(), so don't fstat() on such systems.
904           if (fstat(pair->dest_fd, &pair->dest_st)) {
905                     // If fstat() really fails, we have a safe fallback here.
906 #         if defined(__VMS)
907                     pair->dest_st.st_ino[0] = 0;
908                     pair->dest_st.st_ino[1] = 0;
909                     pair->dest_st.st_ino[2] = 0;
910 #         else
911                     pair->dest_st.st_dev = 0;
912                     pair->dest_st.st_ino = 0;
913 #         endif
914           } else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
915                     // When writing to standard output, we need to be extra
916                     // careful:
917                     //  - It may be connected to something else than
918                     //    a regular file.
919                     //  - We aren't necessarily writing to a new empty file
920                     //    or to the end of an existing file.
921                     //  - O_APPEND may be active.
922                     //
923                     // TODO: I'm keeping this disabled for DOS-like systems
924                     // for now. FAT doesn't support sparse files, but NTFS
925                     // does, so maybe this should be enabled on Windows after
926                     // some testing.
927                     if (pair->dest_fd == STDOUT_FILENO) {
928                               if (!S_ISREG(pair->dest_st.st_mode))
929                                         return false;
930 
931                               if (stdout_flags & O_APPEND) {
932                                         // Creating a sparse file is not possible
933                                         // when O_APPEND is active (it's used by
934                                         // shell's >> redirection). As I understand
935                                         // it, it is safe to temporarily disable
936                                         // O_APPEND in xz, because if someone
937                                         // happened to write to the same file at the
938                                         // same time, results would be bad anyway
939                                         // (users shouldn't assume that xz uses any
940                                         // specific block size when writing data).
941                                         //
942                                         // The write position may be something else
943                                         // than the end of the file, so we must fix
944                                         // it to start writing at the end of the file
945                                         // to imitate O_APPEND.
946                                         if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
947                                                   return false;
948 
949                                         // Construct the new file status flags.
950                                         // If O_NONBLOCK was set earlier in this
951                                         // function, it must be kept here too.
952                                         int flags = stdout_flags & ~O_APPEND;
953                                         if (restore_stdout_flags)
954                                                   flags |= O_NONBLOCK;
955 
956                                         // If this fcntl() fails, we continue but won't
957                                         // try to create sparse output. The original
958                                         // flags will still be restored if needed (to
959                                         // unset O_NONBLOCK) when the file is finished.
960                                         if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
961                                                   return false;
962 
963                                         // Disabling O_APPEND succeeded. Mark
964                                         // that the flags should be restored
965                                         // in io_close_dest(). (This may have already
966                                         // been set when enabling O_NONBLOCK.)
967                                         restore_stdout_flags = true;
968 
969                               } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
970                                                   != pair->dest_st.st_size) {
971                                         // Writing won't start exactly at the end
972                                         // of the file. We cannot use sparse output,
973                                         // because it would probably corrupt the file.
974                                         return false;
975                               }
976                     }
977 
978                     pair->dest_try_sparse = true;
979           }
980 #endif
981 
982           return false;
983 }
984 
985 
986 extern bool
io_open_dest(file_pair * pair)987 io_open_dest(file_pair *pair)
988 {
989           signals_block();
990           const bool ret = io_open_dest_real(pair);
991           signals_unblock();
992           return ret;
993 }
994 
995 
996 /// \brief      Closes destination file of the file_pair structure
997 ///
998 /// \param      pair    File whose dest_fd should be closed
999 /// \param      success If false, the file will be removed from the disk.
1000 ///
1001 /// \return     Zero if closing succeeds. On error, -1 is returned and
1002 ///             error message printed.
1003 static bool
io_close_dest(file_pair * pair,bool success)1004 io_close_dest(file_pair *pair, bool success)
1005 {
1006 #ifndef TUKLIB_DOSLIKE
1007           // If io_open_dest() has disabled O_APPEND, restore it here.
1008           if (restore_stdout_flags) {
1009                     assert(pair->dest_fd == STDOUT_FILENO);
1010 
1011                     restore_stdout_flags = false;
1012 
1013                     if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
1014                               message_error(_("Error restoring the O_APPEND flag "
1015                                                   "to standard output: %s"),
1016                                                   strerror(errno));
1017                               return true;
1018                     }
1019           }
1020 #endif
1021 
1022           if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
1023                     return false;
1024 
1025           if (close(pair->dest_fd)) {
1026                     message_error(_("%s: Closing the file failed: %s"),
1027                                         pair->dest_name, strerror(errno));
1028 
1029                     // Closing destination file failed, so we cannot trust its
1030                     // contents. Get rid of junk:
1031                     io_unlink(pair->dest_name, &pair->dest_st);
1032                     free(pair->dest_name);
1033                     return true;
1034           }
1035 
1036           // If the operation using this file wasn't successful, we git rid
1037           // of the junk file.
1038           if (!success)
1039                     io_unlink(pair->dest_name, &pair->dest_st);
1040 
1041           free(pair->dest_name);
1042 
1043           return false;
1044 }
1045 
1046 
1047 extern void
io_close(file_pair * pair,bool success)1048 io_close(file_pair *pair, bool success)
1049 {
1050           // Take care of sparseness at the end of the output file.
1051           if (success && pair->dest_try_sparse
1052                               && pair->dest_pending_sparse > 0) {
1053                     // Seek forward one byte less than the size of the pending
1054                     // hole, then write one zero-byte. This way the file grows
1055                     // to its correct size. An alternative would be to use
1056                     // ftruncate() but that isn't portable enough (e.g. it
1057                     // doesn't work with FAT on Linux; FAT isn't that important
1058                     // since it doesn't support sparse files anyway, but we don't
1059                     // want to create corrupt files on it).
1060                     if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
1061                                         SEEK_CUR) == -1) {
1062                               message_error(_("%s: Seeking failed when trying "
1063                                                   "to create a sparse file: %s"),
1064                                                   pair->dest_name, strerror(errno));
1065                               success = false;
1066                     } else {
1067                               const uint8_t zero[1] = { '\0' };
1068                               if (io_write_buf(pair, zero, 1))
1069                                         success = false;
1070                     }
1071           }
1072 
1073           signals_block();
1074 
1075           // Copy the file attributes. We need to skip this if destination
1076           // file isn't open or it is standard output.
1077           if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
1078                     io_copy_attrs(pair);
1079 
1080           // Close the destination first. If it fails, we must not remove
1081           // the source file!
1082           if (io_close_dest(pair, success))
1083                     success = false;
1084 
1085           // Close the source file, and unlink it if the operation using this
1086           // file pair was successful and we haven't requested to keep the
1087           // source file.
1088           io_close_src(pair, success);
1089 
1090           signals_unblock();
1091 
1092           return;
1093 }
1094 
1095 
1096 extern void
io_fix_src_pos(file_pair * pair,size_t rewind_size)1097 io_fix_src_pos(file_pair *pair, size_t rewind_size)
1098 {
1099           assert(rewind_size <= IO_BUFFER_SIZE);
1100 
1101           if (rewind_size > 0) {
1102                     // This doesn't need to work on unseekable file descriptors,
1103                     // so just ignore possible errors.
1104                     (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
1105           }
1106 
1107           return;
1108 }
1109 
1110 
1111 extern size_t
io_read(file_pair * pair,io_buf * buf_union,size_t size)1112 io_read(file_pair *pair, io_buf *buf_union, size_t size)
1113 {
1114           // We use small buffers here.
1115           assert(size < SSIZE_MAX);
1116 
1117           uint8_t *buf = buf_union->u8;
1118           size_t left = size;
1119 
1120           while (left > 0) {
1121                     const ssize_t amount = read(pair->src_fd, buf, left);
1122 
1123                     if (amount == 0) {
1124                               pair->src_eof = true;
1125                               break;
1126                     }
1127 
1128                     if (amount == -1) {
1129                               if (errno == EINTR) {
1130                                         if (user_abort)
1131                                                   return SIZE_MAX;
1132 
1133                                         continue;
1134                               }
1135 
1136 #ifndef TUKLIB_DOSLIKE
1137                               if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1138                                         const io_wait_ret ret = io_wait(pair,
1139                                                             mytime_get_flush_timeout(),
1140                                                             true);
1141                                         switch (ret) {
1142                                         case IO_WAIT_MORE:
1143                                                   continue;
1144 
1145                                         case IO_WAIT_ERROR:
1146                                                   return SIZE_MAX;
1147 
1148                                         case IO_WAIT_TIMEOUT:
1149                                                   return size - left;
1150 
1151                                         default:
1152                                                   message_bug();
1153                                         }
1154                               }
1155 #endif
1156 
1157                               message_error(_("%s: Read error: %s"),
1158                                                   pair->src_name, strerror(errno));
1159 
1160                               return SIZE_MAX;
1161                     }
1162 
1163                     buf += (size_t)(amount);
1164                     left -= (size_t)(amount);
1165           }
1166 
1167           return size - left;
1168 }
1169 
1170 
1171 extern bool
io_pread(file_pair * pair,io_buf * buf,size_t size,off_t pos)1172 io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos)
1173 {
1174           // Using lseek() and read() is more portable than pread() and
1175           // for us it is as good as real pread().
1176           if (lseek(pair->src_fd, pos, SEEK_SET) != pos) {
1177                     message_error(_("%s: Error seeking the file: %s"),
1178                                         pair->src_name, strerror(errno));
1179                     return true;
1180           }
1181 
1182           const size_t amount = io_read(pair, buf, size);
1183           if (amount == SIZE_MAX)
1184                     return true;
1185 
1186           if (amount != size) {
1187                     message_error(_("%s: Unexpected end of file"),
1188                                         pair->src_name);
1189                     return true;
1190           }
1191 
1192           return false;
1193 }
1194 
1195 
1196 static bool
is_sparse(const io_buf * buf)1197 is_sparse(const io_buf *buf)
1198 {
1199           assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
1200 
1201           for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
1202                     if (buf->u64[i] != 0)
1203                               return false;
1204 
1205           return true;
1206 }
1207 
1208 
1209 static bool
io_write_buf(file_pair * pair,const uint8_t * buf,size_t size)1210 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
1211 {
1212           assert(size < SSIZE_MAX);
1213 
1214           while (size > 0) {
1215                     const ssize_t amount = write(pair->dest_fd, buf, size);
1216                     if (amount == -1) {
1217                               if (errno == EINTR) {
1218                                         if (user_abort)
1219                                                   return true;
1220 
1221                                         continue;
1222                               }
1223 
1224 #ifndef TUKLIB_DOSLIKE
1225                               if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1226                                         if (io_wait(pair, -1, false) == IO_WAIT_MORE)
1227                                                   continue;
1228 
1229                                         return true;
1230                               }
1231 #endif
1232 
1233                               // Handle broken pipe specially. gzip and bzip2
1234                               // don't print anything on SIGPIPE. In addition,
1235                               // gzip --quiet uses exit status 2 (warning) on
1236                               // broken pipe instead of whatever raise(SIGPIPE)
1237                               // would make it return. It is there to hide "Broken
1238                               // pipe" message on some old shells (probably old
1239                               // GNU bash).
1240                               //
1241                               // We don't do anything special with --quiet, which
1242                               // is what bzip2 does too. If we get SIGPIPE, we
1243                               // will handle it like other signals by setting
1244                               // user_abort, and get EPIPE here.
1245                               if (errno != EPIPE)
1246                                         message_error(_("%s: Write error: %s"),
1247                                                   pair->dest_name, strerror(errno));
1248 
1249                               return true;
1250                     }
1251 
1252                     buf += (size_t)(amount);
1253                     size -= (size_t)(amount);
1254           }
1255 
1256           return false;
1257 }
1258 
1259 
1260 extern bool
io_write(file_pair * pair,const io_buf * buf,size_t size)1261 io_write(file_pair *pair, const io_buf *buf, size_t size)
1262 {
1263           assert(size <= IO_BUFFER_SIZE);
1264 
1265           if (pair->dest_try_sparse) {
1266                     // Check if the block is sparse (contains only zeros). If it
1267                     // sparse, we just store the amount and return. We will take
1268                     // care of actually skipping over the hole when we hit the
1269                     // next data block or close the file.
1270                     //
1271                     // Since io_close() requires that dest_pending_sparse > 0
1272                     // if the file ends with sparse block, we must also return
1273                     // if size == 0 to avoid doing the lseek().
1274                     if (size == IO_BUFFER_SIZE) {
1275                               if (is_sparse(buf)) {
1276                                         pair->dest_pending_sparse += size;
1277                                         return false;
1278                               }
1279                     } else if (size == 0) {
1280                               return false;
1281                     }
1282 
1283                     // This is not a sparse block. If we have a pending hole,
1284                     // skip it now.
1285                     if (pair->dest_pending_sparse > 0) {
1286                               if (lseek(pair->dest_fd, pair->dest_pending_sparse,
1287                                                   SEEK_CUR) == -1) {
1288                                         message_error(_("%s: Seeking failed when "
1289                                                             "trying to create a sparse "
1290                                                             "file: %s"), pair->dest_name,
1291                                                             strerror(errno));
1292                                         return true;
1293                               }
1294 
1295                               pair->dest_pending_sparse = 0;
1296                     }
1297           }
1298 
1299           return io_write_buf(pair, buf->u8, size);
1300 }
1301