1 /*        $NetBSD: ukfs.c,v 1.60 2022/04/19 20:32:17 rillig Exp $     */
2 
3 /*
4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Finnish Cultural Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * This library enables access to files systems directly without
33  * involving system calls.
34  */
35 
36 #ifdef __linux__
37 #define _XOPEN_SOURCE 500
38 #define _BSD_SOURCE
39 #define _FILE_OFFSET_BITS 64
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/queue.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 
48 #include <assert.h>
49 #include <dirent.h>
50 #include <dlfcn.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <pthread.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <stdint.h>
60 
61 #include <rump/ukfs.h>
62 
63 #include <rump/rump.h>
64 #include <rump/rumpvnode_if.h>
65 #include <rump/rump_syscalls.h>
66 
67 #include "ukfs_int_disklabel.h"
68 
69 #define UKFS_MODE_DEFAULT 0555
70 
71 struct ukfs {
72           pthread_spinlock_t ukfs_spin;
73 
74           struct mount *ukfs_mp;
75           struct lwp *ukfs_lwp;
76           void *ukfs_specific;
77 
78           int ukfs_devfd;
79 
80           char *ukfs_devpath;
81           char *ukfs_mountpath;
82           char *ukfs_cwd;
83 
84           struct ukfs_part *ukfs_part;
85 };
86 
87 static int builddirs(const char *, mode_t,
88     int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *);
89 
90 struct mount *
ukfs_getmp(struct ukfs * ukfs)91 ukfs_getmp(struct ukfs *ukfs)
92 {
93 
94           return ukfs->ukfs_mp;
95 }
96 
97 void
ukfs_setspecific(struct ukfs * ukfs,void * priv)98 ukfs_setspecific(struct ukfs *ukfs, void *priv)
99 {
100 
101           ukfs->ukfs_specific = priv;
102 }
103 
104 void *
ukfs_getspecific(struct ukfs * ukfs)105 ukfs_getspecific(struct ukfs *ukfs)
106 {
107 
108           return ukfs->ukfs_specific;
109 }
110 
111 #ifdef DONT_WANT_PTHREAD_LINKAGE
112 #define pthread_spin_lock(a)
113 #define pthread_spin_unlock(a)
114 #define pthread_spin_init(a,b)
115 #define pthread_spin_destroy(a)
116 #endif
117 
118 static int
precall(struct ukfs * ukfs,struct lwp ** curlwp)119 precall(struct ukfs *ukfs, struct lwp **curlwp)
120 {
121 
122           /* save previous.  ensure start from pristine context */
123           *curlwp = rump_pub_lwproc_curlwp();
124           if (*curlwp)
125                     rump_pub_lwproc_switch(ukfs->ukfs_lwp);
126           rump_pub_lwproc_rfork(RUMP_RFCFDG);
127 
128           if (rump_sys_chroot(ukfs->ukfs_mountpath) == -1)
129                     return errno;
130           if (rump_sys_chdir(ukfs->ukfs_cwd) == -1)
131                     return errno;
132 
133           return 0;
134 }
135 
136 static void
postcall(struct lwp * curlwp)137 postcall(struct lwp *curlwp)
138 {
139 
140           rump_pub_lwproc_releaselwp();
141           if (curlwp)
142                     rump_pub_lwproc_switch(curlwp);
143 }
144 
145 #define PRECALL()                                                               \
146 struct lwp *ukfs_curlwp;                                                        \
147 do {                                                                                      \
148           int ukfs_rv;                                                                    \
149           if ((ukfs_rv = precall(ukfs, &ukfs_curlwp)) != 0) {                   \
150                     errno = ukfs_rv;                                            \
151                     return -1;                                                            \
152           }                                                                               \
153 } while (0)
154 
155 #define POSTCALL() postcall(ukfs_curlwp);
156 
157 struct ukfs_part {
158           pthread_spinlock_t part_lck;
159           int part_refcount;
160 
161           int part_type;
162           char part_labelchar;
163           off_t part_devoff;
164           off_t part_devsize;
165 };
166 
167 enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };
168 
169 static struct ukfs_part ukfs__part_none = {
170           .part_type = UKFS_PART_NONE,
171           .part_devoff = 0,
172           .part_devsize = RUMP_ETFS_SIZE_ENDOFF,
173 };
174 static struct ukfs_part ukfs__part_na;
175 struct ukfs_part *ukfs_part_none = &ukfs__part_none;
176 struct ukfs_part *ukfs_part_na = &ukfs__part_na;
177 
178 #define PART2LOCKSIZE(len) ((len) == RUMP_ETFS_SIZE_ENDOFF ? 0 : (len))
179 
180 int
_ukfs_init(int version)181 _ukfs_init(int version)
182 {
183           int rv;
184 
185           if (version != UKFS_VERSION) {
186                     errno = EPROGMISMATCH;
187                     warn("incompatible ukfs version, %d vs. %d",
188                         version, UKFS_VERSION);
189                     return -1;
190           }
191 
192           if ((rv = rump_init()) != 0) {
193                     errno = rv;
194                     return -1;
195           }
196 
197           return 0;
198 }
199 
200 /*ARGSUSED*/
201 static int
rumpmkdir(struct ukfs * dummy,const char * path,mode_t mode)202 rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
203 {
204 
205           return rump_sys_mkdir(path, mode);
206 }
207 
208 int
ukfs_part_probe(char * devpath,struct ukfs_part ** partp)209 ukfs_part_probe(char *devpath, struct ukfs_part **partp)
210 {
211           struct ukfs_part *part;
212           char *p;
213           int error = 0;
214           int devfd = -1;
215 
216           if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
217                     warnx("ukfs: %%PART is deprecated.  use "
218                         "%%DISKLABEL instead");
219                     errno = ENODEV;
220                     return -1;
221           }
222 
223           part = malloc(sizeof(*part));
224           if (part == NULL) {
225                     errno = ENOMEM;
226                     return -1;
227           }
228           if (pthread_spin_init(&part->part_lck, PTHREAD_PROCESS_PRIVATE) == -1) {
229                     error = errno;
230                     free(part);
231                     errno = error;
232                     return -1;
233           }
234           part->part_type = UKFS_PART_NONE;
235           part->part_refcount = 1;
236 
237           /*
238            * Check for magic in pathname:
239            *   disklabel: /regularpath%DISKLABEL:labelchar%\0
240            *     offsets: /regularpath%OFFSET:start,end%\0
241            */
242 #define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
243           if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
244               && strlen(p) == UKFS_DISKLABEL_MAGICLEN
245               && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
246                     if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
247                         *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
248                               struct ukfs__disklabel dl;
249                               struct ukfs__partition *pp;
250                               int imswapped;
251                               char buf[65536];
252                               char labelchar = *(MAGICADJ_DISKLABEL(p,0));
253                               int partition = labelchar - 'a';
254                               uint32_t poffset, psize;
255 
256                               *p = '\0';
257                               devfd = open(devpath, O_RDONLY);
258                               if (devfd == -1) {
259                                         error = errno;
260                                         goto out;
261                               }
262 
263                               /* Locate the disklabel and find the partition. */
264                               if (pread(devfd, buf, sizeof(buf), 0) == -1) {
265                                         error = errno;
266                                         goto out;
267                               }
268 
269                               if (ukfs__disklabel_scan(&dl, &imswapped,
270                                   buf, sizeof(buf)) != 0) {
271                                         error = ENOENT;
272                                         goto out;
273                               }
274 
275                               if (dl.d_npartitions < partition) {
276                                         error = ENOENT;
277                                         goto out;
278                               }
279 
280                               pp = &dl.d_partitions[partition];
281                               part->part_type = UKFS_PART_DISKLABEL;
282                               part->part_labelchar = labelchar;
283                               if (imswapped) {
284                                         poffset = bswap32(pp->p_offset);
285                                         psize = bswap32(pp->p_size);
286                               } else {
287                                         poffset = pp->p_offset;
288                                         psize = pp->p_size;
289                               }
290                               part->part_devoff = poffset << DEV_BSHIFT;
291                               part->part_devsize = psize << DEV_BSHIFT;
292                     } else {
293                               error = EINVAL;
294                     }
295 #define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
296           } else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
297               && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
298                     char *comma, *pers, *ep, *nptr;
299                     u_quad_t val;
300 
301                     comma = strchr(p, ',');
302                     if (comma == NULL) {
303                               error = EINVAL;
304                               goto out;
305                     }
306                     pers = strchr(comma, '%');
307                     if (pers == NULL) {
308                               error = EINVAL;
309                               goto out;
310                     }
311                     *comma = '\0';
312                     *pers = '\0';
313                     *p = '\0';
314 
315                     nptr = MAGICADJ_OFFSET(p,0);
316                     /* check if string is negative */
317                     if (*nptr == '-') {
318                               error = ERANGE;
319                               goto out;
320                     }
321                     val = strtouq(nptr, &ep, 10);
322                     if (val == UQUAD_MAX) {
323                               error = ERANGE;
324                               goto out;
325                     }
326                     if (*ep != '\0') {
327                               error = EADDRNOTAVAIL; /* creative ;) */
328                               goto out;
329                     }
330                     part->part_devoff = val;
331 
332                     /* omstart */
333 
334                     nptr = comma+1;
335                     /* check if string is negative */
336                     if (*nptr == '-') {
337                               error = ERANGE;
338                               goto out;
339                     }
340                     val = strtouq(nptr, &ep, 10);
341                     if (val == UQUAD_MAX) {
342                               error = ERANGE;
343                               goto out;
344                     }
345                     if (*ep != '\0') {
346                               error = EADDRNOTAVAIL; /* creative ;) */
347                               goto out;
348                     }
349                     part->part_devsize = val;
350                     part->part_type = UKFS_PART_OFFSET;
351           } else {
352                     ukfs_part_release(part);
353                     part = ukfs_part_none;
354           }
355 
356  out:
357           if (devfd != -1)
358                     close(devfd);
359           if (error) {
360                     free(part);
361                     errno = error;
362           } else {
363                     *partp = part;
364           }
365 
366           return error ? -1 : 0;
367 }
368 
369 int
ukfs_part_tostring(struct ukfs_part * part,char * str,size_t strsize)370 ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
371 {
372           int rv;
373 
374           *str = '\0';
375           /* "pseudo" values */
376           if (part == ukfs_part_na) {
377                     errno = EINVAL;
378                     return -1;
379           }
380           if (part == ukfs_part_none)
381                     return 0;
382 
383           rv = 0;
384           switch (part->part_type) {
385           case UKFS_PART_NONE:
386                     break;
387 
388           case UKFS_PART_DISKLABEL:
389                     snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
390                     rv = 1;
391                     break;
392 
393           case UKFS_PART_OFFSET:
394                     snprintf(str, strsize, "[%llu,%llu]",
395                         (unsigned long long)part->part_devoff,
396                         (unsigned long long)(part->part_devoff+part->part_devsize));
397                     rv = 1;
398                     break;
399           }
400 
401           return rv;
402 }
403 
404 static void
unlockdev(int fd,struct ukfs_part * part)405 unlockdev(int fd, struct ukfs_part *part)
406 {
407           struct flock flarg;
408 
409           if (part == ukfs_part_na)
410                     return;
411 
412           memset(&flarg, 0, sizeof(flarg));
413           flarg.l_type = F_UNLCK;
414           flarg.l_whence = SEEK_SET;
415           flarg.l_start = part->part_devoff;
416           flarg.l_len = PART2LOCKSIZE(part->part_devsize);
417           if (fcntl(fd, F_SETLK, &flarg) == -1)
418                     warn("ukfs: cannot unlock device file");
419 }
420 
421 /*
422  * Open the disk file and flock it.  Also, if we are operation on
423  * an embedded partition, find the partition offset and size from
424  * the disklabel.
425  *
426  * We hard-fail only in two cases:
427  *  1) we failed to get the partition info out (don't know what offset
428  *     to mount from)
429  *  2) we failed to flock the source device (i.e. fcntl() fails,
430  *     not e.g. open() before it)
431  *
432  * Otherwise we let the code proceed to mount and let the file system
433  * throw the proper error.  The only questionable bit is that if we
434  * soft-fail before flock and mount does succeed...
435  *
436  * Returns: -1 error (errno reports error code)
437  *           0 success
438  *
439  * dfdp: -1  device is not open
440  *        n  device is open
441  */
442 static int
process_diskdevice(const char * devpath,struct ukfs_part * part,int rdonly,int * dfdp)443 process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
444           int *dfdp)
445 {
446           struct stat sb;
447           int rv = 0, devfd;
448 
449           /* defaults */
450           *dfdp = -1;
451 
452           devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
453           if (devfd == -1) {
454                     rv = errno;
455                     goto out;
456           }
457 
458           if (fstat(devfd, &sb) == -1) {
459                     rv = errno;
460                     goto out;
461           }
462 
463           /*
464            * We do this only for non-block device since the
465            * (NetBSD) kernel allows block device open only once.
466            * We also need to close the device for fairly obvious reasons.
467            */
468           if (!S_ISBLK(sb.st_mode)) {
469                     struct flock flarg;
470 
471                     memset(&flarg, 0, sizeof(flarg));
472                     flarg.l_type = rdonly ? F_RDLCK : F_WRLCK;
473                     flarg.l_whence = SEEK_SET;
474                     flarg.l_start = part->part_devoff;
475                     flarg.l_len = PART2LOCKSIZE(part->part_devsize);
476                     if (fcntl(devfd, F_SETLK, &flarg) == -1) {
477                               pid_t holder;
478                               int sverrno;
479 
480                               sverrno = errno;
481                               if (fcntl(devfd, F_GETLK, &flarg) != 1)
482                                         holder = flarg.l_pid;
483                               else
484                                         holder = -1;
485                               warnx("ukfs_mount: cannot lock device.  held by pid %d",
486                                   holder);
487                               rv = sverrno;
488                               goto out;
489                     }
490           } else {
491                     close(devfd);
492                     devfd = -1;
493           }
494           *dfdp = devfd;
495 
496  out:
497           if (rv) {
498                     if (devfd != -1)
499                               close(devfd);
500           }
501 
502           return rv;
503 }
504 
505 struct mountinfo {
506           const char *mi_vfsname;
507           const char *mi_mountpath;
508           int mi_mntflags;
509           void *mi_arg;
510           size_t mi_alen;
511           int *mi_error;
512 };
513 static void *
mfs_mounter(void * arg)514 mfs_mounter(void *arg)
515 {
516           struct mountinfo *mi = arg;
517           int rv;
518 
519           rv = rump_sys_mount(mi->mi_vfsname, mi->mi_mountpath, mi->mi_mntflags,
520               mi->mi_arg, mi->mi_alen);
521           if (rv) {
522                     warn("mfs mount failed.  fix me.");
523                     abort(); /* XXX */
524           }
525 
526           return NULL;
527 }
528 
529 static struct ukfs *
doukfsmount(const char * vfsname,const char * devpath,struct ukfs_part * part,const char * mountpath,int mntflags,void * arg,size_t alen)530 doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
531           const char *mountpath, int mntflags, void *arg, size_t alen)
532 {
533           struct ukfs *fs = NULL;
534           struct lwp *curlwp;
535           int rv = 0, devfd = -1;
536           int mounted = 0;
537           int regged = 0;
538 
539           pthread_spin_lock(&part->part_lck);
540           part->part_refcount++;
541           pthread_spin_unlock(&part->part_lck);
542           if (part != ukfs_part_na) {
543                     if ((rv = process_diskdevice(devpath, part,
544                         mntflags & MNT_RDONLY, &devfd)) != 0)
545                               goto out;
546           }
547 
548           fs = malloc(sizeof(struct ukfs));
549           if (fs == NULL) {
550                     rv = ENOMEM;
551                     goto out;
552           }
553           memset(fs, 0, sizeof(struct ukfs));
554 
555           /* create our mountpoint.  this is never removed. */
556           if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
557                     if (errno != EEXIST) {
558                               rv = errno;
559                               goto out;
560                     }
561           }
562 
563           if (part != ukfs_part_na) {
564                     /* LINTED */
565                     rv = rump_pub_etfs_register_withsize(devpath, devpath,
566                         RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
567                     if (rv) {
568                               goto out;
569                     }
570                     regged = 1;
571           }
572 
573           /*
574            * MFS is special since mount(2) doesn't return.  Hence, we
575            * create a thread here.  Could fix mfs to return, but there's
576            * too much history for me to bother.
577            */
578           if (strcmp(vfsname, MOUNT_MFS) == 0) {
579                     pthread_t pt;
580                     struct mountinfo mi;
581                     int i;
582 
583                     mi.mi_vfsname = vfsname;
584                     mi.mi_mountpath = mountpath;
585                     mi.mi_mntflags = mntflags;
586                     mi.mi_arg = arg;
587                     mi.mi_alen = alen;
588 
589                     if (pthread_create(&pt, NULL, mfs_mounter, &mi) == -1) {
590                               rv = errno;
591                               goto out;
592                     }
593 
594                     for (i = 0;i < 100000; i++) {
595                               struct statvfs svfsb;
596 
597                               rv = rump_sys_statvfs1(mountpath, &svfsb, ST_WAIT);
598                               if (rv == -1) {
599                                         rv = errno;
600                                         goto out;
601                               }
602 
603                               if (strcmp(svfsb.f_mntonname, mountpath) == 0 &&
604                                   strcmp(svfsb.f_fstypename, MOUNT_MFS) == 0) {
605                                         break;
606                               }
607                               usleep(1);
608                     }
609           } else {
610                     rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
611                     if (rv) {
612                               rv = errno;
613                               goto out;
614                     }
615           }
616 
617           mounted = 1;
618           rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
619           if (rv) {
620                     goto out;
621           }
622 
623           if (regged) {
624                     fs->ukfs_devpath = strdup(devpath);
625           }
626           fs->ukfs_mountpath = strdup(mountpath);
627           pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
628           fs->ukfs_devfd = devfd;
629           fs->ukfs_part = part;
630           assert(rv == 0);
631 
632           curlwp = rump_pub_lwproc_curlwp();
633           rump_pub_lwproc_newlwp(0);
634           fs->ukfs_lwp = rump_pub_lwproc_curlwp();
635           fs->ukfs_cwd = strdup("/");
636           rump_pub_lwproc_switch(curlwp);
637 
638  out:
639           if (rv) {
640                     if (fs) {
641                               free(fs);
642                               fs = NULL;
643                     }
644                     if (mounted)
645                               rump_sys_unmount(mountpath, MNT_FORCE);
646                     if (regged)
647                               rump_pub_etfs_remove(devpath);
648                     if (devfd != -1) {
649                               unlockdev(devfd, part);
650                               close(devfd);
651                     }
652                     ukfs_part_release(part);
653                     errno = rv;
654           }
655 
656           return fs;
657 }
658 
659 struct ukfs *
ukfs_mount(const char * vfsname,const char * devpath,const char * mountpath,int mntflags,void * arg,size_t alen)660 ukfs_mount(const char *vfsname, const char *devpath,
661           const char *mountpath, int mntflags, void *arg, size_t alen)
662 {
663 
664           return doukfsmount(vfsname, devpath, ukfs_part_na,
665               mountpath, mntflags, arg, alen);
666 }
667 
668 struct ukfs *
ukfs_mount_disk(const char * vfsname,const char * devpath,struct ukfs_part * part,const char * mountpath,int mntflags,void * arg,size_t alen)669 ukfs_mount_disk(const char *vfsname, const char *devpath,
670           struct ukfs_part *part, const char *mountpath, int mntflags,
671           void *arg, size_t alen)
672 {
673 
674           return doukfsmount(vfsname, devpath, part,
675               mountpath, mntflags, arg, alen);
676 }
677 
678 int
ukfs_release(struct ukfs * fs,int flags)679 ukfs_release(struct ukfs *fs, int flags)
680 {
681           struct lwp *curlwp = rump_pub_lwproc_curlwp();
682 
683           /* get root lwp */
684           rump_pub_lwproc_switch(fs->ukfs_lwp);
685           rump_pub_lwproc_rfork(RUMP_RFCFDG);
686 
687           if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
688                     int rv, mntflag, error;
689 
690                     mntflag = 0;
691                     if (flags & UKFS_RELFLAG_FORCE)
692                               mntflag = MNT_FORCE;
693 
694                     rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
695                     if (rv == -1) {
696                               error = errno;
697                               rump_pub_lwproc_releaselwp();
698                               if (curlwp)
699                                         rump_pub_lwproc_switch(curlwp);
700                               errno = error;
701                               return -1;
702                     }
703           }
704 
705           if (fs->ukfs_devpath) {
706                     rump_pub_etfs_remove(fs->ukfs_devpath);
707                     free(fs->ukfs_devpath);
708           }
709           free(fs->ukfs_mountpath);
710           free(fs->ukfs_cwd);
711 
712           /* release this routine's lwp and ukfs base lwp */
713           rump_pub_lwproc_releaselwp();
714           rump_pub_lwproc_switch(fs->ukfs_lwp);
715           rump_pub_lwproc_releaselwp();
716 
717           pthread_spin_destroy(&fs->ukfs_spin);
718           if (fs->ukfs_devfd != -1) {
719                     unlockdev(fs->ukfs_devfd, fs->ukfs_part);
720                     close(fs->ukfs_devfd);
721           }
722           ukfs_part_release(fs->ukfs_part);
723           free(fs);
724 
725           if (curlwp)
726                     rump_pub_lwproc_switch(curlwp);
727 
728           return 0;
729 }
730 
731 void
ukfs_part_release(struct ukfs_part * part)732 ukfs_part_release(struct ukfs_part *part)
733 {
734           int release;
735 
736           if (part != ukfs_part_none && part != ukfs_part_na) {
737                     pthread_spin_lock(&part->part_lck);
738                     release = --part->part_refcount == 0;
739                     pthread_spin_unlock(&part->part_lck);
740                     if (release) {
741                               pthread_spin_destroy(&part->part_lck);
742                               free(part);
743                     }
744           }
745 }
746 
747 #define STDCALL(ukfs, thecall)                                                            \
748           int rv = 0;                                                                     \
749                                                                                           \
750           PRECALL();                                                                      \
751           rv = thecall;                                                                   \
752           POSTCALL();                                                                     \
753           return rv;
754 
755 int
ukfs_opendir(struct ukfs * ukfs,const char * dirname,struct ukfs_dircookie ** c)756 ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
757 {
758           struct vnode *vp;
759           int rv;
760 
761           PRECALL();
762           rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
763               NULL, &vp, NULL);
764           POSTCALL();
765 
766           if (rv == 0) {
767                     RUMP_VOP_UNLOCK(vp);
768           } else {
769                     errno = rv;
770                     rv = -1;
771           }
772 
773           /*LINTED*/
774           *c = (struct ukfs_dircookie *)vp;
775           return rv;
776 }
777 
778 static int
getmydents(struct vnode * vp,off_t * off,uint8_t * buf,size_t bufsize)779 getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
780 {
781           struct uio *uio;
782           size_t resid;
783           int rv, eofflag;
784           struct kauth_cred *cred;
785 
786           uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
787           cred = rump_pub_cred_create(0, 0, 0, NULL);
788           rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
789           rump_pub_cred_put(cred);
790           RUMP_VOP_UNLOCK(vp);
791           *off = rump_pub_uio_getoff(uio);
792           resid = rump_pub_uio_free(uio);
793 
794           if (rv) {
795                     errno = rv;
796                     return -1;
797           }
798 
799           /* LINTED: not totally correct return type, but follows syscall */
800           return bufsize - resid;
801 }
802 
803 /*ARGSUSED*/
804 int
ukfs_getdents_cookie(struct ukfs * ukfs,struct ukfs_dircookie * c,off_t * off,uint8_t * buf,size_t bufsize)805 ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
806           uint8_t *buf, size_t bufsize)
807 {
808           /*LINTED*/
809           struct vnode *vp = (struct vnode *)c;
810 
811           RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
812           return getmydents(vp, off, buf, bufsize);
813 }
814 
815 int
ukfs_getdents(struct ukfs * ukfs,const char * dirname,off_t * off,uint8_t * buf,size_t bufsize)816 ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
817           uint8_t *buf, size_t bufsize)
818 {
819           struct vnode *vp;
820           int rv;
821 
822           PRECALL();
823           rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
824               NULL, &vp, NULL);
825           if (rv) {
826                     POSTCALL();
827                     errno = rv;
828                     return -1;
829           }
830 
831           rv = getmydents(vp, off, buf, bufsize);
832           rump_pub_vp_rele(vp);
833           POSTCALL();
834           return rv;
835 }
836 
837 /*ARGSUSED*/
838 int
ukfs_closedir(struct ukfs * ukfs,struct ukfs_dircookie * c)839 ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
840 {
841 
842           /*LINTED*/
843           rump_pub_vp_rele((struct vnode *)c);
844           return 0;
845 }
846 
847 int
ukfs_open(struct ukfs * ukfs,const char * filename,int flags)848 ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
849 {
850           int fd;
851 
852           PRECALL();
853           fd = rump_sys_open(filename, flags, 0);
854           POSTCALL();
855           if (fd == -1)
856                     return -1;
857 
858           return fd;
859 }
860 
861 ssize_t
ukfs_read(struct ukfs * ukfs,const char * filename,off_t off,uint8_t * buf,size_t bufsize)862 ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
863           uint8_t *buf, size_t bufsize)
864 {
865           int fd;
866           ssize_t xfer = -1; /* XXXgcc */
867 
868           PRECALL();
869           fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
870           if (fd == -1)
871                     goto out;
872 
873           xfer = rump_sys_pread(fd, buf, bufsize, off);
874           rump_sys_close(fd);
875 
876  out:
877           POSTCALL();
878           if (fd == -1) {
879                     return -1;
880           }
881           return xfer;
882 }
883 
884 /*ARGSUSED*/
885 ssize_t
ukfs_read_fd(struct ukfs * ukfs,int fd,off_t off,uint8_t * buf,size_t buflen)886 ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
887 {
888 
889           return rump_sys_pread(fd, buf, buflen, off);
890 }
891 
892 ssize_t
ukfs_write(struct ukfs * ukfs,const char * filename,off_t off,uint8_t * buf,size_t bufsize)893 ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
894           uint8_t *buf, size_t bufsize)
895 {
896           int fd;
897           ssize_t xfer = -1; /* XXXgcc */
898 
899           PRECALL();
900           fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
901           if (fd == -1)
902                     goto out;
903 
904           /* write and commit */
905           xfer = rump_sys_pwrite(fd, buf, bufsize, off);
906           if (xfer > 0)
907                     rump_sys_fsync(fd);
908 
909           rump_sys_close(fd);
910 
911  out:
912           POSTCALL();
913           if (fd == -1) {
914                     return -1;
915           }
916           return xfer;
917 }
918 
919 /*ARGSUSED*/
920 ssize_t
ukfs_write_fd(struct ukfs * ukfs,int fd,off_t off,uint8_t * buf,size_t buflen,int dosync)921 ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
922           int dosync)
923 {
924           ssize_t xfer;
925 
926           xfer = rump_sys_pwrite(fd, buf, buflen, off);
927           if (xfer > 0 && dosync)
928                     rump_sys_fsync(fd);
929 
930           return xfer;
931 }
932 
933 /*ARGSUSED*/
934 int
ukfs_close(struct ukfs * ukfs,int fd)935 ukfs_close(struct ukfs *ukfs, int fd)
936 {
937 
938           rump_sys_close(fd);
939           return 0;
940 }
941 
942 int
ukfs_create(struct ukfs * ukfs,const char * filename,mode_t mode)943 ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
944 {
945           int fd;
946 
947           PRECALL();
948           fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
949           if (fd == -1)
950                     return -1;
951           rump_sys_close(fd);
952 
953           POSTCALL();
954           return 0;
955 }
956 
957 int
ukfs_mknod(struct ukfs * ukfs,const char * path,mode_t mode,dev_t dev)958 ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
959 {
960 
961           STDCALL(ukfs, rump_sys_mknod(path, mode, dev));
962 }
963 
964 int
ukfs_mkfifo(struct ukfs * ukfs,const char * path,mode_t mode)965 ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
966 {
967 
968           STDCALL(ukfs, rump_sys_mkfifo(path, mode));
969 }
970 
971 int
ukfs_mkdir(struct ukfs * ukfs,const char * filename,mode_t mode)972 ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
973 {
974 
975           STDCALL(ukfs, rump_sys_mkdir(filename, mode));
976 }
977 
978 int
ukfs_remove(struct ukfs * ukfs,const char * filename)979 ukfs_remove(struct ukfs *ukfs, const char *filename)
980 {
981 
982           STDCALL(ukfs, rump_sys_unlink(filename));
983 }
984 
985 int
ukfs_rmdir(struct ukfs * ukfs,const char * filename)986 ukfs_rmdir(struct ukfs *ukfs, const char *filename)
987 {
988 
989           STDCALL(ukfs, rump_sys_rmdir(filename));
990 }
991 
992 int
ukfs_link(struct ukfs * ukfs,const char * filename,const char * f_create)993 ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
994 {
995 
996           STDCALL(ukfs, rump_sys_link(filename, f_create));
997 }
998 
999 int
ukfs_symlink(struct ukfs * ukfs,const char * filename,const char * linkname)1000 ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
1001 {
1002 
1003           STDCALL(ukfs, rump_sys_symlink(filename, linkname));
1004 }
1005 
1006 ssize_t
ukfs_readlink(struct ukfs * ukfs,const char * filename,char * linkbuf,size_t buflen)1007 ukfs_readlink(struct ukfs *ukfs, const char *filename,
1008           char *linkbuf, size_t buflen)
1009 {
1010           ssize_t rv;
1011 
1012           PRECALL();
1013           rv = rump_sys_readlink(filename, linkbuf, buflen);
1014           POSTCALL();
1015           return rv;
1016 }
1017 
1018 int
ukfs_rename(struct ukfs * ukfs,const char * from,const char * to)1019 ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
1020 {
1021 
1022           STDCALL(ukfs, rump_sys_rename(from, to));
1023 }
1024 
1025 int
ukfs_chdir(struct ukfs * ukfs,const char * path)1026 ukfs_chdir(struct ukfs *ukfs, const char *path)
1027 {
1028           char *newpath, *oldpath;
1029           int rv;
1030 
1031           PRECALL();
1032           rv = rump_sys_chdir(path);
1033           if (rv == -1)
1034                     goto out;
1035 
1036           newpath = malloc(MAXPATHLEN);
1037           if (rump_sys___getcwd(newpath, MAXPATHLEN) == -1) {
1038                     goto out;
1039           }
1040 
1041           pthread_spin_lock(&ukfs->ukfs_spin);
1042           oldpath = ukfs->ukfs_cwd;
1043           ukfs->ukfs_cwd = newpath;
1044           pthread_spin_unlock(&ukfs->ukfs_spin);
1045           free(oldpath);
1046 
1047  out:
1048           POSTCALL();
1049           return rv;
1050 }
1051 
1052 int
ukfs_stat(struct ukfs * ukfs,const char * filename,struct stat * file_stat)1053 ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
1054 {
1055           int rv;
1056 
1057           PRECALL();
1058           rv = rump_sys_stat(filename, file_stat);
1059           POSTCALL();
1060 
1061           return rv;
1062 }
1063 
1064 int
ukfs_lstat(struct ukfs * ukfs,const char * filename,struct stat * file_stat)1065 ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
1066 {
1067           int rv;
1068 
1069           PRECALL();
1070           rv = rump_sys_lstat(filename, file_stat);
1071           POSTCALL();
1072 
1073           return rv;
1074 }
1075 
1076 int
ukfs_chmod(struct ukfs * ukfs,const char * filename,mode_t mode)1077 ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
1078 {
1079 
1080           STDCALL(ukfs, rump_sys_chmod(filename, mode));
1081 }
1082 
1083 int
ukfs_lchmod(struct ukfs * ukfs,const char * filename,mode_t mode)1084 ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
1085 {
1086 
1087           STDCALL(ukfs, rump_sys_lchmod(filename, mode));
1088 }
1089 
1090 int
ukfs_chown(struct ukfs * ukfs,const char * filename,uid_t uid,gid_t gid)1091 ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
1092 {
1093 
1094           STDCALL(ukfs, rump_sys_chown(filename, uid, gid));
1095 }
1096 
1097 int
ukfs_lchown(struct ukfs * ukfs,const char * filename,uid_t uid,gid_t gid)1098 ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
1099 {
1100 
1101           STDCALL(ukfs, rump_sys_lchown(filename, uid, gid));
1102 }
1103 
1104 int
ukfs_chflags(struct ukfs * ukfs,const char * filename,u_long flags)1105 ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
1106 {
1107 
1108           STDCALL(ukfs, rump_sys_chflags(filename, flags));
1109 }
1110 
1111 int
ukfs_lchflags(struct ukfs * ukfs,const char * filename,u_long flags)1112 ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
1113 {
1114 
1115           STDCALL(ukfs, rump_sys_lchflags(filename, flags));
1116 }
1117 
1118 int
ukfs_utimes(struct ukfs * ukfs,const char * filename,const struct timeval * tptr)1119 ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
1120 {
1121 
1122           STDCALL(ukfs, rump_sys_utimes(filename, tptr));
1123 }
1124 
1125 int
ukfs_lutimes(struct ukfs * ukfs,const char * filename,const struct timeval * tptr)1126 ukfs_lutimes(struct ukfs *ukfs, const char *filename,
1127                 const struct timeval *tptr)
1128 {
1129 
1130           STDCALL(ukfs, rump_sys_lutimes(filename, tptr));
1131 }
1132 
1133 /*
1134  * Dynamic module support
1135  */
1136 
1137 /* load one library */
1138 
1139 /*
1140  * XXX: the dlerror stuff isn't really threadsafe, but then again I
1141  * can't protect against other threads calling dl*() outside of ukfs,
1142  * so just live with it being flimsy
1143  */
1144 int
ukfs_modload(const char * fname)1145 ukfs_modload(const char *fname)
1146 {
1147           void *handle;
1148           const struct modinfo *const *mi_start, *const *mi_end;
1149           int error;
1150 
1151           handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
1152           if (handle == NULL) {
1153                     const char *dlmsg = dlerror();
1154                     if (strstr(dlmsg, "Undefined symbol"))
1155                               return 0;
1156                     warnx("dlopen %s failed: %s", fname, dlmsg);
1157                     /* XXXerrno */
1158                     return -1;
1159           }
1160 
1161           mi_start = dlsym(handle, "__start_link_set_modules");
1162           mi_end = dlsym(handle, "__stop_link_set_modules");
1163           if (mi_start && mi_end) {
1164                     error = rump_pub_module_init(mi_start,
1165                         (size_t)(mi_end-mi_start));
1166                     if (error)
1167                               goto errclose;
1168                     return 1;
1169           }
1170           error = EINVAL;
1171 
1172  errclose:
1173           dlclose(handle);
1174           errno = error;
1175           return -1;
1176 }
1177 
1178 struct loadfail {
1179           char *pname;
1180 
1181           LIST_ENTRY(loadfail) entries;
1182 };
1183 
1184 #define RUMPFSMOD_PREFIX "librumpfs_"
1185 #define RUMPFSMOD_SUFFIX ".so"
1186 
1187 int
ukfs_modload_dir(const char * dir)1188 ukfs_modload_dir(const char *dir)
1189 {
1190           char nbuf[MAXPATHLEN+1], *p;
1191           struct dirent entry, *result;
1192           DIR *libdir;
1193           struct loadfail *lf, *nlf;
1194           int error, nloaded = 0, redo;
1195           LIST_HEAD(, loadfail) lfs;
1196 
1197           libdir = opendir(dir);
1198           if (libdir == NULL)
1199                     return -1;
1200 
1201           LIST_INIT(&lfs);
1202           for (;;) {
1203                     if ((error = readdir_r(libdir, &entry, &result)) != 0)
1204                               break;
1205                     if (!result)
1206                               break;
1207                     if (strncmp(result->d_name, RUMPFSMOD_PREFIX,
1208                         strlen(RUMPFSMOD_PREFIX)) != 0)
1209                               continue;
1210                     if (((p = strstr(result->d_name, RUMPFSMOD_SUFFIX)) == NULL)
1211                         || strlen(p) != strlen(RUMPFSMOD_SUFFIX))
1212                               continue;
1213                     strlcpy(nbuf, dir, sizeof(nbuf));
1214                     strlcat(nbuf, "/", sizeof(nbuf));
1215                     strlcat(nbuf, result->d_name, sizeof(nbuf));
1216                     switch (ukfs_modload(nbuf)) {
1217                     case 0:
1218                               lf = malloc(sizeof(*lf));
1219                               if (lf == NULL) {
1220                                         error = ENOMEM;
1221                                         break;
1222                               }
1223                               lf->pname = strdup(nbuf);
1224                               if (lf->pname == NULL) {
1225                                         free(lf);
1226                                         error = ENOMEM;
1227                                         break;
1228                               }
1229                               LIST_INSERT_HEAD(&lfs, lf, entries);
1230                               break;
1231                     case 1:
1232                               nloaded++;
1233                               break;
1234                     default:
1235                               /* ignore errors */
1236                               break;
1237                     }
1238           }
1239           closedir(libdir);
1240           if (error && nloaded != 0)
1241                     error = 0;
1242 
1243           /*
1244            * El-cheapo dependency calculator.  Just try to load the
1245            * modules n times in a loop
1246            */
1247           for (redo = 1; redo;) {
1248                     redo = 0;
1249                     nlf = LIST_FIRST(&lfs);
1250                     while ((lf = nlf) != NULL) {
1251                               nlf = LIST_NEXT(lf, entries);
1252                               if (ukfs_modload(lf->pname) == 1) {
1253                                         nloaded++;
1254                                         redo = 1;
1255                                         LIST_REMOVE(lf, entries);
1256                                         free(lf->pname);
1257                                         free(lf);
1258                               }
1259                     }
1260           }
1261 
1262           while ((lf = LIST_FIRST(&lfs)) != NULL) {
1263                     LIST_REMOVE(lf, entries);
1264                     free(lf->pname);
1265                     free(lf);
1266           }
1267 
1268           if (error && nloaded == 0) {
1269                     errno = error;
1270                     return -1;
1271           }
1272 
1273           return nloaded;
1274 }
1275 
1276 /* XXX: this code uses definitions from NetBSD, needs rumpdefs */
1277 ssize_t
ukfs_vfstypes(char * buf,size_t buflen)1278 ukfs_vfstypes(char *buf, size_t buflen)
1279 {
1280           int mib[3];
1281           struct sysctlnode q, ans[128];
1282           size_t alen;
1283           int i;
1284 
1285           mib[0] = CTL_VFS;
1286           mib[1] = VFS_GENERIC;
1287           mib[2] = CTL_QUERY;
1288           alen = sizeof(ans);
1289 
1290           memset(&q, 0, sizeof(q));
1291           q.sysctl_flags = SYSCTL_VERSION;
1292 
1293           if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
1294                     return -1;
1295           }
1296 
1297           for (i = 0; i < alen/sizeof(ans[0]); i++)
1298                     if (strcmp("fstypes", ans[i].sysctl_name) == 0)
1299                               break;
1300           if (i == alen/sizeof(ans[0])) {
1301                     errno = ENXIO;
1302                     return -1;
1303           }
1304 
1305           mib[0] = CTL_VFS;
1306           mib[1] = VFS_GENERIC;
1307           mib[2] = ans[i].sysctl_num;
1308 
1309           if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
1310                     return -1;
1311           }
1312 
1313           return buflen;
1314 }
1315 
1316 /*
1317  * Utilities
1318  */
1319 static int
builddirs(const char * pathname,mode_t mode,int (* mkdirfn)(struct ukfs *,const char *,mode_t),struct ukfs * fs)1320 builddirs(const char *pathname, mode_t mode,
1321           int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
1322 {
1323           char *f1, *f2;
1324           int rv;
1325           mode_t mask;
1326           bool end;
1327 
1328           /*ukfs_umask((mask = ukfs_umask(0)));*/
1329           umask((mask = umask(0)));
1330 
1331           f1 = f2 = strdup(pathname);
1332           if (f1 == NULL) {
1333                     errno = ENOMEM;
1334                     return -1;
1335           }
1336 
1337           end = false;
1338           for (;;) {
1339                     /* find next component */
1340                     f2 += strspn(f2, "/");
1341                     f2 += strcspn(f2, "/");
1342                     if (*f2 == '\0')
1343                               end = true;
1344                     else
1345                               *f2 = '\0';
1346 
1347                     rv = mkdirfn(fs, f1, mode & ~mask);
1348                     if (errno == EEXIST)
1349                               rv = 0;
1350 
1351                     if (rv == -1 || *f2 != '\0' || end)
1352                               break;
1353 
1354                     *f2 = '/';
1355           }
1356 
1357           free(f1);
1358 
1359           return rv;
1360 }
1361 
1362 int
ukfs_util_builddirs(struct ukfs * ukfs,const char * pathname,mode_t mode)1363 ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
1364 {
1365 
1366           return builddirs(pathname, mode, ukfs_mkdir, ukfs);
1367 }
1368