xref: /dragonfly/sys/kern/vfs_syscalls.c (revision b07befac9dbf0ff12cd56e44bb635c15145d23f6)
1 /*
2  * Copyright (c) 1989, 1993
3  *        The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *        @(#)vfs_syscalls.c  8.13 (Berkeley) 4/15/94
35  * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
36  */
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/buf.h>
41 #include <sys/conf.h>
42 #include <sys/sysent.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/mountctl.h>
46 #include <sys/sysmsg.h>
47 #include <sys/filedesc.h>
48 #include <sys/kernel.h>
49 #include <sys/fcntl.h>
50 #include <sys/file.h>
51 #include <sys/linker.h>
52 #include <sys/stat.h>
53 #include <sys/unistd.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/caps.h>
57 #include <sys/jail.h>
58 #include <sys/namei.h>
59 #include <sys/nlookup.h>
60 #include <sys/dirent.h>
61 #include <sys/extattr.h>
62 #include <sys/spinlock.h>
63 #include <sys/kern_syscall.h>
64 #include <sys/objcache.h>
65 #include <sys/sysctl.h>
66 
67 #include <sys/buf2.h>
68 #include <sys/file2.h>
69 #include <sys/spinlock2.h>
70 
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 
75 #include <machine/limits.h>
76 #include <machine/stdarg.h>
77 
78 #define UMOUNTF_RETRIES                 50        /* 0.25 seconds per retry */
79 
80 static void mount_warning(struct mount *mp, const char *ctl, ...)
81                     __printflike(2, 3);
82 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
83 static int checkvp_chdir (struct vnode *vn, struct thread *td);
84 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
85 static int get_fscap(const char *);
86 static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
87 static int chroot_visible_mnt(struct mount *mp, struct proc *p);
88 static int getutimes (struct timeval *, struct timespec *);
89 static int getutimens (const struct timespec *, struct timespec *, int *);
90 static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
91 static int setfmode (struct vnode *, int);
92 static int setfflags (struct vnode *, u_long);
93 static int setutimes (struct vnode *, struct vattr *,
94                               const struct timespec *, int);
95 
96 static int          usermount = 0;      /* if 1, non-root can mount fs. */
97 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
98     "Allow non-root users to mount filesystems");
99 
100 static int          debug_unmount = 0; /* if 1 loop until unmount success */
101 SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0,
102     "Stall failed unmounts in loop");
103 
104 static struct krate krate_rename = { 1 };
105 
106 /*
107  * Virtual File System System Calls
108  */
109 
110 /*
111  * Mount a file system.
112  *
113  * mount_args(char *type, char *path, int flags, caddr_t data)
114  *
115  * MPALMOSTSAFE
116  */
117 int
sys_mount(struct sysmsg * sysmsg,const struct mount_args * uap)118 sys_mount(struct sysmsg *sysmsg, const struct mount_args *uap)
119 {
120           struct thread *td = curthread;
121           struct vnode *vp;
122           struct nchandle nch;
123           struct mount *mp, *nullmp;
124           struct vfsconf *vfsp;
125           int error, flag = 0, flag2 = 0;
126           int hasmount;
127           int priv = 0;
128           int flags = uap->flags;
129           struct vattr va;
130           struct nlookupdata nd;
131           char fstypename[MFSNAMELEN];
132           struct ucred *cred;
133 
134           cred = td->td_ucred;
135 
136           /* We do not allow user mounts inside a jail for now */
137           if (usermount && jailed(cred)) {
138                     error = EPERM;
139                     goto done;
140           }
141 
142           /*
143            * Extract the file system type. We need to know this early, to take
144            * appropriate actions for jails and the filesystems to mount.
145            */
146         if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0)
147                     goto done;
148 
149           /*
150            * Select the correct cap according to the file system type.
151            */
152           priv = get_fscap(fstypename);
153 
154           if (usermount == 0 && (error = caps_priv_check_td(td, priv)))
155                     goto done;
156 
157           /*
158            * Do not allow NFS export by non-root users.
159            */
160           if (flags & MNT_EXPORTED) {
161                     error = caps_priv_check_td(td, priv);
162                     if (error)
163                               goto done;
164           }
165           /*
166            * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
167            */
168           if (caps_priv_check_td(td, priv))
169                     flags |= MNT_NOSUID | MNT_NODEV;
170 
171           /*
172            * Lookup the requested path and extract the nch and vnode.
173            */
174           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
175           if (error == 0) {
176                     if ((error = nlookup(&nd)) == 0) {
177                               if (nd.nl_nch.ncp->nc_vp == NULL)
178                                         error = ENOENT;
179                     }
180           }
181           if (error) {
182                     nlookup_done(&nd);
183                     goto done;
184           }
185 
186           /*
187            * If the target filesystem is resolved via a nullfs mount, then
188            * nd.nl_nch.mount will be pointing to the nullfs mount structure
189            * instead of the target file system. We need it in case we are
190            * doing an update.
191            */
192           nullmp = nd.nl_nch.mount;
193 
194           /*
195            * Extract the locked+refd ncp and cleanup the nd structure
196            */
197           nch = nd.nl_nch;
198           cache_zero(&nd.nl_nch);
199           nlookup_done(&nd);
200 
201           if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
202               (mp = cache_findmount(&nch)) != NULL) {
203                     cache_dropmount(mp);
204                     hasmount = 1;
205           } else {
206                     hasmount = 0;
207           }
208 
209 
210           /*
211            * now we have the locked ref'd nch and unreferenced vnode.
212            */
213           vp = nch.ncp->nc_vp;
214           if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
215                     cache_put(&nch);
216                     goto done;
217           }
218           cache_unlock(&nch);
219 
220           /*
221            * Now we have an unlocked ref'd nch and a locked ref'd vp
222            */
223           if (flags & MNT_UPDATE) {
224                     if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
225                               cache_drop(&nch);
226                               vput(vp);
227                               error = EINVAL;
228                               goto done;
229                     }
230 
231                     if (strncmp(fstypename, "null", 5) == 0) {
232                               KKASSERT(nullmp);
233                               mp = nullmp;
234                     } else {
235                               mp = vp->v_mount;
236                     }
237 
238                     flag = mp->mnt_flag;
239                     flag2 = mp->mnt_kern_flag;
240                     /*
241                      * We only allow the filesystem to be reloaded if it
242                      * is currently mounted read-only.
243                      */
244                     if ((flags & MNT_RELOAD) &&
245                         ((mp->mnt_flag & MNT_RDONLY) == 0)) {
246                               cache_drop(&nch);
247                               vput(vp);
248                               error = EOPNOTSUPP; /* Needs translation */
249                               goto done;
250                     }
251                     /*
252                      * Only root, or the user that did the original mount is
253                      * permitted to update it.
254                      */
255                     if (mp->mnt_stat.f_owner != cred->cr_uid &&
256                         (error = caps_priv_check_td(td, priv))) {
257                               cache_drop(&nch);
258                               vput(vp);
259                               goto done;
260                     }
261                     if (vfs_busy(mp, LK_NOWAIT)) {
262                               cache_drop(&nch);
263                               vput(vp);
264                               error = EBUSY;
265                               goto done;
266                     }
267                     if (hasmount) {
268                               cache_drop(&nch);
269                               vfs_unbusy(mp);
270                               vput(vp);
271                               error = EBUSY;
272                               goto done;
273                     }
274                     mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
275                     lwkt_gettoken(&mp->mnt_token);
276                     vn_unlock(vp);
277                     vfsp = mp->mnt_vfc;
278                     goto update;
279           }
280 
281           /*
282            * If the user is not root, ensure that they own the directory
283            * onto which we are attempting to mount.
284            */
285           if ((error = VOP_GETATTR(vp, &va)) ||
286               (va.va_uid != cred->cr_uid &&
287                (error = caps_priv_check_td(td, priv)))) {
288                     cache_drop(&nch);
289                     vput(vp);
290                     goto done;
291           }
292           if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
293                     cache_drop(&nch);
294                     vput(vp);
295                     goto done;
296           }
297           if (vp->v_type != VDIR) {
298                     cache_drop(&nch);
299                     vput(vp);
300                     error = ENOTDIR;
301                     goto done;
302           }
303           if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
304                     cache_drop(&nch);
305                     vput(vp);
306                     error = EPERM;
307                     goto done;
308           }
309           vfsp = vfsconf_find_by_name(fstypename);
310           if (vfsp == NULL) {
311                     linker_file_t lf;
312 
313                     /* Only load modules for root (very important!) */
314                     error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
315                     if (error) {
316                               cache_drop(&nch);
317                               vput(vp);
318                               goto done;
319                     }
320                     error = linker_load_file(fstypename, &lf);
321                     if (error || lf == NULL) {
322                               cache_drop(&nch);
323                               vput(vp);
324                               if (lf == NULL)
325                                         error = ENODEV;
326                               goto done;
327                     }
328                     lf->userrefs++;
329                     /* lookup again, see if the VFS was loaded */
330                     vfsp = vfsconf_find_by_name(fstypename);
331                     if (vfsp == NULL) {
332                               lf->userrefs--;
333                               linker_file_unload(lf);
334                               cache_drop(&nch);
335                               vput(vp);
336                               error = ENODEV;
337                               goto done;
338                     }
339           }
340           if (hasmount) {
341                     cache_drop(&nch);
342                     vput(vp);
343                     error = EBUSY;
344                     goto done;
345           }
346 
347           /*
348            * Allocate and initialize the filesystem.
349            */
350           mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
351           mount_init(mp, vfsp->vfc_vfsops);
352           vfs_busy(mp, LK_NOWAIT);
353           mp->mnt_vfc = vfsp;
354           mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
355           vfsp->vfc_refcount++;
356           mp->mnt_stat.f_type = vfsp->vfc_typenum;
357           mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
358           strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
359           mp->mnt_stat.f_owner = cred->cr_uid;
360           lwkt_gettoken(&mp->mnt_token);
361           vn_unlock(vp);
362 update:
363           /*
364            * (per-mount token acquired at this point)
365            *
366            * Set the mount level flags.
367            */
368           if (flags & MNT_RDONLY)
369                     mp->mnt_flag |= MNT_RDONLY;
370           else if (mp->mnt_flag & MNT_RDONLY)
371                     mp->mnt_kern_flag |= MNTK_WANTRDWR;
372           mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
373               MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
374               MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
375               MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
376               MNT_AUTOMOUNTED);
377           mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
378               MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
379               MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
380               MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
381               MNT_AUTOMOUNTED);
382 
383           /*
384            * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
385            * This way the initial VFS_MOUNT() call will also be MPSAFE.
386            */
387           if (vfsp->vfc_flags & VFCF_MPSAFE)
388                     mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
389 
390           /*
391            * Mount the filesystem.
392            * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
393            * get.
394            */
395           if (mp->mnt_flag & MNT_UPDATE) {
396                     error = VFS_MOUNT(mp, uap->path, uap->data, cred);
397                     if (mp->mnt_kern_flag & MNTK_WANTRDWR)
398                               mp->mnt_flag &= ~MNT_RDONLY;
399                     mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
400                     mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
401                     if (error) {
402                               mp->mnt_flag = flag;
403                               mp->mnt_kern_flag = flag2;
404                     }
405                     lwkt_reltoken(&mp->mnt_token);
406                     vfs_unbusy(mp);
407                     vrele(vp);
408                     cache_drop(&nch);
409                     goto done;
410           }
411           mp->mnt_ncmounton = nch;
412           error = VFS_MOUNT(mp, uap->path, uap->data, cred);
413           vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
414 
415           /*
416            * Put the new filesystem on the mount list after root.  The mount
417            * point gets its own mnt_ncmountpt (unless the VFS already set one
418            * up) which represents the root of the mount.  The lookup code
419            * detects the mount point going forward and checks the root of
420            * the mount going backwards.
421            *
422            * It is not necessary to invalidate or purge the vnode underneath
423            * because elements under the mount will be given their own glue
424            * namecache record.
425            */
426           if (!error) {
427                     if (mp->mnt_ncmountpt.ncp == NULL) {
428                               /*
429                                * Allocate, then unlock, but leave the ref intact.
430                                * This is the mnt_refs (1) that we will retain
431                                * through to the unmount.
432                                */
433                               cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
434                               cache_unlock(&mp->mnt_ncmountpt);
435                     }
436                     vn_unlock(vp);
437                     cache_lock(&nch);
438                     nch.ncp->nc_flag |= NCF_ISMOUNTPT;
439                     cache_unlock(&nch);
440                     cache_ismounting(mp);
441                     vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
442 
443                     mountlist_insert(mp, MNTINS_LAST);
444                     vn_unlock(vp);
445                     checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
446                     error = vfs_allocate_syncvnode(mp);
447                     lwkt_reltoken(&mp->mnt_token);
448                     vfs_unbusy(mp);
449                     error = VFS_START(mp, 0);
450                     vrele(vp);
451                     KNOTE(&fs_klist, VQ_MOUNT);
452           } else {
453                     bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
454                     vn_syncer_thr_stop(mp);
455                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
456                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
457                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
458                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
459                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
460                     if (mp->mnt_cred) {
461                               crfree(mp->mnt_cred);
462                               mp->mnt_cred = NULL;
463                     }
464                     mp->mnt_vfc->vfc_refcount--;
465                     lwkt_reltoken(&mp->mnt_token);
466                     vfs_unbusy(mp);
467                     kfree(mp, M_MOUNT);
468                     cache_drop(&nch);
469                     vput(vp);
470           }
471 done:
472           return (error);
473 }
474 
475 /*
476  * Scan all active processes to see if any of them have a current
477  * or root directory onto which the new filesystem has just been
478  * mounted. If so, replace them with the new mount point.
479  *
480  * Both old_nch and new_nch are ref'd on call but not locked.
481  * new_nch must be temporarily locked so it can be associated with the
482  * vnode representing the root of the mount point.
483  */
484 struct checkdirs_info {
485           struct nchandle old_nch;
486           struct nchandle new_nch;
487           struct vnode *old_vp;
488           struct vnode *new_vp;
489 };
490 
491 static int checkdirs_callback(struct proc *p, void *data);
492 
493 static void
checkdirs(struct nchandle * old_nch,struct nchandle * new_nch)494 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
495 {
496           struct checkdirs_info info;
497           struct vnode *olddp;
498           struct vnode *newdp;
499           struct mount *mp;
500 
501           /*
502            * If the old mount point's vnode has a usecount of 1, it is not
503            * being held as a descriptor anywhere.
504            */
505           olddp = old_nch->ncp->nc_vp;
506           if (olddp == NULL || VREFCNT(olddp) == 1)
507                     return;
508 
509           /*
510            * Force the root vnode of the new mount point to be resolved
511            * so we can update any matching processes.
512            */
513           mp = new_nch->mount;
514           if (VFS_ROOT(mp, &newdp))
515                     panic("mount: lost mount");
516           vn_unlock(newdp);
517           cache_lock(new_nch);
518           vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
519           cache_setunresolved(new_nch);
520           cache_setvp(new_nch, newdp);
521           cache_unlock(new_nch);
522 
523           /*
524            * Special handling of the root node
525            */
526           if (rootvnode == olddp) {
527                     vref(newdp);
528                     vfs_cache_setroot(newdp, cache_hold(new_nch));
529           }
530 
531           /*
532            * Pass newdp separately so the callback does not have to access
533            * it via new_nch->ncp->nc_vp.
534            */
535           info.old_nch = *old_nch;
536           info.new_nch = *new_nch;
537           info.new_vp = newdp;
538           allproc_scan(checkdirs_callback, &info, 0);
539           vput(newdp);
540 }
541 
542 /*
543  * NOTE: callback is not MP safe because the scanned process's filedesc
544  * structure can be ripped out from under us, amoung other things.
545  */
546 static int
checkdirs_callback(struct proc * p,void * data)547 checkdirs_callback(struct proc *p, void *data)
548 {
549           struct checkdirs_info *info = data;
550           struct filedesc *fdp;
551           struct nchandle ncdrop1;
552           struct nchandle ncdrop2;
553           struct vnode *vprele1;
554           struct vnode *vprele2;
555 
556           if ((fdp = p->p_fd) != NULL) {
557                     cache_zero(&ncdrop1);
558                     cache_zero(&ncdrop2);
559                     vprele1 = NULL;
560                     vprele2 = NULL;
561 
562                     /*
563                      * MPUNSAFE - XXX fdp can be pulled out from under a
564                      * foreign process.
565                      *
566                      * A shared filedesc is ok, we don't have to copy it
567                      * because we are making this change globally.
568                      */
569                     spin_lock(&fdp->fd_spin);
570                     if (fdp->fd_ncdir.mount == info->old_nch.mount &&
571                         fdp->fd_ncdir.ncp == info->old_nch.ncp) {
572                               vprele1 = fdp->fd_cdir;
573                               vref(info->new_vp);
574                               fdp->fd_cdir = info->new_vp;
575                               ncdrop1 = fdp->fd_ncdir;
576                               cache_copy(&info->new_nch, &fdp->fd_ncdir);
577                     }
578                     if (fdp->fd_nrdir.mount == info->old_nch.mount &&
579                         fdp->fd_nrdir.ncp == info->old_nch.ncp) {
580                               vprele2 = fdp->fd_rdir;
581                               vref(info->new_vp);
582                               fdp->fd_rdir = info->new_vp;
583                               ncdrop2 = fdp->fd_nrdir;
584                               cache_copy(&info->new_nch, &fdp->fd_nrdir);
585                     }
586                     spin_unlock(&fdp->fd_spin);
587                     if (ncdrop1.ncp)
588                               cache_drop(&ncdrop1);
589                     if (ncdrop2.ncp)
590                               cache_drop(&ncdrop2);
591                     if (vprele1)
592                               vrele(vprele1);
593                     if (vprele2)
594                               vrele(vprele2);
595           }
596           return(0);
597 }
598 
599 /*
600  * Unmount a file system.
601  *
602  * Note: unmount takes a path to the vnode mounted on as argument,
603  * not special file (as before).
604  *
605  * umount_args(char *path, int flags)
606  *
607  * MPALMOSTSAFE
608  */
609 int
sys_unmount(struct sysmsg * sysmsg,const struct unmount_args * uap)610 sys_unmount(struct sysmsg *sysmsg, const struct unmount_args *uap)
611 {
612           struct thread *td = curthread;
613           struct proc *p __debugvar = td->td_proc;
614           struct mount *mp = NULL;
615           struct nlookupdata nd;
616           char fstypename[MFSNAMELEN];
617           int priv = 0;
618           int error;
619           struct ucred *cred;
620 
621           cred = td->td_ucred;
622 
623           KKASSERT(p);
624 
625           /* We do not allow user umounts inside a jail for now */
626           if (usermount && jailed(cred)) {
627                     error = EPERM;
628                     goto done;
629           }
630 
631           error = nlookup_init(&nd, uap->path, UIO_USERSPACE,
632                                    NLC_FOLLOW | NLC_IGNBADDIR);
633           if (error == 0)
634                     error = nlookup(&nd);
635           if (error)
636                     goto out;
637 
638           mp = nd.nl_nch.mount;
639 
640           /* Figure out the fsname in order to select proper privs */
641           ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name);
642           priv = get_fscap(fstypename);
643 
644           if (usermount == 0 && (error = caps_priv_check_td(td, priv))) {
645                     nlookup_done(&nd);
646                     goto done;
647           }
648 
649           /*
650            * Only root, or the user that did the original mount is
651            * permitted to unmount this filesystem.
652            */
653           if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
654               (error = caps_priv_check_td(td, priv)))
655           {
656                     goto out;
657           }
658 
659           /*
660            * Don't allow unmounting the root file system.
661            */
662           if (mp->mnt_flag & MNT_ROOTFS) {
663                     error = EINVAL;
664                     goto out;
665           }
666 
667           /*
668            * Must be the root of the filesystem
669            */
670           if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
671                     error = EINVAL;
672                     goto out;
673           }
674 
675           /* Check if this mount belongs to this prison */
676           if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison ||
677                     mp->mnt_cred->cr_prison != cred->cr_prison)) {
678                     kprintf("mountpoint %s does not belong to this jail\n",
679                         uap->path);
680                     error = EPERM;
681                     goto out;
682           }
683 
684           /*
685            * If no error try to issue the unmount.  We lose our cache
686            * ref when we call nlookup_done so we must hold the mount point
687            * to prevent use-after-free races.
688            */
689 out:
690           if (error == 0) {
691                     mount_hold(mp);
692                     nlookup_done(&nd);
693                     error = dounmount(mp, uap->flags, 0);
694                     mount_drop(mp);
695           } else {
696                     nlookup_done(&nd);
697           }
698 done:
699           return (error);
700 }
701 
702 /*
703  * Do the actual file system unmount (interlocked against the mountlist
704  * token and mp->mnt_token).
705  */
706 static int
dounmount_interlock(struct mount * mp)707 dounmount_interlock(struct mount *mp)
708 {
709           if (mp->mnt_kern_flag & MNTK_UNMOUNT)
710                     return (EBUSY);
711           mp->mnt_kern_flag |= MNTK_UNMOUNT;
712           return(0);
713 }
714 
715 /*
716  * Returns non-zero if the specified process uses the specified
717  * mount point.
718  */
719 static int
process_uses_mount(struct proc * p,struct mount * mp)720 process_uses_mount(struct proc *p, struct mount *mp)
721 {
722           struct filedesc *fdp;
723           struct file *fp;
724           int found;
725           int n;
726 
727           fdp = p->p_fd;
728           if (fdp == NULL)
729                     return 0;
730           if (fdp->fd_ncdir.mount == mp ||
731               fdp->fd_nrdir.mount == mp ||
732               fdp->fd_njdir.mount == mp)
733           {
734                     return 1;
735           }
736 
737           found = 0;
738           spin_lock_shared(&fdp->fd_spin);
739           for (n = 0; n < fdp->fd_nfiles; ++n) {
740                     fp = fdp->fd_files[n].fp;
741                     if (fp && fp->f_nchandle.mount == mp) {
742                               found = 1;
743                               break;
744                     }
745           }
746           spin_unlock_shared(&fdp->fd_spin);
747 
748           return found;
749 }
750 
751 /*
752  * Cleanup processes that have references to the mount point
753  * being force-unmounted.
754  */
755 struct unmount_allproc_info {
756           struct mount *mp;
757           int sig;
758 };
759 
760 static int
unmount_allproc_cb(struct proc * p,void * arg)761 unmount_allproc_cb(struct proc *p, void *arg)
762 {
763           struct unmount_allproc_info *info;
764           struct mount *mp;
765 
766           info = arg;
767           mp = info->mp;
768 
769           if (p->p_textnch.mount == mp)
770                     cache_drop(&p->p_textnch);
771           if (info->sig && process_uses_mount(p, mp)) {
772                     lwkt_gettoken(&p->p_token);
773                     p->p_flags |= P_MUSTKILL;
774                     lwkt_reltoken(&p->p_token);
775                     ksignal(p, info->sig);
776           }
777 
778           return 0;
779 }
780 
/*
 * The guts of the unmount code.  The mount owns one ref and one hold
 * count.  If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT().  They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 */
792 int
dounmount(struct mount * mp,int flags,int halting)793 dounmount(struct mount *mp, int flags, int halting)
794 {
795           struct namecache *ncp;
796           struct nchandle nch;
797           struct vnode *vp;
798           int error;
799           int async_flag;
800           int lflags;
801           int freeok = 1;
802           int hadsyncer = 0;
803           int retry;
804           int quickhalt;
805 
806           lwkt_gettoken(&mp->mnt_token);
807 
808           /*
809            * When halting, certain mount points can essentially just
810            * be unhooked and otherwise ignored.
811            */
812           if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
813                     quickhalt = 1;
814                     freeok = 0;
815           } else {
816                     quickhalt = 0;
817           }
818 
819 
820           /*
821            * Exclusive access for unmounting purposes.
822            */
823           if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
824                     goto out;
825 
826           /*
827            * We now 'own' the last mp->mnt_refs
828            *
829            * Allow filesystems to detect that a forced unmount is in progress.
830            */
831           if (flags & MNT_FORCE)
832                     mp->mnt_kern_flag |= MNTK_UNMOUNTF;
833           lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
834           error = lockmgr(&mp->mnt_lock, lflags);
835           if (error) {
836                     mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
837                     if (mp->mnt_kern_flag & MNTK_MWAIT) {
838                               mp->mnt_kern_flag &= ~MNTK_MWAIT;
839                               wakeup(mp);
840                     }
841                     goto out;
842           }
843 
844           if (mp->mnt_flag & MNT_EXPUBLIC)
845                     vfs_setpublicfs(NULL, NULL, NULL);
846 
847           vfs_msync(mp, MNT_WAIT);
848           async_flag = mp->mnt_flag & MNT_ASYNC;
849           mp->mnt_flag &=~ MNT_ASYNC;
850 
851           /*
852            * Decomission our special mnt_syncer vnode.  This also stops
853            * the vnlru code.  If we are unable to unmount we recommission
854            * the vnode.
855            *
856            * Then sync the filesystem.
857            */
858           if ((vp = mp->mnt_syncer) != NULL) {
859                     mp->mnt_syncer = NULL;
860                     atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
861                     vrele(vp);
862                     hadsyncer = 1;
863           }
864 
865           /*
866            * Sync normally-mounted filesystem.
867            */
868           if (quickhalt == 0) {
869                     if ((mp->mnt_flag & MNT_RDONLY) == 0)
870                               VFS_SYNC(mp, MNT_WAIT);
871           }
872 
873           /*
874            * nchandle records ref the mount structure.  Expect a count of 1
875            * (our mount->mnt_ncmountpt).
876            *
877            * Scans can get temporary refs on a mountpoint (thought really
878            * heavy duty stuff like cache_findmount() do not).
879            */
880           for (retry = 0; (retry < UMOUNTF_RETRIES || debug_unmount); ++retry) {
881                     int dummy = 0;
882 
883                     /*
884                      * Invalidate the namecache topology under the mount.
885                      * nullfs mounts alias a real mount's namecache topology
886                      * and it should not be invalidated in that case.
887                      */
888                     if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
889                               cache_lock(&mp->mnt_ncmountpt);
890                               cache_inval(&mp->mnt_ncmountpt,
891                                             CINV_DESTROY|CINV_CHILDREN);
892                               cache_unlock(&mp->mnt_ncmountpt);
893                     }
894 
895                     /*
896                      * Clear pcpu caches
897                      */
898                     cache_unmounting(mp);
899                     if (mp->mnt_refs != 1)
900                               cache_clearmntcache(mp);
901 
902                     /*
903                      * Break out if we are good.  Don't count ncp refs if the
904                      * mount is aliased.
905                      */
906                     ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
907                           NULL : mp->mnt_ncmountpt.ncp;
908                     if (mp->mnt_refs == 1 &&
909                         (ncp == NULL || (ncp->nc_refs == 1 &&
910                                              TAILQ_FIRST(&ncp->nc_list) == NULL))) {
911                               break;
912                     }
913 
914                     /*
915                      * If forcing the unmount, clean out any p->p_textnch
916                      * nchandles that match this mount.
917                      *
918                      * In addition any process which has a current, root, or
919                      * jail directory matching the mount, or which has an open
920                      * descriptor matching the mount, will be killed.  We first
921                      * try SIGINT, and if that doesn't work we issue SIGKILL.
922                      */
923                     if (flags & MNT_FORCE) {
924                               struct unmount_allproc_info info;
925 
926                               info.mp = mp;
927                               switch(retry) {
928                               case 3:
929                                         info.sig = SIGINT;
930                                         break;
931                               case 7:
932                                         info.sig = SIGKILL;
933                                         break;
934                               default:
935                                         info.sig = 0;
936                                         break;
937                               }
938                               allproc_scan(&unmount_allproc_cb, &info, 0);
939                     }
940 
941                     /*
942                      * Sleep and retry.
943                      */
944                     error = lockmgr(&mp->mnt_lock, LK_RELEASE);
945                     tsleep(&dummy, 0, "mntbsy", hz / 4 + 1);
946                     error = lockmgr(&mp->mnt_lock, LK_EXCLUSIVE);
947                     if (debug_unmount && (retry & 15) == 15) {
948                               mount_warning(mp,
949                                               "(%p) debug - retry %d, "
950                                               "%d namecache refs, %d mount refs",
951                                               mp, retry,
952                                               (ncp ? ncp->nc_refs - 1 : 0),
953                                               mp->mnt_refs - 1);
954                     }
955           }
956           if (retry == UMOUNTF_RETRIES) {
957                     mount_warning(mp,
958                                     "forced umount of \"%s\" - "
959                                     "%d namecache refs, %d mount refs",
960                                     (mp->mnt_ncmountpt.ncp ?
961                                         mp->mnt_ncmountpt.ncp->nc_name : "?"),
962                                     (ncp ? ncp->nc_refs - 1 : 0),
963                                     mp->mnt_refs - 1);
964           }
965 
966           error = 0;
967           ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
968                 NULL : mp->mnt_ncmountpt.ncp;
969           if (mp->mnt_refs != 1 ||
970               (ncp != NULL && (ncp->nc_refs != 1 ||
971                                    TAILQ_FIRST(&ncp->nc_list)))) {
972                     mount_warning(mp,
973                                     "(%p): %d namecache refs, %d mount refs "
974                                     "still present",
975                                     mp,
976                                     (ncp ? ncp->nc_refs - 1 : 0),
977                                     mp->mnt_refs - 1);
978                     if (flags & MNT_FORCE) {
979                               freeok = 0;
980                               mount_warning(mp, "forcing unmount\n");
981                     } else {
982                               error = EBUSY;
983                     }
984           }
985 
986           /*
987            * So far so good, sync the filesystem once more and
988            * call the VFS unmount code if the sync succeeds.
989            */
990           if (error == 0 && quickhalt == 0) {
991                     if (mp->mnt_flag & MNT_RDONLY) {
992                               error = VFS_UNMOUNT(mp, flags);
993                     } else {
994                               error = VFS_SYNC(mp, MNT_WAIT);
995                               if (error == 0 ||             /* no error */
996                                   error == EOPNOTSUPP ||    /* no sync avail */
997                                   (flags & MNT_FORCE)) {    /* force anyway */
998                                         error = VFS_UNMOUNT(mp, flags);
999                               }
1000                     }
1001                     if (error) {
1002                               mount_warning(mp,
1003                                               "(%p) unmount: vfs refused to unmount, "
1004                                               "error %d",
1005                                               mp, error);
1006                     }
1007           }
1008 
1009           /*
1010            * If an error occurred we can still recover, restoring the
1011            * syncer vnode and misc flags.
1012            */
1013           if (error) {
1014                     if (mp->mnt_syncer == NULL && hadsyncer)
1015                               vfs_allocate_syncvnode(mp);
1016                     mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1017                     mp->mnt_flag |= async_flag;
1018                     lockmgr(&mp->mnt_lock, LK_RELEASE);
1019                     if (mp->mnt_kern_flag & MNTK_MWAIT) {
1020                               mp->mnt_kern_flag &= ~MNTK_MWAIT;
1021                               wakeup(mp);
1022                     }
1023                     goto out;
1024           }
1025           /*
1026            * Clean up any journals still associated with the mount after
1027            * filesystem activity has ceased.
1028            */
1029           journal_remove_all_journals(mp,
1030               ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
1031 
1032           mountlist_remove(mp);
1033 
1034           /*
1035            * Remove any installed vnode ops here so the individual VFSs don't
1036            * have to.
1037            *
1038            * mnt_refs should go to zero when we scrap mnt_ncmountpt.
1039            *
1040            * When quickhalting we have to keep these intact because the
1041            * underlying vnodes have not been destroyed, and some might be
1042            * dirty.
1043            */
1044           if (quickhalt == 0) {
1045                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
1046                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
1047                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
1048                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
1049                     vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
1050           }
1051 
1052           if (mp->mnt_ncmountpt.ncp != NULL) {
1053                     nch = mp->mnt_ncmountpt;
1054                     cache_zero(&mp->mnt_ncmountpt);
1055                     cache_clrmountpt(&nch);
1056                     cache_drop(&nch);
1057           }
1058           if (mp->mnt_ncmounton.ncp != NULL) {
1059                     cache_unmounting(mp);
1060                     nch = mp->mnt_ncmounton;
1061                     cache_zero(&mp->mnt_ncmounton);
1062                     cache_clrmountpt(&nch);
1063                     cache_drop(&nch);
1064           }
1065 
1066           if (mp->mnt_cred) {
1067                     crfree(mp->mnt_cred);
1068                     mp->mnt_cred = NULL;
1069           }
1070 
1071           mp->mnt_vfc->vfc_refcount--;
1072 
1073           /*
1074            * If not quickhalting the mount, we expect there to be no
1075            * vnodes left.
1076            */
1077           if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
1078                     panic("unmount: dangling vnode");
1079 
1080           /*
1081            * Release the lock
1082            */
1083           lockmgr(&mp->mnt_lock, LK_RELEASE);
1084           if (mp->mnt_kern_flag & MNTK_MWAIT) {
1085                     mp->mnt_kern_flag &= ~MNTK_MWAIT;
1086                     wakeup(mp);
1087           }
1088 
1089           /*
1090            * If we reach here and freeok != 0 we must free the mount.
1091            * mnt_refs should already have dropped to 0, so if it is not
1092            * zero we must cycle the caches and wait.
1093            *
1094            * When we are satisfied that the mount has disconnected we can
1095            * drop the hold on the mp that represented the mount (though the
1096            * caller might actually have another, so the caller's drop may
1097            * do the actual free).
1098            */
1099           if (freeok) {
1100                     if (mp->mnt_refs > 0)
1101                               cache_clearmntcache(mp);
1102                     while (mp->mnt_refs > 0) {
1103                               cache_unmounting(mp);
1104                               wakeup(mp);
1105                               tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
1106                               cache_clearmntcache(mp);
1107                     }
1108                     lwkt_reltoken(&mp->mnt_token);
1109                     mount_drop(mp);
1110                     mp = NULL;
1111           } else {
1112                     cache_clearmntcache(mp);
1113           }
1114           error = 0;
1115           KNOTE(&fs_klist, VQ_UNMOUNT);
1116 out:
1117           if (mp)
1118                     lwkt_reltoken(&mp->mnt_token);
1119           return (error);
1120 }
1121 
1122 static
1123 void
mount_warning(struct mount * mp,const char * ctl,...)1124 mount_warning(struct mount *mp, const char *ctl, ...)
1125 {
1126           char *ptr;
1127           char *buf;
1128           __va_list va;
1129 
1130           __va_start(va, ctl);
1131           if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
1132                                  &ptr, &buf, 0) == 0) {
1133                     kprintf("unmount(%s): ", ptr);
1134                     kvprintf(ctl, va);
1135                     kprintf("\n");
1136                     kfree(buf, M_TEMP);
1137           } else {
1138                     kprintf("unmount(%p", mp);
1139                     if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
1140                               kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
1141                     kprintf("): ");
1142                     kvprintf(ctl, va);
1143                     kprintf("\n");
1144           }
1145           __va_end(va);
1146 }
1147 
1148 /*
1149  * Shim cache_fullpath() to handle the case where a process is chrooted into
1150  * a subdirectory of a mount.  In this case if the root mount matches the
1151  * process root directory's mount we have to specify the process's root
1152  * directory instead of the mount point, because the mount point might
1153  * be above the root directory.
1154  */
1155 static
1156 int
mount_path(struct proc * p,struct mount * mp,char ** rb,char ** fb)1157 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
1158 {
1159           struct nchandle *nch;
1160 
1161           if (p && p->p_fd->fd_nrdir.mount == mp)
1162                     nch = &p->p_fd->fd_nrdir;
1163           else
1164                     nch = &mp->mnt_ncmountpt;
1165           return(cache_fullpath(p, nch, NULL, rb, fb, 0));
1166 }
1167 
1168 /*
1169  * Sync each mounted filesystem.
1170  */
1171 
#ifdef DEBUG
/*
 * Debug knob, only compiled into DEBUG kernels.  Registered read/write
 * under the _debug sysctl node with an empty description.
 */
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */
1176 
1177 static int sync_callback(struct mount *mp, void *data);
1178 
1179 int
sys_sync(struct sysmsg * sysmsg,const struct sync_args * uap)1180 sys_sync(struct sysmsg *sysmsg, const struct sync_args *uap)
1181 {
1182           mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
1183           return (0);
1184 }
1185 
1186 static
1187 int
sync_callback(struct mount * mp,void * data __unused)1188 sync_callback(struct mount *mp, void *data __unused)
1189 {
1190           int asyncflag;
1191 
1192           if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1193                     lwkt_gettoken(&mp->mnt_token);
1194                     asyncflag = mp->mnt_flag & MNT_ASYNC;
1195                     mp->mnt_flag &= ~MNT_ASYNC;
1196                     lwkt_reltoken(&mp->mnt_token);
1197                     vfs_msync(mp, MNT_NOWAIT);
1198                     VFS_SYNC(mp, MNT_NOWAIT);
1199                     lwkt_gettoken(&mp->mnt_token);
1200                     mp->mnt_flag |= asyncflag;
1201                     lwkt_reltoken(&mp->mnt_token);
1202           }
1203           return(0);
1204 }
1205 
/*
 * When zero, callers whose credentials carry a prison are refused by
 * sys_quotactl().  The sysctl that would export it is compiled out, so
 * the value stays at its static initial value of 0.
 *
 * XXX PRISON: could be per prison flag
 */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif
1211 
1212 /*
1213  *  quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
1214  *
1215  * Change filesystem quotas.
1216  *
1217  * MPALMOSTSAFE
1218  */
1219 int
sys_quotactl(struct sysmsg * sysmsg,const struct quotactl_args * uap)1220 sys_quotactl(struct sysmsg *sysmsg, const struct quotactl_args *uap)
1221 {
1222           struct nlookupdata nd;
1223           struct thread *td;
1224           struct mount *mp;
1225           int error;
1226 
1227           td = curthread;
1228           if (td->td_ucred->cr_prison && !prison_quotas) {
1229                     error = EPERM;
1230                     goto done;
1231           }
1232 
1233           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1234           if (error == 0)
1235                     error = nlookup(&nd);
1236           if (error == 0) {
1237                     mp = nd.nl_nch.mount;
1238                     error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
1239                                             uap->arg, nd.nl_cred);
1240           }
1241           nlookup_done(&nd);
1242 done:
1243           return (error);
1244 }
1245 
1246 /*
1247  * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
1248  *                  void *buf, int buflen)
1249  *
1250  * This function operates on a mount point and executes the specified
1251  * operation using the specified control data, and possibly returns data.
1252  *
1253  * The actual number of bytes stored in the result buffer is returned, 0
1254  * if none, otherwise an error is returned.
1255  *
1256  * MPALMOSTSAFE
1257  */
1258 int
sys_mountctl(struct sysmsg * sysmsg,const struct mountctl_args * uap)1259 sys_mountctl(struct sysmsg *sysmsg, const struct mountctl_args *uap)
1260 {
1261           struct thread *td = curthread;
1262           struct file *fp;
1263           void *ctl = NULL;
1264           void *buf = NULL;
1265           char *path = NULL;
1266           int error;
1267 
1268           /*
1269            * Sanity and permissions checks.  We must be root.
1270            */
1271           if (td->td_ucred->cr_prison != NULL)
1272                     return (EPERM);
1273           if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
1274               (error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) != 0)
1275           {
1276                     return (error);
1277           }
1278 
1279           /*
1280            * Argument length checks
1281            */
1282           if (uap->ctllen < 0 || uap->ctllen > 1024)
1283                     return (EINVAL);
1284           if (uap->buflen < 0 || uap->buflen > 16 * 1024)
1285                     return (EINVAL);
1286           if (uap->path == NULL)
1287                     return (EINVAL);
1288 
1289           /*
1290            * Allocate the necessary buffers and copyin data
1291            */
1292           path = objcache_get(namei_oc, M_WAITOK);
1293           error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
1294           if (error)
1295                     goto done;
1296 
1297           if (uap->ctllen) {
1298                     ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
1299                     error = copyin(uap->ctl, ctl, uap->ctllen);
1300                     if (error)
1301                               goto done;
1302           }
1303           if (uap->buflen)
1304                     buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);
1305 
1306           /*
1307            * Validate the descriptor
1308            */
1309           if (uap->fd >= 0) {
1310                     fp = holdfp(td, uap->fd, -1);
1311                     if (fp == NULL) {
1312                               error = EBADF;
1313                               goto done;
1314                     }
1315           } else {
1316                     fp = NULL;
1317           }
1318 
1319           /*
1320            * Execute the internal kernel function and clean up.
1321            */
1322           error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
1323                                     buf, uap->buflen, &sysmsg->sysmsg_result);
1324           if (fp)
1325                     dropfp(td, uap->fd, fp);
1326           if (error == 0 && sysmsg->sysmsg_result > 0)
1327                     error = copyout(buf, uap->buf, sysmsg->sysmsg_result);
1328 done:
1329           if (path)
1330                     objcache_put(namei_oc, path);
1331           if (ctl)
1332                     kfree(ctl, M_TEMP);
1333           if (buf)
1334                     kfree(buf, M_TEMP);
1335           return (error);
1336 }
1337 
1338 /*
1339  * Execute a mount control operation by resolving the path to a mount point
1340  * and calling vop_mountctl().
1341  *
1342  * Use the mount point from the nch instead of the vnode so nullfs mounts
1343  * can properly spike the VOP.
1344  */
1345 int
kern_mountctl(const char * path,int op,struct file * fp,const void * ctl,int ctllen,void * buf,int buflen,int * res)1346 kern_mountctl(const char *path, int op, struct file *fp,
1347                     const void *ctl, int ctllen,
1348                     void *buf, int buflen, int *res)
1349 {
1350           struct vnode *vp;
1351           struct nlookupdata nd;
1352           struct nchandle nch;
1353           struct mount *mp;
1354           int error;
1355 
1356           *res = 0;
1357           vp = NULL;
1358           error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
1359           if (error)
1360                     return (error);
1361           error = nlookup(&nd);
1362           if (error) {
1363                     nlookup_done(&nd);
1364                     return (error);
1365           }
1366           error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
1367           if (error) {
1368                     nlookup_done(&nd);
1369                     return (error);
1370           }
1371 
1372           /*
1373            * Yes, all this is needed to use the nch.mount below, because
1374            * we must maintain a ref on the mount to avoid ripouts (e.g.
1375            * due to heavy mount/unmount use by synth or poudriere).
1376            */
1377           nch = nd.nl_nch;
1378           cache_zero(&nd.nl_nch);
1379           cache_unlock(&nch);
1380           nlookup_done(&nd);
1381           vn_unlock(vp);
1382 
1383           mp = nch.mount;
1384 
1385           /*
1386            * Must be the root of the filesystem
1387            */
1388           if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
1389                     cache_drop(&nch);
1390                     vrele(vp);
1391                     return (EINVAL);
1392           }
1393           if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
1394                     kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
1395                               path);
1396                     cache_drop(&nch);
1397                     vrele(vp);
1398                     return (EINVAL);
1399           }
1400           error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
1401                                    buf, buflen, res);
1402           vrele(vp);
1403           cache_drop(&nch);
1404 
1405           return (error);
1406 }
1407 
1408 int
kern_statfs(struct nlookupdata * nd,struct statfs * buf)1409 kern_statfs(struct nlookupdata *nd, struct statfs *buf)
1410 {
1411           struct thread *td = curthread;
1412           struct proc *p = td->td_proc;
1413           struct mount *mp;
1414           struct statfs *sp;
1415           char *fullpath, *freepath;
1416           int error;
1417 
1418           if ((error = nlookup(nd)) != 0)
1419                     return (error);
1420           mp = nd->nl_nch.mount;
1421           sp = &mp->mnt_stat;
1422 
1423           /*
1424            * Ignore refresh error, user should have visibility.
1425            * This can happen if a NFS mount goes bad (e.g. server
1426            * revokes perms or goes down).
1427            */
1428           error = VFS_STATFS(mp, sp, nd->nl_cred);
1429           /* ignore error */
1430 
1431           error = mount_path(p, mp, &fullpath, &freepath);
1432           if (error)
1433                     return(error);
1434           bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1435           strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1436           kfree(freepath, M_TEMP);
1437 
1438           sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1439           bcopy(sp, buf, sizeof(*buf));
1440           /* Only root should have access to the fsid's. */
1441           if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT))
1442                     buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
1443           return (0);
1444 }
1445 
1446 /*
1447  * statfs_args(char *path, struct statfs *buf)
1448  *
1449  * Get filesystem statistics.
1450  */
1451 int
sys_statfs(struct sysmsg * sysmsg,const struct statfs_args * uap)1452 sys_statfs(struct sysmsg *sysmsg, const struct statfs_args *uap)
1453 {
1454           struct nlookupdata nd;
1455           struct statfs buf;
1456           int error;
1457 
1458           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1459           if (error == 0)
1460                     error = kern_statfs(&nd, &buf);
1461           nlookup_done(&nd);
1462           if (error == 0)
1463                     error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1464           return (error);
1465 }
1466 
1467 int
kern_fstatfs(int fd,struct statfs * buf)1468 kern_fstatfs(int fd, struct statfs *buf)
1469 {
1470           struct thread *td = curthread;
1471           struct proc *p = td->td_proc;
1472           struct file *fp;
1473           struct mount *mp;
1474           struct statfs *sp;
1475           char *fullpath, *freepath;
1476           int error;
1477 
1478           KKASSERT(p);
1479           if ((error = holdvnode(td, fd, &fp)) != 0)
1480                     return (error);
1481 
1482           /*
1483            * Try to use mount info from any overlays rather than the
1484            * mount info for the underlying vnode, otherwise we will
1485            * fail when operating on null-mounted paths inside a chroot.
1486            */
1487           if ((mp = fp->f_nchandle.mount) == NULL)
1488                     mp = ((struct vnode *)fp->f_data)->v_mount;
1489           if (mp == NULL) {
1490                     error = EBADF;
1491                     goto done;
1492           }
1493           if (fp->f_cred == NULL) {
1494                     error = EINVAL;
1495                     goto done;
1496           }
1497 
1498           /*
1499            * Ignore refresh error, user should have visibility.
1500            * This can happen if a NFS mount goes bad (e.g. server
1501            * revokes perms or goes down).
1502            */
1503           sp = &mp->mnt_stat;
1504           error = VFS_STATFS(mp, sp, fp->f_cred);
1505 
1506           if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
1507                     goto done;
1508           bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1509           strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1510           kfree(freepath, M_TEMP);
1511 
1512           sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1513           bcopy(sp, buf, sizeof(*buf));
1514 
1515           /* Only root should have access to the fsid's. */
1516           if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT))
1517                     buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
1518           error = 0;
1519 done:
1520           fdrop(fp);
1521           return (error);
1522 }
1523 
1524 /*
1525  * fstatfs_args(int fd, struct statfs *buf)
1526  *
1527  * Get filesystem statistics.
1528  */
1529 int
sys_fstatfs(struct sysmsg * sysmsg,const struct fstatfs_args * uap)1530 sys_fstatfs(struct sysmsg *sysmsg, const struct fstatfs_args *uap)
1531 {
1532           struct statfs buf;
1533           int error;
1534 
1535           error = kern_fstatfs(uap->fd, &buf);
1536 
1537           if (error == 0)
1538                     error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1539           return (error);
1540 }
1541 
1542 int
kern_statvfs(struct nlookupdata * nd,struct statvfs * buf)1543 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
1544 {
1545           struct mount *mp;
1546           struct statvfs *sp;
1547           int error;
1548 
1549           if ((error = nlookup(nd)) != 0)
1550                     return (error);
1551           mp = nd->nl_nch.mount;
1552           sp = &mp->mnt_vstat;
1553           if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
1554                     return (error);
1555 
1556           sp->f_flag = 0;
1557           if (mp->mnt_flag & MNT_RDONLY)
1558                     sp->f_flag |= ST_RDONLY;
1559           if (mp->mnt_flag & MNT_NOSUID)
1560                     sp->f_flag |= ST_NOSUID;
1561           bcopy(sp, buf, sizeof(*buf));
1562           return (0);
1563 }
1564 
1565 /*
1566  * statfs_args(char *path, struct statfs *buf)
1567  *
1568  * Get filesystem statistics.
1569  */
1570 int
sys_statvfs(struct sysmsg * sysmsg,const struct statvfs_args * uap)1571 sys_statvfs(struct sysmsg *sysmsg, const struct statvfs_args *uap)
1572 {
1573           struct nlookupdata nd;
1574           struct statvfs buf;
1575           int error;
1576 
1577           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1578           if (error == 0)
1579                     error = kern_statvfs(&nd, &buf);
1580           nlookup_done(&nd);
1581           if (error == 0)
1582                     error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1583           return (error);
1584 }
1585 
1586 int
kern_fstatvfs(int fd,struct statvfs * buf)1587 kern_fstatvfs(int fd, struct statvfs *buf)
1588 {
1589           struct thread *td = curthread;
1590           struct file *fp;
1591           struct mount *mp;
1592           struct statvfs *sp;
1593           int error;
1594 
1595           if ((error = holdvnode(td, fd, &fp)) != 0)
1596                     return (error);
1597           if ((mp = fp->f_nchandle.mount) == NULL)
1598                     mp = ((struct vnode *)fp->f_data)->v_mount;
1599           if (mp == NULL) {
1600                     error = EBADF;
1601                     goto done;
1602           }
1603           if (fp->f_cred == NULL) {
1604                     error = EINVAL;
1605                     goto done;
1606           }
1607           sp = &mp->mnt_vstat;
1608           if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
1609                     goto done;
1610 
1611           sp->f_flag = 0;
1612           if (mp->mnt_flag & MNT_RDONLY)
1613                     sp->f_flag |= ST_RDONLY;
1614           if (mp->mnt_flag & MNT_NOSUID)
1615                     sp->f_flag |= ST_NOSUID;
1616 
1617           bcopy(sp, buf, sizeof(*buf));
1618           error = 0;
1619 done:
1620           fdrop(fp);
1621           return (error);
1622 }
1623 
1624 /*
1625  * fstatfs_args(int fd, struct statfs *buf)
1626  *
1627  * Get filesystem statistics.
1628  */
1629 int
sys_fstatvfs(struct sysmsg * sysmsg,const struct fstatvfs_args * uap)1630 sys_fstatvfs(struct sysmsg *sysmsg, const struct fstatvfs_args *uap)
1631 {
1632           struct statvfs buf;
1633           int error;
1634 
1635           error = kern_fstatvfs(uap->fd, &buf);
1636 
1637           if (error == 0)
1638                     error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1639           return (error);
1640 }
1641 
/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 *
 * State shared between sys_getfsstat() and its mountlist_scan()
 * callback.
 */
struct getfsstat_info {
	struct statfs *sfsp;	/* next user slot to fill; advanced per copyout */
	long count;		/* mounts counted so far */
	long maxcount;		/* number of entries the user buffer can hold */
	int error;		/* error recorded by the callback */
	int flags;		/* flags passed in by the caller */
	struct thread *td;	/* calling thread */
};

static int getfsstat_callback(struct mount *mp, void *data);
1658 
1659 int
sys_getfsstat(struct sysmsg * sysmsg,const struct getfsstat_args * uap)1660 sys_getfsstat(struct sysmsg *sysmsg, const struct getfsstat_args *uap)
1661 {
1662           struct thread *td = curthread;
1663           struct getfsstat_info info;
1664 
1665           bzero(&info, sizeof(info));
1666 
1667           info.maxcount = uap->bufsize / sizeof(struct statfs);
1668           info.sfsp = uap->buf;
1669           info.count = 0;
1670           info.flags = uap->flags;
1671           info.td = td;
1672 
1673           mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
1674           if (info.sfsp && info.count > info.maxcount)
1675                     sysmsg->sysmsg_result = info.maxcount;
1676           else
1677                     sysmsg->sysmsg_result = info.count;
1678           return (info.error);
1679 }
1680 
1681 static int
getfsstat_callback(struct mount * mp,void * data)1682 getfsstat_callback(struct mount *mp, void *data)
1683 {
1684           struct getfsstat_info *info = data;
1685           struct statfs *sp;
1686           char *freepath;
1687           char *fullpath;
1688           int error;
1689 
1690           if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
1691                     return(0);
1692 
1693           if (info->sfsp && info->count < info->maxcount) {
1694                     sp = &mp->mnt_stat;
1695 
1696                     /*
1697                      * If MNT_NOWAIT or MNT_LAZY is specified, do not
1698                      * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1699                      * overrides MNT_WAIT.
1700                      *
1701                      * Ignore refresh error, user should have visibility.
1702                      * This can happen if a NFS mount goes bad (e.g. server
1703                      * revokes perms or goes down).
1704                      */
1705                     if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1706                         (info->flags & MNT_WAIT)) &&
1707                         (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
1708                               /* ignore error */
1709                     }
1710                     sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1711 
1712                     error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
1713                     if (error) {
1714                               info->error = error;
1715                               return(-1);
1716                     }
1717                     bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1718                     strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1719                     kfree(freepath, M_TEMP);
1720 
1721                     error = copyout(sp, info->sfsp, sizeof(*sp));
1722                     if (error) {
1723                               info->error = error;
1724                               return (-1);
1725                     }
1726                     ++info->sfsp;
1727           }
1728           info->count++;
1729           return(0);
1730 }
1731 
/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long vbufsize, int flags)
 *
 * Get statistics on all filesystems.
 *
 * State shared between sys_getvfsstat() and its mountlist_scan()
 * callback.
 */
struct getvfsstat_info {
	struct statfs *sfsp;	/* next user statfs slot; advanced per entry */
	struct statvfs *vsfsp;	/* next user statvfs slot; advanced per entry */
	long count;		/* mounts counted so far */
	long maxcount;		/* number of entries the statvfs buffer can hold */
	int error;		/* error recorded by the callback */
	int flags;		/* flags passed in by the caller */
	struct thread *td;	/* calling thread */
};

static int getvfsstat_callback(struct mount *mp, void *data);
1750 
1751 int
sys_getvfsstat(struct sysmsg * sysmsg,const struct getvfsstat_args * uap)1752 sys_getvfsstat(struct sysmsg *sysmsg, const struct getvfsstat_args *uap)
1753 {
1754           struct thread *td = curthread;
1755           struct getvfsstat_info info;
1756 
1757           bzero(&info, sizeof(info));
1758 
1759           info.maxcount = uap->vbufsize / sizeof(struct statvfs);
1760           info.sfsp = uap->buf;
1761           info.vsfsp = uap->vbuf;
1762           info.count = 0;
1763           info.flags = uap->flags;
1764           info.td = td;
1765 
1766           mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
1767           if (info.vsfsp && info.count > info.maxcount)
1768                     sysmsg->sysmsg_result = info.maxcount;
1769           else
1770                     sysmsg->sysmsg_result = info.count;
1771           return (info.error);
1772 }
1773 
1774 static int
getvfsstat_callback(struct mount * mp,void * data)1775 getvfsstat_callback(struct mount *mp, void *data)
1776 {
1777           struct getvfsstat_info *info = data;
1778           struct statfs *sp;
1779           struct statvfs *vsp;
1780           char *freepath;
1781           char *fullpath;
1782           int error;
1783 
1784           if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
1785                     return(0);
1786 
1787           if (info->vsfsp && info->count < info->maxcount) {
1788                     sp = &mp->mnt_stat;
1789                     vsp = &mp->mnt_vstat;
1790 
1791                     /*
1792                      * If MNT_NOWAIT or MNT_LAZY is specified, do not
1793                      * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1794                      * overrides MNT_WAIT.
1795                      *
1796                      * Ignore refresh error, user should have visibility.
1797                      * This can happen if a NFS mount goes bad (e.g. server
1798                      * revokes perms or goes down).
1799                      */
1800                     if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1801                         (info->flags & MNT_WAIT)) &&
1802                         (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
1803                               /* ignore error */
1804                     }
1805                     sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1806 
1807                     if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1808                         (info->flags & MNT_WAIT)) &&
1809                         (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
1810                               /* ignore error */
1811                     }
1812                     vsp->f_flag = 0;
1813                     if (mp->mnt_flag & MNT_RDONLY)
1814                               vsp->f_flag |= ST_RDONLY;
1815                     if (mp->mnt_flag & MNT_NOSUID)
1816                               vsp->f_flag |= ST_NOSUID;
1817 
1818                     error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
1819                     if (error) {
1820                               info->error = error;
1821                               return(-1);
1822                     }
1823                     bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1824                     strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1825                     kfree(freepath, M_TEMP);
1826 
1827                     error = copyout(sp, info->sfsp, sizeof(*sp));
1828                     if (error == 0)
1829                               error = copyout(vsp, info->vsfsp, sizeof(*vsp));
1830                     if (error) {
1831                               info->error = error;
1832                               return (-1);
1833                     }
1834                     ++info->sfsp;
1835                     ++info->vsfsp;
1836           }
1837           info->count++;
1838           return(0);
1839 }
1840 
1841 
1842 /*
1843  * fchdir_args(int fd)
1844  *
1845  * Change current working directory to a given file descriptor.
1846  */
1847 int
sys_fchdir(struct sysmsg * sysmsg,const struct fchdir_args * uap)1848 sys_fchdir(struct sysmsg *sysmsg, const struct fchdir_args *uap)
1849 {
1850           struct thread *td = curthread;
1851           struct proc *p = td->td_proc;
1852           struct filedesc *fdp = p->p_fd;
1853           struct vnode *vp, *ovp;
1854           struct mount *mp;
1855           struct file *fp;
1856           struct nchandle nch, onch, tnch;
1857           int error;
1858 
1859           if ((error = holdvnode(td, uap->fd, &fp)) != 0)
1860                     return (error);
1861           lwkt_gettoken(&p->p_token);
1862           vp = (struct vnode *)fp->f_data;
1863           vref(vp);
1864           vn_lock(vp, LK_SHARED | LK_RETRY);
1865           if (fp->f_nchandle.ncp == NULL)
1866                     error = ENOTDIR;
1867           else
1868                     error = checkvp_chdir(vp, td);
1869           if (error) {
1870                     vput(vp);
1871                     goto done;
1872           }
1873           cache_copy(&fp->f_nchandle, &nch);
1874 
1875           /*
1876            * If the ncp has become a mount point, traverse through
1877            * the mount point.
1878            */
1879 
1880           while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
1881                  (mp = cache_findmount(&nch)) != NULL
1882           ) {
1883                     error = nlookup_mp(mp, &tnch);
1884                     if (error == 0) {
1885                               cache_unlock(&tnch);          /* leave ref intact */
1886                               vput(vp);
1887                               vp = tnch.ncp->nc_vp;
1888                               error = vget(vp, LK_SHARED);
1889                               KKASSERT(error == 0);
1890                               cache_drop(&nch);
1891                               nch = tnch;
1892                     }
1893                     cache_dropmount(mp);
1894           }
1895           if (error == 0) {
1896                     spin_lock(&fdp->fd_spin);
1897                     ovp = fdp->fd_cdir;
1898                     onch = fdp->fd_ncdir;
1899                     fdp->fd_cdir = vp;
1900                     fdp->fd_ncdir = nch;
1901                     spin_unlock(&fdp->fd_spin);
1902                     vn_unlock(vp);                /* leave ref intact */
1903                     cache_drop(&onch);
1904                     vrele(ovp);
1905           } else {
1906                     cache_drop(&nch);
1907                     vput(vp);
1908           }
1909           fdrop(fp);
1910 done:
1911           lwkt_reltoken(&p->p_token);
1912           return (error);
1913 }
1914 
1915 int
kern_chdir(struct nlookupdata * nd)1916 kern_chdir(struct nlookupdata *nd)
1917 {
1918           struct thread *td = curthread;
1919           struct proc *p = td->td_proc;
1920           struct filedesc *fdp = p->p_fd;
1921           struct vnode *vp, *ovp;
1922           struct nchandle onch;
1923           int error;
1924 
1925           nd->nl_flags |= NLC_SHAREDLOCK;
1926           if ((error = nlookup(nd)) != 0)
1927                     return (error);
1928           if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
1929                     return (ENOENT);
1930           if ((error = vget(vp, LK_SHARED)) != 0)
1931                     return (error);
1932 
1933           lwkt_gettoken(&p->p_token);
1934           error = checkvp_chdir(vp, td);
1935           vn_unlock(vp);
1936           if (error == 0) {
1937                     spin_lock(&fdp->fd_spin);
1938                     ovp = fdp->fd_cdir;
1939                     onch = fdp->fd_ncdir;
1940                     fdp->fd_ncdir = nd->nl_nch;
1941                     fdp->fd_cdir = vp;
1942                     spin_unlock(&fdp->fd_spin);
1943                     cache_unlock(&nd->nl_nch);    /* leave reference intact */
1944                     cache_drop(&onch);
1945                     vrele(ovp);
1946                     cache_zero(&nd->nl_nch);
1947           } else {
1948                     vrele(vp);
1949           }
1950           lwkt_reltoken(&p->p_token);
1951           return (error);
1952 }
1953 
1954 /*
1955  * chdir_args(char *path)
1956  *
1957  * Change current working directory (``.'').
1958  */
1959 int
sys_chdir(struct sysmsg * sysmsg,const struct chdir_args * uap)1960 sys_chdir(struct sysmsg *sysmsg, const struct chdir_args *uap)
1961 {
1962           struct nlookupdata nd;
1963           int error;
1964 
1965           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1966           if (error == 0)
1967                     error = kern_chdir(&nd);
1968           nlookup_done(&nd);
1969           return (error);
1970 }
1971 
1972 /*
1973  * Helper function for raised chroot(2) security function:  Refuse if
1974  * any filedescriptors are open directories.
1975  */
1976 static int
chroot_refuse_vdir_fds(thread_t td,struct filedesc * fdp)1977 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp)
1978 {
1979           struct vnode *vp;
1980           struct file *fp;
1981           int error;
1982           int fd;
1983 
1984           for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1985                     if ((error = holdvnode(td, fd, &fp)) != 0)
1986                               continue;
1987                     vp = (struct vnode *)fp->f_data;
1988                     if (vp->v_type != VDIR) {
1989                               fdrop(fp);
1990                               continue;
1991                     }
1992                     fdrop(fp);
1993                     return(EPERM);
1994           }
1995           return (0);
1996 }
1997 
1998 /*
1999  * This sysctl determines if we will allow a process to chroot(2) if it
2000  * has a directory open:
2001  *        0: disallowed for all processes.
2002  *        1: allowed for processes that were not already chroot(2)'ed.
2003  *        2: allowed for all processes.
2004  */
2005 
2006 static int chroot_allow_open_directories = 1;
2007 
2008 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
2009      &chroot_allow_open_directories, 0, "");
2010 
2011 /*
2012  * chroot to the specified namecache entry.  We obtain the vp from the
2013  * namecache data.  The passed ncp must be locked and referenced and will
2014  * remain locked and referenced on return.
2015  */
2016 int
kern_chroot(struct nchandle * nch)2017 kern_chroot(struct nchandle *nch)
2018 {
2019           struct thread *td = curthread;
2020           struct proc *p = td->td_proc;
2021           struct filedesc *fdp = p->p_fd;
2022           struct vnode *vp;
2023           int error;
2024 
2025           /*
2026            * Only privileged user can chroot
2027            */
2028           error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT);
2029           if (error)
2030                     return (error);
2031 
2032           /*
2033            * Disallow open directory descriptors (fchdir() breakouts).
2034            */
2035           if (chroot_allow_open_directories == 0 ||
2036              (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
2037                     if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0)
2038                               return (error);
2039           }
2040           if ((vp = nch->ncp->nc_vp) == NULL)
2041                     return (ENOENT);
2042 
2043           if ((error = vget(vp, LK_SHARED)) != 0)
2044                     return (error);
2045 
2046           /*
2047            * Check the validity of vp as a directory to change to and
2048            * associate it with rdir/jdir.
2049            */
2050           error = checkvp_chdir(vp, td);
2051           vn_unlock(vp);                          /* leave reference intact */
2052           if (error == 0) {
2053                     lwkt_gettoken(&p->p_token);
2054                     vrele(fdp->fd_rdir);
2055                     fdp->fd_rdir = vp;  /* reference inherited by fd_rdir */
2056                     cache_drop(&fdp->fd_nrdir);
2057                     cache_copy(nch, &fdp->fd_nrdir);
2058                     if (fdp->fd_jdir == NULL) {
2059                               fdp->fd_jdir = vp;
2060                               vref(fdp->fd_jdir);
2061                               cache_copy(nch, &fdp->fd_njdir);
2062                     }
2063                     if ((p->p_flags & P_DIDCHROOT) == 0) {
2064                               p->p_flags |= P_DIDCHROOT;
2065                               if (p->p_depth <= 65535 - 32)
2066                                         p->p_depth += 32;
2067                     }
2068                     lwkt_reltoken(&p->p_token);
2069           } else {
2070                     vrele(vp);
2071           }
2072           return (error);
2073 }
2074 
2075 /*
2076  * chroot_args(char *path)
2077  *
2078  * Change notion of root (``/'') directory.
2079  */
2080 int
sys_chroot(struct sysmsg * sysmsg,const struct chroot_args * uap)2081 sys_chroot(struct sysmsg *sysmsg, const struct chroot_args *uap)
2082 {
2083           struct thread *td __debugvar = curthread;
2084           struct nlookupdata nd;
2085           int error;
2086 
2087           KKASSERT(td->td_proc);
2088           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2089           if (error == 0) {
2090                     nd.nl_flags |= NLC_EXEC;
2091                     error = nlookup(&nd);
2092                     if (error == 0)
2093                               error = kern_chroot(&nd.nl_nch);
2094           }
2095           nlookup_done(&nd);
2096           return(error);
2097 }
2098 
2099 int
sys_chroot_kernel(struct sysmsg * sysmsg,const struct chroot_kernel_args * uap)2100 sys_chroot_kernel(struct sysmsg *sysmsg, const struct chroot_kernel_args *uap)
2101 {
2102           struct thread *td = curthread;
2103           struct nlookupdata nd;
2104           struct nchandle *nch;
2105           struct vnode *vp;
2106           int error;
2107 
2108           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2109           if (error)
2110                     goto error_nond;
2111 
2112           error = nlookup(&nd);
2113           if (error)
2114                     goto error_out;
2115 
2116           nch = &nd.nl_nch;
2117 
2118           error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT);
2119           if (error)
2120                     goto error_out;
2121 
2122           if ((vp = nch->ncp->nc_vp) == NULL) {
2123                     error = ENOENT;
2124                     goto error_out;
2125           }
2126 
2127           if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
2128                     goto error_out;
2129 
2130           vfs_cache_setroot(vp, cache_hold(nch));
2131 
2132 error_out:
2133           nlookup_done(&nd);
2134 error_nond:
2135           return(error);
2136 }
2137 
2138 /*
2139  * Common routine for chroot and chdir.  Given a locked, referenced vnode,
2140  * determine whether it is legal to chdir to the vnode.  The vnode's state
2141  * is not changed by this call.
2142  */
2143 static int
checkvp_chdir(struct vnode * vp,struct thread * td)2144 checkvp_chdir(struct vnode *vp, struct thread *td)
2145 {
2146           int error;
2147 
2148           if (vp->v_type != VDIR)
2149                     error = ENOTDIR;
2150           else
2151                     error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
2152           return (error);
2153 }
2154 
2155 int
kern_open(struct nlookupdata * nd,int oflags,int mode,int * res)2156 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
2157 {
2158           struct thread *td = curthread;
2159           struct proc *p = td->td_proc;
2160           struct lwp *lp = td->td_lwp;
2161           struct filedesc *fdp = p->p_fd;
2162           int cmode, flags;
2163           struct file *nfp;
2164           struct file *fp;
2165           int type, indx, error = 0;
2166           struct flock lf;
2167 
2168           if ((oflags & O_ACCMODE) == O_ACCMODE)
2169                     return (EINVAL);
2170           flags = FFLAGS(oflags);
2171           error = falloc(lp, &nfp, NULL);
2172           if (error)
2173                     return (error);
2174           fp = nfp;
2175           cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2176 
2177           /*
2178            * Call vn_open() to do the lookup and assign the vnode to the
2179            * file pointer.  vn_open() does not change the ref count on fp
2180            * and the vnode, on success, will be inherited by the file pointer
2181            * and unlocked.
2182            *
2183            * Request a shared lock on the vnode if possible.
2184            *
2185            * When NLC_SHAREDLOCK is set we may still need an exclusive vnode
2186            * lock for O_RDWR opens on executables in order to avoid a VTEXT
2187            * detection race.  The NLC_EXCLLOCK_IFEXEC handles this case.
2188            *
2189            * NOTE: We need a flag to separate terminal vnode locking from
2190            *         parent locking.  O_CREAT needs parent locking, but O_TRUNC
2191            *         and O_RDWR only need to lock the terminal vnode exclusively.
2192            */
2193           nd->nl_flags |= NLC_LOCKVP;
2194           if ((flags & (O_CREAT|O_TRUNC)) == 0) {
2195                     nd->nl_flags |= NLC_SHAREDLOCK;
2196                     if (flags & O_RDWR)
2197                               nd->nl_flags |= NLC_EXCLLOCK_IFEXEC;
2198           }
2199 
2200           /*
2201            * Issue the vn_open, passing in the referenced fp.  the vn_open()
2202            * is allowed to replace fp by fdrop()ing it and returning its own
2203            * referenced fp.
2204            */
2205           nfp = fp;
2206           error = vn_open(nd, &nfp, flags, cmode);
2207           fp = nfp;
2208           nlookup_done(nd);
2209 
2210           /*
2211            * Deal with any error condition
2212            */
2213           if (error) {
2214                     fdrop(fp);          /* our ref */
2215                     if (error == ERESTART)
2216                               error = EINTR;
2217                     return (error);
2218           }
2219 
2220           /*
2221            * Reserve a file descriptor.
2222            */
2223           if ((error = fdalloc(p, 0, &indx)) != 0) {
2224                     fdrop(fp);
2225                     return (error);
2226           }
2227 
2228           /*
2229            * Handle advisory lock flags.  This is only supported with vnodes.
2230            * For things like /dev/fd/N we might not actually get a vnode.
2231            */
2232           if ((flags & (O_EXLOCK | O_SHLOCK)) && fp->f_type == DTYPE_VNODE) {
2233                     struct vnode *vp;
2234 
2235                     vp = (struct vnode *)fp->f_data;
2236                     vref(vp);
2237 
2238                     lf.l_whence = SEEK_SET;
2239                     lf.l_start = 0;
2240                     lf.l_len = 0;
2241                     if (flags & O_EXLOCK)
2242                               lf.l_type = F_WRLCK;
2243                     else
2244                               lf.l_type = F_RDLCK;
2245                     if (flags & FNONBLOCK)
2246                               type = 0;
2247                     else
2248                               type = F_WAIT;
2249 
2250                     error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
2251                     if (error) {
2252                               /*
2253                                * lock request failed.  Clean up the reserved
2254                                * descriptor.
2255                                */
2256                               vrele(vp);
2257                               fsetfd(fdp, NULL, indx);
2258                               fdrop(fp);
2259                               return (error);
2260                     }
2261                     atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */
2262                     vrele(vp);
2263           }
2264 
2265           /*
2266            * release our private reference, leaving the one associated with the
2267            * descriptor table intact.
2268            */
2269           if (oflags & O_CLOEXEC)
2270                     fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
2271           fsetfd(fdp, fp, indx);
2272           fdrop(fp);
2273           *res = indx;
2274 
2275           return (error);
2276 }
2277 
2278 /*
2279  * open_args(char *path, int flags, int mode)
2280  *
2281  * Check permissions, allocate an open file structure,
2282  * and call the device open routine if any.
2283  */
2284 int
sys_open(struct sysmsg * sysmsg,const struct open_args * uap)2285 sys_open(struct sysmsg *sysmsg, const struct open_args *uap)
2286 {
2287           struct nlookupdata nd;
2288           int error;
2289 
2290           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2291           if (error == 0) {
2292                     error = kern_open(&nd, uap->flags,
2293                                             uap->mode, &sysmsg->sysmsg_result);
2294           }
2295           nlookup_done(&nd);
2296           return (error);
2297 }
2298 
2299 /*
2300  * openat_args(int fd, char *path, int flags, int mode)
2301  */
2302 int
sys_openat(struct sysmsg * sysmsg,const struct openat_args * uap)2303 sys_openat(struct sysmsg *sysmsg, const struct openat_args *uap)
2304 {
2305           struct nlookupdata nd;
2306           int error;
2307           struct file *fp;
2308 
2309           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2310           if (error == 0) {
2311                     error = kern_open(&nd, uap->flags, uap->mode,
2312                                                   &sysmsg->sysmsg_result);
2313           }
2314           nlookup_done_at(&nd, fp);
2315           return (error);
2316 }
2317 
2318 int
kern_mknod(struct nlookupdata * nd,int mode,int rmajor,int rminor)2319 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
2320 {
2321           struct thread *td = curthread;
2322           struct proc *p = td->td_proc;
2323           struct vnode *vp;
2324           struct vattr vattr;
2325           int error;
2326           int whiteout = 0;
2327 
2328           KKASSERT(p);
2329 
2330           VATTR_NULL(&vattr);
2331           vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
2332           vattr.va_rmajor = rmajor;
2333           vattr.va_rminor = rminor;
2334 
2335           switch (mode & S_IFMT) {
2336           case S_IFMT:        /* used by badsect to flag bad sectors */
2337                     error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_BAD);
2338                     vattr.va_type = VBAD;
2339                     break;
2340           case S_IFCHR:
2341                     error = caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV);
2342                     vattr.va_type = VCHR;
2343                     break;
2344           case S_IFBLK:
2345                     error = caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV);
2346                     vattr.va_type = VBLK;
2347                     break;
2348           case S_IFWHT:
2349                     error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_WHT);
2350                     whiteout = 1;
2351                     break;
2352           case S_IFDIR:       /* special directories support for HAMMER */
2353                     error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_DIR);
2354                     vattr.va_type = VDIR;
2355                     break;
2356           case S_IFIFO:
2357                     return (kern_mkfifo(nd, mode));
2358                     break;
2359           default:
2360                     error = EINVAL;
2361                     break;
2362           }
2363 
2364           if (error)
2365                     return (error);
2366 
2367           bwillinode(1);
2368           nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2369           if ((error = nlookup(nd)) != 0)
2370                     return (error);
2371           if (nd->nl_nch.ncp->nc_vp)
2372                     return (EEXIST);
2373           if (nd->nl_dvp == NULL)
2374                     return (EINVAL);
2375           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2376                     return (error);
2377 
2378           if (whiteout) {
2379                     error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
2380                                               nd->nl_cred, NAMEI_CREATE);
2381           } else {
2382                     vp = NULL;
2383                     error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
2384                                            &vp, nd->nl_cred, &vattr);
2385                     if (error == 0)
2386                               vput(vp);
2387           }
2388           return (error);
2389 }
2390 
2391 /*
2392  * mknod_args(char *path, int mode, int dev)
2393  *
2394  * Create a special file.
2395  */
2396 int
sys_mknod(struct sysmsg * sysmsg,const struct mknod_args * uap)2397 sys_mknod(struct sysmsg *sysmsg, const struct mknod_args *uap)
2398 {
2399           struct nlookupdata nd;
2400           int error;
2401 
2402           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2403           if (error == 0) {
2404                     error = kern_mknod(&nd, uap->mode,
2405                                            umajor(uap->dev), uminor(uap->dev));
2406           }
2407           nlookup_done(&nd);
2408           return (error);
2409 }
2410 
2411 /*
2412  * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
2413  *
2414  * Create a special file.  The path is relative to the directory associated
2415  * with fd.
2416  */
2417 int
sys_mknodat(struct sysmsg * sysmsg,const struct mknodat_args * uap)2418 sys_mknodat(struct sysmsg *sysmsg, const struct mknodat_args *uap)
2419 {
2420           struct nlookupdata nd;
2421           struct file *fp;
2422           int error;
2423 
2424           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2425           if (error == 0) {
2426                     error = kern_mknod(&nd, uap->mode,
2427                                            umajor(uap->dev), uminor(uap->dev));
2428           }
2429           nlookup_done_at(&nd, fp);
2430           return (error);
2431 }
2432 
2433 int
kern_mkfifo(struct nlookupdata * nd,int mode)2434 kern_mkfifo(struct nlookupdata *nd, int mode)
2435 {
2436           struct thread *td = curthread;
2437           struct proc *p = td->td_proc;
2438           struct vattr vattr;
2439           struct vnode *vp;
2440           int error;
2441 
2442           bwillinode(1);
2443 
2444           nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2445           if ((error = nlookup(nd)) != 0)
2446                     return (error);
2447           if (nd->nl_nch.ncp->nc_vp)
2448                     return (EEXIST);
2449           if (nd->nl_dvp == NULL)
2450                     return (EINVAL);
2451           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2452                     return (error);
2453 
2454           VATTR_NULL(&vattr);
2455           vattr.va_type = VFIFO;
2456           vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
2457           vp = NULL;
2458           error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
2459           if (error == 0)
2460                     vput(vp);
2461           return (error);
2462 }
2463 
2464 /*
2465  * mkfifo_args(char *path, int mode)
2466  *
2467  * Create a named pipe.
2468  */
2469 int
sys_mkfifo(struct sysmsg * sysmsg,const struct mkfifo_args * uap)2470 sys_mkfifo(struct sysmsg *sysmsg, const struct mkfifo_args *uap)
2471 {
2472           struct nlookupdata nd;
2473           int error;
2474 
2475           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2476           if (error == 0)
2477                     error = kern_mkfifo(&nd, uap->mode);
2478           nlookup_done(&nd);
2479           return (error);
2480 }
2481 
2482 /*
2483  * mkfifoat_args(int fd, char *path, mode_t mode)
2484  *
2485  * Create a named pipe.  The path is relative to the directory associated
2486  * with fd.
2487  */
2488 int
sys_mkfifoat(struct sysmsg * sysmsg,const struct mkfifoat_args * uap)2489 sys_mkfifoat(struct sysmsg *sysmsg, const struct mkfifoat_args *uap)
2490 {
2491           struct nlookupdata nd;
2492           struct file *fp;
2493           int error;
2494 
2495           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2496           if (error == 0)
2497                     error = kern_mkfifo(&nd, uap->mode);
2498           nlookup_done_at(&nd, fp);
2499           return (error);
2500 }
2501 
2502 static int hardlink_check_uid = 0;
2503 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
2504     &hardlink_check_uid, 0,
2505     "Unprivileged processes cannot create hard links to files owned by other "
2506     "users");
2507 static int hardlink_check_gid = 0;
2508 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
2509     &hardlink_check_gid, 0,
2510     "Unprivileged processes cannot create hard links to files owned by other "
2511     "groups");
2512 
2513 static int
can_hardlink(struct vnode * vp,struct thread * td,struct ucred * cred)2514 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
2515 {
2516           struct vattr va;
2517           int error;
2518 
2519           /*
2520            * Shortcut if disabled
2521            */
2522           if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
2523                     return (0);
2524 
2525           /*
2526            * Privileged user can always hardlink
2527            */
2528           if (caps_priv_check(cred, SYSCAP_NOVFS_LINK) == 0)
2529                     return (0);
2530 
2531           /*
2532            * Otherwise only if the originating file is owned by the
2533            * same user or group.  Note that any group is allowed if
2534            * the file is owned by the caller.
2535            */
2536           error = VOP_GETATTR(vp, &va);
2537           if (error != 0)
2538                     return (error);
2539 
2540           if (hardlink_check_uid) {
2541                     if (cred->cr_uid != va.va_uid)
2542                               return (EPERM);
2543           }
2544 
2545           if (hardlink_check_gid) {
2546                     if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
2547                               return (EPERM);
2548           }
2549 
2550           return (0);
2551 }
2552 
2553 int
kern_link(struct nlookupdata * nd,struct nlookupdata * linknd)2554 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
2555 {
2556           struct thread *td = curthread;
2557           struct vnode *vp;
2558           int error;
2559 
2560           /*
2561            * Lookup the source and obtained a locked vnode.
2562            *
2563            * You may only hardlink a file which you have write permission
2564            * on or which you own.
2565            *
2566            * XXX relookup on vget failure / race ?
2567            */
2568           bwillinode(1);
2569           nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
2570           if ((error = nlookup(nd)) != 0)
2571                     return (error);
2572           vp = nd->nl_nch.ncp->nc_vp;
2573           KKASSERT(vp != NULL);
2574           if (vp->v_type == VDIR)
2575                     return (EPERM);               /* POSIX */
2576           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2577                     return (error);
2578           if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
2579                     return (error);
2580 
2581           /*
2582            * Unlock the source so we can lookup the target without deadlocking
2583            * (XXX vp is locked already, possible other deadlock?).  The target
2584            * must not exist.
2585            */
2586           KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
2587           nd->nl_flags &= ~NLC_NCPISLOCKED;
2588           cache_unlock(&nd->nl_nch);
2589           vn_unlock(vp);
2590 
2591           linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2592           if ((error = nlookup(linknd)) != 0) {
2593                     vrele(vp);
2594                     return (error);
2595           }
2596           if (linknd->nl_nch.ncp->nc_vp) {
2597                     vrele(vp);
2598                     return (EEXIST);
2599           }
2600           if (linknd->nl_dvp == NULL) {
2601                     vrele(vp);
2602                     return (EINVAL);
2603           }
2604           VFS_MODIFYING(vp->v_mount);
2605           error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
2606           if (error) {
2607                     vrele(vp);
2608                     return (error);
2609           }
2610 
2611           /*
2612            * Finally run the new API VOP.
2613            */
2614           error = can_hardlink(vp, td, td->td_ucred);
2615           if (error == 0) {
2616                     error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
2617                                           vp, linknd->nl_cred);
2618           }
2619           vput(vp);
2620           return (error);
2621 }
2622 
2623 /*
2624  * link_args(char *path, char *link)
2625  *
2626  * Make a hard file link.
2627  */
2628 int
sys_link(struct sysmsg * sysmsg,const struct link_args * uap)2629 sys_link(struct sysmsg *sysmsg, const struct link_args *uap)
2630 {
2631           struct nlookupdata nd, linknd;
2632           int error;
2633 
2634           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2635           if (error == 0) {
2636                     error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
2637                     if (error == 0)
2638                               error = kern_link(&nd, &linknd);
2639                     nlookup_done(&linknd);
2640           }
2641           nlookup_done(&nd);
2642           return (error);
2643 }
2644 
2645 /*
2646  * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
2647  *
2648  * Make a hard file link. The path1 argument is relative to the directory
2649  * associated with fd1, and similarly the path2 argument is relative to
2650  * the directory associated with fd2.
2651  */
2652 int
sys_linkat(struct sysmsg * sysmsg,const struct linkat_args * uap)2653 sys_linkat(struct sysmsg *sysmsg, const struct linkat_args *uap)
2654 {
2655           struct nlookupdata nd, linknd;
2656           struct file *fp1, *fp2;
2657           int error;
2658 
2659           error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
2660               (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
2661           if (error == 0) {
2662                     error = nlookup_init_at(&linknd, &fp2, uap->fd2,
2663                         uap->path2, UIO_USERSPACE, 0);
2664                     if (error == 0)
2665                               error = kern_link(&nd, &linknd);
2666                     nlookup_done_at(&linknd, fp2);
2667           }
2668           nlookup_done_at(&nd, fp1);
2669           return (error);
2670 }
2671 
2672 int
kern_symlink(struct nlookupdata * nd,char * path,int mode)2673 kern_symlink(struct nlookupdata *nd, char *path, int mode)
2674 {
2675           struct vattr vattr;
2676           struct vnode *vp;
2677           struct vnode *dvp;
2678           int error;
2679 
2680           bwillinode(1);
2681           nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2682           if ((error = nlookup(nd)) != 0)
2683                     return (error);
2684           if (nd->nl_nch.ncp->nc_vp)
2685                     return (EEXIST);
2686           if (nd->nl_dvp == NULL)
2687                     return (EINVAL);
2688           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2689                     return (error);
2690           dvp = nd->nl_dvp;
2691           VATTR_NULL(&vattr);
2692           vattr.va_mode = mode;
2693           error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
2694           if (error == 0)
2695                     vput(vp);
2696           return (error);
2697 }
2698 
2699 /*
2700  * symlink(char *path, char *link)
2701  *
2702  * Make a symbolic link.
2703  */
2704 int
sys_symlink(struct sysmsg * sysmsg,const struct symlink_args * uap)2705 sys_symlink(struct sysmsg *sysmsg, const struct symlink_args *uap)
2706 {
2707           struct thread *td = curthread;
2708           struct nlookupdata nd;
2709           char *path;
2710           int error;
2711           int mode;
2712 
2713           path = objcache_get(namei_oc, M_WAITOK);
2714           error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
2715           if (error == 0) {
2716                     error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
2717                     if (error == 0) {
2718                               mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
2719                               error = kern_symlink(&nd, path, mode);
2720                     }
2721                     nlookup_done(&nd);
2722           }
2723           objcache_put(namei_oc, path);
2724           return (error);
2725 }
2726 
2727 /*
2728  * symlinkat_args(char *path1, int fd, char *path2)
2729  *
2730  * Make a symbolic link.  The path2 argument is relative to the directory
2731  * associated with fd.
2732  */
2733 int
sys_symlinkat(struct sysmsg * sysmsg,const struct symlinkat_args * uap)2734 sys_symlinkat(struct sysmsg *sysmsg, const struct symlinkat_args *uap)
2735 {
2736           struct thread *td = curthread;
2737           struct nlookupdata nd;
2738           struct file *fp;
2739           char *path1;
2740           int error;
2741           int mode;
2742 
2743           path1 = objcache_get(namei_oc, M_WAITOK);
2744           error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
2745           if (error == 0) {
2746                     error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
2747                         UIO_USERSPACE, 0);
2748                     if (error == 0) {
2749                               mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
2750                               error = kern_symlink(&nd, path1, mode);
2751                     }
2752                     nlookup_done_at(&nd, fp);
2753           }
2754           objcache_put(namei_oc, path1);
2755           return (error);
2756 }
2757 
2758 /*
2759  * undelete_args(char *path)
2760  *
2761  * Delete a whiteout from the filesystem.
2762  */
2763 int
sys_undelete(struct sysmsg * sysmsg,const struct undelete_args * uap)2764 sys_undelete(struct sysmsg *sysmsg, const struct undelete_args *uap)
2765 {
2766           struct nlookupdata nd;
2767           int error;
2768 
2769           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2770           bwillinode(1);
2771           nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
2772           if (error == 0)
2773                     error = nlookup(&nd);
2774           if (error == 0 && nd.nl_dvp == NULL)
2775                     error = EINVAL;
2776           if (error == 0)
2777                     error = ncp_writechk(&nd.nl_nch);
2778           if (error == 0) {
2779                     error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
2780                                               NAMEI_DELETE);
2781           }
2782           nlookup_done(&nd);
2783           return (error);
2784 }
2785 
2786 int
kern_unlink(struct nlookupdata * nd)2787 kern_unlink(struct nlookupdata *nd)
2788 {
2789           int error;
2790 
2791           bwillinode(1);
2792           nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
2793           if ((error = nlookup(nd)) != 0)
2794                     return (error);
2795           if (nd->nl_dvp == NULL)
2796                     return EINVAL;
2797           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2798                     return (error);
2799           error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
2800           return (error);
2801 }
2802 
2803 /*
2804  * unlink_args(char *path)
2805  *
2806  * Delete a name from the filesystem.
2807  */
2808 int
sys_unlink(struct sysmsg * sysmsg,const struct unlink_args * uap)2809 sys_unlink(struct sysmsg *sysmsg, const struct unlink_args *uap)
2810 {
2811           struct nlookupdata nd;
2812           int error;
2813 
2814           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2815           if (error == 0)
2816                     error = kern_unlink(&nd);
2817           nlookup_done(&nd);
2818           return (error);
2819 }
2820 
2821 
2822 /*
2823  * unlinkat_args(int fd, char *path, int flags)
2824  *
2825  * Delete the file or directory entry pointed to by fd/path.
2826  */
2827 int
sys_unlinkat(struct sysmsg * sysmsg,const struct unlinkat_args * uap)2828 sys_unlinkat(struct sysmsg *sysmsg, const struct unlinkat_args *uap)
2829 {
2830           struct nlookupdata nd;
2831           struct file *fp;
2832           int error;
2833 
2834           if (uap->flags & ~AT_REMOVEDIR)
2835                     return (EINVAL);
2836 
2837           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2838           if (error == 0) {
2839                     if (uap->flags & AT_REMOVEDIR)
2840                               error = kern_rmdir(&nd);
2841                     else
2842                               error = kern_unlink(&nd);
2843           }
2844           nlookup_done_at(&nd, fp);
2845           return (error);
2846 }
2847 
2848 int
kern_lseek(int fd,off_t offset,int whence,off_t * res)2849 kern_lseek(int fd, off_t offset, int whence, off_t *res)
2850 {
2851           struct thread *td = curthread;
2852           struct file *fp;
2853           int error;
2854 
2855           fp = holdfp(td, fd, -1);
2856           if (fp == NULL)
2857                     return (EBADF);
2858 
2859           error = fo_seek(fp, offset, whence, res);
2860           dropfp(td, fd, fp);
2861 
2862           return (error);
2863 }
2864 
2865 /*
2866  * lseek_args(int fd, int pad, off_t offset, int whence)
2867  *
2868  * Reposition read/write file offset.
2869  */
2870 int
sys_lseek(struct sysmsg * sysmsg,const struct lseek_args * uap)2871 sys_lseek(struct sysmsg *sysmsg, const struct lseek_args *uap)
2872 {
2873           int error;
2874 
2875           error = kern_lseek(uap->fd, uap->offset, uap->whence,
2876                                  &sysmsg->sysmsg_offset);
2877 
2878           return (error);
2879 }
2880 
2881 /*
2882  * Check if current process can access given file.  amode is a bitmask of *_OK
2883  * access bits.  flags is a bitmask of AT_* flags.
2884  */
2885 int
kern_access(struct nlookupdata * nd,int amode,int flags)2886 kern_access(struct nlookupdata *nd, int amode, int flags)
2887 {
2888           struct vnode *vp;
2889           int error, mode;
2890 
2891           if (flags & ~AT_EACCESS)
2892                     return (EINVAL);
2893           nd->nl_flags |= NLC_SHAREDLOCK;
2894           if ((error = nlookup(nd)) != 0)
2895                     return (error);
2896           if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0)
2897                     return (error);
2898 retry:
2899           error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
2900           if (error)
2901                     return (error);
2902 
2903           /* Flags == 0 means only check for existence. */
2904           if (amode) {
2905                     mode = 0;
2906                     if (amode & R_OK)
2907                               mode |= VREAD;
2908                     if (amode & W_OK)
2909                               mode |= VWRITE;
2910                     if (amode & X_OK)
2911                               mode |= VEXEC;
2912                     if ((mode & VWRITE) == 0 ||
2913                         (error = vn_writechk(vp)) == 0) {
2914                               error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
2915                     }
2916 
2917                     /*
2918                      * If the file handle is stale we have to re-resolve the
2919                      * entry with the ncp held exclusively.  This is a hack
2920                      * at the moment.
2921                      */
2922                     if (error == ESTALE) {
2923                               u_int dummy_gen;
2924 
2925                               vput(vp);
2926                               cache_unlock(&nd->nl_nch);
2927                               cache_lock(&nd->nl_nch);
2928                               dummy_gen = nd->nl_nch.ncp->nc_generation;
2929                               cache_setunresolved(&nd->nl_nch);
2930                               error = cache_resolve(&nd->nl_nch, &dummy_gen,
2931                                                         nd->nl_cred);
2932                               if (error == 0) {
2933                                         vp = NULL;
2934                                         goto retry;
2935                               }
2936                               return(error);
2937                     }
2938           }
2939           vput(vp);
2940           return (error);
2941 }
2942 
2943 /*
2944  * access_args(char *path, int flags)
2945  *
2946  * Check access permissions.
2947  */
2948 int
sys_access(struct sysmsg * sysmsg,const struct access_args * uap)2949 sys_access(struct sysmsg *sysmsg, const struct access_args *uap)
2950 {
2951           struct nlookupdata nd;
2952           int error;
2953 
2954           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2955           if (error == 0)
2956                     error = kern_access(&nd, uap->flags, 0);
2957           nlookup_done(&nd);
2958           return (error);
2959 }
2960 
2961 
2962 /*
2963  * eaccess_args(char *path, int flags)
2964  *
2965  * Check access permissions.
2966  */
2967 int
sys_eaccess(struct sysmsg * sysmsg,const struct eaccess_args * uap)2968 sys_eaccess(struct sysmsg *sysmsg, const struct eaccess_args *uap)
2969 {
2970           struct nlookupdata nd;
2971           int error;
2972 
2973           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2974           if (error == 0)
2975                     error = kern_access(&nd, uap->flags, AT_EACCESS);
2976           nlookup_done(&nd);
2977           return (error);
2978 }
2979 
2980 
2981 /*
2982  * faccessat_args(int fd, char *path, int amode, int flags)
2983  *
2984  * Check access permissions.
2985  */
2986 int
sys_faccessat(struct sysmsg * sysmsg,const struct faccessat_args * uap)2987 sys_faccessat(struct sysmsg *sysmsg, const struct faccessat_args *uap)
2988 {
2989           struct nlookupdata nd;
2990           struct file *fp;
2991           int error;
2992 
2993           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
2994                                         NLC_FOLLOW);
2995           if (error == 0)
2996                     error = kern_access(&nd, uap->amode, uap->flags);
2997           nlookup_done_at(&nd, fp);
2998           return (error);
2999 }
3000 
3001 int
kern_stat(struct nlookupdata * nd,struct stat * st)3002 kern_stat(struct nlookupdata *nd, struct stat *st)
3003 {
3004           int error;
3005           struct vnode *vp;
3006 
3007           nd->nl_flags |= NLC_SHAREDLOCK;
3008           if ((error = nlookup(nd)) != 0)
3009                     return (error);
3010 again:
3011           if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
3012                     return (ENOENT);
3013 
3014 #if 1
3015           error = cache_vref(&nd->nl_nch, NULL, &vp);
3016 #else
3017           error = vget(vp, LK_SHARED);
3018 #endif
3019           if (error)
3020                     return (error);
3021           error = vn_stat(vp, st, nd->nl_cred);
3022 
3023           /*
3024            * If the file handle is stale we have to re-resolve the
3025            * entry with the ncp held exclusively.  This is a hack
3026            * at the moment.
3027            */
3028           if (error == ESTALE) {
3029                     u_int dummy_gen;
3030 #if 1
3031                     vrele(vp);
3032 #else
3033                     vput(vp);
3034 #endif
3035                     cache_unlock(&nd->nl_nch);
3036                     cache_lock(&nd->nl_nch);
3037                     dummy_gen = nd->nl_nch.ncp->nc_generation;
3038                     cache_setunresolved(&nd->nl_nch);
3039                     error = cache_resolve(&nd->nl_nch, &dummy_gen, nd->nl_cred);
3040                     if (error == 0)
3041                               goto again;
3042           } else {
3043 #if 1
3044                     vrele(vp);
3045 #else
3046                     vput(vp);
3047 #endif
3048           }
3049           return (error);
3050 }
3051 
3052 /*
3053  * stat_args(char *path, struct stat *ub)
3054  *
3055  * Get file status; this version follows links.
3056  */
3057 int
sys_stat(struct sysmsg * sysmsg,const struct stat_args * uap)3058 sys_stat(struct sysmsg *sysmsg, const struct stat_args *uap)
3059 {
3060           struct nlookupdata nd;
3061           struct stat st;
3062           int error;
3063 
3064           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3065           if (error == 0) {
3066                     error = kern_stat(&nd, &st);
3067                     if (error == 0)
3068                               error = copyout(&st, uap->ub, sizeof(*uap->ub));
3069           }
3070           nlookup_done(&nd);
3071           return (error);
3072 }
3073 
3074 /*
3075  * lstat_args(char *path, struct stat *ub)
3076  *
3077  * Get file status; this version does not follow links.
3078  */
3079 int
sys_lstat(struct sysmsg * sysmsg,const struct lstat_args * uap)3080 sys_lstat(struct sysmsg *sysmsg, const struct lstat_args *uap)
3081 {
3082           struct nlookupdata nd;
3083           struct stat st;
3084           int error;
3085 
3086           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3087           if (error == 0) {
3088                     error = kern_stat(&nd, &st);
3089                     if (error == 0)
3090                               error = copyout(&st, uap->ub, sizeof(*uap->ub));
3091           }
3092           nlookup_done(&nd);
3093           return (error);
3094 }
3095 
3096 /*
3097  * fstatat_args(int fd, char *path, struct stat *sb, int flags)
3098  *
3099  * Get status of file pointed to by fd/path.
3100  */
3101 int
sys_fstatat(struct sysmsg * sysmsg,const struct fstatat_args * uap)3102 sys_fstatat(struct sysmsg *sysmsg, const struct fstatat_args *uap)
3103 {
3104           struct nlookupdata nd;
3105           struct stat st;
3106           int error;
3107           int flags;
3108           struct file *fp;
3109 
3110           if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
3111                     return (EINVAL);
3112 
3113           flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3114 
3115           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3116                                         UIO_USERSPACE, flags);
3117           if (error == 0) {
3118                     error = kern_stat(&nd, &st);
3119                     if (error == 0)
3120                               error = copyout(&st, uap->sb, sizeof(*uap->sb));
3121           }
3122           nlookup_done_at(&nd, fp);
3123           return (error);
3124 }
3125 
3126 static int
kern_pathconf(char * path,int name,int flags,register_t * sysmsg_regp)3127 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
3128 {
3129           struct nlookupdata nd;
3130           struct vnode *vp;
3131           int error;
3132 
3133           vp = NULL;
3134           error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
3135           if (error == 0)
3136                     error = nlookup(&nd);
3137           if (error == 0)
3138                     error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
3139           nlookup_done(&nd);
3140           if (error == 0) {
3141                     error = VOP_PATHCONF(vp, name, sysmsg_regp);
3142                     vput(vp);
3143           }
3144           return (error);
3145 }
3146 
3147 /*
3148  * pathconf_Args(char *path, int name)
3149  *
3150  * Get configurable pathname variables.
3151  */
3152 int
sys_pathconf(struct sysmsg * sysmsg,const struct pathconf_args * uap)3153 sys_pathconf(struct sysmsg *sysmsg, const struct pathconf_args *uap)
3154 {
3155           return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW,
3156                     &sysmsg->sysmsg_reg));
3157 }
3158 
3159 /*
3160  * lpathconf_Args(char *path, int name)
3161  *
3162  * Get configurable pathname variables, but don't follow symlinks.
3163  */
3164 int
sys_lpathconf(struct sysmsg * sysmsg,const struct lpathconf_args * uap)3165 sys_lpathconf(struct sysmsg *sysmsg, const struct lpathconf_args *uap)
3166 {
3167           return (kern_pathconf(uap->path, uap->name, 0, &sysmsg->sysmsg_reg));
3168 }
3169 
3170 /*
3171  * XXX: daver
3172  * kern_readlink isn't properly split yet.  There is a copyin burried
3173  * in VOP_READLINK().
3174  */
3175 int
kern_readlink(struct nlookupdata * nd,char * buf,int count,int * res)3176 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res)
3177 {
3178           struct thread *td = curthread;
3179           struct vnode *vp;
3180           struct iovec aiov;
3181           struct uio auio;
3182           int error;
3183 
3184           nd->nl_flags |= NLC_SHAREDLOCK;
3185           if ((error = nlookup(nd)) != 0)
3186                     return (error);
3187           error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
3188           if (error)
3189                     return (error);
3190           if (vp->v_type != VLNK) {
3191                     error = EINVAL;
3192           } else {
3193                     aiov.iov_base = buf;
3194                     aiov.iov_len = count;
3195                     auio.uio_iov = &aiov;
3196                     auio.uio_iovcnt = 1;
3197                     auio.uio_offset = 0;
3198                     auio.uio_rw = UIO_READ;
3199                     auio.uio_segflg = UIO_USERSPACE;
3200                     auio.uio_td = td;
3201                     auio.uio_resid = count;
3202                     error = VOP_READLINK(vp, &auio, td->td_ucred);
3203           }
3204           vput(vp);
3205           *res = count - auio.uio_resid;
3206           return (error);
3207 }
3208 
3209 /*
3210  * readlink_args(char *path, char *buf, int count)
3211  *
3212  * Return target name of a symbolic link.
3213  */
3214 int
sys_readlink(struct sysmsg * sysmsg,const struct readlink_args * uap)3215 sys_readlink(struct sysmsg *sysmsg, const struct readlink_args *uap)
3216 {
3217           struct nlookupdata nd;
3218           int error;
3219 
3220           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3221           if (error == 0) {
3222                     error = kern_readlink(&nd, uap->buf, uap->count,
3223                                                   &sysmsg->sysmsg_result);
3224           }
3225           nlookup_done(&nd);
3226           return (error);
3227 }
3228 
3229 /*
3230  * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
3231  *
3232  * Return target name of a symbolic link.  The path is relative to the
3233  * directory associated with fd.
3234  */
3235 int
sys_readlinkat(struct sysmsg * sysmsg,const struct readlinkat_args * uap)3236 sys_readlinkat(struct sysmsg *sysmsg, const struct readlinkat_args *uap)
3237 {
3238           struct nlookupdata nd;
3239           struct file *fp;
3240           int error;
3241 
3242           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
3243           if (error == 0) {
3244                     error = kern_readlink(&nd, uap->buf, uap->bufsize,
3245                                                   &sysmsg->sysmsg_result);
3246           }
3247           nlookup_done_at(&nd, fp);
3248           return (error);
3249 }
3250 
3251 static int
setfflags(struct vnode * vp,u_long flags)3252 setfflags(struct vnode *vp, u_long flags)
3253 {
3254           struct thread *td = curthread;
3255           int error;
3256           struct vattr vattr;
3257 
3258           /*
3259            * Prevent non-root users from setting flags on devices.  When
3260            * a device is reused, users can retain ownership of the device
3261            * if they are allowed to set flags and programs assume that
3262            * chown can't fail when done as root.
3263            */
3264           if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
3265               ((error =
3266                     caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHFLAGS_DEV)) != 0))
3267           {
3268                     return (error);
3269           }
3270 
3271           /*
3272            * note: vget is required for any operation that might mod the vnode
3273            * so VINACTIVE is properly cleared.
3274            */
3275           if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
3276                     VATTR_NULL(&vattr);
3277                     vattr.va_flags = flags;
3278                     error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3279                     vput(vp);
3280           }
3281           return (error);
3282 }
3283 
3284 /*
3285  * chflags(const char *path, u_long flags)
3286  *
3287  * Change flags of a file given a path name.
3288  */
3289 int
sys_chflags(struct sysmsg * sysmsg,const struct chflags_args * uap)3290 sys_chflags(struct sysmsg *sysmsg, const struct chflags_args *uap)
3291 {
3292           struct nlookupdata nd;
3293           struct vnode *vp;
3294           int error;
3295 
3296           vp = NULL;
3297           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3298           if (error == 0)
3299                     error = nlookup(&nd);
3300           if (error == 0)
3301                     error = ncp_writechk(&nd.nl_nch);
3302           if (error == 0)
3303                     error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3304           nlookup_done(&nd);
3305           if (error == 0) {
3306                     error = setfflags(vp, uap->flags);
3307                     vrele(vp);
3308           }
3309           return (error);
3310 }
3311 
3312 /*
3313  * lchflags(const char *path, u_long flags)
3314  *
3315  * Change flags of a file given a path name, but don't follow symlinks.
3316  */
3317 int
sys_lchflags(struct sysmsg * sysmsg,const struct lchflags_args * uap)3318 sys_lchflags(struct sysmsg *sysmsg, const struct lchflags_args *uap)
3319 {
3320           struct nlookupdata nd;
3321           struct vnode *vp;
3322           int error;
3323 
3324           vp = NULL;
3325           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3326           if (error == 0)
3327                     error = nlookup(&nd);
3328           if (error == 0)
3329                     error = ncp_writechk(&nd.nl_nch);
3330           if (error == 0)
3331                     error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3332           nlookup_done(&nd);
3333           if (error == 0) {
3334                     error = setfflags(vp, uap->flags);
3335                     vrele(vp);
3336           }
3337           return (error);
3338 }
3339 
3340 /*
3341  * fchflags_args(int fd, u_flags flags)
3342  *
3343  * Change flags of a file given a file descriptor.
3344  */
3345 int
sys_fchflags(struct sysmsg * sysmsg,const struct fchflags_args * uap)3346 sys_fchflags(struct sysmsg *sysmsg, const struct fchflags_args *uap)
3347 {
3348           struct thread *td = curthread;
3349           struct file *fp;
3350           int error;
3351 
3352           if ((error = holdvnode(td, uap->fd, &fp)) != 0)
3353                     return (error);
3354           if (fp->f_nchandle.ncp)
3355                     error = ncp_writechk(&fp->f_nchandle);
3356           if (error == 0)
3357                     error = setfflags((struct vnode *) fp->f_data, uap->flags);
3358           fdrop(fp);
3359           return (error);
3360 }
3361 
3362 /*
3363  * chflagsat_args(int fd, const char *path, u_long flags, int atflags)
3364  * change flags given a pathname relative to a filedescriptor
3365  */
3366 int
sys_chflagsat(struct sysmsg * sysmsg,const struct chflagsat_args * uap)3367 sys_chflagsat(struct sysmsg *sysmsg, const struct chflagsat_args *uap)
3368 {
3369           struct nlookupdata nd;
3370           struct vnode *vp;
3371           struct file *fp;
3372           int error;
3373           int lookupflags;
3374 
3375           if (uap->atflags & ~AT_SYMLINK_NOFOLLOW)
3376                     return (EINVAL);
3377 
3378           lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3379 
3380           vp = NULL;
3381           error = nlookup_init_at(&nd, &fp, uap->fd,  uap->path, UIO_USERSPACE, lookupflags);
3382           if (error == 0)
3383                     error = nlookup(&nd);
3384           if (error == 0)
3385                     error = ncp_writechk(&nd.nl_nch);
3386           if (error == 0)
3387                     error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3388           nlookup_done_at(&nd, fp);
3389           if (error == 0) {
3390                     error = setfflags(vp, uap->flags);
3391                     vrele(vp);
3392           }
3393           return (error);
3394 }
3395 
3396 
3397 static int
setfmode(struct vnode * vp,int mode)3398 setfmode(struct vnode *vp, int mode)
3399 {
3400           struct thread *td = curthread;
3401           int error;
3402           struct vattr vattr;
3403 
3404           /*
3405            * note: vget is required for any operation that might mod the vnode
3406            * so VINACTIVE is properly cleared.
3407            */
3408           if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
3409                     VATTR_NULL(&vattr);
3410                     vattr.va_mode = mode & ALLPERMS;
3411                     error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3412                     cache_inval_wxok(vp);
3413                     vput(vp);
3414           }
3415           return error;
3416 }
3417 
3418 int
kern_chmod(struct nlookupdata * nd,int mode)3419 kern_chmod(struct nlookupdata *nd, int mode)
3420 {
3421           struct vnode *vp;
3422           int error;
3423 
3424           if ((error = nlookup(nd)) != 0)
3425                     return (error);
3426           if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3427                     return (error);
3428           if ((error = ncp_writechk(&nd->nl_nch)) == 0)
3429                     error = setfmode(vp, mode);
3430           vrele(vp);
3431           return (error);
3432 }
3433 
3434 /*
3435  * chmod_args(char *path, int mode)
3436  *
3437  * Change mode of a file given path name.
3438  */
3439 int
sys_chmod(struct sysmsg * sysmsg,const struct chmod_args * uap)3440 sys_chmod(struct sysmsg *sysmsg, const struct chmod_args *uap)
3441 {
3442           struct nlookupdata nd;
3443           int error;
3444 
3445           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3446           if (error == 0)
3447                     error = kern_chmod(&nd, uap->mode);
3448           nlookup_done(&nd);
3449           return (error);
3450 }
3451 
3452 /*
3453  * lchmod_args(char *path, int mode)
3454  *
3455  * Change mode of a file given path name (don't follow links.)
3456  */
3457 int
sys_lchmod(struct sysmsg * sysmsg,const struct lchmod_args * uap)3458 sys_lchmod(struct sysmsg *sysmsg, const struct lchmod_args *uap)
3459 {
3460           struct nlookupdata nd;
3461           int error;
3462 
3463           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3464           if (error == 0)
3465                     error = kern_chmod(&nd, uap->mode);
3466           nlookup_done(&nd);
3467           return (error);
3468 }
3469 
3470 /*
3471  * fchmod_args(int fd, int mode)
3472  *
3473  * Change mode of a file given a file descriptor.
3474  */
3475 int
sys_fchmod(struct sysmsg * sysmsg,const struct fchmod_args * uap)3476 sys_fchmod(struct sysmsg *sysmsg, const struct fchmod_args *uap)
3477 {
3478           struct thread *td = curthread;
3479           struct file *fp;
3480           int error;
3481 
3482           if ((error = holdvnode(td, uap->fd, &fp)) != 0)
3483                     return (error);
3484           if (fp->f_nchandle.ncp)
3485                     error = ncp_writechk(&fp->f_nchandle);
3486           if (error == 0)
3487                     error = setfmode((struct vnode *)fp->f_data, uap->mode);
3488           fdrop(fp);
3489           return (error);
3490 }
3491 
3492 /*
3493  * fchmodat_args(char *path, int mode)
3494  *
3495  * Change mode of a file pointed to by fd/path.
3496  */
3497 int
sys_fchmodat(struct sysmsg * sysmsg,const struct fchmodat_args * uap)3498 sys_fchmodat(struct sysmsg *sysmsg, const struct fchmodat_args *uap)
3499 {
3500           struct nlookupdata nd;
3501           struct file *fp;
3502           int error;
3503           int flags;
3504 
3505           if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
3506                     return (EINVAL);
3507           flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3508 
3509           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3510                                         UIO_USERSPACE, flags);
3511           if (error == 0)
3512                     error = kern_chmod(&nd, uap->mode);
3513           nlookup_done_at(&nd, fp);
3514           return (error);
3515 }
3516 
3517 static int
setfown(struct mount * mp,struct vnode * vp,uid_t uid,gid_t gid)3518 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid)
3519 {
3520           struct thread *td = curthread;
3521           int error;
3522           struct vattr vattr;
3523           uid_t o_uid;
3524           gid_t o_gid;
3525           uint64_t size;
3526 
3527           /*
3528            * note: vget is required for any operation that might mod the vnode
3529            * so VINACTIVE is properly cleared.
3530            */
3531           if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
3532                     if ((error = VOP_GETATTR(vp, &vattr)) != 0)
3533                               return error;
3534                     o_uid = vattr.va_uid;
3535                     o_gid = vattr.va_gid;
3536                     size = vattr.va_size;
3537 
3538                     VATTR_NULL(&vattr);
3539                     vattr.va_uid = uid;
3540                     vattr.va_gid = gid;
3541                     error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3542                     vput(vp);
3543           }
3544 
3545           if (error == 0) {
3546                     if (uid == -1)
3547                               uid = o_uid;
3548                     if (gid == -1)
3549                               gid = o_gid;
3550                     VFS_ACCOUNT(mp, o_uid, o_gid, -size);
3551                     VFS_ACCOUNT(mp,   uid,   gid,  size);
3552           }
3553 
3554           return error;
3555 }
3556 
3557 int
kern_chown(struct nlookupdata * nd,int uid,int gid)3558 kern_chown(struct nlookupdata *nd, int uid, int gid)
3559 {
3560           struct vnode *vp;
3561           int error;
3562 
3563           if ((error = nlookup(nd)) != 0)
3564                     return (error);
3565           if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3566                     return (error);
3567           if ((error = ncp_writechk(&nd->nl_nch)) == 0)
3568                     error = setfown(nd->nl_nch.mount, vp, uid, gid);
3569           vrele(vp);
3570           return (error);
3571 }
3572 
3573 /*
3574  * chown(char *path, int uid, int gid)
3575  *
3576  * Set ownership given a path name.
3577  */
3578 int
sys_chown(struct sysmsg * sysmsg,const struct chown_args * uap)3579 sys_chown(struct sysmsg *sysmsg, const struct chown_args *uap)
3580 {
3581           struct nlookupdata nd;
3582           int error;
3583 
3584           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3585           if (error == 0)
3586                     error = kern_chown(&nd, uap->uid, uap->gid);
3587           nlookup_done(&nd);
3588           return (error);
3589 }
3590 
3591 /*
3592  * lchown_args(char *path, int uid, int gid)
3593  *
3594  * Set ownership given a path name, do not cross symlinks.
3595  */
3596 int
sys_lchown(struct sysmsg * sysmsg,const struct lchown_args * uap)3597 sys_lchown(struct sysmsg *sysmsg, const struct lchown_args *uap)
3598 {
3599           struct nlookupdata nd;
3600           int error;
3601 
3602           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3603           if (error == 0)
3604                     error = kern_chown(&nd, uap->uid, uap->gid);
3605           nlookup_done(&nd);
3606           return (error);
3607 }
3608 
3609 /*
3610  * fchown_args(int fd, int uid, int gid)
3611  *
3612  * Set ownership given a file descriptor.
3613  */
3614 int
sys_fchown(struct sysmsg * sysmsg,const struct fchown_args * uap)3615 sys_fchown(struct sysmsg *sysmsg, const struct fchown_args *uap)
3616 {
3617           struct thread *td = curthread;
3618           struct proc *p = td->td_proc;
3619           struct file *fp;
3620           int error;
3621 
3622           if ((error = holdvnode(td, uap->fd, &fp)) != 0)
3623                     return (error);
3624           if (fp->f_nchandle.ncp)
3625                     error = ncp_writechk(&fp->f_nchandle);
3626           if (error == 0)
3627                     error = setfown(p->p_fd->fd_ncdir.mount,
3628                               (struct vnode *)fp->f_data, uap->uid, uap->gid);
3629           fdrop(fp);
3630           return (error);
3631 }
3632 
3633 /*
3634  * fchownat(int fd, char *path, int uid, int gid, int flags)
3635  *
3636  * Set ownership of file pointed to by fd/path.
3637  */
3638 int
sys_fchownat(struct sysmsg * sysmsg,const struct fchownat_args * uap)3639 sys_fchownat(struct sysmsg *sysmsg, const struct fchownat_args *uap)
3640 {
3641           struct nlookupdata nd;
3642           struct file *fp;
3643           int error;
3644           int flags;
3645 
3646           if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
3647                     return (EINVAL);
3648           flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3649 
3650           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3651                                         UIO_USERSPACE, flags);
3652           if (error == 0)
3653                     error = kern_chown(&nd, uap->uid, uap->gid);
3654           nlookup_done_at(&nd, fp);
3655           return (error);
3656 }
3657 
3658 
3659 static int
getutimes(struct timeval * tvp,struct timespec * tsp)3660 getutimes(struct timeval *tvp, struct timespec *tsp)
3661 {
3662           struct timeval tv[2];
3663           int error;
3664 
3665           if (tvp == NULL) {
3666                     microtime(&tv[0]);
3667                     TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3668                     tsp[1] = tsp[0];
3669           } else {
3670                     if ((error = itimerfix(tvp)) != 0)
3671                               return (error);
3672                     TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
3673                     TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
3674           }
3675           return 0;
3676 }
3677 
3678 static int
getutimens(const struct timespec * ts,struct timespec * newts,int * nullflag)3679 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag)
3680 {
3681           struct timespec tsnow;
3682           int error;
3683 
3684           *nullflag = 0;
3685           nanotime(&tsnow);
3686           if (ts == NULL) {
3687                     newts[0] = tsnow;
3688                     newts[1] = tsnow;
3689                     *nullflag = 1;
3690                     return (0);
3691           }
3692 
3693           newts[0] = ts[0];
3694           newts[1] = ts[1];
3695           if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) {
3696                     newts[0].tv_sec = VNOVAL;
3697                     newts[1].tv_sec = VNOVAL;
3698                     return (0);
3699           }
3700           if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW)
3701                     *nullflag = 1;
3702 
3703           if (newts[0].tv_nsec == UTIME_OMIT)
3704                     newts[0].tv_sec = VNOVAL;
3705           else if (newts[0].tv_nsec == UTIME_NOW)
3706                     newts[0] = tsnow;
3707           else if ((error = itimespecfix(&newts[0])) != 0)
3708                     return (error);
3709 
3710           if (newts[1].tv_nsec == UTIME_OMIT)
3711                     newts[1].tv_sec = VNOVAL;
3712           else if (newts[1].tv_nsec == UTIME_NOW)
3713                     newts[1] = tsnow;
3714           else if ((error = itimespecfix(&newts[1])) != 0)
3715                     return (error);
3716 
3717           return (0);
3718 }
3719 
3720 static int
setutimes(struct vnode * vp,struct vattr * vattr,const struct timespec * ts,int nullflag)3721 setutimes(struct vnode *vp, struct vattr *vattr,
3722             const struct timespec *ts, int nullflag)
3723 {
3724           struct thread *td = curthread;
3725           int error;
3726 
3727           VATTR_NULL(vattr);
3728           vattr->va_atime = ts[0];
3729           vattr->va_mtime = ts[1];
3730           if (nullflag)
3731                     vattr->va_vaflags |= VA_UTIMES_NULL;
3732           error = VOP_SETATTR(vp, vattr, td->td_ucred);
3733 
3734           return error;
3735 }
3736 
3737 int
kern_utimes(struct nlookupdata * nd,struct timeval * tptr)3738 kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
3739 {
3740           struct timespec ts[2];
3741           int error;
3742 
3743           if (tptr) {
3744                     if ((error = getutimes(tptr, ts)) != 0)
3745                               return (error);
3746           }
3747           error = kern_utimensat(nd, tptr ? ts : NULL, 0);
3748           return (error);
3749 }
3750 
3751 /*
3752  * utimes_args(char *path, struct timeval *tptr)
3753  *
3754  * Set the access and modification times of a file.
3755  */
3756 int
sys_utimes(struct sysmsg * sysmsg,const struct utimes_args * uap)3757 sys_utimes(struct sysmsg *sysmsg, const struct utimes_args *uap)
3758 {
3759           struct timeval tv[2];
3760           struct nlookupdata nd;
3761           int error;
3762 
3763           if (uap->tptr) {
3764                     error = copyin(uap->tptr, tv, sizeof(tv));
3765                     if (error)
3766                               return (error);
3767           }
3768           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3769           if (error == 0)
3770                     error = kern_utimes(&nd, uap->tptr ? tv : NULL);
3771           nlookup_done(&nd);
3772           return (error);
3773 }
3774 
3775 /*
3776  * lutimes_args(char *path, struct timeval *tptr)
3777  *
3778  * Set the access and modification times of a file.
3779  */
3780 int
sys_lutimes(struct sysmsg * sysmsg,const struct lutimes_args * uap)3781 sys_lutimes(struct sysmsg *sysmsg, const struct lutimes_args *uap)
3782 {
3783           struct timeval tv[2];
3784           struct nlookupdata nd;
3785           int error;
3786 
3787           if (uap->tptr) {
3788                     error = copyin(uap->tptr, tv, sizeof(tv));
3789                     if (error)
3790                               return (error);
3791           }
3792           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3793           if (error == 0)
3794                     error = kern_utimes(&nd, uap->tptr ? tv : NULL);
3795           nlookup_done(&nd);
3796           return (error);
3797 }
3798 
3799 /*
3800  * Set utimes on a file descriptor.  The creds used to open the
3801  * file are used to determine whether the operation is allowed
3802  * or not.
3803  */
3804 int
kern_futimens(int fd,struct timespec * ts)3805 kern_futimens(int fd, struct timespec *ts)
3806 {
3807           struct thread *td = curthread;
3808           struct timespec newts[2];
3809           struct file *fp;
3810           struct vnode *vp;
3811           struct vattr vattr;
3812           struct vattr_lite lva;
3813           int nullflag;
3814           int error;
3815 
3816           error = getutimens(ts, newts, &nullflag);
3817           if (error)
3818                     return (error);
3819           if ((error = holdvnode(td, fd, &fp)) != 0)
3820                     return (error);
3821           if (fp->f_nchandle.ncp)
3822                     error = ncp_writechk(&fp->f_nchandle);
3823           if (error == 0) {
3824                     vp = fp->f_data;
3825                     error = vget(vp, LK_EXCLUSIVE);
3826                     if (error == 0) {
3827                               error = VOP_GETATTR_FP(vp, &vattr, fp);
3828                               if (error == 0) {
3829                                         lva.va_type = vattr.va_type;
3830                                         lva.va_nlink = vattr.va_nlink;
3831                                         lva.va_mode = vattr.va_mode;
3832                                         lva.va_uid = vattr.va_uid;
3833                                         lva.va_gid = vattr.va_gid;
3834                                         lva.va_size = vattr.va_size;
3835                                         lva.va_flags = vattr.va_flags;
3836 
3837                                         error = naccess_lva(&lva, NLC_OWN | NLC_WRITE,
3838                                                                fp->f_cred);
3839                               }
3840                               if (error == 0) {
3841                                         error = setutimes(vp, &vattr, newts, nullflag);
3842                               }
3843                               vput(vp);
3844                     }
3845           }
3846           fdrop(fp);
3847           return (error);
3848 }
3849 
3850 /*
3851  * futimens_args(int fd, struct timespec *ts)
3852  *
3853  * Set the access and modification times of a file.
3854  */
3855 int
sys_futimens(struct sysmsg * sysmsg,const struct futimens_args * uap)3856 sys_futimens(struct sysmsg *sysmsg, const struct futimens_args *uap)
3857 {
3858           struct timespec ts[2];
3859           int error;
3860 
3861           if (uap->ts) {
3862                     error = copyin(uap->ts, ts, sizeof(ts));
3863                     if (error)
3864                               return (error);
3865           }
3866           error = kern_futimens(uap->fd, uap->ts ? ts : NULL);
3867           return (error);
3868 }
3869 
3870 int
kern_futimes(int fd,struct timeval * tptr)3871 kern_futimes(int fd, struct timeval *tptr)
3872 {
3873           struct timespec ts[2];
3874           int error;
3875 
3876           if (tptr) {
3877                     if ((error = getutimes(tptr, ts)) != 0)
3878                               return (error);
3879           }
3880           error = kern_futimens(fd, tptr ? ts : NULL);
3881           return (error);
3882 }
3883 
3884 /*
3885  * futimes_args(int fd, struct timeval *tptr)
3886  *
3887  * Set the access and modification times of a file.
3888  */
3889 int
sys_futimes(struct sysmsg * sysmsg,const struct futimes_args * uap)3890 sys_futimes(struct sysmsg *sysmsg, const struct futimes_args *uap)
3891 {
3892           struct timeval tv[2];
3893           int error;
3894 
3895           if (uap->tptr) {
3896                     error = copyin(uap->tptr, tv, sizeof(tv));
3897                     if (error)
3898                               return (error);
3899           }
3900           error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
3901           return (error);
3902 }
3903 
3904 /*
3905  * futimesat_args(int fd, const char *path, struct timeval *tptr)
3906  *
3907  * Set the access and modification times of a file.
3908  */
3909 int
sys_futimesat(struct sysmsg * sysmsg,const struct futimesat_args * uap)3910 sys_futimesat(struct sysmsg *sysmsg, const struct futimesat_args *uap)
3911 {
3912           struct timespec ts[2];
3913           struct nlookupdata nd;
3914           struct file *fp;
3915           int error;
3916 
3917           if (uap->tptr) {
3918                     struct timeval tv[2];
3919 
3920                     if ((error = copyin(uap->tptr, tv, sizeof(tv))) != 0)
3921                               return error;
3922                     if ((error = getutimes(tv, ts)) != 0)
3923                               return error;
3924           }
3925 
3926           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3927                                   UIO_USERSPACE, 0);
3928           if (error == 0)
3929                     error = kern_utimensat(&nd, uap->tptr ? ts : NULL, 0);
3930           nlookup_done_at(&nd, fp);
3931 
3932           return (error);
3933 }
3934 
3935 int
kern_utimensat(struct nlookupdata * nd,const struct timespec * ts,int flags)3936 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags)
3937 {
3938           struct timespec newts[2];
3939           struct vnode *vp;
3940           struct vattr vattr;
3941           int nullflag;
3942           int error;
3943 
3944           if (flags & ~AT_SYMLINK_NOFOLLOW)
3945                     return (EINVAL);
3946 
3947           error = getutimens(ts, newts, &nullflag);
3948           if (error)
3949                     return (error);
3950 
3951           nd->nl_flags |= NLC_OWN | NLC_WRITE;
3952           if ((error = nlookup(nd)) != 0)
3953                     return (error);
3954           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
3955                     return (error);
3956           if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3957                     return (error);
3958           if ((error = vn_writechk(vp)) == 0) {
3959                     error = vget(vp, LK_EXCLUSIVE);
3960                     if (error == 0) {
3961                               error = setutimes(vp, &vattr, newts, nullflag);
3962                               vput(vp);
3963                     }
3964           }
3965           vrele(vp);
3966           return (error);
3967 }
3968 
3969 /*
3970  * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags);
3971  *
3972  * Set file access and modification times of a file.
3973  */
3974 int
sys_utimensat(struct sysmsg * sysmsg,const struct utimensat_args * uap)3975 sys_utimensat(struct sysmsg *sysmsg, const struct utimensat_args *uap)
3976 {
3977           struct timespec ts[2];
3978           struct nlookupdata nd;
3979           struct file *fp;
3980           int error;
3981           int flags;
3982 
3983           if (uap->ts) {
3984                     error = copyin(uap->ts, ts, sizeof(ts));
3985                     if (error)
3986                               return (error);
3987           }
3988 
3989           flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3990           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3991                                   UIO_USERSPACE, flags);
3992           if (error == 0)
3993                     error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags);
3994           nlookup_done_at(&nd, fp);
3995           return (error);
3996 }
3997 
3998 int
kern_truncate(struct nlookupdata * nd,off_t length)3999 kern_truncate(struct nlookupdata *nd, off_t length)
4000 {
4001           struct vnode *vp;
4002           struct vattr vattr;
4003           int error;
4004           uid_t uid = 0;
4005           gid_t gid = 0;
4006           uint64_t old_size = 0;
4007 
4008           if (length < 0)
4009                     return(EINVAL);
4010           nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
4011           if ((error = nlookup(nd)) != 0)
4012                     return (error);
4013           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
4014                     return (error);
4015           if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
4016                     return (error);
4017           error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
4018           if (error) {
4019                     vrele(vp);
4020                     return (error);
4021           }
4022           if (vp->v_type == VDIR) {
4023                     error = EISDIR;
4024                     goto done;
4025           }
4026           if (vfs_quota_enabled) {
4027                     error = VOP_GETATTR(vp, &vattr);
4028                     KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
4029                     uid = vattr.va_uid;
4030                     gid = vattr.va_gid;
4031                     old_size = vattr.va_size;
4032           }
4033 
4034           if ((error = vn_writechk(vp)) == 0) {
4035                     VATTR_NULL(&vattr);
4036                     vattr.va_size = length;
4037                     error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
4038                     VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
4039           }
4040 done:
4041           vput(vp);
4042           return (error);
4043 }
4044 
4045 /*
4046  * truncate(char *path, int pad, off_t length)
4047  *
4048  * Truncate a file given its path name.
4049  */
4050 int
sys_truncate(struct sysmsg * sysmsg,const struct truncate_args * uap)4051 sys_truncate(struct sysmsg *sysmsg, const struct truncate_args *uap)
4052 {
4053           struct nlookupdata nd;
4054           int error;
4055 
4056           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4057           if (error == 0)
4058                     error = kern_truncate(&nd, uap->length);
4059           nlookup_done(&nd);
4060           return error;
4061 }
4062 
4063 int
kern_ftruncate(int fd,off_t length)4064 kern_ftruncate(int fd, off_t length)
4065 {
4066           struct thread *td = curthread;
4067           struct vattr vattr;
4068           struct vnode *vp;
4069           struct file *fp;
4070           int error;
4071           uid_t uid = 0;
4072           gid_t gid = 0;
4073           uint64_t old_size = 0;
4074           struct mount *mp;
4075 
4076           if (length < 0)
4077                     return(EINVAL);
4078           if ((error = holdvnode(td, fd, &fp)) != 0)
4079                     return (error);
4080           if (fp->f_nchandle.ncp) {
4081                     error = ncp_writechk(&fp->f_nchandle);
4082                     if (error)
4083                               goto done;
4084           }
4085           if ((fp->f_flag & FWRITE) == 0) {
4086                     error = EINVAL;
4087                     goto done;
4088           }
4089           if (fp->f_flag & FAPPENDONLY) {         /* inode was set s/uapnd */
4090                     error = EINVAL;
4091                     goto done;
4092           }
4093           vp = (struct vnode *)fp->f_data;
4094           vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4095           if (vp->v_type == VDIR) {
4096                     error = EISDIR;
4097                     vn_unlock(vp);
4098                     goto done;
4099           }
4100 
4101           if (vfs_quota_enabled) {
4102                     error = VOP_GETATTR_FP(vp, &vattr, fp);
4103                     KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0"));
4104                     uid = vattr.va_uid;
4105                     gid = vattr.va_gid;
4106                     old_size = vattr.va_size;
4107           }
4108 
4109           if ((error = vn_writechk(vp)) == 0) {
4110                     VATTR_NULL(&vattr);
4111                     vattr.va_size = length;
4112                     error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp);
4113                     mp = vq_vptomp(vp);
4114                     VFS_ACCOUNT(mp, uid, gid, length - old_size);
4115           }
4116           vn_unlock(vp);
4117 done:
4118           fdrop(fp);
4119           return (error);
4120 }
4121 
4122 /*
4123  * ftruncate_args(int fd, int pad, off_t length)
4124  *
4125  * Truncate a file given a file descriptor.
4126  */
4127 int
sys_ftruncate(struct sysmsg * sysmsg,const struct ftruncate_args * uap)4128 sys_ftruncate(struct sysmsg *sysmsg, const struct ftruncate_args *uap)
4129 {
4130           int error;
4131 
4132           error = kern_ftruncate(uap->fd, uap->length);
4133 
4134           return (error);
4135 }
4136 
4137 int
kern_fsync(int fd,bool fullsync)4138 kern_fsync(int fd, bool fullsync)
4139 {
4140           struct thread *td = curthread;
4141           struct vnode *vp;
4142           struct file *fp;
4143           vm_object_t obj;
4144           int error;
4145 
4146           if ((error = holdvnode(td, fd, &fp)) != 0)
4147                     return (error);
4148           vp = (struct vnode *)fp->f_data;
4149           vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4150           if ((obj = vp->v_object) != NULL) {
4151                     if (vp->v_mount == NULL ||
4152                         (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
4153                               vm_object_page_clean(obj, 0, 0, 0);
4154                     }
4155           }
4156           error = fullsync ?
4157                     VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp) :
4158                     VOP_FDATASYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp);
4159           if (error == 0 && vp->v_mount)
4160                     error = buf_fsync(vp);
4161           vn_unlock(vp);
4162           fdrop(fp);
4163 
4164           return (error);
4165 }
4166 
4167 /*
4168  * fsync(int fd)
4169  *
4170  * Sync an open file.
4171  */
4172 int
sys_fsync(struct sysmsg * sysmsg,const struct fsync_args * uap)4173 sys_fsync(struct sysmsg *sysmsg, const struct fsync_args *uap)
4174 {
4175           return (kern_fsync(uap->fd, true));
4176 }
4177 
4178 /*
4179  * fdatasync(int fd)
4180  *
4181  * Data-sync an open file.
4182  */
4183 int
sys_fdatasync(struct sysmsg * sysmsg,const struct fdatasync_args * uap)4184 sys_fdatasync(struct sysmsg *sysmsg, const struct fdatasync_args *uap)
4185 {
4186           return (kern_fsync(uap->fd, false));
4187 }
4188 
4189 /*
4190  * rename op.
4191  *
4192  * NOTE: error == 0 and nl_dvp is NULL indicates a mount point, operation
4193  *         disallowed.  e.g. /var/cache where /var/cache is a null-mount, for
4194  *         example.
4195  */
4196 int
kern_rename(struct nlookupdata * fromnd,struct nlookupdata * tond)4197 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
4198 {
4199           struct nchandle fnchd;
4200           struct nchandle tnchd;
4201           struct namecache *ncp;
4202           struct vnode *fdvp;
4203           struct vnode *tdvp;
4204           struct mount *mp;
4205           struct mount *userenlk;
4206           int error;
4207           u_int fncp_gen;
4208           u_int tncp_gen;
4209 
4210           bwillinode(1);
4211           fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
4212           if ((error = nlookup(fromnd)) != 0)
4213                     return (error);
4214 
4215           /*
4216            * Attempt to rename a mount point (from or to)
4217            */
4218           if (error == 0 && fromnd->nl_dvp == NULL)
4219                     return (EINVAL);
4220 
4221           if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
4222                     return (ENOENT);
4223           fnchd.mount = fromnd->nl_nch.mount;
4224           cache_hold(&fnchd);
4225 
4226           /*
4227            * unlock the source nch so we can lookup the target nch without
4228            * deadlocking.  The target may or may not exist so we do not check
4229            * for a target vp like kern_mkdir() and other creation functions do.
4230            *
4231            * The source and target directories are ref'd and rechecked after
4232            * everything is relocked to determine if the source or target file
4233            * has been renamed.
4234            */
4235           KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
4236           fromnd->nl_flags &= ~NLC_NCPISLOCKED;
4237           fncp_gen = fromnd->nl_nch.ncp->nc_generation;
4238 
4239           if (fromnd->nl_nch.ncp->nc_vp &&
4240               fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
4241                     userenlk = fnchd.mount;
4242                     cache_unlock(&fromnd->nl_nch);
4243                     lockmgr(&userenlk->mnt_renlock, LK_EXCLUSIVE);
4244           } else {
4245                     userenlk = NULL;
4246                     cache_unlock(&fromnd->nl_nch);
4247           }
4248 
4249           /*
4250            * Lookup target
4251            */
4252           tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
4253           if ((error = nlookup(tond)) != 0) {
4254                     cache_drop(&fnchd);
4255                     goto done;
4256           }
4257           tncp_gen = tond->nl_nch.ncp->nc_generation;
4258 
4259           /*
4260            * Attempt to rename a mount point (from or to)
4261            */
4262           if (error == 0 && tond->nl_dvp == NULL) {
4263                     cache_drop(&fnchd);
4264                     error = ENOENT;
4265                     goto done;
4266           }
4267 
4268           if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
4269                     cache_drop(&fnchd);
4270                     error = ENOENT;
4271                     goto done;
4272           }
4273           tnchd.mount = tond->nl_nch.mount;
4274           cache_hold(&tnchd);
4275 
4276           /*
4277            * If the source and target are the same there is nothing to do
4278            */
4279           if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
4280                     cache_drop(&fnchd);
4281                     cache_drop(&tnchd);
4282                     error = 0;
4283                     goto done;
4284           }
4285 
4286           /*
4287            * Mount points cannot be renamed or overwritten
4288            */
4289           if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
4290               NCF_ISMOUNTPT
4291           ) {
4292                     cache_drop(&fnchd);
4293                     cache_drop(&tnchd);
4294                     error = EINVAL;
4295                     goto done;
4296           }
4297 
4298           /*
4299            * Lock all four namecache entries.  tond is already locked.
4300            */
4301           cache_lock4_tondlocked(&fnchd, &fromnd->nl_nch,
4302                                      &tnchd, &tond->nl_nch,
4303                                      fromnd->nl_cred, tond->nl_cred);
4304           fromnd->nl_flags |= NLC_NCPISLOCKED;
4305 
4306           /*
4307            * If the namecache generation changed for either fromnd or tond,
4308            * we must retry.
4309            */
4310           if (((fromnd->nl_nch.ncp->nc_generation - fncp_gen) & ~1) ||
4311               ((tond->nl_nch.ncp->nc_generation - tncp_gen) & ~1))
4312           {
4313                     krateprintf(&krate_rename,
4314                               "kern_rename: retry due to race on: "
4315                               "\"%s\" -> \"%s\" (%d,%d)\n",
4316                               fromnd->nl_nch.ncp->nc_name,
4317                               tond->nl_nch.ncp->nc_name,
4318                               fromnd->nl_nch.ncp->nc_generation - fncp_gen,
4319                               tond->nl_nch.ncp->nc_generation - tncp_gen);
4320                     error = EAGAIN;
4321                     goto finish;
4322           }
4323 
4324           /*
4325            * If either fromnd or tond are marked destroyed a ripout occured
4326            * out from under us and we must retry.
4327            */
4328           if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
4329               fromnd->nl_nch.ncp->nc_vp == NULL ||
4330               (tond->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED))) {
4331                     krateprintf(&krate_rename,
4332                               "kern_rename: retry due to ripout on: "
4333                               "\"%s\" -> \"%s\"\n",
4334                               fromnd->nl_nch.ncp->nc_name,
4335                               tond->nl_nch.ncp->nc_name);
4336                     error = EAGAIN;
4337                     goto finish;
4338           }
4339 
4340           /*
4341            * Make sure the parent directories linkages are the same.  We have
4342            * already checked that fromnd and tond are not mount points so this
4343            * should not loop forever on a cross-mount.
4344            */
4345           if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
4346               tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
4347                     error = EAGAIN;
4348                     goto finish;
4349           }
4350 
4351           /*
4352            * Both the source and target must be within the same filesystem and
4353            * in the same filesystem as their parent directories within the
4354            * namecache topology.
4355            *
4356            * NOTE: fromnd's nc_mount or nc_vp could be NULL.
4357            */
4358           mp = fnchd.mount;
4359           if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
4360               mp != tond->nl_nch.mount) {
4361                     error = EXDEV;
4362                     goto finish;
4363           }
4364 
4365           /*
4366            * Make sure the mount point is writable
4367            */
4368           if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
4369                     goto finish;
4370           }
4371 
4372           /*
4373            * If the target exists and either the source or target is a directory,
4374            * then both must be directories.
4375            *
4376            * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h
4377            * have become NULL.
4378            */
4379           if (tond->nl_nch.ncp->nc_vp) {
4380                     if (fromnd->nl_nch.ncp->nc_vp == NULL) {
4381                               error = ENOENT;
4382                     } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
4383                               if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
4384                                         error = ENOTDIR;
4385                     } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
4386                               error = EISDIR;
4387                     }
4388           }
4389 
4390           /*
4391            * You cannot rename a source into itself or a subdirectory of itself.
4392            * We check this by travsersing the target directory upwards looking
4393            * for a match against the source.
4394            *
4395            * Only required when renaming a directory, in which case userenlk is
4396            * non-NULL.
4397            */
4398           if (__predict_false(userenlk && error == 0)) {
4399                     for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
4400                               if (fromnd->nl_nch.ncp == ncp) {
4401                                         error = EINVAL;
4402                                         break;
4403                               }
4404                     }
4405           }
4406 
4407           /*
4408            * Even though the namespaces are different, they may still represent
4409            * hardlinks to the same file.  The filesystem might have a hard time
4410            * with this so we issue a NREMOVE of the source instead of a NRENAME
4411            * when we detect the situation.
4412            */
4413           if (error == 0) {
4414                     fdvp = fromnd->nl_dvp;
4415                     tdvp = tond->nl_dvp;
4416                     if (fdvp == NULL || tdvp == NULL) {
4417                               error = EPERM;
4418                     } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
4419                               error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
4420                                                       fromnd->nl_cred);
4421                     } else {
4422                               error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
4423                                                       fdvp, tdvp, tond->nl_cred);
4424                     }
4425           }
4426 finish:
4427           cache_put(&tnchd);
4428           cache_put(&fnchd);
4429 done:
4430           if (userenlk)
4431                     lockmgr(&userenlk->mnt_renlock, LK_RELEASE);
4432           return (error);
4433 }
4434 
4435 /*
4436  * rename_args(char *from, char *to)
4437  *
4438  * Rename files.  Source and destination must either both be directories,
4439  * or both not be directories.  If target is a directory, it must be empty.
4440  */
4441 int
sys_rename(struct sysmsg * sysmsg,const struct rename_args * uap)4442 sys_rename(struct sysmsg *sysmsg, const struct rename_args *uap)
4443 {
4444           struct nlookupdata fromnd, tond;
4445           int error;
4446 
4447           do {
4448                     error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
4449                     if (error == 0) {
4450                               error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
4451                               if (error == 0)
4452                                         error = kern_rename(&fromnd, &tond);
4453                               nlookup_done(&tond);
4454                     }
4455                     nlookup_done(&fromnd);
4456           } while (error == EAGAIN);
4457           return (error);
4458 }
4459 
4460 /*
4461  * renameat_args(int oldfd, char *old, int newfd, char *new)
4462  *
4463  * Rename files using paths relative to the directories associated with
4464  * oldfd and newfd.  Source and destination must either both be directories,
4465  * or both not be directories.  If target is a directory, it must be empty.
4466  */
4467 int
sys_renameat(struct sysmsg * sysmsg,const struct renameat_args * uap)4468 sys_renameat(struct sysmsg *sysmsg, const struct renameat_args *uap)
4469 {
4470           struct nlookupdata oldnd, newnd;
4471           struct file *oldfp, *newfp;
4472           int error;
4473 
4474           do {
4475                     error = nlookup_init_at(&oldnd, &oldfp,
4476                                                   uap->oldfd, uap->old,
4477                                                   UIO_USERSPACE, 0);
4478                     if (error == 0) {
4479                               error = nlookup_init_at(&newnd, &newfp,
4480                                                             uap->newfd, uap->new,
4481                                                             UIO_USERSPACE, 0);
4482                               if (error == 0)
4483                                         error = kern_rename(&oldnd, &newnd);
4484                               nlookup_done_at(&newnd, newfp);
4485                     }
4486                     nlookup_done_at(&oldnd, oldfp);
4487           } while (error == EAGAIN);
4488           return (error);
4489 }
4490 
4491 int
kern_mkdir(struct nlookupdata * nd,int mode)4492 kern_mkdir(struct nlookupdata *nd, int mode)
4493 {
4494           struct thread *td = curthread;
4495           struct proc *p = td->td_proc;
4496           struct vnode *vp;
4497           struct vattr vattr;
4498           int error;
4499 
4500           bwillinode(1);
4501           nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
4502           if ((error = nlookup(nd)) != 0)
4503                     return (error);
4504 
4505           if (nd->nl_nch.ncp->nc_vp)
4506                     return (EEXIST);
4507           if (nd->nl_dvp == NULL)
4508                     return (EINVAL);
4509           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
4510                     return (error);
4511           VATTR_NULL(&vattr);
4512           vattr.va_type = VDIR;
4513           vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
4514 
4515           vp = NULL;
4516           error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
4517           if (error == 0)
4518                     vput(vp);
4519           return (error);
4520 }
4521 
4522 /*
4523  * mkdir_args(char *path, int mode)
4524  *
4525  * Make a directory file.
4526  */
4527 int
sys_mkdir(struct sysmsg * sysmsg,const struct mkdir_args * uap)4528 sys_mkdir(struct sysmsg *sysmsg, const struct mkdir_args *uap)
4529 {
4530           struct nlookupdata nd;
4531           int error;
4532 
4533           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
4534           if (error == 0)
4535                     error = kern_mkdir(&nd, uap->mode);
4536           nlookup_done(&nd);
4537           return (error);
4538 }
4539 
4540 /*
4541  * mkdirat_args(int fd, char *path, mode_t mode)
4542  *
4543  * Make a directory file.  The path is relative to the directory associated
4544  * with fd.
4545  */
4546 int
sys_mkdirat(struct sysmsg * sysmsg,const struct mkdirat_args * uap)4547 sys_mkdirat(struct sysmsg *sysmsg, const struct mkdirat_args *uap)
4548 {
4549           struct nlookupdata nd;
4550           struct file *fp;
4551           int error;
4552 
4553           error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
4554           if (error == 0)
4555                     error = kern_mkdir(&nd, uap->mode);
4556           nlookup_done_at(&nd, fp);
4557           return (error);
4558 }
4559 
4560 int
kern_rmdir(struct nlookupdata * nd)4561 kern_rmdir(struct nlookupdata *nd)
4562 {
4563           int error;
4564 
4565           bwillinode(1);
4566           nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
4567           if ((error = nlookup(nd)) != 0)
4568                     return (error);
4569 
4570           /*
4571            * Do not allow directories representing mount points to be
4572            * deleted, even if empty.  Check write perms on mount point
4573            * in case the vnode is aliased (aka nullfs).
4574            */
4575           if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
4576                     return (EBUSY);
4577           if (nd->nl_dvp == NULL)
4578                     return (EINVAL);
4579           if ((error = ncp_writechk(&nd->nl_nch)) != 0)
4580                     return (error);
4581           error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
4582           return (error);
4583 }
4584 
4585 /*
4586  * rmdir_args(char *path)
4587  *
4588  * Remove a directory file.
4589  */
4590 int
sys_rmdir(struct sysmsg * sysmsg,const struct rmdir_args * uap)4591 sys_rmdir(struct sysmsg *sysmsg, const struct rmdir_args *uap)
4592 {
4593           struct nlookupdata nd;
4594           int error;
4595 
4596           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
4597           if (error == 0)
4598                     error = kern_rmdir(&nd);
4599           nlookup_done(&nd);
4600           return (error);
4601 }
4602 
4603 int
kern_getdirentries(int fd,char * buf,u_int count,long * basep,int * res,enum uio_seg direction)4604 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
4605                        enum uio_seg direction)
4606 {
4607           struct thread *td = curthread;
4608           struct vnode *vp;
4609           struct file *fp;
4610           struct uio auio;
4611           struct iovec aiov;
4612           off_t loff;
4613           int error, eofflag;
4614 
4615           if ((error = holdvnode(td, fd, &fp)) != 0)
4616                     return (error);
4617           if ((fp->f_flag & FREAD) == 0) {
4618                     error = EBADF;
4619                     goto done;
4620           }
4621           vp = (struct vnode *)fp->f_data;
4622           if (vp->v_type != VDIR) {
4623                     error = EINVAL;
4624                     goto done;
4625           }
4626           aiov.iov_base = buf;
4627           aiov.iov_len = count;
4628           auio.uio_iov = &aiov;
4629           auio.uio_iovcnt = 1;
4630           auio.uio_rw = UIO_READ;
4631           auio.uio_segflg = direction;
4632           auio.uio_td = td;
4633           auio.uio_resid = count;
4634           loff = auio.uio_offset = fp->f_offset;
4635           error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp);
4636           fp->f_offset = auio.uio_offset;
4637           if (error)
4638                     goto done;
4639 
4640           /*
4641            * WARNING!  *basep may not be wide enough to accomodate the
4642            * seek offset.   XXX should we hack this to return the upper 32 bits
4643            * for offsets greater then 4G?
4644            */
4645           if (basep) {
4646                     *basep = (long)loff;
4647           }
4648           *res = count - auio.uio_resid;
4649 done:
4650           fdrop(fp);
4651           return (error);
4652 }
4653 
4654 /*
4655  * getdirentries_args(int fd, char *buf, u_int conut, long *basep)
4656  *
4657  * Read a block of directory entries in a file system independent format.
4658  */
4659 int
sys_getdirentries(struct sysmsg * sysmsg,const struct getdirentries_args * uap)4660 sys_getdirentries(struct sysmsg *sysmsg, const struct getdirentries_args *uap)
4661 {
4662           long base;
4663           int error;
4664 
4665           error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
4666                                            &sysmsg->sysmsg_result, UIO_USERSPACE);
4667 
4668           if (error == 0 && uap->basep)
4669                     error = copyout(&base, uap->basep, sizeof(*uap->basep));
4670           return (error);
4671 }
4672 
4673 /*
4674  * getdents_args(int fd, char *buf, size_t count)
4675  */
4676 int
sys_getdents(struct sysmsg * sysmsg,const struct getdents_args * uap)4677 sys_getdents(struct sysmsg *sysmsg, const struct getdents_args *uap)
4678 {
4679           int error;
4680 
4681           error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
4682                                            &sysmsg->sysmsg_result, UIO_USERSPACE);
4683 
4684           return (error);
4685 }
4686 
4687 /*
4688  * Set the mode mask for creation of filesystem nodes.
4689  *
4690  * umask(int newmask)
4691  */
4692 int
sys_umask(struct sysmsg * sysmsg,const struct umask_args * uap)4693 sys_umask(struct sysmsg *sysmsg, const struct umask_args *uap)
4694 {
4695           struct thread *td = curthread;
4696           struct proc *p = td->td_proc;
4697           struct filedesc *fdp;
4698 
4699           fdp = p->p_fd;
4700           sysmsg->sysmsg_result = fdp->fd_cmask;
4701           fdp->fd_cmask = uap->newmask & ALLPERMS;
4702           return (0);
4703 }
4704 
4705 /*
4706  * revoke(char *path)
4707  *
4708  * Void all references to file by ripping underlying filesystem
4709  * away from vnode.
4710  */
4711 int
sys_revoke(struct sysmsg * sysmsg,const struct revoke_args * uap)4712 sys_revoke(struct sysmsg *sysmsg, const struct revoke_args *uap)
4713 {
4714           struct nlookupdata nd;
4715           struct vattr vattr;
4716           struct vnode *vp;
4717           struct ucred *cred;
4718           int error;
4719 
4720           vp = NULL;
4721           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4722           if (error == 0)
4723                     error = nlookup(&nd);
4724           if (error == 0)
4725                     error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
4726           cred = crhold(nd.nl_cred);
4727           nlookup_done(&nd);
4728           if (error == 0) {
4729                     if (error == 0)
4730                               error = VOP_GETATTR(vp, &vattr);
4731                     if (error == 0 && cred->cr_uid != vattr.va_uid)
4732                               error = caps_priv_check(cred, SYSCAP_NOVFS_REVOKE);
4733                     if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) {
4734                               if (vcount(vp) > 0)
4735                                         error = vrevoke(vp, cred);
4736                     } else if (error == 0) {
4737                               error = vrevoke(vp, cred);
4738                     }
4739                     vrele(vp);
4740           }
4741           if (cred)
4742                     crfree(cred);
4743           return (error);
4744 }
4745 
4746 /*
4747  * getfh_args(char *fname, fhandle_t *fhp)
4748  *
4749  * Get (NFS) file handle
4750  *
4751  * NOTE: We use the fsid of the covering mount, even if it is a nullfs
4752  * mount.  This allows nullfs mounts to be explicitly exported.
4753  *
4754  * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
4755  *
4756  *            nullfs mounts of subdirectories are not safe.  That is, it will
4757  *            work, but you do not really have protection against access to
4758  *            the related parent directories.
4759  */
4760 int
sys_getfh(struct sysmsg * sysmsg,const struct getfh_args * uap)4761 sys_getfh(struct sysmsg *sysmsg, const struct getfh_args *uap)
4762 {
4763           struct nlookupdata nd;
4764           fhandle_t fh;
4765           struct vnode *vp;
4766           struct mount *mp;
4767           int error;
4768 
4769           /*
4770            * Must be super user
4771            */
4772           if ((error = caps_priv_check_self(SYSCAP_RESTRICTEDROOT)) != 0)
4773                     return (error);
4774 
4775           vp = NULL;
4776           error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
4777           if (error == 0)
4778                     error = nlookup(&nd);
4779           if (error == 0)
4780                     error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
4781           mp = nd.nl_nch.mount;
4782           nlookup_done(&nd);
4783           if (error == 0) {
4784                     bzero(&fh, sizeof(fh));
4785                     fh.fh_fsid = mp->mnt_stat.f_fsid;
4786                     error = VFS_VPTOFH(vp, &fh.fh_fid);
4787                     vput(vp);
4788                     if (error == 0)
4789                               error = copyout(&fh, uap->fhp, sizeof(fh));
4790           }
4791           return (error);
4792 }
4793 
4794 /*
4795  * fhopen_args(const struct fhandle *u_fhp, int flags)
4796  *
4797  * syscall for the rpc.lockd to use to translate a NFS file handle into
4798  * an open descriptor.
4799  *
4800  * WARNING: Do not remove the caps_priv_check() call or this becomes
4801  *            one giant security hole.
4802  */
4803 int
sys_fhopen(struct sysmsg * sysmsg,const struct fhopen_args * uap)4804 sys_fhopen(struct sysmsg *sysmsg, const struct fhopen_args *uap)
4805 {
4806           struct thread *td = curthread;
4807           struct filedesc *fdp = td->td_proc->p_fd;
4808           struct mount *mp;
4809           struct vnode *vp;
4810           struct fhandle fhp;
4811           struct vattr vat;
4812           struct vattr *vap = &vat;
4813           struct flock lf;
4814           int fmode, mode, error = 0, type;
4815           struct file *nfp;
4816           struct file *fp;
4817           int indx;
4818 
4819           /*
4820            * Must be super user
4821            */
4822           error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
4823           if (error)
4824                     return (error);
4825 
4826           fmode = FFLAGS(uap->flags);
4827 
4828           /*
4829            * Why not allow a non-read/write open for our lockd?
4830            */
4831           if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4832                     return (EINVAL);
4833           error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4834           if (error)
4835                     return(error);
4836 
4837           /*
4838            * Find the mount point
4839            */
4840           mp = vfs_getvfs(&fhp.fh_fsid);
4841           if (mp == NULL) {
4842                     error = ESTALE;
4843                     goto done2;
4844           }
4845           /* now give me my vnode, it gets returned to me locked */
4846           error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
4847           if (error)
4848                     goto done;
4849           /*
4850            * from now on we have to make sure not
4851            * to forget about the vnode
4852            * any error that causes an abort must vput(vp)
4853            * just set error = err and 'goto bad;'.
4854            */
4855 
4856           /*
4857            * from vn_open
4858            */
4859           if (vp->v_type == VLNK) {
4860                     error = EMLINK;
4861                     goto bad;
4862           }
4863           if (vp->v_type == VSOCK) {
4864                     error = EOPNOTSUPP;
4865                     goto bad;
4866           }
4867           mode = 0;
4868           if (fmode & (FWRITE | O_TRUNC)) {
4869                     if (vp->v_type == VDIR) {
4870                               error = EISDIR;
4871                               goto bad;
4872                     }
4873                     error = vn_writechk(vp);
4874                     if (error)
4875                               goto bad;
4876                     mode |= VWRITE;
4877           }
4878           if (fmode & FREAD)
4879                     mode |= VREAD;
4880           if (mode) {
4881                     error = VOP_ACCESS(vp, mode, td->td_ucred);
4882                     if (error)
4883                               goto bad;
4884           }
4885           if (fmode & O_TRUNC) {
4886                     vn_unlock(vp);                                    /* XXX */
4887                     vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
4888                     VATTR_NULL(vap);
4889                     vap->va_size = 0;
4890                     error = VOP_SETATTR(vp, vap, td->td_ucred);
4891                     if (error)
4892                               goto bad;
4893           }
4894 
4895           /*
4896            * VOP_OPEN needs the file pointer so it can potentially override
4897            * it.
4898            *
4899            * WARNING! no f_nchandle will be associated when fhopen()ing a
4900            * directory.  XXX
4901            */
4902           if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0)
4903                     goto bad;
4904           error = VOP_OPEN(vp, fmode, td->td_ucred, &nfp);
4905           fp = nfp;
4906 
4907           if (error) {
4908                     /*
4909                      * setting f_ops this way prevents VOP_CLOSE from being
4910                      * called or fdrop() releasing the vp from v_data.   Since
4911                      * the VOP_OPEN failed we don't want to VOP_CLOSE.
4912                      */
4913                     fp->f_ops = &badfileops;
4914                     fp->f_data = NULL;
4915                     goto bad_drop;
4916           }
4917 
4918           /*
4919            * The fp is given its own reference, we still have our ref and lock.
4920            *
4921            * Assert that all regular files must be created with a VM object.
4922            */
4923           if (vp->v_type == VREG && vp->v_object == NULL) {
4924                     kprintf("fhopen: regular file did not "
4925                               "have VM object: %p\n",
4926                               vp);
4927                     goto bad_drop;
4928           }
4929 
4930           /*
4931            * The open was successful.  Handle any locking requirements.
4932            */
4933           if (fmode & (O_EXLOCK | O_SHLOCK)) {
4934                     lf.l_whence = SEEK_SET;
4935                     lf.l_start = 0;
4936                     lf.l_len = 0;
4937                     if (fmode & O_EXLOCK)
4938                               lf.l_type = F_WRLCK;
4939                     else
4940                               lf.l_type = F_RDLCK;
4941                     if (fmode & FNONBLOCK)
4942                               type = 0;
4943                     else
4944                               type = F_WAIT;
4945                     vn_unlock(vp);
4946                     if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK,
4947                                                    &lf, type)) != 0) {
4948                               /*
4949                                * release our private reference.
4950                                */
4951                               fsetfd(fdp, NULL, indx);
4952                               fdrop(fp);
4953                               vrele(vp);
4954                               goto done;
4955                     }
4956                     vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4957                     atomic_set_int(&fp->f_flag, FHASLOCK);  /* race ok */
4958           }
4959 
4960           /*
4961            * Clean up.  Associate the file pointer with the previously
4962            * reserved descriptor and return it.
4963            */
4964           vput(vp);
4965           if (uap->flags & O_CLOEXEC)
4966                     fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
4967           fsetfd(fdp, fp, indx);
4968           fdrop(fp);
4969           sysmsg->sysmsg_result = indx;
4970           mount_drop(mp);
4971 
4972           return (error);
4973 
4974 bad_drop:
4975           fsetfd(fdp, NULL, indx);
4976           fdrop(fp);
4977 bad:
4978           vput(vp);
4979 done:
4980           mount_drop(mp);
4981 done2:
4982           return (error);
4983 }
4984 
4985 /*
4986  * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
4987  */
4988 int
sys_fhstat(struct sysmsg * sysmsg,const struct fhstat_args * uap)4989 sys_fhstat(struct sysmsg *sysmsg, const struct fhstat_args *uap)
4990 {
4991           struct thread *td = curthread;
4992           struct stat sb;
4993           fhandle_t fh;
4994           struct mount *mp;
4995           struct vnode *vp;
4996           int error;
4997 
4998           /*
4999            * Must be super user
5000            */
5001           error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
5002           if (error)
5003                     return (error);
5004 
5005           error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
5006           if (error)
5007                     return (error);
5008 
5009           if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
5010                     error = ESTALE;
5011           if (error == 0) {
5012                     if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
5013                               error = vn_stat(vp, &sb, td->td_ucred);
5014                               vput(vp);
5015                     }
5016           }
5017           if (error == 0)
5018                     error = copyout(&sb, uap->sb, sizeof(sb));
5019           if (mp)
5020                     mount_drop(mp);
5021 
5022           return (error);
5023 }
5024 
5025 /*
5026  * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
5027  */
5028 int
sys_fhstatfs(struct sysmsg * sysmsg,const struct fhstatfs_args * uap)5029 sys_fhstatfs(struct sysmsg *sysmsg, const struct fhstatfs_args *uap)
5030 {
5031           struct thread *td = curthread;
5032           struct proc *p = td->td_proc;
5033           struct statfs *sp;
5034           struct mount *mp;
5035           struct vnode *vp;
5036           struct statfs sb;
5037           char *fullpath, *freepath;
5038           fhandle_t fh;
5039           int error;
5040 
5041           /*
5042            * Must be super user
5043            */
5044           error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
5045           if (error)
5046                     return (error);
5047 
5048           if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
5049                     return (error);
5050 
5051           if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
5052                     error = ESTALE;
5053                     goto done;
5054           }
5055           if (p != NULL && !chroot_visible_mnt(mp, p)) {
5056                     error = ESTALE;
5057                     goto done;
5058           }
5059 
5060           if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
5061                     goto done;
5062           mp = vp->v_mount;
5063           sp = &mp->mnt_stat;
5064           vput(vp);
5065           if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
5066                     goto done;
5067 
5068           error = mount_path(p, mp, &fullpath, &freepath);
5069           if (error)
5070                     goto done;
5071           bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
5072           strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
5073           kfree(freepath, M_TEMP);
5074 
5075           sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
5076           if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) {
5077                     bcopy(sp, &sb, sizeof(sb));
5078                     sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
5079                     sp = &sb;
5080           }
5081           error = copyout(sp, uap->buf, sizeof(*sp));
5082 done:
5083           if (mp)
5084                     mount_drop(mp);
5085 
5086           return (error);
5087 }
5088 
5089 /*
5090  * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
5091  */
5092 int
sys_fhstatvfs(struct sysmsg * sysmsg,const struct fhstatvfs_args * uap)5093 sys_fhstatvfs(struct sysmsg *sysmsg, const struct fhstatvfs_args *uap)
5094 {
5095           struct thread *td = curthread;
5096           struct proc *p = td->td_proc;
5097           struct statvfs *sp;
5098           struct mount *mp;
5099           struct vnode *vp;
5100           fhandle_t fh;
5101           int error;
5102 
5103           /*
5104            * Must be super user
5105            */
5106           if ((error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)))
5107                     return (error);
5108 
5109           if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
5110                     return (error);
5111 
5112           if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
5113                     error = ESTALE;
5114                     goto done;
5115           }
5116           if (p != NULL && !chroot_visible_mnt(mp, p)) {
5117                     error = ESTALE;
5118                     goto done;
5119           }
5120 
5121           if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
5122                     goto done;
5123           mp = vp->v_mount;
5124           sp = &mp->mnt_vstat;
5125           vput(vp);
5126           if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
5127                     goto done;
5128 
5129           sp->f_flag = 0;
5130           if (mp->mnt_flag & MNT_RDONLY)
5131                     sp->f_flag |= ST_RDONLY;
5132           if (mp->mnt_flag & MNT_NOSUID)
5133                     sp->f_flag |= ST_NOSUID;
5134           error = copyout(sp, uap->buf, sizeof(*sp));
5135 done:
5136           if (mp)
5137                     mount_drop(mp);
5138           return (error);
5139 }
5140 
5141 
5142 /*
5143  * Syscall to push extended attribute configuration information into the
5144  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
5145  * a command (int cmd), and attribute name and misc data.  For now, the
5146  * attribute name is left in userspace for consumption by the VFS_op.
5147  * It will probably be changed to be copied into sysspace by the
5148  * syscall in the future, once issues with various consumers of the
5149  * attribute code have raised their hands.
5150  *
5151  * Currently this is used only by UFS Extended Attributes.
5152  */
5153 int
sys_extattrctl(struct sysmsg * sysmsg,const struct extattrctl_args * uap)5154 sys_extattrctl(struct sysmsg *sysmsg, const struct extattrctl_args *uap)
5155 {
5156           struct nlookupdata nd;
5157           struct vnode *vp;
5158           char attrname[EXTATTR_MAXNAMELEN];
5159           int error;
5160           size_t size;
5161 
5162           attrname[0] = 0;
5163           vp = NULL;
5164           error = 0;
5165 
5166           if (error == 0 && uap->filename) {
5167                     error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
5168                                              NLC_FOLLOW);
5169                     if (error == 0)
5170                               error = nlookup(&nd);
5171                     if (error == 0)
5172                               error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
5173                     nlookup_done(&nd);
5174           }
5175 
5176           if (error == 0 && uap->attrname) {
5177                     error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
5178                                           &size);
5179           }
5180 
5181           if (error == 0) {
5182                     error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5183                     if (error == 0)
5184                               error = nlookup(&nd);
5185                     if (error == 0)
5186                               error = ncp_writechk(&nd.nl_nch);
5187                     if (error == 0) {
5188                               error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
5189                                                          uap->attrnamespace,
5190                                                          uap->attrname, nd.nl_cred);
5191                     }
5192                     nlookup_done(&nd);
5193           }
5194 
5195           return (error);
5196 }
5197 
5198 /*
5199  * Syscall to get a named extended attribute on a file or directory.
5200  */
5201 int
sys_extattr_set_file(struct sysmsg * sysmsg,const struct extattr_set_file_args * uap)5202 sys_extattr_set_file(struct sysmsg *sysmsg,
5203                          const struct extattr_set_file_args *uap)
5204 {
5205           char attrname[EXTATTR_MAXNAMELEN];
5206           struct nlookupdata nd;
5207           struct vnode *vp;
5208           struct uio auio;
5209           struct iovec aiov;
5210           int error;
5211 
5212           error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
5213           if (error)
5214                     return (error);
5215 
5216           vp = NULL;
5217 
5218           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5219           if (error == 0)
5220                     error = nlookup(&nd);
5221           if (error == 0)
5222                     error = ncp_writechk(&nd.nl_nch);
5223           if (error == 0)
5224                     error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
5225           if (error) {
5226                     nlookup_done(&nd);
5227                     return (error);
5228           }
5229 
5230           bzero(&auio, sizeof(auio));
5231           aiov.iov_base = uap->data;
5232           aiov.iov_len = uap->nbytes;
5233           auio.uio_iov = &aiov;
5234           auio.uio_iovcnt = 1;
5235           auio.uio_offset = 0;
5236           auio.uio_resid = uap->nbytes;
5237           auio.uio_rw = UIO_WRITE;
5238           auio.uio_td = curthread;
5239 
5240           error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
5241                                      &auio, nd.nl_cred);
5242 
5243           vput(vp);
5244           nlookup_done(&nd);
5245           return (error);
5246 }
5247 
5248 /*
5249  * Syscall to get a named extended attribute on a file or directory.
5250  */
5251 int
sys_extattr_get_file(struct sysmsg * sysmsg,const struct extattr_get_file_args * uap)5252 sys_extattr_get_file(struct sysmsg *sysmsg,
5253                          const struct extattr_get_file_args *uap)
5254 {
5255           char attrname[EXTATTR_MAXNAMELEN];
5256           struct nlookupdata nd;
5257           struct uio auio;
5258           struct iovec aiov;
5259           struct vnode *vp;
5260           int error;
5261 
5262           error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
5263           if (error)
5264                     return (error);
5265 
5266           vp = NULL;
5267 
5268           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5269           if (error == 0)
5270                     error = nlookup(&nd);
5271           if (error == 0)
5272                     error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp);
5273           if (error) {
5274                     nlookup_done(&nd);
5275                     return (error);
5276           }
5277 
5278           bzero(&auio, sizeof(auio));
5279           aiov.iov_base = uap->data;
5280           aiov.iov_len = uap->nbytes;
5281           auio.uio_iov = &aiov;
5282           auio.uio_iovcnt = 1;
5283           auio.uio_offset = 0;
5284           auio.uio_resid = uap->nbytes;
5285           auio.uio_rw = UIO_READ;
5286           auio.uio_td = curthread;
5287 
5288           error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
5289                                         &auio, nd.nl_cred);
5290           sysmsg->sysmsg_result = uap->nbytes - auio.uio_resid;
5291 
5292           vput(vp);
5293           nlookup_done(&nd);
5294           return(error);
5295 }
5296 
5297 /*
5298  * Syscall to delete a named extended attribute from a file or directory.
5299  * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
5300  */
5301 int
sys_extattr_delete_file(struct sysmsg * sysmsg,const struct extattr_delete_file_args * uap)5302 sys_extattr_delete_file(struct sysmsg *sysmsg,
5303                               const struct extattr_delete_file_args *uap)
5304 {
5305           char attrname[EXTATTR_MAXNAMELEN];
5306           struct nlookupdata nd;
5307           struct vnode *vp;
5308           int error;
5309 
5310           error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
5311           if (error)
5312                     return(error);
5313 
5314           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5315           if (error == 0)
5316                     error = nlookup(&nd);
5317           if (error == 0)
5318                     error = ncp_writechk(&nd.nl_nch);
5319           if (error == 0) {
5320                     error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
5321                     if (error == 0) {
5322                               error = VOP_SETEXTATTR(vp, uap->attrnamespace,
5323                                                          attrname, NULL, nd.nl_cred);
5324                               vput(vp);
5325                     }
5326           }
5327           nlookup_done(&nd);
5328           return(error);
5329 }
5330 
5331 /*
5332  * Determine if the mount is visible to the process.
5333  */
5334 static int
chroot_visible_mnt(struct mount * mp,struct proc * p)5335 chroot_visible_mnt(struct mount *mp, struct proc *p)
5336 {
5337           struct nchandle nch;
5338 
5339           /*
5340            * Traverse from the mount point upwards.  If we hit the process
5341            * root then the mount point is visible to the process.
5342            */
5343           nch = mp->mnt_ncmountpt;
5344           while (nch.ncp) {
5345                     if (nch.mount == p->p_fd->fd_nrdir.mount &&
5346                         nch.ncp == p->p_fd->fd_nrdir.ncp) {
5347                               return(1);
5348                     }
5349                     if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
5350                               nch = nch.mount->mnt_ncmounton;
5351                     } else {
5352                               nch.ncp = nch.ncp->nc_parent;
5353                     }
5354           }
5355 
5356           /*
5357            * If the mount point is not visible to the process, but the
5358            * process root is in a subdirectory of the mount, return
5359            * TRUE anyway.
5360            */
5361           if (p->p_fd->fd_nrdir.mount == mp)
5362                     return(1);
5363 
5364           return(0);
5365 }
5366 
5367 /*
5368  * Return the appropriate system capability restriction.
5369  */
5370 static int
get_fscap(const char * fsname)5371 get_fscap(const char *fsname)
5372 {
5373 
5374           if (strncmp("null", fsname, 5) == 0) {
5375                     return SYSCAP_NOMOUNT_NULLFS;
5376           } else if (strncmp(fsname, "devfs", 6) == 0) {
5377                     return SYSCAP_NOMOUNT_DEVFS;
5378           } else if (strncmp(fsname, "procfs", 7) == 0) {
5379                     return SYSCAP_NOMOUNT_PROCFS;
5380           } else if (strncmp(fsname, "tmpfs", 6) == 0) {
5381                     return SYSCAP_NOMOUNT_TMPFS;
5382           } else if (strncmp(fsname, "fusefs", 7) == 0) {
5383                     return SYSCAP_NOMOUNT_FUSE;
5384           }
5385           return SYSCAP_RESTRICTEDROOT;
5386 }
5387 
5388 int
sys___realpath(struct sysmsg * sysmsg,const struct __realpath_args * uap)5389 sys___realpath(struct sysmsg *sysmsg, const struct __realpath_args *uap)
5390 {
5391           struct nlookupdata nd;
5392           char *rbuf;
5393           char *fbuf;
5394           ssize_t rlen;
5395           int error;
5396 
5397           /*
5398            * Invalid length if less than 0.  0 is allowed
5399            */
5400           if ((ssize_t)uap->len < 0)
5401                     return EINVAL;
5402 
5403           rbuf = NULL;
5404           fbuf = NULL;
5405           error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5406           if (error)
5407                     goto done;
5408 
5409           nd.nl_flags |= NLC_SHAREDLOCK;
5410           error = nlookup(&nd);
5411           if (error)
5412                     goto done;
5413 
5414           if (nd.nl_nch.ncp->nc_vp == NULL) {
5415                     error = ENOENT;
5416                     goto done;
5417           }
5418 
5419           /*
5420            * Shortcut test for existence.
5421            */
5422           if (uap->len == 0) {
5423                     error = ENAMETOOLONG;
5424                     goto done;
5425           }
5426 
5427           /*
5428            * Obtain the path relative to the process root.  The nch must not
5429            * be locked for the cache_fullpath() call.
5430            */
5431           if (nd.nl_flags & NLC_NCPISLOCKED) {
5432                     nd.nl_flags &= ~NLC_NCPISLOCKED;
5433                     cache_unlock(&nd.nl_nch);
5434           }
5435           error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0);
5436           if (error)
5437                     goto done;
5438 
5439           rlen = (ssize_t)strlen(rbuf);
5440           if (rlen >= uap->len) {
5441                     error = ENAMETOOLONG;
5442                     goto done;
5443           }
5444           error = copyout(rbuf, uap->buf, rlen + 1);
5445           if (error == 0)
5446                     sysmsg->sysmsg_szresult = rlen;
5447 done:
5448           nlookup_done(&nd);
5449           if (fbuf)
5450                     kfree(fbuf, M_TEMP);
5451 
5452           return error;
5453 }
5454 
5455 int
sys_posix_fallocate(struct sysmsg * sysmsg,const struct posix_fallocate_args * uap)5456 sys_posix_fallocate(struct sysmsg *sysmsg, const struct posix_fallocate_args *uap)
5457 {
5458           return (kern_posix_fallocate(uap->fd, uap->offset, uap->len));
5459 }
5460 
5461 int
kern_posix_fallocate(int fd,off_t offset,off_t len)5462 kern_posix_fallocate(int fd, off_t offset, off_t len)
5463 {
5464           struct thread *td = curthread;
5465           struct vnode *vp;
5466           struct file *fp;
5467           int error;
5468 
5469           if (offset < 0 || len <= 0)
5470                     return (EINVAL);
5471           /* Check for wrap. */
5472           if (offset > OFF_MAX - len)
5473                     return (EFBIG);
5474 
5475           fp = holdfp(td, fd, -1);
5476           if (fp == NULL)
5477                     return (EBADF);
5478 
5479           switch (fp->f_type) {
5480           case DTYPE_VNODE:
5481                     break;
5482           case DTYPE_PIPE:
5483           case DTYPE_FIFO:
5484                     error = ESPIPE;
5485                     goto out;
5486           default:
5487                     error = ENODEV;
5488                     goto out;
5489           }
5490 
5491           if ((fp->f_flag & FWRITE) == 0) {
5492                     error = EBADF;
5493                     goto out;
5494           }
5495 
5496           vp = fp->f_data;
5497           if (vp->v_type != VREG) {
5498                     error = ENODEV;
5499                     goto out;
5500           }
5501 
5502           vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
5503           error = VOP_ALLOCATE(vp, offset, len);
5504           vn_unlock(vp);
5505 out:
5506           dropfp(td, fd, fp);
5507           return (error);
5508 }
5509