1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: stable/9/sys/kern/vfs_syscalls.c 301055 2016-05-31 16:58:00Z glebius $");
39 
40 #include "opt_capsicum.h"
41 #include "opt_compat.h"
42 #include "opt_kdtrace.h"
43 #include "opt_ktrace.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/capability.h>
50 #include <sys/disk.h>
51 #include <sys/sysent.h>
52 #include <sys/malloc.h>
53 #include <sys/mount.h>
54 #include <sys/mutex.h>
55 #include <sys/sysproto.h>
56 #include <sys/namei.h>
57 #include <sys/filedesc.h>
58 #include <sys/kernel.h>
59 #include <sys/fcntl.h>
60 #include <sys/file.h>
61 #include <sys/filio.h>
62 #include <sys/limits.h>
63 #include <sys/linker.h>
64 #include <sys/sdt.h>
65 #include <sys/stat.h>
66 #include <sys/sx.h>
67 #include <sys/unistd.h>
68 #include <sys/vnode.h>
69 #include <sys/priv.h>
70 #include <sys/proc.h>
71 #include <sys/dirent.h>
72 #include <sys/jail.h>
73 #include <sys/syscallsubr.h>
74 #include <sys/sysctl.h>
75 #ifdef KTRACE
76 #include <sys/ktrace.h>
77 #endif
78 
79 #include <machine/stdarg.h>
80 
81 #include <security/audit/audit.h>
82 #include <security/mac/mac_framework.h>
83 
84 #include <vm/vm.h>
85 #include <vm/vm_object.h>
86 #include <vm/vm_page.h>
87 #include <vm/uma.h>
88 
89 #include <ufs/ufs/quota.h>
90 
/*
 * malloc type M_FADVISE; per its description string it accounts for
 * posix_fadvise(2) information.
 */
91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
92 
/*
 * Statically defined tracing: the "vfs" provider and two probes under
 * "stat" ("mode" and "reg"), each declared with a (char *, int) argument
 * pair.  NOTE(review): the probe firing sites are not in this part of
 * the file.
 */
93 SDT_PROVIDER_DEFINE(vfs);
94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
96 
/* Forward declarations of helpers private to this file. */
97 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
99 static int setfflags(struct thread *td, struct vnode *, int);
100 static int setutimes(struct thread *td, struct vnode *,
101     const struct timespec *, int, int);
102 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
103     struct thread *td);
104 
105 /*
106  * The module initialization routine for POSIX asynchronous I/O will
107  * set this to the version of AIO that it implements.  (Zero means
108  * that it is not implemented.)  This value is used here by pathconf()
109  * and in kern_descrip.c by fpathconf().
110  */
/* Zero until an AIO implementation stores its version here. */
111 int async_io_version;
112 
/*
 * DEBUG kernels only: an integer knob registered read/write under the
 * _debug sysctl node as "syncprt", initially 0.  NOTE(review): its
 * consumer is not visible in this part of the file.
 */
113 #ifdef DEBUG
114 static int syncprt = 0;
115 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
116 #endif
117 
118 /*
119  * Sync each mounted filesystem.
120  */
121 #ifndef _SYS_SYSPROTO_H_
122 struct sync_args {
123 	int     dummy;
124 };
125 #endif
126 /* ARGSUSED */
127 int
sys_sync(td,uap)128 sys_sync(td, uap)
129 	struct thread *td;
130 	struct sync_args *uap;
131 {
132 	struct mount *mp, *nmp;
133 	int save, vfslocked;
134 
135 	mtx_lock(&mountlist_mtx);
136 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
137 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
138 			nmp = TAILQ_NEXT(mp, mnt_list);
139 			continue;
140 		}
141 		vfslocked = VFS_LOCK_GIANT(mp);
142 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
143 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
144 			save = curthread_pflags_set(TDP_SYNCIO);
145 			vfs_msync(mp, MNT_NOWAIT);
146 			VFS_SYNC(mp, MNT_NOWAIT);
147 			curthread_pflags_restore(save);
148 			vn_finished_write(mp);
149 		}
150 		VFS_UNLOCK_GIANT(vfslocked);
151 		mtx_lock(&mountlist_mtx);
152 		nmp = TAILQ_NEXT(mp, mnt_list);
153 		vfs_unbusy(mp);
154 	}
155 	mtx_unlock(&mountlist_mtx);
156 	return (0);
157 }
158 
159 /*
160  * Change filesystem quotas.
161  */
162 #ifndef _SYS_SYSPROTO_H_
163 struct quotactl_args {
164 	char *path;
165 	int cmd;
166 	int uid;
167 	caddr_t arg;
168 };
169 #endif
170 int
sys_quotactl(td,uap)171 sys_quotactl(td, uap)
172 	struct thread *td;
173 	register struct quotactl_args /* {
174 		char *path;
175 		int cmd;
176 		int uid;
177 		caddr_t arg;
178 	} */ *uap;
179 {
180 	struct mount *mp;
181 	int vfslocked;
182 	int error;
183 	struct nameidata nd;
184 
185 	AUDIT_ARG_CMD(uap->cmd);
186 	AUDIT_ARG_UID(uap->uid);
187 	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
188 		return (EPERM);
189 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
190 	   UIO_USERSPACE, uap->path, td);
191 	if ((error = namei(&nd)) != 0)
192 		return (error);
193 	vfslocked = NDHASGIANT(&nd);
194 	NDFREE(&nd, NDF_ONLY_PNBUF);
195 	mp = nd.ni_vp->v_mount;
196 	vfs_ref(mp);
197 	vput(nd.ni_vp);
198 	error = vfs_busy(mp, 0);
199 	vfs_rel(mp);
200 	if (error) {
201 		VFS_UNLOCK_GIANT(vfslocked);
202 		return (error);
203 	}
204 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
205 
206 	/*
207 	 * Since quota on operation typically needs to open quota
208 	 * file, the Q_QUOTAON handler needs to unbusy the mount point
209 	 * before calling into namei.  Otherwise, unmount might be
210 	 * started between two vfs_busy() invocations (first is our,
211 	 * second is from mount point cross-walk code in lookup()),
212 	 * causing deadlock.
213 	 *
214 	 * Require that Q_QUOTAON handles the vfs_busy() reference on
215 	 * its own, always returning with ubusied mount point.
216 	 */
217 	if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
218 		vfs_unbusy(mp);
219 	VFS_UNLOCK_GIANT(vfslocked);
220 	return (error);
221 }
222 
223 /*
224  * Used by statfs conversion routines to scale the block size up if
225  * necessary so that all of the block counts are <= 'max_size'.  Note
226  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
227  * value of 'n'.
228  */
229 void
statfs_scale_blocks(struct statfs * sf,long max_size)230 statfs_scale_blocks(struct statfs *sf, long max_size)
231 {
232 	uint64_t count;
233 	int shift;
234 
235 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
236 
237 	/*
238 	 * Attempt to scale the block counts to give a more accurate
239 	 * overview to userland of the ratio of free space to used
240 	 * space.  To do this, find the largest block count and compute
241 	 * a divisor that lets it fit into a signed integer <= max_size.
242 	 */
243 	if (sf->f_bavail < 0)
244 		count = -sf->f_bavail;
245 	else
246 		count = sf->f_bavail;
247 	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
248 	if (count <= max_size)
249 		return;
250 
251 	count >>= flsl(max_size);
252 	shift = 0;
253 	while (count > 0) {
254 		shift++;
255 		count >>=1;
256 	}
257 
258 	sf->f_bsize <<= shift;
259 	sf->f_blocks >>= shift;
260 	sf->f_bfree >>= shift;
261 	sf->f_bavail >>= shift;
262 }
263 
264 /*
265  * Get filesystem statistics.
266  */
267 #ifndef _SYS_SYSPROTO_H_
268 struct statfs_args {
269 	char *path;
270 	struct statfs *buf;
271 };
272 #endif
273 int
sys_statfs(td,uap)274 sys_statfs(td, uap)
275 	struct thread *td;
276 	register struct statfs_args /* {
277 		char *path;
278 		struct statfs *buf;
279 	} */ *uap;
280 {
281 	struct statfs sf;
282 	int error;
283 
284 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
285 	if (error == 0)
286 		error = copyout(&sf, uap->buf, sizeof(sf));
287 	return (error);
288 }
289 
290 int
kern_statfs(struct thread * td,char * path,enum uio_seg pathseg,struct statfs * buf)291 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
292     struct statfs *buf)
293 {
294 	struct mount *mp;
295 	struct statfs *sp, sb;
296 	int vfslocked;
297 	int error;
298 	struct nameidata nd;
299 
300 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
301 	    AUDITVNODE1, pathseg, path, td);
302 	error = namei(&nd);
303 	if (error)
304 		return (error);
305 	vfslocked = NDHASGIANT(&nd);
306 	mp = nd.ni_vp->v_mount;
307 	vfs_ref(mp);
308 	NDFREE(&nd, NDF_ONLY_PNBUF);
309 	vput(nd.ni_vp);
310 	error = vfs_busy(mp, 0);
311 	vfs_rel(mp);
312 	if (error) {
313 		VFS_UNLOCK_GIANT(vfslocked);
314 		return (error);
315 	}
316 #ifdef MAC
317 	error = mac_mount_check_stat(td->td_ucred, mp);
318 	if (error)
319 		goto out;
320 #endif
321 	/*
322 	 * Set these in case the underlying filesystem fails to do so.
323 	 */
324 	sp = &mp->mnt_stat;
325 	sp->f_version = STATFS_VERSION;
326 	sp->f_namemax = NAME_MAX;
327 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
328 	error = VFS_STATFS(mp, sp);
329 	if (error)
330 		goto out;
331 	if (priv_check(td, PRIV_VFS_GENERATION)) {
332 		bcopy(sp, &sb, sizeof(sb));
333 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
334 		prison_enforce_statfs(td->td_ucred, mp, &sb);
335 		sp = &sb;
336 	}
337 	*buf = *sp;
338 out:
339 	vfs_unbusy(mp);
340 	VFS_UNLOCK_GIANT(vfslocked);
341 	return (error);
342 }
343 
344 /*
345  * Get filesystem statistics.
346  */
347 #ifndef _SYS_SYSPROTO_H_
348 struct fstatfs_args {
349 	int fd;
350 	struct statfs *buf;
351 };
352 #endif
353 int
sys_fstatfs(td,uap)354 sys_fstatfs(td, uap)
355 	struct thread *td;
356 	register struct fstatfs_args /* {
357 		int fd;
358 		struct statfs *buf;
359 	} */ *uap;
360 {
361 	struct statfs sf;
362 	int error;
363 
364 	error = kern_fstatfs(td, uap->fd, &sf);
365 	if (error == 0)
366 		error = copyout(&sf, uap->buf, sizeof(sf));
367 	return (error);
368 }
369 
370 int
kern_fstatfs(struct thread * td,int fd,struct statfs * buf)371 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
372 {
373 	struct file *fp;
374 	struct mount *mp;
375 	struct statfs *sp, sb;
376 	int vfslocked;
377 	struct vnode *vp;
378 	int error;
379 
380 	AUDIT_ARG_FD(fd);
381 	error = getvnode(td->td_proc->p_fd, fd, CAP_FSTATFS, &fp);
382 	if (error)
383 		return (error);
384 	vp = fp->f_vnode;
385 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
386 	vn_lock(vp, LK_SHARED | LK_RETRY);
387 #ifdef AUDIT
388 	AUDIT_ARG_VNODE1(vp);
389 #endif
390 	mp = vp->v_mount;
391 	if (mp)
392 		vfs_ref(mp);
393 	VOP_UNLOCK(vp, 0);
394 	fdrop(fp, td);
395 	if (mp == NULL) {
396 		error = EBADF;
397 		goto out;
398 	}
399 	error = vfs_busy(mp, 0);
400 	vfs_rel(mp);
401 	if (error) {
402 		VFS_UNLOCK_GIANT(vfslocked);
403 		return (error);
404 	}
405 #ifdef MAC
406 	error = mac_mount_check_stat(td->td_ucred, mp);
407 	if (error)
408 		goto out;
409 #endif
410 	/*
411 	 * Set these in case the underlying filesystem fails to do so.
412 	 */
413 	sp = &mp->mnt_stat;
414 	sp->f_version = STATFS_VERSION;
415 	sp->f_namemax = NAME_MAX;
416 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
417 	error = VFS_STATFS(mp, sp);
418 	if (error)
419 		goto out;
420 	if (priv_check(td, PRIV_VFS_GENERATION)) {
421 		bcopy(sp, &sb, sizeof(sb));
422 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
423 		prison_enforce_statfs(td->td_ucred, mp, &sb);
424 		sp = &sb;
425 	}
426 	*buf = *sp;
427 out:
428 	if (mp)
429 		vfs_unbusy(mp);
430 	VFS_UNLOCK_GIANT(vfslocked);
431 	return (error);
432 }
433 
434 /*
435  * Get statistics on all filesystems.
436  */
437 #ifndef _SYS_SYSPROTO_H_
438 struct getfsstat_args {
439 	struct statfs *buf;
440 	long bufsize;
441 	int flags;
442 };
443 #endif
444 int
sys_getfsstat(td,uap)445 sys_getfsstat(td, uap)
446 	struct thread *td;
447 	register struct getfsstat_args /* {
448 		struct statfs *buf;
449 		long bufsize;
450 		int flags;
451 	} */ *uap;
452 {
453 
454 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
455 	    uap->flags));
456 }
457 
458 /*
459  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
460  * 	The caller is responsible for freeing memory which will be allocated
461  *	in '*buf'.
462  */
463 int
kern_getfsstat(struct thread * td,struct statfs ** buf,size_t bufsize,enum uio_seg bufseg,int flags)464 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
465     enum uio_seg bufseg, int flags)
466 {
467 	struct mount *mp, *nmp;
468 	struct statfs *sfsp, *sp, sb;
469 	size_t count, maxcount;
470 	int vfslocked;
471 	int error;
472 
473 	maxcount = bufsize / sizeof(struct statfs);
474 	if (bufsize == 0)
475 		sfsp = NULL;
476 	else if (bufseg == UIO_USERSPACE)
477 		sfsp = *buf;
478 	else /* if (bufseg == UIO_SYSSPACE) */ {
479 		count = 0;
480 		mtx_lock(&mountlist_mtx);
481 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
482 			count++;
483 		}
484 		mtx_unlock(&mountlist_mtx);
485 		if (maxcount > count)
486 			maxcount = count;
487 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
488 		    M_WAITOK);
489 	}
490 	count = 0;
491 	mtx_lock(&mountlist_mtx);
492 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
493 		if (prison_canseemount(td->td_ucred, mp) != 0) {
494 			nmp = TAILQ_NEXT(mp, mnt_list);
495 			continue;
496 		}
497 #ifdef MAC
498 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
499 			nmp = TAILQ_NEXT(mp, mnt_list);
500 			continue;
501 		}
502 #endif
503 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
504 			nmp = TAILQ_NEXT(mp, mnt_list);
505 			continue;
506 		}
507 		vfslocked = VFS_LOCK_GIANT(mp);
508 		if (sfsp && count < maxcount) {
509 			sp = &mp->mnt_stat;
510 			/*
511 			 * Set these in case the underlying filesystem
512 			 * fails to do so.
513 			 */
514 			sp->f_version = STATFS_VERSION;
515 			sp->f_namemax = NAME_MAX;
516 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
517 			/*
518 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
519 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
520 			 * overrides MNT_WAIT.
521 			 */
522 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
523 			    (flags & MNT_WAIT)) &&
524 			    (error = VFS_STATFS(mp, sp))) {
525 				VFS_UNLOCK_GIANT(vfslocked);
526 				mtx_lock(&mountlist_mtx);
527 				nmp = TAILQ_NEXT(mp, mnt_list);
528 				vfs_unbusy(mp);
529 				continue;
530 			}
531 			if (priv_check(td, PRIV_VFS_GENERATION)) {
532 				bcopy(sp, &sb, sizeof(sb));
533 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
534 				prison_enforce_statfs(td->td_ucred, mp, &sb);
535 				sp = &sb;
536 			}
537 			if (bufseg == UIO_SYSSPACE)
538 				bcopy(sp, sfsp, sizeof(*sp));
539 			else /* if (bufseg == UIO_USERSPACE) */ {
540 				error = copyout(sp, sfsp, sizeof(*sp));
541 				if (error) {
542 					vfs_unbusy(mp);
543 					VFS_UNLOCK_GIANT(vfslocked);
544 					return (error);
545 				}
546 			}
547 			sfsp++;
548 		}
549 		VFS_UNLOCK_GIANT(vfslocked);
550 		count++;
551 		mtx_lock(&mountlist_mtx);
552 		nmp = TAILQ_NEXT(mp, mnt_list);
553 		vfs_unbusy(mp);
554 	}
555 	mtx_unlock(&mountlist_mtx);
556 	if (sfsp && count > maxcount)
557 		td->td_retval[0] = maxcount;
558 	else
559 		td->td_retval[0] = count;
560 	return (0);
561 }
562 
563 #ifdef COMPAT_FREEBSD4
564 /*
565  * Get old format filesystem statistics.
566  */
567 static void cvtstatfs(struct statfs *, struct ostatfs *);
568 
569 #ifndef _SYS_SYSPROTO_H_
570 struct freebsd4_statfs_args {
571 	char *path;
572 	struct ostatfs *buf;
573 };
574 #endif
575 int
freebsd4_statfs(td,uap)576 freebsd4_statfs(td, uap)
577 	struct thread *td;
578 	struct freebsd4_statfs_args /* {
579 		char *path;
580 		struct ostatfs *buf;
581 	} */ *uap;
582 {
583 	struct ostatfs osb;
584 	struct statfs sf;
585 	int error;
586 
587 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
588 	if (error)
589 		return (error);
590 	cvtstatfs(&sf, &osb);
591 	return (copyout(&osb, uap->buf, sizeof(osb)));
592 }
593 
594 /*
595  * Get filesystem statistics.
596  */
597 #ifndef _SYS_SYSPROTO_H_
598 struct freebsd4_fstatfs_args {
599 	int fd;
600 	struct ostatfs *buf;
601 };
602 #endif
603 int
freebsd4_fstatfs(td,uap)604 freebsd4_fstatfs(td, uap)
605 	struct thread *td;
606 	struct freebsd4_fstatfs_args /* {
607 		int fd;
608 		struct ostatfs *buf;
609 	} */ *uap;
610 {
611 	struct ostatfs osb;
612 	struct statfs sf;
613 	int error;
614 
615 	error = kern_fstatfs(td, uap->fd, &sf);
616 	if (error)
617 		return (error);
618 	cvtstatfs(&sf, &osb);
619 	return (copyout(&osb, uap->buf, sizeof(osb)));
620 }
621 
622 /*
623  * Get statistics on all filesystems.
624  */
625 #ifndef _SYS_SYSPROTO_H_
626 struct freebsd4_getfsstat_args {
627 	struct ostatfs *buf;
628 	long bufsize;
629 	int flags;
630 };
631 #endif
632 int
freebsd4_getfsstat(td,uap)633 freebsd4_getfsstat(td, uap)
634 	struct thread *td;
635 	register struct freebsd4_getfsstat_args /* {
636 		struct ostatfs *buf;
637 		long bufsize;
638 		int flags;
639 	} */ *uap;
640 {
641 	struct statfs *buf, *sp;
642 	struct ostatfs osb;
643 	size_t count, size;
644 	int error;
645 
646 	count = uap->bufsize / sizeof(struct ostatfs);
647 	size = count * sizeof(struct statfs);
648 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
649 	if (size > 0) {
650 		count = td->td_retval[0];
651 		sp = buf;
652 		while (count > 0 && error == 0) {
653 			cvtstatfs(sp, &osb);
654 			error = copyout(&osb, uap->buf, sizeof(osb));
655 			sp++;
656 			uap->buf++;
657 			count--;
658 		}
659 		free(buf, M_TEMP);
660 	}
661 	return (error);
662 }
663 
664 /*
665  * Implement fstatfs() for (NFS) file handles.
666  */
667 #ifndef _SYS_SYSPROTO_H_
668 struct freebsd4_fhstatfs_args {
669 	struct fhandle *u_fhp;
670 	struct ostatfs *buf;
671 };
672 #endif
673 int
freebsd4_fhstatfs(td,uap)674 freebsd4_fhstatfs(td, uap)
675 	struct thread *td;
676 	struct freebsd4_fhstatfs_args /* {
677 		struct fhandle *u_fhp;
678 		struct ostatfs *buf;
679 	} */ *uap;
680 {
681 	struct ostatfs osb;
682 	struct statfs sf;
683 	fhandle_t fh;
684 	int error;
685 
686 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
687 	if (error)
688 		return (error);
689 	error = kern_fhstatfs(td, fh, &sf);
690 	if (error)
691 		return (error);
692 	cvtstatfs(&sf, &osb);
693 	return (copyout(&osb, uap->buf, sizeof(osb)));
694 }
695 
696 /*
697  * Convert a new format statfs structure to an old format statfs structure.
698  */
699 static void
cvtstatfs(nsp,osp)700 cvtstatfs(nsp, osp)
701 	struct statfs *nsp;
702 	struct ostatfs *osp;
703 {
704 
705 	statfs_scale_blocks(nsp, LONG_MAX);
706 	bzero(osp, sizeof(*osp));
707 	osp->f_bsize = nsp->f_bsize;
708 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
709 	osp->f_blocks = nsp->f_blocks;
710 	osp->f_bfree = nsp->f_bfree;
711 	osp->f_bavail = nsp->f_bavail;
712 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
713 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
714 	osp->f_owner = nsp->f_owner;
715 	osp->f_type = nsp->f_type;
716 	osp->f_flags = nsp->f_flags;
717 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
718 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
719 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
720 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
721 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
722 	    MIN(MFSNAMELEN, OMFSNAMELEN));
723 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
724 	    MIN(MNAMELEN, OMNAMELEN));
725 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
726 	    MIN(MNAMELEN, OMNAMELEN));
727 	osp->f_fsid = nsp->f_fsid;
728 }
729 #endif /* COMPAT_FREEBSD4 */
730 
731 /*
732  * Change current working directory to a given file descriptor.
733  */
734 #ifndef _SYS_SYSPROTO_H_
735 struct fchdir_args {
736 	int	fd;
737 };
738 #endif
739 int
sys_fchdir(td,uap)740 sys_fchdir(td, uap)
741 	struct thread *td;
742 	struct fchdir_args /* {
743 		int fd;
744 	} */ *uap;
745 {
746 	register struct filedesc *fdp = td->td_proc->p_fd;
747 	struct vnode *vp, *tdp, *vpold;
748 	struct mount *mp;
749 	struct file *fp;
750 	int vfslocked;
751 	int error;
752 
753 	AUDIT_ARG_FD(uap->fd);
754 	if ((error = getvnode(fdp, uap->fd, CAP_FCHDIR, &fp)) != 0)
755 		return (error);
756 	vp = fp->f_vnode;
757 	VREF(vp);
758 	fdrop(fp, td);
759 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
760 	vn_lock(vp, LK_SHARED | LK_RETRY);
761 	AUDIT_ARG_VNODE1(vp);
762 	error = change_dir(vp, td);
763 	while (!error && (mp = vp->v_mountedhere) != NULL) {
764 		int tvfslocked;
765 		if (vfs_busy(mp, 0))
766 			continue;
767 		tvfslocked = VFS_LOCK_GIANT(mp);
768 		error = VFS_ROOT(mp, LK_SHARED, &tdp);
769 		vfs_unbusy(mp);
770 		if (error) {
771 			VFS_UNLOCK_GIANT(tvfslocked);
772 			break;
773 		}
774 		vput(vp);
775 		VFS_UNLOCK_GIANT(vfslocked);
776 		vp = tdp;
777 		vfslocked = tvfslocked;
778 	}
779 	if (error) {
780 		vput(vp);
781 		VFS_UNLOCK_GIANT(vfslocked);
782 		return (error);
783 	}
784 	VOP_UNLOCK(vp, 0);
785 	VFS_UNLOCK_GIANT(vfslocked);
786 	FILEDESC_XLOCK(fdp);
787 	vpold = fdp->fd_cdir;
788 	fdp->fd_cdir = vp;
789 	FILEDESC_XUNLOCK(fdp);
790 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
791 	vrele(vpold);
792 	VFS_UNLOCK_GIANT(vfslocked);
793 	return (0);
794 }
795 
796 /*
797  * Change current working directory (``.'').
798  */
799 #ifndef _SYS_SYSPROTO_H_
800 struct chdir_args {
801 	char	*path;
802 };
803 #endif
804 int
sys_chdir(td,uap)805 sys_chdir(td, uap)
806 	struct thread *td;
807 	struct chdir_args /* {
808 		char *path;
809 	} */ *uap;
810 {
811 
812 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
813 }
814 
815 int
kern_chdir(struct thread * td,char * path,enum uio_seg pathseg)816 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
817 {
818 	register struct filedesc *fdp = td->td_proc->p_fd;
819 	int error;
820 	struct nameidata nd;
821 	struct vnode *vp;
822 	int vfslocked;
823 
824 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
825 	    MPSAFE, pathseg, path, td);
826 	if ((error = namei(&nd)) != 0)
827 		return (error);
828 	vfslocked = NDHASGIANT(&nd);
829 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
830 		vput(nd.ni_vp);
831 		VFS_UNLOCK_GIANT(vfslocked);
832 		NDFREE(&nd, NDF_ONLY_PNBUF);
833 		return (error);
834 	}
835 	VOP_UNLOCK(nd.ni_vp, 0);
836 	VFS_UNLOCK_GIANT(vfslocked);
837 	NDFREE(&nd, NDF_ONLY_PNBUF);
838 	FILEDESC_XLOCK(fdp);
839 	vp = fdp->fd_cdir;
840 	fdp->fd_cdir = nd.ni_vp;
841 	FILEDESC_XUNLOCK(fdp);
842 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
843 	vrele(vp);
844 	VFS_UNLOCK_GIANT(vfslocked);
845 	return (0);
846 }
847 
848 /*
849  * Helper function for raised chroot(2) security function:  Refuse if
850  * any filedescriptors are open directories.
851  */
852 static int
chroot_refuse_vdir_fds(fdp)853 chroot_refuse_vdir_fds(fdp)
854 	struct filedesc *fdp;
855 {
856 	struct vnode *vp;
857 	struct file *fp;
858 	int fd;
859 
860 	FILEDESC_LOCK_ASSERT(fdp);
861 
862 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
863 		fp = fget_locked(fdp, fd);
864 		if (fp == NULL)
865 			continue;
866 		if (fp->f_type == DTYPE_VNODE) {
867 			vp = fp->f_vnode;
868 			if (vp->v_type == VDIR)
869 				return (EPERM);
870 		}
871 	}
872 	return (0);
873 }
874 
875 /*
876  * This sysctl determines if we will allow a process to chroot(2) if it
877  * has a directory open:
878  *	0: disallowed for all processes.
879  *	1: allowed for processes that were not already chroot(2)'ed.
880  *	2: allowed for all processes.
881  */
882 
/*
 * Backing variable for the policy above; it starts at 1 and is
 * registered read/write under the _kern sysctl node.  change_root()
 * reads it to decide whether open directories must block a chroot.
 */
883 static int chroot_allow_open_directories = 1;
884 
885 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
886      &chroot_allow_open_directories, 0, "");
887 
888 /*
889  * Change notion of root (``/'') directory.
890  */
891 #ifndef _SYS_SYSPROTO_H_
892 struct chroot_args {
893 	char	*path;
894 };
895 #endif
896 int
sys_chroot(td,uap)897 sys_chroot(td, uap)
898 	struct thread *td;
899 	struct chroot_args /* {
900 		char *path;
901 	} */ *uap;
902 {
903 	int error;
904 	struct nameidata nd;
905 	int vfslocked;
906 
907 	error = priv_check(td, PRIV_VFS_CHROOT);
908 	if (error)
909 		return (error);
910 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
911 	    AUDITVNODE1, UIO_USERSPACE, uap->path, td);
912 	error = namei(&nd);
913 	if (error)
914 		goto error;
915 	vfslocked = NDHASGIANT(&nd);
916 	if ((error = change_dir(nd.ni_vp, td)) != 0)
917 		goto e_vunlock;
918 #ifdef MAC
919 	if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
920 		goto e_vunlock;
921 #endif
922 	VOP_UNLOCK(nd.ni_vp, 0);
923 	error = change_root(nd.ni_vp, td);
924 	vrele(nd.ni_vp);
925 	VFS_UNLOCK_GIANT(vfslocked);
926 	NDFREE(&nd, NDF_ONLY_PNBUF);
927 	return (error);
928 e_vunlock:
929 	vput(nd.ni_vp);
930 	VFS_UNLOCK_GIANT(vfslocked);
931 error:
932 	NDFREE(&nd, NDF_ONLY_PNBUF);
933 	return (error);
934 }
935 
936 /*
937  * Common routine for chroot and chdir.  Callers must provide a locked vnode
938  * instance.
939  */
940 int
change_dir(vp,td)941 change_dir(vp, td)
942 	struct vnode *vp;
943 	struct thread *td;
944 {
945 	int error;
946 
947 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
948 	if (vp->v_type != VDIR)
949 		return (ENOTDIR);
950 #ifdef MAC
951 	error = mac_vnode_check_chdir(td->td_ucred, vp);
952 	if (error)
953 		return (error);
954 #endif
955 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
956 	return (error);
957 }
958 
959 /*
960  * Common routine for kern_chroot() and jail_attach().  The caller is
961  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
962  * authorize this operation.
963  */
964 int
change_root(vp,td)965 change_root(vp, td)
966 	struct vnode *vp;
967 	struct thread *td;
968 {
969 	struct filedesc *fdp;
970 	struct vnode *oldvp;
971 	int vfslocked;
972 	int error;
973 
974 	VFS_ASSERT_GIANT(vp->v_mount);
975 	fdp = td->td_proc->p_fd;
976 	FILEDESC_XLOCK(fdp);
977 	if (chroot_allow_open_directories == 0 ||
978 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
979 		error = chroot_refuse_vdir_fds(fdp);
980 		if (error) {
981 			FILEDESC_XUNLOCK(fdp);
982 			return (error);
983 		}
984 	}
985 	oldvp = fdp->fd_rdir;
986 	fdp->fd_rdir = vp;
987 	VREF(fdp->fd_rdir);
988 	if (!fdp->fd_jdir) {
989 		fdp->fd_jdir = vp;
990 		VREF(fdp->fd_jdir);
991 	}
992 	FILEDESC_XUNLOCK(fdp);
993 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
994 	vrele(oldvp);
995 	VFS_UNLOCK_GIANT(vfslocked);
996 	return (0);
997 }
998 
999 static __inline cap_rights_t
flags_to_rights(int flags)1000 flags_to_rights(int flags)
1001 {
1002 	cap_rights_t rights = 0;
1003 
1004 	switch ((flags & O_ACCMODE)) {
1005 	case O_RDONLY:
1006 		rights |= CAP_READ;
1007 		break;
1008 
1009 	case O_RDWR:
1010 		rights |= CAP_READ;
1011 		/* fall through */
1012 
1013 	case O_WRONLY:
1014 		rights |= CAP_WRITE;
1015 		break;
1016 
1017 	case O_EXEC:
1018 		rights |= CAP_FEXECVE;
1019 		break;
1020 	}
1021 
1022 	if (flags & O_CREAT)
1023 		rights |= CAP_CREATE;
1024 
1025 	if (flags & O_TRUNC)
1026 		rights |= CAP_FTRUNCATE;
1027 
1028 	if ((flags & O_EXLOCK) || (flags & O_SHLOCK))
1029 		rights |= CAP_FLOCK;
1030 
1031 	return (rights);
1032 }
1033 
1034 /*
1035  * Check permissions, allocate an open file structure, and call the device
1036  * open routine if any.
1037  */
1038 #ifndef _SYS_SYSPROTO_H_
1039 struct open_args {
1040 	char	*path;
1041 	int	flags;
1042 	int	mode;
1043 };
1044 #endif
1045 int
sys_open(td,uap)1046 sys_open(td, uap)
1047 	struct thread *td;
1048 	register struct open_args /* {
1049 		char *path;
1050 		int flags;
1051 		int mode;
1052 	} */ *uap;
1053 {
1054 
1055 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
1056 }
1057 
1058 #ifndef _SYS_SYSPROTO_H_
1059 struct openat_args {
1060 	int	fd;
1061 	char	*path;
1062 	int	flag;
1063 	int	mode;
1064 };
1065 #endif
1066 int
sys_openat(struct thread * td,struct openat_args * uap)1067 sys_openat(struct thread *td, struct openat_args *uap)
1068 {
1069 
1070 	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
1071 	    uap->mode));
1072 }
1073 
1074 int
kern_open(struct thread * td,char * path,enum uio_seg pathseg,int flags,int mode)1075 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
1076     int mode)
1077 {
1078 
1079 	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
1080 }
1081 
1082 int
kern_openat(struct thread * td,int fd,char * path,enum uio_seg pathseg,int flags,int mode)1083 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1084     int flags, int mode)
1085 {
1086 	struct proc *p = td->td_proc;
1087 	struct filedesc *fdp = p->p_fd;
1088 	struct file *fp;
1089 	struct vnode *vp;
1090 	int cmode;
1091 	struct file *nfp;
1092 	int type, indx = -1, error, error_open;
1093 	struct flock lf;
1094 	struct nameidata nd;
1095 	int vfslocked;
1096 	cap_rights_t rights_needed = CAP_LOOKUP;
1097 
1098 	AUDIT_ARG_FFLAGS(flags);
1099 	AUDIT_ARG_MODE(mode);
1100 	/* XXX: audit dirfd */
1101 	rights_needed |= flags_to_rights(flags);
1102 	/*
1103 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
1104 	 * may be specified.
1105 	 */
1106 	if (flags & O_EXEC) {
1107 		if (flags & O_ACCMODE)
1108 			return (EINVAL);
1109 	} else if ((flags & O_ACCMODE) == O_ACCMODE)
1110 		return (EINVAL);
1111 	else
1112 		flags = FFLAGS(flags);
1113 
1114 	/*
1115 	 * allocate the file descriptor, but don't install a descriptor yet
1116 	 */
1117 	error = falloc_noinstall(td, &nfp);
1118 	if (error)
1119 		return (error);
1120 	/* An extra reference on `nfp' has been held for us by falloc_noinstall(). */
1121 	fp = nfp;
1122 	/* Set the flags early so the finit in devfs can pick them up. */
1123 	fp->f_flag = flags & FMASK;
1124 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1125 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
1126 	    path, fd, rights_needed, td);
1127 	td->td_dupfd = -1;		/* XXX check for fdopen */
1128 	error = vn_open(&nd, &flags, cmode, fp);
1129 	if (error) {
1130 		/*
1131 		 * If the vn_open replaced the method vector, something
1132 		 * wonderous happened deep below and we just pass it up
1133 		 * pretending we know what we do.
1134 		 */
1135 		if (error == ENXIO && fp->f_ops != &badfileops)
1136 			goto success;
1137 
1138 		/*
1139 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1140 		 * responsible for dropping the old contents of ofiles[indx]
1141 		 * if it succeeds.
1142 		 *
1143 		 * Don't do this for relative (capability) lookups; we don't
1144 		 * understand exactly what would happen, and we don't think
1145 		 * that it ever should.
1146 		 */
1147 		if ((nd.ni_strictrelative == 0) &&
1148 		    (error == ENODEV || error == ENXIO) &&
1149 		    (td->td_dupfd >= 0)) {
1150 			/* XXX from fdopen */
1151 			error_open = error;
1152 			if ((error = finstall(td, fp, &indx, flags)) != 0)
1153 				goto bad_unlocked;
1154 			if ((error = dupfdopen(td, fdp, indx, td->td_dupfd,
1155 			    flags, error_open)) == 0)
1156 				goto success;
1157 		}
1158 		/*
1159 		 * Clean up the descriptor, but only if another thread hadn't
1160 		 * replaced or closed it.
1161 		 */
1162 		if (indx != -1)
1163 			fdclose(fdp, fp, indx, td);
1164 		fdrop(fp, td);
1165 
1166 		return (error);
1167 	}
1168 	td->td_dupfd = 0;
1169 	vfslocked = NDHASGIANT(&nd);
1170 	NDFREE(&nd, NDF_ONLY_PNBUF);
1171 	vp = nd.ni_vp;
1172 
1173 	/*
1174 	 * Store the vnode, for any f_type. Typically, the vnode use
1175 	 * count is decremented by direct call to vn_closefile() for
1176 	 * files that switched type in the cdevsw fdopen() method.
1177 	 */
1178 	fp->f_vnode = vp;
1179 	/*
1180 	 * If the file wasn't claimed by devfs bind it to the normal
1181 	 * vnode operations here.
1182 	 */
1183 	if (fp->f_ops == &badfileops) {
1184 		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
1185 		fp->f_seqcount = 1;
1186 		finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
1187 	}
1188 
1189 	VOP_UNLOCK(vp, 0);
1190 	if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) {
1191 		lf.l_whence = SEEK_SET;
1192 		lf.l_start = 0;
1193 		lf.l_len = 0;
1194 		if (flags & O_EXLOCK)
1195 			lf.l_type = F_WRLCK;
1196 		else
1197 			lf.l_type = F_RDLCK;
1198 		type = F_FLOCK;
1199 		if ((flags & FNONBLOCK) == 0)
1200 			type |= F_WAIT;
1201 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1202 			    type)) != 0)
1203 			goto bad;
1204 		atomic_set_int(&fp->f_flag, FHASLOCK);
1205 	}
1206 	if (flags & O_TRUNC) {
1207 		error = fo_truncate(fp, 0, td->td_ucred, td);
1208 		if (error)
1209 			goto bad;
1210 	}
1211 	VFS_UNLOCK_GIANT(vfslocked);
1212 success:
1213 	/*
1214 	 * If we haven't already installed the FD (for dupfdopen), do so now.
1215 	 */
1216 	if (indx == -1) {
1217 #ifdef CAPABILITIES
1218 		if (nd.ni_strictrelative == 1) {
1219 			/*
1220 			 * We are doing a strict relative lookup; wrap the
1221 			 * result in a capability.
1222 			 */
1223 			if ((error = kern_capwrap(td, fp, nd.ni_baserights,
1224 			    &indx)) != 0)
1225 				goto bad_unlocked;
1226 		} else
1227 #endif
1228 			if ((error = finstall(td, fp, &indx, flags)) != 0)
1229 				goto bad_unlocked;
1230 
1231 	}
1232 
1233 	/*
1234 	 * Release our private reference, leaving the one associated with
1235 	 * the descriptor table intact.
1236 	 */
1237 	fdrop(fp, td);
1238 	td->td_retval[0] = indx;
1239 	return (0);
1240 bad:
1241 	VFS_UNLOCK_GIANT(vfslocked);
1242 bad_unlocked:
1243 	if (indx != -1)
1244 		fdclose(fdp, fp, indx, td);
1245 	fdrop(fp, td);
1246 	td->td_retval[0] = -1;
1247 	return (error);
1248 }
1249 
1250 #ifdef COMPAT_43
1251 /*
1252  * Create a file.
1253  */
1254 #ifndef _SYS_SYSPROTO_H_
1255 struct ocreat_args {
1256 	char	*path;
1257 	int	mode;
1258 };
1259 #endif
1260 int
ocreat(td,uap)1261 ocreat(td, uap)
1262 	struct thread *td;
1263 	register struct ocreat_args /* {
1264 		char *path;
1265 		int mode;
1266 	} */ *uap;
1267 {
1268 
1269 	return (kern_open(td, uap->path, UIO_USERSPACE,
1270 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1271 }
1272 #endif /* COMPAT_43 */
1273 
1274 /*
1275  * Create a special file.
1276  */
1277 #ifndef _SYS_SYSPROTO_H_
1278 struct mknod_args {
1279 	char	*path;
1280 	int	mode;
1281 	int	dev;
1282 };
1283 #endif
1284 int
sys_mknod(td,uap)1285 sys_mknod(td, uap)
1286 	struct thread *td;
1287 	register struct mknod_args /* {
1288 		char *path;
1289 		int mode;
1290 		int dev;
1291 	} */ *uap;
1292 {
1293 
1294 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1295 }
1296 
1297 #ifndef _SYS_SYSPROTO_H_
1298 struct mknodat_args {
1299 	int	fd;
1300 	char	*path;
1301 	mode_t	mode;
1302 	dev_t	dev;
1303 };
1304 #endif
1305 int
sys_mknodat(struct thread * td,struct mknodat_args * uap)1306 sys_mknodat(struct thread *td, struct mknodat_args *uap)
1307 {
1308 
1309 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
1310 	    uap->dev));
1311 }
1312 
1313 int
kern_mknod(struct thread * td,char * path,enum uio_seg pathseg,int mode,int dev)1314 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1315     int dev)
1316 {
1317 
1318 	return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
1319 }
1320 
1321 int
kern_mknodat(struct thread * td,int fd,char * path,enum uio_seg pathseg,int mode,int dev)1322 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1323     int mode, int dev)
1324 {
1325 	struct vnode *vp;
1326 	struct mount *mp;
1327 	struct vattr vattr;
1328 	int error;
1329 	int whiteout = 0;
1330 	struct nameidata nd;
1331 	int vfslocked;
1332 
1333 	AUDIT_ARG_MODE(mode);
1334 	AUDIT_ARG_DEV(dev);
1335 	switch (mode & S_IFMT) {
1336 	case S_IFCHR:
1337 	case S_IFBLK:
1338 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1339 		break;
1340 	case S_IFMT:
1341 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1342 		break;
1343 	case S_IFWHT:
1344 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1345 		break;
1346 	case S_IFIFO:
1347 		if (dev == 0)
1348 			return (kern_mkfifoat(td, fd, path, pathseg, mode));
1349 		/* FALLTHROUGH */
1350 	default:
1351 		error = EINVAL;
1352 		break;
1353 	}
1354 	if (error)
1355 		return (error);
1356 restart:
1357 	bwillwrite();
1358 	NDINIT_ATRIGHTS(&nd, CREATE,
1359 	    LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
1360 	    CAP_MKFIFO, td);
1361 	if ((error = namei(&nd)) != 0)
1362 		return (error);
1363 	vfslocked = NDHASGIANT(&nd);
1364 	vp = nd.ni_vp;
1365 	if (vp != NULL) {
1366 		NDFREE(&nd, NDF_ONLY_PNBUF);
1367 		if (vp == nd.ni_dvp)
1368 			vrele(nd.ni_dvp);
1369 		else
1370 			vput(nd.ni_dvp);
1371 		vrele(vp);
1372 		VFS_UNLOCK_GIANT(vfslocked);
1373 		return (EEXIST);
1374 	} else {
1375 		VATTR_NULL(&vattr);
1376 		vattr.va_mode = (mode & ALLPERMS) &
1377 		    ~td->td_proc->p_fd->fd_cmask;
1378 		vattr.va_rdev = dev;
1379 		whiteout = 0;
1380 
1381 		switch (mode & S_IFMT) {
1382 		case S_IFMT:	/* used by badsect to flag bad sectors */
1383 			vattr.va_type = VBAD;
1384 			break;
1385 		case S_IFCHR:
1386 			vattr.va_type = VCHR;
1387 			break;
1388 		case S_IFBLK:
1389 			vattr.va_type = VBLK;
1390 			break;
1391 		case S_IFWHT:
1392 			whiteout = 1;
1393 			break;
1394 		default:
1395 			panic("kern_mknod: invalid mode");
1396 		}
1397 	}
1398 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1399 		NDFREE(&nd, NDF_ONLY_PNBUF);
1400 		vput(nd.ni_dvp);
1401 		VFS_UNLOCK_GIANT(vfslocked);
1402 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1403 			return (error);
1404 		goto restart;
1405 	}
1406 #ifdef MAC
1407 	if (error == 0 && !whiteout)
1408 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
1409 		    &nd.ni_cnd, &vattr);
1410 #endif
1411 	if (!error) {
1412 		if (whiteout)
1413 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1414 		else {
1415 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1416 						&nd.ni_cnd, &vattr);
1417 			if (error == 0)
1418 				vput(nd.ni_vp);
1419 		}
1420 	}
1421 	NDFREE(&nd, NDF_ONLY_PNBUF);
1422 	vput(nd.ni_dvp);
1423 	vn_finished_write(mp);
1424 	VFS_UNLOCK_GIANT(vfslocked);
1425 	return (error);
1426 }
1427 
1428 /*
1429  * Create a named pipe.
1430  */
1431 #ifndef _SYS_SYSPROTO_H_
1432 struct mkfifo_args {
1433 	char	*path;
1434 	int	mode;
1435 };
1436 #endif
1437 int
sys_mkfifo(td,uap)1438 sys_mkfifo(td, uap)
1439 	struct thread *td;
1440 	register struct mkfifo_args /* {
1441 		char *path;
1442 		int mode;
1443 	} */ *uap;
1444 {
1445 
1446 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1447 }
1448 
1449 #ifndef _SYS_SYSPROTO_H_
1450 struct mkfifoat_args {
1451 	int	fd;
1452 	char	*path;
1453 	mode_t	mode;
1454 };
1455 #endif
1456 int
sys_mkfifoat(struct thread * td,struct mkfifoat_args * uap)1457 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
1458 {
1459 
1460 	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
1461 	    uap->mode));
1462 }
1463 
1464 int
kern_mkfifo(struct thread * td,char * path,enum uio_seg pathseg,int mode)1465 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1466 {
1467 
1468 	return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
1469 }
1470 
1471 int
kern_mkfifoat(struct thread * td,int fd,char * path,enum uio_seg pathseg,int mode)1472 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1473     int mode)
1474 {
1475 	struct mount *mp;
1476 	struct vattr vattr;
1477 	int error;
1478 	struct nameidata nd;
1479 	int vfslocked;
1480 
1481 	AUDIT_ARG_MODE(mode);
1482 restart:
1483 	bwillwrite();
1484 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1485 	    pathseg, path, fd, td);
1486 	if ((error = namei(&nd)) != 0)
1487 		return (error);
1488 	vfslocked = NDHASGIANT(&nd);
1489 	if (nd.ni_vp != NULL) {
1490 		NDFREE(&nd, NDF_ONLY_PNBUF);
1491 		if (nd.ni_vp == nd.ni_dvp)
1492 			vrele(nd.ni_dvp);
1493 		else
1494 			vput(nd.ni_dvp);
1495 		vrele(nd.ni_vp);
1496 		VFS_UNLOCK_GIANT(vfslocked);
1497 		return (EEXIST);
1498 	}
1499 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1500 		NDFREE(&nd, NDF_ONLY_PNBUF);
1501 		vput(nd.ni_dvp);
1502 		VFS_UNLOCK_GIANT(vfslocked);
1503 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1504 			return (error);
1505 		goto restart;
1506 	}
1507 	VATTR_NULL(&vattr);
1508 	vattr.va_type = VFIFO;
1509 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1510 #ifdef MAC
1511 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1512 	    &vattr);
1513 	if (error)
1514 		goto out;
1515 #endif
1516 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1517 	if (error == 0)
1518 		vput(nd.ni_vp);
1519 #ifdef MAC
1520 out:
1521 #endif
1522 	vput(nd.ni_dvp);
1523 	vn_finished_write(mp);
1524 	VFS_UNLOCK_GIANT(vfslocked);
1525 	NDFREE(&nd, NDF_ONLY_PNBUF);
1526 	return (error);
1527 }
1528 
1529 /*
1530  * Make a hard file link.
1531  */
1532 #ifndef _SYS_SYSPROTO_H_
1533 struct link_args {
1534 	char	*path;
1535 	char	*link;
1536 };
1537 #endif
1538 int
sys_link(td,uap)1539 sys_link(td, uap)
1540 	struct thread *td;
1541 	register struct link_args /* {
1542 		char *path;
1543 		char *link;
1544 	} */ *uap;
1545 {
1546 
1547 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1548 }
1549 
1550 #ifndef _SYS_SYSPROTO_H_
1551 struct linkat_args {
1552 	int	fd1;
1553 	char	*path1;
1554 	int	fd2;
1555 	char	*path2;
1556 	int	flag;
1557 };
1558 #endif
1559 int
sys_linkat(struct thread * td,struct linkat_args * uap)1560 sys_linkat(struct thread *td, struct linkat_args *uap)
1561 {
1562 	int flag;
1563 
1564 	flag = uap->flag;
1565 	if (flag & ~AT_SYMLINK_FOLLOW)
1566 		return (EINVAL);
1567 
1568 	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
1569 	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
1570 }
1571 
1572 int hardlink_check_uid = 0;
1573 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1574     &hardlink_check_uid, 0,
1575     "Unprivileged processes cannot create hard links to files owned by other "
1576     "users");
1577 static int hardlink_check_gid = 0;
1578 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1579     &hardlink_check_gid, 0,
1580     "Unprivileged processes cannot create hard links to files owned by other "
1581     "groups");
1582 
1583 static int
can_hardlink(struct vnode * vp,struct ucred * cred)1584 can_hardlink(struct vnode *vp, struct ucred *cred)
1585 {
1586 	struct vattr va;
1587 	int error;
1588 
1589 	if (!hardlink_check_uid && !hardlink_check_gid)
1590 		return (0);
1591 
1592 	error = VOP_GETATTR(vp, &va, cred);
1593 	if (error != 0)
1594 		return (error);
1595 
1596 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1597 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1598 		if (error)
1599 			return (error);
1600 	}
1601 
1602 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1603 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1604 		if (error)
1605 			return (error);
1606 	}
1607 
1608 	return (0);
1609 }
1610 
1611 int
kern_link(struct thread * td,char * path,char * link,enum uio_seg segflg)1612 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1613 {
1614 
1615 	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
1616 }
1617 
1618 int
kern_linkat(struct thread * td,int fd1,int fd2,char * path1,char * path2,enum uio_seg segflg,int follow)1619 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
1620     enum uio_seg segflg, int follow)
1621 {
1622 	struct vnode *vp;
1623 	struct mount *mp;
1624 	struct nameidata nd;
1625 	int vfslocked;
1626 	int lvfslocked;
1627 	int error;
1628 
1629 	bwillwrite();
1630 	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
1631 	    fd1, td);
1632 
1633 	if ((error = namei(&nd)) != 0)
1634 		return (error);
1635 	vfslocked = NDHASGIANT(&nd);
1636 	NDFREE(&nd, NDF_ONLY_PNBUF);
1637 	vp = nd.ni_vp;
1638 	if (vp->v_type == VDIR) {
1639 		vrele(vp);
1640 		VFS_UNLOCK_GIANT(vfslocked);
1641 		return (EPERM);		/* POSIX */
1642 	}
1643 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1644 		vrele(vp);
1645 		VFS_UNLOCK_GIANT(vfslocked);
1646 		return (error);
1647 	}
1648 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1649 	    segflg, path2, fd2, td);
1650 	if ((error = namei(&nd)) == 0) {
1651 		lvfslocked = NDHASGIANT(&nd);
1652 		if (nd.ni_vp != NULL) {
1653 			if (nd.ni_dvp == nd.ni_vp)
1654 				vrele(nd.ni_dvp);
1655 			else
1656 				vput(nd.ni_dvp);
1657 			vrele(nd.ni_vp);
1658 			error = EEXIST;
1659 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
1660 		    == 0) {
1661 			error = can_hardlink(vp, td->td_ucred);
1662 			if (error == 0)
1663 #ifdef MAC
1664 				error = mac_vnode_check_link(td->td_ucred,
1665 				    nd.ni_dvp, vp, &nd.ni_cnd);
1666 			if (error == 0)
1667 #endif
1668 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1669 			VOP_UNLOCK(vp, 0);
1670 			vput(nd.ni_dvp);
1671 		}
1672 		NDFREE(&nd, NDF_ONLY_PNBUF);
1673 		VFS_UNLOCK_GIANT(lvfslocked);
1674 	}
1675 	vrele(vp);
1676 	vn_finished_write(mp);
1677 	VFS_UNLOCK_GIANT(vfslocked);
1678 	return (error);
1679 }
1680 
1681 /*
1682  * Make a symbolic link.
1683  */
1684 #ifndef _SYS_SYSPROTO_H_
1685 struct symlink_args {
1686 	char	*path;
1687 	char	*link;
1688 };
1689 #endif
1690 int
sys_symlink(td,uap)1691 sys_symlink(td, uap)
1692 	struct thread *td;
1693 	register struct symlink_args /* {
1694 		char *path;
1695 		char *link;
1696 	} */ *uap;
1697 {
1698 
1699 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1700 }
1701 
1702 #ifndef _SYS_SYSPROTO_H_
1703 struct symlinkat_args {
1704 	char	*path;
1705 	int	fd;
1706 	char	*path2;
1707 };
1708 #endif
1709 int
sys_symlinkat(struct thread * td,struct symlinkat_args * uap)1710 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
1711 {
1712 
1713 	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
1714 	    UIO_USERSPACE));
1715 }
1716 
1717 int
kern_symlink(struct thread * td,char * path,char * link,enum uio_seg segflg)1718 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1719 {
1720 
1721 	return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
1722 }
1723 
1724 int
kern_symlinkat(struct thread * td,char * path1,int fd,char * path2,enum uio_seg segflg)1725 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
1726     enum uio_seg segflg)
1727 {
1728 	struct mount *mp;
1729 	struct vattr vattr;
1730 	char *syspath;
1731 	int error;
1732 	struct nameidata nd;
1733 	int vfslocked;
1734 
1735 	if (segflg == UIO_SYSSPACE) {
1736 		syspath = path1;
1737 	} else {
1738 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1739 		if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
1740 			goto out;
1741 	}
1742 	AUDIT_ARG_TEXT(syspath);
1743 restart:
1744 	bwillwrite();
1745 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1746 	    segflg, path2, fd, td);
1747 	if ((error = namei(&nd)) != 0)
1748 		goto out;
1749 	vfslocked = NDHASGIANT(&nd);
1750 	if (nd.ni_vp) {
1751 		NDFREE(&nd, NDF_ONLY_PNBUF);
1752 		if (nd.ni_vp == nd.ni_dvp)
1753 			vrele(nd.ni_dvp);
1754 		else
1755 			vput(nd.ni_dvp);
1756 		vrele(nd.ni_vp);
1757 		VFS_UNLOCK_GIANT(vfslocked);
1758 		error = EEXIST;
1759 		goto out;
1760 	}
1761 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1762 		NDFREE(&nd, NDF_ONLY_PNBUF);
1763 		vput(nd.ni_dvp);
1764 		VFS_UNLOCK_GIANT(vfslocked);
1765 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1766 			goto out;
1767 		goto restart;
1768 	}
1769 	VATTR_NULL(&vattr);
1770 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1771 #ifdef MAC
1772 	vattr.va_type = VLNK;
1773 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1774 	    &vattr);
1775 	if (error)
1776 		goto out2;
1777 #endif
1778 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1779 	if (error == 0)
1780 		vput(nd.ni_vp);
1781 #ifdef MAC
1782 out2:
1783 #endif
1784 	NDFREE(&nd, NDF_ONLY_PNBUF);
1785 	vput(nd.ni_dvp);
1786 	vn_finished_write(mp);
1787 	VFS_UNLOCK_GIANT(vfslocked);
1788 out:
1789 	if (segflg != UIO_SYSSPACE)
1790 		uma_zfree(namei_zone, syspath);
1791 	return (error);
1792 }
1793 
1794 /*
1795  * Delete a whiteout from the filesystem.
1796  */
1797 int
sys_undelete(td,uap)1798 sys_undelete(td, uap)
1799 	struct thread *td;
1800 	register struct undelete_args /* {
1801 		char *path;
1802 	} */ *uap;
1803 {
1804 	int error;
1805 	struct mount *mp;
1806 	struct nameidata nd;
1807 	int vfslocked;
1808 
1809 restart:
1810 	bwillwrite();
1811 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1812 	    UIO_USERSPACE, uap->path, td);
1813 	error = namei(&nd);
1814 	if (error)
1815 		return (error);
1816 	vfslocked = NDHASGIANT(&nd);
1817 
1818 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1819 		NDFREE(&nd, NDF_ONLY_PNBUF);
1820 		if (nd.ni_vp == nd.ni_dvp)
1821 			vrele(nd.ni_dvp);
1822 		else
1823 			vput(nd.ni_dvp);
1824 		if (nd.ni_vp)
1825 			vrele(nd.ni_vp);
1826 		VFS_UNLOCK_GIANT(vfslocked);
1827 		return (EEXIST);
1828 	}
1829 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1830 		NDFREE(&nd, NDF_ONLY_PNBUF);
1831 		vput(nd.ni_dvp);
1832 		VFS_UNLOCK_GIANT(vfslocked);
1833 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1834 			return (error);
1835 		goto restart;
1836 	}
1837 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1838 	NDFREE(&nd, NDF_ONLY_PNBUF);
1839 	vput(nd.ni_dvp);
1840 	vn_finished_write(mp);
1841 	VFS_UNLOCK_GIANT(vfslocked);
1842 	return (error);
1843 }
1844 
1845 /*
1846  * Delete a name from the filesystem.
1847  */
1848 #ifndef _SYS_SYSPROTO_H_
1849 struct unlink_args {
1850 	char	*path;
1851 };
1852 #endif
1853 int
sys_unlink(td,uap)1854 sys_unlink(td, uap)
1855 	struct thread *td;
1856 	struct unlink_args /* {
1857 		char *path;
1858 	} */ *uap;
1859 {
1860 
1861 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1862 }
1863 
1864 #ifndef _SYS_SYSPROTO_H_
1865 struct unlinkat_args {
1866 	int	fd;
1867 	char	*path;
1868 	int	flag;
1869 };
1870 #endif
1871 int
sys_unlinkat(struct thread * td,struct unlinkat_args * uap)1872 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
1873 {
1874 	int flag = uap->flag;
1875 	int fd = uap->fd;
1876 	char *path = uap->path;
1877 
1878 	if (flag & ~AT_REMOVEDIR)
1879 		return (EINVAL);
1880 
1881 	if (flag & AT_REMOVEDIR)
1882 		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
1883 	else
1884 		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
1885 }
1886 
1887 int
kern_unlink(struct thread * td,char * path,enum uio_seg pathseg)1888 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1889 {
1890 
1891 	return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
1892 }
1893 
1894 int
kern_unlinkat(struct thread * td,int fd,char * path,enum uio_seg pathseg,ino_t oldinum)1895 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1896     ino_t oldinum)
1897 {
1898 	struct mount *mp;
1899 	struct vnode *vp;
1900 	int error;
1901 	struct nameidata nd;
1902 	struct stat sb;
1903 	int vfslocked;
1904 
1905 restart:
1906 	bwillwrite();
1907 	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1908 	    pathseg, path, fd, td);
1909 	if ((error = namei(&nd)) != 0)
1910 		return (error == EINVAL ? EPERM : error);
1911 	vfslocked = NDHASGIANT(&nd);
1912 	vp = nd.ni_vp;
1913 	if (vp->v_type == VDIR && oldinum == 0) {
1914 		error = EPERM;		/* POSIX */
1915 	} else if (oldinum != 0 &&
1916 		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
1917 		  sb.st_ino != oldinum) {
1918 			error = EIDRM;	/* Identifier removed */
1919 	} else {
1920 		/*
1921 		 * The root of a mounted filesystem cannot be deleted.
1922 		 *
1923 		 * XXX: can this only be a VDIR case?
1924 		 */
1925 		if (vp->v_vflag & VV_ROOT)
1926 			error = EBUSY;
1927 	}
1928 	if (error == 0) {
1929 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1930 			NDFREE(&nd, NDF_ONLY_PNBUF);
1931 			vput(nd.ni_dvp);
1932 			if (vp == nd.ni_dvp)
1933 				vrele(vp);
1934 			else
1935 				vput(vp);
1936 			VFS_UNLOCK_GIANT(vfslocked);
1937 			if ((error = vn_start_write(NULL, &mp,
1938 			    V_XSLEEP | PCATCH)) != 0)
1939 				return (error);
1940 			goto restart;
1941 		}
1942 #ifdef MAC
1943 		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
1944 		    &nd.ni_cnd);
1945 		if (error)
1946 			goto out;
1947 #endif
1948 		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
1949 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1950 #ifdef MAC
1951 out:
1952 #endif
1953 		vn_finished_write(mp);
1954 	}
1955 	NDFREE(&nd, NDF_ONLY_PNBUF);
1956 	vput(nd.ni_dvp);
1957 	if (vp == nd.ni_dvp)
1958 		vrele(vp);
1959 	else
1960 		vput(vp);
1961 	VFS_UNLOCK_GIANT(vfslocked);
1962 	return (error);
1963 }
1964 
1965 /*
1966  * Reposition read/write file offset.
1967  */
1968 #ifndef _SYS_SYSPROTO_H_
1969 struct lseek_args {
1970 	int	fd;
1971 	int	pad;
1972 	off_t	offset;
1973 	int	whence;
1974 };
1975 #endif
1976 int
sys_lseek(td,uap)1977 sys_lseek(td, uap)
1978 	struct thread *td;
1979 	register struct lseek_args /* {
1980 		int fd;
1981 		int pad;
1982 		off_t offset;
1983 		int whence;
1984 	} */ *uap;
1985 {
1986 	struct ucred *cred = td->td_ucred;
1987 	struct file *fp;
1988 	struct vnode *vp;
1989 	struct vattr vattr;
1990 	off_t foffset, offset, size;
1991 	int error, noneg;
1992 	int vfslocked;
1993 
1994 	AUDIT_ARG_FD(uap->fd);
1995 	if ((error = fget(td, uap->fd, CAP_SEEK, &fp)) != 0)
1996 		return (error);
1997 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1998 		fdrop(fp, td);
1999 		return (ESPIPE);
2000 	}
2001 	vp = fp->f_vnode;
2002 	foffset = foffset_lock(fp, 0);
2003 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2004 	noneg = (vp->v_type != VCHR);
2005 	offset = uap->offset;
2006 	switch (uap->whence) {
2007 	case L_INCR:
2008 		if (noneg &&
2009 		    (foffset < 0 ||
2010 		    (offset > 0 && foffset > OFF_MAX - offset))) {
2011 			error = EOVERFLOW;
2012 			break;
2013 		}
2014 		offset += foffset;
2015 		break;
2016 	case L_XTND:
2017 		vn_lock(vp, LK_SHARED | LK_RETRY);
2018 		error = VOP_GETATTR(vp, &vattr, cred);
2019 		VOP_UNLOCK(vp, 0);
2020 		if (error)
2021 			break;
2022 
2023 		/*
2024 		 * If the file references a disk device, then fetch
2025 		 * the media size and use that to determine the ending
2026 		 * offset.
2027 		 */
2028 		if (vattr.va_size == 0 && vp->v_type == VCHR &&
2029 		    fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0)
2030 			vattr.va_size = size;
2031 		if (noneg &&
2032 		    (vattr.va_size > OFF_MAX ||
2033 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
2034 			error = EOVERFLOW;
2035 			break;
2036 		}
2037 		offset += vattr.va_size;
2038 		break;
2039 	case L_SET:
2040 		break;
2041 	case SEEK_DATA:
2042 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
2043 		break;
2044 	case SEEK_HOLE:
2045 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
2046 		break;
2047 	default:
2048 		error = EINVAL;
2049 	}
2050 	if (error == 0 && noneg && offset < 0)
2051 		error = EINVAL;
2052 	if (error != 0)
2053 		goto drop;
2054 	VFS_KNOTE_UNLOCKED(vp, 0);
2055 	*(off_t *)(td->td_retval) = offset;
2056 drop:
2057 	fdrop(fp, td);
2058 	VFS_UNLOCK_GIANT(vfslocked);
2059 	foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
2060 	return (error);
2061 }
2062 
2063 #if defined(COMPAT_43)
2064 /*
2065  * Reposition read/write file offset.
2066  */
2067 #ifndef _SYS_SYSPROTO_H_
2068 struct olseek_args {
2069 	int	fd;
2070 	long	offset;
2071 	int	whence;
2072 };
2073 #endif
2074 int
olseek(td,uap)2075 olseek(td, uap)
2076 	struct thread *td;
2077 	register struct olseek_args /* {
2078 		int fd;
2079 		long offset;
2080 		int whence;
2081 	} */ *uap;
2082 {
2083 	struct lseek_args /* {
2084 		int fd;
2085 		int pad;
2086 		off_t offset;
2087 		int whence;
2088 	} */ nuap;
2089 
2090 	nuap.fd = uap->fd;
2091 	nuap.offset = uap->offset;
2092 	nuap.whence = uap->whence;
2093 	return (sys_lseek(td, &nuap));
2094 }
2095 #endif /* COMPAT_43 */
2096 
2097 /* Version with the 'pad' argument */
2098 int
freebsd6_lseek(td,uap)2099 freebsd6_lseek(td, uap)
2100 	struct thread *td;
2101 	register struct freebsd6_lseek_args *uap;
2102 {
2103 	struct lseek_args ouap;
2104 
2105 	ouap.fd = uap->fd;
2106 	ouap.offset = uap->offset;
2107 	ouap.whence = uap->whence;
2108 	return (sys_lseek(td, &ouap));
2109 }
2110 
2111 /*
2112  * Check access permissions using passed credentials.
2113  */
2114 static int
vn_access(vp,user_flags,cred,td)2115 vn_access(vp, user_flags, cred, td)
2116 	struct vnode	*vp;
2117 	int		user_flags;
2118 	struct ucred	*cred;
2119 	struct thread	*td;
2120 {
2121 	int error;
2122 	accmode_t accmode;
2123 
2124 	/* Flags == 0 means only check for existence. */
2125 	error = 0;
2126 	if (user_flags) {
2127 		accmode = 0;
2128 		if (user_flags & R_OK)
2129 			accmode |= VREAD;
2130 		if (user_flags & W_OK)
2131 			accmode |= VWRITE;
2132 		if (user_flags & X_OK)
2133 			accmode |= VEXEC;
2134 #ifdef MAC
2135 		error = mac_vnode_check_access(cred, vp, accmode);
2136 		if (error)
2137 			return (error);
2138 #endif
2139 		if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2140 			error = VOP_ACCESS(vp, accmode, cred, td);
2141 	}
2142 	return (error);
2143 }
2144 
2145 /*
2146  * Check access permissions using "real" credentials.
2147  */
2148 #ifndef _SYS_SYSPROTO_H_
2149 struct access_args {
2150 	char	*path;
2151 	int	flags;
2152 };
2153 #endif
2154 int
sys_access(td,uap)2155 sys_access(td, uap)
2156 	struct thread *td;
2157 	register struct access_args /* {
2158 		char *path;
2159 		int flags;
2160 	} */ *uap;
2161 {
2162 
2163 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
2164 }
2165 
2166 #ifndef _SYS_SYSPROTO_H_
2167 struct faccessat_args {
2168 	int	dirfd;
2169 	char	*path;
2170 	int	mode;
2171 	int	flag;
2172 }
2173 #endif
2174 int
sys_faccessat(struct thread * td,struct faccessat_args * uap)2175 sys_faccessat(struct thread *td, struct faccessat_args *uap)
2176 {
2177 
2178 	if (uap->flag & ~AT_EACCESS)
2179 		return (EINVAL);
2180 	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
2181 	    uap->mode));
2182 }
2183 
2184 int
kern_access(struct thread * td,char * path,enum uio_seg pathseg,int mode)2185 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2186 {
2187 
2188 	return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
2189 }
2190 
2191 int
kern_accessat(struct thread * td,int fd,char * path,enum uio_seg pathseg,int flags,int mode)2192 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2193     int flags, int mode)
2194 {
2195 	struct ucred *cred, *tmpcred;
2196 	struct vnode *vp;
2197 	struct nameidata nd;
2198 	int vfslocked;
2199 	int error;
2200 
2201 	/*
2202 	 * Create and modify a temporary credential instead of one that
2203 	 * is potentially shared.
2204 	 */
2205 	if (!(flags & AT_EACCESS)) {
2206 		cred = td->td_ucred;
2207 		tmpcred = crdup(cred);
2208 		tmpcred->cr_uid = cred->cr_ruid;
2209 		tmpcred->cr_groups[0] = cred->cr_rgid;
2210 		td->td_ucred = tmpcred;
2211 	} else
2212 		cred = tmpcred = td->td_ucred;
2213 	AUDIT_ARG_VALUE(mode);
2214 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2215 	    AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
2216 	if ((error = namei(&nd)) != 0)
2217 		goto out1;
2218 	vfslocked = NDHASGIANT(&nd);
2219 	vp = nd.ni_vp;
2220 
2221 	error = vn_access(vp, mode, tmpcred, td);
2222 	NDFREE(&nd, NDF_ONLY_PNBUF);
2223 	vput(vp);
2224 	VFS_UNLOCK_GIANT(vfslocked);
2225 out1:
2226 	if (!(flags & AT_EACCESS)) {
2227 		td->td_ucred = cred;
2228 		crfree(tmpcred);
2229 	}
2230 	return (error);
2231 }
2232 
2233 /*
2234  * Check access permissions using "effective" credentials.
2235  */
2236 #ifndef _SYS_SYSPROTO_H_
2237 struct eaccess_args {
2238 	char	*path;
2239 	int	flags;
2240 };
2241 #endif
2242 int
sys_eaccess(td,uap)2243 sys_eaccess(td, uap)
2244 	struct thread *td;
2245 	register struct eaccess_args /* {
2246 		char *path;
2247 		int flags;
2248 	} */ *uap;
2249 {
2250 
2251 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
2252 }
2253 
2254 int
kern_eaccess(struct thread * td,char * path,enum uio_seg pathseg,int flags)2255 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
2256 {
2257 
2258 	return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
2259 }
2260 
2261 #if defined(COMPAT_43)
2262 /*
2263  * Get file status; this version follows links.
2264  */
2265 #ifndef _SYS_SYSPROTO_H_
2266 struct ostat_args {
2267 	char	*path;
2268 	struct ostat *ub;
2269 };
2270 #endif
2271 int
ostat(td,uap)2272 ostat(td, uap)
2273 	struct thread *td;
2274 	register struct ostat_args /* {
2275 		char *path;
2276 		struct ostat *ub;
2277 	} */ *uap;
2278 {
2279 	struct stat sb;
2280 	struct ostat osb;
2281 	int error;
2282 
2283 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2284 	if (error)
2285 		return (error);
2286 	cvtstat(&sb, &osb);
2287 	error = copyout(&osb, uap->ub, sizeof (osb));
2288 	return (error);
2289 }
2290 
2291 /*
2292  * Get file status; this version does not follow links.
2293  */
2294 #ifndef _SYS_SYSPROTO_H_
2295 struct olstat_args {
2296 	char	*path;
2297 	struct ostat *ub;
2298 };
2299 #endif
2300 int
olstat(td,uap)2301 olstat(td, uap)
2302 	struct thread *td;
2303 	register struct olstat_args /* {
2304 		char *path;
2305 		struct ostat *ub;
2306 	} */ *uap;
2307 {
2308 	struct stat sb;
2309 	struct ostat osb;
2310 	int error;
2311 
2312 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2313 	if (error)
2314 		return (error);
2315 	cvtstat(&sb, &osb);
2316 	error = copyout(&osb, uap->ub, sizeof (osb));
2317 	return (error);
2318 }
2319 
2320 /*
2321  * Convert from an old to a new stat structure.
2322  */
2323 void
cvtstat(st,ost)2324 cvtstat(st, ost)
2325 	struct stat *st;
2326 	struct ostat *ost;
2327 {
2328 
2329 	bzero(ost, sizeof(*ost));
2330 	ost->st_dev = st->st_dev;
2331 	ost->st_ino = st->st_ino;
2332 	ost->st_mode = st->st_mode;
2333 	ost->st_nlink = st->st_nlink;
2334 	ost->st_uid = st->st_uid;
2335 	ost->st_gid = st->st_gid;
2336 	ost->st_rdev = st->st_rdev;
2337 	if (st->st_size < (quad_t)1 << 32)
2338 		ost->st_size = st->st_size;
2339 	else
2340 		ost->st_size = -2;
2341 	ost->st_atim = st->st_atim;
2342 	ost->st_mtim = st->st_mtim;
2343 	ost->st_ctim = st->st_ctim;
2344 	ost->st_blksize = st->st_blksize;
2345 	ost->st_blocks = st->st_blocks;
2346 	ost->st_flags = st->st_flags;
2347 	ost->st_gen = st->st_gen;
2348 }
2349 #endif /* COMPAT_43 */
2350 
2351 /*
2352  * Get file status; this version follows links.
2353  */
2354 #ifndef _SYS_SYSPROTO_H_
2355 struct stat_args {
2356 	char	*path;
2357 	struct stat *ub;
2358 };
2359 #endif
2360 int
sys_stat(td,uap)2361 sys_stat(td, uap)
2362 	struct thread *td;
2363 	register struct stat_args /* {
2364 		char *path;
2365 		struct stat *ub;
2366 	} */ *uap;
2367 {
2368 	struct stat sb;
2369 	int error;
2370 
2371 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2372 	if (error == 0)
2373 		error = copyout(&sb, uap->ub, sizeof (sb));
2374 	return (error);
2375 }
2376 
2377 #ifndef _SYS_SYSPROTO_H_
2378 struct fstatat_args {
2379 	int	fd;
2380 	char	*path;
2381 	struct stat	*buf;
2382 	int	flag;
2383 }
2384 #endif
2385 int
sys_fstatat(struct thread * td,struct fstatat_args * uap)2386 sys_fstatat(struct thread *td, struct fstatat_args *uap)
2387 {
2388 	struct stat sb;
2389 	int error;
2390 
2391 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
2392 	    UIO_USERSPACE, &sb);
2393 	if (error == 0)
2394 		error = copyout(&sb, uap->buf, sizeof (sb));
2395 	return (error);
2396 }
2397 
2398 int
kern_stat(struct thread * td,char * path,enum uio_seg pathseg,struct stat * sbp)2399 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2400 {
2401 
2402 	return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
2403 }
2404 
2405 int
kern_statat(struct thread * td,int flag,int fd,char * path,enum uio_seg pathseg,struct stat * sbp)2406 kern_statat(struct thread *td, int flag, int fd, char *path,
2407     enum uio_seg pathseg, struct stat *sbp)
2408 {
2409 
2410 	return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
2411 }
2412 
2413 int
kern_statat_vnhook(struct thread * td,int flag,int fd,char * path,enum uio_seg pathseg,struct stat * sbp,void (* hook)(struct vnode * vp,struct stat * sbp))2414 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
2415     enum uio_seg pathseg, struct stat *sbp,
2416     void (*hook)(struct vnode *vp, struct stat *sbp))
2417 {
2418 	struct nameidata nd;
2419 	struct stat sb;
2420 	int error, vfslocked;
2421 
2422 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2423 		return (EINVAL);
2424 
2425 	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
2426 	    FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
2427 	    path, fd, CAP_FSTAT, td);
2428 
2429 	if ((error = namei(&nd)) != 0)
2430 		return (error);
2431 	vfslocked = NDHASGIANT(&nd);
2432 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2433 	if (!error) {
2434 		SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
2435 		if (S_ISREG(sb.st_mode))
2436 			SDT_PROBE2(vfs, , stat, reg, path, pathseg);
2437 		if (__predict_false(hook != NULL))
2438 			hook(nd.ni_vp, &sb);
2439 	}
2440 	NDFREE(&nd, NDF_ONLY_PNBUF);
2441 	vput(nd.ni_vp);
2442 	VFS_UNLOCK_GIANT(vfslocked);
2443 	if (error)
2444 		return (error);
2445 	*sbp = sb;
2446 #ifdef KTRACE
2447 	if (KTRPOINT(td, KTR_STRUCT))
2448 		ktrstat(&sb);
2449 #endif
2450 	return (0);
2451 }
2452 
2453 /*
2454  * Get file status; this version does not follow links.
2455  */
2456 #ifndef _SYS_SYSPROTO_H_
2457 struct lstat_args {
2458 	char	*path;
2459 	struct stat *ub;
2460 };
2461 #endif
2462 int
sys_lstat(td,uap)2463 sys_lstat(td, uap)
2464 	struct thread *td;
2465 	register struct lstat_args /* {
2466 		char *path;
2467 		struct stat *ub;
2468 	} */ *uap;
2469 {
2470 	struct stat sb;
2471 	int error;
2472 
2473 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2474 	if (error == 0)
2475 		error = copyout(&sb, uap->ub, sizeof (sb));
2476 	return (error);
2477 }
2478 
2479 int
kern_lstat(struct thread * td,char * path,enum uio_seg pathseg,struct stat * sbp)2480 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2481 {
2482 
2483 	return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
2484 	    sbp));
2485 }
2486 
2487 /*
2488  * Implementation of the NetBSD [l]stat() functions.
2489  */
2490 void
cvtnstat(sb,nsb)2491 cvtnstat(sb, nsb)
2492 	struct stat *sb;
2493 	struct nstat *nsb;
2494 {
2495 	bzero(nsb, sizeof *nsb);
2496 	nsb->st_dev = sb->st_dev;
2497 	nsb->st_ino = sb->st_ino;
2498 	nsb->st_mode = sb->st_mode;
2499 	nsb->st_nlink = sb->st_nlink;
2500 	nsb->st_uid = sb->st_uid;
2501 	nsb->st_gid = sb->st_gid;
2502 	nsb->st_rdev = sb->st_rdev;
2503 	nsb->st_atim = sb->st_atim;
2504 	nsb->st_mtim = sb->st_mtim;
2505 	nsb->st_ctim = sb->st_ctim;
2506 	nsb->st_size = sb->st_size;
2507 	nsb->st_blocks = sb->st_blocks;
2508 	nsb->st_blksize = sb->st_blksize;
2509 	nsb->st_flags = sb->st_flags;
2510 	nsb->st_gen = sb->st_gen;
2511 	nsb->st_birthtim = sb->st_birthtim;
2512 }
2513 
2514 #ifndef _SYS_SYSPROTO_H_
2515 struct nstat_args {
2516 	char	*path;
2517 	struct nstat *ub;
2518 };
2519 #endif
2520 int
sys_nstat(td,uap)2521 sys_nstat(td, uap)
2522 	struct thread *td;
2523 	register struct nstat_args /* {
2524 		char *path;
2525 		struct nstat *ub;
2526 	} */ *uap;
2527 {
2528 	struct stat sb;
2529 	struct nstat nsb;
2530 	int error;
2531 
2532 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2533 	if (error)
2534 		return (error);
2535 	cvtnstat(&sb, &nsb);
2536 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2537 	return (error);
2538 }
2539 
2540 /*
2541  * NetBSD lstat.  Get file status; this version does not follow links.
2542  */
2543 #ifndef _SYS_SYSPROTO_H_
2544 struct lstat_args {
2545 	char	*path;
2546 	struct stat *ub;
2547 };
2548 #endif
2549 int
sys_nlstat(td,uap)2550 sys_nlstat(td, uap)
2551 	struct thread *td;
2552 	register struct nlstat_args /* {
2553 		char *path;
2554 		struct nstat *ub;
2555 	} */ *uap;
2556 {
2557 	struct stat sb;
2558 	struct nstat nsb;
2559 	int error;
2560 
2561 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2562 	if (error)
2563 		return (error);
2564 	cvtnstat(&sb, &nsb);
2565 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2566 	return (error);
2567 }
2568 
2569 /*
2570  * Get configurable pathname variables.
2571  */
2572 #ifndef _SYS_SYSPROTO_H_
2573 struct pathconf_args {
2574 	char	*path;
2575 	int	name;
2576 };
2577 #endif
2578 int
sys_pathconf(td,uap)2579 sys_pathconf(td, uap)
2580 	struct thread *td;
2581 	register struct pathconf_args /* {
2582 		char *path;
2583 		int name;
2584 	} */ *uap;
2585 {
2586 
2587 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
2588 }
2589 
2590 #ifndef _SYS_SYSPROTO_H_
2591 struct lpathconf_args {
2592 	char	*path;
2593 	int	name;
2594 };
2595 #endif
2596 int
sys_lpathconf(td,uap)2597 sys_lpathconf(td, uap)
2598 	struct thread *td;
2599 	register struct lpathconf_args /* {
2600 		char *path;
2601 		int name;
2602 	} */ *uap;
2603 {
2604 
2605 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
2606 }
2607 
2608 int
kern_pathconf(struct thread * td,char * path,enum uio_seg pathseg,int name,u_long flags)2609 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
2610     u_long flags)
2611 {
2612 	struct nameidata nd;
2613 	int error, vfslocked;
2614 
2615 	NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
2616 	    flags, pathseg, path, td);
2617 	if ((error = namei(&nd)) != 0)
2618 		return (error);
2619 	vfslocked = NDHASGIANT(&nd);
2620 	NDFREE(&nd, NDF_ONLY_PNBUF);
2621 
2622 	/* If asynchronous I/O is available, it works for all files. */
2623 	if (name == _PC_ASYNC_IO)
2624 		td->td_retval[0] = async_io_version;
2625 	else
2626 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2627 	vput(nd.ni_vp);
2628 	VFS_UNLOCK_GIANT(vfslocked);
2629 	return (error);
2630 }
2631 
2632 /*
2633  * Return target name of a symbolic link.
2634  */
2635 #ifndef _SYS_SYSPROTO_H_
2636 struct readlink_args {
2637 	char	*path;
2638 	char	*buf;
2639 	size_t	count;
2640 };
2641 #endif
2642 int
sys_readlink(td,uap)2643 sys_readlink(td, uap)
2644 	struct thread *td;
2645 	register struct readlink_args /* {
2646 		char *path;
2647 		char *buf;
2648 		size_t count;
2649 	} */ *uap;
2650 {
2651 
2652 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2653 	    UIO_USERSPACE, uap->count));
2654 }
2655 #ifndef _SYS_SYSPROTO_H_
2656 struct readlinkat_args {
2657 	int	fd;
2658 	char	*path;
2659 	char	*buf;
2660 	size_t	bufsize;
2661 };
2662 #endif
2663 int
sys_readlinkat(struct thread * td,struct readlinkat_args * uap)2664 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
2665 {
2666 
2667 	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
2668 	    uap->buf, UIO_USERSPACE, uap->bufsize));
2669 }
2670 
2671 int
kern_readlink(struct thread * td,char * path,enum uio_seg pathseg,char * buf,enum uio_seg bufseg,size_t count)2672 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2673     enum uio_seg bufseg, size_t count)
2674 {
2675 
2676 	return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
2677 	    count));
2678 }
2679 
2680 int
kern_readlinkat(struct thread * td,int fd,char * path,enum uio_seg pathseg,char * buf,enum uio_seg bufseg,size_t count)2681 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2682     char *buf, enum uio_seg bufseg, size_t count)
2683 {
2684 	struct vnode *vp;
2685 	struct iovec aiov;
2686 	struct uio auio;
2687 	int error;
2688 	struct nameidata nd;
2689 	int vfslocked;
2690 
2691 	if (count > IOSIZE_MAX)
2692 		return (EINVAL);
2693 
2694 	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2695 	    AUDITVNODE1, pathseg, path, fd, td);
2696 
2697 	if ((error = namei(&nd)) != 0)
2698 		return (error);
2699 	NDFREE(&nd, NDF_ONLY_PNBUF);
2700 	vfslocked = NDHASGIANT(&nd);
2701 	vp = nd.ni_vp;
2702 #ifdef MAC
2703 	error = mac_vnode_check_readlink(td->td_ucred, vp);
2704 	if (error) {
2705 		vput(vp);
2706 		VFS_UNLOCK_GIANT(vfslocked);
2707 		return (error);
2708 	}
2709 #endif
2710 	if (vp->v_type != VLNK)
2711 		error = EINVAL;
2712 	else {
2713 		aiov.iov_base = buf;
2714 		aiov.iov_len = count;
2715 		auio.uio_iov = &aiov;
2716 		auio.uio_iovcnt = 1;
2717 		auio.uio_offset = 0;
2718 		auio.uio_rw = UIO_READ;
2719 		auio.uio_segflg = bufseg;
2720 		auio.uio_td = td;
2721 		auio.uio_resid = count;
2722 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2723 		td->td_retval[0] = count - auio.uio_resid;
2724 	}
2725 	vput(vp);
2726 	VFS_UNLOCK_GIANT(vfslocked);
2727 	return (error);
2728 }
2729 
2730 /*
2731  * Common implementation code for chflags() and fchflags().
2732  */
2733 static int
setfflags(td,vp,flags)2734 setfflags(td, vp, flags)
2735 	struct thread *td;
2736 	struct vnode *vp;
2737 	int flags;
2738 {
2739 	int error;
2740 	struct mount *mp;
2741 	struct vattr vattr;
2742 
2743 	/* We can't support the value matching VNOVAL. */
2744 	if (flags == VNOVAL)
2745 		return (EOPNOTSUPP);
2746 
2747 	/*
2748 	 * Prevent non-root users from setting flags on devices.  When
2749 	 * a device is reused, users can retain ownership of the device
2750 	 * if they are allowed to set flags and programs assume that
2751 	 * chown can't fail when done as root.
2752 	 */
2753 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2754 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
2755 		if (error)
2756 			return (error);
2757 	}
2758 
2759 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2760 		return (error);
2761 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2762 	VATTR_NULL(&vattr);
2763 	vattr.va_flags = flags;
2764 #ifdef MAC
2765 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
2766 	if (error == 0)
2767 #endif
2768 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2769 	VOP_UNLOCK(vp, 0);
2770 	vn_finished_write(mp);
2771 	return (error);
2772 }
2773 
2774 /*
2775  * Change flags of a file given a path name.
2776  */
2777 #ifndef _SYS_SYSPROTO_H_
2778 struct chflags_args {
2779 	char	*path;
2780 	int	flags;
2781 };
2782 #endif
2783 int
sys_chflags(td,uap)2784 sys_chflags(td, uap)
2785 	struct thread *td;
2786 	register struct chflags_args /* {
2787 		char *path;
2788 		int flags;
2789 	} */ *uap;
2790 {
2791 	int error;
2792 	struct nameidata nd;
2793 	int vfslocked;
2794 
2795 	AUDIT_ARG_FFLAGS(uap->flags);
2796 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2797 	    uap->path, td);
2798 	if ((error = namei(&nd)) != 0)
2799 		return (error);
2800 	NDFREE(&nd, NDF_ONLY_PNBUF);
2801 	vfslocked = NDHASGIANT(&nd);
2802 	error = setfflags(td, nd.ni_vp, uap->flags);
2803 	vrele(nd.ni_vp);
2804 	VFS_UNLOCK_GIANT(vfslocked);
2805 	return (error);
2806 }
2807 
2808 /*
2809  * Same as chflags() but doesn't follow symlinks.
2810  */
2811 int
sys_lchflags(td,uap)2812 sys_lchflags(td, uap)
2813 	struct thread *td;
2814 	register struct lchflags_args /* {
2815 		char *path;
2816 		int flags;
2817 	} */ *uap;
2818 {
2819 	int error;
2820 	struct nameidata nd;
2821 	int vfslocked;
2822 
2823 	AUDIT_ARG_FFLAGS(uap->flags);
2824 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2825 	    uap->path, td);
2826 	if ((error = namei(&nd)) != 0)
2827 		return (error);
2828 	vfslocked = NDHASGIANT(&nd);
2829 	NDFREE(&nd, NDF_ONLY_PNBUF);
2830 	error = setfflags(td, nd.ni_vp, uap->flags);
2831 	vrele(nd.ni_vp);
2832 	VFS_UNLOCK_GIANT(vfslocked);
2833 	return (error);
2834 }
2835 
2836 /*
2837  * Change flags of a file given a file descriptor.
2838  */
2839 #ifndef _SYS_SYSPROTO_H_
2840 struct fchflags_args {
2841 	int	fd;
2842 	int	flags;
2843 };
2844 #endif
2845 int
sys_fchflags(td,uap)2846 sys_fchflags(td, uap)
2847 	struct thread *td;
2848 	register struct fchflags_args /* {
2849 		int fd;
2850 		int flags;
2851 	} */ *uap;
2852 {
2853 	struct file *fp;
2854 	int vfslocked;
2855 	int error;
2856 
2857 	AUDIT_ARG_FD(uap->fd);
2858 	AUDIT_ARG_FFLAGS(uap->flags);
2859 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FCHFLAGS,
2860 	    &fp)) != 0)
2861 		return (error);
2862 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2863 #ifdef AUDIT
2864 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2865 	AUDIT_ARG_VNODE1(fp->f_vnode);
2866 	VOP_UNLOCK(fp->f_vnode, 0);
2867 #endif
2868 	error = setfflags(td, fp->f_vnode, uap->flags);
2869 	VFS_UNLOCK_GIANT(vfslocked);
2870 	fdrop(fp, td);
2871 	return (error);
2872 }
2873 
2874 /*
2875  * Common implementation code for chmod(), lchmod() and fchmod().
2876  */
2877 int
setfmode(td,cred,vp,mode)2878 setfmode(td, cred, vp, mode)
2879 	struct thread *td;
2880 	struct ucred *cred;
2881 	struct vnode *vp;
2882 	int mode;
2883 {
2884 	int error;
2885 	struct mount *mp;
2886 	struct vattr vattr;
2887 
2888 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2889 		return (error);
2890 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2891 	VATTR_NULL(&vattr);
2892 	vattr.va_mode = mode & ALLPERMS;
2893 #ifdef MAC
2894 	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
2895 	if (error == 0)
2896 #endif
2897 		error = VOP_SETATTR(vp, &vattr, cred);
2898 	VOP_UNLOCK(vp, 0);
2899 	vn_finished_write(mp);
2900 	return (error);
2901 }
2902 
2903 /*
2904  * Change mode of a file given path name.
2905  */
2906 #ifndef _SYS_SYSPROTO_H_
2907 struct chmod_args {
2908 	char	*path;
2909 	int	mode;
2910 };
2911 #endif
2912 int
sys_chmod(td,uap)2913 sys_chmod(td, uap)
2914 	struct thread *td;
2915 	register struct chmod_args /* {
2916 		char *path;
2917 		int mode;
2918 	} */ *uap;
2919 {
2920 
2921 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2922 }
2923 
2924 #ifndef _SYS_SYSPROTO_H_
2925 struct fchmodat_args {
2926 	int	dirfd;
2927 	char	*path;
2928 	mode_t	mode;
2929 	int	flag;
2930 }
2931 #endif
2932 int
sys_fchmodat(struct thread * td,struct fchmodat_args * uap)2933 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
2934 {
2935 	int flag = uap->flag;
2936 	int fd = uap->fd;
2937 	char *path = uap->path;
2938 	mode_t mode = uap->mode;
2939 
2940 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2941 		return (EINVAL);
2942 
2943 	return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
2944 }
2945 
2946 int
kern_chmod(struct thread * td,char * path,enum uio_seg pathseg,int mode)2947 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2948 {
2949 
2950 	return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
2951 }
2952 
2953 /*
2954  * Change mode of a file given path name (don't follow links.)
2955  */
2956 #ifndef _SYS_SYSPROTO_H_
2957 struct lchmod_args {
2958 	char	*path;
2959 	int	mode;
2960 };
2961 #endif
2962 int
sys_lchmod(td,uap)2963 sys_lchmod(td, uap)
2964 	struct thread *td;
2965 	register struct lchmod_args /* {
2966 		char *path;
2967 		int mode;
2968 	} */ *uap;
2969 {
2970 
2971 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2972 	    uap->mode, AT_SYMLINK_NOFOLLOW));
2973 }
2974 
2975 
2976 int
kern_fchmodat(struct thread * td,int fd,char * path,enum uio_seg pathseg,mode_t mode,int flag)2977 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2978     mode_t mode, int flag)
2979 {
2980 	int error;
2981 	struct nameidata nd;
2982 	int vfslocked;
2983 	int follow;
2984 
2985 	AUDIT_ARG_MODE(mode);
2986 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2987 	NDINIT_ATRIGHTS(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg,
2988 	    path, fd, CAP_FCHMOD, td);
2989 	if ((error = namei(&nd)) != 0)
2990 		return (error);
2991 	vfslocked = NDHASGIANT(&nd);
2992 	NDFREE(&nd, NDF_ONLY_PNBUF);
2993 	error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
2994 	vrele(nd.ni_vp);
2995 	VFS_UNLOCK_GIANT(vfslocked);
2996 	return (error);
2997 }
2998 
2999 /*
3000  * Change mode of a file given a file descriptor.
3001  */
3002 #ifndef _SYS_SYSPROTO_H_
3003 struct fchmod_args {
3004 	int	fd;
3005 	int	mode;
3006 };
3007 #endif
3008 int
sys_fchmod(struct thread * td,struct fchmod_args * uap)3009 sys_fchmod(struct thread *td, struct fchmod_args *uap)
3010 {
3011 	struct file *fp;
3012 	int error;
3013 
3014 	AUDIT_ARG_FD(uap->fd);
3015 	AUDIT_ARG_MODE(uap->mode);
3016 
3017 	error = fget(td, uap->fd, CAP_FCHMOD, &fp);
3018 	if (error != 0)
3019 		return (error);
3020 	error = fo_chmod(fp, uap->mode, td->td_ucred, td);
3021 	fdrop(fp, td);
3022 	return (error);
3023 }
3024 
3025 /*
3026  * Common implementation for chown(), lchown(), and fchown()
3027  */
3028 int
setfown(td,cred,vp,uid,gid)3029 setfown(td, cred, vp, uid, gid)
3030 	struct thread *td;
3031 	struct ucred *cred;
3032 	struct vnode *vp;
3033 	uid_t uid;
3034 	gid_t gid;
3035 {
3036 	int error;
3037 	struct mount *mp;
3038 	struct vattr vattr;
3039 
3040 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3041 		return (error);
3042 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3043 	VATTR_NULL(&vattr);
3044 	vattr.va_uid = uid;
3045 	vattr.va_gid = gid;
3046 #ifdef MAC
3047 	error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
3048 	    vattr.va_gid);
3049 	if (error == 0)
3050 #endif
3051 		error = VOP_SETATTR(vp, &vattr, cred);
3052 	VOP_UNLOCK(vp, 0);
3053 	vn_finished_write(mp);
3054 	return (error);
3055 }
3056 
3057 /*
3058  * Set ownership given a path name.
3059  */
3060 #ifndef _SYS_SYSPROTO_H_
3061 struct chown_args {
3062 	char	*path;
3063 	int	uid;
3064 	int	gid;
3065 };
3066 #endif
3067 int
sys_chown(td,uap)3068 sys_chown(td, uap)
3069 	struct thread *td;
3070 	register struct chown_args /* {
3071 		char *path;
3072 		int uid;
3073 		int gid;
3074 	} */ *uap;
3075 {
3076 
3077 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
3078 }
3079 
3080 #ifndef _SYS_SYSPROTO_H_
3081 struct fchownat_args {
3082 	int fd;
3083 	const char * path;
3084 	uid_t uid;
3085 	gid_t gid;
3086 	int flag;
3087 };
3088 #endif
3089 int
sys_fchownat(struct thread * td,struct fchownat_args * uap)3090 sys_fchownat(struct thread *td, struct fchownat_args *uap)
3091 {
3092 	int flag;
3093 
3094 	flag = uap->flag;
3095 	if (flag & ~AT_SYMLINK_NOFOLLOW)
3096 		return (EINVAL);
3097 
3098 	return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
3099 	    uap->gid, uap->flag));
3100 }
3101 
3102 int
kern_chown(struct thread * td,char * path,enum uio_seg pathseg,int uid,int gid)3103 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3104     int gid)
3105 {
3106 
3107 	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
3108 }
3109 
3110 int
kern_fchownat(struct thread * td,int fd,char * path,enum uio_seg pathseg,int uid,int gid,int flag)3111 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3112     int uid, int gid, int flag)
3113 {
3114 	struct nameidata nd;
3115 	int error, vfslocked, follow;
3116 
3117 	AUDIT_ARG_OWNER(uid, gid);
3118 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3119 	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
3120 	    path, fd, CAP_FCHOWN, td);
3121 
3122 	if ((error = namei(&nd)) != 0)
3123 		return (error);
3124 	vfslocked = NDHASGIANT(&nd);
3125 	NDFREE(&nd, NDF_ONLY_PNBUF);
3126 	error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
3127 	vrele(nd.ni_vp);
3128 	VFS_UNLOCK_GIANT(vfslocked);
3129 	return (error);
3130 }
3131 
3132 /*
3133  * Set ownership given a path name, do not cross symlinks.
3134  */
3135 #ifndef _SYS_SYSPROTO_H_
3136 struct lchown_args {
3137 	char	*path;
3138 	int	uid;
3139 	int	gid;
3140 };
3141 #endif
3142 int
sys_lchown(td,uap)3143 sys_lchown(td, uap)
3144 	struct thread *td;
3145 	register struct lchown_args /* {
3146 		char *path;
3147 		int uid;
3148 		int gid;
3149 	} */ *uap;
3150 {
3151 
3152 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
3153 }
3154 
3155 int
kern_lchown(struct thread * td,char * path,enum uio_seg pathseg,int uid,int gid)3156 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3157     int gid)
3158 {
3159 
3160 	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
3161 	    AT_SYMLINK_NOFOLLOW));
3162 }
3163 
3164 /*
3165  * Set ownership given a file descriptor.
3166  */
3167 #ifndef _SYS_SYSPROTO_H_
3168 struct fchown_args {
3169 	int	fd;
3170 	int	uid;
3171 	int	gid;
3172 };
3173 #endif
3174 int
sys_fchown(td,uap)3175 sys_fchown(td, uap)
3176 	struct thread *td;
3177 	register struct fchown_args /* {
3178 		int fd;
3179 		int uid;
3180 		int gid;
3181 	} */ *uap;
3182 {
3183 	struct file *fp;
3184 	int error;
3185 
3186 	AUDIT_ARG_FD(uap->fd);
3187 	AUDIT_ARG_OWNER(uap->uid, uap->gid);
3188 	error = fget(td, uap->fd, CAP_FCHOWN, &fp);
3189 	if (error != 0)
3190 		return (error);
3191 	error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
3192 	fdrop(fp, td);
3193 	return (error);
3194 }
3195 
3196 /*
3197  * Common implementation code for utimes(), lutimes(), and futimes().
3198  */
3199 static int
getutimes(usrtvp,tvpseg,tsp)3200 getutimes(usrtvp, tvpseg, tsp)
3201 	const struct timeval *usrtvp;
3202 	enum uio_seg tvpseg;
3203 	struct timespec *tsp;
3204 {
3205 	struct timeval tv[2];
3206 	const struct timeval *tvp;
3207 	int error;
3208 
3209 	if (usrtvp == NULL) {
3210 		vfs_timestamp(&tsp[0]);
3211 		tsp[1] = tsp[0];
3212 	} else {
3213 		if (tvpseg == UIO_SYSSPACE) {
3214 			tvp = usrtvp;
3215 		} else {
3216 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
3217 				return (error);
3218 			tvp = tv;
3219 		}
3220 
3221 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
3222 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
3223 			return (EINVAL);
3224 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
3225 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
3226 	}
3227 	return (0);
3228 }
3229 
3230 /*
3231  * Common implementation code for utimes(), lutimes(), and futimes().
3232  */
3233 static int
setutimes(td,vp,ts,numtimes,nullflag)3234 setutimes(td, vp, ts, numtimes, nullflag)
3235 	struct thread *td;
3236 	struct vnode *vp;
3237 	const struct timespec *ts;
3238 	int numtimes;
3239 	int nullflag;
3240 {
3241 	int error, setbirthtime;
3242 	struct mount *mp;
3243 	struct vattr vattr;
3244 
3245 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3246 		return (error);
3247 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3248 	setbirthtime = 0;
3249 	if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
3250 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
3251 		setbirthtime = 1;
3252 	VATTR_NULL(&vattr);
3253 	vattr.va_atime = ts[0];
3254 	vattr.va_mtime = ts[1];
3255 	if (setbirthtime)
3256 		vattr.va_birthtime = ts[1];
3257 	if (numtimes > 2)
3258 		vattr.va_birthtime = ts[2];
3259 	if (nullflag)
3260 		vattr.va_vaflags |= VA_UTIMES_NULL;
3261 #ifdef MAC
3262 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
3263 	    vattr.va_mtime);
3264 #endif
3265 	if (error == 0)
3266 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3267 	VOP_UNLOCK(vp, 0);
3268 	vn_finished_write(mp);
3269 	return (error);
3270 }
3271 
3272 /*
3273  * Set the access and modification times of a file.
3274  */
3275 #ifndef _SYS_SYSPROTO_H_
3276 struct utimes_args {
3277 	char	*path;
3278 	struct	timeval *tptr;
3279 };
3280 #endif
3281 int
sys_utimes(td,uap)3282 sys_utimes(td, uap)
3283 	struct thread *td;
3284 	register struct utimes_args /* {
3285 		char *path;
3286 		struct timeval *tptr;
3287 	} */ *uap;
3288 {
3289 
3290 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3291 	    UIO_USERSPACE));
3292 }
3293 
3294 #ifndef _SYS_SYSPROTO_H_
3295 struct futimesat_args {
3296 	int fd;
3297 	const char * path;
3298 	const struct timeval * times;
3299 };
3300 #endif
3301 int
sys_futimesat(struct thread * td,struct futimesat_args * uap)3302 sys_futimesat(struct thread *td, struct futimesat_args *uap)
3303 {
3304 
3305 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
3306 	    uap->times, UIO_USERSPACE));
3307 }
3308 
3309 int
kern_utimes(struct thread * td,char * path,enum uio_seg pathseg,struct timeval * tptr,enum uio_seg tptrseg)3310 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
3311     struct timeval *tptr, enum uio_seg tptrseg)
3312 {
3313 
3314 	return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
3315 }
3316 
3317 int
kern_utimesat(struct thread * td,int fd,char * path,enum uio_seg pathseg,struct timeval * tptr,enum uio_seg tptrseg)3318 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3319     struct timeval *tptr, enum uio_seg tptrseg)
3320 {
3321 	struct nameidata nd;
3322 	struct timespec ts[2];
3323 	int error, vfslocked;
3324 
3325 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3326 		return (error);
3327 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
3328 	    path, fd, CAP_FUTIMES, td);
3329 
3330 	if ((error = namei(&nd)) != 0)
3331 		return (error);
3332 	vfslocked = NDHASGIANT(&nd);
3333 	NDFREE(&nd, NDF_ONLY_PNBUF);
3334 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3335 	vrele(nd.ni_vp);
3336 	VFS_UNLOCK_GIANT(vfslocked);
3337 	return (error);
3338 }
3339 
3340 /*
3341  * Set the access and modification times of a file.
3342  */
3343 #ifndef _SYS_SYSPROTO_H_
3344 struct lutimes_args {
3345 	char	*path;
3346 	struct	timeval *tptr;
3347 };
3348 #endif
3349 int
sys_lutimes(td,uap)3350 sys_lutimes(td, uap)
3351 	struct thread *td;
3352 	register struct lutimes_args /* {
3353 		char *path;
3354 		struct timeval *tptr;
3355 	} */ *uap;
3356 {
3357 
3358 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3359 	    UIO_USERSPACE));
3360 }
3361 
3362 int
kern_lutimes(struct thread * td,char * path,enum uio_seg pathseg,struct timeval * tptr,enum uio_seg tptrseg)3363 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
3364     struct timeval *tptr, enum uio_seg tptrseg)
3365 {
3366 	struct timespec ts[2];
3367 	int error;
3368 	struct nameidata nd;
3369 	int vfslocked;
3370 
3371 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3372 		return (error);
3373 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3374 	if ((error = namei(&nd)) != 0)
3375 		return (error);
3376 	vfslocked = NDHASGIANT(&nd);
3377 	NDFREE(&nd, NDF_ONLY_PNBUF);
3378 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3379 	vrele(nd.ni_vp);
3380 	VFS_UNLOCK_GIANT(vfslocked);
3381 	return (error);
3382 }
3383 
3384 /*
3385  * Set the access and modification times of a file.
3386  */
3387 #ifndef _SYS_SYSPROTO_H_
3388 struct futimes_args {
3389 	int	fd;
3390 	struct	timeval *tptr;
3391 };
3392 #endif
3393 int
sys_futimes(td,uap)3394 sys_futimes(td, uap)
3395 	struct thread *td;
3396 	register struct futimes_args /* {
3397 		int  fd;
3398 		struct timeval *tptr;
3399 	} */ *uap;
3400 {
3401 
3402 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
3403 }
3404 
3405 int
kern_futimes(struct thread * td,int fd,struct timeval * tptr,enum uio_seg tptrseg)3406 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
3407     enum uio_seg tptrseg)
3408 {
3409 	struct timespec ts[2];
3410 	struct file *fp;
3411 	int vfslocked;
3412 	int error;
3413 
3414 	AUDIT_ARG_FD(fd);
3415 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3416 		return (error);
3417 	if ((error = getvnode(td->td_proc->p_fd, fd, CAP_FUTIMES, &fp))
3418 	    != 0)
3419 		return (error);
3420 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3421 #ifdef AUDIT
3422 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3423 	AUDIT_ARG_VNODE1(fp->f_vnode);
3424 	VOP_UNLOCK(fp->f_vnode, 0);
3425 #endif
3426 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
3427 	VFS_UNLOCK_GIANT(vfslocked);
3428 	fdrop(fp, td);
3429 	return (error);
3430 }
3431 
3432 /*
3433  * Truncate a file given its path name.
3434  */
3435 #ifndef _SYS_SYSPROTO_H_
3436 struct truncate_args {
3437 	char	*path;
3438 	int	pad;
3439 	off_t	length;
3440 };
3441 #endif
3442 int
sys_truncate(td,uap)3443 sys_truncate(td, uap)
3444 	struct thread *td;
3445 	register struct truncate_args /* {
3446 		char *path;
3447 		int pad;
3448 		off_t length;
3449 	} */ *uap;
3450 {
3451 
3452 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3453 }
3454 
3455 int
kern_truncate(struct thread * td,char * path,enum uio_seg pathseg,off_t length)3456 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3457 {
3458 	struct mount *mp;
3459 	struct vnode *vp;
3460 	void *rl_cookie;
3461 	struct vattr vattr;
3462 	struct nameidata nd;
3463 	int error, vfslocked;
3464 
3465 	if (length < 0)
3466 		return(EINVAL);
3467 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3468 	if ((error = namei(&nd)) != 0)
3469 		return (error);
3470 	vfslocked = NDHASGIANT(&nd);
3471 	vp = nd.ni_vp;
3472 	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
3473 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3474 		vn_rangelock_unlock(vp, rl_cookie);
3475 		vrele(vp);
3476 		VFS_UNLOCK_GIANT(vfslocked);
3477 		return (error);
3478 	}
3479 	NDFREE(&nd, NDF_ONLY_PNBUF);
3480 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3481 	if (vp->v_type == VDIR)
3482 		error = EISDIR;
3483 #ifdef MAC
3484 	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
3485 	}
3486 #endif
3487 	else if ((error = vn_writechk(vp)) == 0 &&
3488 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3489 		VATTR_NULL(&vattr);
3490 		vattr.va_size = length;
3491 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3492 	}
3493 	VOP_UNLOCK(vp, 0);
3494 	vn_finished_write(mp);
3495 	vn_rangelock_unlock(vp, rl_cookie);
3496 	vrele(vp);
3497 	VFS_UNLOCK_GIANT(vfslocked);
3498 	return (error);
3499 }
3500 
#if defined(COMPAT_43)
/*
 * Truncate a file given its path name.
 *
 * Old interface taking a long length.  sys_truncate() only forwards its
 * path and length to kern_truncate(), so that call is made directly
 * here; the long length is converted to off_t by the call.
 */
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
	char	*path;
	long	length;
};
#endif
int
otruncate(struct thread *td, struct otruncate_args *uap)
{

	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
}
#endif /* COMPAT_43 */
3530 
3531 /* Versions with the pad argument */
3532 int
freebsd6_truncate(struct thread * td,struct freebsd6_truncate_args * uap)3533 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
3534 {
3535 	struct truncate_args ouap;
3536 
3537 	ouap.path = uap->path;
3538 	ouap.length = uap->length;
3539 	return (sys_truncate(td, &ouap));
3540 }
3541 
3542 int
freebsd6_ftruncate(struct thread * td,struct freebsd6_ftruncate_args * uap)3543 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
3544 {
3545 	struct ftruncate_args ouap;
3546 
3547 	ouap.fd = uap->fd;
3548 	ouap.length = uap->length;
3549 	return (sys_ftruncate(td, &ouap));
3550 }
3551 
3552 /*
3553  * Sync an open file.
3554  */
3555 #ifndef _SYS_SYSPROTO_H_
3556 struct fsync_args {
3557 	int	fd;
3558 };
3559 #endif
3560 int
sys_fsync(td,uap)3561 sys_fsync(td, uap)
3562 	struct thread *td;
3563 	struct fsync_args /* {
3564 		int fd;
3565 	} */ *uap;
3566 {
3567 	struct vnode *vp;
3568 	struct mount *mp;
3569 	struct file *fp;
3570 	int vfslocked;
3571 	int error, lock_flags;
3572 
3573 	AUDIT_ARG_FD(uap->fd);
3574 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_FSYNC,
3575 	    &fp)) != 0)
3576 		return (error);
3577 	vp = fp->f_vnode;
3578 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3579 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3580 		goto drop;
3581 	if (MNT_SHARED_WRITES(mp) ||
3582 	    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
3583 		lock_flags = LK_SHARED;
3584 	} else {
3585 		lock_flags = LK_EXCLUSIVE;
3586 	}
3587 	vn_lock(vp, lock_flags | LK_RETRY);
3588 	AUDIT_ARG_VNODE1(vp);
3589 	if (vp->v_object != NULL) {
3590 		VM_OBJECT_LOCK(vp->v_object);
3591 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3592 		VM_OBJECT_UNLOCK(vp->v_object);
3593 	}
3594 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3595 
3596 	VOP_UNLOCK(vp, 0);
3597 	vn_finished_write(mp);
3598 drop:
3599 	VFS_UNLOCK_GIANT(vfslocked);
3600 	fdrop(fp, td);
3601 	return (error);
3602 }
3603 
3604 /*
3605  * Rename files.  Source and destination must either both be directories, or
3606  * both not be directories.  If target is a directory, it must be empty.
3607  */
3608 #ifndef _SYS_SYSPROTO_H_
3609 struct rename_args {
3610 	char	*from;
3611 	char	*to;
3612 };
3613 #endif
3614 int
sys_rename(td,uap)3615 sys_rename(td, uap)
3616 	struct thread *td;
3617 	register struct rename_args /* {
3618 		char *from;
3619 		char *to;
3620 	} */ *uap;
3621 {
3622 
3623 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3624 }
3625 
3626 #ifndef _SYS_SYSPROTO_H_
3627 struct renameat_args {
3628 	int	oldfd;
3629 	char	*old;
3630 	int	newfd;
3631 	char	*new;
3632 };
3633 #endif
3634 int
sys_renameat(struct thread * td,struct renameat_args * uap)3635 sys_renameat(struct thread *td, struct renameat_args *uap)
3636 {
3637 
3638 	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
3639 	    UIO_USERSPACE));
3640 }
3641 
3642 int
kern_rename(struct thread * td,char * from,char * to,enum uio_seg pathseg)3643 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3644 {
3645 
3646 	return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
3647 }
3648 
/*
 * Rename the object named by (oldfd, old) to the name (newfd, new).
 *
 * The source is looked up with a DELETE operation and the target with a
 * RENAME operation; if both lookups and the type/identity checks below
 * succeed, the actual work is handed to VOP_RENAME().
 *
 * Errors produced directly by this function:
 *   ENOTDIR - source is a directory but the existing target is not.
 *   EISDIR  - source is not a directory but the existing target is.
 *   EINVAL  - the source vnode is the target's parent directory, or the
 *             target lookup returned EISDIR while the source is a directory.
 * Other errors come from namei(), vn_start_write(), the MAC checks (when
 * MAC is compiled in) and VOP_RENAME().
 *
 * Internally error == -1 marks "source and target are the same vnode";
 * it skips VOP_RENAME() and is turned into a 0 return at the end.
 */
int
kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
    enum uio_seg pathseg)
{
	struct mount *mp = NULL;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int tvfslocked;
	int fvfslocked;
	int error;

	bwillwrite();
	/*
	 * With MAC the source lookup returns parent and leaf locked
	 * (LOCKPARENT | LOCKLEAF) for the MAC check below; without MAC only
	 * WANTPARENT is requested.
	 */
#ifdef MAC
	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
	    MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
#else
	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
	    AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
#endif

	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvfslocked = NDHASGIANT(&fromnd);
	/* Initialised so that out1 can unlock even if the target lookup never ran. */
	tvfslocked = 0;
#ifdef MAC
	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
	    fromnd.ni_vp, &fromnd.ni_cnd);
	/* Drop the locks taken for the MAC check; the references are kept. */
	VOP_UNLOCK(fromnd.ni_dvp, 0);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_vp, 0);
#endif
	fvp = fromnd.ni_vp;
	if (error == 0)
		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
	if (error != 0) {
		/* MAC denial or vn_start_write() failure: release the source side. */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
	    SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
	    td);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		vn_finished_write(mp);
		goto out1;
	}
	tvfslocked = NDHASGIANT(&tond);
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	/* An existing target must be of the same kind (directory or not) as the source. */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source vnode may not be the directory that would contain the target. */
	if (fvp == tdvp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 */
	if (fvp == tvp)
		error = -1;
#ifdef MAC
	else
		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
	if (!error) {
		/*
		 * NOTE(review): no vrele()/vput() of the four vnodes follows
		 * on this path, so VOP_RENAME() presumably releases them
		 * itself -- confirm against VOP_RENAME(9).
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		/*
		 * Failure (or the "same vnode" no-op): undo both lookups here.
		 * The target side was returned locked, hence vput(); tdvp is
		 * only vrele()d when it is the same vnode as tvp, which has
		 * just been vput().
		 */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp)
			vput(tvp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	/* Both lookups used SAVESTART; release the saved start directories. */
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	VFS_UNLOCK_GIANT(fvfslocked);
	VFS_UNLOCK_GIANT(tvfslocked);
	/* -1 is the internal "nothing to do" marker, reported as success. */
	if (error == -1)
		return (0);
	return (error);
}
3760 
3761 /*
3762  * Make a directory file.
3763  */
3764 #ifndef _SYS_SYSPROTO_H_
3765 struct mkdir_args {
3766 	char	*path;
3767 	int	mode;
3768 };
3769 #endif
3770 int
sys_mkdir(td,uap)3771 sys_mkdir(td, uap)
3772 	struct thread *td;
3773 	register struct mkdir_args /* {
3774 		char *path;
3775 		int mode;
3776 	} */ *uap;
3777 {
3778 
3779 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3780 }
3781 
3782 #ifndef _SYS_SYSPROTO_H_
3783 struct mkdirat_args {
3784 	int	fd;
3785 	char	*path;
3786 	mode_t	mode;
3787 };
3788 #endif
3789 int
sys_mkdirat(struct thread * td,struct mkdirat_args * uap)3790 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
3791 {
3792 
3793 	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
3794 }
3795 
3796 int
kern_mkdir(struct thread * td,char * path,enum uio_seg segflg,int mode)3797 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3798 {
3799 
3800 	return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
3801 }
3802 
3803 int
kern_mkdirat(struct thread * td,int fd,char * path,enum uio_seg segflg,int mode)3804 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
3805     int mode)
3806 {
3807 	struct mount *mp;
3808 	struct vnode *vp;
3809 	struct vattr vattr;
3810 	int error;
3811 	struct nameidata nd;
3812 	int vfslocked;
3813 
3814 	AUDIT_ARG_MODE(mode);
3815 restart:
3816 	bwillwrite();
3817 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
3818 	    AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
3819 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3820 	if ((error = namei(&nd)) != 0)
3821 		return (error);
3822 	vfslocked = NDHASGIANT(&nd);
3823 	vp = nd.ni_vp;
3824 	if (vp != NULL) {
3825 		NDFREE(&nd, NDF_ONLY_PNBUF);
3826 		/*
3827 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3828 		 * the strange behaviour of leaving the vnode unlocked
3829 		 * if the target is the same vnode as the parent.
3830 		 */
3831 		if (vp == nd.ni_dvp)
3832 			vrele(nd.ni_dvp);
3833 		else
3834 			vput(nd.ni_dvp);
3835 		vrele(vp);
3836 		VFS_UNLOCK_GIANT(vfslocked);
3837 		return (EEXIST);
3838 	}
3839 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3840 		NDFREE(&nd, NDF_ONLY_PNBUF);
3841 		vput(nd.ni_dvp);
3842 		VFS_UNLOCK_GIANT(vfslocked);
3843 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3844 			return (error);
3845 		goto restart;
3846 	}
3847 	VATTR_NULL(&vattr);
3848 	vattr.va_type = VDIR;
3849 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3850 #ifdef MAC
3851 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3852 	    &vattr);
3853 	if (error)
3854 		goto out;
3855 #endif
3856 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3857 #ifdef MAC
3858 out:
3859 #endif
3860 	NDFREE(&nd, NDF_ONLY_PNBUF);
3861 	vput(nd.ni_dvp);
3862 	if (!error)
3863 		vput(nd.ni_vp);
3864 	vn_finished_write(mp);
3865 	VFS_UNLOCK_GIANT(vfslocked);
3866 	return (error);
3867 }
3868 
3869 /*
3870  * Remove a directory file.
3871  */
3872 #ifndef _SYS_SYSPROTO_H_
3873 struct rmdir_args {
3874 	char	*path;
3875 };
3876 #endif
3877 int
sys_rmdir(td,uap)3878 sys_rmdir(td, uap)
3879 	struct thread *td;
3880 	struct rmdir_args /* {
3881 		char *path;
3882 	} */ *uap;
3883 {
3884 
3885 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3886 }
3887 
3888 int
kern_rmdir(struct thread * td,char * path,enum uio_seg pathseg)3889 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3890 {
3891 
3892 	return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
3893 }
3894 
3895 int
kern_rmdirat(struct thread * td,int fd,char * path,enum uio_seg pathseg)3896 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
3897 {
3898 	struct mount *mp;
3899 	struct vnode *vp;
3900 	int error;
3901 	struct nameidata nd;
3902 	int vfslocked;
3903 
3904 restart:
3905 	bwillwrite();
3906 	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
3907 	    AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
3908 	if ((error = namei(&nd)) != 0)
3909 		return (error);
3910 	vfslocked = NDHASGIANT(&nd);
3911 	vp = nd.ni_vp;
3912 	if (vp->v_type != VDIR) {
3913 		error = ENOTDIR;
3914 		goto out;
3915 	}
3916 	/*
3917 	 * No rmdir "." please.
3918 	 */
3919 	if (nd.ni_dvp == vp) {
3920 		error = EINVAL;
3921 		goto out;
3922 	}
3923 	/*
3924 	 * The root of a mounted filesystem cannot be deleted.
3925 	 */
3926 	if (vp->v_vflag & VV_ROOT) {
3927 		error = EBUSY;
3928 		goto out;
3929 	}
3930 #ifdef MAC
3931 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
3932 	    &nd.ni_cnd);
3933 	if (error)
3934 		goto out;
3935 #endif
3936 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3937 		NDFREE(&nd, NDF_ONLY_PNBUF);
3938 		vput(vp);
3939 		if (nd.ni_dvp == vp)
3940 			vrele(nd.ni_dvp);
3941 		else
3942 			vput(nd.ni_dvp);
3943 		VFS_UNLOCK_GIANT(vfslocked);
3944 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3945 			return (error);
3946 		goto restart;
3947 	}
3948 	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
3949 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3950 	vn_finished_write(mp);
3951 out:
3952 	NDFREE(&nd, NDF_ONLY_PNBUF);
3953 	vput(vp);
3954 	if (nd.ni_dvp == vp)
3955 		vrele(nd.ni_dvp);
3956 	else
3957 		vput(nd.ni_dvp);
3958 	VFS_UNLOCK_GIANT(vfslocked);
3959 	return (error);
3960 }
3961 
3962 #ifdef COMPAT_43
3963 /*
3964  * Read a block of directory entries in a filesystem independent format.
3965  */
3966 #ifndef _SYS_SYSPROTO_H_
3967 struct ogetdirentries_args {
3968 	int	fd;
3969 	char	*buf;
3970 	u_int	count;
3971 	long	*basep;
3972 };
3973 #endif
3974 int
ogetdirentries(struct thread * td,struct ogetdirentries_args * uap)3975 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
3976 {
3977 	long loff;
3978 	int error;
3979 
3980 	error = kern_ogetdirentries(td, uap, &loff);
3981 	if (error == 0)
3982 		error = copyout(&loff, uap->basep, sizeof(long));
3983 	return (error);
3984 }
3985 
/*
 * Backend of the COMPAT_43 getdirentries call: read directory entries from
 * the directory open on uap->fd into the user buffer uap->buf (at most
 * uap->count bytes), rewriting each entry's d_type/d_namlen bytes into the
 * layout the old interface expects.
 *
 * On success the byte count transferred is stored in td->td_retval[0] and
 * the file offset the read started at is stored in *ploff.
 *
 * Errors produced directly by this function:
 *   EINVAL - count exceeds 64 KiB, or the vnode is not a directory.
 *   EBADF  - the file was not opened for reading.
 *   EIO    - an entry with d_reclen == 0 was found while converting.
 * Other errors come from getvnode(), the MAC readdir check (when MAC is
 * compiled in), VOP_READDIR() and uiomove().
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt, vfslocked;
	long loff;
	off_t foffset;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
	    &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	foffset = foffset_lock(fp, 0);
	/* Re-entered with vp switched to the covered vnode for union mounts. */
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* auio describes the user's buffer. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* Remember the starting offset; it is what *ploff reports. */
	loff = auio.uio_offset = foffset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		foffset_unlock(fp, foffset, FOF_NOUPDATE);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian machines a mount with mnt_maxsymlinklen <= 0
	 * is read straight into the user buffer without conversion; the
	 * braced block below is the `else' of that test.  On little-endian
	 * machines the braced block always runs.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			foffset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer (kuio is a copy of
		 * auio redirected at dirbuf), convert each entry in place,
		 * then copy the result out through auio.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		foffset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance dp. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Only copy out if the walk reached the end of the data. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error) {
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was transferred from the root of a MNT_UNION mount:
	 * switch the file over to the covered vnode and read that instead,
	 * starting from offset 0.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		foffset = 0;
		vput(tvp);
		VFS_UNLOCK_GIANT(vfslocked);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	foffset_unlock(fp, foffset, 0);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	if (error == 0)
		*ploff = loff;
	return (error);
}
4120 #endif /* COMPAT_43 */
4121 
4122 /*
4123  * Read a block of directory entries in a filesystem independent format.
4124  */
4125 #ifndef _SYS_SYSPROTO_H_
4126 struct getdirentries_args {
4127 	int	fd;
4128 	char	*buf;
4129 	u_int	count;
4130 	long	*basep;
4131 };
4132 #endif
4133 int
sys_getdirentries(td,uap)4134 sys_getdirentries(td, uap)
4135 	struct thread *td;
4136 	register struct getdirentries_args /* {
4137 		int fd;
4138 		char *buf;
4139 		u_int count;
4140 		long *basep;
4141 	} */ *uap;
4142 {
4143 	long base;
4144 	int error;
4145 
4146 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
4147 	if (error)
4148 		return (error);
4149 	if (uap->basep != NULL)
4150 		error = copyout(&base, uap->basep, sizeof(long));
4151 	return (error);
4152 }
4153 
4154 int
kern_getdirentries(struct thread * td,int fd,char * buf,u_int count,long * basep)4155 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
4156     long *basep)
4157 {
4158 	struct vnode *vp;
4159 	struct file *fp;
4160 	struct uio auio;
4161 	struct iovec aiov;
4162 	int vfslocked;
4163 	long loff;
4164 	int error, eofflag;
4165 	off_t foffset;
4166 
4167 	AUDIT_ARG_FD(fd);
4168 	auio.uio_resid = count;
4169 	if (auio.uio_resid > IOSIZE_MAX)
4170 		return (EINVAL);
4171 	if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK,
4172 	    &fp)) != 0)
4173 		return (error);
4174 	if ((fp->f_flag & FREAD) == 0) {
4175 		fdrop(fp, td);
4176 		return (EBADF);
4177 	}
4178 	vp = fp->f_vnode;
4179 	foffset = foffset_lock(fp, 0);
4180 unionread:
4181 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
4182 	if (vp->v_type != VDIR) {
4183 		VFS_UNLOCK_GIANT(vfslocked);
4184 		error = EINVAL;
4185 		goto fail;
4186 	}
4187 	aiov.iov_base = buf;
4188 	aiov.iov_len = count;
4189 	auio.uio_iov = &aiov;
4190 	auio.uio_iovcnt = 1;
4191 	auio.uio_rw = UIO_READ;
4192 	auio.uio_segflg = UIO_USERSPACE;
4193 	auio.uio_td = td;
4194 	vn_lock(vp, LK_SHARED | LK_RETRY);
4195 	AUDIT_ARG_VNODE1(vp);
4196 	loff = auio.uio_offset = foffset;
4197 #ifdef MAC
4198 	error = mac_vnode_check_readdir(td->td_ucred, vp);
4199 	if (error == 0)
4200 #endif
4201 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
4202 		    NULL);
4203 	foffset = auio.uio_offset;
4204 	if (error) {
4205 		VOP_UNLOCK(vp, 0);
4206 		VFS_UNLOCK_GIANT(vfslocked);
4207 		goto fail;
4208 	}
4209 	if (count == auio.uio_resid &&
4210 	    (vp->v_vflag & VV_ROOT) &&
4211 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
4212 		struct vnode *tvp = vp;
4213 		vp = vp->v_mount->mnt_vnodecovered;
4214 		VREF(vp);
4215 		fp->f_vnode = vp;
4216 		fp->f_data = vp;
4217 		foffset = 0;
4218 		vput(tvp);
4219 		VFS_UNLOCK_GIANT(vfslocked);
4220 		goto unionread;
4221 	}
4222 	VOP_UNLOCK(vp, 0);
4223 	VFS_UNLOCK_GIANT(vfslocked);
4224 	*basep = loff;
4225 	td->td_retval[0] = count - auio.uio_resid;
4226 fail:
4227 	foffset_unlock(fp, foffset, 0);
4228 	fdrop(fp, td);
4229 	return (error);
4230 }
4231 
4232 #ifndef _SYS_SYSPROTO_H_
4233 struct getdents_args {
4234 	int fd;
4235 	char *buf;
4236 	size_t count;
4237 };
4238 #endif
4239 int
sys_getdents(td,uap)4240 sys_getdents(td, uap)
4241 	struct thread *td;
4242 	register struct getdents_args /* {
4243 		int fd;
4244 		char *buf;
4245 		u_int count;
4246 	} */ *uap;
4247 {
4248 	struct getdirentries_args ap;
4249 	ap.fd = uap->fd;
4250 	ap.buf = uap->buf;
4251 	ap.count = uap->count;
4252 	ap.basep = NULL;
4253 	return (sys_getdirentries(td, &ap));
4254 }
4255 
4256 /*
4257  * Set the mode mask for creation of filesystem nodes.
4258  */
4259 #ifndef _SYS_SYSPROTO_H_
4260 struct umask_args {
4261 	int	newmask;
4262 };
4263 #endif
4264 int
sys_umask(td,uap)4265 sys_umask(td, uap)
4266 	struct thread *td;
4267 	struct umask_args /* {
4268 		int newmask;
4269 	} */ *uap;
4270 {
4271 	register struct filedesc *fdp;
4272 
4273 	FILEDESC_XLOCK(td->td_proc->p_fd);
4274 	fdp = td->td_proc->p_fd;
4275 	td->td_retval[0] = fdp->fd_cmask;
4276 	fdp->fd_cmask = uap->newmask & ALLPERMS;
4277 	FILEDESC_XUNLOCK(td->td_proc->p_fd);
4278 	return (0);
4279 }
4280 
4281 /*
4282  * Void all references to file by ripping underlying filesystem away from
4283  * vnode.
4284  */
4285 #ifndef _SYS_SYSPROTO_H_
4286 struct revoke_args {
4287 	char	*path;
4288 };
4289 #endif
4290 int
sys_revoke(td,uap)4291 sys_revoke(td, uap)
4292 	struct thread *td;
4293 	register struct revoke_args /* {
4294 		char *path;
4295 	} */ *uap;
4296 {
4297 	struct vnode *vp;
4298 	struct vattr vattr;
4299 	int error;
4300 	struct nameidata nd;
4301 	int vfslocked;
4302 
4303 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4304 	    UIO_USERSPACE, uap->path, td);
4305 	if ((error = namei(&nd)) != 0)
4306 		return (error);
4307 	vfslocked = NDHASGIANT(&nd);
4308 	vp = nd.ni_vp;
4309 	NDFREE(&nd, NDF_ONLY_PNBUF);
4310 	if (vp->v_type != VCHR || vp->v_rdev == NULL) {
4311 		error = EINVAL;
4312 		goto out;
4313 	}
4314 #ifdef MAC
4315 	error = mac_vnode_check_revoke(td->td_ucred, vp);
4316 	if (error)
4317 		goto out;
4318 #endif
4319 	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
4320 	if (error)
4321 		goto out;
4322 	if (td->td_ucred->cr_uid != vattr.va_uid) {
4323 		error = priv_check(td, PRIV_VFS_ADMIN);
4324 		if (error)
4325 			goto out;
4326 	}
4327 	if (vcount(vp) > 1)
4328 		VOP_REVOKE(vp, REVOKEALL);
4329 out:
4330 	vput(vp);
4331 	VFS_UNLOCK_GIANT(vfslocked);
4332 	return (error);
4333 }
4334 
4335 /*
4336  * Convert a user file descriptor to a kernel file entry and check that, if it
4337  * is a capability, the correct rights are present. A reference on the file
4338  * entry is held upon returning.
4339  */
4340 int
getvnode(struct filedesc * fdp,int fd,cap_rights_t rights,struct file ** fpp)4341 getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
4342     struct file **fpp)
4343 {
4344 	struct file *fp;
4345 #ifdef CAPABILITIES
4346 	struct file *fp_fromcap;
4347 #endif
4348 	int error;
4349 
4350 	error = 0;
4351 	fp = NULL;
4352 	if ((fdp == NULL) || (fp = fget_unlocked(fdp, fd)) == NULL)
4353 		return (EBADF);
4354 #ifdef CAPABILITIES
4355 	/*
4356 	 * If the file descriptor is for a capability, test rights and use the
4357 	 * file descriptor referenced by the capability.
4358 	 */
4359 	error = cap_funwrap(fp, rights, &fp_fromcap);
4360 	if (error) {
4361 		fdrop(fp, curthread);
4362 		return (error);
4363 	}
4364 	if (fp != fp_fromcap) {
4365 		fhold(fp_fromcap);
4366 		fdrop(fp, curthread);
4367 		fp = fp_fromcap;
4368 	}
4369 #endif /* CAPABILITIES */
4370 
4371 	/*
4372 	 * The file could be not of the vnode type, or it may be not
4373 	 * yet fully initialized, in which case the f_vnode pointer
4374 	 * may be set, but f_ops is still badfileops.  E.g.,
4375 	 * devfs_open() transiently create such situation to
4376 	 * facilitate csw d_fdopen().
4377 	 *
4378 	 * Dupfdopen() handling in kern_openat() installs the
4379 	 * half-baked file into the process descriptor table, allowing
4380 	 * other thread to dereference it. Guard against the race by
4381 	 * checking f_ops.
4382 	 */
4383 	if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
4384 		fdrop(fp, curthread);
4385 		return (EINVAL);
4386 	}
4387 	*fpp = fp;
4388 	return (0);
4389 }
4390 
4391 
4392 /*
4393  * Get an (NFS) file handle.
4394  */
4395 #ifndef _SYS_SYSPROTO_H_
4396 struct lgetfh_args {
4397 	char	*fname;
4398 	fhandle_t *fhp;
4399 };
4400 #endif
4401 int
sys_lgetfh(td,uap)4402 sys_lgetfh(td, uap)
4403 	struct thread *td;
4404 	register struct lgetfh_args *uap;
4405 {
4406 	struct nameidata nd;
4407 	fhandle_t fh;
4408 	register struct vnode *vp;
4409 	int vfslocked;
4410 	int error;
4411 
4412 	error = priv_check(td, PRIV_VFS_GETFH);
4413 	if (error)
4414 		return (error);
4415 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4416 	    UIO_USERSPACE, uap->fname, td);
4417 	error = namei(&nd);
4418 	if (error)
4419 		return (error);
4420 	vfslocked = NDHASGIANT(&nd);
4421 	NDFREE(&nd, NDF_ONLY_PNBUF);
4422 	vp = nd.ni_vp;
4423 	bzero(&fh, sizeof(fh));
4424 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4425 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4426 	vput(vp);
4427 	VFS_UNLOCK_GIANT(vfslocked);
4428 	if (error)
4429 		return (error);
4430 	error = copyout(&fh, uap->fhp, sizeof (fh));
4431 	return (error);
4432 }
4433 
4434 #ifndef _SYS_SYSPROTO_H_
4435 struct getfh_args {
4436 	char	*fname;
4437 	fhandle_t *fhp;
4438 };
4439 #endif
4440 int
sys_getfh(td,uap)4441 sys_getfh(td, uap)
4442 	struct thread *td;
4443 	register struct getfh_args *uap;
4444 {
4445 	struct nameidata nd;
4446 	fhandle_t fh;
4447 	register struct vnode *vp;
4448 	int vfslocked;
4449 	int error;
4450 
4451 	error = priv_check(td, PRIV_VFS_GETFH);
4452 	if (error)
4453 		return (error);
4454 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4455 	    UIO_USERSPACE, uap->fname, td);
4456 	error = namei(&nd);
4457 	if (error)
4458 		return (error);
4459 	vfslocked = NDHASGIANT(&nd);
4460 	NDFREE(&nd, NDF_ONLY_PNBUF);
4461 	vp = nd.ni_vp;
4462 	bzero(&fh, sizeof(fh));
4463 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4464 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4465 	vput(vp);
4466 	VFS_UNLOCK_GIANT(vfslocked);
4467 	if (error)
4468 		return (error);
4469 	error = copyout(&fh, uap->fhp, sizeof (fh));
4470 	return (error);
4471 }
4472 
4473 /*
4474  * syscall for the rpc.lockd to use to translate a NFS file handle into an
4475  * open descriptor.
4476  *
4477  * warning: do not remove the priv_check() call or this becomes one giant
4478  * security hole.
4479  */
4480 #ifndef _SYS_SYSPROTO_H_
4481 struct fhopen_args {
4482 	const struct fhandle *u_fhp;
4483 	int flags;
4484 };
4485 #endif
/*
 * fhopen(u_fhp, flags): open the file identified by a file handle and
 * return a new descriptor for it in td->td_retval[0].
 *
 * Requires PRIV_VFS_FHOPEN.  The body mirrors the checks of vn_open (see
 * the "from vn_open" / "end of vn_open code" markers) but starts from a
 * vnode obtained with VFS_FHTOVP() instead of a path lookup.
 *
 * Errors produced directly by this function:
 *   EINVAL     - neither FREAD nor FWRITE requested, or O_CREAT given.
 *   ESTALE     - no mount matches the handle's fsid.
 *   EMLINK     - the vnode is a symbolic link.
 *   EOPNOTSUPP - the vnode is a socket.
 *   ENOTDIR    - O_DIRECTORY given but the vnode is not a directory.
 *   EISDIR     - write or truncate requested on a directory.
 * Other errors come from the helpers called along the way.
 *
 * Cleanup labels: `bad' releases the locked vnode with vput() and falls
 * into `out', which only drops Giant.
 */
int
sys_fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, error, type;
	accmode_t accmode;
	struct file *nfp;
	int vfslocked;
	int indx;

	error = priv_check(td, PRIV_VFS_FHOPEN);
	if (error)
		return (error);
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_busyfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	vfslocked = VFS_LOCK_GIANT(mp);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
	vfs_unbusy(mp);
	if (error)
		goto out;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
		error = ENOTDIR;
		goto bad;
	}
	/* Build the access mode to check from the requested open flags. */
	accmode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		accmode |= VWRITE;
	}
	if (fmode & FREAD)
		accmode |= VREAD;
	if ((fmode & O_APPEND) && (fmode & FWRITE))
		accmode |= VAPPEND;
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, accmode);
	if (error)
		goto bad;
#endif
	if (accmode) {
		error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write(); a mount
		 * reference is held with vfs_ref() across that window.
		 */
		vfs_ref(mp);
		VOP_UNLOCK(vp, 0);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			/* vp is unlocked here, so vrele() and skip `bad'. */
			vrele(vp);
			vfs_rel(mp);
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		vfs_rel(mp);
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting only the size attribute to 0. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
	if (error)
		goto bad;

	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx, fmode)) != 0) {
		/* Undo the writer count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;
	nfp->f_vnode = vp;
	finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock-style lock: exclusive or shared per the flags. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is dropped around VOP_ADVLOCK(). */
		VOP_UNLOCK(vp, 0);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			fdclose(fdp, fp, indx, td);

			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_set_int(&fp->f_flag, FHASLOCK);
	}

	VOP_UNLOCK(vp, 0);
	fdrop(fp, td);
	VFS_UNLOCK_GIANT(vfslocked);
	td->td_retval[0] = indx;
	return (0);

bad:
	vput(vp);
out:
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
4667 
4668 /*
4669  * Stat an (NFS) file handle.
4670  */
4671 #ifndef _SYS_SYSPROTO_H_
4672 struct fhstat_args {
4673 	struct fhandle *u_fhp;
4674 	struct stat *sb;
4675 };
4676 #endif
4677 int
sys_fhstat(td,uap)4678 sys_fhstat(td, uap)
4679 	struct thread *td;
4680 	register struct fhstat_args /* {
4681 		struct fhandle *u_fhp;
4682 		struct stat *sb;
4683 	} */ *uap;
4684 {
4685 	struct stat sb;
4686 	fhandle_t fh;
4687 	struct mount *mp;
4688 	struct vnode *vp;
4689 	int vfslocked;
4690 	int error;
4691 
4692 	error = priv_check(td, PRIV_VFS_FHSTAT);
4693 	if (error)
4694 		return (error);
4695 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4696 	if (error)
4697 		return (error);
4698 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4699 		return (ESTALE);
4700 	vfslocked = VFS_LOCK_GIANT(mp);
4701 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
4702 	vfs_unbusy(mp);
4703 	if (error) {
4704 		VFS_UNLOCK_GIANT(vfslocked);
4705 		return (error);
4706 	}
4707 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4708 	vput(vp);
4709 	VFS_UNLOCK_GIANT(vfslocked);
4710 	if (error)
4711 		return (error);
4712 	error = copyout(&sb, uap->sb, sizeof(sb));
4713 	return (error);
4714 }
4715 
4716 /*
4717  * Implement fstatfs() for (NFS) file handles.
4718  */
4719 #ifndef _SYS_SYSPROTO_H_
4720 struct fhstatfs_args {
4721 	struct fhandle *u_fhp;
4722 	struct statfs *buf;
4723 };
4724 #endif
4725 int
sys_fhstatfs(td,uap)4726 sys_fhstatfs(td, uap)
4727 	struct thread *td;
4728 	struct fhstatfs_args /* {
4729 		struct fhandle *u_fhp;
4730 		struct statfs *buf;
4731 	} */ *uap;
4732 {
4733 	struct statfs sf;
4734 	fhandle_t fh;
4735 	int error;
4736 
4737 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4738 	if (error)
4739 		return (error);
4740 	error = kern_fhstatfs(td, fh, &sf);
4741 	if (error)
4742 		return (error);
4743 	return (copyout(&sf, uap->buf, sizeof(sf)));
4744 }
4745 
4746 int
kern_fhstatfs(struct thread * td,fhandle_t fh,struct statfs * buf)4747 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4748 {
4749 	struct statfs *sp;
4750 	struct mount *mp;
4751 	struct vnode *vp;
4752 	int vfslocked;
4753 	int error;
4754 
4755 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4756 	if (error)
4757 		return (error);
4758 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4759 		return (ESTALE);
4760 	vfslocked = VFS_LOCK_GIANT(mp);
4761 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
4762 	if (error) {
4763 		vfs_unbusy(mp);
4764 		VFS_UNLOCK_GIANT(vfslocked);
4765 		return (error);
4766 	}
4767 	vput(vp);
4768 	error = prison_canseemount(td->td_ucred, mp);
4769 	if (error)
4770 		goto out;
4771 #ifdef MAC
4772 	error = mac_mount_check_stat(td->td_ucred, mp);
4773 	if (error)
4774 		goto out;
4775 #endif
4776 	/*
4777 	 * Set these in case the underlying filesystem fails to do so.
4778 	 */
4779 	sp = &mp->mnt_stat;
4780 	sp->f_version = STATFS_VERSION;
4781 	sp->f_namemax = NAME_MAX;
4782 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4783 	error = VFS_STATFS(mp, sp);
4784 	if (error == 0)
4785 		*buf = *sp;
4786 out:
4787 	vfs_unbusy(mp);
4788 	VFS_UNLOCK_GIANT(vfslocked);
4789 	return (error);
4790 }
4791 
4792 int
kern_posix_fallocate(struct thread * td,int fd,off_t offset,off_t len)4793 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
4794 {
4795 	struct file *fp;
4796 	struct mount *mp;
4797 	struct vnode *vp;
4798 	off_t olen, ooffset;
4799 	int error, vfslocked;
4800 
4801 	fp = NULL;
4802 	vfslocked = 0;
4803 	error = fget(td, fd, CAP_WRITE, &fp);
4804 	if (error != 0)
4805 		goto out;
4806 
4807 	switch (fp->f_type) {
4808 	case DTYPE_VNODE:
4809 		break;
4810 	case DTYPE_PIPE:
4811 	case DTYPE_FIFO:
4812 		error = ESPIPE;
4813 		goto out;
4814 	default:
4815 		error = ENODEV;
4816 		goto out;
4817 	}
4818 	if ((fp->f_flag & FWRITE) == 0) {
4819 		error = EBADF;
4820 		goto out;
4821 	}
4822 	vp = fp->f_vnode;
4823 	if (vp->v_type != VREG) {
4824 		error = ENODEV;
4825 		goto out;
4826 	}
4827 	if (offset < 0 || len <= 0) {
4828 		error = EINVAL;
4829 		goto out;
4830 	}
4831 	/* Check for wrap. */
4832 	if (offset > OFF_MAX - len) {
4833 		error = EFBIG;
4834 		goto out;
4835 	}
4836 
4837 	/* Allocating blocks may take a long time, so iterate. */
4838 	for (;;) {
4839 		olen = len;
4840 		ooffset = offset;
4841 
4842 		bwillwrite();
4843 		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
4844 		mp = NULL;
4845 		error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4846 		if (error != 0) {
4847 			VFS_UNLOCK_GIANT(vfslocked);
4848 			break;
4849 		}
4850 		error = vn_lock(vp, LK_EXCLUSIVE);
4851 		if (error != 0) {
4852 			vn_finished_write(mp);
4853 			VFS_UNLOCK_GIANT(vfslocked);
4854 			break;
4855 		}
4856 #ifdef MAC
4857 		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
4858 		if (error == 0)
4859 #endif
4860 			error = VOP_ALLOCATE(vp, &offset, &len);
4861 		VOP_UNLOCK(vp, 0);
4862 		vn_finished_write(mp);
4863 		VFS_UNLOCK_GIANT(vfslocked);
4864 
4865 		if (olen + ooffset != offset + len) {
4866 			panic("offset + len changed from %jx/%jx to %jx/%jx",
4867 			    ooffset, olen, offset, len);
4868 		}
4869 		if (error != 0 || len == 0)
4870 			break;
4871 		KASSERT(olen > len, ("Iteration did not make progress?"));
4872 		maybe_yield();
4873 	}
4874  out:
4875 	if (fp != NULL)
4876 		fdrop(fp, td);
4877 	return (error);
4878 }
4879 
4880 int
sys_posix_fallocate(struct thread * td,struct posix_fallocate_args * uap)4881 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
4882 {
4883 
4884 	td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
4885 	    uap->len);
4886 	return (0);
4887 }
4888 
4889 /*
4890  * Unlike madvise(2), we do not make a best effort to remember every
4891  * possible caching hint.  Instead, we remember the last setting with
4892  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
4893  * region of any current setting.
4894  */
4895 int
kern_posix_fadvise(struct thread * td,int fd,off_t offset,off_t len,int advice)4896 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
4897     int advice)
4898 {
4899 	struct fadvise_info *fa, *new;
4900 	struct file *fp;
4901 	struct vnode *vp;
4902 	off_t end;
4903 	int error;
4904 
4905 	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
4906 		return (EINVAL);
4907 	switch (advice) {
4908 	case POSIX_FADV_SEQUENTIAL:
4909 	case POSIX_FADV_RANDOM:
4910 	case POSIX_FADV_NOREUSE:
4911 		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
4912 		break;
4913 	case POSIX_FADV_NORMAL:
4914 	case POSIX_FADV_WILLNEED:
4915 	case POSIX_FADV_DONTNEED:
4916 		new = NULL;
4917 		break;
4918 	default:
4919 		return (EINVAL);
4920 	}
4921 	/* XXX: CAP_POSIX_FADVISE? */
4922 	error = fget(td, fd, 0, &fp);
4923 	if (error != 0)
4924 		goto out;
4925 
4926 	switch (fp->f_type) {
4927 	case DTYPE_VNODE:
4928 		break;
4929 	case DTYPE_PIPE:
4930 	case DTYPE_FIFO:
4931 		error = ESPIPE;
4932 		goto out;
4933 	default:
4934 		error = ENODEV;
4935 		goto out;
4936 	}
4937 	vp = fp->f_vnode;
4938 	if (vp->v_type != VREG) {
4939 		error = ENODEV;
4940 		goto out;
4941 	}
4942 	if (len == 0)
4943 		end = OFF_MAX;
4944 	else
4945 		end = offset + len - 1;
4946 	switch (advice) {
4947 	case POSIX_FADV_SEQUENTIAL:
4948 	case POSIX_FADV_RANDOM:
4949 	case POSIX_FADV_NOREUSE:
4950 		/*
4951 		 * Try to merge any existing non-standard region with
4952 		 * this new region if possible, otherwise create a new
4953 		 * non-standard region for this request.
4954 		 */
4955 		mtx_pool_lock(mtxpool_sleep, fp);
4956 		fa = fp->f_advice;
4957 		if (fa != NULL && fa->fa_advice == advice &&
4958 		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
4959 		    (end != OFF_MAX && fa->fa_start == end + 1) ||
4960 		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
4961 			if (offset < fa->fa_start)
4962 				fa->fa_start = offset;
4963 			if (end > fa->fa_end)
4964 				fa->fa_end = end;
4965 		} else {
4966 			new->fa_advice = advice;
4967 			new->fa_start = offset;
4968 			new->fa_end = end;
4969 			new->fa_prevstart = 0;
4970 			new->fa_prevend = 0;
4971 			fp->f_advice = new;
4972 			new = fa;
4973 		}
4974 		mtx_pool_unlock(mtxpool_sleep, fp);
4975 		break;
4976 	case POSIX_FADV_NORMAL:
4977 		/*
4978 		 * If a the "normal" region overlaps with an existing
4979 		 * non-standard region, trim or remove the
4980 		 * non-standard region.
4981 		 */
4982 		mtx_pool_lock(mtxpool_sleep, fp);
4983 		fa = fp->f_advice;
4984 		if (fa != NULL) {
4985 			if (offset <= fa->fa_start && end >= fa->fa_end) {
4986 				new = fa;
4987 				fp->f_advice = NULL;
4988 			} else if (offset <= fa->fa_start &&
4989  			    end >= fa->fa_start)
4990 				fa->fa_start = end + 1;
4991 			else if (offset <= fa->fa_end && end >= fa->fa_end)
4992 				fa->fa_end = offset - 1;
4993 			else if (offset >= fa->fa_start && end <= fa->fa_end) {
4994 				/*
4995 				 * If the "normal" region is a middle
4996 				 * portion of the existing
4997 				 * non-standard region, just remove
4998 				 * the whole thing rather than picking
4999 				 * one side or the other to
5000 				 * preserve.
5001 				 */
5002 				new = fa;
5003 				fp->f_advice = NULL;
5004 			}
5005 		}
5006 		mtx_pool_unlock(mtxpool_sleep, fp);
5007 		break;
5008 	case POSIX_FADV_WILLNEED:
5009 	case POSIX_FADV_DONTNEED:
5010 		error = VOP_ADVISE(vp, offset, end, advice);
5011 		break;
5012 	}
5013 out:
5014 	if (fp != NULL)
5015 		fdrop(fp, td);
5016 	free(new, M_FADVISE);
5017 	return (error);
5018 }
5019 
5020 int
sys_posix_fadvise(struct thread * td,struct posix_fadvise_args * uap)5021 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
5022 {
5023 
5024 	td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
5025 	    uap->len, uap->advice);
5026 	return (0);
5027 }
5028