xref: /freebsd-13-stable/sys/fs/tmpfs/tmpfs_vnops.c (revision 06b782abaafe56674fc2be847dea4ab28c1cddf0)
1 /*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 #include <sys/cdefs.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/dirent.h>
42 #include <sys/fcntl.h>
43 #include <sys/file.h>
44 #include <sys/filio.h>
45 #include <sys/limits.h>
46 #include <sys/lockf.h>
47 #include <sys/lock.h>
48 #include <sys/mount.h>
49 #include <sys/namei.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/rwlock.h>
53 #include <sys/sched.h>
54 #include <sys/smr.h>
55 #include <sys/stat.h>
56 #include <sys/sysctl.h>
57 #include <sys/unistd.h>
58 #include <sys/vnode.h>
59 #include <security/audit/audit.h>
60 #include <security/mac/mac_framework.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_param.h>
64 #include <vm/vm_object.h>
65 #include <vm/vm_page.h>
66 #include <vm/vm_pager.h>
67 #include <vm/swap_pager.h>
68 
69 #include <fs/tmpfs/tmpfs_vnops.h>
70 #include <fs/tmpfs/tmpfs.h>
71 
72 SYSCTL_DECL(_vfs_tmpfs);
73 VFS_SMR_DECLARE;
74 
/*
 * Running total of relock passes made by tmpfs_rename_relock(); that
 * function adds its per-call restart count here on both its success and
 * its failure exit.  Exported read-only (CTLFLAG_RD) under the _vfs_tmpfs
 * sysctl node as "rename_restarts".
 */
75 static volatile int tmpfs_rename_restarts;
76 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
77     __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
78     "Times rename had to restart due to lock contention");
79 
/*
 * Allocation callback handed to vn_vget_ino_gen() by tmpfs_lookup1().
 * The opaque argument is forwarded unchanged to tmpfs_alloc_vp() as the
 * node to obtain a vnode for; the result of that call is returned as is.
 */
static int
tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{
	int error;

	error = tmpfs_alloc_vp(mp, arg, lkflags, rvp);
	return (error);
}
87 
88 static int
tmpfs_lookup1(struct vnode * dvp,struct vnode ** vpp,struct componentname * cnp)89 tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
90 {
91 	struct tmpfs_dirent *de;
92 	struct tmpfs_node *dnode, *pnode;
93 	struct tmpfs_mount *tm;
94 	int error;
95 
96 	/* Caller assumes responsibility for ensuring access (VEXEC). */
97 	dnode = VP_TO_TMPFS_DIR(dvp);
98 	*vpp = NULLVP;
99 
100 	/* We cannot be requesting the parent directory of the root node. */
101 	MPASS(IMPLIES(dnode->tn_type == VDIR &&
102 	    dnode->tn_dir.tn_parent == dnode,
103 	    !(cnp->cn_flags & ISDOTDOT)));
104 
105 	TMPFS_ASSERT_LOCKED(dnode);
106 	if (dnode->tn_dir.tn_parent == NULL) {
107 		error = ENOENT;
108 		goto out;
109 	}
110 	if (cnp->cn_flags & ISDOTDOT) {
111 		tm = VFS_TO_TMPFS(dvp->v_mount);
112 		pnode = dnode->tn_dir.tn_parent;
113 		tmpfs_ref_node(pnode);
114 		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
115 		    pnode, cnp->cn_lkflags, vpp);
116 		tmpfs_free_node(tm, pnode);
117 		if (error != 0)
118 			goto out;
119 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
120 		VREF(dvp);
121 		*vpp = dvp;
122 		error = 0;
123 	} else {
124 		de = tmpfs_dir_lookup(dnode, NULL, cnp);
125 		if (de != NULL && de->td_node == NULL)
126 			cnp->cn_flags |= ISWHITEOUT;
127 		if (de == NULL || de->td_node == NULL) {
128 			/*
129 			 * The entry was not found in the directory.
130 			 * This is OK if we are creating or renaming an
131 			 * entry and are working on the last component of
132 			 * the path name.
133 			 */
134 			if ((cnp->cn_flags & ISLASTCN) &&
135 			    (cnp->cn_nameiop == CREATE || \
136 			    cnp->cn_nameiop == RENAME ||
137 			    (cnp->cn_nameiop == DELETE &&
138 			    cnp->cn_flags & DOWHITEOUT &&
139 			    cnp->cn_flags & ISWHITEOUT))) {
140 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
141 				    cnp->cn_thread);
142 				if (error != 0)
143 					goto out;
144 
145 				/*
146 				 * Keep the component name in the buffer for
147 				 * future uses.
148 				 */
149 				cnp->cn_flags |= SAVENAME;
150 
151 				error = EJUSTRETURN;
152 			} else
153 				error = ENOENT;
154 		} else {
155 			struct tmpfs_node *tnode;
156 
157 			/*
158 			 * The entry was found, so get its associated
159 			 * tmpfs_node.
160 			 */
161 			tnode = de->td_node;
162 
163 			/*
164 			 * If we are not at the last path component and
165 			 * found a non-directory or non-link entry (which
166 			 * may itself be pointing to a directory), raise
167 			 * an error.
168 			 */
169 			if ((tnode->tn_type != VDIR &&
170 			    tnode->tn_type != VLNK) &&
171 			    !(cnp->cn_flags & ISLASTCN)) {
172 				error = ENOTDIR;
173 				goto out;
174 			}
175 
176 			/*
177 			 * If we are deleting or renaming the entry, keep
178 			 * track of its tmpfs_dirent so that it can be
179 			 * easily deleted later.
180 			 */
181 			if ((cnp->cn_flags & ISLASTCN) &&
182 			    (cnp->cn_nameiop == DELETE ||
183 			    cnp->cn_nameiop == RENAME)) {
184 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
185 				    cnp->cn_thread);
186 				if (error != 0)
187 					goto out;
188 
189 				/* Allocate a new vnode on the matching entry. */
190 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
191 				    cnp->cn_lkflags, vpp);
192 				if (error != 0)
193 					goto out;
194 
195 				if ((dnode->tn_mode & S_ISTXT) &&
196 				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
197 				  cnp->cn_thread) && VOP_ACCESS(*vpp, VADMIN,
198 				  cnp->cn_cred, cnp->cn_thread)) {
199 					error = EPERM;
200 					vput(*vpp);
201 					*vpp = NULL;
202 					goto out;
203 				}
204 				cnp->cn_flags |= SAVENAME;
205 			} else {
206 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
207 				    cnp->cn_lkflags, vpp);
208 				if (error != 0)
209 					goto out;
210 			}
211 		}
212 	}
213 
214 	/*
215 	 * Store the result of this lookup in the cache.  Avoid this if the
216 	 * request was for creation, as it does not improve timings on
217 	 * emprical tests.
218 	 */
219 	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
220 		cache_enter(dvp, *vpp, cnp);
221 
222 out:
223 #ifdef INVARIANTS
224 	/*
225 	 * If there were no errors, *vpp cannot be null and it must be
226 	 * locked.
227 	 */
228 	if (error == 0) {
229 		MPASS(*vpp != NULLVP);
230 		ASSERT_VOP_LOCKED(*vpp, __func__);
231 	} else {
232 		MPASS(*vpp == NULL);
233 	}
234 #endif
235 
236 	return (error);
237 }
238 
239 static int
tmpfs_cached_lookup(struct vop_cachedlookup_args * v)240 tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
241 {
242 
243 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
244 }
245 
246 static int
tmpfs_lookup(struct vop_lookup_args * v)247 tmpfs_lookup(struct vop_lookup_args *v)
248 {
249 	struct vnode *dvp = v->a_dvp;
250 	struct vnode **vpp = v->a_vpp;
251 	struct componentname *cnp = v->a_cnp;
252 	int error;
253 
254 	/* Check accessibility of requested node as a first step. */
255 	error = vn_dir_check_exec(dvp, cnp);
256 	if (error != 0)
257 		return (error);
258 
259 	return (tmpfs_lookup1(dvp, vpp, cnp));
260 }
261 
262 static int
tmpfs_create(struct vop_create_args * v)263 tmpfs_create(struct vop_create_args *v)
264 {
265 	struct vnode *dvp = v->a_dvp;
266 	struct vnode **vpp = v->a_vpp;
267 	struct componentname *cnp = v->a_cnp;
268 	struct vattr *vap = v->a_vap;
269 	int error;
270 
271 	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
272 
273 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
274 	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
275 		cache_enter(dvp, *vpp, cnp);
276 	return (error);
277 }
278 
279 static int
tmpfs_mknod(struct vop_mknod_args * v)280 tmpfs_mknod(struct vop_mknod_args *v)
281 {
282 	struct vnode *dvp = v->a_dvp;
283 	struct vnode **vpp = v->a_vpp;
284 	struct componentname *cnp = v->a_cnp;
285 	struct vattr *vap = v->a_vap;
286 
287 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
288 	    vap->va_type != VFIFO)
289 		return (EINVAL);
290 
291 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
292 }
293 
/*
 * File operations table that tmpfs_open() installs, via finit_vnode(), on
 * file descriptors opened for regular files.  It is only declared here;
 * its contents are filled in outside the visible part of this file.
 */
294 struct fileops tmpfs_fnops;
295 
296 static int
tmpfs_open(struct vop_open_args * v)297 tmpfs_open(struct vop_open_args *v)
298 {
299 	struct vnode *vp;
300 	struct tmpfs_node *node;
301 	struct file *fp;
302 	int error, mode;
303 
304 	vp = v->a_vp;
305 	mode = v->a_mode;
306 	node = VP_TO_TMPFS_NODE(vp);
307 
308 	/*
309 	 * The file is still active but all its names have been removed
310 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
311 	 * it is about to die.
312 	 */
313 	if (node->tn_links < 1)
314 		return (ENOENT);
315 
316 	/* If the file is marked append-only, deny write requests. */
317 	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
318 		error = EPERM;
319 	else {
320 		error = 0;
321 		/* For regular files, the call below is nop. */
322 		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
323 		    OBJ_DEAD) == 0, ("dead object"));
324 		vnode_create_vobject(vp, node->tn_size, v->a_td);
325 	}
326 
327 	fp = v->a_fp;
328 	MPASS(fp == NULL || fp->f_data == NULL);
329 	if (error == 0 && fp != NULL && vp->v_type == VREG) {
330 		tmpfs_ref_node(node);
331 		finit_vnode(fp, mode, node, &tmpfs_fnops);
332 	}
333 
334 	return (error);
335 }
336 
337 static int
tmpfs_close(struct vop_close_args * v)338 tmpfs_close(struct vop_close_args *v)
339 {
340 	struct vnode *vp = v->a_vp;
341 
342 	/* Update node times. */
343 	tmpfs_update(vp);
344 
345 	return (0);
346 }
347 
348 int
tmpfs_fo_close(struct file * fp,struct thread * td)349 tmpfs_fo_close(struct file *fp, struct thread *td)
350 {
351 	struct tmpfs_node *node;
352 
353 	node = fp->f_data;
354 	if (node != NULL) {
355 		MPASS(node->tn_type == VREG);
356 		tmpfs_free_node(node->tn_reg.tn_tmp, node);
357 	}
358 	return (vnops.fo_close(fp, td));
359 }
360 
361 /*
362  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
363  * the comment above cache_fplookup for details.
364  */
365 int
tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args * v)366 tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v)
367 {
368 	struct vnode *vp;
369 	struct tmpfs_node *node;
370 	struct ucred *cred;
371 	mode_t all_x, mode;
372 
373 	vp = v->a_vp;
374 	node = VP_TO_TMPFS_NODE_SMR(vp);
375 	if (__predict_false(node == NULL))
376 		return (EAGAIN);
377 
378 	all_x = S_IXUSR | S_IXGRP | S_IXOTH;
379 	mode = atomic_load_short(&node->tn_mode);
380 	if (__predict_true((mode & all_x) == all_x))
381 		return (0);
382 
383 	cred = v->a_cred;
384 	return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred));
385 }
386 
387 int
tmpfs_access(struct vop_access_args * v)388 tmpfs_access(struct vop_access_args *v)
389 {
390 	struct vnode *vp = v->a_vp;
391 	accmode_t accmode = v->a_accmode;
392 	struct ucred *cred = v->a_cred;
393 	mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH;
394 	int error;
395 	struct tmpfs_node *node;
396 
397 	MPASS(VOP_ISLOCKED(vp));
398 
399 	node = VP_TO_TMPFS_NODE(vp);
400 
401 	/*
402 	 * Common case path lookup.
403 	 */
404 	if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x))
405 		return (0);
406 
407 	switch (vp->v_type) {
408 	case VDIR:
409 		/* FALLTHROUGH */
410 	case VLNK:
411 		/* FALLTHROUGH */
412 	case VREG:
413 		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
414 			error = EROFS;
415 			goto out;
416 		}
417 		break;
418 
419 	case VBLK:
420 		/* FALLTHROUGH */
421 	case VCHR:
422 		/* FALLTHROUGH */
423 	case VSOCK:
424 		/* FALLTHROUGH */
425 	case VFIFO:
426 		break;
427 
428 	default:
429 		error = EINVAL;
430 		goto out;
431 	}
432 
433 	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
434 		error = EPERM;
435 		goto out;
436 	}
437 
438 	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
439 	    accmode, cred);
440 
441 out:
442 	MPASS(VOP_ISLOCKED(vp));
443 
444 	return (error);
445 }
446 
447 int
tmpfs_stat(struct vop_stat_args * v)448 tmpfs_stat(struct vop_stat_args *v)
449 {
450 	struct vnode *vp = v->a_vp;
451 	struct stat *sb = v->a_sb;
452 	struct tmpfs_node *node;
453 	int error;
454 
455 	node = VP_TO_TMPFS_NODE(vp);
456 
457 	tmpfs_update_getattr(vp);
458 
459 	error = vop_stat_helper_pre(v);
460 	if (__predict_false(error))
461 		return (error);
462 
463 	sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
464 	sb->st_ino = node->tn_id;
465 	sb->st_mode = node->tn_mode | VTTOIF(vp->v_type);
466 	sb->st_nlink = node->tn_links;
467 	sb->st_uid = node->tn_uid;
468 	sb->st_gid = node->tn_gid;
469 	sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
470 		node->tn_rdev : NODEV;
471 	sb->st_size = node->tn_size;
472 	sb->st_atim.tv_sec = node->tn_atime.tv_sec;
473 	sb->st_atim.tv_nsec = node->tn_atime.tv_nsec;
474 	sb->st_mtim.tv_sec = node->tn_mtime.tv_sec;
475 	sb->st_mtim.tv_nsec = node->tn_mtime.tv_nsec;
476 	sb->st_ctim.tv_sec = node->tn_ctime.tv_sec;
477 	sb->st_ctim.tv_nsec = node->tn_ctime.tv_nsec;
478 	sb->st_birthtim.tv_sec = node->tn_birthtime.tv_sec;
479 	sb->st_birthtim.tv_nsec = node->tn_birthtime.tv_nsec;
480 	sb->st_blksize = PAGE_SIZE;
481 	sb->st_flags = node->tn_flags;
482 	sb->st_gen = node->tn_gen;
483 	if (vp->v_type == VREG) {
484 #ifdef __ILP32__
485 		vm_object_t obj = node->tn_reg.tn_aobj;
486 
487 		/* Handle torn read */
488 		VM_OBJECT_RLOCK(obj);
489 #endif
490 		sb->st_blocks = ptoa(node->tn_reg.tn_pages);
491 #ifdef __ILP32__
492 		VM_OBJECT_RUNLOCK(obj);
493 #endif
494 	} else {
495 		sb->st_blocks = node->tn_size;
496 	}
497 	sb->st_blocks /= S_BLKSIZE;
498 	return (vop_stat_helper_post(v, error));
499 }
500 
501 int
tmpfs_getattr(struct vop_getattr_args * v)502 tmpfs_getattr(struct vop_getattr_args *v)
503 {
504 	struct vnode *vp = v->a_vp;
505 	struct vattr *vap = v->a_vap;
506 	struct tmpfs_node *node;
507 
508 	node = VP_TO_TMPFS_NODE(vp);
509 
510 	tmpfs_update_getattr(vp);
511 
512 	vap->va_type = vp->v_type;
513 	vap->va_mode = node->tn_mode;
514 	vap->va_nlink = node->tn_links;
515 	vap->va_uid = node->tn_uid;
516 	vap->va_gid = node->tn_gid;
517 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
518 	vap->va_fileid = node->tn_id;
519 	vap->va_size = node->tn_size;
520 	vap->va_blocksize = PAGE_SIZE;
521 	vap->va_atime = node->tn_atime;
522 	vap->va_mtime = node->tn_mtime;
523 	vap->va_ctime = node->tn_ctime;
524 	vap->va_birthtime = node->tn_birthtime;
525 	vap->va_gen = node->tn_gen;
526 	vap->va_flags = node->tn_flags;
527 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
528 	    node->tn_rdev : NODEV;
529 	if (vp->v_type == VREG) {
530 #ifdef __ILP32__
531 		vm_object_t obj = node->tn_reg.tn_aobj;
532 
533 		VM_OBJECT_RLOCK(obj);
534 #endif
535 		vap->va_bytes = ptoa(node->tn_reg.tn_pages);
536 #ifdef __ILP32__
537 		VM_OBJECT_RUNLOCK(obj);
538 #endif
539 	} else {
540 		vap->va_bytes = node->tn_size;
541 	}
542 	vap->va_filerev = 0;
543 
544 	return (0);
545 }
546 
547 int
tmpfs_setattr(struct vop_setattr_args * v)548 tmpfs_setattr(struct vop_setattr_args *v)
549 {
550 	struct vnode *vp = v->a_vp;
551 	struct vattr *vap = v->a_vap;
552 	struct ucred *cred = v->a_cred;
553 	struct thread *td = curthread;
554 
555 	int error;
556 
557 	ASSERT_VOP_IN_SEQC(vp);
558 
559 	error = 0;
560 
561 	/* Abort if any unsettable attribute is given. */
562 	if (vap->va_type != VNON ||
563 	    vap->va_nlink != VNOVAL ||
564 	    vap->va_fsid != VNOVAL ||
565 	    vap->va_fileid != VNOVAL ||
566 	    vap->va_blocksize != VNOVAL ||
567 	    vap->va_gen != VNOVAL ||
568 	    vap->va_rdev != VNOVAL ||
569 	    vap->va_bytes != VNOVAL)
570 		error = EINVAL;
571 
572 	if (error == 0 && (vap->va_flags != VNOVAL))
573 		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
574 
575 	if (error == 0 && (vap->va_size != VNOVAL))
576 		error = tmpfs_chsize(vp, vap->va_size, cred, td);
577 
578 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
579 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
580 
581 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
582 		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
583 
584 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
585 	    vap->va_atime.tv_nsec != VNOVAL) ||
586 	    (vap->va_mtime.tv_sec != VNOVAL &&
587 	    vap->va_mtime.tv_nsec != VNOVAL) ||
588 	    (vap->va_birthtime.tv_sec != VNOVAL &&
589 	    vap->va_birthtime.tv_nsec != VNOVAL)))
590 		error = tmpfs_chtimes(vp, vap, cred, td);
591 
592 	/*
593 	 * Update the node times.  We give preference to the error codes
594 	 * generated by this function rather than the ones that may arise
595 	 * from tmpfs_update.
596 	 */
597 	tmpfs_update(vp);
598 
599 	return (error);
600 }
601 
602 static int
tmpfs_read(struct vop_read_args * v)603 tmpfs_read(struct vop_read_args *v)
604 {
605 	struct vnode *vp;
606 	struct uio *uio;
607 	struct tmpfs_node *node;
608 
609 	vp = v->a_vp;
610 	if (vp->v_type != VREG)
611 		return (EISDIR);
612 	uio = v->a_uio;
613 	if (uio->uio_offset < 0)
614 		return (EINVAL);
615 	node = VP_TO_TMPFS_NODE(vp);
616 	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
617 	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
618 }
619 
620 static int
tmpfs_read_pgcache(struct vop_read_pgcache_args * v)621 tmpfs_read_pgcache(struct vop_read_pgcache_args *v)
622 {
623 	struct vnode *vp;
624 	struct tmpfs_node *node;
625 	vm_object_t object;
626 	off_t size;
627 	int error;
628 
629 	vp = v->a_vp;
630 	VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp);
631 
632 	if (v->a_uio->uio_offset < 0)
633 		return (EINVAL);
634 
635 	error = EJUSTRETURN;
636 	vfs_smr_enter();
637 
638 	node = VP_TO_TMPFS_NODE_SMR(vp);
639 	if (node == NULL)
640 		goto out_smr;
641 	MPASS(node->tn_type == VREG);
642 	MPASS(node->tn_refcount >= 1);
643 	object = node->tn_reg.tn_aobj;
644 	if (object == NULL)
645 		goto out_smr;
646 
647 	MPASS(object->type == tmpfs_pager_type);
648 	MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_SWAP)) ==
649 	    OBJ_SWAP);
650 	if (!VN_IS_DOOMED(vp)) {
651 		/* size cannot become shorter due to rangelock. */
652 		size = node->tn_size;
653 		tmpfs_set_accessed(node->tn_reg.tn_tmp, node);
654 		vfs_smr_exit();
655 		error = uiomove_object(object, size, v->a_uio);
656 		return (error);
657 	}
658 out_smr:
659 	vfs_smr_exit();
660 	return (error);
661 }
662 
663 static int
tmpfs_write(struct vop_write_args * v)664 tmpfs_write(struct vop_write_args *v)
665 {
666 	struct vnode *vp;
667 	struct uio *uio;
668 	struct tmpfs_node *node;
669 	off_t oldsize;
670 	ssize_t r;
671 	int error, ioflag;
672 	mode_t newmode;
673 
674 	vp = v->a_vp;
675 	uio = v->a_uio;
676 	ioflag = v->a_ioflag;
677 	error = 0;
678 	node = VP_TO_TMPFS_NODE(vp);
679 	oldsize = node->tn_size;
680 
681 	if (uio->uio_offset < 0 || vp->v_type != VREG)
682 		return (EINVAL);
683 	if (uio->uio_resid == 0)
684 		return (0);
685 	if (ioflag & IO_APPEND)
686 		uio->uio_offset = node->tn_size;
687 	error = vn_rlimit_fsizex(vp, uio, VFS_TO_TMPFS(vp->v_mount)->
688 	    tm_maxfilesize, &r, uio->uio_td);
689 	if (error != 0) {
690 		vn_rlimit_fsizex_res(uio, r);
691 		return (error);
692 	}
693 
694 	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
695 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
696 		    FALSE);
697 		if (error != 0)
698 			goto out;
699 	}
700 
701 	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
702 	node->tn_status |= TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED;
703 	node->tn_accessed = true;
704 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
705 		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
706 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
707 			vn_seqc_write_begin(vp);
708 			atomic_store_short(&node->tn_mode, newmode);
709 			vn_seqc_write_end(vp);
710 		}
711 	}
712 	if (error != 0)
713 		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
714 
715 out:
716 	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
717 	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
718 
719 	vn_rlimit_fsizex_res(uio, r);
720 	return (error);
721 }
722 
723 static int
tmpfs_fsync(struct vop_fsync_args * v)724 tmpfs_fsync(struct vop_fsync_args *v)
725 {
726 	struct vnode *vp = v->a_vp;
727 
728 	tmpfs_check_mtime(vp);
729 	tmpfs_update(vp);
730 
731 	return (0);
732 }
733 
734 static int
tmpfs_remove(struct vop_remove_args * v)735 tmpfs_remove(struct vop_remove_args *v)
736 {
737 	struct vnode *dvp = v->a_dvp;
738 	struct vnode *vp = v->a_vp;
739 
740 	int error;
741 	struct tmpfs_dirent *de;
742 	struct tmpfs_mount *tmp;
743 	struct tmpfs_node *dnode;
744 	struct tmpfs_node *node;
745 
746 	if (vp->v_type == VDIR) {
747 		error = EISDIR;
748 		goto out;
749 	}
750 
751 	dnode = VP_TO_TMPFS_DIR(dvp);
752 	node = VP_TO_TMPFS_NODE(vp);
753 	tmp = VFS_TO_TMPFS(vp->v_mount);
754 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
755 	MPASS(de != NULL);
756 
757 	/* Files marked as immutable or append-only cannot be deleted. */
758 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
759 	    (dnode->tn_flags & APPEND)) {
760 		error = EPERM;
761 		goto out;
762 	}
763 
764 	/* Remove the entry from the directory; as it is a file, we do not
765 	 * have to change the number of hard links of the directory. */
766 	tmpfs_dir_detach(dvp, de);
767 	if (v->a_cnp->cn_flags & DOWHITEOUT)
768 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
769 
770 	/* Free the directory entry we just deleted.  Note that the node
771 	 * referred by it will not be removed until the vnode is really
772 	 * reclaimed. */
773 	tmpfs_free_dirent(tmp, de);
774 
775 	node->tn_status |= TMPFS_NODE_CHANGED;
776 	node->tn_accessed = true;
777 	error = 0;
778 
779 out:
780 	return (error);
781 }
782 
783 static int
tmpfs_link(struct vop_link_args * v)784 tmpfs_link(struct vop_link_args *v)
785 {
786 	struct vnode *dvp = v->a_tdvp;
787 	struct vnode *vp = v->a_vp;
788 	struct componentname *cnp = v->a_cnp;
789 
790 	int error;
791 	struct tmpfs_dirent *de;
792 	struct tmpfs_node *node;
793 
794 	MPASS(cnp->cn_flags & HASBUF);
795 	MPASS(dvp != vp); /* XXX When can this be false? */
796 	node = VP_TO_TMPFS_NODE(vp);
797 
798 	/* Ensure that we do not overflow the maximum number of links imposed
799 	 * by the system. */
800 	MPASS(node->tn_links <= TMPFS_LINK_MAX);
801 	if (node->tn_links == TMPFS_LINK_MAX) {
802 		error = EMLINK;
803 		goto out;
804 	}
805 
806 	/* We cannot create links of files marked immutable or append-only. */
807 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
808 		error = EPERM;
809 		goto out;
810 	}
811 
812 	/* Allocate a new directory entry to represent the node. */
813 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
814 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
815 	if (error != 0)
816 		goto out;
817 
818 	/* Insert the new directory entry into the appropriate directory. */
819 	if (cnp->cn_flags & ISWHITEOUT)
820 		tmpfs_dir_whiteout_remove(dvp, cnp);
821 	tmpfs_dir_attach(dvp, de);
822 
823 	/* vp link count has changed, so update node times. */
824 	node->tn_status |= TMPFS_NODE_CHANGED;
825 	tmpfs_update(vp);
826 
827 	error = 0;
828 
829 out:
830 	return (error);
831 }
832 
833 /*
834  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
835  * fail to acquire any lock in the path we will drop all held locks,
836  * acquire the new lock in a blocking fashion, and then release it and
837  * restart the rename.  This acquire/release step ensures that we do not
838  * spin on a lock waiting for release.  On error release all vnode locks
839  * and decrement references the way tmpfs_rename() would do.
840  */
/*
 * Arguments:
 *   fdvp, tdvp   source and target directory vnodes.
 *   fvpp, tvpp   in/out: source and target vnodes.  Either may be replaced
 *                by a freshly resolved vnode (the old one is vrele()d);
 *                *tvpp may become NULL if the target entry disappeared.
 *   fcnp, tcnp   component names used to re-resolve the two entries.
 *
 * On entry the function unlocks tdvp and, if present and distinct from
 * tdvp, *tvpp; tmpfs_rename() calls it after a non-blocking lock of fdvp
 * failed.
 *
 * Returns 0 with fdvp and tdvp exclusively locked, *fvpp unlocked, and
 * *tvpp (when non-NULL) still holding the lock obtained from
 * tmpfs_alloc_vp().  On failure every lock taken here has been dropped,
 * fdvp, *fvpp, tdvp and a non-NULL *tvpp have been vrele()d, and an errno
 * value is returned.  In both cases the number of relock passes is added
 * to tmpfs_rename_restarts.
 */
841 static int
tmpfs_rename_relock(struct vnode * fdvp,struct vnode ** fvpp,struct vnode * tdvp,struct vnode ** tvpp,struct componentname * fcnp,struct componentname * tcnp)842 tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
843     struct vnode *tdvp, struct vnode **tvpp,
844     struct componentname *fcnp, struct componentname *tcnp)
845 {
846 	struct vnode *nvp;
847 	struct mount *mp;
848 	struct tmpfs_dirent *de;
849 	int error, restarts = 0;
850 
	/* Start from a state in which this thread holds none of the locks. */
851 	VOP_UNLOCK(tdvp);
852 	if (*tvpp != NULL && *tvpp != tdvp)
853 		VOP_UNLOCK(*tvpp);
854 	mp = fdvp->v_mount;
855 
856 relock:
857 	restarts += 1;
	/* fdvp is the only lock acquired in a blocking fashion up front. */
858 	error = vn_lock(fdvp, LK_EXCLUSIVE);
859 	if (error)
860 		goto releout;
861 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		/*
		 * tdvp is contended: drop fdvp, wait for tdvp, release it
		 * again and start over.
		 */
862 		VOP_UNLOCK(fdvp);
863 		error = vn_lock(tdvp, LK_EXCLUSIVE);
864 		if (error)
865 			goto releout;
866 		VOP_UNLOCK(tdvp);
867 		goto relock;
868 	}
869 	/*
870 	 * Re-resolve fvp to be certain it still exists and fetch the
871 	 * correct vnode.
872 	 */
873 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
874 	if (de == NULL) {
875 		VOP_UNLOCK(fdvp);
876 		VOP_UNLOCK(tdvp);
		/* "." and ".." yield EINVAL; a vanished entry yields ENOENT. */
877 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
878 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
879 			error = EINVAL;
880 		else
881 			error = ENOENT;
882 		goto releout;
883 	}
884 	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
885 	if (error != 0) {
886 		VOP_UNLOCK(fdvp);
887 		VOP_UNLOCK(tdvp);
888 		if (error != EBUSY)
889 			goto releout;
		/*
		 * NOTE(review): de->td_node is read here after both directory
		 * locks were dropped -- confirm the dirent cannot be freed in
		 * this window.
		 */
890 		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
891 		if (error != 0)
892 			goto releout;
893 		VOP_UNLOCK(nvp);
894 		/*
895 		 * Concurrent rename race.
896 		 */
897 		if (nvp == tdvp) {
898 			vrele(nvp);
899 			error = EINVAL;
900 			goto releout;
901 		}
		/* Adopt the newly resolved source vnode and try again. */
902 		vrele(*fvpp);
903 		*fvpp = nvp;
904 		goto relock;
905 	}
	/* Source resolved without blocking: adopt it and leave it unlocked. */
906 	vrele(*fvpp);
907 	*fvpp = nvp;
908 	VOP_UNLOCK(*fvpp);
909 	/*
910 	 * Re-resolve tvp and acquire the vnode lock if present.
911 	 */
912 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
913 	/*
914 	 * If tvp disappeared we just carry on.
915 	 */
916 	if (de == NULL && *tvpp != NULL) {
917 		vrele(*tvpp);
918 		*tvpp = NULL;
919 	}
920 	/*
921 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
922 	 * if the non-blocking acquire fails.
923 	 */
924 	if (de != NULL) {
925 		nvp = NULL;
926 		error = tmpfs_alloc_vp(mp, de->td_node,
927 		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
928 		if (*tvpp != NULL)
929 			vrele(*tvpp);
		/* *tvpp now tracks the result of the non-blocking attempt. */
930 		*tvpp = nvp;
931 		if (error != 0) {
932 			VOP_UNLOCK(fdvp);
933 			VOP_UNLOCK(tdvp);
934 			if (error != EBUSY)
935 				goto releout;
			/*
			 * NOTE(review): unlike the fvp path above, the vnode
			 * returned by this blocking tmpfs_alloc_vp() is neither
			 * stored in *tvpp nor vrele()d before "goto relock" or
			 * "goto releout" -- looks like its reference may be
			 * leaked; confirm against tmpfs_alloc_vp() semantics.
			 */
936 			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
937 			    &nvp);
938 			if (error != 0)
939 				goto releout;
940 			VOP_UNLOCK(nvp);
941 			/*
942 			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
943 			 */
944 			if (nvp == fdvp) {
945 				error = ENOTEMPTY;
946 				goto releout;
947 			}
948 			goto relock;
949 		}
950 	}
	/* Success: account for the passes made and return with locks held. */
951 	tmpfs_rename_restarts += restarts;
952 
953 	return (0);
954 
955 releout:
	/* Failure: all locks are already dropped; release the references. */
956 	vrele(fdvp);
957 	vrele(*fvpp);
958 	vrele(tdvp);
959 	if (*tvpp != NULL)
960 		vrele(*tvpp);
961 	tmpfs_rename_restarts += restarts;
962 
963 	return (error);
964 }
965 
966 static int
tmpfs_rename(struct vop_rename_args * v)967 tmpfs_rename(struct vop_rename_args *v)
968 {
969 	struct vnode *fdvp = v->a_fdvp;
970 	struct vnode *fvp = v->a_fvp;
971 	struct componentname *fcnp = v->a_fcnp;
972 	struct vnode *tdvp = v->a_tdvp;
973 	struct vnode *tvp = v->a_tvp;
974 	struct componentname *tcnp = v->a_tcnp;
975 	char *newname;
976 	struct tmpfs_dirent *de;
977 	struct tmpfs_mount *tmp;
978 	struct tmpfs_node *fdnode;
979 	struct tmpfs_node *fnode;
980 	struct tmpfs_node *tnode;
981 	struct tmpfs_node *tdnode;
982 	int error;
983 	bool want_seqc_end;
984 
985 	MPASS(fcnp->cn_flags & HASBUF);
986 	MPASS(tcnp->cn_flags & HASBUF);
987 
988 	want_seqc_end = false;
989 
990 	/*
991 	 * Disallow cross-device renames.
992 	 * XXX Why isn't this done by the caller?
993 	 */
994 	if (fvp->v_mount != tdvp->v_mount ||
995 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
996 		error = EXDEV;
997 		goto out;
998 	}
999 
1000 	/* If source and target are the same file, there is nothing to do. */
1001 	if (fvp == tvp) {
1002 		error = 0;
1003 		goto out;
1004 	}
1005 
1006 	/*
1007 	 * If we need to move the directory between entries, lock the
1008 	 * source so that we can safely operate on it.
1009 	 */
1010 	if (fdvp != tdvp && fdvp != tvp) {
1011 		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1012 			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
1013 			    fcnp, tcnp);
1014 			if (error != 0)
1015 				return (error);
1016 			ASSERT_VOP_ELOCKED(fdvp,
1017 			    "tmpfs_rename: fdvp not locked");
1018 			ASSERT_VOP_ELOCKED(tdvp,
1019 			    "tmpfs_rename: tdvp not locked");
1020 			if (tvp != NULL)
1021 				ASSERT_VOP_ELOCKED(tvp,
1022 				    "tmpfs_rename: tvp not locked");
1023 			if (fvp == tvp) {
1024 				error = 0;
1025 				goto out_locked;
1026 			}
1027 		}
1028 	}
1029 
1030 	if (tvp != NULL)
1031 		vn_seqc_write_begin(tvp);
1032 	vn_seqc_write_begin(tdvp);
1033 	vn_seqc_write_begin(fvp);
1034 	vn_seqc_write_begin(fdvp);
1035 	want_seqc_end = true;
1036 
1037 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
1038 	tdnode = VP_TO_TMPFS_DIR(tdvp);
1039 	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
1040 	fdnode = VP_TO_TMPFS_DIR(fdvp);
1041 	fnode = VP_TO_TMPFS_NODE(fvp);
1042 	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
1043 
1044 	/*
1045 	 * Entry can disappear before we lock fdvp,
1046 	 * also avoid manipulating '.' and '..' entries.
1047 	 */
1048 	if (de == NULL) {
1049 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
1050 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
1051 			error = EINVAL;
1052 		else
1053 			error = ENOENT;
1054 		goto out_locked;
1055 	}
1056 	MPASS(de->td_node == fnode);
1057 
1058 	/*
1059 	 * If re-naming a directory to another preexisting directory
1060 	 * ensure that the target directory is empty so that its
1061 	 * removal causes no side effects.
1062 	 * Kern_rename guarantees the destination to be a directory
1063 	 * if the source is one.
1064 	 */
1065 	if (tvp != NULL) {
1066 		MPASS(tnode != NULL);
1067 
1068 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1069 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1070 			error = EPERM;
1071 			goto out_locked;
1072 		}
1073 
1074 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1075 			if (tnode->tn_size > 0) {
1076 				error = ENOTEMPTY;
1077 				goto out_locked;
1078 			}
1079 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1080 			error = ENOTDIR;
1081 			goto out_locked;
1082 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1083 			error = EISDIR;
1084 			goto out_locked;
1085 		} else {
1086 			MPASS(fnode->tn_type != VDIR &&
1087 				tnode->tn_type != VDIR);
1088 		}
1089 	}
1090 
1091 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
1092 	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1093 		error = EPERM;
1094 		goto out_locked;
1095 	}
1096 
1097 	/*
1098 	 * Ensure that we have enough memory to hold the new name, if it
1099 	 * has to be changed.
1100 	 */
1101 	if (fcnp->cn_namelen != tcnp->cn_namelen ||
1102 	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
1103 		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
1104 	} else
1105 		newname = NULL;
1106 
1107 	/*
1108 	 * If the node is being moved to another directory, we have to do
1109 	 * the move.
1110 	 */
1111 	if (fdnode != tdnode) {
1112 		/*
1113 		 * In case we are moving a directory, we have to adjust its
1114 		 * parent to point to the new parent.
1115 		 */
1116 		if (de->td_node->tn_type == VDIR) {
1117 			struct tmpfs_node *n;
1118 
1119 			/*
1120 			 * Ensure the target directory is not a child of the
1121 			 * directory being moved.  Otherwise, we'd end up
1122 			 * with stale nodes.
1123 			 */
1124 			n = tdnode;
1125 			/*
1126 			 * TMPFS_LOCK guaranties that no nodes are freed while
1127 			 * traversing the list. Nodes can only be marked as
1128 			 * removed: tn_parent == NULL.
1129 			 */
1130 			TMPFS_LOCK(tmp);
1131 			TMPFS_NODE_LOCK(n);
1132 			while (n != n->tn_dir.tn_parent) {
1133 				struct tmpfs_node *parent;
1134 
1135 				if (n == fnode) {
1136 					TMPFS_NODE_UNLOCK(n);
1137 					TMPFS_UNLOCK(tmp);
1138 					error = EINVAL;
1139 					if (newname != NULL)
1140 						    free(newname, M_TMPFSNAME);
1141 					goto out_locked;
1142 				}
1143 				parent = n->tn_dir.tn_parent;
1144 				TMPFS_NODE_UNLOCK(n);
1145 				if (parent == NULL) {
1146 					n = NULL;
1147 					break;
1148 				}
1149 				TMPFS_NODE_LOCK(parent);
1150 				if (parent->tn_dir.tn_parent == NULL) {
1151 					TMPFS_NODE_UNLOCK(parent);
1152 					n = NULL;
1153 					break;
1154 				}
1155 				n = parent;
1156 			}
1157 			TMPFS_UNLOCK(tmp);
1158 			if (n == NULL) {
1159 				error = EINVAL;
1160 				if (newname != NULL)
1161 					    free(newname, M_TMPFSNAME);
1162 				goto out_locked;
1163 			}
1164 			TMPFS_NODE_UNLOCK(n);
1165 
1166 			/* Adjust the parent pointer. */
1167 			TMPFS_VALIDATE_DIR(fnode);
1168 			TMPFS_NODE_LOCK(de->td_node);
1169 			de->td_node->tn_dir.tn_parent = tdnode;
1170 			TMPFS_NODE_UNLOCK(de->td_node);
1171 
1172 			/*
1173 			 * As a result of changing the target of the '..'
1174 			 * entry, the link count of the source and target
1175 			 * directories has to be adjusted.
1176 			 */
1177 			TMPFS_NODE_LOCK(tdnode);
1178 			TMPFS_ASSERT_LOCKED(tdnode);
1179 			tdnode->tn_links++;
1180 			TMPFS_NODE_UNLOCK(tdnode);
1181 
1182 			TMPFS_NODE_LOCK(fdnode);
1183 			TMPFS_ASSERT_LOCKED(fdnode);
1184 			fdnode->tn_links--;
1185 			TMPFS_NODE_UNLOCK(fdnode);
1186 		}
1187 	}
1188 
1189 	/*
1190 	 * Do the move: just remove the entry from the source directory
1191 	 * and insert it into the target one.
1192 	 */
1193 	tmpfs_dir_detach(fdvp, de);
1194 
1195 	if (fcnp->cn_flags & DOWHITEOUT)
1196 		tmpfs_dir_whiteout_add(fdvp, fcnp);
1197 	if (tcnp->cn_flags & ISWHITEOUT)
1198 		tmpfs_dir_whiteout_remove(tdvp, tcnp);
1199 
1200 	/*
1201 	 * If the name has changed, we need to make it effective by changing
1202 	 * it in the directory entry.
1203 	 */
1204 	if (newname != NULL) {
1205 		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
1206 
1207 		free(de->ud.td_name, M_TMPFSNAME);
1208 		de->ud.td_name = newname;
1209 		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
1210 
1211 		fnode->tn_status |= TMPFS_NODE_CHANGED;
1212 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1213 	}
1214 
1215 	/*
1216 	 * If we are overwriting an entry, we have to remove the old one
1217 	 * from the target directory.
1218 	 */
1219 	if (tvp != NULL) {
1220 		struct tmpfs_dirent *tde;
1221 
1222 		/* Remove the old entry from the target directory. */
1223 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
1224 		tmpfs_dir_detach(tdvp, tde);
1225 
1226 		/*
1227 		 * Free the directory entry we just deleted.  Note that the
1228 		 * node referred by it will not be removed until the vnode is
1229 		 * really reclaimed.
1230 		 */
1231 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1232 	}
1233 
1234 	tmpfs_dir_attach(tdvp, de);
1235 
1236 	if (tmpfs_use_nc(fvp)) {
1237 		cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp);
1238 	}
1239 
1240 	error = 0;
1241 
1242 out_locked:
1243 	if (fdvp != tdvp && fdvp != tvp)
1244 		VOP_UNLOCK(fdvp);
1245 
1246 out:
1247 	if (want_seqc_end) {
1248 		if (tvp != NULL)
1249 			vn_seqc_write_end(tvp);
1250 		vn_seqc_write_end(tdvp);
1251 		vn_seqc_write_end(fvp);
1252 		vn_seqc_write_end(fdvp);
1253 	}
1254 
1255 	/*
1256 	 * Release target nodes.
1257 	 * XXX: I don't understand when tdvp can be the same as tvp, but
1258 	 * other code takes care of this...
1259 	 */
1260 	if (tdvp == tvp)
1261 		vrele(tdvp);
1262 	else
1263 		vput(tdvp);
1264 	if (tvp != NULL)
1265 		vput(tvp);
1266 
1267 	/* Release source nodes. */
1268 	vrele(fdvp);
1269 	vrele(fvp);
1270 
1271 	return (error);
1272 }
1273 
1274 static int
tmpfs_mkdir(struct vop_mkdir_args * v)1275 tmpfs_mkdir(struct vop_mkdir_args *v)
1276 {
1277 	struct vnode *dvp = v->a_dvp;
1278 	struct vnode **vpp = v->a_vpp;
1279 	struct componentname *cnp = v->a_cnp;
1280 	struct vattr *vap = v->a_vap;
1281 
1282 	MPASS(vap->va_type == VDIR);
1283 
1284 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
1285 }
1286 
1287 static int
tmpfs_rmdir(struct vop_rmdir_args * v)1288 tmpfs_rmdir(struct vop_rmdir_args *v)
1289 {
1290 	struct vnode *dvp = v->a_dvp;
1291 	struct vnode *vp = v->a_vp;
1292 
1293 	int error;
1294 	struct tmpfs_dirent *de;
1295 	struct tmpfs_mount *tmp;
1296 	struct tmpfs_node *dnode;
1297 	struct tmpfs_node *node;
1298 
1299 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1300 	dnode = VP_TO_TMPFS_DIR(dvp);
1301 	node = VP_TO_TMPFS_DIR(vp);
1302 
1303 	/* Directories with more than two entries ('.' and '..') cannot be
1304 	 * removed. */
1305 	 if (node->tn_size > 0) {
1306 		 error = ENOTEMPTY;
1307 		 goto out;
1308 	 }
1309 
1310 	/* Check flags to see if we are allowed to remove the directory. */
1311 	if ((dnode->tn_flags & APPEND)
1312 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1313 		error = EPERM;
1314 		goto out;
1315 	}
1316 
1317 	/* This invariant holds only if we are not trying to remove "..".
1318 	  * We checked for that above so this is safe now. */
1319 	MPASS(node->tn_dir.tn_parent == dnode);
1320 
1321 	/* Get the directory entry associated with node (vp).  This was
1322 	 * filled by tmpfs_lookup while looking up the entry. */
1323 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
1324 	MPASS(TMPFS_DIRENT_MATCHES(de,
1325 	    v->a_cnp->cn_nameptr,
1326 	    v->a_cnp->cn_namelen));
1327 
1328 	/* Detach the directory entry from the directory (dnode). */
1329 	tmpfs_dir_detach(dvp, de);
1330 	if (v->a_cnp->cn_flags & DOWHITEOUT)
1331 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
1332 
1333 	/* No vnode should be allocated for this entry from this point */
1334 	TMPFS_NODE_LOCK(node);
1335 	node->tn_links--;
1336 	node->tn_dir.tn_parent = NULL;
1337 	node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1338 	node->tn_accessed = true;
1339 
1340 	TMPFS_NODE_UNLOCK(node);
1341 
1342 	TMPFS_NODE_LOCK(dnode);
1343 	dnode->tn_links--;
1344 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1345 	dnode->tn_accessed = true;
1346 	TMPFS_NODE_UNLOCK(dnode);
1347 
1348 	if (tmpfs_use_nc(dvp)) {
1349 		cache_vop_rmdir(dvp, vp);
1350 	}
1351 
1352 	/* Free the directory entry we just deleted.  Note that the node
1353 	 * referred by it will not be removed until the vnode is really
1354 	 * reclaimed. */
1355 	tmpfs_free_dirent(tmp, de);
1356 
1357 	/* Release the deleted vnode (will destroy the node, notify
1358 	 * interested parties and clean it from the cache). */
1359 
1360 	dnode->tn_status |= TMPFS_NODE_CHANGED;
1361 	tmpfs_update(dvp);
1362 
1363 	error = 0;
1364 
1365 out:
1366 	return (error);
1367 }
1368 
1369 static int
tmpfs_symlink(struct vop_symlink_args * v)1370 tmpfs_symlink(struct vop_symlink_args *v)
1371 {
1372 	struct vnode *dvp = v->a_dvp;
1373 	struct vnode **vpp = v->a_vpp;
1374 	struct componentname *cnp = v->a_cnp;
1375 	struct vattr *vap = v->a_vap;
1376 	const char *target = v->a_target;
1377 
1378 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1379 	MPASS(vap->va_type == VLNK);
1380 #else
1381 	vap->va_type = VLNK;
1382 #endif
1383 
1384 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, target));
1385 }
1386 
1387 static int
tmpfs_readdir(struct vop_readdir_args * va)1388 tmpfs_readdir(struct vop_readdir_args *va)
1389 {
1390 	struct vnode *vp;
1391 	struct uio *uio;
1392 	struct tmpfs_mount *tm;
1393 	struct tmpfs_node *node;
1394 	u_long **cookies;
1395 	int *eofflag, *ncookies;
1396 	ssize_t startresid;
1397 	int error, maxcookies;
1398 
1399 	vp = va->a_vp;
1400 	uio = va->a_uio;
1401 	eofflag = va->a_eofflag;
1402 	cookies = va->a_cookies;
1403 	ncookies = va->a_ncookies;
1404 
1405 	/* This operation only makes sense on directory nodes. */
1406 	if (vp->v_type != VDIR)
1407 		return (ENOTDIR);
1408 
1409 	maxcookies = 0;
1410 	node = VP_TO_TMPFS_DIR(vp);
1411 	tm = VFS_TO_TMPFS(vp->v_mount);
1412 
1413 	startresid = uio->uio_resid;
1414 
1415 	/* Allocate cookies for NFS and compat modules. */
1416 	if (cookies != NULL && ncookies != NULL) {
1417 		maxcookies = howmany(node->tn_size,
1418 		    sizeof(struct tmpfs_dirent)) + 2;
1419 		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
1420 		    M_WAITOK);
1421 		*ncookies = 0;
1422 	}
1423 
1424 	if (cookies == NULL)
1425 		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
1426 	else
1427 		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
1428 		    ncookies);
1429 
1430 	/* Buffer was filled without hitting EOF. */
1431 	if (error == EJUSTRETURN)
1432 		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
1433 
1434 	if (error != 0 && cookies != NULL && ncookies != NULL) {
1435 		free(*cookies, M_TEMP);
1436 		*cookies = NULL;
1437 		*ncookies = 0;
1438 	}
1439 
1440 	if (eofflag != NULL)
1441 		*eofflag =
1442 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1443 
1444 	return (error);
1445 }
1446 
1447 static int
tmpfs_readlink(struct vop_readlink_args * v)1448 tmpfs_readlink(struct vop_readlink_args *v)
1449 {
1450 	struct vnode *vp = v->a_vp;
1451 	struct uio *uio = v->a_uio;
1452 
1453 	int error;
1454 	struct tmpfs_node *node;
1455 
1456 	MPASS(uio->uio_offset == 0);
1457 	MPASS(vp->v_type == VLNK);
1458 
1459 	node = VP_TO_TMPFS_NODE(vp);
1460 
1461 	error = uiomove(node->tn_link_target, MIN(node->tn_size, uio->uio_resid),
1462 	    uio);
1463 	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
1464 
1465 	return (error);
1466 }
1467 
1468 /*
1469  * VOP_FPLOOKUP_SYMLINK routines are subject to special circumstances, see
1470  * the comment above cache_fplookup for details.
1471  *
1472  * Check tmpfs_alloc_node for tmpfs-specific synchronisation notes.
1473  */
1474 static int
tmpfs_fplookup_symlink(struct vop_fplookup_symlink_args * v)1475 tmpfs_fplookup_symlink(struct vop_fplookup_symlink_args *v)
1476 {
1477 	struct vnode *vp;
1478 	struct tmpfs_node *node;
1479 	char *symlink;
1480 
1481 	vp = v->a_vp;
1482 	node = VP_TO_TMPFS_NODE_SMR(vp);
1483 	if (__predict_false(node == NULL))
1484 		return (EAGAIN);
1485 	if (!atomic_load_char(&node->tn_link_smr))
1486 		return (EAGAIN);
1487 	symlink = atomic_load_ptr(&node->tn_link_target);
1488 	if (symlink == NULL)
1489 		return (EAGAIN);
1490 
1491 	return (cache_symlink_resolve(v->a_fpl, symlink, node->tn_size));
1492 }
1493 
1494 static int
tmpfs_inactive(struct vop_inactive_args * v)1495 tmpfs_inactive(struct vop_inactive_args *v)
1496 {
1497 	struct vnode *vp;
1498 	struct tmpfs_node *node;
1499 
1500 	vp = v->a_vp;
1501 	node = VP_TO_TMPFS_NODE(vp);
1502 	if (node->tn_links == 0)
1503 		vrecycle(vp);
1504 	else
1505 		tmpfs_check_mtime(vp);
1506 	return (0);
1507 }
1508 
1509 static int
tmpfs_need_inactive(struct vop_need_inactive_args * ap)1510 tmpfs_need_inactive(struct vop_need_inactive_args *ap)
1511 {
1512 	struct vnode *vp;
1513 	struct tmpfs_node *node;
1514 	struct vm_object *obj;
1515 
1516 	vp = ap->a_vp;
1517 	node = VP_TO_TMPFS_NODE(vp);
1518 	if (node->tn_links == 0)
1519 		goto need;
1520 	if (vp->v_type == VREG) {
1521 		obj = vp->v_object;
1522 		if (obj->generation != obj->cleangeneration)
1523 			goto need;
1524 	}
1525 	return (0);
1526 need:
1527 	return (1);
1528 }
1529 
1530 int
tmpfs_reclaim(struct vop_reclaim_args * v)1531 tmpfs_reclaim(struct vop_reclaim_args *v)
1532 {
1533 	struct vnode *vp;
1534 	struct tmpfs_mount *tmp;
1535 	struct tmpfs_node *node;
1536 	bool unlock;
1537 
1538 	vp = v->a_vp;
1539 	node = VP_TO_TMPFS_NODE(vp);
1540 	tmp = VFS_TO_TMPFS(vp->v_mount);
1541 
1542 	if (vp->v_type == VREG)
1543 		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
1544 	vp->v_object = NULL;
1545 
1546 	TMPFS_LOCK(tmp);
1547 	TMPFS_NODE_LOCK(node);
1548 	tmpfs_free_vp(vp);
1549 
1550 	/*
1551 	 * If the node referenced by this vnode was deleted by the user,
1552 	 * we must free its associated data structures (now that the vnode
1553 	 * is being reclaimed).
1554 	 */
1555 	unlock = true;
1556 	if (node->tn_links == 0 &&
1557 	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
1558 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1559 		unlock = !tmpfs_free_node_locked(tmp, node, true);
1560 	}
1561 
1562 	if (unlock) {
1563 		TMPFS_NODE_UNLOCK(node);
1564 		TMPFS_UNLOCK(tmp);
1565 	}
1566 
1567 	MPASS(vp->v_data == NULL);
1568 	return (0);
1569 }
1570 
1571 int
tmpfs_print(struct vop_print_args * v)1572 tmpfs_print(struct vop_print_args *v)
1573 {
1574 	struct vnode *vp = v->a_vp;
1575 
1576 	struct tmpfs_node *node;
1577 
1578 	node = VP_TO_TMPFS_NODE(vp);
1579 
1580 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
1581 	    node, node->tn_flags, (uintmax_t)node->tn_links);
1582 	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
1583 	    node->tn_mode, node->tn_uid, node->tn_gid,
1584 	    (intmax_t)node->tn_size, node->tn_status);
1585 
1586 	if (vp->v_type == VFIFO)
1587 		fifo_printinfo(vp);
1588 
1589 	printf("\n");
1590 
1591 	return (0);
1592 }
1593 
1594 int
tmpfs_pathconf(struct vop_pathconf_args * v)1595 tmpfs_pathconf(struct vop_pathconf_args *v)
1596 {
1597 	struct vnode *vp = v->a_vp;
1598 	int name = v->a_name;
1599 	long *retval = v->a_retval;
1600 
1601 	int error;
1602 
1603 	error = 0;
1604 
1605 	switch (name) {
1606 	case _PC_LINK_MAX:
1607 		*retval = TMPFS_LINK_MAX;
1608 		break;
1609 
1610 	case _PC_SYMLINK_MAX:
1611 		*retval = MAXPATHLEN;
1612 		break;
1613 
1614 	case _PC_NAME_MAX:
1615 		*retval = NAME_MAX;
1616 		break;
1617 
1618 	case _PC_PIPE_BUF:
1619 		if (vp->v_type == VDIR || vp->v_type == VFIFO)
1620 			*retval = PIPE_BUF;
1621 		else
1622 			error = EINVAL;
1623 		break;
1624 
1625 	case _PC_CHOWN_RESTRICTED:
1626 		*retval = 1;
1627 		break;
1628 
1629 	case _PC_NO_TRUNC:
1630 		*retval = 1;
1631 		break;
1632 
1633 	case _PC_SYNC_IO:
1634 		*retval = 1;
1635 		break;
1636 
1637 	case _PC_FILESIZEBITS:
1638 		*retval = 64;
1639 		break;
1640 
1641 	case _PC_MIN_HOLE_SIZE:
1642 		*retval = PAGE_SIZE;
1643 		break;
1644 
1645 	default:
1646 		error = vop_stdpathconf(v);
1647 	}
1648 
1649 	return (error);
1650 }
1651 
1652 static int
tmpfs_vptofh(struct vop_vptofh_args * ap)1653 tmpfs_vptofh(struct vop_vptofh_args *ap)
1654 /*
1655 vop_vptofh {
1656 	IN struct vnode *a_vp;
1657 	IN struct fid *a_fhp;
1658 };
1659 */
1660 {
1661 	struct tmpfs_fid_data tfd;
1662 	struct tmpfs_node *node;
1663 	struct fid *fhp;
1664 
1665 	node = VP_TO_TMPFS_NODE(ap->a_vp);
1666 	fhp = ap->a_fhp;
1667 	fhp->fid_len = sizeof(tfd);
1668 
1669 	/*
1670 	 * Copy into fid_data from the stack to avoid unaligned pointer use.
1671 	 * See the comment in sys/mount.h on struct fid for details.
1672 	 */
1673 	tfd.tfd_id = node->tn_id;
1674 	tfd.tfd_gen = node->tn_gen;
1675 	memcpy(fhp->fid_data, &tfd, fhp->fid_len);
1676 
1677 	return (0);
1678 }
1679 
1680 static int
tmpfs_whiteout(struct vop_whiteout_args * ap)1681 tmpfs_whiteout(struct vop_whiteout_args *ap)
1682 {
1683 	struct vnode *dvp = ap->a_dvp;
1684 	struct componentname *cnp = ap->a_cnp;
1685 	struct tmpfs_dirent *de;
1686 
1687 	switch (ap->a_flags) {
1688 	case LOOKUP:
1689 		return (0);
1690 	case CREATE:
1691 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
1692 		if (de != NULL)
1693 			return (de->td_node == NULL ? 0 : EEXIST);
1694 		return (tmpfs_dir_whiteout_add(dvp, cnp));
1695 	case DELETE:
1696 		tmpfs_dir_whiteout_remove(dvp, cnp);
1697 		return (0);
1698 	default:
1699 		panic("tmpfs_whiteout: unknown op");
1700 	}
1701 }
1702 
1703 static int
tmpfs_vptocnp_dir(struct tmpfs_node * tn,struct tmpfs_node * tnp,struct tmpfs_dirent ** pde)1704 tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
1705     struct tmpfs_dirent **pde)
1706 {
1707 	struct tmpfs_dir_cursor dc;
1708 	struct tmpfs_dirent *de;
1709 
1710 	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
1711 	     de = tmpfs_dir_next(tnp, &dc)) {
1712 		if (de->td_node == tn) {
1713 			*pde = de;
1714 			return (0);
1715 		}
1716 	}
1717 	return (ENOENT);
1718 }
1719 
1720 static int
tmpfs_vptocnp_fill(struct vnode * vp,struct tmpfs_node * tn,struct tmpfs_node * tnp,char * buf,size_t * buflen,struct vnode ** dvp)1721 tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
1722     struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
1723 {
1724 	struct tmpfs_dirent *de;
1725 	int error, i;
1726 
1727 	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
1728 	    dvp);
1729 	if (error != 0)
1730 		return (error);
1731 	error = tmpfs_vptocnp_dir(tn, tnp, &de);
1732 	if (error == 0) {
1733 		i = *buflen;
1734 		i -= de->td_namelen;
1735 		if (i < 0) {
1736 			error = ENOMEM;
1737 		} else {
1738 			bcopy(de->ud.td_name, buf + i, de->td_namelen);
1739 			*buflen = i;
1740 		}
1741 	}
1742 	if (error == 0) {
1743 		if (vp != *dvp)
1744 			VOP_UNLOCK(*dvp);
1745 	} else {
1746 		if (vp != *dvp)
1747 			vput(*dvp);
1748 		else
1749 			vrele(vp);
1750 	}
1751 	return (error);
1752 }
1753 
1754 static int
tmpfs_vptocnp(struct vop_vptocnp_args * ap)1755 tmpfs_vptocnp(struct vop_vptocnp_args *ap)
1756 {
1757 	struct vnode *vp, **dvp;
1758 	struct tmpfs_node *tn, *tnp, *tnp1;
1759 	struct tmpfs_dirent *de;
1760 	struct tmpfs_mount *tm;
1761 	char *buf;
1762 	size_t *buflen;
1763 	int error;
1764 
1765 	vp = ap->a_vp;
1766 	dvp = ap->a_vpp;
1767 	buf = ap->a_buf;
1768 	buflen = ap->a_buflen;
1769 
1770 	tm = VFS_TO_TMPFS(vp->v_mount);
1771 	tn = VP_TO_TMPFS_NODE(vp);
1772 	if (tn->tn_type == VDIR) {
1773 		tnp = tn->tn_dir.tn_parent;
1774 		if (tnp == NULL)
1775 			return (ENOENT);
1776 		tmpfs_ref_node(tnp);
1777 		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
1778 		    buflen, dvp);
1779 		tmpfs_free_node(tm, tnp);
1780 		return (error);
1781 	}
1782 restart:
1783 	TMPFS_LOCK(tm);
1784 restart_locked:
1785 	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
1786 		if (tnp->tn_type != VDIR)
1787 			continue;
1788 		TMPFS_NODE_LOCK(tnp);
1789 		tmpfs_ref_node(tnp);
1790 
1791 		/*
1792 		 * tn_vnode cannot be instantiated while we hold the
1793 		 * node lock, so the directory cannot be changed while
1794 		 * we iterate over it.  Do this to avoid instantiating
1795 		 * vnode for directories which cannot point to our
1796 		 * node.
1797 		 */
1798 		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
1799 		    &de) : 0;
1800 
1801 		if (error == 0) {
1802 			TMPFS_NODE_UNLOCK(tnp);
1803 			TMPFS_UNLOCK(tm);
1804 			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
1805 			    dvp);
1806 			if (error == 0) {
1807 				tmpfs_free_node(tm, tnp);
1808 				return (0);
1809 			}
1810 			if (VN_IS_DOOMED(vp)) {
1811 				tmpfs_free_node(tm, tnp);
1812 				return (ENOENT);
1813 			}
1814 			TMPFS_LOCK(tm);
1815 			TMPFS_NODE_LOCK(tnp);
1816 		}
1817 		if (tmpfs_free_node_locked(tm, tnp, false)) {
1818 			goto restart;
1819 		} else {
1820 			KASSERT(tnp->tn_refcount > 0,
1821 			    ("node %p refcount zero", tnp));
1822 			if (tnp->tn_attached) {
1823 				tnp1 = LIST_NEXT(tnp, tn_entries);
1824 				TMPFS_NODE_UNLOCK(tnp);
1825 			} else {
1826 				TMPFS_NODE_UNLOCK(tnp);
1827 				goto restart_locked;
1828 			}
1829 		}
1830 	}
1831 	TMPFS_UNLOCK(tm);
1832 	return (ENOENT);
1833 }
1834 
1835 static off_t
tmpfs_seek_data_locked(vm_object_t obj,off_t noff)1836 tmpfs_seek_data_locked(vm_object_t obj, off_t noff)
1837 {
1838 	vm_page_t m;
1839 	vm_pindex_t p, p_m, p_swp;
1840 
1841 	p = OFF_TO_IDX(noff);
1842 	m = vm_page_find_least(obj, p);
1843 
1844 	/*
1845 	 * Microoptimize the most common case for SEEK_DATA, where
1846 	 * there is no hole and the page is resident.
1847 	 */
1848 	if (m != NULL && vm_page_any_valid(m) && m->pindex == p)
1849 		return (noff);
1850 
1851 	p_swp = swap_pager_find_least(obj, p);
1852 	if (p_swp == p)
1853 		return (noff);
1854 
1855 	p_m = m == NULL ? obj->size : m->pindex;
1856 	return (IDX_TO_OFF(MIN(p_m, p_swp)));
1857 }
1858 
1859 static off_t
tmpfs_seek_next(off_t noff)1860 tmpfs_seek_next(off_t noff)
1861 {
1862 	return (noff + PAGE_SIZE - (noff & PAGE_MASK));
1863 }
1864 
1865 static int
tmpfs_seek_clamp(struct tmpfs_node * tn,off_t * noff,bool seekdata)1866 tmpfs_seek_clamp(struct tmpfs_node *tn, off_t *noff, bool seekdata)
1867 {
1868 	if (*noff < tn->tn_size)
1869 		return (0);
1870 	if (seekdata)
1871 		return (ENXIO);
1872 	*noff = tn->tn_size;
1873 	return (0);
1874 }
1875 
1876 static off_t
tmpfs_seek_hole_locked(vm_object_t obj,off_t noff)1877 tmpfs_seek_hole_locked(vm_object_t obj, off_t noff)
1878 {
1879 	vm_page_t m;
1880 	vm_pindex_t p, p_swp;
1881 
1882 	for (;; noff = tmpfs_seek_next(noff)) {
1883 		/*
1884 		 * Walk over the largest sequential run of the valid pages.
1885 		 */
1886 		for (m = vm_page_lookup(obj, OFF_TO_IDX(noff));
1887 		    m != NULL && vm_page_any_valid(m);
1888 		    m = vm_page_next(m), noff = tmpfs_seek_next(noff))
1889 			;
1890 
1891 		/*
1892 		 * Found a hole in the object's page queue.  Check if
1893 		 * there is a hole in the swap at the same place.
1894 		 */
1895 		p = OFF_TO_IDX(noff);
1896 		p_swp = swap_pager_find_least(obj, p);
1897 		if (p_swp != p) {
1898 			noff = IDX_TO_OFF(p);
1899 			break;
1900 		}
1901 	}
1902 	return (noff);
1903 }
1904 
1905 static int
tmpfs_seek_datahole(struct vnode * vp,off_t * off,bool seekdata)1906 tmpfs_seek_datahole(struct vnode *vp, off_t *off, bool seekdata)
1907 {
1908 	struct tmpfs_node *tn;
1909 	vm_object_t obj;
1910 	off_t noff;
1911 	int error;
1912 
1913 	if (vp->v_type != VREG)
1914 		return (ENOTTY);
1915 	tn = VP_TO_TMPFS_NODE(vp);
1916 	noff = *off;
1917 	if (noff < 0)
1918 		return (ENXIO);
1919 	error = tmpfs_seek_clamp(tn, &noff, seekdata);
1920 	if (error != 0)
1921 		return (error);
1922 	obj = tn->tn_reg.tn_aobj;
1923 
1924 	VM_OBJECT_RLOCK(obj);
1925 	noff = seekdata ? tmpfs_seek_data_locked(obj, noff) :
1926 	    tmpfs_seek_hole_locked(obj, noff);
1927 	VM_OBJECT_RUNLOCK(obj);
1928 
1929 	error = tmpfs_seek_clamp(tn, &noff, seekdata);
1930 	if (error == 0)
1931 		*off = noff;
1932 	return (error);
1933 }
1934 
1935 static int
tmpfs_ioctl(struct vop_ioctl_args * ap)1936 tmpfs_ioctl(struct vop_ioctl_args *ap)
1937 {
1938 	struct vnode *vp = ap->a_vp;
1939 	int error = 0;
1940 
1941 	switch (ap->a_command) {
1942 	case FIOSEEKDATA:
1943 	case FIOSEEKHOLE:
1944 		error = vn_lock(vp, LK_SHARED);
1945 		if (error != 0) {
1946 			error = EBADF;
1947 			break;
1948 		}
1949 		error = tmpfs_seek_datahole(vp, (off_t *)ap->a_data,
1950 		    ap->a_command == FIOSEEKDATA);
1951 		VOP_UNLOCK(vp);
1952 		break;
1953 	default:
1954 		error = ENOTTY;
1955 		break;
1956 	}
1957 	return (error);
1958 }
1959 
1960 /*
1961  * Vnode operations vector used for files stored in a tmpfs file system.
1962  */
1963 struct vop_vector tmpfs_vnodeop_entries = {
1964 	.vop_default =			&default_vnodeops,
1965 	.vop_lookup =			vfs_cache_lookup,
1966 	.vop_cachedlookup =		tmpfs_cached_lookup,
1967 	.vop_create =			tmpfs_create,
1968 	.vop_mknod =			tmpfs_mknod,
1969 	.vop_open =			tmpfs_open,
1970 	.vop_close =			tmpfs_close,
1971 	.vop_fplookup_vexec =		tmpfs_fplookup_vexec,
1972 	.vop_fplookup_symlink =		tmpfs_fplookup_symlink,
1973 	.vop_access =			tmpfs_access,
1974 	.vop_stat =			tmpfs_stat,
1975 	.vop_getattr =			tmpfs_getattr,
1976 	.vop_setattr =			tmpfs_setattr,
1977 	.vop_read =			tmpfs_read,
1978 	.vop_read_pgcache =		tmpfs_read_pgcache,
1979 	.vop_write =			tmpfs_write,
1980 	.vop_fsync =			tmpfs_fsync,
1981 	.vop_remove =			tmpfs_remove,
1982 	.vop_link =			tmpfs_link,
1983 	.vop_rename =			tmpfs_rename,
1984 	.vop_mkdir =			tmpfs_mkdir,
1985 	.vop_rmdir =			tmpfs_rmdir,
1986 	.vop_symlink =			tmpfs_symlink,
1987 	.vop_readdir =			tmpfs_readdir,
1988 	.vop_readlink =			tmpfs_readlink,
1989 	.vop_inactive =			tmpfs_inactive,
1990 	.vop_need_inactive =		tmpfs_need_inactive,
1991 	.vop_reclaim =			tmpfs_reclaim,
1992 	.vop_print =			tmpfs_print,
1993 	.vop_pathconf =			tmpfs_pathconf,
1994 	.vop_vptofh =			tmpfs_vptofh,
1995 	.vop_whiteout =			tmpfs_whiteout,
1996 	.vop_bmap =			VOP_EOPNOTSUPP,
1997 	.vop_vptocnp =			tmpfs_vptocnp,
1998 	.vop_lock1 =			vop_lock,
1999 	.vop_unlock = 			vop_unlock,
2000 	.vop_islocked = 		vop_islocked,
2001 	.vop_ioctl =			tmpfs_ioctl,
2002 };
2003 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);
2004 
2005 /*
2006  * Same vector for mounts which do not use namecache.
2007  */
2008 struct vop_vector tmpfs_vnodeop_nonc_entries = {
2009 	.vop_default =			&tmpfs_vnodeop_entries,
2010 	.vop_lookup =			tmpfs_lookup,
2011 };
2012 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);
2013