1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: stable/9/sys/nfsclient/nfs_vnops.c 247502 2013-02-28 21:57:38Z jhb $");
37 
38 /*
39  * vnode op calls for Sun NFS version 2 and 3
40  */
41 
42 #include "opt_inet.h"
43 #include "opt_kdtrace.h"
44 
45 #include <sys/param.h>
46 #include <sys/kernel.h>
47 #include <sys/systm.h>
48 #include <sys/resourcevar.h>
49 #include <sys/proc.h>
50 #include <sys/mount.h>
51 #include <sys/bio.h>
52 #include <sys/buf.h>
53 #include <sys/jail.h>
54 #include <sys/malloc.h>
55 #include <sys/mbuf.h>
56 #include <sys/namei.h>
57 #include <sys/socket.h>
58 #include <sys/vnode.h>
59 #include <sys/dirent.h>
60 #include <sys/fcntl.h>
61 #include <sys/lockf.h>
62 #include <sys/stat.h>
63 #include <sys/sysctl.h>
64 #include <sys/signalvar.h>
65 
66 #include <vm/vm.h>
67 #include <vm/vm_extern.h>
68 #include <vm/vm_object.h>
69 
70 #include <fs/fifofs/fifo.h>
71 
72 #include <nfs/nfsproto.h>
73 #include <nfsclient/nfs.h>
74 #include <nfsclient/nfsnode.h>
75 #include <nfsclient/nfsmount.h>
76 #include <nfs/nfs_kdtrace.h>
77 #include <nfs/nfs_lock.h>
78 #include <nfs/xdr_subs.h>
79 #include <nfsclient/nfsm_subs.h>
80 
81 #include <net/if.h>
82 #include <netinet/in.h>
83 #include <netinet/in_var.h>
84 
85 #include <machine/stdarg.h>
86 
#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * DTrace hook pointers and probe ids for the NFS access cache.  They are
 * only defined when the kernel is built with KDTRACE_HOOKS.  One hook/id
 * pair exists per event: flush done, lookup hit, lookup miss and load done.
 * NOTE(review): nothing in this file assigns them; presumably the DTrace
 * NFS client provider fills them in -- confirm.
 */
dtrace_nfsclient_accesscache_flush_probe_func_t
    dtrace_nfsclient_accesscache_flush_done_probe;
uint32_t nfsclient_accesscache_flush_done_id;

dtrace_nfsclient_accesscache_get_probe_func_t
    dtrace_nfsclient_accesscache_get_hit_probe,
    dtrace_nfsclient_accesscache_get_miss_probe;
uint32_t nfsclient_accesscache_get_hit_id;
uint32_t nfsclient_accesscache_get_miss_id;

dtrace_nfsclient_accesscache_load_probe_func_t
    dtrace_nfsclient_accesscache_load_done_probe;
uint32_t nfsclient_accesscache_load_done_id;
#endif /* KDTRACE_HOOKS */
104 
/* Defs: local boolean constants used by this file. */
#define	TRUE	1
#define	FALSE	0

/*
 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
 * calls are not in getblk() and brelse() so that they would not be necessary
 * here.
 *
 * When B_VMIO is not defined, vfs_busy_pages() is stubbed out so that any
 * call to it expands to nothing.
 */
#ifndef B_VMIO
#define vfs_busy_pages(bp, f)
#endif
117 
/*
 * Forward declarations of the file-local vnode operations and helpers.
 * The nfs_vnodeops and nfs_fifoops vectors below reference most of these
 * before their definitions appear.
 */
static vop_read_t	nfsfifo_read;
static vop_write_t	nfsfifo_write;
static vop_close_t	nfsfifo_close;
static int	nfs_flush(struct vnode *, int, int);
static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *);
static vop_lookup_t	nfs_lookup;
static vop_create_t	nfs_create;
static vop_mknod_t	nfs_mknod;
static vop_open_t	nfs_open;
static vop_close_t	nfs_close;
static vop_access_t	nfs_access;
static vop_getattr_t	nfs_getattr;
static vop_setattr_t	nfs_setattr;
static vop_read_t	nfs_read;
static vop_fsync_t	nfs_fsync;
static vop_remove_t	nfs_remove;
static vop_link_t	nfs_link;
static vop_rename_t	nfs_rename;
static vop_mkdir_t	nfs_mkdir;
static vop_rmdir_t	nfs_rmdir;
static vop_symlink_t	nfs_symlink;
static vop_readdir_t	nfs_readdir;
static vop_strategy_t	nfs_strategy;
static	int	nfs_lookitup(struct vnode *, const char *, int,
		    struct ucred *, struct thread *, struct nfsnode **);
static	int	nfs_sillyrename(struct vnode *, struct vnode *,
		    struct componentname *);
/* Access check used by nfs_fifoops and by the NFSv2 path of nfs_access(). */
static vop_access_t	nfsspec_access;
static vop_readlink_t	nfs_readlink;
static vop_print_t	nfs_print;
static vop_advlock_t	nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;
150 
/*
 * Global vfs data structures for nfs
 *
 * nfs_vnodeops is the vnode operation vector for NFS vnodes.  Its
 * .vop_default entry points at default_vnodeops.
 * NOTE(review): nfs_getpages, nfs_putpages, nfs_inactive, nfs_reclaim and
 * nfs_write have no forward declaration in this file; presumably they are
 * declared in the nfsclient headers included above -- confirm.
 */
struct vop_vector nfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_advlockasync =	nfs_advlockasync,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		nfs_getpages,
	.vop_putpages =		nfs_putpages,
	.vop_inactive =		nfs_inactive,
	.vop_link =		nfs_link,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		nfs_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		nfs_write,
};
184 
/*
 * Vnode operation vector for fifos that live on an NFS mount.  Its
 * .vop_default entry points at fifo_specops.  Read, write and close use
 * the nfsfifo_* wrappers, and access checks go through nfsspec_access
 * rather than nfs_access.
 */
struct vop_vector nfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		nfs_inactive,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		nfs_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};
198 
/*
 * Prototypes for file-local helpers whose definitions lie outside the
 * part of the file shown here.  Judging by their names they issue the
 * MKNOD, REMOVE and RENAME requests -- confirm against the definitions.
 */
static int	nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
			     struct componentname *cnp, struct vattr *vap);
static int	nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
			      struct ucred *cred, struct thread *td);
static int	nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
			      int fnamelen, struct vnode *tdvp,
			      const char *tnameptr, int tnamelen,
			      struct ucred *cred, struct thread *td);
static int	nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
			     struct sillyrename *sp);
209 
/*
 * Global variables
 *
 * nfs_iod_mtx is the global lock for the shared nfsiod state (see the SMP
 * locking note in this file).  nfs_iodwant[] and nfs_iodmount[] each have
 * NFS_MAXASYNCDAEMON entries.
 */
struct mtx 	nfs_iod_mtx;
enum nfsiod_state nfs_iodwant[NFS_MAXASYNCDAEMON];
struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
int		 nfs_numasync = 0;
/* Size of a struct dirent without its name buffer of MAXNAMLEN + 1 bytes. */
#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

SYSCTL_DECL(_vfs_oldnfs);

/*
 * Added to an access cache entry's stamp when nfs_access() decides whether
 * the entry is still fresh.  A value <= 0 also makes nfs_access() request
 * only the bits it needs instead of all of them.
 */
static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

/*
 * When set (and the cache timeout is positive), nfs_getattr() issues an
 * ACCESS RPC on an attribute cache miss before falling back to GETATTR.
 */
static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
	   &nfs_prime_access_cache, 0,
	   "Prime NFS ACCESS cache when fetching attributes");

/* Selects the last argument nfs_close() passes to nfs_flush() for NFSv3. */
static int	nfsv3_commit_on_close = 0;
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");

/* When set, nfs_close() cleans the VM object's pages of regular files. */
static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

/* Gates the O_DIRECT handling in nfs_open() and nfs_close(). */
int nfs_directio_enable = 0;
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
	   &nfs_directio_enable, 0, "Enable NFS directio");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
 * this by default to prevent DoS attacks - to prevent a malicious user from
 * opening up files O_DIRECT preventing other users from mmap'ing these
 * files.  "Protected" environments where stricter consistency guarantees are
 * required can disable this knob.  The process that opened the file O_DIRECT
 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
 * meaningful.
 */
int nfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
	   &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");

/* Disabled: read-only sysctls exporting the access cache hit/miss counters. */
#if 0
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");

SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
#endif

/* Every NFSv3 ACCESS permission bit; used for blanket ACCESS requests. */
#define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
268 
/*
 * SMP Locking Note :
 * The list of locks after the description of the lock is the ordering
 * of other locks acquired with the lock held.
 * np->n_mtx : Protects the fields in the nfsnode.
 *	VM Object Lock
 *	VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
 *	rep->r_mtx
 * nfs_iod_mtx : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
 *	nmp->nm_mtx
 *	rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
 */
284 
285 static int
nfs3_access_otw(struct vnode * vp,int wmode,struct thread * td,struct ucred * cred,uint32_t * retmode)286 nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
287     struct ucred *cred, uint32_t *retmode)
288 {
289 	const int v3 = 1;
290 	u_int32_t *tl;
291 	int error = 0, attrflag, i, lrupos;
292 
293 	struct mbuf *mreq, *mrep, *md, *mb;
294 	caddr_t bpos, dpos;
295 	u_int32_t rmode;
296 	struct nfsnode *np = VTONFS(vp);
297 
298 	nfsstats.rpccnt[NFSPROC_ACCESS]++;
299 	mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
300 	mb = mreq;
301 	bpos = mtod(mb, caddr_t);
302 	nfsm_fhtom(vp, v3);
303 	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
304 	*tl = txdr_unsigned(wmode);
305 	nfsm_request(vp, NFSPROC_ACCESS, td, cred);
306 	nfsm_postop_attr(vp, attrflag);
307 	if (!error) {
308 		lrupos = 0;
309 		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
310 		rmode = fxdr_unsigned(u_int32_t, *tl);
311 		mtx_lock(&np->n_mtx);
312 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
313 			if (np->n_accesscache[i].uid == cred->cr_uid) {
314 				np->n_accesscache[i].mode = rmode;
315 				np->n_accesscache[i].stamp = time_second;
316 				break;
317 			}
318 			if (i > 0 && np->n_accesscache[i].stamp <
319 			    np->n_accesscache[lrupos].stamp)
320 				lrupos = i;
321 		}
322 		if (i == NFS_ACCESSCACHESIZE) {
323 			np->n_accesscache[lrupos].uid = cred->cr_uid;
324 			np->n_accesscache[lrupos].mode = rmode;
325 			np->n_accesscache[lrupos].stamp = time_second;
326 		}
327 		mtx_unlock(&np->n_mtx);
328 		if (retmode != NULL)
329 			*retmode = rmode;
330 		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
331 	}
332 	m_freem(mrep);
333 nfsmout:
334 #ifdef KDTRACE_HOOKS
335 	if (error) {
336 		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
337 		    error);
338 	}
339 #endif
340 	return (error);
341 }
342 
343 /*
344  * nfs access vnode op.
345  * For nfs version 2, just return ok. File accesses may fail later.
346  * For nfs version 3, use the access rpc to check accessibility. If file modes
347  * are changed on the server, accesses might still fail later.
348  */
349 static int
nfs_access(struct vop_access_args * ap)350 nfs_access(struct vop_access_args *ap)
351 {
352 	struct vnode *vp = ap->a_vp;
353 	int error = 0, i, gotahit;
354 	u_int32_t mode, rmode, wmode;
355 	int v3 = NFS_ISV3(vp);
356 	struct nfsnode *np = VTONFS(vp);
357 
358 	/*
359 	 * Disallow write attempts on filesystems mounted read-only;
360 	 * unless the file is a socket, fifo, or a block or character
361 	 * device resident on the filesystem.
362 	 */
363 	if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
364 		switch (vp->v_type) {
365 		case VREG:
366 		case VDIR:
367 		case VLNK:
368 			return (EROFS);
369 		default:
370 			break;
371 		}
372 	}
373 	/*
374 	 * For nfs v3, check to see if we have done this recently, and if
375 	 * so return our cached result instead of making an ACCESS call.
376 	 * If not, do an access rpc, otherwise you are stuck emulating
377 	 * ufs_access() locally using the vattr. This may not be correct,
378 	 * since the server may apply other access criteria such as
379 	 * client uid-->server uid mapping that we do not know about.
380 	 */
381 	if (v3) {
382 		if (ap->a_accmode & VREAD)
383 			mode = NFSV3ACCESS_READ;
384 		else
385 			mode = 0;
386 		if (vp->v_type != VDIR) {
387 			if (ap->a_accmode & VWRITE)
388 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
389 			if (ap->a_accmode & VEXEC)
390 				mode |= NFSV3ACCESS_EXECUTE;
391 		} else {
392 			if (ap->a_accmode & VWRITE)
393 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
394 					 NFSV3ACCESS_DELETE);
395 			if (ap->a_accmode & VEXEC)
396 				mode |= NFSV3ACCESS_LOOKUP;
397 		}
398 		/* XXX safety belt, only make blanket request if caching */
399 		if (nfsaccess_cache_timeout > 0) {
400 			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
401 				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
402 				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
403 		} else {
404 			wmode = mode;
405 		}
406 
407 		/*
408 		 * Does our cached result allow us to give a definite yes to
409 		 * this request?
410 		 */
411 		gotahit = 0;
412 		mtx_lock(&np->n_mtx);
413 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
414 			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
415 				if (time_second < (np->n_accesscache[i].stamp +
416 				    nfsaccess_cache_timeout) &&
417 				    (np->n_accesscache[i].mode & mode) == mode) {
418 					nfsstats.accesscache_hits++;
419 					gotahit = 1;
420 				}
421 				break;
422 			}
423 		}
424 		mtx_unlock(&np->n_mtx);
425 #ifdef KDTRACE_HOOKS
426 		if (gotahit)
427 			KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
428 			    ap->a_cred->cr_uid, mode);
429 		else
430 			KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
431 			    ap->a_cred->cr_uid, mode);
432 #endif
433 		if (gotahit == 0) {
434 			/*
435 			 * Either a no, or a don't know.  Go to the wire.
436 			 */
437 			nfsstats.accesscache_misses++;
438 		        error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred,
439 			    &rmode);
440 			if (!error) {
441 				if ((rmode & mode) != mode)
442 					error = EACCES;
443 			}
444 		}
445 		return (error);
446 	} else {
447 		if ((error = nfsspec_access(ap)) != 0) {
448 			return (error);
449 		}
450 		/*
451 		 * Attempt to prevent a mapped root from accessing a file
452 		 * which it shouldn't.  We try to read a byte from the file
453 		 * if the user is root and the file is not zero length.
454 		 * After calling nfsspec_access, we should have the correct
455 		 * file size cached.
456 		 */
457 		mtx_lock(&np->n_mtx);
458 		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
459 		    && VTONFS(vp)->n_size > 0) {
460 			struct iovec aiov;
461 			struct uio auio;
462 			char buf[1];
463 
464 			mtx_unlock(&np->n_mtx);
465 			aiov.iov_base = buf;
466 			aiov.iov_len = 1;
467 			auio.uio_iov = &aiov;
468 			auio.uio_iovcnt = 1;
469 			auio.uio_offset = 0;
470 			auio.uio_resid = 1;
471 			auio.uio_segflg = UIO_SYSSPACE;
472 			auio.uio_rw = UIO_READ;
473 			auio.uio_td = ap->a_td;
474 
475 			if (vp->v_type == VREG)
476 				error = nfs_readrpc(vp, &auio, ap->a_cred);
477 			else if (vp->v_type == VDIR) {
478 				char* bp;
479 				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
480 				aiov.iov_base = bp;
481 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
482 				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
483 				free(bp, M_TEMP);
484 			} else if (vp->v_type == VLNK)
485 				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
486 			else
487 				error = EACCES;
488 		} else
489 			mtx_unlock(&np->n_mtx);
490 		return (error);
491 	}
492 }
493 
/*
 * NOTE(review): not referenced anywhere in the visible part of this file;
 * presumably a knob consumed by other NFS client code -- confirm it is
 * still used.
 */
int nfs_otw_getattr_avoid = 0;
495 
496 /*
497  * nfs open vnode op
498  * Check to see if the type is ok
499  * and that deletion is not in progress.
500  * For paged in text files, you will need to flush the page cache
501  * if consistency is lost.
502  */
503 /* ARGSUSED */
504 static int
nfs_open(struct vop_open_args * ap)505 nfs_open(struct vop_open_args *ap)
506 {
507 	struct vnode *vp = ap->a_vp;
508 	struct nfsnode *np = VTONFS(vp);
509 	struct vattr vattr;
510 	int error;
511 	int fmode = ap->a_mode;
512 	struct ucred *cred;
513 
514 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
515 		return (EOPNOTSUPP);
516 
517 	/*
518 	 * Get a valid lease. If cached data is stale, flush it.
519 	 */
520 	mtx_lock(&np->n_mtx);
521 	if (np->n_flag & NMODIFIED) {
522 		mtx_unlock(&np->n_mtx);
523 		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
524 		if (error == EINTR || error == EIO)
525 			return (error);
526 		mtx_lock(&np->n_mtx);
527 		np->n_attrstamp = 0;
528 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
529 		if (vp->v_type == VDIR)
530 			np->n_direofoffset = 0;
531 		mtx_unlock(&np->n_mtx);
532 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
533 		if (error)
534 			return (error);
535 		mtx_lock(&np->n_mtx);
536 		np->n_mtime = vattr.va_mtime;
537 	} else {
538 		mtx_unlock(&np->n_mtx);
539 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
540 		if (error)
541 			return (error);
542 		mtx_lock(&np->n_mtx);
543 		if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
544 			if (vp->v_type == VDIR)
545 				np->n_direofoffset = 0;
546 			mtx_unlock(&np->n_mtx);
547 			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
548 			if (error == EINTR || error == EIO) {
549 				return (error);
550 			}
551 			mtx_lock(&np->n_mtx);
552 			np->n_mtime = vattr.va_mtime;
553 		}
554 	}
555 	/*
556 	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
557 	 */
558 	if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
559 		if (np->n_directio_opens == 0) {
560 			mtx_unlock(&np->n_mtx);
561 			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
562 			if (error)
563 				return (error);
564 			mtx_lock(&np->n_mtx);
565 			np->n_flag |= NNONCACHE;
566 		}
567 		np->n_directio_opens++;
568 	}
569 
570 	/*
571 	 * If this is an open for writing, capture a reference to the
572 	 * credentials, so they can be used by nfs_putpages(). Using
573 	 * these write credentials is preferable to the credentials of
574 	 * whatever thread happens to be doing the VOP_PUTPAGES() since
575 	 * the write RPCs are less likely to fail with EACCES.
576 	 */
577 	if ((fmode & FWRITE) != 0) {
578 		cred = np->n_writecred;
579 		np->n_writecred = crhold(ap->a_cred);
580 	} else
581 		cred = NULL;
582 	mtx_unlock(&np->n_mtx);
583 	if (cred != NULL)
584 		crfree(cred);
585 	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
586 	return (0);
587 }
588 
589 /*
590  * nfs close vnode op
591  * What an NFS client should do upon close after writing is a debatable issue.
592  * Most NFS clients push delayed writes to the server upon close, basically for
593  * two reasons:
594  * 1 - So that any write errors may be reported back to the client process
595  *     doing the close system call. By far the two most likely errors are
596  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
597  * 2 - To put a worst case upper bound on cache inconsistency between
598  *     multiple clients for the file.
599  * There is also a consistency problem for Version 2 of the protocol w.r.t.
600  * not being able to tell if other clients are writing a file concurrently,
601  * since there is no way of knowing if the changed modify time in the reply
602  * is only due to the write for this client.
603  * (NFS Version 3 provides weak cache consistency data in the reply that
604  *  should be sufficient to detect and handle this case.)
605  *
606  * The current code does the following:
607  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
608  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
609  *                     or commit them (this satisfies 1 and 2 except for the
610  *                     case where the server crashes after this close but
611  *                     before the commit RPC, which is felt to be "good
612  *                     enough". Changing the last argument to nfs_flush() to
613  *                     a 1 would force a commit operation, if it is felt a
614  *                     commit is necessary now.
615  */
616 /* ARGSUSED */
617 static int
nfs_close(struct vop_close_args * ap)618 nfs_close(struct vop_close_args *ap)
619 {
620 	struct vnode *vp = ap->a_vp;
621 	struct nfsnode *np = VTONFS(vp);
622 	int error = 0;
623 	int fmode = ap->a_fflag;
624 
625 	if (vp->v_type == VREG) {
626 	    /*
627 	     * Examine and clean dirty pages, regardless of NMODIFIED.
628 	     * This closes a major hole in close-to-open consistency.
629 	     * We want to push out all dirty pages (and buffers) on
630 	     * close, regardless of whether they were dirtied by
631 	     * mmap'ed writes or via write().
632 	     */
633 	    if (nfs_clean_pages_on_close && vp->v_object) {
634 		VM_OBJECT_LOCK(vp->v_object);
635 		vm_object_page_clean(vp->v_object, 0, 0, 0);
636 		VM_OBJECT_UNLOCK(vp->v_object);
637 	    }
638 	    mtx_lock(&np->n_mtx);
639 	    if (np->n_flag & NMODIFIED) {
640 		mtx_unlock(&np->n_mtx);
641 		if (NFS_ISV3(vp)) {
642 		    /*
643 		     * Under NFSv3 we have dirty buffers to dispose of.  We
644 		     * must flush them to the NFS server.  We have the option
645 		     * of waiting all the way through the commit rpc or just
646 		     * waiting for the initial write.  The default is to only
647 		     * wait through the initial write so the data is in the
648 		     * server's cache, which is roughly similar to the state
649 		     * a standard disk subsystem leaves the file in on close().
650 		     *
651 		     * We cannot clear the NMODIFIED bit in np->n_flag due to
652 		     * potential races with other processes, and certainly
653 		     * cannot clear it if we don't commit.
654 		     */
655 		    int cm = nfsv3_commit_on_close ? 1 : 0;
656 		    error = nfs_flush(vp, MNT_WAIT, cm);
657 		    /* np->n_flag &= ~NMODIFIED; */
658 		} else
659 		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
660 		mtx_lock(&np->n_mtx);
661 	    }
662 	    if (np->n_flag & NWRITEERR) {
663 		np->n_flag &= ~NWRITEERR;
664 		error = np->n_error;
665 	    }
666 	    mtx_unlock(&np->n_mtx);
667 	}
668 	if (nfs_directio_enable)
669 		KASSERT((np->n_directio_asyncwr == 0),
670 			("nfs_close: dirty unflushed (%d) directio buffers\n",
671 			 np->n_directio_asyncwr));
672 	if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
673 		mtx_lock(&np->n_mtx);
674 		KASSERT((np->n_directio_opens > 0),
675 			("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
676 		np->n_directio_opens--;
677 		if (np->n_directio_opens == 0)
678 			np->n_flag &= ~NNONCACHE;
679 		mtx_unlock(&np->n_mtx);
680 	}
681 	return (error);
682 }
683 
684 /*
685  * nfs getattr call from vfs.
686  */
687 static int
nfs_getattr(struct vop_getattr_args * ap)688 nfs_getattr(struct vop_getattr_args *ap)
689 {
690 	struct vnode *vp = ap->a_vp;
691 	struct nfsnode *np = VTONFS(vp);
692 	struct thread *td = curthread;
693 	struct vattr *vap = ap->a_vap;
694 	struct vattr vattr;
695 	caddr_t bpos, dpos;
696 	int error = 0;
697 	struct mbuf *mreq, *mrep, *md, *mb;
698 	int v3 = NFS_ISV3(vp);
699 
700 	/*
701 	 * Update local times for special files.
702 	 */
703 	mtx_lock(&np->n_mtx);
704 	if (np->n_flag & (NACC | NUPD))
705 		np->n_flag |= NCHG;
706 	mtx_unlock(&np->n_mtx);
707 	/*
708 	 * First look in the cache.
709 	 */
710 	if (nfs_getattrcache(vp, &vattr) == 0)
711 		goto nfsmout;
712 	if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) {
713 		nfsstats.accesscache_misses++;
714 		nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred, NULL);
715 		if (nfs_getattrcache(vp, &vattr) == 0)
716 			goto nfsmout;
717 	}
718 	nfsstats.rpccnt[NFSPROC_GETATTR]++;
719 	mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
720 	mb = mreq;
721 	bpos = mtod(mb, caddr_t);
722 	nfsm_fhtom(vp, v3);
723 	nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred);
724 	if (!error) {
725 		nfsm_loadattr(vp, &vattr);
726 	}
727 	m_freem(mrep);
728 nfsmout:
729 	vap->va_type = vattr.va_type;
730 	vap->va_mode = vattr.va_mode;
731 	vap->va_nlink = vattr.va_nlink;
732 	vap->va_uid = vattr.va_uid;
733 	vap->va_gid = vattr.va_gid;
734 	vap->va_fsid = vattr.va_fsid;
735 	vap->va_fileid = vattr.va_fileid;
736 	vap->va_size = vattr.va_size;
737 	vap->va_blocksize = vattr.va_blocksize;
738 	vap->va_atime = vattr.va_atime;
739 	vap->va_mtime = vattr.va_mtime;
740 	vap->va_ctime = vattr.va_ctime;
741 	vap->va_gen = vattr.va_gen;
742 	vap->va_flags = vattr.va_flags;
743 	vap->va_rdev = vattr.va_rdev;
744 	vap->va_bytes = vattr.va_bytes;
745 	vap->va_filerev = vattr.va_filerev;
746 
747 	return (error);
748 }
749 
750 /*
751  * nfs setattr call.
752  */
753 static int
nfs_setattr(struct vop_setattr_args * ap)754 nfs_setattr(struct vop_setattr_args *ap)
755 {
756 	struct vnode *vp = ap->a_vp;
757 	struct nfsnode *np = VTONFS(vp);
758 	struct vattr *vap = ap->a_vap;
759 	struct thread *td = curthread;
760 	int error = 0;
761 	u_quad_t tsize;
762 
763 #ifndef nolint
764 	tsize = (u_quad_t)0;
765 #endif
766 
767 	/*
768 	 * Setting of flags is not supported.
769 	 */
770 	if (vap->va_flags != VNOVAL)
771 		return (EOPNOTSUPP);
772 
773 	/*
774 	 * Disallow write attempts if the filesystem is mounted read-only.
775 	 */
776   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
777 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
778 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
779 	    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
780 		error = EROFS;
781 		goto out;
782 	}
783 	if (vap->va_size != VNOVAL) {
784  		switch (vp->v_type) {
785  		case VDIR:
786  			return (EISDIR);
787  		case VCHR:
788  		case VBLK:
789  		case VSOCK:
790  		case VFIFO:
791 			if (vap->va_mtime.tv_sec == VNOVAL &&
792 			    vap->va_atime.tv_sec == VNOVAL &&
793 			    vap->va_mode == (mode_t)VNOVAL &&
794 			    vap->va_uid == (uid_t)VNOVAL &&
795 			    vap->va_gid == (gid_t)VNOVAL)
796 				return (0);
797  			vap->va_size = VNOVAL;
798  			break;
799  		default:
800 			/*
801 			 * Disallow write attempts if the filesystem is
802 			 * mounted read-only.
803 			 */
804 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
805 				return (EROFS);
806 			/*
807 			 *  We run vnode_pager_setsize() early (why?),
808 			 * we must set np->n_size now to avoid vinvalbuf
809 			 * V_SAVE races that might setsize a lower
810 			 * value.
811 			 */
812 			mtx_lock(&np->n_mtx);
813 			tsize = np->n_size;
814 			mtx_unlock(&np->n_mtx);
815 			error = nfs_meta_setsize(vp, ap->a_cred, td,
816 			    vap->va_size);
817 			mtx_lock(&np->n_mtx);
818  			if (np->n_flag & NMODIFIED) {
819 			    tsize = np->n_size;
820 			    mtx_unlock(&np->n_mtx);
821  			    if (vap->va_size == 0)
822  				error = nfs_vinvalbuf(vp, 0, td, 1);
823  			    else
824  				error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
825  			    if (error) {
826 				vnode_pager_setsize(vp, tsize);
827 				goto out;
828 			    }
829  			} else
830 			    mtx_unlock(&np->n_mtx);
831 			/*
832 			 * np->n_size has already been set to vap->va_size
833 			 * in nfs_meta_setsize(). We must set it again since
834 			 * nfs_loadattrcache() could be called through
835 			 * nfs_meta_setsize() and could modify np->n_size.
836 			 */
837 			mtx_lock(&np->n_mtx);
838  			np->n_vattr.va_size = np->n_size = vap->va_size;
839 			mtx_unlock(&np->n_mtx);
840   		};
841   	} else {
842 		mtx_lock(&np->n_mtx);
843 		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
844 		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
845 			mtx_unlock(&np->n_mtx);
846 			if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
847 			    (error == EINTR || error == EIO))
848 				return error;
849 		} else
850 			mtx_unlock(&np->n_mtx);
851 	}
852 	error = nfs_setattrrpc(vp, vap, ap->a_cred);
853 	if (error && vap->va_size != VNOVAL) {
854 		mtx_lock(&np->n_mtx);
855 		np->n_size = np->n_vattr.va_size = tsize;
856 		vnode_pager_setsize(vp, tsize);
857 		mtx_unlock(&np->n_mtx);
858 	}
859 out:
860 	return (error);
861 }
862 
863 /*
864  * Do an nfs setattr rpc.
865  */
866 static int
nfs_setattrrpc(struct vnode * vp,struct vattr * vap,struct ucred * cred)867 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred)
868 {
869 	struct nfsv2_sattr *sp;
870 	struct nfsnode *np = VTONFS(vp);
871 	caddr_t bpos, dpos;
872 	u_int32_t *tl;
873 	int error = 0, i, wccflag = NFSV3_WCCRATTR;
874 	struct mbuf *mreq, *mrep, *md, *mb;
875 	int v3 = NFS_ISV3(vp);
876 
877 	nfsstats.rpccnt[NFSPROC_SETATTR]++;
878 	mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
879 	mb = mreq;
880 	bpos = mtod(mb, caddr_t);
881 	nfsm_fhtom(vp, v3);
882 	if (v3) {
883 		nfsm_v3attrbuild(vap, TRUE);
884 		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
885 		*tl = nfs_false;
886 	} else {
887 		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
888 		if (vap->va_mode == (mode_t)VNOVAL)
889 			sp->sa_mode = nfs_xdrneg1;
890 		else
891 			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
892 		if (vap->va_uid == (uid_t)VNOVAL)
893 			sp->sa_uid = nfs_xdrneg1;
894 		else
895 			sp->sa_uid = txdr_unsigned(vap->va_uid);
896 		if (vap->va_gid == (gid_t)VNOVAL)
897 			sp->sa_gid = nfs_xdrneg1;
898 		else
899 			sp->sa_gid = txdr_unsigned(vap->va_gid);
900 		sp->sa_size = txdr_unsigned(vap->va_size);
901 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
902 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
903 	}
904 	nfsm_request(vp, NFSPROC_SETATTR, curthread, cred);
905 	if (v3) {
906 		mtx_lock(&np->n_mtx);
907 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
908 			np->n_accesscache[i].stamp = 0;
909 		mtx_unlock(&np->n_mtx);
910 		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
911 		nfsm_wcc_data(vp, wccflag);
912 	} else
913 		nfsm_loadattr(vp, NULL);
914 	m_freem(mrep);
915 nfsmout:
916 	return (error);
917 }
918 
919 /*
920  * nfs lookup call, one step at a time...
921  * First look in cache
922  * If not found, unlock the directory nfsnode and do the rpc
923  */
924 static int
nfs_lookup(struct vop_lookup_args * ap)925 nfs_lookup(struct vop_lookup_args *ap)
926 {
927 	struct componentname *cnp = ap->a_cnp;
928 	struct vnode *dvp = ap->a_dvp;
929 	struct vnode **vpp = ap->a_vpp;
930 	struct mount *mp = dvp->v_mount;
931 	struct vattr dvattr, vattr;
932 	struct timespec nctime;
933 	int flags = cnp->cn_flags;
934 	struct vnode *newvp;
935 	struct nfsmount *nmp;
936 	caddr_t bpos, dpos;
937 	struct mbuf *mreq, *mrep, *md, *mb;
938 	long len;
939 	nfsfh_t *fhp;
940 	struct nfsnode *np, *newnp;
941 	int error = 0, attrflag, dattrflag, fhsize, ltype, ncticks;
942 	int v3 = NFS_ISV3(dvp);
943 	struct thread *td = cnp->cn_thread;
944 
945 	*vpp = NULLVP;
946 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
947 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
948 		return (EROFS);
949 	if (dvp->v_type != VDIR)
950 		return (ENOTDIR);
951 	nmp = VFSTONFS(mp);
952 	np = VTONFS(dvp);
953 	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
954 		*vpp = NULLVP;
955 		return (error);
956 	}
957 	error = cache_lookup_times(dvp, vpp, cnp, &nctime, &ncticks);
958 	if (error > 0 && error != ENOENT)
959 		return (error);
960 	if (error == -1) {
961 		/*
962 		 * Lookups of "." are special and always return the
963 		 * current directory.  cache_lookup() already handles
964 		 * associated locking bookkeeping, etc.
965 		 */
966 		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
967 			/* XXX: Is this really correct? */
968 			if (cnp->cn_nameiop != LOOKUP &&
969 			    (flags & ISLASTCN))
970 				cnp->cn_flags |= SAVENAME;
971 			return (0);
972 		}
973 
974 		/*
975 		 * We only accept a positive hit in the cache if the
976 		 * change time of the file matches our cached copy.
977 		 * Otherwise, we discard the cache entry and fallback
978 		 * to doing a lookup RPC.  We also only trust cache
979 		 * entries for less than nm_nametimeo seconds.
980 		 *
981 		 * To better handle stale file handles and attributes,
982 		 * clear the attribute cache of this node if it is a
983 		 * leaf component, part of an open() call, and not
984 		 * locally modified before fetching the attributes.
985 		 * This should allow stale file handles to be detected
986 		 * here where we can fall back to a LOOKUP RPC to
987 		 * recover rather than having nfs_open() detect the
988 		 * stale file handle and failing open(2) with ESTALE.
989 		 */
990 		newvp = *vpp;
991 		newnp = VTONFS(newvp);
992 		if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
993 		    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
994 		    !(newnp->n_flag & NMODIFIED)) {
995 			mtx_lock(&newnp->n_mtx);
996 			newnp->n_attrstamp = 0;
997 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
998 			mtx_unlock(&newnp->n_mtx);
999 		}
1000 		if ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
1001 		    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
1002 		    timespeccmp(&vattr.va_ctime, &nctime, ==)) {
1003 			nfsstats.lookupcache_hits++;
1004 			if (cnp->cn_nameiop != LOOKUP &&
1005 			    (flags & ISLASTCN))
1006 				cnp->cn_flags |= SAVENAME;
1007 			return (0);
1008 		}
1009 		cache_purge(newvp);
1010 		if (dvp != newvp)
1011 			vput(newvp);
1012 		else
1013 			vrele(newvp);
1014 		*vpp = NULLVP;
1015 	} else if (error == ENOENT) {
1016 		if (dvp->v_iflag & VI_DOOMED)
1017 			return (ENOENT);
1018 		/*
1019 		 * We only accept a negative hit in the cache if the
1020 		 * modification time of the parent directory matches
1021 		 * the cached copy in the name cache entry.
1022 		 * Otherwise, we discard all of the negative cache
1023 		 * entries for this directory.  We also only trust
1024 		 * negative cache entries for up to nm_negnametimeo
1025 		 * seconds.
1026 		 */
1027 		if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
1028 		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
1029 		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
1030 			nfsstats.lookupcache_hits++;
1031 			return (ENOENT);
1032 		}
1033 		cache_purge_negative(dvp);
1034 	}
1035 
1036 	attrflag = dattrflag = 0;
1037 	error = 0;
1038 	newvp = NULLVP;
1039 	nfsstats.lookupcache_misses++;
1040 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
1041 	len = cnp->cn_namelen;
1042 	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
1043 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
1044 	mb = mreq;
1045 	bpos = mtod(mb, caddr_t);
1046 	nfsm_fhtom(dvp, v3);
1047 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
1048 	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
1049 	if (error) {
1050 		if (v3) {
1051 			nfsm_postop_attr_va(dvp, dattrflag, &vattr);
1052 			m_freem(mrep);
1053 		}
1054 		goto nfsmout;
1055 	}
1056 	nfsm_getfh(fhp, fhsize, v3);
1057 
1058 	/*
1059 	 * Handle RENAME case...
1060 	 */
1061 	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
1062 		if (NFS_CMPFH(np, fhp, fhsize)) {
1063 			m_freem(mrep);
1064 			return (EISDIR);
1065 		}
1066 		error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE);
1067 		if (error) {
1068 			m_freem(mrep);
1069 			return (error);
1070 		}
1071 		newvp = NFSTOV(np);
1072 		if (v3) {
1073 			nfsm_postop_attr(newvp, attrflag);
1074 			nfsm_postop_attr(dvp, attrflag);
1075 		} else
1076 			nfsm_loadattr(newvp, NULL);
1077 		*vpp = newvp;
1078 		m_freem(mrep);
1079 		cnp->cn_flags |= SAVENAME;
1080 		return (0);
1081 	}
1082 
1083 	if (flags & ISDOTDOT) {
1084 		ltype = VOP_ISLOCKED(dvp);
1085 		error = vfs_busy(mp, MBF_NOWAIT);
1086 		if (error != 0) {
1087 			vfs_ref(mp);
1088 			VOP_UNLOCK(dvp, 0);
1089 			error = vfs_busy(mp, 0);
1090 			vn_lock(dvp, ltype | LK_RETRY);
1091 			vfs_rel(mp);
1092 			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
1093 				vfs_unbusy(mp);
1094 				error = ENOENT;
1095 			}
1096 			if (error != 0) {
1097 				m_freem(mrep);
1098 				return (error);
1099 			}
1100 		}
1101 		VOP_UNLOCK(dvp, 0);
1102 		error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
1103 		if (error == 0)
1104 			newvp = NFSTOV(np);
1105 		vfs_unbusy(mp);
1106 		if (newvp != dvp)
1107 			vn_lock(dvp, ltype | LK_RETRY);
1108 		if (dvp->v_iflag & VI_DOOMED) {
1109 			if (error == 0) {
1110 				if (newvp == dvp)
1111 					vrele(newvp);
1112 				else
1113 					vput(newvp);
1114 			}
1115 			error = ENOENT;
1116 		}
1117 		if (error) {
1118 			m_freem(mrep);
1119 			return (error);
1120 		}
1121 	} else if (NFS_CMPFH(np, fhp, fhsize)) {
1122 		VREF(dvp);
1123 		newvp = dvp;
1124 	} else {
1125 		error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
1126 		if (error) {
1127 			m_freem(mrep);
1128 			return (error);
1129 		}
1130 		newvp = NFSTOV(np);
1131 
1132 		/*
1133 		 * Flush the attribute cache when opening a leaf node
1134 		 * to ensure that fresh attributes are fetched in
1135 		 * nfs_open() if we are unable to fetch attributes
1136 		 * from the LOOKUP reply.
1137 		 */
1138 		if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
1139 		    !(np->n_flag & NMODIFIED)) {
1140 			mtx_lock(&np->n_mtx);
1141 			np->n_attrstamp = 0;
1142 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
1143 			mtx_unlock(&np->n_mtx);
1144 		}
1145 	}
1146 	if (v3) {
1147 		nfsm_postop_attr_va(newvp, attrflag, &vattr);
1148 		nfsm_postop_attr_va(dvp, dattrflag, &dvattr);
1149 	} else {
1150 		nfsm_loadattr(newvp, &vattr);
1151 		attrflag = 1;
1152 	}
1153 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
1154 		cnp->cn_flags |= SAVENAME;
1155 	if ((cnp->cn_flags & MAKEENTRY) &&
1156 	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
1157 	    attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
1158 		cache_enter_time(dvp, newvp, cnp, &vattr.va_ctime,
1159 		    newvp->v_type != VDIR ? NULL : &dvattr.va_ctime);
1160 	*vpp = newvp;
1161 	m_freem(mrep);
1162 nfsmout:
1163 	if (error) {
1164 		if (newvp != NULLVP) {
1165 			vput(newvp);
1166 			*vpp = NULLVP;
1167 		}
1168 
1169 		if (error != ENOENT)
1170 			goto done;
1171 
1172 		/* The requested file was not found. */
1173 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
1174 		    (flags & ISLASTCN)) {
1175 			/*
1176 			 * XXX: UFS does a full VOP_ACCESS(dvp,
1177 			 * VWRITE) here instead of just checking
1178 			 * MNT_RDONLY.
1179 			 */
1180 			if (mp->mnt_flag & MNT_RDONLY)
1181 				return (EROFS);
1182 			cnp->cn_flags |= SAVENAME;
1183 			return (EJUSTRETURN);
1184 		}
1185 
1186 		if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE &&
1187 		    dattrflag) {
1188 			/*
1189 			 * Cache the modification time of the parent
1190 			 * directory from the post-op attributes in
1191 			 * the name cache entry.  The negative cache
1192 			 * entry will be ignored once the directory
1193 			 * has changed.  Don't bother adding the entry
1194 			 * if the directory has already changed.
1195 			 */
1196 			mtx_lock(&np->n_mtx);
1197 			if (timespeccmp(&np->n_vattr.va_mtime,
1198 			    &vattr.va_mtime, ==)) {
1199 				mtx_unlock(&np->n_mtx);
1200 				cache_enter_time(dvp, NULL, cnp,
1201 				    &vattr.va_mtime, NULL);
1202 			} else
1203 				mtx_unlock(&np->n_mtx);
1204 		}
1205 		return (ENOENT);
1206 	}
1207 done:
1208 	return (error);
1209 }
1210 
1211 /*
1212  * nfs read call.
1213  * Just call nfs_bioread() to do the work.
1214  */
1215 static int
nfs_read(struct vop_read_args * ap)1216 nfs_read(struct vop_read_args *ap)
1217 {
1218 	struct vnode *vp = ap->a_vp;
1219 
1220 	switch (vp->v_type) {
1221 	case VREG:
1222 		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
1223 	case VDIR:
1224 		return (EISDIR);
1225 	default:
1226 		return (EOPNOTSUPP);
1227 	}
1228 }
1229 
1230 /*
1231  * nfs readlink call
1232  */
1233 static int
nfs_readlink(struct vop_readlink_args * ap)1234 nfs_readlink(struct vop_readlink_args *ap)
1235 {
1236 	struct vnode *vp = ap->a_vp;
1237 
1238 	if (vp->v_type != VLNK)
1239 		return (EINVAL);
1240 	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
1241 }
1242 
1243 /*
1244  * Do a readlink rpc.
1245  * Called by nfs_doio() from below the buffer cache.
1246  */
1247 int
nfs_readlinkrpc(struct vnode * vp,struct uio * uiop,struct ucred * cred)1248 nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1249 {
1250 	caddr_t bpos, dpos;
1251 	int error = 0, len, attrflag;
1252 	struct mbuf *mreq, *mrep, *md, *mb;
1253 	int v3 = NFS_ISV3(vp);
1254 
1255 	nfsstats.rpccnt[NFSPROC_READLINK]++;
1256 	mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
1257 	mb = mreq;
1258 	bpos = mtod(mb, caddr_t);
1259 	nfsm_fhtom(vp, v3);
1260 	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
1261 	if (v3)
1262 		nfsm_postop_attr(vp, attrflag);
1263 	if (!error) {
1264 		nfsm_strsiz(len, NFS_MAXPATHLEN);
1265 		if (len == NFS_MAXPATHLEN) {
1266 			struct nfsnode *np = VTONFS(vp);
1267 			mtx_lock(&np->n_mtx);
1268 			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1269 				len = np->n_size;
1270 			mtx_unlock(&np->n_mtx);
1271 		}
1272 		nfsm_mtouio(uiop, len);
1273 	}
1274 	m_freem(mrep);
1275 nfsmout:
1276 	return (error);
1277 }
1278 
1279 /*
1280  * nfs read rpc call
1281  * Ditto above
1282  */
1283 int
nfs_readrpc(struct vnode * vp,struct uio * uiop,struct ucred * cred)1284 nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1285 {
1286 	u_int32_t *tl;
1287 	caddr_t bpos, dpos;
1288 	struct mbuf *mreq, *mrep, *md, *mb;
1289 	struct nfsmount *nmp;
1290 	off_t end;
1291 	int error = 0, len, retlen, tsiz, eof, attrflag;
1292 	int v3 = NFS_ISV3(vp);
1293 	int rsize;
1294 
1295 #ifndef nolint
1296 	eof = 0;
1297 #endif
1298 	nmp = VFSTONFS(vp->v_mount);
1299 	tsiz = uiop->uio_resid;
1300 	mtx_lock(&nmp->nm_mtx);
1301 	end = uiop->uio_offset + tsiz;
1302 	if (end > nmp->nm_maxfilesize || end < uiop->uio_offset) {
1303 		mtx_unlock(&nmp->nm_mtx);
1304 		return (EFBIG);
1305 	}
1306 	rsize = nmp->nm_rsize;
1307 	mtx_unlock(&nmp->nm_mtx);
1308 	while (tsiz > 0) {
1309 		nfsstats.rpccnt[NFSPROC_READ]++;
1310 		len = (tsiz > rsize) ? rsize : tsiz;
1311 		mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1312 		mb = mreq;
1313 		bpos = mtod(mb, caddr_t);
1314 		nfsm_fhtom(vp, v3);
1315 		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
1316 		if (v3) {
1317 			txdr_hyper(uiop->uio_offset, tl);
1318 			*(tl + 2) = txdr_unsigned(len);
1319 		} else {
1320 			*tl++ = txdr_unsigned(uiop->uio_offset);
1321 			*tl++ = txdr_unsigned(len);
1322 			*tl = 0;
1323 		}
1324 		nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
1325 		if (v3) {
1326 			nfsm_postop_attr(vp, attrflag);
1327 			if (error) {
1328 				m_freem(mrep);
1329 				goto nfsmout;
1330 			}
1331 			tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
1332 			eof = fxdr_unsigned(int, *(tl + 1));
1333 		} else {
1334 			nfsm_loadattr(vp, NULL);
1335 		}
1336 		nfsm_strsiz(retlen, rsize);
1337 		nfsm_mtouio(uiop, retlen);
1338 		m_freem(mrep);
1339 		tsiz -= retlen;
1340 		if (v3) {
1341 			if (eof || retlen == 0) {
1342 				tsiz = 0;
1343 			}
1344 		} else if (retlen < len) {
1345 			tsiz = 0;
1346 		}
1347 	}
1348 nfsmout:
1349 	return (error);
1350 }
1351 
1352 /*
1353  * nfs write call
1354  */
1355 int
nfs_writerpc(struct vnode * vp,struct uio * uiop,struct ucred * cred,int * iomode,int * must_commit)1356 nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
1357 	     int *iomode, int *must_commit)
1358 {
1359 	u_int32_t *tl;
1360 	int32_t backup;
1361 	caddr_t bpos, dpos;
1362 	struct mbuf *mreq, *mrep, *md, *mb;
1363 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1364 	off_t end;
1365 	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1366 	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
1367 	int wsize;
1368 
1369 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1370 	*must_commit = 0;
1371 	tsiz = uiop->uio_resid;
1372 	mtx_lock(&nmp->nm_mtx);
1373 	end = uiop->uio_offset + tsiz;
1374 	if (end > nmp->nm_maxfilesize || end < uiop->uio_offset) {
1375 		mtx_unlock(&nmp->nm_mtx);
1376 		return (EFBIG);
1377 	}
1378 	wsize = nmp->nm_wsize;
1379 	mtx_unlock(&nmp->nm_mtx);
1380 	while (tsiz > 0) {
1381 		nfsstats.rpccnt[NFSPROC_WRITE]++;
1382 		len = (tsiz > wsize) ? wsize : tsiz;
1383 		mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
1384 			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1385 		mb = mreq;
1386 		bpos = mtod(mb, caddr_t);
1387 		nfsm_fhtom(vp, v3);
1388 		if (v3) {
1389 			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
1390 			txdr_hyper(uiop->uio_offset, tl);
1391 			tl += 2;
1392 			*tl++ = txdr_unsigned(len);
1393 			*tl++ = txdr_unsigned(*iomode);
1394 			*tl = txdr_unsigned(len);
1395 		} else {
1396 			u_int32_t x;
1397 
1398 			tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
1399 			/* Set both "begin" and "current" to non-garbage. */
1400 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1401 			*tl++ = x;	/* "begin offset" */
1402 			*tl++ = x;	/* "current offset" */
1403 			x = txdr_unsigned(len);
1404 			*tl++ = x;	/* total to this offset */
1405 			*tl = x;	/* size of this write */
1406 		}
1407 		nfsm_uiotom(uiop, len);
1408 		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
1409 		if (v3) {
1410 			wccflag = NFSV3_WCCCHK;
1411 			nfsm_wcc_data(vp, wccflag);
1412 			if (!error) {
1413 				tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
1414 					+ NFSX_V3WRITEVERF);
1415 				rlen = fxdr_unsigned(int, *tl++);
1416 				if (rlen == 0) {
1417 					error = NFSERR_IO;
1418 					m_freem(mrep);
1419 					break;
1420 				} else if (rlen < len) {
1421 					backup = len - rlen;
1422 					uiop->uio_iov->iov_base =
1423 					    (char *)uiop->uio_iov->iov_base -
1424 					    backup;
1425 					uiop->uio_iov->iov_len += backup;
1426 					uiop->uio_offset -= backup;
1427 					uiop->uio_resid += backup;
1428 					len = rlen;
1429 				}
1430 				commit = fxdr_unsigned(int, *tl++);
1431 
1432 				/*
1433 				 * Return the lowest committment level
1434 				 * obtained by any of the RPCs.
1435 				 */
1436 				if (committed == NFSV3WRITE_FILESYNC)
1437 					committed = commit;
1438 				else if (committed == NFSV3WRITE_DATASYNC &&
1439 					commit == NFSV3WRITE_UNSTABLE)
1440 					committed = commit;
1441 				mtx_lock(&nmp->nm_mtx);
1442 				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
1443 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1444 					NFSX_V3WRITEVERF);
1445 				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
1446 				} else if (bcmp((caddr_t)tl,
1447 				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1448 				    *must_commit = 1;
1449 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1450 					NFSX_V3WRITEVERF);
1451 				}
1452 				mtx_unlock(&nmp->nm_mtx);
1453 			}
1454 		} else {
1455 			nfsm_loadattr(vp, NULL);
1456 		}
1457 		if (wccflag) {
1458 			mtx_lock(&(VTONFS(vp))->n_mtx);
1459 			VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
1460 			mtx_unlock(&(VTONFS(vp))->n_mtx);
1461 		}
1462 		m_freem(mrep);
1463 		if (error)
1464 			break;
1465 		tsiz -= len;
1466 	}
1467 nfsmout:
1468 	if (DOINGASYNC(vp))
1469 		committed = NFSV3WRITE_FILESYNC;
1470 	*iomode = committed;
1471 	if (error)
1472 		uiop->uio_resid = tsiz;
1473 	return (error);
1474 }
1475 
1476 /*
1477  * nfs mknod rpc
1478  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1479  * mode set to specify the file type and the size field for rdev.
1480  */
1481 static int
nfs_mknodrpc(struct vnode * dvp,struct vnode ** vpp,struct componentname * cnp,struct vattr * vap)1482 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
1483     struct vattr *vap)
1484 {
1485 	struct nfsv2_sattr *sp;
1486 	u_int32_t *tl;
1487 	struct vnode *newvp = NULL;
1488 	struct nfsnode *np = NULL;
1489 	struct vattr vattr;
1490 	caddr_t bpos, dpos;
1491 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1492 	struct mbuf *mreq, *mrep, *md, *mb;
1493 	u_int32_t rdev;
1494 	int v3 = NFS_ISV3(dvp);
1495 
1496 	if (vap->va_type == VCHR || vap->va_type == VBLK)
1497 		rdev = txdr_unsigned(vap->va_rdev);
1498 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
1499 		rdev = nfs_xdrneg1;
1500 	else {
1501 		return (EOPNOTSUPP);
1502 	}
1503 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
1504 		return (error);
1505 	nfsstats.rpccnt[NFSPROC_MKNOD]++;
1506 	mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
1507 		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1508 	mb = mreq;
1509 	bpos = mtod(mb, caddr_t);
1510 	nfsm_fhtom(dvp, v3);
1511 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1512 	if (v3) {
1513 		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
1514 		*tl++ = vtonfsv3_type(vap->va_type);
1515 		nfsm_v3attrbuild(vap, FALSE);
1516 		if (vap->va_type == VCHR || vap->va_type == VBLK) {
1517 			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
1518 			*tl++ = txdr_unsigned(major(vap->va_rdev));
1519 			*tl = txdr_unsigned(minor(vap->va_rdev));
1520 		}
1521 	} else {
1522 		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1523 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1524 		sp->sa_uid = nfs_xdrneg1;
1525 		sp->sa_gid = nfs_xdrneg1;
1526 		sp->sa_size = rdev;
1527 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1528 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1529 	}
1530 	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
1531 	if (!error) {
1532 		nfsm_mtofh(dvp, newvp, v3, gotvp);
1533 		if (!gotvp) {
1534 			if (newvp) {
1535 				vput(newvp);
1536 				newvp = NULL;
1537 			}
1538 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1539 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
1540 			if (!error)
1541 				newvp = NFSTOV(np);
1542 		}
1543 	}
1544 	if (v3)
1545 		nfsm_wcc_data(dvp, wccflag);
1546 	m_freem(mrep);
1547 nfsmout:
1548 	if (error) {
1549 		if (newvp)
1550 			vput(newvp);
1551 	} else {
1552 		*vpp = newvp;
1553 	}
1554 	mtx_lock(&(VTONFS(dvp))->n_mtx);
1555 	VTONFS(dvp)->n_flag |= NMODIFIED;
1556 	if (!wccflag) {
1557 		VTONFS(dvp)->n_attrstamp = 0;
1558 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
1559 	}
1560 	mtx_unlock(&(VTONFS(dvp))->n_mtx);
1561 	return (error);
1562 }
1563 
1564 /*
1565  * nfs mknod vop
1566  * just call nfs_mknodrpc() to do the work.
1567  */
1568 /* ARGSUSED */
1569 static int
nfs_mknod(struct vop_mknod_args * ap)1570 nfs_mknod(struct vop_mknod_args *ap)
1571 {
1572 	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
1573 }
1574 
1575 static u_long create_verf;
1576 /*
1577  * nfs file create call
1578  */
1579 static int
nfs_create(struct vop_create_args * ap)1580 nfs_create(struct vop_create_args *ap)
1581 {
1582 	struct vnode *dvp = ap->a_dvp;
1583 	struct vattr *vap = ap->a_vap;
1584 	struct componentname *cnp = ap->a_cnp;
1585 	struct nfsv2_sattr *sp;
1586 	u_int32_t *tl;
1587 	struct nfsnode *np = NULL;
1588 	struct vnode *newvp = NULL;
1589 	caddr_t bpos, dpos;
1590 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1591 	struct mbuf *mreq, *mrep, *md, *mb;
1592 	struct vattr vattr;
1593 	int v3 = NFS_ISV3(dvp);
1594 
1595 	/*
1596 	 * Oops, not for me..
1597 	 */
1598 	if (vap->va_type == VSOCK) {
1599 		error = nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap);
1600 		return (error);
1601 	}
1602 
1603 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) {
1604 		return (error);
1605 	}
1606 	if (vap->va_vaflags & VA_EXCLUSIVE)
1607 		fmode |= O_EXCL;
1608 again:
1609 	nfsstats.rpccnt[NFSPROC_CREATE]++;
1610 	mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
1611 		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1612 	mb = mreq;
1613 	bpos = mtod(mb, caddr_t);
1614 	nfsm_fhtom(dvp, v3);
1615 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1616 	if (v3) {
1617 		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
1618 		if (fmode & O_EXCL) {
1619 			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1620 			tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
1621 #ifdef INET
1622 			CURVNET_SET(CRED_TO_VNET(cnp->cn_cred));
1623 			IN_IFADDR_RLOCK();
1624 			if (!TAILQ_EMPTY(&V_in_ifaddrhead))
1625 				*tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr;
1626 			else
1627 #endif
1628 				*tl++ = create_verf;
1629 #ifdef INET
1630 			IN_IFADDR_RUNLOCK();
1631 			CURVNET_RESTORE();
1632 #endif
1633 			*tl = ++create_verf;
1634 		} else {
1635 			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1636 			nfsm_v3attrbuild(vap, FALSE);
1637 		}
1638 	} else {
1639 		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1640 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1641 		sp->sa_uid = nfs_xdrneg1;
1642 		sp->sa_gid = nfs_xdrneg1;
1643 		sp->sa_size = 0;
1644 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1645 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1646 	}
1647 	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
1648 	if (!error) {
1649 		nfsm_mtofh(dvp, newvp, v3, gotvp);
1650 		if (!gotvp) {
1651 			if (newvp) {
1652 				vput(newvp);
1653 				newvp = NULL;
1654 			}
1655 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1656 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
1657 			if (!error)
1658 				newvp = NFSTOV(np);
1659 		}
1660 	}
1661 	if (v3)
1662 		nfsm_wcc_data(dvp, wccflag);
1663 	m_freem(mrep);
1664 nfsmout:
1665 	if (error) {
1666 		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1667 			fmode &= ~O_EXCL;
1668 			goto again;
1669 		}
1670 		if (newvp)
1671 			vput(newvp);
1672 	} else if (v3 && (fmode & O_EXCL)) {
1673 		/*
1674 		 * We are normally called with only a partially initialized
1675 		 * VAP.  Since the NFSv3 spec says that server may use the
1676 		 * file attributes to store the verifier, the spec requires
1677 		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
1678 		 * in atime, but we can't really assume that all servers will
1679 		 * so we ensure that our SETATTR sets both atime and mtime.
1680 		 */
1681 		if (vap->va_mtime.tv_sec == VNOVAL)
1682 			vfs_timestamp(&vap->va_mtime);
1683 		if (vap->va_atime.tv_sec == VNOVAL)
1684 			vap->va_atime = vap->va_mtime;
1685 		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred);
1686 		if (error)
1687 			vput(newvp);
1688 	}
1689 	if (!error) {
1690 		*ap->a_vpp = newvp;
1691 	}
1692 	mtx_lock(&(VTONFS(dvp))->n_mtx);
1693 	VTONFS(dvp)->n_flag |= NMODIFIED;
1694 	if (!wccflag) {
1695 		VTONFS(dvp)->n_attrstamp = 0;
1696 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
1697 	}
1698 	mtx_unlock(&(VTONFS(dvp))->n_mtx);
1699 	return (error);
1700 }
1701 
1702 /*
1703  * nfs file remove call
1704  * To try and make nfs semantics closer to ufs semantics, a file that has
1705  * other processes using the vnode is renamed instead of removed and then
1706  * removed later on the last close.
1707  * - If v_usecount > 1
1708  *	  If a rename is not already in the works
1709  *	     call nfs_sillyrename() to set it up
1710  *     else
1711  *	  do the remove rpc
1712  */
1713 static int
nfs_remove(struct vop_remove_args * ap)1714 nfs_remove(struct vop_remove_args *ap)
1715 {
1716 	struct vnode *vp = ap->a_vp;
1717 	struct vnode *dvp = ap->a_dvp;
1718 	struct componentname *cnp = ap->a_cnp;
1719 	struct nfsnode *np = VTONFS(vp);
1720 	int error = 0;
1721 	struct vattr vattr;
1722 
1723 	KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
1724 	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
1725 	if (vp->v_type == VDIR)
1726 		error = EPERM;
1727 	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
1728 	    !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) {
1729 		/*
1730 		 * Purge the name cache so that the chance of a lookup for
1731 		 * the name succeeding while the remove is in progress is
1732 		 * minimized. Without node locking it can still happen, such
1733 		 * that an I/O op returns ESTALE, but since you get this if
1734 		 * another host removes the file..
1735 		 */
1736 		cache_purge(vp);
1737 		/*
1738 		 * throw away biocache buffers, mainly to avoid
1739 		 * unnecessary delayed writes later.
1740 		 */
1741 		error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
1742 		/* Do the rpc */
1743 		if (error != EINTR && error != EIO)
1744 			error = nfs_removerpc(dvp, cnp->cn_nameptr,
1745 				cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
1746 		/*
1747 		 * Kludge City: If the first reply to the remove rpc is lost..
1748 		 *   the reply to the retransmitted request will be ENOENT
1749 		 *   since the file was in fact removed
1750 		 *   Therefore, we cheat and return success.
1751 		 */
1752 		if (error == ENOENT)
1753 			error = 0;
1754 	} else if (!np->n_sillyrename)
1755 		error = nfs_sillyrename(dvp, vp, cnp);
1756 	mtx_lock(&np->n_mtx);
1757 	np->n_attrstamp = 0;
1758 	mtx_unlock(&np->n_mtx);
1759 	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
1760 	return (error);
1761 }
1762 
1763 /*
1764  * nfs file remove rpc called from nfs_inactive
1765  */
1766 int
nfs_removeit(struct sillyrename * sp)1767 nfs_removeit(struct sillyrename *sp)
1768 {
1769 	/*
1770 	 * Make sure that the directory vnode is still valid.
1771 	 * XXX we should lock sp->s_dvp here.
1772 	 */
1773 	if (sp->s_dvp->v_type == VBAD)
1774 		return (0);
1775 	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1776 		NULL));
1777 }
1778 
1779 /*
1780  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1781  */
1782 static int
nfs_removerpc(struct vnode * dvp,const char * name,int namelen,struct ucred * cred,struct thread * td)1783 nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
1784     struct ucred *cred, struct thread *td)
1785 {
1786 	caddr_t bpos, dpos;
1787 	int error = 0, wccflag = NFSV3_WCCRATTR;
1788 	struct mbuf *mreq, *mrep, *md, *mb;
1789 	int v3 = NFS_ISV3(dvp);
1790 
1791 	nfsstats.rpccnt[NFSPROC_REMOVE]++;
1792 	mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
1793 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1794 	mb = mreq;
1795 	bpos = mtod(mb, caddr_t);
1796 	nfsm_fhtom(dvp, v3);
1797 	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
1798 	nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
1799 	if (v3)
1800 		nfsm_wcc_data(dvp, wccflag);
1801 	m_freem(mrep);
1802 nfsmout:
1803 	mtx_lock(&(VTONFS(dvp))->n_mtx);
1804 	VTONFS(dvp)->n_flag |= NMODIFIED;
1805 	if (!wccflag) {
1806 		VTONFS(dvp)->n_attrstamp = 0;
1807 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
1808 	}
1809 	mtx_unlock(&(VTONFS(dvp))->n_mtx);
1810 	return (error);
1811 }
1812 
1813 /*
1814  * nfs file rename call
1815  */
1816 static int
nfs_rename(struct vop_rename_args * ap)1817 nfs_rename(struct vop_rename_args *ap)
1818 {
1819 	struct vnode *fvp = ap->a_fvp;
1820 	struct vnode *tvp = ap->a_tvp;
1821 	struct vnode *fdvp = ap->a_fdvp;
1822 	struct vnode *tdvp = ap->a_tdvp;
1823 	struct componentname *tcnp = ap->a_tcnp;
1824 	struct componentname *fcnp = ap->a_fcnp;
1825 	int error;
1826 
1827 	KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
1828 	    (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
1829 	/* Check for cross-device rename */
1830 	if ((fvp->v_mount != tdvp->v_mount) ||
1831 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1832 		error = EXDEV;
1833 		goto out;
1834 	}
1835 
1836 	if (fvp == tvp) {
1837 		nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
1838 		error = 0;
1839 		goto out;
1840 	}
1841 	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
1842 		goto out;
1843 
1844 	/*
1845 	 * We have to flush B_DELWRI data prior to renaming
1846 	 * the file.  If we don't, the delayed-write buffers
1847 	 * can be flushed out later after the file has gone stale
1848 	 * under NFSV3.  NFSV2 does not have this problem because
1849 	 * ( as far as I can tell ) it flushes dirty buffers more
1850 	 * often.
1851 	 *
1852 	 * Skip the rename operation if the fsync fails, this can happen
1853 	 * due to the server's volume being full, when we pushed out data
1854 	 * that was written back to our cache earlier. Not checking for
1855 	 * this condition can result in potential (silent) data loss.
1856 	 */
1857 	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
1858 	VOP_UNLOCK(fvp, 0);
1859 	if (!error && tvp)
1860 		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
1861 	if (error)
1862 		goto out;
1863 
1864 	/*
1865 	 * If the tvp exists and is in use, sillyrename it before doing the
1866 	 * rename of the new file over it.
1867 	 * XXX Can't sillyrename a directory.
1868 	 */
1869 	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
1870 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
1871 		vput(tvp);
1872 		tvp = NULL;
1873 	}
1874 
1875 	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
1876 		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
1877 		tcnp->cn_thread);
1878 
1879 	if (fvp->v_type == VDIR) {
1880 		if (tvp != NULL && tvp->v_type == VDIR)
1881 			cache_purge(tdvp);
1882 		cache_purge(fdvp);
1883 	}
1884 
1885 out:
1886 	if (tdvp == tvp)
1887 		vrele(tdvp);
1888 	else
1889 		vput(tdvp);
1890 	if (tvp)
1891 		vput(tvp);
1892 	vrele(fdvp);
1893 	vrele(fvp);
1894 	/*
1895 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
1896 	 */
1897 	if (error == ENOENT)
1898 		error = 0;
1899 	return (error);
1900 }
1901 
1902 /*
1903  * nfs file rename rpc called from nfs_remove() above
1904  */
1905 static int
nfs_renameit(struct vnode * sdvp,struct componentname * scnp,struct sillyrename * sp)1906 nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
1907     struct sillyrename *sp)
1908 {
1909 
1910 	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
1911 	    sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
1912 }
1913 
1914 /*
1915  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1916  */
1917 static int
nfs_renamerpc(struct vnode * fdvp,const char * fnameptr,int fnamelen,struct vnode * tdvp,const char * tnameptr,int tnamelen,struct ucred * cred,struct thread * td)1918 nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
1919     struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
1920     struct thread *td)
1921 {
1922 	caddr_t bpos, dpos;
1923 	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
1924 	struct mbuf *mreq, *mrep, *md, *mb;
1925 	int v3 = NFS_ISV3(fdvp);
1926 
1927 	nfsstats.rpccnt[NFSPROC_RENAME]++;
1928 	mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
1929 		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
1930 		nfsm_rndup(tnamelen));
1931 	mb = mreq;
1932 	bpos = mtod(mb, caddr_t);
1933 	nfsm_fhtom(fdvp, v3);
1934 	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
1935 	nfsm_fhtom(tdvp, v3);
1936 	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
1937 	nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
1938 	if (v3) {
1939 		nfsm_wcc_data(fdvp, fwccflag);
1940 		nfsm_wcc_data(tdvp, twccflag);
1941 	}
1942 	m_freem(mrep);
1943 nfsmout:
1944 	mtx_lock(&(VTONFS(fdvp))->n_mtx);
1945 	VTONFS(fdvp)->n_flag |= NMODIFIED;
1946 	mtx_unlock(&(VTONFS(fdvp))->n_mtx);
1947 	mtx_lock(&(VTONFS(tdvp))->n_mtx);
1948 	VTONFS(tdvp)->n_flag |= NMODIFIED;
1949 	mtx_unlock(&(VTONFS(tdvp))->n_mtx);
1950 	if (!fwccflag) {
1951 		VTONFS(fdvp)->n_attrstamp = 0;
1952 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
1953 	}
1954 	if (!twccflag) {
1955 		VTONFS(tdvp)->n_attrstamp = 0;
1956 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
1957 	}
1958 	return (error);
1959 }
1960 
1961 /*
1962  * nfs hard link create call
1963  */
1964 static int
nfs_link(struct vop_link_args * ap)1965 nfs_link(struct vop_link_args *ap)
1966 {
1967 	struct vnode *vp = ap->a_vp;
1968 	struct vnode *tdvp = ap->a_tdvp;
1969 	struct componentname *cnp = ap->a_cnp;
1970 	caddr_t bpos, dpos;
1971 	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
1972 	struct mbuf *mreq, *mrep, *md, *mb;
1973 	int v3;
1974 
1975 	if (vp->v_mount != tdvp->v_mount) {
1976 		return (EXDEV);
1977 	}
1978 
1979 	/*
1980 	 * Push all writes to the server, so that the attribute cache
1981 	 * doesn't get "out of sync" with the server.
1982 	 * XXX There should be a better way!
1983 	 */
1984 	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
1985 
1986 	v3 = NFS_ISV3(vp);
1987 	nfsstats.rpccnt[NFSPROC_LINK]++;
1988 	mreq = nfsm_reqhead(vp, NFSPROC_LINK,
1989 		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1990 	mb = mreq;
1991 	bpos = mtod(mb, caddr_t);
1992 	nfsm_fhtom(vp, v3);
1993 	nfsm_fhtom(tdvp, v3);
1994 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1995 	nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
1996 	if (v3) {
1997 		nfsm_postop_attr(vp, attrflag);
1998 		nfsm_wcc_data(tdvp, wccflag);
1999 	}
2000 	m_freem(mrep);
2001 nfsmout:
2002 	mtx_lock(&(VTONFS(tdvp))->n_mtx);
2003 	VTONFS(tdvp)->n_flag |= NMODIFIED;
2004 	mtx_unlock(&(VTONFS(tdvp))->n_mtx);
2005 	if (!attrflag) {
2006 		VTONFS(vp)->n_attrstamp = 0;
2007 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
2008 	}
2009 	if (!wccflag) {
2010 		VTONFS(tdvp)->n_attrstamp = 0;
2011 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
2012 	}
2013 	return (error);
2014 }
2015 
2016 /*
2017  * nfs symbolic link create call
2018  */
2019 static int
nfs_symlink(struct vop_symlink_args * ap)2020 nfs_symlink(struct vop_symlink_args *ap)
2021 {
2022 	struct vnode *dvp = ap->a_dvp;
2023 	struct vattr *vap = ap->a_vap;
2024 	struct componentname *cnp = ap->a_cnp;
2025 	struct nfsv2_sattr *sp;
2026 	caddr_t bpos, dpos;
2027 	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
2028 	struct mbuf *mreq, *mrep, *md, *mb;
2029 	struct vnode *newvp = NULL;
2030 	int v3 = NFS_ISV3(dvp);
2031 
2032 	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
2033 	slen = strlen(ap->a_target);
2034 	mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
2035 	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
2036 	mb = mreq;
2037 	bpos = mtod(mb, caddr_t);
2038 	nfsm_fhtom(dvp, v3);
2039 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2040 	if (v3) {
2041 		nfsm_v3attrbuild(vap, FALSE);
2042 	}
2043 	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
2044 	if (!v3) {
2045 		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
2046 		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
2047 		sp->sa_uid = nfs_xdrneg1;
2048 		sp->sa_gid = nfs_xdrneg1;
2049 		sp->sa_size = nfs_xdrneg1;
2050 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2051 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2052 	}
2053 
2054 	/*
2055 	 * Issue the NFS request and get the rpc response.
2056 	 *
2057 	 * Only NFSv3 responses returning an error of 0 actually return
2058 	 * a file handle that can be converted into newvp without having
2059 	 * to do an extra lookup rpc.
2060 	 */
2061 	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
2062 	if (v3) {
2063 		if (error == 0)
2064 			nfsm_mtofh(dvp, newvp, v3, gotvp);
2065 		nfsm_wcc_data(dvp, wccflag);
2066 	}
2067 
2068 	/*
2069 	 * out code jumps -> here, mrep is also freed.
2070 	 */
2071 
2072 	m_freem(mrep);
2073 nfsmout:
2074 
2075 	/*
2076 	 * If we do not have an error and we could not extract the newvp from
2077 	 * the response due to the request being NFSv2, we have to do a
2078 	 * lookup in order to obtain a newvp to return.
2079 	 */
2080 	if (error == 0 && newvp == NULL) {
2081 		struct nfsnode *np = NULL;
2082 
2083 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
2084 		    cnp->cn_cred, cnp->cn_thread, &np);
2085 		if (!error)
2086 			newvp = NFSTOV(np);
2087 	}
2088 	if (error) {
2089 		if (newvp)
2090 			vput(newvp);
2091 	} else {
2092 		*ap->a_vpp = newvp;
2093 	}
2094 	mtx_lock(&(VTONFS(dvp))->n_mtx);
2095 	VTONFS(dvp)->n_flag |= NMODIFIED;
2096 	mtx_unlock(&(VTONFS(dvp))->n_mtx);
2097 	if (!wccflag) {
2098 		VTONFS(dvp)->n_attrstamp = 0;
2099 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
2100 	}
2101 	return (error);
2102 }
2103 
2104 /*
2105  * nfs make dir call
2106  */
2107 static int
nfs_mkdir(struct vop_mkdir_args * ap)2108 nfs_mkdir(struct vop_mkdir_args *ap)
2109 {
2110 	struct vnode *dvp = ap->a_dvp;
2111 	struct vattr *vap = ap->a_vap;
2112 	struct componentname *cnp = ap->a_cnp;
2113 	struct nfsv2_sattr *sp;
2114 	int len;
2115 	struct nfsnode *np = NULL;
2116 	struct vnode *newvp = NULL;
2117 	caddr_t bpos, dpos;
2118 	int error = 0, wccflag = NFSV3_WCCRATTR;
2119 	int gotvp = 0;
2120 	struct mbuf *mreq, *mrep, *md, *mb;
2121 	struct vattr vattr;
2122 	int v3 = NFS_ISV3(dvp);
2123 
2124 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
2125 		return (error);
2126 	len = cnp->cn_namelen;
2127 	nfsstats.rpccnt[NFSPROC_MKDIR]++;
2128 	mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
2129 	  NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
2130 	mb = mreq;
2131 	bpos = mtod(mb, caddr_t);
2132 	nfsm_fhtom(dvp, v3);
2133 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
2134 	if (v3) {
2135 		nfsm_v3attrbuild(vap, FALSE);
2136 	} else {
2137 		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
2138 		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
2139 		sp->sa_uid = nfs_xdrneg1;
2140 		sp->sa_gid = nfs_xdrneg1;
2141 		sp->sa_size = nfs_xdrneg1;
2142 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2143 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2144 	}
2145 	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
2146 	if (!error)
2147 		nfsm_mtofh(dvp, newvp, v3, gotvp);
2148 	if (v3)
2149 		nfsm_wcc_data(dvp, wccflag);
2150 	m_freem(mrep);
2151 nfsmout:
2152 	mtx_lock(&(VTONFS(dvp))->n_mtx);
2153 	VTONFS(dvp)->n_flag |= NMODIFIED;
2154 	mtx_unlock(&(VTONFS(dvp))->n_mtx);
2155 	if (!wccflag) {
2156 		VTONFS(dvp)->n_attrstamp = 0;
2157 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
2158 	}
2159 	if (error == 0 && newvp == NULL) {
2160 		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
2161 			cnp->cn_thread, &np);
2162 		if (!error) {
2163 			newvp = NFSTOV(np);
2164 			if (newvp->v_type != VDIR)
2165 				error = EEXIST;
2166 		}
2167 	}
2168 	if (error) {
2169 		if (newvp)
2170 			vput(newvp);
2171 	} else
2172 		*ap->a_vpp = newvp;
2173 	return (error);
2174 }
2175 
2176 /*
2177  * nfs remove directory call
2178  */
2179 static int
nfs_rmdir(struct vop_rmdir_args * ap)2180 nfs_rmdir(struct vop_rmdir_args *ap)
2181 {
2182 	struct vnode *vp = ap->a_vp;
2183 	struct vnode *dvp = ap->a_dvp;
2184 	struct componentname *cnp = ap->a_cnp;
2185 	caddr_t bpos, dpos;
2186 	int error = 0, wccflag = NFSV3_WCCRATTR;
2187 	struct mbuf *mreq, *mrep, *md, *mb;
2188 	int v3 = NFS_ISV3(dvp);
2189 
2190 	if (dvp == vp)
2191 		return (EINVAL);
2192 	nfsstats.rpccnt[NFSPROC_RMDIR]++;
2193 	mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
2194 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
2195 	mb = mreq;
2196 	bpos = mtod(mb, caddr_t);
2197 	nfsm_fhtom(dvp, v3);
2198 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2199 	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
2200 	if (v3)
2201 		nfsm_wcc_data(dvp, wccflag);
2202 	m_freem(mrep);
2203 nfsmout:
2204 	mtx_lock(&(VTONFS(dvp))->n_mtx);
2205 	VTONFS(dvp)->n_flag |= NMODIFIED;
2206 	mtx_unlock(&(VTONFS(dvp))->n_mtx);
2207 	if (!wccflag) {
2208 		VTONFS(dvp)->n_attrstamp = 0;
2209 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
2210 	}
2211 	cache_purge(dvp);
2212 	cache_purge(vp);
2213 	/*
2214 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2215 	 */
2216 	if (error == ENOENT)
2217 		error = 0;
2218 	return (error);
2219 }
2220 
2221 /*
2222  * nfs readdir call
2223  */
2224 static int
nfs_readdir(struct vop_readdir_args * ap)2225 nfs_readdir(struct vop_readdir_args *ap)
2226 {
2227 	struct vnode *vp = ap->a_vp;
2228 	struct nfsnode *np = VTONFS(vp);
2229 	struct uio *uio = ap->a_uio;
2230 	int tresid, error = 0;
2231 	struct vattr vattr;
2232 
2233 	if (vp->v_type != VDIR)
2234 		return(EPERM);
2235 
2236 	/*
2237 	 * First, check for hit on the EOF offset cache
2238 	 */
2239 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2240 	    (np->n_flag & NMODIFIED) == 0) {
2241 		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
2242 			mtx_lock(&np->n_mtx);
2243 			if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
2244 				mtx_unlock(&np->n_mtx);
2245 				nfsstats.direofcache_hits++;
2246 				goto out;
2247 			} else
2248 				mtx_unlock(&np->n_mtx);
2249 		}
2250 	}
2251 
2252 	/*
2253 	 * Call nfs_bioread() to do the real work.
2254 	 */
2255 	tresid = uio->uio_resid;
2256 	error = nfs_bioread(vp, uio, 0, ap->a_cred);
2257 
2258 	if (!error && uio->uio_resid == tresid) {
2259 		nfsstats.direofcache_misses++;
2260 	}
2261 out:
2262 	return (error);
2263 }
2264 
2265 /*
2266  * Readdir rpc call.
2267  * Called from below the buffer cache by nfs_doio().
2268  */
2269 int
nfs_readdirrpc(struct vnode * vp,struct uio * uiop,struct ucred * cred)2270 nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
2271 {
2272 	int len, left;
2273 	struct dirent *dp = NULL;
2274 	u_int32_t *tl;
2275 	caddr_t cp;
2276 	nfsuint64 *cookiep;
2277 	caddr_t bpos, dpos;
2278 	struct mbuf *mreq, *mrep, *md, *mb;
2279 	nfsuint64 cookie;
2280 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2281 	struct nfsnode *dnp = VTONFS(vp);
2282 	u_quad_t fileno;
2283 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2284 	int attrflag;
2285 	int v3 = NFS_ISV3(vp);
2286 
2287 	KASSERT(uiop->uio_iovcnt == 1 &&
2288 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
2289 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
2290 	    ("nfs readdirrpc bad uio"));
2291 
2292 	/*
2293 	 * If there is no cookie, assume directory was stale.
2294 	 */
2295 	nfs_dircookie_lock(dnp);
2296 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2297 	if (cookiep) {
2298 		cookie = *cookiep;
2299 		nfs_dircookie_unlock(dnp);
2300 	} else {
2301 		nfs_dircookie_unlock(dnp);
2302 		return (NFSERR_BAD_COOKIE);
2303 	}
2304 
2305 	/*
2306 	 * Loop around doing readdir rpc's of size nm_readdirsize
2307 	 * truncated to a multiple of DIRBLKSIZ.
2308 	 * The stopping criteria is EOF or buffer full.
2309 	 */
2310 	while (more_dirs && bigenough) {
2311 		nfsstats.rpccnt[NFSPROC_READDIR]++;
2312 		mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2313 			NFSX_READDIR(v3));
2314 		mb = mreq;
2315 		bpos = mtod(mb, caddr_t);
2316 		nfsm_fhtom(vp, v3);
2317 		if (v3) {
2318 			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
2319 			*tl++ = cookie.nfsuquad[0];
2320 			*tl++ = cookie.nfsuquad[1];
2321 			mtx_lock(&dnp->n_mtx);
2322 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
2323 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
2324 			mtx_unlock(&dnp->n_mtx);
2325 		} else {
2326 			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
2327 			*tl++ = cookie.nfsuquad[0];
2328 		}
2329 		*tl = txdr_unsigned(nmp->nm_readdirsize);
2330 		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
2331 		if (v3) {
2332 			nfsm_postop_attr(vp, attrflag);
2333 			if (!error) {
2334 				tl = nfsm_dissect(u_int32_t *,
2335 				    2 * NFSX_UNSIGNED);
2336 				mtx_lock(&dnp->n_mtx);
2337 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
2338 				dnp->n_cookieverf.nfsuquad[1] = *tl;
2339 				mtx_unlock(&dnp->n_mtx);
2340 			} else {
2341 				m_freem(mrep);
2342 				goto nfsmout;
2343 			}
2344 		}
2345 		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2346 		more_dirs = fxdr_unsigned(int, *tl);
2347 
2348 		/* loop thru the dir entries, doctoring them to 4bsd form */
2349 		while (more_dirs && bigenough) {
2350 			if (v3) {
2351 				tl = nfsm_dissect(u_int32_t *,
2352 				    3 * NFSX_UNSIGNED);
2353 				fileno = fxdr_hyper(tl);
2354 				len = fxdr_unsigned(int, *(tl + 2));
2355 			} else {
2356 				tl = nfsm_dissect(u_int32_t *,
2357 				    2 * NFSX_UNSIGNED);
2358 				fileno = fxdr_unsigned(u_quad_t, *tl++);
2359 				len = fxdr_unsigned(int, *tl);
2360 			}
2361 			if (len <= 0 || len > NFS_MAXNAMLEN) {
2362 				error = EBADRPC;
2363 				m_freem(mrep);
2364 				goto nfsmout;
2365 			}
2366 			tlen = nfsm_rndup(len);
2367 			if (tlen == len)
2368 				tlen += 4;	/* To ensure null termination */
2369 			left = DIRBLKSIZ - blksiz;
2370 			if ((tlen + DIRHDSIZ) > left) {
2371 				dp->d_reclen += left;
2372 				uiop->uio_iov->iov_base =
2373 				    (char *)uiop->uio_iov->iov_base + left;
2374 				uiop->uio_iov->iov_len -= left;
2375 				uiop->uio_offset += left;
2376 				uiop->uio_resid -= left;
2377 				blksiz = 0;
2378 			}
2379 			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2380 				bigenough = 0;
2381 			if (bigenough) {
2382 				dp = (struct dirent *)uiop->uio_iov->iov_base;
2383 				dp->d_fileno = (int)fileno;
2384 				dp->d_namlen = len;
2385 				dp->d_reclen = tlen + DIRHDSIZ;
2386 				dp->d_type = DT_UNKNOWN;
2387 				blksiz += dp->d_reclen;
2388 				if (blksiz == DIRBLKSIZ)
2389 					blksiz = 0;
2390 				uiop->uio_offset += DIRHDSIZ;
2391 				uiop->uio_resid -= DIRHDSIZ;
2392 				uiop->uio_iov->iov_base =
2393 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
2394 				uiop->uio_iov->iov_len -= DIRHDSIZ;
2395 				nfsm_mtouio(uiop, len);
2396 				cp = uiop->uio_iov->iov_base;
2397 				tlen -= len;
2398 				*cp = '\0';	/* null terminate */
2399 				uiop->uio_iov->iov_base =
2400 				    (char *)uiop->uio_iov->iov_base + tlen;
2401 				uiop->uio_iov->iov_len -= tlen;
2402 				uiop->uio_offset += tlen;
2403 				uiop->uio_resid -= tlen;
2404 			} else
2405 				nfsm_adv(nfsm_rndup(len));
2406 			if (v3) {
2407 				tl = nfsm_dissect(u_int32_t *,
2408 				    3 * NFSX_UNSIGNED);
2409 			} else {
2410 				tl = nfsm_dissect(u_int32_t *,
2411 				    2 * NFSX_UNSIGNED);
2412 			}
2413 			if (bigenough) {
2414 				cookie.nfsuquad[0] = *tl++;
2415 				if (v3)
2416 					cookie.nfsuquad[1] = *tl++;
2417 			} else if (v3)
2418 				tl += 2;
2419 			else
2420 				tl++;
2421 			more_dirs = fxdr_unsigned(int, *tl);
2422 		}
2423 		/*
2424 		 * If at end of rpc data, get the eof boolean
2425 		 */
2426 		if (!more_dirs) {
2427 			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2428 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2429 		}
2430 		m_freem(mrep);
2431 	}
2432 	/*
2433 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2434 	 * by increasing d_reclen for the last record.
2435 	 */
2436 	if (blksiz > 0) {
2437 		left = DIRBLKSIZ - blksiz;
2438 		dp->d_reclen += left;
2439 		uiop->uio_iov->iov_base =
2440 		    (char *)uiop->uio_iov->iov_base + left;
2441 		uiop->uio_iov->iov_len -= left;
2442 		uiop->uio_offset += left;
2443 		uiop->uio_resid -= left;
2444 	}
2445 
2446 	/*
2447 	 * We are now either at the end of the directory or have filled the
2448 	 * block.
2449 	 */
2450 	if (bigenough)
2451 		dnp->n_direofoffset = uiop->uio_offset;
2452 	else {
2453 		if (uiop->uio_resid > 0)
2454 			nfs_printf("EEK! readdirrpc resid > 0\n");
2455 		nfs_dircookie_lock(dnp);
2456 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2457 		*cookiep = cookie;
2458 		nfs_dircookie_unlock(dnp);
2459 	}
2460 nfsmout:
2461 	return (error);
2462 }
2463 
2464 /*
2465  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2466  */
2467 int
nfs_readdirplusrpc(struct vnode * vp,struct uio * uiop,struct ucred * cred)2468 nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
2469 {
2470 	int len, left;
2471 	struct dirent *dp;
2472 	u_int32_t *tl;
2473 	caddr_t cp;
2474 	struct vnode *newvp;
2475 	nfsuint64 *cookiep;
2476 	caddr_t bpos, dpos, dpossav1, dpossav2;
2477 	struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
2478 	struct nameidata nami, *ndp = &nami;
2479 	struct componentname *cnp = &ndp->ni_cnd;
2480 	nfsuint64 cookie;
2481 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2482 	struct nfsnode *dnp = VTONFS(vp), *np;
2483 	struct vattr vattr, dvattr;
2484 	nfsfh_t *fhp;
2485 	u_quad_t fileno;
2486 	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2487 	int attrflag, dattrflag, fhsize;
2488 
2489 #ifndef nolint
2490 	dp = NULL;
2491 #endif
2492 	KASSERT(uiop->uio_iovcnt == 1 &&
2493 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
2494 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
2495 	    ("nfs readdirplusrpc bad uio"));
2496 	ndp->ni_dvp = vp;
2497 	newvp = NULLVP;
2498 
2499 	/*
2500 	 * If there is no cookie, assume directory was stale.
2501 	 */
2502 	nfs_dircookie_lock(dnp);
2503 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2504 	if (cookiep) {
2505 		cookie = *cookiep;
2506 		nfs_dircookie_unlock(dnp);
2507 	} else {
2508 		nfs_dircookie_unlock(dnp);
2509 		return (NFSERR_BAD_COOKIE);
2510 	}
2511 	/*
2512 	 * Loop around doing readdir rpc's of size nm_readdirsize
2513 	 * truncated to a multiple of DIRBLKSIZ.
2514 	 * The stopping criteria is EOF or buffer full.
2515 	 */
2516 	while (more_dirs && bigenough) {
2517 		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2518 		mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
2519 			NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2520 		mb = mreq;
2521 		bpos = mtod(mb, caddr_t);
2522 		nfsm_fhtom(vp, 1);
2523  		tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
2524 		*tl++ = cookie.nfsuquad[0];
2525 		*tl++ = cookie.nfsuquad[1];
2526 		mtx_lock(&dnp->n_mtx);
2527 		*tl++ = dnp->n_cookieverf.nfsuquad[0];
2528 		*tl++ = dnp->n_cookieverf.nfsuquad[1];
2529 		mtx_unlock(&dnp->n_mtx);
2530 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
2531 		*tl = txdr_unsigned(nmp->nm_rsize);
2532 		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
2533 		nfsm_postop_attr_va(vp, dattrflag, &dvattr);
2534 		if (error) {
2535 			m_freem(mrep);
2536 			goto nfsmout;
2537 		}
2538 		tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2539 		mtx_lock(&dnp->n_mtx);
2540 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
2541 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
2542 		mtx_unlock(&dnp->n_mtx);
2543 		more_dirs = fxdr_unsigned(int, *tl);
2544 
2545 		/* loop thru the dir entries, doctoring them to 4bsd form */
2546 		while (more_dirs && bigenough) {
2547 			tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2548 			fileno = fxdr_hyper(tl);
2549 			len = fxdr_unsigned(int, *(tl + 2));
2550 			if (len <= 0 || len > NFS_MAXNAMLEN) {
2551 				error = EBADRPC;
2552 				m_freem(mrep);
2553 				goto nfsmout;
2554 			}
2555 			tlen = nfsm_rndup(len);
2556 			if (tlen == len)
2557 				tlen += 4;	/* To ensure null termination*/
2558 			left = DIRBLKSIZ - blksiz;
2559 			if ((tlen + DIRHDSIZ) > left) {
2560 				dp->d_reclen += left;
2561 				uiop->uio_iov->iov_base =
2562 				    (char *)uiop->uio_iov->iov_base + left;
2563 				uiop->uio_iov->iov_len -= left;
2564 				uiop->uio_offset += left;
2565 				uiop->uio_resid -= left;
2566 				blksiz = 0;
2567 			}
2568 			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2569 				bigenough = 0;
2570 			if (bigenough) {
2571 				dp = (struct dirent *)uiop->uio_iov->iov_base;
2572 				dp->d_fileno = (int)fileno;
2573 				dp->d_namlen = len;
2574 				dp->d_reclen = tlen + DIRHDSIZ;
2575 				dp->d_type = DT_UNKNOWN;
2576 				blksiz += dp->d_reclen;
2577 				if (blksiz == DIRBLKSIZ)
2578 					blksiz = 0;
2579 				uiop->uio_offset += DIRHDSIZ;
2580 				uiop->uio_resid -= DIRHDSIZ;
2581 				uiop->uio_iov->iov_base =
2582 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
2583 				uiop->uio_iov->iov_len -= DIRHDSIZ;
2584 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
2585 				cnp->cn_namelen = len;
2586 				nfsm_mtouio(uiop, len);
2587 				cp = uiop->uio_iov->iov_base;
2588 				tlen -= len;
2589 				*cp = '\0';
2590 				uiop->uio_iov->iov_base =
2591 				    (char *)uiop->uio_iov->iov_base + tlen;
2592 				uiop->uio_iov->iov_len -= tlen;
2593 				uiop->uio_offset += tlen;
2594 				uiop->uio_resid -= tlen;
2595 			} else
2596 				nfsm_adv(nfsm_rndup(len));
2597 			tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2598 			if (bigenough) {
2599 				cookie.nfsuquad[0] = *tl++;
2600 				cookie.nfsuquad[1] = *tl++;
2601 			} else
2602 				tl += 2;
2603 
2604 			/*
2605 			 * Since the attributes are before the file handle
2606 			 * (sigh), we must skip over the attributes and then
2607 			 * come back and get them.
2608 			 */
2609 			attrflag = fxdr_unsigned(int, *tl);
2610 			if (attrflag) {
2611 			    dpossav1 = dpos;
2612 			    mdsav1 = md;
2613 			    nfsm_adv(NFSX_V3FATTR);
2614 			    tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2615 			    doit = fxdr_unsigned(int, *tl);
2616 			    /*
2617  			     * Skip loading the attrs for "..". There's a
2618  			     * race between loading the attrs here and
2619  			     * lookups that look for the directory currently
2620  			     * being read (in the parent). We try to acquire
2621  			     * the exclusive lock on ".." here, owning the
2622  			     * lock on the directory being read. Lookup will
2623  			     * hold the lock on ".." and try to acquire the
2624  			     * lock on the directory being read.
2625  			     *
2626  			     * There are other ways of fixing this, one would
2627  			     * be to do a trylock on the ".." vnode and skip
2628  			     * loading the attrs on ".." if it happens to be
2629  			     * locked by another process. But skipping the
2630  			     * attrload on ".." seems the easiest option.
2631  			     */
2632  			    if (strcmp(dp->d_name, "..") == 0) {
2633  				    doit = 0;
2634  				    /*
2635  				     * We've already skipped over the attrs,
2636  				     * skip over the filehandle. And store d_type
2637  				     * as VDIR.
2638  				     */
2639  				    tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2640  				    i = fxdr_unsigned(int, *tl);
2641  				    nfsm_adv(nfsm_rndup(i));
2642  				    dp->d_type = IFTODT(VTTOIF(VDIR));
2643  			    }
2644 			    if (doit) {
2645 				nfsm_getfh(fhp, fhsize, 1);
2646 				if (NFS_CMPFH(dnp, fhp, fhsize)) {
2647 				    VREF(vp);
2648 				    newvp = vp;
2649 				    np = dnp;
2650 				} else {
2651 				    error = nfs_nget(vp->v_mount, fhp,
2652 					fhsize, &np, LK_EXCLUSIVE);
2653 				    if (error)
2654 					doit = 0;
2655 				    else
2656 					newvp = NFSTOV(np);
2657 				}
2658 			    }
2659 			    if (doit && bigenough) {
2660 				dpossav2 = dpos;
2661 				dpos = dpossav1;
2662 				mdsav2 = md;
2663 				md = mdsav1;
2664 				nfsm_loadattr(newvp, &vattr);
2665 				dpos = dpossav2;
2666 				md = mdsav2;
2667 				dp->d_type = IFTODT(VTTOIF(vattr.va_type));
2668 				ndp->ni_vp = newvp;
2669 				if (newvp->v_type != VDIR || dattrflag != 0)
2670 				    cache_enter_time(ndp->ni_dvp, ndp->ni_vp,
2671 					cnp, &vattr.va_ctime,
2672 					newvp->v_type != VDIR ? NULL :
2673 					&dvattr.va_ctime);
2674 			    }
2675 			} else {
2676 			    /* Just skip over the file handle */
2677 			    tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2678 			    i = fxdr_unsigned(int, *tl);
2679 			    if (i) {
2680 				    tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2681 				    fhsize = fxdr_unsigned(int, *tl);
2682 				    nfsm_adv(nfsm_rndup(fhsize));
2683 			    }
2684 			}
2685 			if (newvp != NULLVP) {
2686 			    if (newvp == vp)
2687 				vrele(newvp);
2688 			    else
2689 				vput(newvp);
2690 			    newvp = NULLVP;
2691 			}
2692 			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2693 			more_dirs = fxdr_unsigned(int, *tl);
2694 		}
2695 		/*
2696 		 * If at end of rpc data, get the eof boolean
2697 		 */
2698 		if (!more_dirs) {
2699 			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2700 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2701 		}
2702 		m_freem(mrep);
2703 	}
2704 	/*
2705 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2706 	 * by increasing d_reclen for the last record.
2707 	 */
2708 	if (blksiz > 0) {
2709 		left = DIRBLKSIZ - blksiz;
2710 		dp->d_reclen += left;
2711 		uiop->uio_iov->iov_base =
2712 		    (char *)uiop->uio_iov->iov_base + left;
2713 		uiop->uio_iov->iov_len -= left;
2714 		uiop->uio_offset += left;
2715 		uiop->uio_resid -= left;
2716 	}
2717 
2718 	/*
2719 	 * We are now either at the end of the directory or have filled the
2720 	 * block.
2721 	 */
2722 	if (bigenough)
2723 		dnp->n_direofoffset = uiop->uio_offset;
2724 	else {
2725 		if (uiop->uio_resid > 0)
2726 			nfs_printf("EEK! readdirplusrpc resid > 0\n");
2727 		nfs_dircookie_lock(dnp);
2728 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2729 		*cookiep = cookie;
2730 		nfs_dircookie_unlock(dnp);
2731 	}
2732 nfsmout:
2733 	if (newvp != NULLVP) {
2734 	        if (newvp == vp)
2735 			vrele(newvp);
2736 		else
2737 			vput(newvp);
2738 		newvp = NULLVP;
2739 	}
2740 	return (error);
2741 }
2742 
2743 /*
2744  * Silly rename. To make the NFS filesystem that is stateless look a little
2745  * more like the "ufs" a remove of an active vnode is translated to a rename
2746  * to a funny looking filename that is removed by nfs_inactive on the
2747  * nfsnode. There is the potential for another process on a different client
2748  * to create the same funny name between the nfs_lookitup() fails and the
2749  * nfs_rename() completes, but...
2750  */
2751 static int
nfs_sillyrename(struct vnode * dvp,struct vnode * vp,struct componentname * cnp)2752 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2753 {
2754 	struct sillyrename *sp;
2755 	struct nfsnode *np;
2756 	int error;
2757 	short pid;
2758 	unsigned int lticks;
2759 
2760 	cache_purge(dvp);
2761 	np = VTONFS(vp);
2762 	KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
2763 	sp = malloc(sizeof (struct sillyrename),
2764 		M_NFSREQ, M_WAITOK);
2765 	sp->s_cred = crhold(cnp->cn_cred);
2766 	sp->s_dvp = dvp;
2767 	sp->s_removeit = nfs_removeit;
2768 	VREF(dvp);
2769 
2770 	/*
2771 	 * Fudge together a funny name.
2772 	 * Changing the format of the funny name to accomodate more
2773 	 * sillynames per directory.
2774 	 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is
2775 	 * CPU ticks since boot.
2776 	 */
2777 	pid = cnp->cn_thread->td_proc->p_pid;
2778 	lticks = (unsigned int)ticks;
2779 	for ( ; ; ) {
2780 		sp->s_namlen = sprintf(sp->s_name,
2781 				       ".nfs.%08x.%04x4.4", lticks,
2782 				       pid);
2783 		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2784 				 cnp->cn_thread, NULL))
2785 			break;
2786 		lticks++;
2787 	}
2788 	error = nfs_renameit(dvp, cnp, sp);
2789 	if (error)
2790 		goto bad;
2791 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2792 		cnp->cn_thread, &np);
2793 	np->n_sillyrename = sp;
2794 	return (0);
2795 bad:
2796 	vrele(sp->s_dvp);
2797 	crfree(sp->s_cred);
2798 	free((caddr_t)sp, M_NFSREQ);
2799 	return (error);
2800 }
2801 
2802 /*
2803  * Look up a file name and optionally either update the file handle or
2804  * allocate an nfsnode, depending on the value of npp.
2805  * npp == NULL	--> just do the lookup
2806  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2807  *			handled too
2808  * *npp != NULL --> update the file handle in the vnode
2809  */
2810 static int
nfs_lookitup(struct vnode * dvp,const char * name,int len,struct ucred * cred,struct thread * td,struct nfsnode ** npp)2811 nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
2812     struct thread *td, struct nfsnode **npp)
2813 {
2814 	struct vnode *newvp = NULL;
2815 	struct nfsnode *np, *dnp = VTONFS(dvp);
2816 	caddr_t bpos, dpos;
2817 	int error = 0, fhlen, attrflag;
2818 	struct mbuf *mreq, *mrep, *md, *mb;
2819 	nfsfh_t *nfhp;
2820 	int v3 = NFS_ISV3(dvp);
2821 
2822 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
2823 	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
2824 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
2825 	mb = mreq;
2826 	bpos = mtod(mb, caddr_t);
2827 	nfsm_fhtom(dvp, v3);
2828 	nfsm_strtom(name, len, NFS_MAXNAMLEN);
2829 	nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
2830 	if (npp && !error) {
2831 		nfsm_getfh(nfhp, fhlen, v3);
2832 		if (*npp) {
2833 		    np = *npp;
2834 		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
2835 			free((caddr_t)np->n_fhp, M_NFSBIGFH);
2836 			np->n_fhp = &np->n_fh;
2837 		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
2838 			np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
2839 		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
2840 		    np->n_fhsize = fhlen;
2841 		    newvp = NFSTOV(np);
2842 		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
2843 		    VREF(dvp);
2844 		    newvp = dvp;
2845 		} else {
2846 		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
2847 		    if (error) {
2848 			m_freem(mrep);
2849 			return (error);
2850 		    }
2851 		    newvp = NFSTOV(np);
2852 		}
2853 		if (v3) {
2854 			nfsm_postop_attr(newvp, attrflag);
2855 			if (!attrflag && *npp == NULL) {
2856 				m_freem(mrep);
2857 				if (newvp == dvp)
2858 					vrele(newvp);
2859 				else
2860 					vput(newvp);
2861 				return (ENOENT);
2862 			}
2863 		} else
2864 			nfsm_loadattr(newvp, NULL);
2865 	}
2866 	m_freem(mrep);
2867 nfsmout:
2868 	if (npp && *npp == NULL) {
2869 		if (error) {
2870 			if (newvp) {
2871 				if (newvp == dvp)
2872 					vrele(newvp);
2873 				else
2874 					vput(newvp);
2875 			}
2876 		} else
2877 			*npp = np;
2878 	}
2879 	return (error);
2880 }
2881 
/*
 * Nfs Version 3 commit rpc
 *
 * Sends a COMMIT request for the byte range [offset, offset + cnt) of vp,
 * using credential cred on behalf of thread td.
 *
 * Returns 0 without issuing any RPC when the mount does not have
 * NFSSTA_HASWRITEVERF set in nm_state.  Otherwise returns the value left in
 * "error" after the exchange; in particular NFSERR_STALEWRITEVERF is
 * returned when the write verifier in the reply differs from the one cached
 * in nmp->nm_verf (the cached copy is replaced by the new one first).
 *
 * NOTE(review): the nfsm_* macros are defined outside this chunk.  They
 * appear to operate implicitly on the locals mreq/mrep/mb/md/bpos/dpos/error
 * and to branch to the nfsmout label on failure -- confirm against their
 * definitions before renaming or reordering anything here.
 */
int
nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
	   struct thread *td)
{
	u_int32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;

	/* Nothing to do unless the mount has a write verifier recorded. */
	mtx_lock(&nmp->nm_mtx);
	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		return (0);
	}
	mtx_unlock(&nmp->nm_mtx);
	nfsstats.rpccnt[NFSPROC_COMMIT]++;
	mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	/*
	 * Arguments after the file handle: three 32-bit words, i.e. the
	 * 64-bit offset (two words) followed by the count.
	 */
	tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
	txdr_hyper(offset, tl);
	tl += 2;
	*tl = txdr_unsigned(cnt);
	nfsm_request(vp, NFSPROC_COMMIT, td, cred);
	nfsm_wcc_data(vp, wccflag);
	if (!error) {
		/*
		 * Compare the verifier in the reply with the cached one.  On
		 * a mismatch remember the new verifier and tell the caller
		 * via NFSERR_STALEWRITEVERF.
		 */
		tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
			NFSX_V3WRITEVERF)) {
			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
				NFSX_V3WRITEVERF);
			error = NFSERR_STALEWRITEVERF;
		}
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
2925 
2926 /*
2927  * Strategy routine.
2928  * For async requests when nfsiod(s) are running, queue the request by
2929  * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
2930  * request.
2931  */
2932 static int
nfs_strategy(struct vop_strategy_args * ap)2933 nfs_strategy(struct vop_strategy_args *ap)
2934 {
2935 	struct buf *bp = ap->a_bp;
2936 	struct ucred *cr;
2937 
2938 	KASSERT(!(bp->b_flags & B_DONE),
2939 	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2940 	BUF_ASSERT_HELD(bp);
2941 
2942 	if (bp->b_iocmd == BIO_READ)
2943 		cr = bp->b_rcred;
2944 	else
2945 		cr = bp->b_wcred;
2946 
2947 	/*
2948 	 * If the op is asynchronous and an i/o daemon is waiting
2949 	 * queue the request, wake it up and wait for completion
2950 	 * otherwise just do it ourselves.
2951 	 */
2952 	if ((bp->b_flags & B_ASYNC) == 0 ||
2953 	    nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
2954 		(void)nfs_doio(ap->a_vp, bp, cr, curthread);
2955 	return (0);
2956 }
2957 
2958 /*
2959  * fsync vnode op. Just call nfs_flush() with commit == 1.
2960  */
2961 /* ARGSUSED */
2962 static int
nfs_fsync(struct vop_fsync_args * ap)2963 nfs_fsync(struct vop_fsync_args *ap)
2964 {
2965 
2966 	return (nfs_flush(ap->a_vp, ap->a_waitfor, 1));
2967 }
2968 
/*
 * Flush all the blocks associated with a vnode.
 * 	Walk through the buffer pool and push any dirty pages
 *	associated with the vnode.
 *
 * vp      - vnode whose dirty buffer list (v_bufobj.bo_dirty) is processed.
 * waitfor - when equal to MNT_WAIT the routine sleeps for buffers it could
 *           not lock, waits for bo_numoutput to drain and waits for
 *           np->n_directio_asyncwr to reach zero before returning.
 * commit  - non-zero enables the commit phase (only on vnodes for which
 *           NFS_ISV3() is true) and allows NMODIFIED to be cleared once
 *           nothing is dirty or in flight.  nfs_fsync() calls this routine
 *           with commit == 1.
 *
 * Returns the value of "error" at exit: EINTR when nfs_sigintr() reports an
 * interruption, np->n_error when NWRITEERR was set on the node (the flag is
 * cleared here), otherwise whatever the last wait primitive left behind
 * (0 on the normal path).
 *
 * Overall shape (labels in parentheses):
 *   (again) commit phase: collect B_DELWRI|B_NEEDCOMMIT buffers into bvec,
 *           issue nfs_commit() and complete or release them.
 *   (loop)  write phase: bwrite() dirty buffers, restarting the list walk
 *           after every buffer because the bufobj lock was dropped.
 *   With commit set, the first trip (passone) skips B_NEEDCOMMIT buffers in
 *   the write phase and then jumps back to "again" with passone cleared.
 */
static int
nfs_flush(struct vnode *vp, int waitfor, int commit)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
	struct bufobj *bo;
	struct thread *td = curthread;
#ifndef NFS_COMMITBVECSIZ
#define NFS_COMMITBVECSIZ	20
#endif
	/* Fallback vector used when no more than NFS_COMMITBVECSIZ are needed. */
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	int bvecsize = 0, bveccount;

	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = NFS_PCATCH;
	if (!commit)
		passone = 0;
	bo = &vp->v_bufobj;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, nfs_writebp() is called to do the
	 * job.
	 *
	 * NOTE(review): the code below calls bwrite(); presumably that reaches
	 * nfs_writebp() through buf_ops_nfs.bop_write -- confirm.
	 */
again:
	/* off/endoff accumulate the lowest start and highest end offset. */
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV3(vp) && commit) {
		/* Drop a heap vector left over from the previous pass. */
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers waiting for a commit.
		 */
		bveccount = 0;
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bveccount++;
		}
		/*
		 * Allocate space to remember the list of bufs to commit.  It is
		 * important to use M_NOWAIT here to avoid a race with nfs_write.
		 * If we can't get memory (for whatever reason), we will end up
		 * committing the buffers one-by-one in the loop below.
		 */
		if (bveccount > NFS_COMMITBVECSIZ) {
			/*
			 * Release the vnode interlock to avoid a lock
			 * order reversal.
			 */
			BO_UNLOCK(bo);
			bvec = (struct buf **)
				malloc(bveccount * sizeof(struct buf *),
				       M_TEMP, M_NOWAIT);
			BO_LOCK(bo);
			if (bvec == NULL) {
				bvec = bvec_on_stack;
				bvecsize = NFS_COMMITBVECSIZ;
			} else
				bvecsize = bveccount;
		} else {
			bvec = bvec_on_stack;
			bvecsize = NFS_COMMITBVECSIZ;
		}
		/*
		 * Second walk: lock and collect at most bvecsize buffers that
		 * are still B_DELWRI | B_NEEDCOMMIT.
		 */
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (bvecpos >= bvecsize)
				break;
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
			    (B_DELWRI | B_NEEDCOMMIT)) {
				BUF_UNLOCK(bp);
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			BO_UNLOCK(bo);
			bremfree(bp);
			/*
			 * Work out if all buffers are using the same cred
			 * so we can deal with them all with one commit.
			 *
			 * NOTE: we are not clearing B_DONE here, so we have
			 * to do it later on in this routine if we intend to
			 * initiate I/O on the bp.
			 *
			 * Note: to avoid loopback deadlocks, we do not
			 * assign b_runningbufspace.
			 */
			if (wcred == NULL)
				wcred = bp->b_wcred;
			else if (wcred != bp->b_wcred)
				wcred = NOCRED;
			vfs_busy_pages(bp, 1);

			BO_LOCK(bo);
			/*
			 * bp is protected by being locked, but nbp is not
			 * and vfs_busy_pages() may sleep.  We have to
			 * recalculate nbp.
			 */
			nbp = TAILQ_NEXT(bp, b_bobufs);

			/*
			 * A list of these buffers is kept so that the
			 * second loop knows which buffers have actually
			 * been committed. This is necessary, since there
			 * may be a race between the commit rpc and new
			 * uncommitted writes on the file.
			 */
			bvec[bvecpos++] = bp;
			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				bp->b_dirtyoff;
			if (toff < off)
				off = toff;
			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
			if (toff > endoff)
				endoff = toff;
		}
		BO_UNLOCK(bo);
	}
	if (bvecpos > 0) {
		/*
		 * Commit data on the server, as required.
		 * If all bufs are using the same wcred, then use that with
		 * one call for all of them, otherwise commit each one
		 * separately.
		 */
		if (wcred != NOCRED)
			retv = nfs_commit(vp, off, (int)(endoff - off),
					  wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				/*
				 * NOTE(review): this "off" shadows the
				 * function-level u_quad_t off declared above.
				 */
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
					bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
						  - bp->b_dirtyoff);
				retv = nfs_commit(vp, off, (int)size,
						  bp->b_wcred, td);
				if (retv) break;
			}
		}

		/* nfs_commit() reports a changed write verifier this way. */
		if (retv == NFSERR_STALEWRITEVERF)
			nfs_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			if (retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific.  We should probably move that
				 * into bundirty(). XXX
				 */
				bufobj_wref(bo);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	BO_LOCK(bo);
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			/* Busy buffer: only sleep for it on a waiting final pass. */
			if (waitfor != MNT_WAIT || passone)
				continue;

			/*
			 * LK_INTERLOCK hands BO_MTX(bo) to the lock call, so
			 * every exit below restarts at "loop" (which takes
			 * the bufobj lock again) or leaves via "done".
			 */
			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
			if (error == 0) {
				BUF_UNLOCK(bp);
				goto loop;
			}
			if (error == ENOLCK) {
				error = 0;
				goto loop;
			}
			if (nfs_sigintr(nmp, td)) {
				error = EINTR;
				goto done;
			}
			/* Switch from a catchable sleep to a 2 * hz timeout. */
			if (slpflag & PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		/*
		 * The panic text says "nfs_fsync"; this function is what
		 * nfs_fsync() calls.
		 */
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs_fsync: not dirty");
		/*
		 * Buffers awaiting a commit are left alone on pass one and
		 * whenever the caller did not ask for a commit.
		 */
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		BO_UNLOCK(bo);
		bremfree(bp);
		/*
		 * NOTE(review): both branches set B_ASYNC, so the condition
		 * currently has no effect on the outcome.
		 */
		if (passone || !commit)
		    bp->b_flags |= B_ASYNC;
		else
		    bp->b_flags |= B_ASYNC;
		bwrite(bp);
		if (nfs_sigintr(nmp, td)) {
			error = EINTR;
			goto done;
		}
		/* The bufobj lock was dropped; rescan the list from the start. */
		goto loop;
	}
	if (passone) {
		passone = 0;
		BO_UNLOCK(bo);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		/* Wait for writes in progress on this bufobj to finish. */
		while (bo->bo_numoutput) {
			error = bufobj_wwait(bo, slpflag, slptimeo);
			if (error) {
			    BO_UNLOCK(bo);
			    error = nfs_sigintr(nmp, td);
			    if (error)
				goto done;
			    if (slpflag & PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			    }
			    BO_LOCK(bo);
			}
		}
		/* New dirty buffers showed up meanwhile: write them as well. */
		if (bo->bo_dirty.bv_cnt != 0 && commit) {
			BO_UNLOCK(bo);
			goto loop;
		}
		/*
		 * Wait for all the async IO requests to drain
		 */
		BO_UNLOCK(bo);
		mtx_lock(&np->n_mtx);
		while (np->n_directio_asyncwr > 0) {
			np->n_flag |= NFSYNCWAIT;
			error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
					   &np->n_mtx, slpflag | (PRIBIO + 1),
					   "nfsfsync", 0);
			if (error) {
				if (nfs_sigintr(nmp, td)) {
					mtx_unlock(&np->n_mtx);
					error = EINTR;
					goto done;
				}
			}
		}
		mtx_unlock(&np->n_mtx);
	} else
		BO_UNLOCK(bo);
	mtx_lock(&np->n_mtx);
	/* Report (and clear) a write error recorded on the node. */
	if (np->n_flag & NWRITEERR) {
		error = np->n_error;
		np->n_flag &= ~NWRITEERR;
	}
	/* Everything is flushed and nothing is in flight: node is clean. */
  	if (commit && bo->bo_dirty.bv_cnt == 0 &&
	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
  		np->n_flag &= ~NMODIFIED;
	mtx_unlock(&np->n_mtx);
done:
	if (bvec != NULL && bvec != bvec_on_stack)
		free(bvec, M_TEMP);
	return (error);
}
3278 
3279 /*
3280  * NFS advisory byte-level locks.
3281  */
3282 static int
nfs_advlock(struct vop_advlock_args * ap)3283 nfs_advlock(struct vop_advlock_args *ap)
3284 {
3285 	struct vnode *vp = ap->a_vp;
3286 	u_quad_t size;
3287 	int error;
3288 
3289 	error = vn_lock(vp, LK_SHARED);
3290 	if (error)
3291 		return (error);
3292 	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
3293 		size = VTONFS(vp)->n_size;
3294 		VOP_UNLOCK(vp, 0);
3295 		error = lf_advlock(ap, &(vp->v_lockf), size);
3296 	} else {
3297 		if (nfs_advlock_p)
3298 			error = nfs_advlock_p(ap);
3299 		else
3300 			error = ENOLCK;
3301 	}
3302 
3303 	return (error);
3304 }
3305 
3306 /*
3307  * NFS advisory byte-level locks.
3308  */
3309 static int
nfs_advlockasync(struct vop_advlockasync_args * ap)3310 nfs_advlockasync(struct vop_advlockasync_args *ap)
3311 {
3312 	struct vnode *vp = ap->a_vp;
3313 	u_quad_t size;
3314 	int error;
3315 
3316 	error = vn_lock(vp, LK_SHARED);
3317 	if (error)
3318 		return (error);
3319 	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
3320 		size = VTONFS(vp)->n_size;
3321 		VOP_UNLOCK(vp, 0);
3322 		error = lf_advlockasync(ap, &(vp->v_lockf), size);
3323 	} else {
3324 		VOP_UNLOCK(vp, 0);
3325 		error = EOPNOTSUPP;
3326 	}
3327 	return (error);
3328 }
3329 
3330 /*
3331  * Print out the contents of an nfsnode.
3332  */
3333 static int
nfs_print(struct vop_print_args * ap)3334 nfs_print(struct vop_print_args *ap)
3335 {
3336 	struct vnode *vp = ap->a_vp;
3337 	struct nfsnode *np = VTONFS(vp);
3338 
3339 	nfs_printf("\tfileid %ld fsid 0x%x",
3340 	   np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3341 	if (vp->v_type == VFIFO)
3342 		fifo_printinfo(vp);
3343 	printf("\n");
3344 	return (0);
3345 }
3346 
3347 /*
3348  * This is the "real" nfs::bwrite(struct buf*).
3349  * We set B_CACHE if this is a VMIO buffer.
3350  */
3351 int
nfs_writebp(struct buf * bp,int force __unused,struct thread * td)3352 nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
3353 {
3354 	int s;
3355 	int oldflags = bp->b_flags;
3356 #if 0
3357 	int retv = 1;
3358 	off_t off;
3359 #endif
3360 
3361 	BUF_ASSERT_HELD(bp);
3362 
3363 	if (bp->b_flags & B_INVAL) {
3364 		brelse(bp);
3365 		return(0);
3366 	}
3367 
3368 	bp->b_flags |= B_CACHE;
3369 
3370 	/*
3371 	 * Undirty the bp.  We will redirty it later if the I/O fails.
3372 	 */
3373 
3374 	s = splbio();
3375 	bundirty(bp);
3376 	bp->b_flags &= ~B_DONE;
3377 	bp->b_ioflags &= ~BIO_ERROR;
3378 	bp->b_iocmd = BIO_WRITE;
3379 
3380 	bufobj_wref(bp->b_bufobj);
3381 	curthread->td_ru.ru_oublock++;
3382 	splx(s);
3383 
3384 	/*
3385 	 * Note: to avoid loopback deadlocks, we do not
3386 	 * assign b_runningbufspace.
3387 	 */
3388 	vfs_busy_pages(bp, 1);
3389 
3390 	BUF_KERNPROC(bp);
3391 	bp->b_iooffset = dbtob(bp->b_blkno);
3392 	bstrategy(bp);
3393 
3394 	if( (oldflags & B_ASYNC) == 0) {
3395 		int rtval = bufwait(bp);
3396 
3397 		if (oldflags & B_DELWRI) {
3398 			s = splbio();
3399 			reassignbuf(bp);
3400 			splx(s);
3401 		}
3402 		brelse(bp);
3403 		return (rtval);
3404 	}
3405 
3406 	return (0);
3407 }
3408 
3409 /*
3410  * nfs special file access vnode op.
3411  * Essentially just get vattr and then imitate iaccess() since the device is
3412  * local to the client.
3413  */
3414 static int
nfsspec_access(struct vop_access_args * ap)3415 nfsspec_access(struct vop_access_args *ap)
3416 {
3417 	struct vattr *vap;
3418 	struct ucred *cred = ap->a_cred;
3419 	struct vnode *vp = ap->a_vp;
3420 	accmode_t accmode = ap->a_accmode;
3421 	struct vattr vattr;
3422 	int error;
3423 
3424 	/*
3425 	 * Disallow write attempts on filesystems mounted read-only;
3426 	 * unless the file is a socket, fifo, or a block or character
3427 	 * device resident on the filesystem.
3428 	 */
3429 	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3430 		switch (vp->v_type) {
3431 		case VREG:
3432 		case VDIR:
3433 		case VLNK:
3434 			return (EROFS);
3435 		default:
3436 			break;
3437 		}
3438 	}
3439 	vap = &vattr;
3440 	error = VOP_GETATTR(vp, vap, cred);
3441 	if (error)
3442 		goto out;
3443 	error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
3444 			 accmode, cred, NULL);
3445 out:
3446 	return error;
3447 }
3448 
3449 /*
3450  * Read wrapper for fifos.
3451  */
3452 static int
nfsfifo_read(struct vop_read_args * ap)3453 nfsfifo_read(struct vop_read_args *ap)
3454 {
3455 	struct nfsnode *np = VTONFS(ap->a_vp);
3456 	int error;
3457 
3458 	/*
3459 	 * Set access flag.
3460 	 */
3461 	mtx_lock(&np->n_mtx);
3462 	np->n_flag |= NACC;
3463 	vfs_timestamp(&np->n_atim);
3464 	mtx_unlock(&np->n_mtx);
3465 	error = fifo_specops.vop_read(ap);
3466 	return error;
3467 }
3468 
3469 /*
3470  * Write wrapper for fifos.
3471  */
3472 static int
nfsfifo_write(struct vop_write_args * ap)3473 nfsfifo_write(struct vop_write_args *ap)
3474 {
3475 	struct nfsnode *np = VTONFS(ap->a_vp);
3476 
3477 	/*
3478 	 * Set update flag.
3479 	 */
3480 	mtx_lock(&np->n_mtx);
3481 	np->n_flag |= NUPD;
3482 	vfs_timestamp(&np->n_mtim);
3483 	mtx_unlock(&np->n_mtx);
3484 	return(fifo_specops.vop_write(ap));
3485 }
3486 
3487 /*
3488  * Close wrapper for fifos.
3489  *
3490  * Update the times on the nfsnode then do fifo close.
3491  */
3492 static int
nfsfifo_close(struct vop_close_args * ap)3493 nfsfifo_close(struct vop_close_args *ap)
3494 {
3495 	struct vnode *vp = ap->a_vp;
3496 	struct nfsnode *np = VTONFS(vp);
3497 	struct vattr vattr;
3498 	struct timespec ts;
3499 
3500 	mtx_lock(&np->n_mtx);
3501 	if (np->n_flag & (NACC | NUPD)) {
3502 		vfs_timestamp(&ts);
3503 		if (np->n_flag & NACC)
3504 			np->n_atim = ts;
3505 		if (np->n_flag & NUPD)
3506 			np->n_mtim = ts;
3507 		np->n_flag |= NCHG;
3508 		if (vrefcnt(vp) == 1 &&
3509 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3510 			VATTR_NULL(&vattr);
3511 			if (np->n_flag & NACC)
3512 				vattr.va_atime = np->n_atim;
3513 			if (np->n_flag & NUPD)
3514 				vattr.va_mtime = np->n_mtim;
3515 			mtx_unlock(&np->n_mtx);
3516 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
3517 			goto out;
3518 		}
3519 	}
3520 	mtx_unlock(&np->n_mtx);
3521 out:
3522 	return (fifo_specops.vop_close(ap));
3523 }
3524 
3525 /*
3526  * Just call nfs_writebp() with the force argument set to 1.
3527  *
3528  * NOTE: B_DONE may or may not be set in a_bp on call.
3529  */
3530 static int
nfs_bwrite(struct buf * bp)3531 nfs_bwrite(struct buf *bp)
3532 {
3533 
3534 	return (nfs_writebp(bp, 1, curthread));
3535 }
3536 
3537 struct buf_ops buf_ops_nfs = {
3538 	.bop_name	=	"buf_ops_nfs",
3539 	.bop_write	=	nfs_bwrite,
3540 	.bop_strategy	=	bufstrategy,
3541 	.bop_sync	=	bufsync,
3542 	.bop_bdflush	=	bufbdflush,
3543 };
3544