1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009 Rick Macklem, University of Guelph
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: stable/12/sys/fs/nfsclient/nfs_clstate.c 372156 2022-06-10 22:23:54Z git2svn $");
32 
33 /*
34  * These functions implement the client side state handling for NFSv4.
35  * NFSv4 state handling:
36  * - A lockowner is used to determine lock contention, so it
37  *   corresponds directly to a Posix pid. (1 to 1 mapping)
38  * - The correct granularity of an OpenOwner is not nearly so
39  *   obvious. An OpenOwner does the following:
40  *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
41  *   - is used to check for Open/Share contention (not applicable to
42  *     this client, since all Opens are Deny_None)
43  *   As such, I considered both extremes.
44  *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
45  *   all Open, Close and Lock (with a new lockowner) Ops.
46  *   1 OpenOwner for each Open - This one results in an OpenConfirm for
47  *   every Open, for most servers.
48  *   So, I chose to use the same mapping as I did for LockOwners.
49  *   The main concern here is that you can end up with multiple Opens
50  *   for the same File Handle, but on different OpenOwners (opens
51  *   inherited from parents, grandparents...) and you do not know
52  *   which of these the vnodeop close applies to. This is handled by
53  *   delaying the Close Op(s) until all of the Opens have been closed.
54  *   (It is not yet obvious if this is the correct granularity.)
55  * - How the code handles serialization:
56  *   - For the ClientId, it uses an exclusive lock while getting its
57  *     SetClientId and during recovery. Otherwise, it uses a shared
58  *     lock via a reference count.
59  *   - For the rest of the data structures, it uses an SMP mutex
60  *     (once the nfs client is SMP safe) and doesn't sleep while
61  *     manipulating the linked lists.
62  *   - The serialization of Open/Close/Lock/LockU falls out in the
63  *     "wash", since OpenOwners and LockOwners are both mapped from
64  *     Posix pid. In other words, there is only one Posix pid using
65  *     any given owner, so that owner is serialized. (If you change
66  *     the granularity of the OpenOwner, then code must be added to
67  *     serialize Ops on the OpenOwner.)
68  * - When to get rid of OpenOwners and LockOwners.
69  *   - The function nfscl_cleanup_common() is executed after a process exits.
70  *     It goes through the client list looking for all Open and Lock Owners.
71  *     When one is found, it is marked "defunct" or in the case of
72  *     an OpenOwner without any Opens, freed.
73  *     The renew thread scans for defunct Owners and gets rid of them,
74  *     if it can. The LockOwners will also be deleted when the
75  *     associated Open is closed.
76  *   - If the LockU or Close Op(s) fail during close in a way
77  *     that could be recovered upon retry, they are relinked to the
78  *     ClientId's defunct open list and retried by the renew thread
79  *     until they succeed or an unmount/recovery occurs.
80  *     (Since we are done with them, they do not need to be recovered.)
81  */
82 
83 #include <fs/nfs/nfsport.h>
84 
85 /*
86  * Global variables
87  */
88 extern struct nfsstatsv1 nfsstatsv1;
89 extern struct nfsreqhead nfsd_reqq;
90 extern u_int32_t newnfs_false, newnfs_true;
91 extern int nfscl_debuglevel;
92 extern int nfscl_enablecallb;
93 extern int nfs_numnfscbd;
94 NFSREQSPINLOCK;
95 NFSCLSTATEMUTEX;
96 int nfscl_inited = 0;
97 struct nfsclhead nfsclhead;	/* Head of clientid list */
98 int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
99 int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
100 
101 static int nfscl_delegcnt = 0;
102 static int nfscl_layoutcnt = 0;
103 static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *,
104     u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **);
105 static void nfscl_clrelease(struct nfsclclient *);
106 static void nfscl_unlinkopen(struct nfsclopen *);
107 static void nfscl_cleanclient(struct nfsclclient *);
108 static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
109     struct ucred *, NFSPROC_T *);
110 static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
111     struct nfsmount *, struct ucred *, NFSPROC_T *);
112 static void nfscl_recover(struct nfsclclient *, struct ucred *, NFSPROC_T *);
113 static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
114     struct nfscllock *, int);
115 static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
116     struct nfscllock **, int);
117 static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *,
118     struct nfscldeleghead *);
119 static u_int32_t nfscl_nextcbident(void);
120 static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
121 static struct nfsclclient *nfscl_getclnt(u_int32_t);
122 static struct nfsclclient *nfscl_getclntsess(uint8_t *);
123 static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
124     int);
125 static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
126     int, struct nfsclrecalllayout **, struct nfscllayout **);
127 static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
128 static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
129     int);
130 static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
131 static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
132     u_int8_t *, struct nfscllock **);
133 static void nfscl_freealllocks(struct nfscllockownerhead *, int);
134 static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
135     struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
136 static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
137     struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
138     struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
139 static int nfscl_moveopen(vnode_t , struct nfsclclient *,
140     struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
141     struct nfscldeleg *, struct ucred *, NFSPROC_T *);
142 static void nfscl_totalrecall(struct nfsclclient *);
143 static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
144     struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
145 static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
146     u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
147     struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
148 static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
149     int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
150     struct ucred *, NFSPROC_T *);
151 static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
152     struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
153 static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *,
154     bool);
155 static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
156 static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
157 static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
158     struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
159     vnode_t *);
160 static void nfscl_freeopenowner(struct nfsclowner *, int);
161 static void nfscl_cleandeleg(struct nfscldeleg *);
162 static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
163     struct nfsmount *, NFSPROC_T *);
164 static void nfscl_emptylockowner(struct nfscllockowner *,
165     struct nfscllockownerfhhead *);
166 static void nfscl_mergeflayouts(struct nfsclflayouthead *,
167     struct nfsclflayouthead *);
168 static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
169     uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
170 static int nfscl_seq(uint32_t, uint32_t);
171 static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
172     struct ucred *, NFSPROC_T *);
173 static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
174     struct ucred *, NFSPROC_T *);
175 
176 static short nfscberr_null[] = {
177 	0,
178 	0,
179 };
180 
181 static short nfscberr_getattr[] = {
182 	NFSERR_RESOURCE,
183 	NFSERR_BADHANDLE,
184 	NFSERR_BADXDR,
185 	NFSERR_RESOURCE,
186 	NFSERR_SERVERFAULT,
187 	0,
188 };
189 
190 static short nfscberr_recall[] = {
191 	NFSERR_RESOURCE,
192 	NFSERR_BADHANDLE,
193 	NFSERR_BADSTATEID,
194 	NFSERR_BADXDR,
195 	NFSERR_RESOURCE,
196 	NFSERR_SERVERFAULT,
197 	0,
198 };
199 
200 static short *nfscl_cberrmap[] = {
201 	nfscberr_null,
202 	nfscberr_null,
203 	nfscberr_null,
204 	nfscberr_getattr,
205 	nfscberr_recall
206 };
207 
/* AF_INET6 when NFSCLFLAGS_AFINET6 is set in the client's flags, else AF_INET. */
#define	NETFAMILY(clp)							\
	((((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) != 0) ? AF_INET6 : AF_INET)
210 
211 /*
212  * Called for an open operation.
213  * If the nfhp argument is NULL, just get an openowner.
214  */
215 int
nfscl_open(vnode_t vp,u_int8_t * nfhp,int fhlen,u_int32_t amode,int usedeleg,struct ucred * cred,NFSPROC_T * p,struct nfsclowner ** owpp,struct nfsclopen ** opp,int * newonep,int * retp,int lockit,bool firstref)216 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
217     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
218     struct nfsclopen **opp, int *newonep, int *retp, int lockit, bool firstref)
219 {
220 	struct nfsclclient *clp;
221 	struct nfsclowner *owp, *nowp;
222 	struct nfsclopen *op = NULL, *nop = NULL;
223 	struct nfscldeleg *dp;
224 	struct nfsclownerhead *ohp;
225 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
226 	int ret;
227 
228 	if (newonep != NULL)
229 		*newonep = 0;
230 	if (opp != NULL)
231 		*opp = NULL;
232 	if (owpp != NULL)
233 		*owpp = NULL;
234 
235 	/*
236 	 * Might need one or both of these, so MALLOC them now, to
237 	 * avoid a tsleep() in MALLOC later.
238 	 */
239 	nowp = malloc(sizeof (struct nfsclowner),
240 	    M_NFSCLOWNER, M_WAITOK);
241 	if (nfhp != NULL)
242 	    nop = malloc(sizeof (struct nfsclopen) +
243 		fhlen - 1, M_NFSCLOPEN, M_WAITOK);
244 	ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, firstref, &clp);
245 	if (ret != 0) {
246 		free(nowp, M_NFSCLOWNER);
247 		if (nop != NULL)
248 			free(nop, M_NFSCLOPEN);
249 		return (ret);
250 	}
251 
252 	/*
253 	 * Get the Open iff it already exists.
254 	 * If none found, add the new one or return error, depending upon
255 	 * "create".
256 	 */
257 	NFSLOCKCLSTATE();
258 	dp = NULL;
259 	/* First check the delegation list */
260 	if (nfhp != NULL && usedeleg) {
261 		LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
262 			if (dp->nfsdl_fhlen == fhlen &&
263 			    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
264 				if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
265 				    (dp->nfsdl_flags & NFSCLDL_WRITE))
266 					break;
267 				dp = NULL;
268 				break;
269 			}
270 		}
271 	}
272 
273 	if (dp != NULL) {
274 		nfscl_filllockowner(p->td_proc, own, F_POSIX);
275 		ohp = &dp->nfsdl_owner;
276 	} else {
277 		/* For NFSv4.1 and this option, use a single open_owner. */
278 		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
279 			nfscl_filllockowner(NULL, own, F_POSIX);
280 		else
281 			nfscl_filllockowner(p->td_proc, own, F_POSIX);
282 		ohp = &clp->nfsc_owner;
283 	}
284 	/* Now, search for an openowner */
285 	LIST_FOREACH(owp, ohp, nfsow_list) {
286 		if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
287 			break;
288 	}
289 
290 	/*
291 	 * Create a new open, as required.
292 	 */
293 	nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
294 	    cred, newonep);
295 
296 	/*
297 	 * Now, check the mode on the open and return the appropriate
298 	 * value.
299 	 */
300 	if (retp != NULL) {
301 		if (nfhp != NULL && dp != NULL && nop == NULL)
302 			/* new local open on delegation */
303 			*retp = NFSCLOPEN_SETCRED;
304 		else
305 			*retp = NFSCLOPEN_OK;
306 	}
307 	if (op != NULL && (amode & ~(op->nfso_mode))) {
308 		op->nfso_mode |= amode;
309 		if (retp != NULL && dp == NULL)
310 			*retp = NFSCLOPEN_DOOPEN;
311 	}
312 
313 	/*
314 	 * Serialize modifications to the open owner for multiple threads
315 	 * within the same process using a read/write sleep lock.
316 	 * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
317 	 * by acquiring a shared lock.  The close operations still use an
318 	 * exclusive lock for this case.
319 	 */
320 	if (lockit != 0) {
321 		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) {
322 			/*
323 			 * Get a shared lock on the OpenOwner, but first
324 			 * wait for any pending exclusive lock, so that the
325 			 * exclusive locker gets priority.
326 			 */
327 			nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
328 			    NFSCLSTATEMUTEXPTR, NULL);
329 			nfsv4_getref(&owp->nfsow_rwlock, NULL,
330 			    NFSCLSTATEMUTEXPTR, NULL);
331 		} else
332 			nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
333 	}
334 	NFSUNLOCKCLSTATE();
335 	if (nowp != NULL)
336 		free(nowp, M_NFSCLOWNER);
337 	if (nop != NULL)
338 		free(nop, M_NFSCLOPEN);
339 	if (owpp != NULL)
340 		*owpp = owp;
341 	if (opp != NULL)
342 		*opp = op;
343 	return (0);
344 }
345 
346 /*
347  * Create a new open, as required.
348  */
349 static void
nfscl_newopen(struct nfsclclient * clp,struct nfscldeleg * dp,struct nfsclowner ** owpp,struct nfsclowner ** nowpp,struct nfsclopen ** opp,struct nfsclopen ** nopp,u_int8_t * own,u_int8_t * fhp,int fhlen,struct ucred * cred,int * newonep)350 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
351     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
352     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
353     struct ucred *cred, int *newonep)
354 {
355 	struct nfsclowner *owp = *owpp, *nowp;
356 	struct nfsclopen *op, *nop;
357 
358 	if (nowpp != NULL)
359 		nowp = *nowpp;
360 	else
361 		nowp = NULL;
362 	if (nopp != NULL)
363 		nop = *nopp;
364 	else
365 		nop = NULL;
366 	if (owp == NULL && nowp != NULL) {
367 		NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
368 		LIST_INIT(&nowp->nfsow_open);
369 		nowp->nfsow_clp = clp;
370 		nowp->nfsow_seqid = 0;
371 		nowp->nfsow_defunct = 0;
372 		nfscl_lockinit(&nowp->nfsow_rwlock);
373 		if (dp != NULL) {
374 			nfsstatsv1.cllocalopenowners++;
375 			LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
376 		} else {
377 			nfsstatsv1.clopenowners++;
378 			LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
379 		}
380 		owp = *owpp = nowp;
381 		*nowpp = NULL;
382 		if (newonep != NULL)
383 			*newonep = 1;
384 	}
385 
386 	 /* If an fhp has been specified, create an Open as well. */
387 	if (fhp != NULL) {
388 		/* and look for the correct open, based upon FH */
389 		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
390 			if (op->nfso_fhlen == fhlen &&
391 			    !NFSBCMP(op->nfso_fh, fhp, fhlen))
392 				break;
393 		}
394 		if (op == NULL && nop != NULL) {
395 			nop->nfso_own = owp;
396 			nop->nfso_mode = 0;
397 			nop->nfso_opencnt = 0;
398 			nop->nfso_posixlock = 1;
399 			nop->nfso_fhlen = fhlen;
400 			NFSBCOPY(fhp, nop->nfso_fh, fhlen);
401 			LIST_INIT(&nop->nfso_lock);
402 			nop->nfso_stateid.seqid = 0;
403 			nop->nfso_stateid.other[0] = 0;
404 			nop->nfso_stateid.other[1] = 0;
405 			nop->nfso_stateid.other[2] = 0;
406 			KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
407 			newnfs_copyincred(cred, &nop->nfso_cred);
408 			if (dp != NULL) {
409 				TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
410 				TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
411 				    nfsdl_list);
412 				dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
413 				nfsstatsv1.cllocalopens++;
414 			} else {
415 				nfsstatsv1.clopens++;
416 			}
417 			LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
418 			*opp = nop;
419 			*nopp = NULL;
420 			if (newonep != NULL)
421 				*newonep = 1;
422 		} else {
423 			*opp = op;
424 		}
425 	}
426 }
427 
428 /*
429  * Called to find/add a delegation to a client.
430  */
431 int
nfscl_deleg(mount_t mp,struct nfsclclient * clp,u_int8_t * nfhp,int fhlen,struct ucred * cred,NFSPROC_T * p,struct nfscldeleg ** dpp)432 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
433     int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
434 {
435 	struct nfscldeleg *dp = *dpp, *tdp;
436 
437 	/*
438 	 * First, if we have received a Read delegation for a file on a
439 	 * read/write file system, just return it, because they aren't
440 	 * useful, imho.
441 	 */
442 	if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) &&
443 	    (dp->nfsdl_flags & NFSCLDL_READ)) {
444 		(void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p);
445 		free(dp, M_NFSCLDELEG);
446 		*dpp = NULL;
447 		return (0);
448 	}
449 
450 	/* Look for the correct deleg, based upon FH */
451 	NFSLOCKCLSTATE();
452 	tdp = nfscl_finddeleg(clp, nfhp, fhlen);
453 	if (tdp == NULL) {
454 		if (dp == NULL) {
455 			NFSUNLOCKCLSTATE();
456 			return (NFSERR_BADSTATEID);
457 		}
458 		*dpp = NULL;
459 		TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
460 		LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
461 		    nfsdl_hash);
462 		dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
463 		nfsstatsv1.cldelegates++;
464 		nfscl_delegcnt++;
465 	} else {
466 		/*
467 		 * Delegation already exists, what do we do if a new one??
468 		 */
469 		if (dp != NULL) {
470 			printf("Deleg already exists!\n");
471 			free(dp, M_NFSCLDELEG);
472 			*dpp = NULL;
473 		} else {
474 			*dpp = tdp;
475 		}
476 	}
477 	NFSUNLOCKCLSTATE();
478 	return (0);
479 }
480 
481 /*
482  * Find a delegation for this file handle. Return NULL upon failure.
483  */
484 static struct nfscldeleg *
nfscl_finddeleg(struct nfsclclient * clp,u_int8_t * fhp,int fhlen)485 nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
486 {
487 	struct nfscldeleg *dp;
488 
489 	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
490 	    if (dp->nfsdl_fhlen == fhlen &&
491 		!NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
492 		break;
493 	}
494 	return (dp);
495 }
496 
497 /*
498  * Get a stateid for an I/O operation. First, look for an open and iff
499  * found, return either a lockowner stateid or the open stateid.
500  * If no Open is found, just return error and the special stateid of all zeros.
501  */
502 int
nfscl_getstateid(vnode_t vp,u_int8_t * nfhp,int fhlen,u_int32_t mode,int fords,struct ucred * cred,NFSPROC_T * p,nfsv4stateid_t * stateidp,void ** lckpp)503 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
504     int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
505     void **lckpp)
506 {
507 	struct nfsclclient *clp;
508 	struct nfsclowner *owp;
509 	struct nfsclopen *op = NULL, *top;
510 	struct nfscllockowner *lp;
511 	struct nfscldeleg *dp;
512 	struct nfsnode *np;
513 	struct nfsmount *nmp;
514 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
515 	int error, done;
516 
517 	*lckpp = NULL;
518 	/*
519 	 * Initially, just set the special stateid of all zeros.
520 	 * (Don't do this for a DS, since the special stateid can't be used.)
521 	 */
522 	if (fords == 0) {
523 		stateidp->seqid = 0;
524 		stateidp->other[0] = 0;
525 		stateidp->other[1] = 0;
526 		stateidp->other[2] = 0;
527 	}
528 	if (vnode_vtype(vp) != VREG)
529 		return (EISDIR);
530 	np = VTONFS(vp);
531 	nmp = VFSTONFS(vnode_mount(vp));
532 	NFSLOCKCLSTATE();
533 	clp = nfscl_findcl(nmp);
534 	if (clp == NULL) {
535 		NFSUNLOCKCLSTATE();
536 		return (EACCES);
537 	}
538 
539 	/*
540 	 * Wait for recovery to complete.
541 	 */
542 	while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
543 		(void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
544 		    PZERO, "nfsrecvr", NULL);
545 
546 	/*
547 	 * First, look for a delegation.
548 	 */
549 	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
550 		if (dp->nfsdl_fhlen == fhlen &&
551 		    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
552 			if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
553 			    (dp->nfsdl_flags & NFSCLDL_WRITE)) {
554 				stateidp->seqid = dp->nfsdl_stateid.seqid;
555 				stateidp->other[0] = dp->nfsdl_stateid.other[0];
556 				stateidp->other[1] = dp->nfsdl_stateid.other[1];
557 				stateidp->other[2] = dp->nfsdl_stateid.other[2];
558 				if (!(np->n_flag & NDELEGRECALL)) {
559 					TAILQ_REMOVE(&clp->nfsc_deleg, dp,
560 					    nfsdl_list);
561 					TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
562 					    nfsdl_list);
563 					dp->nfsdl_timestamp = NFSD_MONOSEC +
564 					    120;
565 					dp->nfsdl_rwlock.nfslock_usecnt++;
566 					*lckpp = (void *)&dp->nfsdl_rwlock;
567 				}
568 				NFSUNLOCKCLSTATE();
569 				return (0);
570 			}
571 			break;
572 		}
573 	}
574 
575 	if (p != NULL) {
576 		/*
577 		 * If p != NULL, we want to search the parentage tree
578 		 * for a matching OpenOwner and use that.
579 		 */
580 		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
581 			nfscl_filllockowner(NULL, own, F_POSIX);
582 		else
583 			nfscl_filllockowner(p->td_proc, own, F_POSIX);
584 		lp = NULL;
585 		error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own,
586 		    mode, &lp, &op);
587 		if (error == 0 && lp != NULL && fords == 0) {
588 			/* Don't return a lock stateid for a DS. */
589 			stateidp->seqid =
590 			    lp->nfsl_stateid.seqid;
591 			stateidp->other[0] =
592 			    lp->nfsl_stateid.other[0];
593 			stateidp->other[1] =
594 			    lp->nfsl_stateid.other[1];
595 			stateidp->other[2] =
596 			    lp->nfsl_stateid.other[2];
597 			NFSUNLOCKCLSTATE();
598 			return (0);
599 		}
600 	}
601 	if (op == NULL) {
602 		/* If not found, just look for any OpenOwner that will work. */
603 		top = NULL;
604 		done = 0;
605 		owp = LIST_FIRST(&clp->nfsc_owner);
606 		while (!done && owp != NULL) {
607 			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
608 				if (op->nfso_fhlen == fhlen &&
609 				    !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
610 					if (top == NULL && (op->nfso_mode &
611 					    NFSV4OPEN_ACCESSWRITE) != 0 &&
612 					    (mode & NFSV4OPEN_ACCESSREAD) != 0)
613 						top = op;
614 					if ((mode & op->nfso_mode) == mode) {
615 						done = 1;
616 						break;
617 					}
618 				}
619 			}
620 			if (!done)
621 				owp = LIST_NEXT(owp, nfsow_list);
622 		}
623 		if (!done) {
624 			NFSCL_DEBUG(2, "openmode top=%p\n", top);
625 			if (top == NULL || NFSHASOPENMODE(nmp)) {
626 				NFSUNLOCKCLSTATE();
627 				return (ENOENT);
628 			} else
629 				op = top;
630 		}
631 		/*
632 		 * For read aheads or write behinds, use the open cred.
633 		 * A read ahead or write behind is indicated by p == NULL.
634 		 */
635 		if (p == NULL)
636 			newnfs_copycred(&op->nfso_cred, cred);
637 	}
638 
639 	/*
640 	 * No lock stateid, so return the open stateid.
641 	 */
642 	stateidp->seqid = op->nfso_stateid.seqid;
643 	stateidp->other[0] = op->nfso_stateid.other[0];
644 	stateidp->other[1] = op->nfso_stateid.other[1];
645 	stateidp->other[2] = op->nfso_stateid.other[2];
646 	NFSUNLOCKCLSTATE();
647 	return (0);
648 }
649 
650 /*
651  * Search for a matching file, mode and, optionally, lockowner.
652  */
653 static int
nfscl_getopen(struct nfsclownerhead * ohp,u_int8_t * nfhp,int fhlen,u_int8_t * openown,u_int8_t * lockown,u_int32_t mode,struct nfscllockowner ** lpp,struct nfsclopen ** opp)654 nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
655     u_int8_t *openown, u_int8_t *lockown, u_int32_t mode,
656     struct nfscllockowner **lpp, struct nfsclopen **opp)
657 {
658 	struct nfsclowner *owp;
659 	struct nfsclopen *op, *rop, *rop2;
660 	struct nfscllockowner *lp;
661 	int keep_looping;
662 
663 	if (lpp != NULL)
664 		*lpp = NULL;
665 	/*
666 	 * rop will be set to the open to be returned. There are three
667 	 * variants of this, all for an open of the correct file:
668 	 * 1 - A match of lockown.
669 	 * 2 - A match of the openown, when no lockown match exists.
670 	 * 3 - A match for any open, if no openown or lockown match exists.
671 	 * Looking for #2 over #3 probably isn't necessary, but since
672 	 * RFC3530 is vague w.r.t. the relationship between openowners and
673 	 * lockowners, I think this is the safer way to go.
674 	 */
675 	rop = NULL;
676 	rop2 = NULL;
677 	keep_looping = 1;
678 	/* Search the client list */
679 	owp = LIST_FIRST(ohp);
680 	while (owp != NULL && keep_looping != 0) {
681 		/* and look for the correct open */
682 		op = LIST_FIRST(&owp->nfsow_open);
683 		while (op != NULL && keep_looping != 0) {
684 			if (op->nfso_fhlen == fhlen &&
685 			    !NFSBCMP(op->nfso_fh, nfhp, fhlen)
686 			    && (op->nfso_mode & mode) == mode) {
687 				if (lpp != NULL) {
688 					/* Now look for a matching lockowner. */
689 					LIST_FOREACH(lp, &op->nfso_lock,
690 					    nfsl_list) {
691 						if (!NFSBCMP(lp->nfsl_owner,
692 						    lockown,
693 						    NFSV4CL_LOCKNAMELEN)) {
694 							*lpp = lp;
695 							rop = op;
696 							keep_looping = 0;
697 							break;
698 						}
699 					}
700 				}
701 				if (rop == NULL && !NFSBCMP(owp->nfsow_owner,
702 				    openown, NFSV4CL_LOCKNAMELEN)) {
703 					rop = op;
704 					if (lpp == NULL)
705 						keep_looping = 0;
706 				}
707 				if (rop2 == NULL)
708 					rop2 = op;
709 			}
710 			op = LIST_NEXT(op, nfso_list);
711 		}
712 		owp = LIST_NEXT(owp, nfsow_list);
713 	}
714 	if (rop == NULL)
715 		rop = rop2;
716 	if (rop == NULL)
717 		return (EBADF);
718 	*opp = rop;
719 	return (0);
720 }
721 
722 /*
723  * Release use of an open owner. Called when open operations are done
724  * with the open owner.
725  */
726 void
nfscl_ownerrelease(struct nfsmount * nmp,struct nfsclowner * owp,__unused int error,__unused int candelete,int unlocked)727 nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
728     __unused int error, __unused int candelete, int unlocked)
729 {
730 
731 	if (owp == NULL)
732 		return;
733 	NFSLOCKCLSTATE();
734 	if (unlocked == 0) {
735 		if (NFSHASONEOPENOWN(nmp))
736 			nfsv4_relref(&owp->nfsow_rwlock);
737 		else
738 			nfscl_lockunlock(&owp->nfsow_rwlock);
739 	}
740 	nfscl_clrelease(owp->nfsow_clp);
741 	NFSUNLOCKCLSTATE();
742 }
743 
744 /*
745  * Release use of an open structure under an open owner.
746  */
747 void
nfscl_openrelease(struct nfsmount * nmp,struct nfsclopen * op,int error,int candelete)748 nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
749     int candelete)
750 {
751 	struct nfsclclient *clp;
752 	struct nfsclowner *owp;
753 
754 	if (op == NULL)
755 		return;
756 	NFSLOCKCLSTATE();
757 	owp = op->nfso_own;
758 	if (NFSHASONEOPENOWN(nmp))
759 		nfsv4_relref(&owp->nfsow_rwlock);
760 	else
761 		nfscl_lockunlock(&owp->nfsow_rwlock);
762 	clp = owp->nfsow_clp;
763 	if (error && candelete && op->nfso_opencnt == 0)
764 		nfscl_freeopen(op, 0, true);
765 	nfscl_clrelease(clp);
766 	NFSUNLOCKCLSTATE();
767 }
768 
769 /*
770  * Called to get a clientid structure. It will optionally lock the
771  * client data structures to do the SetClientId/SetClientId_confirm,
772  * but will release that lock and return the clientid with a reference
773  * count on it.
774  * If the "cred" argument is NULL, a new clientid should not be created.
775  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
776  * be done.
777  * The start_renewthread argument tells nfscl_getcl() to start a renew
778  * thread if this creates a new clp.
779  * It always returns clpp with a reference count on it, unless returning an error.
780  */
781 int
nfscl_getcl(struct mount * mp,struct ucred * cred,NFSPROC_T * p,int start_renewthread,bool firstref,struct nfsclclient ** clpp)782 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
783     int start_renewthread, bool firstref, struct nfsclclient **clpp)
784 {
785 	struct nfsclclient *clp;
786 	struct nfsclclient *newclp = NULL;
787 	struct nfsmount *nmp;
788 	char uuid[HOSTUUIDLEN];
789 	int igotlock = 0, error, trystalecnt, clidinusedelay, i;
790 	u_int16_t idlen = 0;
791 
792 	nmp = VFSTONFS(mp);
793 	if (cred != NULL) {
794 		getcredhostuuid(cred, uuid, sizeof uuid);
795 		idlen = strlen(uuid);
796 		if (idlen > 0)
797 			idlen += sizeof (u_int64_t);
798 		else
799 			idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
800 		newclp = malloc(
801 		    sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
802 		    M_WAITOK | M_ZERO);
803 	}
804 	NFSLOCKCLSTATE();
805 	/*
806 	 * If a forced dismount is already in progress, don't
807 	 * allocate a new clientid and get out now. For the case where
808 	 * clp != NULL, this is a harmless optimization.
809 	 */
810 	if (NFSCL_FORCEDISM(mp)) {
811 		NFSUNLOCKCLSTATE();
812 		if (newclp != NULL)
813 			free(newclp, M_NFSCLCLIENT);
814 		return (EBADF);
815 	}
816 	clp = nmp->nm_clp;
817 	if (clp == NULL) {
818 		if (newclp == NULL) {
819 			NFSUNLOCKCLSTATE();
820 			return (EACCES);
821 		}
822 		clp = newclp;
823 		clp->nfsc_idlen = idlen;
824 		LIST_INIT(&clp->nfsc_owner);
825 		TAILQ_INIT(&clp->nfsc_deleg);
826 		TAILQ_INIT(&clp->nfsc_layout);
827 		LIST_INIT(&clp->nfsc_devinfo);
828 		for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
829 			LIST_INIT(&clp->nfsc_deleghash[i]);
830 		for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
831 			LIST_INIT(&clp->nfsc_layouthash[i]);
832 		clp->nfsc_flags = NFSCLFLAGS_INITED;
833 		clp->nfsc_clientidrev = 1;
834 		clp->nfsc_cbident = nfscl_nextcbident();
835 		nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
836 		    clp->nfsc_idlen);
837 		LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
838 		nmp->nm_clp = clp;
839 		clp->nfsc_nmp = nmp;
840 		NFSUNLOCKCLSTATE();
841 		if (start_renewthread != 0)
842 			nfscl_start_renewthread(clp);
843 	} else {
844 		NFSUNLOCKCLSTATE();
845 		if (newclp != NULL)
846 			free(newclp, M_NFSCLCLIENT);
847 	}
848 	NFSLOCKCLSTATE();
849 	while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
850 	    !NFSCL_FORCEDISM(mp))
851 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
852 		    NFSCLSTATEMUTEXPTR, mp);
853 	if (igotlock == 0) {
854 		/*
855 		 * Call nfsv4_lock() with "iwantlock == 0" on the firstref so
856 		 * that it will wait for a pending exclusive lock request.
857 		 * This gives the exclusive lock request priority over this
858 		 * shared lock request.
859 		 * An exclusive lock on nfsc_lock is used mainly for server
860 		 * crash recoveries and delegation recalls.
861 		 */
862 		if (firstref)
863 			nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR,
864 			    mp);
865 		nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
866 	}
867 	if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
868 		/*
869 		 * Both nfsv4_lock() and nfsv4_getref() know to check
870 		 * for NFSCL_FORCEDISM() and return without sleeping to
871 		 * wait for the exclusive lock to be released, since it
872 		 * might be held by nfscl_umount() and we need to get out
873 		 * now for that case and not wait until nfscl_umount()
874 		 * releases it.
875 		 */
876 		NFSUNLOCKCLSTATE();
877 		return (EBADF);
878 	}
879 	NFSUNLOCKCLSTATE();
880 
881 	/*
882 	 * If it needs a clientid, do the setclientid now.
883 	 */
884 	if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
885 		if (!igotlock)
886 			panic("nfscl_clget");
887 		if (p == NULL || cred == NULL) {
888 			NFSLOCKCLSTATE();
889 			nfsv4_unlock(&clp->nfsc_lock, 0);
890 			NFSUNLOCKCLSTATE();
891 			return (EACCES);
892 		}
893 		/*
894 		 * If RFC3530 Sec. 14.2.33 is taken literally,
895 		 * NFSERR_CLIDINUSE will be returned persistently for the
896 		 * case where a new mount of the same file system is using
897 		 * a different principal. In practice, NFSERR_CLIDINUSE is
898 		 * only returned when there is outstanding unexpired state
899 		 * on the clientid. As such, try for twice the lease
900 		 * interval, if we know what that is. Otherwise, make a
901 		 * wild ass guess.
902 		 * The case of returning NFSERR_STALECLIENTID is far less
903 		 * likely, but might occur if there is a significant delay
904 		 * between doing the SetClientID and SetClientIDConfirm Ops,
905 		 * such that the server throws away the clientid before
906 		 * receiving the SetClientIDConfirm.
907 		 */
908 		if (clp->nfsc_renew > 0)
909 			clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
910 		else
911 			clidinusedelay = 120;
912 		trystalecnt = 3;
913 		do {
914 			error = nfsrpc_setclient(nmp, clp, 0, cred, p);
915 			if (error == NFSERR_STALECLIENTID ||
916 			    error == NFSERR_STALEDONTRECOVER ||
917 			    error == NFSERR_BADSESSION ||
918 			    error == NFSERR_CLIDINUSE) {
919 				(void) nfs_catnap(PZERO, error, "nfs_setcl");
920 			}
921 		} while (((error == NFSERR_STALECLIENTID ||
922 		     error == NFSERR_BADSESSION ||
923 		     error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
924 		    (error == NFSERR_CLIDINUSE && --clidinusedelay > 0));
925 		if (error) {
926 			NFSLOCKCLSTATE();
927 			nfsv4_unlock(&clp->nfsc_lock, 0);
928 			NFSUNLOCKCLSTATE();
929 			return (error);
930 		}
931 		clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
932 	}
933 	if (igotlock) {
934 		NFSLOCKCLSTATE();
935 		nfsv4_unlock(&clp->nfsc_lock, 1);
936 		NFSUNLOCKCLSTATE();
937 	}
938 
939 	*clpp = clp;
940 	return (0);
941 }
942 
943 /*
944  * Get a reference to a clientid and return it, if valid.
945  */
946 struct nfsclclient *
nfscl_findcl(struct nfsmount * nmp)947 nfscl_findcl(struct nfsmount *nmp)
948 {
949 	struct nfsclclient *clp;
950 
951 	clp = nmp->nm_clp;
952 	if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
953 		return (NULL);
954 	return (clp);
955 }
956 
957 /*
958  * Release the clientid structure. It may be locked or reference counted.
959  */
960 static void
nfscl_clrelease(struct nfsclclient * clp)961 nfscl_clrelease(struct nfsclclient *clp)
962 {
963 
964 	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
965 		nfsv4_unlock(&clp->nfsc_lock, 0);
966 	else
967 		nfsv4_relref(&clp->nfsc_lock);
968 }
969 
970 /*
971  * External call for nfscl_clrelease.
972  */
973 void
nfscl_clientrelease(struct nfsclclient * clp)974 nfscl_clientrelease(struct nfsclclient *clp)
975 {
976 
977 	NFSLOCKCLSTATE();
978 	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
979 		nfsv4_unlock(&clp->nfsc_lock, 0);
980 	else
981 		nfsv4_relref(&clp->nfsc_lock);
982 	NFSUNLOCKCLSTATE();
983 }
984 
985 /*
986  * Called when wanting to lock a byte region.
987  */
988 int
nfscl_getbytelock(vnode_t vp,u_int64_t off,u_int64_t len,short type,struct ucred * cred,NFSPROC_T * p,struct nfsclclient * rclp,int recovery,void * id,int flags,u_int8_t * rownp,u_int8_t * ropenownp,struct nfscllockowner ** lpp,int * newonep,int * donelocallyp)989 nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
990     short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
991     int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
992     struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
993 {
994 	struct nfscllockowner *lp;
995 	struct nfsclopen *op;
996 	struct nfsclclient *clp;
997 	struct nfscllockowner *nlp;
998 	struct nfscllock *nlop, *otherlop;
999 	struct nfscldeleg *dp = NULL, *ldp = NULL;
1000 	struct nfscllockownerhead *lhp = NULL;
1001 	struct nfsnode *np;
1002 	u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
1003 	u_int8_t *openownp;
1004 	int error = 0, ret, donelocally = 0;
1005 	u_int32_t mode;
1006 
1007 	/* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
1008 	mode = 0;
1009 	np = VTONFS(vp);
1010 	*lpp = NULL;
1011 	lp = NULL;
1012 	*newonep = 0;
1013 	*donelocallyp = 0;
1014 
1015 	/*
1016 	 * Might need these, so MALLOC them now, to
1017 	 * avoid a tsleep() in MALLOC later.
1018 	 */
1019 	nlp = malloc(
1020 	    sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
1021 	otherlop = malloc(
1022 	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1023 	nlop = malloc(
1024 	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1025 	nlop->nfslo_type = type;
1026 	nlop->nfslo_first = off;
1027 	if (len == NFS64BITSSET) {
1028 		nlop->nfslo_end = NFS64BITSSET;
1029 	} else {
1030 		nlop->nfslo_end = off + len;
1031 		if (nlop->nfslo_end <= nlop->nfslo_first)
1032 			error = NFSERR_INVAL;
1033 	}
1034 
1035 	if (!error) {
1036 		if (recovery)
1037 			clp = rclp;
1038 		else
1039 			error = nfscl_getcl(vnode_mount(vp), cred, p, 1, true,
1040 			    &clp);
1041 	}
1042 	if (error) {
1043 		free(nlp, M_NFSCLLOCKOWNER);
1044 		free(otherlop, M_NFSCLLOCK);
1045 		free(nlop, M_NFSCLLOCK);
1046 		return (error);
1047 	}
1048 
1049 	op = NULL;
1050 	if (recovery) {
1051 		ownp = rownp;
1052 		openownp = ropenownp;
1053 	} else {
1054 		nfscl_filllockowner(id, own, flags);
1055 		ownp = own;
1056 		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
1057 			nfscl_filllockowner(NULL, openown, F_POSIX);
1058 		else
1059 			nfscl_filllockowner(p->td_proc, openown, F_POSIX);
1060 		openownp = openown;
1061 	}
1062 	if (!recovery) {
1063 		NFSLOCKCLSTATE();
1064 		/*
1065 		 * First, search for a delegation. If one exists for this file,
1066 		 * the lock can be done locally against it, so long as there
1067 		 * isn't a local lock conflict.
1068 		 */
1069 		ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1070 		    np->n_fhp->nfh_len);
1071 		/* Just sanity check for correct type of delegation */
1072 		if (dp != NULL && ((dp->nfsdl_flags &
1073 		    (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
1074 		     (type == F_WRLCK &&
1075 		      (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
1076 			dp = NULL;
1077 	}
1078 	if (dp != NULL) {
1079 		/* Now, find an open and maybe a lockowner. */
1080 		ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh,
1081 		    np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
1082 		if (ret)
1083 			ret = nfscl_getopen(&clp->nfsc_owner,
1084 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1085 			    ownp, mode, NULL, &op);
1086 		if (!ret) {
1087 			lhp = &dp->nfsdl_lock;
1088 			TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
1089 			TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
1090 			dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
1091 			donelocally = 1;
1092 		} else {
1093 			dp = NULL;
1094 		}
1095 	}
1096 	if (!donelocally) {
1097 		/*
1098 		 * Get the related Open and maybe lockowner.
1099 		 */
1100 		error = nfscl_getopen(&clp->nfsc_owner,
1101 		    np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1102 		    ownp, mode, &lp, &op);
1103 		if (!error)
1104 			lhp = &op->nfso_lock;
1105 	}
1106 	if (!error && !recovery)
1107 		error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
1108 		    np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
1109 	if (error) {
1110 		if (!recovery) {
1111 			nfscl_clrelease(clp);
1112 			NFSUNLOCKCLSTATE();
1113 		}
1114 		free(nlp, M_NFSCLLOCKOWNER);
1115 		free(otherlop, M_NFSCLLOCK);
1116 		free(nlop, M_NFSCLLOCK);
1117 		return (error);
1118 	}
1119 
1120 	/*
1121 	 * Ok, see if a lockowner exists and create one, as required.
1122 	 */
1123 	if (lp == NULL)
1124 		LIST_FOREACH(lp, lhp, nfsl_list) {
1125 			if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
1126 				break;
1127 		}
1128 	if (lp == NULL) {
1129 		NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
1130 		if (recovery)
1131 			NFSBCOPY(ropenownp, nlp->nfsl_openowner,
1132 			    NFSV4CL_LOCKNAMELEN);
1133 		else
1134 			NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
1135 			    NFSV4CL_LOCKNAMELEN);
1136 		nlp->nfsl_seqid = 0;
1137 		nlp->nfsl_lockflags = flags;
1138 		nlp->nfsl_inprog = NULL;
1139 		nfscl_lockinit(&nlp->nfsl_rwlock);
1140 		LIST_INIT(&nlp->nfsl_lock);
1141 		if (donelocally) {
1142 			nlp->nfsl_open = NULL;
1143 			nfsstatsv1.cllocallockowners++;
1144 		} else {
1145 			nlp->nfsl_open = op;
1146 			nfsstatsv1.cllockowners++;
1147 		}
1148 		LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
1149 		lp = nlp;
1150 		nlp = NULL;
1151 		*newonep = 1;
1152 	}
1153 
1154 	/*
1155 	 * Now, update the byte ranges for locks.
1156 	 */
1157 	ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
1158 	if (!ret)
1159 		donelocally = 1;
1160 	if (donelocally) {
1161 		*donelocallyp = 1;
1162 		if (!recovery)
1163 			nfscl_clrelease(clp);
1164 	} else {
1165 		/*
1166 		 * Serial modifications on the lock owner for multiple threads
1167 		 * for the same process using a read/write lock.
1168 		 */
1169 		if (!recovery)
1170 			nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1171 	}
1172 	if (!recovery)
1173 		NFSUNLOCKCLSTATE();
1174 
1175 	if (nlp)
1176 		free(nlp, M_NFSCLLOCKOWNER);
1177 	if (nlop)
1178 		free(nlop, M_NFSCLLOCK);
1179 	if (otherlop)
1180 		free(otherlop, M_NFSCLLOCK);
1181 
1182 	*lpp = lp;
1183 	return (0);
1184 }
1185 
1186 /*
1187  * Called to unlock a byte range, for LockU.
1188  */
1189 int
nfscl_relbytelock(vnode_t vp,u_int64_t off,u_int64_t len,__unused struct ucred * cred,NFSPROC_T * p,int callcnt,struct nfsclclient * clp,void * id,int flags,struct nfscllockowner ** lpp,int * dorpcp)1190 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1191     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1192     struct nfsclclient *clp, void *id, int flags,
1193     struct nfscllockowner **lpp, int *dorpcp)
1194 {
1195 	struct nfscllockowner *lp;
1196 	struct nfsclowner *owp;
1197 	struct nfsclopen *op;
1198 	struct nfscllock *nlop, *other_lop = NULL;
1199 	struct nfscldeleg *dp;
1200 	struct nfsnode *np;
1201 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1202 	int ret = 0, fnd;
1203 
1204 	np = VTONFS(vp);
1205 	*lpp = NULL;
1206 	*dorpcp = 0;
1207 
1208 	/*
1209 	 * Might need these, so MALLOC them now, to
1210 	 * avoid a tsleep() in MALLOC later.
1211 	 */
1212 	nlop = malloc(
1213 	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1214 	nlop->nfslo_type = F_UNLCK;
1215 	nlop->nfslo_first = off;
1216 	if (len == NFS64BITSSET) {
1217 		nlop->nfslo_end = NFS64BITSSET;
1218 	} else {
1219 		nlop->nfslo_end = off + len;
1220 		if (nlop->nfslo_end <= nlop->nfslo_first) {
1221 			free(nlop, M_NFSCLLOCK);
1222 			return (NFSERR_INVAL);
1223 		}
1224 	}
1225 	if (callcnt == 0) {
1226 		other_lop = malloc(
1227 		    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1228 		*other_lop = *nlop;
1229 	}
1230 	nfscl_filllockowner(id, own, flags);
1231 	dp = NULL;
1232 	NFSLOCKCLSTATE();
1233 	if (callcnt == 0)
1234 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1235 		    np->n_fhp->nfh_len);
1236 
1237 	/*
1238 	 * First, unlock any local regions on a delegation.
1239 	 */
1240 	if (dp != NULL) {
1241 		/* Look for this lockowner. */
1242 		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1243 			if (!NFSBCMP(lp->nfsl_owner, own,
1244 			    NFSV4CL_LOCKNAMELEN))
1245 				break;
1246 		}
1247 		if (lp != NULL)
1248 			/* Use other_lop, so nlop is still available */
1249 			(void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1250 	}
1251 
1252 	/*
1253 	 * Now, find a matching open/lockowner that hasn't already been done,
1254 	 * as marked by nfsl_inprog.
1255 	 */
1256 	lp = NULL;
1257 	fnd = 0;
1258 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1259 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1260 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1261 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1262 		    LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1263 			if (lp->nfsl_inprog == NULL &&
1264 			    !NFSBCMP(lp->nfsl_owner, own,
1265 			     NFSV4CL_LOCKNAMELEN)) {
1266 				fnd = 1;
1267 				break;
1268 			}
1269 		    }
1270 		    if (fnd)
1271 			break;
1272 		}
1273 	    }
1274 	    if (fnd)
1275 		break;
1276 	}
1277 
1278 	if (lp != NULL) {
1279 		ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1280 		if (ret)
1281 			*dorpcp = 1;
1282 		/*
1283 		 * Serial modifications on the lock owner for multiple
1284 		 * threads for the same process using a read/write lock.
1285 		 */
1286 		lp->nfsl_inprog = p;
1287 		nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1288 		*lpp = lp;
1289 	}
1290 	NFSUNLOCKCLSTATE();
1291 	if (nlop)
1292 		free(nlop, M_NFSCLLOCK);
1293 	if (other_lop)
1294 		free(other_lop, M_NFSCLLOCK);
1295 	return (0);
1296 }
1297 
1298 /*
1299  * Release all lockowners marked in progess for this process and file.
1300  */
1301 void
nfscl_releasealllocks(struct nfsclclient * clp,vnode_t vp,NFSPROC_T * p,void * id,int flags)1302 nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1303     void *id, int flags)
1304 {
1305 	struct nfsclowner *owp;
1306 	struct nfsclopen *op;
1307 	struct nfscllockowner *lp;
1308 	struct nfsnode *np;
1309 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1310 
1311 	np = VTONFS(vp);
1312 	nfscl_filllockowner(id, own, flags);
1313 	NFSLOCKCLSTATE();
1314 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1315 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1316 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1317 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1318 		    LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1319 			if (lp->nfsl_inprog == p &&
1320 			    !NFSBCMP(lp->nfsl_owner, own,
1321 			    NFSV4CL_LOCKNAMELEN)) {
1322 			    lp->nfsl_inprog = NULL;
1323 			    nfscl_lockunlock(&lp->nfsl_rwlock);
1324 			}
1325 		    }
1326 		}
1327 	    }
1328 	}
1329 	nfscl_clrelease(clp);
1330 	NFSUNLOCKCLSTATE();
1331 }
1332 
1333 /*
1334  * Called to find out if any bytes within the byte range specified are
1335  * write locked by the calling process. Used to determine if flushing
1336  * is required before a LockU.
1337  * If in doubt, return 1, so the flush will occur.
1338  */
1339 int
nfscl_checkwritelocked(vnode_t vp,struct flock * fl,struct ucred * cred,NFSPROC_T * p,void * id,int flags)1340 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1341     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1342 {
1343 	struct nfsclowner *owp;
1344 	struct nfscllockowner *lp;
1345 	struct nfsclopen *op;
1346 	struct nfsclclient *clp;
1347 	struct nfscllock *lop;
1348 	struct nfscldeleg *dp;
1349 	struct nfsnode *np;
1350 	u_int64_t off, end;
1351 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1352 	int error = 0;
1353 
1354 	np = VTONFS(vp);
1355 	switch (fl->l_whence) {
1356 	case SEEK_SET:
1357 	case SEEK_CUR:
1358 		/*
1359 		 * Caller is responsible for adding any necessary offset
1360 		 * when SEEK_CUR is used.
1361 		 */
1362 		off = fl->l_start;
1363 		break;
1364 	case SEEK_END:
1365 		off = np->n_size + fl->l_start;
1366 		break;
1367 	default:
1368 		return (1);
1369 	}
1370 	if (fl->l_len != 0) {
1371 		end = off + fl->l_len;
1372 		if (end < off)
1373 			return (1);
1374 	} else {
1375 		end = NFS64BITSSET;
1376 	}
1377 
1378 	error = nfscl_getcl(vnode_mount(vp), cred, p, 1, true, &clp);
1379 	if (error)
1380 		return (1);
1381 	nfscl_filllockowner(id, own, flags);
1382 	NFSLOCKCLSTATE();
1383 
1384 	/*
1385 	 * First check the delegation locks.
1386 	 */
1387 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1388 	if (dp != NULL) {
1389 		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1390 			if (!NFSBCMP(lp->nfsl_owner, own,
1391 			    NFSV4CL_LOCKNAMELEN))
1392 				break;
1393 		}
1394 		if (lp != NULL) {
1395 			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1396 				if (lop->nfslo_first >= end)
1397 					break;
1398 				if (lop->nfslo_end <= off)
1399 					continue;
1400 				if (lop->nfslo_type == F_WRLCK) {
1401 					nfscl_clrelease(clp);
1402 					NFSUNLOCKCLSTATE();
1403 					return (1);
1404 				}
1405 			}
1406 		}
1407 	}
1408 
1409 	/*
1410 	 * Now, check state against the server.
1411 	 */
1412 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1413 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1414 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1415 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1416 		    LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1417 			if (!NFSBCMP(lp->nfsl_owner, own,
1418 			    NFSV4CL_LOCKNAMELEN))
1419 			    break;
1420 		    }
1421 		    if (lp != NULL) {
1422 			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1423 			    if (lop->nfslo_first >= end)
1424 				break;
1425 			    if (lop->nfslo_end <= off)
1426 				continue;
1427 			    if (lop->nfslo_type == F_WRLCK) {
1428 				nfscl_clrelease(clp);
1429 				NFSUNLOCKCLSTATE();
1430 				return (1);
1431 			    }
1432 			}
1433 		    }
1434 		}
1435 	    }
1436 	}
1437 	nfscl_clrelease(clp);
1438 	NFSUNLOCKCLSTATE();
1439 	return (0);
1440 }
1441 
1442 /*
1443  * Release a byte range lock owner structure.
1444  */
1445 void
nfscl_lockrelease(struct nfscllockowner * lp,int error,int candelete)1446 nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1447 {
1448 	struct nfsclclient *clp;
1449 
1450 	if (lp == NULL)
1451 		return;
1452 	NFSLOCKCLSTATE();
1453 	clp = lp->nfsl_open->nfso_own->nfsow_clp;
1454 	if (error != 0 && candelete &&
1455 	    (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1456 		nfscl_freelockowner(lp, 0);
1457 	else
1458 		nfscl_lockunlock(&lp->nfsl_rwlock);
1459 	nfscl_clrelease(clp);
1460 	NFSUNLOCKCLSTATE();
1461 }
1462 
1463 /*
1464  * Unlink the open structure.
1465  */
1466 static void
nfscl_unlinkopen(struct nfsclopen * op)1467 nfscl_unlinkopen(struct nfsclopen *op)
1468 {
1469 
1470 	LIST_REMOVE(op, nfso_list);
1471 }
1472 
1473 /*
1474  * Free up an open structure and any associated byte range lock structures.
1475  */
1476 void
nfscl_freeopen(struct nfsclopen * op,int local,bool unlink)1477 nfscl_freeopen(struct nfsclopen *op, int local, bool unlink)
1478 {
1479 
1480 	if (unlink)
1481 		nfscl_unlinkopen(op);
1482 	nfscl_freealllocks(&op->nfso_lock, local);
1483 	free(op, M_NFSCLOPEN);
1484 	if (local)
1485 		nfsstatsv1.cllocalopens--;
1486 	else
1487 		nfsstatsv1.clopens--;
1488 }
1489 
1490 /*
1491  * Free up all lock owners and associated locks.
1492  */
1493 static void
nfscl_freealllocks(struct nfscllockownerhead * lhp,int local)1494 nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1495 {
1496 	struct nfscllockowner *lp, *nlp;
1497 
1498 	LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1499 		if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1500 			panic("nfscllckw");
1501 		nfscl_freelockowner(lp, local);
1502 	}
1503 }
1504 
1505 /*
1506  * Called for an Open when NFSERR_EXPIRED is received from the server.
1507  * If there are no byte range locks nor a Share Deny lost, try to do a
1508  * fresh Open. Otherwise, free the open.
1509  */
1510 static int
nfscl_expireopen(struct nfsclclient * clp,struct nfsclopen * op,struct nfsmount * nmp,struct ucred * cred,NFSPROC_T * p)1511 nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1512     struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1513 {
1514 	struct nfscllockowner *lp;
1515 	struct nfscldeleg *dp;
1516 	int mustdelete = 0, error;
1517 
1518 	/*
1519 	 * Look for any byte range lock(s).
1520 	 */
1521 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1522 		if (!LIST_EMPTY(&lp->nfsl_lock)) {
1523 			mustdelete = 1;
1524 			break;
1525 		}
1526 	}
1527 
1528 	/*
1529 	 * If no byte range lock(s) nor a Share deny, try to re-open.
1530 	 */
1531 	if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1532 		newnfs_copycred(&op->nfso_cred, cred);
1533 		dp = NULL;
1534 		error = nfsrpc_reopen(nmp, op->nfso_fh,
1535 		    op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1536 		if (error) {
1537 			mustdelete = 1;
1538 			if (dp != NULL) {
1539 				free(dp, M_NFSCLDELEG);
1540 				dp = NULL;
1541 			}
1542 		}
1543 		if (dp != NULL)
1544 			nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1545 			    op->nfso_fhlen, cred, p, &dp);
1546 	}
1547 
1548 	/*
1549 	 * If a byte range lock or Share deny or couldn't re-open, free it.
1550 	 */
1551 	if (mustdelete)
1552 		nfscl_freeopen(op, 0, true);
1553 	return (mustdelete);
1554 }
1555 
1556 /*
1557  * Free up an open owner structure.
1558  */
1559 static void
nfscl_freeopenowner(struct nfsclowner * owp,int local)1560 nfscl_freeopenowner(struct nfsclowner *owp, int local)
1561 {
1562 	int owned;
1563 
1564 	/*
1565 	 * Make sure the NFSCLSTATE mutex is held, to avoid races with
1566 	 * calls in nfscl_renewthread() that do not hold a reference
1567 	 * count on the nfsclclient and just the mutex.
1568 	 * The mutex will not be held for calls done with the exclusive
1569 	 * nfsclclient lock held, in particular, nfscl_hasexpired()
1570 	 * and nfscl_recalldeleg() might do this.
1571 	 */
1572 	owned = mtx_owned(NFSCLSTATEMUTEXPTR);
1573 	if (owned == 0)
1574 		NFSLOCKCLSTATE();
1575 	LIST_REMOVE(owp, nfsow_list);
1576 	if (owned == 0)
1577 		NFSUNLOCKCLSTATE();
1578 	free(owp, M_NFSCLOWNER);
1579 	if (local)
1580 		nfsstatsv1.cllocalopenowners--;
1581 	else
1582 		nfsstatsv1.clopenowners--;
1583 }
1584 
1585 /*
1586  * Free up a byte range lock owner structure.
1587  */
1588 void
nfscl_freelockowner(struct nfscllockowner * lp,int local)1589 nfscl_freelockowner(struct nfscllockowner *lp, int local)
1590 {
1591 	struct nfscllock *lop, *nlop;
1592 	int owned;
1593 
1594 	/*
1595 	 * Make sure the NFSCLSTATE mutex is held, to avoid races with
1596 	 * calls in nfscl_renewthread() that do not hold a reference
1597 	 * count on the nfsclclient and just the mutex.
1598 	 * The mutex will not be held for calls done with the exclusive
1599 	 * nfsclclient lock held, in particular, nfscl_hasexpired()
1600 	 * and nfscl_recalldeleg() might do this.
1601 	 */
1602 	owned = mtx_owned(NFSCLSTATEMUTEXPTR);
1603 	if (owned == 0)
1604 		NFSLOCKCLSTATE();
1605 	LIST_REMOVE(lp, nfsl_list);
1606 	if (owned == 0)
1607 		NFSUNLOCKCLSTATE();
1608 	LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1609 		nfscl_freelock(lop, local);
1610 	}
1611 	free(lp, M_NFSCLLOCKOWNER);
1612 	if (local)
1613 		nfsstatsv1.cllocallockowners--;
1614 	else
1615 		nfsstatsv1.cllockowners--;
1616 }
1617 
1618 /*
1619  * Free up a byte range lock structure.
1620  */
1621 void
nfscl_freelock(struct nfscllock * lop,int local)1622 nfscl_freelock(struct nfscllock *lop, int local)
1623 {
1624 
1625 	LIST_REMOVE(lop, nfslo_list);
1626 	free(lop, M_NFSCLLOCK);
1627 	if (local)
1628 		nfsstatsv1.cllocallocks--;
1629 	else
1630 		nfsstatsv1.cllocks--;
1631 }
1632 
1633 /*
1634  * Clean out the state related to a delegation.
1635  */
1636 static void
nfscl_cleandeleg(struct nfscldeleg * dp)1637 nfscl_cleandeleg(struct nfscldeleg *dp)
1638 {
1639 	struct nfsclowner *owp, *nowp;
1640 	struct nfsclopen *op;
1641 
1642 	LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1643 		op = LIST_FIRST(&owp->nfsow_open);
1644 		if (op != NULL) {
1645 			if (LIST_NEXT(op, nfso_list) != NULL)
1646 				panic("nfscleandel");
1647 			nfscl_freeopen(op, 1, true);
1648 		}
1649 		nfscl_freeopenowner(owp, 1);
1650 	}
1651 	nfscl_freealllocks(&dp->nfsdl_lock, 1);
1652 }
1653 
1654 /*
1655  * Free a delegation.
1656  */
1657 static void
nfscl_freedeleg(struct nfscldeleghead * hdp,struct nfscldeleg * dp,bool freeit)1658 nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
1659 {
1660 
1661 	TAILQ_REMOVE(hdp, dp, nfsdl_list);
1662 	LIST_REMOVE(dp, nfsdl_hash);
1663 	if (freeit)
1664 		free(dp, M_NFSCLDELEG);
1665 	nfsstatsv1.cldelegates--;
1666 	nfscl_delegcnt--;
1667 }
1668 
1669 /*
1670  * Free up all state related to this client structure.
1671  */
1672 static void
nfscl_cleanclient(struct nfsclclient * clp)1673 nfscl_cleanclient(struct nfsclclient *clp)
1674 {
1675 	struct nfsclowner *owp, *nowp;
1676 	struct nfsclopen *op, *nop;
1677 	struct nfscllayout *lyp, *nlyp;
1678 	struct nfscldevinfo *dip, *ndip;
1679 
1680 	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1681 		nfscl_freelayout(lyp);
1682 
1683 	LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1684 		nfscl_freedevinfo(dip);
1685 
1686 	/* Now, all the OpenOwners, etc. */
1687 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1688 		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1689 			nfscl_freeopen(op, 0, true);
1690 		}
1691 		nfscl_freeopenowner(owp, 0);
1692 	}
1693 }
1694 
1695 /*
1696  * Called when an NFSERR_EXPIRED is received from the server.
1697  */
1698 static void
nfscl_expireclient(struct nfsclclient * clp,struct nfsmount * nmp,struct ucred * cred,NFSPROC_T * p)1699 nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
1700     struct ucred *cred, NFSPROC_T *p)
1701 {
1702 	struct nfsclowner *owp, *nowp, *towp;
1703 	struct nfsclopen *op, *nop, *top;
1704 	struct nfscldeleg *dp, *ndp;
1705 	int ret, printed = 0;
1706 
1707 	/*
1708 	 * First, merge locally issued Opens into the list for the server.
1709 	 */
1710 	dp = TAILQ_FIRST(&clp->nfsc_deleg);
1711 	while (dp != NULL) {
1712 	    ndp = TAILQ_NEXT(dp, nfsdl_list);
1713 	    owp = LIST_FIRST(&dp->nfsdl_owner);
1714 	    while (owp != NULL) {
1715 		nowp = LIST_NEXT(owp, nfsow_list);
1716 		op = LIST_FIRST(&owp->nfsow_open);
1717 		if (op != NULL) {
1718 		    if (LIST_NEXT(op, nfso_list) != NULL)
1719 			panic("nfsclexp");
1720 		    LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
1721 			if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
1722 			    NFSV4CL_LOCKNAMELEN))
1723 			    break;
1724 		    }
1725 		    if (towp != NULL) {
1726 			/* Merge opens in */
1727 			LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
1728 			    if (top->nfso_fhlen == op->nfso_fhlen &&
1729 				!NFSBCMP(top->nfso_fh, op->nfso_fh,
1730 				 op->nfso_fhlen)) {
1731 				top->nfso_mode |= op->nfso_mode;
1732 				top->nfso_opencnt += op->nfso_opencnt;
1733 				break;
1734 			    }
1735 			}
1736 			if (top == NULL) {
1737 			    /* Just add the open to the owner list */
1738 			    LIST_REMOVE(op, nfso_list);
1739 			    op->nfso_own = towp;
1740 			    LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
1741 			    nfsstatsv1.cllocalopens--;
1742 			    nfsstatsv1.clopens++;
1743 			}
1744 		    } else {
1745 			/* Just add the openowner to the client list */
1746 			LIST_REMOVE(owp, nfsow_list);
1747 			owp->nfsow_clp = clp;
1748 			LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
1749 			nfsstatsv1.cllocalopenowners--;
1750 			nfsstatsv1.clopenowners++;
1751 			nfsstatsv1.cllocalopens--;
1752 			nfsstatsv1.clopens++;
1753 		    }
1754 		}
1755 		owp = nowp;
1756 	    }
1757 	    if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
1758 		printed = 1;
1759 		printf("nfsv4 expired locks lost\n");
1760 	    }
1761 	    nfscl_cleandeleg(dp);
1762 	    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
1763 	    dp = ndp;
1764 	}
1765 	if (!TAILQ_EMPTY(&clp->nfsc_deleg))
1766 	    panic("nfsclexp");
1767 
1768 	/*
1769 	 * Now, try and reopen against the server.
1770 	 */
1771 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1772 		owp->nfsow_seqid = 0;
1773 		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1774 			ret = nfscl_expireopen(clp, op, nmp, cred, p);
1775 			if (ret && !printed) {
1776 				printed = 1;
1777 				printf("nfsv4 expired locks lost\n");
1778 			}
1779 		}
1780 		if (LIST_EMPTY(&owp->nfsow_open))
1781 			nfscl_freeopenowner(owp, 0);
1782 	}
1783 }
1784 
1785 /*
1786  * This function must be called after the process represented by "own" has
1787  * exited. Must be called with CLSTATE lock held.
1788  */
1789 static void
nfscl_cleanup_common(struct nfsclclient * clp,u_int8_t * own)1790 nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1791 {
1792 	struct nfsclowner *owp, *nowp;
1793 	struct nfscllockowner *lp;
1794 	struct nfscldeleg *dp;
1795 
1796 	/* First, get rid of local locks on delegations. */
1797 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1798 		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1799 		    if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1800 			if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1801 			    panic("nfscllckw");
1802 			nfscl_freelockowner(lp, 1);
1803 			break;
1804 		    }
1805 		}
1806 	}
1807 	owp = LIST_FIRST(&clp->nfsc_owner);
1808 	while (owp != NULL) {
1809 		nowp = LIST_NEXT(owp, nfsow_list);
1810 		if (!NFSBCMP(owp->nfsow_owner, own,
1811 		    NFSV4CL_LOCKNAMELEN)) {
1812 			/*
1813 			 * If there are children that haven't closed the
1814 			 * file descriptors yet, the opens will still be
1815 			 * here. For that case, let the renew thread clear
1816 			 * out the OpenOwner later.
1817 			 */
1818 			if (LIST_EMPTY(&owp->nfsow_open))
1819 				nfscl_freeopenowner(owp, 0);
1820 			else
1821 				owp->nfsow_defunct = 1;
1822 			break;
1823 		}
1824 		owp = nowp;
1825 	}
1826 }
1827 
1828 /*
1829  * Find open/lock owners for processes that have exited.
1830  */
1831 static void
nfscl_cleanupkext(struct nfsclclient * clp,struct nfscllockownerfhhead * lhp)1832 nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1833 {
1834 	struct nfsclowner *owp, *nowp;
1835 	struct nfsclopen *op;
1836 	struct nfscllockowner *lp, *nlp;
1837 	struct nfscldeleg *dp;
1838 	uint8_t own[NFSV4CL_LOCKNAMELEN];
1839 
1840 	NFSPROCLISTLOCK();
1841 	NFSLOCKCLSTATE();
1842 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1843 		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1844 			LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1845 				if (LIST_EMPTY(&lp->nfsl_lock))
1846 					nfscl_emptylockowner(lp, lhp);
1847 			}
1848 		}
1849 		if (nfscl_procdoesntexist(owp->nfsow_owner)) {
1850 			memcpy(own, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
1851 			nfscl_cleanup_common(clp, own);
1852 		}
1853 	}
1854 
1855 	/*
1856 	 * For the single open_owner case, these lock owners need to be
1857 	 * checked to see if they still exist separately.
1858 	 * This is because nfscl_procdoesntexist() never returns true for
1859 	 * the single open_owner so that the above doesn't ever call
1860 	 * nfscl_cleanup_common().
1861 	 */
1862 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1863 		LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1864 			if (nfscl_procdoesntexist(lp->nfsl_owner)) {
1865 				memcpy(own, lp->nfsl_owner,
1866 				    NFSV4CL_LOCKNAMELEN);
1867 				nfscl_cleanup_common(clp, own);
1868 			}
1869 		}
1870 	}
1871 	NFSUNLOCKCLSTATE();
1872 	NFSPROCLISTUNLOCK();
1873 }
1874 
1875 /*
1876  * Take the empty lock owner and move it to the local lhp list if the
1877  * associated process no longer exists.
1878  */
1879 static void
nfscl_emptylockowner(struct nfscllockowner * lp,struct nfscllockownerfhhead * lhp)1880 nfscl_emptylockowner(struct nfscllockowner *lp,
1881     struct nfscllockownerfhhead *lhp)
1882 {
1883 	struct nfscllockownerfh *lfhp, *mylfhp;
1884 	struct nfscllockowner *nlp;
1885 	int fnd_it;
1886 
1887 	/* If not a Posix lock owner, just return. */
1888 	if ((lp->nfsl_lockflags & F_POSIX) == 0)
1889 		return;
1890 
1891 	fnd_it = 0;
1892 	mylfhp = NULL;
1893 	/*
1894 	 * First, search to see if this lock owner is already in the list.
1895 	 * If it is, then the associated process no longer exists.
1896 	 */
1897 	SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1898 		if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1899 		    !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1900 		    lfhp->nfslfh_len))
1901 			mylfhp = lfhp;
1902 		LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1903 			if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1904 			    NFSV4CL_LOCKNAMELEN))
1905 				fnd_it = 1;
1906 	}
1907 	/* If not found, check if process still exists. */
1908 	if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1909 		return;
1910 
1911 	/* Move the lock owner over to the local list. */
1912 	if (mylfhp == NULL) {
1913 		mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1914 		    M_NOWAIT);
1915 		if (mylfhp == NULL)
1916 			return;
1917 		mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1918 		NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1919 		    mylfhp->nfslfh_len);
1920 		LIST_INIT(&mylfhp->nfslfh_lock);
1921 		SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1922 	}
1923 	LIST_REMOVE(lp, nfsl_list);
1924 	LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1925 }
1926 
static int	fake_global;	/* Used to force visibility of MNTK_UNMOUNTF */
/*
 * Called from nfs umount to free up the clientid.
 *
 * nmp - the mount point being unmounted; nm_clp is set to NULL once the
 *       client structure has been torn down
 * p   - passed through to the RPC and delegation return functions
 * dhp - delegation list head passed to nfscl_delegreturnall()
 *
 * If the mount has a client structure, this function waits for the renew
 * thread to terminate, acquires nfsc_lock, returns delegations, tells the
 * server to discard the clientid/session and then frees all client state.
 * It panics if the client structure was never initialized.
 */
void
nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p, struct nfscldeleghead *dhp)
{
	struct nfsclclient *clp;
	struct ucred *cred;
	int igotlock;

	/*
	 * For the case that matters, this is the thread that set
	 * MNTK_UNMOUNTF, so it will see it set. The code that follows is
	 * done to ensure that any thread executing nfscl_getcl() after
	 * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
	 * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
	 * explanation, courtesy of Alan Cox.
	 * What follows is a snippet from Alan Cox's email at:
	 * https://docs.FreeBSD.org/cgi/mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
	 *
	 * 1. Set MNTK_UNMOUNTF
	 * 2. Acquire a standard FreeBSD mutex "m".
	 * 3. Update some data structures.
	 * 4. Release mutex "m".
	 *
	 * Then, other threads that acquire "m" after step 4 has occurred will
	 * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
	 * step 2 may or may not see MNTK_UNMOUNTF as set.
	 */
	NFSLOCKCLSTATE();
	if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
		/*
		 * fake_global++ is the "update some data structures" of
		 * step 3 and the unlock is step 4. The mutex is then
		 * re-acquired, since the code below expects it to be held.
		 */
		fake_global++;
		NFSUNLOCKCLSTATE();
		NFSLOCKCLSTATE();
	}

	clp = nmp->nm_clp;
	if (clp != NULL) {
		if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
			panic("nfscl umount");

		/*
		 * First, handshake with the nfscl renew thread, to terminate
		 * it.
		 * NFSCLFLAGS_HASTHREAD is re-tested after every wakeup or
		 * after the hz tick timeout of mtx_sleep().
		 */
		clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
		while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
			    "nfsclumnt", hz);

		/*
		 * Now, get the exclusive lock on the client state, so
		 * that no uses of the state are still in progress.
		 */
		do {
			igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
			    NFSCLSTATEMUTEXPTR, NULL);
		} while (!igotlock);
		NFSUNLOCKCLSTATE();

		/*
		 * Free up all the state. It will expire on the server, but
		 * maybe we should do a SetClientId/SetClientIdConfirm so
		 * the server throws it away?
		 */
		LIST_REMOVE(clp, nfsc_list);
		nfscl_delegreturnall(clp, p, dhp);
		cred = newnfs_getcred();
		/*
		 * For NFSv4.1 or later, destroy the session and then the
		 * client. Otherwise, call nfsrpc_setclient() without reclaim.
		 * Failures of these RPCs are deliberately ignored.
		 */
		if (NFSHASNFSV4N(nmp)) {
			(void)nfsrpc_destroysession(nmp, clp, cred, p);
			(void)nfsrpc_destroyclient(nmp, clp, cred, p);
		} else
			(void)nfsrpc_setclient(nmp, clp, 0, cred, p);
		nfscl_cleanclient(clp);
		nmp->nm_clp = NULL;
		NFSFREECRED(cred);
		free(clp, M_NFSCLCLIENT);
	} else
		/* No client structure, so just drop the mutex taken above. */
		NFSUNLOCKCLSTATE();
}
2008 
/*
 * This function is called when a server replies with NFSERR_STALECLIENTID
 * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
 * doing Opens and Locks with reclaim. If these fail, it deletes the
 * corresponding state.
 *
 * clp  - the client whose state is to be recovered
 * cred - credentials used for nfsrpc_setclient() and
 *        nfsrpc_reclaimcomplete()
 * p    - passed through to the RPC functions
 *
 * Overall sequence:
 * 1. Set NFSCLFLAGS_RECVRINPROG and acquire nfsc_lock.
 * 2. Throw away all layouts.
 * 3. Get a new clientid via nfsrpc_setclient() (up to 5 attempts). If
 *    that fails, clear the recovery flags, unlock and return.
 * 4. Flag the requests queued for this mount with R_DONTRECOVER.
 * 5. Reclaim each Open, along with a matching delegation and the Open's
 *    byte range locks.
 * 6. Reclaim the delegations still marked NFSCLDL_NEEDRECLAIM by using
 *    temporary Opens.
 * 7. Close the temporary Opens and return any unexpected delegations.
 * 8. For NFSv4.1 or later do a RECLAIM_COMPLETE, then clear
 *    NFSCLFLAGS_RECVRINPROG, wake up waiters and release nfsc_lock.
 */
static void
nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclowner *owp, *nowp;
	struct nfsclopen *op, *nop;
	struct nfscllockowner *lp, *nlp;
	struct nfscllock *lop, *nlop;
	struct nfscldeleg *dp, *ndp, *tdp;
	struct nfsmount *nmp;
	struct ucred *tcred;
	struct nfsclopenhead extra_open;
	struct nfscldeleghead extra_deleg;
	struct nfsreq *rep;
	u_int64_t len;
	u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
	int i, igotlock = 0, error, trycnt, firstlock;
	struct nfscllayout *lyp, *nlyp;
	bool recovered_one;

	/*
	 * First, lock the client structure, so everyone else will
	 * block when trying to use state.
	 */
	NFSLOCKCLSTATE();
	clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
	do {
		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
		    NFSCLSTATEMUTEXPTR, NULL);
	} while (!igotlock);
	NFSUNLOCKCLSTATE();

	nmp = clp->nfsc_nmp;
	if (nmp == NULL)
		panic("nfscl recover");

	/*
	 * For now, just get rid of all layouts. There may be a need
	 * to do LayoutCommit Ops with reclaim == true later.
	 */
	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
		nfscl_freelayout(lyp);
	TAILQ_INIT(&clp->nfsc_layout);
	for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
		LIST_INIT(&clp->nfsc_layouthash[i]);

	/*
	 * Get a new clientid, with the reclaim argument set. The call is
	 * retried, up to 5 attempts in total, while it fails with one of
	 * the "stale" errors.
	 */
	trycnt = 5;
	do {
		error = nfsrpc_setclient(nmp, clp, 1, cred, p);
	} while ((error == NFSERR_STALECLIENTID ||
	     error == NFSERR_BADSESSION ||
	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
	if (error) {
		/* Give up: clear the recovery flags and release the lock. */
		NFSLOCKCLSTATE();
		clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
		    NFSCLFLAGS_RECVRINPROG);
		wakeup(&clp->nfsc_flags);
		nfsv4_unlock(&clp->nfsc_lock, 0);
		NFSUNLOCKCLSTATE();
		return;
	}
	clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
	clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;

	/*
	 * Mark requests already queued on the server, so that they don't
	 * initiate another recovery cycle. Any requests already in the
	 * queue that handle state information will have the old stale
	 * clientid/stateid and will get a NFSERR_STALESTATEID,
	 * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
	 * This will be translated to NFSERR_STALEDONTRECOVER when
	 * R_DONTRECOVER is set.
	 */
	NFSLOCKREQ();
	TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
		if (rep->r_nmp == nmp)
			rep->r_flags |= R_DONTRECOVER;
	}
	NFSUNLOCKREQ();

	/*
	 * Now, mark all delegations "need reclaim".
	 */
	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
		dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;

	/*
	 * extra_deleg collects delegations replied by the server that do not
	 * match one being reclaimed and extra_open collects the temporary
	 * Opens used to reclaim delegations. Both are disposed of near the
	 * end of this function.
	 */
	TAILQ_INIT(&extra_deleg);
	LIST_INIT(&extra_open);
	/*
	 * Now traverse the state lists, doing Open and Lock Reclaims.
	 */
	tcred = newnfs_getcred();
	recovered_one = false;
	owp = LIST_FIRST(&clp->nfsc_owner);
	while (owp != NULL) {
	    nowp = LIST_NEXT(owp, nfsow_list);
	    /* Reset the open owner's seqid before reclaiming its opens. */
	    owp->nfsow_seqid = 0;
	    op = LIST_FIRST(&owp->nfsow_open);
	    while (op != NULL) {
		nop = LIST_NEXT(op, nfso_list);
		/*
		 * "error" is sticky: once a reclaim has failed with
		 * NFSERR_NOGRACE or NFSERR_BADSESSION, no further reclaims
		 * are attempted for the remaining opens.
		 */
		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
		    /* Search for a delegation to reclaim with the open */
		    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
			if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
			    continue;
			if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
			    mode = NFSV4OPEN_ACCESSWRITE;
			    delegtype = NFSV4OPEN_DELEGATEWRITE;
			} else {
			    mode = NFSV4OPEN_ACCESSREAD;
			    delegtype = NFSV4OPEN_DELEGATEREAD;
			}
			if ((op->nfso_mode & mode) == mode &&
			    op->nfso_fhlen == dp->nfsdl_fhlen &&
			    !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
			    break;
		    }
		    /*
		     * dp is NULL here if the loop ran to completion without
		     * finding a match. ndp is passed by reference to
		     * nfscl_tryopen() and is checked afterwards for a
		     * delegation replied by the server.
		     */
		    ndp = dp;
		    if (dp == NULL)
			delegtype = NFSV4OPEN_DELEGATENONE;
		    newnfs_copycred(&op->nfso_cred, tcred);
		    error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
			op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
			op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
			tcred, p);
		    if (!error) {
			recovered_one = true;
			/* Handle any replied delegation */
			if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
			    || NFSMNT_RDONLY(nmp->nm_mountp))) {
			    if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
				mode = NFSV4OPEN_ACCESSWRITE;
			    else
				mode = NFSV4OPEN_ACCESSREAD;
			    /*
			     * Look for the delegation awaiting reclaim that
			     * this reply corresponds to. If found, copy the
			     * replied fields into it, mark it as reclaimed
			     * and free the replied structure.
			     */
			    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
				if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
				    continue;
				if ((op->nfso_mode & mode) == mode &&
				    op->nfso_fhlen == dp->nfsdl_fhlen &&
				    !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
				    op->nfso_fhlen)) {
				    dp->nfsdl_stateid = ndp->nfsdl_stateid;
				    dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
				    dp->nfsdl_ace = ndp->nfsdl_ace;
				    dp->nfsdl_change = ndp->nfsdl_change;
				    dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
				    if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
					dp->nfsdl_flags |= NFSCLDL_RECALL;
				    free(ndp, M_NFSCLDELEG);
				    ndp = NULL;
				    break;
				}
			    }
			}
			/* A replied delegation that wasn't used is returned later. */
			if (ndp != NULL)
			    TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);

			/* and reclaim all byte range locks */
			lp = LIST_FIRST(&op->nfso_lock);
			while (lp != NULL) {
			    nlp = LIST_NEXT(lp, nfsl_list);
			    lp->nfsl_seqid = 0;
			    /*
			     * firstlock stays 1 until a lock has been
			     * successfully reclaimed for this lock owner.
			     */
			    firstlock = 1;
			    lop = LIST_FIRST(&lp->nfsl_lock);
			    while (lop != NULL) {
				nlop = LIST_NEXT(lop, nfslo_list);
				/*
				 * An end of NFS64BITSSET is passed on as a
				 * length of NFS64BITSSET instead of being
				 * converted to end - first.
				 */
				if (lop->nfslo_end == NFS64BITSSET)
				    len = NFS64BITSSET;
				else
				    len = lop->nfslo_end - lop->nfslo_first;
				error = nfscl_trylock(nmp, NULL,
				    op->nfso_fh, op->nfso_fhlen, lp,
				    firstlock, 1, lop->nfslo_first, len,
				    lop->nfslo_type, tcred, p);
				/* A lock that can't be reclaimed is discarded. */
				if (error != 0)
				    nfscl_freelock(lop, 0);
				else
				    firstlock = 0;
				lop = nlop;
			    }
			    /* If no locks, but a lockowner, just delete it. */
			    if (LIST_EMPTY(&lp->nfsl_lock))
				nfscl_freelockowner(lp, 0);
			    lp = nlp;
			}
		    } else if (error == NFSERR_NOGRACE && !recovered_one &&
			NFSHASNFSV4N(nmp)) {
			/*
			 * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
			 * actually end up here, since the client will do
			 * a recovery for NFSERR_BADSESSION, but will get
			 * an NFSERR_NOGRACE reply for the first "reclaim"
			 * attempt.
			 * So, call nfscl_expireclient() to recover the
			 * opens as best we can and then do a reclaim
			 * complete and return.
			 */
			nfsrpc_reclaimcomplete(nmp, cred, p);
			nfscl_expireclient(clp, nmp, tcred, p);
			goto out;
		    }
		}
		/*
		 * An open that could not be reclaimed is freed, except when
		 * the error is NFSERR_BADSESSION, in which case it is left
		 * in place.
		 */
		if (error != 0 && error != NFSERR_BADSESSION)
		    nfscl_freeopen(op, 0, true);
		op = nop;
	    }
	    owp = nowp;
	}

	/*
	 * Now, try and get any delegations not yet reclaimed by cobbling
	 * together an appropriate open.
	 * From here on, nowp is reused as a temporary open owner, which is
	 * allocated the first time one is needed.
	 */
	nowp = NULL;
	dp = TAILQ_FIRST(&clp->nfsc_deleg);
	while (dp != NULL) {
	    ndp = TAILQ_NEXT(dp, nfsdl_list);
	    if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
		if (nowp == NULL) {
		    nowp = malloc(
			sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
		    /*
		     * Name must be as long as the largest possible
		     * NFSV4CL_LOCKNAMELEN. 12 for now.
		     */
		    NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
			NFSV4CL_LOCKNAMELEN);
		    LIST_INIT(&nowp->nfsow_open);
		    nowp->nfsow_clp = clp;
		    nowp->nfsow_seqid = 0;
		    nowp->nfsow_defunct = 0;
		    nfscl_lockinit(&nowp->nfsow_rwlock);
		}
		nop = NULL;
		/*
		 * As above, no reclaim is attempted once "error" is
		 * NFSERR_NOGRACE or NFSERR_BADSESSION. In that case nop
		 * stays NULL and the delegation is thrown away below.
		 */
		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
		    /* Build a temporary open for the delegation's file handle. */
		    nop = malloc(sizeof (struct nfsclopen) +
			dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
		    nop->nfso_own = nowp;
		    if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
			nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
			delegtype = NFSV4OPEN_DELEGATEWRITE;
		    } else {
			nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
			delegtype = NFSV4OPEN_DELEGATEREAD;
		    }
		    nop->nfso_opencnt = 0;
		    nop->nfso_posixlock = 1;
		    nop->nfso_fhlen = dp->nfsdl_fhlen;
		    NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
		    LIST_INIT(&nop->nfso_lock);
		    nop->nfso_stateid.seqid = 0;
		    nop->nfso_stateid.other[0] = 0;
		    nop->nfso_stateid.other[1] = 0;
		    nop->nfso_stateid.other[2] = 0;
		    newnfs_copycred(&dp->nfsdl_cred, tcred);
		    newnfs_copyincred(tcred, &nop->nfso_cred);
		    tdp = NULL;
		    error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
			nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
			nop->nfso_mode, nop, NULL, 0, &tdp, 1,
			delegtype, tcred, p);
		    if (tdp != NULL) {
			if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
			    mode = NFSV4OPEN_ACCESSWRITE;
			else
			    mode = NFSV4OPEN_ACCESSREAD;
			/*
			 * If the replied delegation matches the temporary
			 * open, copy it into dp and mark dp as reclaimed.
			 * Otherwise, queue it to be returned later.
			 */
			if ((nop->nfso_mode & mode) == mode &&
			    nop->nfso_fhlen == tdp->nfsdl_fhlen &&
			    !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
			    nop->nfso_fhlen)) {
			    dp->nfsdl_stateid = tdp->nfsdl_stateid;
			    dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
			    dp->nfsdl_ace = tdp->nfsdl_ace;
			    dp->nfsdl_change = tdp->nfsdl_change;
			    dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
			    if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
				dp->nfsdl_flags |= NFSCLDL_RECALL;
			    free(tdp, M_NFSCLDELEG);
			} else {
			    TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
			}
		    }
		}
		if (error) {
		    if (nop != NULL)
			free(nop, M_NFSCLOPEN);
		    if (error == NFSERR_NOGRACE && !recovered_one &&
			NFSHASNFSV4N(nmp)) {
			/*
			 * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
			 * actually end up here, since the client will do
			 * a recovery for NFSERR_BADSESSION, but will get
			 * an NFSERR_NOGRACE reply for the first "reclaim"
			 * attempt.
			 * So, call nfscl_expireclient() to recover the
			 * opens as best we can and then do a reclaim
			 * complete and return.
			 */
			nfsrpc_reclaimcomplete(nmp, cred, p);
			nfscl_expireclient(clp, nmp, tcred, p);
			/* The temporary open owner is not needed any more. */
			free(nowp, M_NFSCLOWNER);
			goto out;
		    }
		    /*
		     * Couldn't reclaim it, so throw the state
		     * away. Ouch!!
		     */
		    nfscl_cleandeleg(dp);
		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
		} else {
		    /* Keep the temporary open, so that it can be closed below. */
		    recovered_one = true;
		    LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
		}
	    }
	    dp = ndp;
	}

	/*
	 * Now, get rid of extra Opens and Delegations.
	 * Each Close/DelegReturn is retried, after a nap, for as long as the
	 * reply is NFSERR_GRACE. Any other result ends the retries and the
	 * structure is freed regardless.
	 */
	LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
		do {
			newnfs_copycred(&op->nfso_cred, tcred);
			error = nfscl_tryclose(op, tcred, nmp, p, true);
			if (error == NFSERR_GRACE)
				(void) nfs_catnap(PZERO, error, "nfsexcls");
		} while (error == NFSERR_GRACE);
		LIST_REMOVE(op, nfso_list);
		free(op, M_NFSCLOPEN);
	}
	if (nowp != NULL)
		free(nowp, M_NFSCLOWNER);

	TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
		do {
			newnfs_copycred(&dp->nfsdl_cred, tcred);
			error = nfscl_trydelegreturn(dp, tcred, nmp, p);
			if (error == NFSERR_GRACE)
				(void) nfs_catnap(PZERO, error, "nfsexdlg");
		} while (error == NFSERR_GRACE);
		TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
		free(dp, M_NFSCLDELEG);
	}

	/* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
	if (NFSHASNFSV4N(nmp))
		(void)nfsrpc_reclaimcomplete(nmp, cred, p);

out:
	/*
	 * Recovery is finished: clear the "in progress" flag, wake up any
	 * threads sleeping on nfsc_flags and release nfsc_lock.
	 */
	NFSLOCKCLSTATE();
	clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
	wakeup(&clp->nfsc_flags);
	nfsv4_unlock(&clp->nfsc_lock, 0);
	NFSUNLOCKCLSTATE();
	NFSFREECRED(tcred);
}
2370 
2371 /*
2372  * This function is called when a server replies with NFSERR_EXPIRED.
2373  * It deletes all state for the client and does a fresh SetClientId/confirm.
2374  * XXX Someday it should post a signal to the process(es) that hold the
2375  * state, so they know that lock state has been lost.
2376  */
2377 int
nfscl_hasexpired(struct nfsclclient * clp,u_int32_t clidrev,NFSPROC_T * p)2378 nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2379 {
2380 	struct nfsmount *nmp;
2381 	struct ucred *cred;
2382 	int igotlock = 0, error, trycnt;
2383 
2384 	/*
2385 	 * If the clientid has gone away or a new SetClientid has already
2386 	 * been done, just return ok.
2387 	 */
2388 	if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2389 		return (0);
2390 
2391 	/*
2392 	 * First, lock the client structure, so everyone else will
2393 	 * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2394 	 * that only one thread does the work.
2395 	 */
2396 	NFSLOCKCLSTATE();
2397 	clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
2398 	do {
2399 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2400 		    NFSCLSTATEMUTEXPTR, NULL);
2401 	} while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
2402 	if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2403 		if (igotlock)
2404 			nfsv4_unlock(&clp->nfsc_lock, 0);
2405 		NFSUNLOCKCLSTATE();
2406 		return (0);
2407 	}
2408 	clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2409 	NFSUNLOCKCLSTATE();
2410 
2411 	nmp = clp->nfsc_nmp;
2412 	if (nmp == NULL)
2413 		panic("nfscl expired");
2414 	cred = newnfs_getcred();
2415 	trycnt = 5;
2416 	do {
2417 		error = nfsrpc_setclient(nmp, clp, 0, cred, p);
2418 	} while ((error == NFSERR_STALECLIENTID ||
2419 	     error == NFSERR_BADSESSION ||
2420 	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2421 	if (error) {
2422 		NFSLOCKCLSTATE();
2423 		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2424 	} else {
2425 		/*
2426 		 * Expire the state for the client.
2427 		 */
2428 		nfscl_expireclient(clp, nmp, cred, p);
2429 		NFSLOCKCLSTATE();
2430 		clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2431 		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2432 	}
2433 	clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2434 	wakeup(&clp->nfsc_flags);
2435 	nfsv4_unlock(&clp->nfsc_lock, 0);
2436 	NFSUNLOCKCLSTATE();
2437 	NFSFREECRED(cred);
2438 	return (error);
2439 }
2440 
2441 /*
2442  * This function inserts a lock in the list after insert_lop.
2443  */
2444 static void
nfscl_insertlock(struct nfscllockowner * lp,struct nfscllock * new_lop,struct nfscllock * insert_lop,int local)2445 nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2446     struct nfscllock *insert_lop, int local)
2447 {
2448 
2449 	if ((struct nfscllockowner *)insert_lop == lp)
2450 		LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2451 	else
2452 		LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2453 	if (local)
2454 		nfsstatsv1.cllocallocks++;
2455 	else
2456 		nfsstatsv1.cllocks++;
2457 }
2458 
/*
 * This function updates the locking for a lock owner and given file. It
 * maintains a list of lock ranges ordered on increasing file offset that
 * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
 *
 * lp         - the lock owner whose lock list is updated
 * new_lopp   - the new lock or unlock (nfslo_type == F_UNLCK) range. For a
 *              lock request, the structure is always linked into the list.
 *              For an unlock, it is only used when the unlock splits an
 *              existing lock in two. In both of these cases, *new_lopp is
 *              set to NULL.
 * other_lopp - a spare structure, used when a lock request (not an unlock)
 *              splits an existing lock in two. *other_lopp is set to NULL
 *              when it has been used.
 * local      - passed through to nfscl_insertlock() and nfscl_freelock()
 *
 * Returns 1 if the locks were modified, 0 otherwise.
 */
static int
nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
    struct nfscllock **other_lopp, int local)
{
	struct nfscllock *new_lop = *new_lopp;
	struct nfscllock *lop, *tlop, *ilop;
	struct nfscllock *other_lop;
	int unlock = 0, modified = 0;
	u_int64_t tmp;

	/*
	 * Work down the list until the lock is merged.
	 */
	if (new_lop->nfslo_type == F_UNLCK)
		unlock = 1;
	/*
	 * ilop is the list entry after which the new lock is to be inserted.
	 * It starts out as the lock owner itself, which nfscl_insertlock()
	 * treats as "insert at the head of the list".
	 */
	ilop = (struct nfscllock *)lp;
	lop = LIST_FIRST(&lp->nfsl_lock);
	while (lop != NULL) {
	    /*
	     * Only check locks for this file that aren't before the start of
	     * new lock's range.
	     */
	    if (lop->nfslo_end >= new_lop->nfslo_first) {
		if (new_lop->nfslo_end < lop->nfslo_first) {
		    /*
		     * If the new lock ends before the start of the
		     * current lock's range, no merge, just insert
		     * the new lock.
		     */
		    break;
		}
		if (new_lop->nfslo_type == lop->nfslo_type ||
		    (new_lop->nfslo_first <= lop->nfslo_first &&
		     new_lop->nfslo_end >= lop->nfslo_end)) {
		    /*
		     * This lock can be absorbed by the new lock/unlock.
		     * This happens when it covers the entire range
		     * of the old lock or is contiguous
		     * with the old lock and is of the same type or an
		     * unlock.
		     */
		    /* Absorbing an identical lock doesn't count as a change. */
		    if (new_lop->nfslo_type != lop->nfslo_type ||
			new_lop->nfslo_first != lop->nfslo_first ||
			new_lop->nfslo_end != lop->nfslo_end)
			modified = 1;
		    /* Grow the new range to cover the old one, then free the old. */
		    if (lop->nfslo_first < new_lop->nfslo_first)
			new_lop->nfslo_first = lop->nfslo_first;
		    if (lop->nfslo_end > new_lop->nfslo_end)
			new_lop->nfslo_end = lop->nfslo_end;
		    tlop = lop;
		    lop = LIST_NEXT(lop, nfslo_list);
		    nfscl_freelock(tlop, local);
		    continue;
		}

		/*
		 * All these cases are for contiguous locks that are not the
		 * same type, so they can't be merged.
		 */
		if (new_lop->nfslo_first <= lop->nfslo_first) {
		    /*
		     * This case is where the new lock overlaps with the
		     * first part of the old lock. Move the start of the
		     * old lock to just past the end of the new lock. The
		     * new lock will be inserted in front of the old, since
		     * ilop hasn't been updated. (We are done now.)
		     */
		    if (lop->nfslo_first != new_lop->nfslo_end) {
			lop->nfslo_first = new_lop->nfslo_end;
			modified = 1;
		    }
		    break;
		}
		if (new_lop->nfslo_end >= lop->nfslo_end) {
		    /*
		     * This case is where the new lock overlaps with the
		     * end of the old lock's range. Move the old lock's
		     * end to just before the new lock's first and insert
		     * the new lock after the old lock.
		     * Might not be done yet, since the new lock could
		     * overlap further locks with higher ranges.
		     */
		    if (lop->nfslo_end != new_lop->nfslo_first) {
			lop->nfslo_end = new_lop->nfslo_first;
			modified = 1;
		    }
		    ilop = lop;
		    lop = LIST_NEXT(lop, nfslo_list);
		    continue;
		}
		/*
		 * The final case is where the new lock's range is in the
		 * middle of the current lock's and splits the current lock
		 * up. Use *other_lopp to handle the second part of the
		 * split old lock range. (We are done now.)
		 * For unlock, we use new_lop as other_lop and tmp, since
		 * other_lop and new_lop are the same for this case.
		 * We noted the unlock case above, so we don't need
		 * new_lop->nfslo_type any longer.
		 */
		/*
		 * Save new_lop's start first, since for the unlock case the
		 * assignments to other_lop below overwrite new_lop.
		 */
		tmp = new_lop->nfslo_first;
		if (unlock) {
		    other_lop = new_lop;
		    *new_lopp = NULL;
		} else {
		    other_lop = *other_lopp;
		    *other_lopp = NULL;
		}
		/* other_lop becomes the part of the old lock after the new range. */
		other_lop->nfslo_first = new_lop->nfslo_end;
		other_lop->nfslo_end = lop->nfslo_end;
		other_lop->nfslo_type = lop->nfslo_type;
		/* The old lock is cut back to the part before the new range. */
		lop->nfslo_end = tmp;
		nfscl_insertlock(lp, other_lop, lop, local);
		ilop = lop;
		modified = 1;
		break;
	    }
	    /* This lock ends before the new range starts, so step past it. */
	    ilop = lop;
	    lop = LIST_NEXT(lop, nfslo_list);
	    if (lop == NULL)
		break;
	}

	/*
	 * Insert the new lock in the list at the appropriate place.
	 * Nothing is inserted for an unlock, since the range is simply
	 * removed from the list.
	 */
	if (!unlock) {
		nfscl_insertlock(lp, new_lop, ilop, local);
		*new_lopp = NULL;
		modified = 1;
	}
	return (modified);
}
2600 
2601 /*
2602  * This function must be run as a kernel thread.
2603  * It does Renew Ops and recovery, when required.
2604  */
2605 void
nfscl_renewthread(struct nfsclclient * clp,NFSPROC_T * p)2606 nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2607 {
2608 	struct nfsclowner *owp, *nowp;
2609 	struct nfsclopen *op;
2610 	struct nfscllockowner *lp, *nlp;
2611 	struct nfscldeleghead dh;
2612 	struct nfscldeleg *dp, *ndp;
2613 	struct ucred *cred;
2614 	u_int32_t clidrev;
2615 	int error, cbpathdown, islept, igotlock, ret, clearok;
2616 	uint32_t recover_done_time = 0;
2617 	time_t mytime;
2618 	static time_t prevsec = 0;
2619 	struct nfscllockownerfh *lfhp, *nlfhp;
2620 	struct nfscllockownerfhhead lfh;
2621 	struct nfscllayout *lyp, *nlyp;
2622 	struct nfscldevinfo *dip, *ndip;
2623 	struct nfscllayouthead rlh;
2624 	struct nfsclrecalllayout *recallp;
2625 	struct nfsclds *dsp;
2626 	struct mount *mp;
2627 	vnode_t vp;
2628 
2629 	cred = newnfs_getcred();
2630 	NFSLOCKCLSTATE();
2631 	clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2632 	mp = clp->nfsc_nmp->nm_mountp;
2633 	NFSUNLOCKCLSTATE();
2634 	for(;;) {
2635 		newnfs_setroot(cred);
2636 		cbpathdown = 0;
2637 		if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2638 			/*
2639 			 * Only allow one recover within 1/2 of the lease
2640 			 * duration (nfsc_renew).
2641 			 */
2642 			if (recover_done_time < NFSD_MONOSEC) {
2643 				recover_done_time = NFSD_MONOSEC +
2644 				    clp->nfsc_renew;
2645 				NFSCL_DEBUG(1, "Doing recovery..\n");
2646 				nfscl_recover(clp, cred, p);
2647 			} else {
2648 				NFSCL_DEBUG(1, "Clear Recovery dt=%u ms=%jd\n",
2649 				    recover_done_time, (intmax_t)NFSD_MONOSEC);
2650 				NFSLOCKCLSTATE();
2651 				clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2652 				NFSUNLOCKCLSTATE();
2653 			}
2654 		}
2655 		if (clp->nfsc_expire <= NFSD_MONOSEC &&
2656 		    (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2657 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2658 			clidrev = clp->nfsc_clientidrev;
2659 			error = nfsrpc_renew(clp, NULL, cred, p);
2660 			if (error == NFSERR_CBPATHDOWN)
2661 			    cbpathdown = 1;
2662 			else if (error == NFSERR_STALECLIENTID) {
2663 			    NFSLOCKCLSTATE();
2664 			    clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2665 			    NFSUNLOCKCLSTATE();
2666 			} else if (error == NFSERR_EXPIRED)
2667 			    (void) nfscl_hasexpired(clp, clidrev, p);
2668 		}
2669 
2670 checkdsrenew:
2671 		if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2672 			/* Do renews for any DS sessions. */
2673 			NFSLOCKMNT(clp->nfsc_nmp);
2674 			/* Skip first entry, since the MDS is handled above. */
2675 			dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2676 			if (dsp != NULL)
2677 				dsp = TAILQ_NEXT(dsp, nfsclds_list);
2678 			while (dsp != NULL) {
2679 				if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2680 				    dsp->nfsclds_sess.nfsess_defunct == 0) {
2681 					dsp->nfsclds_expire = NFSD_MONOSEC +
2682 					    clp->nfsc_renew;
2683 					NFSUNLOCKMNT(clp->nfsc_nmp);
2684 					(void)nfsrpc_renew(clp, dsp, cred, p);
2685 					goto checkdsrenew;
2686 				}
2687 				dsp = TAILQ_NEXT(dsp, nfsclds_list);
2688 			}
2689 			NFSUNLOCKMNT(clp->nfsc_nmp);
2690 		}
2691 
2692 		TAILQ_INIT(&dh);
2693 		NFSLOCKCLSTATE();
2694 		if (cbpathdown)
2695 			/* It's a Total Recall! */
2696 			nfscl_totalrecall(clp);
2697 
2698 		/*
2699 		 * Now, handle defunct owners.
2700 		 */
2701 		LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2702 			if (LIST_EMPTY(&owp->nfsow_open)) {
2703 				if (owp->nfsow_defunct != 0)
2704 					nfscl_freeopenowner(owp, 0);
2705 			}
2706 		}
2707 
2708 		/*
2709 		 * Do the recall on any delegations. To avoid trouble, always
2710 		 * come back up here after having slept.
2711 		 */
2712 		igotlock = 0;
2713 tryagain:
2714 		dp = TAILQ_FIRST(&clp->nfsc_deleg);
2715 		while (dp != NULL) {
2716 			ndp = TAILQ_NEXT(dp, nfsdl_list);
2717 			if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2718 				/*
2719 				 * Wait for outstanding I/O ops to be done.
2720 				 */
2721 				if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
2722 				    if (igotlock) {
2723 					nfsv4_unlock(&clp->nfsc_lock, 0);
2724 					igotlock = 0;
2725 				    }
2726 				    dp->nfsdl_rwlock.nfslock_lock |=
2727 					NFSV4LOCK_WANTED;
2728 				    msleep(&dp->nfsdl_rwlock,
2729 					NFSCLSTATEMUTEXPTR, PVFS, "nfscld",
2730 					5 * hz);
2731 				    if (NFSCL_FORCEDISM(mp))
2732 					goto terminate;
2733 				    goto tryagain;
2734 				}
2735 				while (!igotlock) {
2736 				    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2737 					&islept, NFSCLSTATEMUTEXPTR, mp);
2738 				    if (igotlock == 0 && NFSCL_FORCEDISM(mp))
2739 					goto terminate;
2740 				    if (islept)
2741 					goto tryagain;
2742 				}
2743 				NFSUNLOCKCLSTATE();
2744 				newnfs_copycred(&dp->nfsdl_cred, cred);
2745 				ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2746 				    NULL, cred, p, 1, &vp);
2747 				if (!ret) {
2748 				    nfscl_cleandeleg(dp);
2749 				    TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2750 					nfsdl_list);
2751 				    LIST_REMOVE(dp, nfsdl_hash);
2752 				    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2753 				    nfscl_delegcnt--;
2754 				    nfsstatsv1.cldelegates--;
2755 				}
2756 				NFSLOCKCLSTATE();
2757 				/*
2758 				 * The nfsc_lock must be released before doing
2759 				 * vrele(), since it might call nfs_inactive().
2760 				 * For the unlikely case where the vnode failed
2761 				 * to be acquired by nfscl_recalldeleg(), a
2762 				 * VOP_RECLAIM() should be in progress and it
2763 				 * will return the delegation.
2764 				 */
2765 				nfsv4_unlock(&clp->nfsc_lock, 0);
2766 				igotlock = 0;
2767 				if (vp != NULL) {
2768 					NFSUNLOCKCLSTATE();
2769 					vrele(vp);
2770 					NFSLOCKCLSTATE();
2771 				}
2772 				goto tryagain;
2773 			}
2774 			dp = ndp;
2775 		}
2776 
2777 		/*
2778 		 * Clear out old delegations, if we are above the high water
2779 		 * mark. Only clear out ones with no state related to them.
2780 		 * The tailq list is in LRU order.
2781 		 */
2782 		dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2783 		while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2784 		    ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2785 		    if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2786 			dp->nfsdl_rwlock.nfslock_lock == 0 &&
2787 			dp->nfsdl_timestamp < NFSD_MONOSEC &&
2788 			(dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2789 			  NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
2790 			clearok = 1;
2791 			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2792 			    op = LIST_FIRST(&owp->nfsow_open);
2793 			    if (op != NULL) {
2794 				clearok = 0;
2795 				break;
2796 			    }
2797 			}
2798 			if (clearok) {
2799 			    LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2800 				if (!LIST_EMPTY(&lp->nfsl_lock)) {
2801 				    clearok = 0;
2802 				    break;
2803 				}
2804 			    }
2805 			}
2806 			if (clearok) {
2807 			    TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2808 			    LIST_REMOVE(dp, nfsdl_hash);
2809 			    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2810 			    nfscl_delegcnt--;
2811 			    nfsstatsv1.cldelegates--;
2812 			}
2813 		    }
2814 		    dp = ndp;
2815 		}
2816 		if (igotlock)
2817 			nfsv4_unlock(&clp->nfsc_lock, 0);
2818 
2819 		/*
2820 		 * Do the recall on any layouts. To avoid trouble, always
2821 		 * come back up here after having slept.
2822 		 */
2823 		TAILQ_INIT(&rlh);
2824 tryagain2:
2825 		TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2826 			if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2827 				/*
2828 				 * Wait for outstanding I/O ops to be done.
2829 				 */
2830 				if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2831 				    (lyp->nfsly_lock.nfslock_lock &
2832 				     NFSV4LOCK_LOCK) != 0) {
2833 					lyp->nfsly_lock.nfslock_lock |=
2834 					    NFSV4LOCK_WANTED;
2835 					msleep(&lyp->nfsly_lock.nfslock_lock,
2836 					    NFSCLSTATEMUTEXPTR, PVFS, "nfslyp",
2837 					    5 * hz);
2838 					if (NFSCL_FORCEDISM(mp))
2839 					    goto terminate;
2840 					goto tryagain2;
2841 				}
2842 				/* Move the layout to the recall list. */
2843 				TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2844 				    nfsly_list);
2845 				LIST_REMOVE(lyp, nfsly_hash);
2846 				TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2847 
2848 				/* Handle any layout commits. */
2849 				if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2850 				    (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
2851 					lyp->nfsly_flags &= ~NFSLY_WRITTEN;
2852 					NFSUNLOCKCLSTATE();
2853 					NFSCL_DEBUG(3, "do layoutcommit\n");
2854 					nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2855 					    cred, p);
2856 					NFSLOCKCLSTATE();
2857 					goto tryagain2;
2858 				}
2859 			}
2860 		}
2861 
2862 		/* Now, look for stale layouts. */
2863 		lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2864 		while (lyp != NULL) {
2865 			nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2866 			if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2867 			    (lyp->nfsly_flags & (NFSLY_RECALL |
2868 			     NFSLY_RETONCLOSE)) == 0 &&
2869 			    lyp->nfsly_lock.nfslock_usecnt == 0 &&
2870 			    lyp->nfsly_lock.nfslock_lock == 0) {
2871 				NFSCL_DEBUG(4, "ret stale lay=%d\n",
2872 				    nfscl_layoutcnt);
2873 				recallp = malloc(sizeof(*recallp),
2874 				    M_NFSLAYRECALL, M_NOWAIT);
2875 				if (recallp == NULL)
2876 					break;
2877 				(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2878 				    lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2879 				    lyp->nfsly_stateid.seqid, 0, 0, NULL,
2880 				    recallp);
2881 			}
2882 			lyp = nlyp;
2883 		}
2884 
2885 		/*
2886 		 * Free up any unreferenced device info structures.
2887 		 */
2888 		LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2889 			if (dip->nfsdi_layoutrefs == 0 &&
2890 			    dip->nfsdi_refcnt == 0) {
2891 				NFSCL_DEBUG(4, "freeing devinfo\n");
2892 				LIST_REMOVE(dip, nfsdi_list);
2893 				nfscl_freedevinfo(dip);
2894 			}
2895 		}
2896 		NFSUNLOCKCLSTATE();
2897 
2898 		/* Do layout return(s), as required. */
2899 		TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2900 			TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2901 			NFSCL_DEBUG(4, "ret layout\n");
2902 			nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2903 			if ((lyp->nfsly_flags & NFSLY_RETONCLOSE) != 0) {
2904 				NFSLOCKCLSTATE();
2905 				lyp->nfsly_flags |= NFSLY_RETURNED;
2906 				wakeup(lyp);
2907 				NFSUNLOCKCLSTATE();
2908 			} else
2909 				nfscl_freelayout(lyp);
2910 		}
2911 
2912 		/*
2913 		 * Delegreturn any delegations cleaned out or recalled.
2914 		 */
2915 		TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2916 			newnfs_copycred(&dp->nfsdl_cred, cred);
2917 			(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2918 			TAILQ_REMOVE(&dh, dp, nfsdl_list);
2919 			free(dp, M_NFSCLDELEG);
2920 		}
2921 
2922 		SLIST_INIT(&lfh);
2923 		/*
2924 		 * Call nfscl_cleanupkext() once per second to check for
2925 		 * open/lock owners where the process has exited.
2926 		 */
2927 		mytime = NFSD_MONOSEC;
2928 		if (prevsec != mytime) {
2929 			prevsec = mytime;
2930 			nfscl_cleanupkext(clp, &lfh);
2931 		}
2932 
2933 		/*
2934 		 * Do a ReleaseLockOwner for all lock owners where the
2935 		 * associated process no longer exists, as found by
2936 		 * nfscl_cleanupkext().
2937 		 */
2938 		newnfs_setroot(cred);
2939 		SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2940 			LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2941 			    nlp) {
2942 				(void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2943 				    lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2944 				    p);
2945 				nfscl_freelockowner(lp, 0);
2946 			}
2947 			free(lfhp, M_TEMP);
2948 		}
2949 		SLIST_INIT(&lfh);
2950 
2951 		NFSLOCKCLSTATE();
2952 		if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
2953 			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
2954 			    hz);
2955 terminate:
2956 		if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
2957 			clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
2958 			NFSUNLOCKCLSTATE();
2959 			NFSFREECRED(cred);
2960 			wakeup((caddr_t)clp);
2961 			return;
2962 		}
2963 		NFSUNLOCKCLSTATE();
2964 	}
2965 }
2966 
2967 /*
2968  * Initiate state recovery. Called when NFSERR_STALECLIENTID,
2969  * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
2970  */
2971 void
nfscl_initiate_recovery(struct nfsclclient * clp)2972 nfscl_initiate_recovery(struct nfsclclient *clp)
2973 {
2974 
2975 	if (clp == NULL)
2976 		return;
2977 	NFSLOCKCLSTATE();
2978 	clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2979 	NFSUNLOCKCLSTATE();
2980 	wakeup((caddr_t)clp);
2981 }
2982 
2983 /*
2984  * Dump out the state stuff for debugging.
2985  */
2986 void
nfscl_dumpstate(struct nfsmount * nmp,int openowner,int opens,int lockowner,int locks)2987 nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
2988     int lockowner, int locks)
2989 {
2990 	struct nfsclclient *clp;
2991 	struct nfsclowner *owp;
2992 	struct nfsclopen *op;
2993 	struct nfscllockowner *lp;
2994 	struct nfscllock *lop;
2995 	struct nfscldeleg *dp;
2996 
2997 	clp = nmp->nm_clp;
2998 	if (clp == NULL) {
2999 		printf("nfscl dumpstate NULL clp\n");
3000 		return;
3001 	}
3002 	NFSLOCKCLSTATE();
3003 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
3004 	  LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3005 	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3006 		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3007 		    owp->nfsow_owner[0], owp->nfsow_owner[1],
3008 		    owp->nfsow_owner[2], owp->nfsow_owner[3],
3009 		    owp->nfsow_seqid);
3010 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3011 		if (opens)
3012 		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3013 			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3014 			op->nfso_stateid.other[2], op->nfso_opencnt,
3015 			op->nfso_fh[12]);
3016 		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3017 		    if (lockowner)
3018 			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3019 			    lp->nfsl_owner[0], lp->nfsl_owner[1],
3020 			    lp->nfsl_owner[2], lp->nfsl_owner[3],
3021 			    lp->nfsl_seqid,
3022 			    lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3023 			    lp->nfsl_stateid.other[2]);
3024 		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3025 			if (locks)
3026 #ifdef __FreeBSD__
3027 			    printf("lck typ=%d fst=%ju end=%ju\n",
3028 				lop->nfslo_type, (intmax_t)lop->nfslo_first,
3029 				(intmax_t)lop->nfslo_end);
3030 #else
3031 			    printf("lck typ=%d fst=%qd end=%qd\n",
3032 				lop->nfslo_type, lop->nfslo_first,
3033 				lop->nfslo_end);
3034 #endif
3035 		    }
3036 		}
3037 	    }
3038 	  }
3039 	}
3040 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3041 	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3042 		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3043 		    owp->nfsow_owner[0], owp->nfsow_owner[1],
3044 		    owp->nfsow_owner[2], owp->nfsow_owner[3],
3045 		    owp->nfsow_seqid);
3046 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3047 		if (opens)
3048 		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3049 			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3050 			op->nfso_stateid.other[2], op->nfso_opencnt,
3051 			op->nfso_fh[12]);
3052 		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3053 		    if (lockowner)
3054 			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3055 			    lp->nfsl_owner[0], lp->nfsl_owner[1],
3056 			    lp->nfsl_owner[2], lp->nfsl_owner[3],
3057 			    lp->nfsl_seqid,
3058 			    lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3059 			    lp->nfsl_stateid.other[2]);
3060 		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3061 			if (locks)
3062 #ifdef __FreeBSD__
3063 			    printf("lck typ=%d fst=%ju end=%ju\n",
3064 				lop->nfslo_type, (intmax_t)lop->nfslo_first,
3065 				(intmax_t)lop->nfslo_end);
3066 #else
3067 			    printf("lck typ=%d fst=%qd end=%qd\n",
3068 				lop->nfslo_type, lop->nfslo_first,
3069 				lop->nfslo_end);
3070 #endif
3071 		    }
3072 		}
3073 	    }
3074 	}
3075 	NFSUNLOCKCLSTATE();
3076 }
3077 
3078 /*
3079  * Check for duplicate open owners and opens.
3080  * (Only used as a diagnostic aid.)
3081  */
3082 void
nfscl_dupopen(vnode_t vp,int dupopens)3083 nfscl_dupopen(vnode_t vp, int dupopens)
3084 {
3085 	struct nfsclclient *clp;
3086 	struct nfsclowner *owp, *owp2;
3087 	struct nfsclopen *op, *op2;
3088 	struct nfsfh *nfhp;
3089 
3090 	clp = VFSTONFS(vnode_mount(vp))->nm_clp;
3091 	if (clp == NULL) {
3092 		printf("nfscl dupopen NULL clp\n");
3093 		return;
3094 	}
3095 	nfhp = VTONFS(vp)->n_fhp;
3096 	NFSLOCKCLSTATE();
3097 
3098 	/*
3099 	 * First, search for duplicate owners.
3100 	 * These should never happen!
3101 	 */
3102 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3103 	    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3104 		if (owp != owp2 &&
3105 		    !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
3106 		    NFSV4CL_LOCKNAMELEN)) {
3107 			NFSUNLOCKCLSTATE();
3108 			printf("DUP OWNER\n");
3109 			nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0);
3110 			return;
3111 		}
3112 	    }
3113 	}
3114 
3115 	/*
3116 	 * Now, search for duplicate stateids.
3117 	 * These shouldn't happen, either.
3118 	 */
3119 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3120 	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3121 		LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3122 		    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3123 			if (op != op2 &&
3124 			    (op->nfso_stateid.other[0] != 0 ||
3125 			     op->nfso_stateid.other[1] != 0 ||
3126 			     op->nfso_stateid.other[2] != 0) &&
3127 			    op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3128 			    op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3129 			    op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3130 			    NFSUNLOCKCLSTATE();
3131 			    printf("DUP STATEID\n");
3132 			    nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0,
3133 				0);
3134 			    return;
3135 			}
3136 		    }
3137 		}
3138 	    }
3139 	}
3140 
3141 	/*
3142 	 * Now search for duplicate opens.
3143 	 * Duplicate opens for the same owner
3144 	 * should never occur. Other duplicates are
3145 	 * possible and are checked for if "dupopens"
3146 	 * is true.
3147 	 */
3148 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3149 	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3150 		if (nfhp->nfh_len == op2->nfso_fhlen &&
3151 		    !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3152 		    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3153 			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3154 			    if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3155 				!NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3156 				(!NFSBCMP(op->nfso_own->nfsow_owner,
3157 				 op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3158 				 dupopens)) {
3159 				if (!NFSBCMP(op->nfso_own->nfsow_owner,
3160 				    op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3161 				    NFSUNLOCKCLSTATE();
3162 				    printf("BADDUP OPEN\n");
3163 				} else {
3164 				    NFSUNLOCKCLSTATE();
3165 				    printf("DUP OPEN\n");
3166 				}
3167 				nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1,
3168 				    0, 0);
3169 				return;
3170 			    }
3171 			}
3172 		    }
3173 		}
3174 	    }
3175 	}
3176 	NFSUNLOCKCLSTATE();
3177 }
3178 
3179 /*
3180  * During close, find an open that needs to be dereferenced and
3181  * dereference it. If there are no more opens for this file,
3182  * log a message to that effect.
3183  * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3184  * on the file's vnode.
3185  * This is the safe way, since it is difficult to identify
3186  * which open the close is for and I/O can be performed after the
3187  * close(2) system call when a file is mmap'd.
3188  * If it returns 0 for success, there will be a referenced
3189  * clp returned via clpp.
3190  */
3191 int
nfscl_getclose(vnode_t vp,struct nfsclclient ** clpp)3192 nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3193 {
3194 	struct nfsclclient *clp;
3195 	struct nfsclowner *owp;
3196 	struct nfsclopen *op;
3197 	struct nfscldeleg *dp;
3198 	struct nfsfh *nfhp;
3199 	int error, notdecr;
3200 
3201 	error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, true, &clp);
3202 	if (error)
3203 		return (error);
3204 	*clpp = clp;
3205 
3206 	nfhp = VTONFS(vp)->n_fhp;
3207 	notdecr = 1;
3208 	NFSLOCKCLSTATE();
3209 	/*
3210 	 * First, look for one under a delegation that was locally issued
3211 	 * and just decrement the opencnt for it. Since all my Opens against
3212 	 * the server are DENY_NONE, I don't see a problem with hanging
3213 	 * onto them. (It is much easier to use one of the extant Opens
3214 	 * that I already have on the server when a Delegation is recalled
3215 	 * than to do fresh Opens.) Someday, I might need to rethink this, but.
3216 	 */
3217 	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3218 	if (dp != NULL) {
3219 		LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3220 			op = LIST_FIRST(&owp->nfsow_open);
3221 			if (op != NULL) {
3222 				/*
3223 				 * Since a delegation is for a file, there
3224 				 * should never be more than one open for
3225 				 * each openowner.
3226 				 */
3227 				if (LIST_NEXT(op, nfso_list) != NULL)
3228 					panic("nfscdeleg opens");
3229 				if (notdecr && op->nfso_opencnt > 0) {
3230 					notdecr = 0;
3231 					op->nfso_opencnt--;
3232 					break;
3233 				}
3234 			}
3235 		}
3236 	}
3237 
3238 	/* Now process the opens against the server. */
3239 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3240 		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3241 			if (op->nfso_fhlen == nfhp->nfh_len &&
3242 			    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3243 			    nfhp->nfh_len)) {
3244 				/* Found an open, decrement cnt if possible */
3245 				if (notdecr && op->nfso_opencnt > 0) {
3246 					notdecr = 0;
3247 					op->nfso_opencnt--;
3248 				}
3249 				/*
3250 				 * There are more opens, so just return.
3251 				 */
3252 				if (op->nfso_opencnt > 0) {
3253 					NFSUNLOCKCLSTATE();
3254 					return (0);
3255 				}
3256 			}
3257 		}
3258 	}
3259 	NFSUNLOCKCLSTATE();
3260 	if (notdecr)
3261 		printf("nfscl: never fnd open\n");
3262 	return (0);
3263 }
3264 
3265 int
nfscl_doclose(vnode_t vp,struct nfsclclient ** clpp,NFSPROC_T * p)3266 nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
3267 {
3268 	struct nfsclclient *clp;
3269 	struct nfsmount *nmp;
3270 	struct nfsclowner *owp, *nowp;
3271 	struct nfsclopen *op, *nop;
3272 	struct nfsclopenhead delayed;
3273 	struct nfscldeleg *dp;
3274 	struct nfsfh *nfhp;
3275 	struct nfsclrecalllayout *recallp;
3276 	struct nfscllayout *lyp;
3277 	int error;
3278 
3279 	error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, true, &clp);
3280 	if (error)
3281 		return (error);
3282 	*clpp = clp;
3283 
3284 	nmp = VFSTONFS(vnode_mount(vp));
3285 	nfhp = VTONFS(vp)->n_fhp;
3286 	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
3287 	NFSLOCKCLSTATE();
3288 	/*
3289 	 * First get rid of the local Open structures, which should be no
3290 	 * longer in use.
3291 	 */
3292 	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3293 	if (dp != NULL) {
3294 		LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
3295 			op = LIST_FIRST(&owp->nfsow_open);
3296 			if (op != NULL) {
3297 				KASSERT((op->nfso_opencnt == 0),
3298 				    ("nfscl: bad open cnt on deleg"));
3299 				nfscl_freeopen(op, 1, true);
3300 			}
3301 			nfscl_freeopenowner(owp, 1);
3302 		}
3303 	}
3304 
3305 	/* Return any layouts marked return on close. */
3306 	nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp,
3307 	    &lyp);
3308 
3309 	/* Now process the opens against the server. */
3310 	LIST_INIT(&delayed);
3311 lookformore:
3312 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3313 		op = LIST_FIRST(&owp->nfsow_open);
3314 		while (op != NULL) {
3315 			if (op->nfso_fhlen == nfhp->nfh_len &&
3316 			    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3317 			    nfhp->nfh_len)) {
3318 				/* Found an open, close it. */
3319 				KASSERT((op->nfso_opencnt == 0),
3320 				    ("nfscl: bad open cnt on server"));
3321 				NFSUNLOCKCLSTATE();
3322 				if (NFSHASNFSV4N(nmp))
3323 					nfsrpc_doclose(nmp, op, p, false,
3324 					    true);
3325 				else
3326 					nfsrpc_doclose(nmp, op, p, true,
3327 					    true);
3328 				NFSLOCKCLSTATE();
3329 				if (error == NFSERR_DELAY) {
3330 					nfscl_unlinkopen(op);
3331 					op->nfso_own = NULL;
3332 					LIST_INSERT_HEAD(&delayed, op,
3333 					    nfso_list);
3334 				}
3335 				goto lookformore;
3336 			}
3337 			op = LIST_NEXT(op, nfso_list);
3338 		}
3339 	}
3340 	nfscl_clrelease(clp);
3341 
3342 	/* Now, wait for any layout that is returned upon close. */
3343 	if (lyp != NULL) {
3344 		while ((lyp->nfsly_flags & NFSLY_RETURNED) == 0) {
3345 			if (NFSCL_FORCEDISM(vnode_mount(vp))) {
3346 				lyp = NULL;
3347 				break;
3348 			}
3349 			msleep(lyp, NFSCLSTATEMUTEXPTR, PZERO, "nfslroc", hz);
3350 		}
3351 		if (lyp != NULL)
3352 			nfscl_freelayout(lyp);
3353 	}
3354 
3355 	NFSUNLOCKCLSTATE();
3356 	/*
3357 	 * recallp has been set NULL by nfscl_retoncloselayout() if it was
3358 	 * used by the function, but calling free() with a NULL pointer is ok.
3359 	 */
3360 	free(recallp, M_NFSLAYRECALL);
3361 
3362 	/* Now, loop retrying the delayed closes. */
3363 	LIST_FOREACH_SAFE(op, &delayed, nfso_list, nop) {
3364 		nfsrpc_doclose(nmp, op, p, true, false);
3365 		LIST_REMOVE(op, nfso_list);
3366 		nfscl_freeopen(op, 0, false);
3367 	}
3368 	return (0);
3369 }
3370 
3371 /*
3372  * Return all delegations on this client.
3373  * (Must be called with client sleep lock.)
3374  */
3375 static void
nfscl_delegreturnall(struct nfsclclient * clp,NFSPROC_T * p,struct nfscldeleghead * dhp)3376 nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p,
3377     struct nfscldeleghead *dhp)
3378 {
3379 	struct nfscldeleg *dp, *ndp;
3380 	struct ucred *cred;
3381 
3382 	cred = newnfs_getcred();
3383 	TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3384 		nfscl_cleandeleg(dp);
3385 		(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3386 		if (dhp != NULL) {
3387 			nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
3388 			TAILQ_INSERT_HEAD(dhp, dp, nfsdl_list);
3389 		} else
3390 			nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
3391 	}
3392 	NFSFREECRED(cred);
3393 }
3394 
3395 /*
3396  * Return any delegation for this vp.
3397  */
3398 void
nfscl_delegreturnvp(vnode_t vp,NFSPROC_T * p)3399 nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
3400 {
3401 	struct nfsclclient *clp;
3402 	struct nfscldeleg *dp;
3403 	struct ucred *cred;
3404 	struct nfsnode *np;
3405 
3406 	np = VTONFS(vp);
3407 	cred = newnfs_getcred();
3408 	dp = NULL;
3409 	NFSLOCKCLSTATE();
3410 	clp = VFSTONFS(vp->v_mount)->nm_clp;
3411 	if (clp != NULL)
3412 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
3413 		    np->n_fhp->nfh_len);
3414 	if (dp != NULL) {
3415 		nfscl_cleandeleg(dp);
3416 		nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
3417 		NFSUNLOCKCLSTATE();
3418 		newnfs_copycred(&dp->nfsdl_cred, cred);
3419 		nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3420 		free(dp, M_NFSCLDELEG);
3421 	} else
3422 		NFSUNLOCKCLSTATE();
3423 	NFSFREECRED(cred);
3424 }
3425 
3426 /*
3427  * Do a callback RPC.
3428  */
3429 void
nfscl_docb(struct nfsrv_descript * nd,NFSPROC_T * p)3430 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3431 {
3432 	int clist, gotseq_ok, i, j, k, op, rcalls;
3433 	u_int32_t *tl;
3434 	struct nfsclclient *clp;
3435 	struct nfscldeleg *dp = NULL;
3436 	int numops, taglen = -1, error = 0, trunc __unused;
3437 	u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3438 	u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3439 	vnode_t vp = NULL;
3440 	struct nfsnode *np;
3441 	struct vattr va;
3442 	struct nfsfh *nfhp;
3443 	mount_t mp;
3444 	nfsattrbit_t attrbits, rattrbits;
3445 	nfsv4stateid_t stateid;
3446 	uint32_t seqid, slotid = 0, highslot, cachethis __unused;
3447 	uint8_t sessionid[NFSX_V4SESSIONID];
3448 	struct mbuf *rep;
3449 	struct nfscllayout *lyp;
3450 	uint64_t filesid[2], len, off;
3451 	int changed, gotone, laytype, recalltype;
3452 	uint32_t iomode;
3453 	struct nfsclrecalllayout *recallp = NULL;
3454 	struct nfsclsession *tsep;
3455 
3456 	gotseq_ok = 0;
3457 	nfsrvd_rephead(nd);
3458 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3459 	taglen = fxdr_unsigned(int, *tl);
3460 	if (taglen < 0 || taglen > NFSV4_OPAQUELIMIT) {
3461 		error = EBADRPC;
3462 		taglen = -1;
3463 		goto nfsmout;
3464 	}
3465 	if (taglen <= NFSV4_SMALLSTR)
3466 		tagstr = tag;
3467 	else
3468 		tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3469 	error = nfsrv_mtostr(nd, tagstr, taglen);
3470 	if (error) {
3471 		if (taglen > NFSV4_SMALLSTR)
3472 			free(tagstr, M_TEMP);
3473 		taglen = -1;
3474 		goto nfsmout;
3475 	}
3476 	(void) nfsm_strtom(nd, tag, taglen);
3477 	if (taglen > NFSV4_SMALLSTR) {
3478 		free(tagstr, M_TEMP);
3479 	}
3480 	NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3481 	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3482 	minorvers = fxdr_unsigned(u_int32_t, *tl++);
3483 	if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION)
3484 		nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3485 	cbident = fxdr_unsigned(u_int32_t, *tl++);
3486 	if (nd->nd_repstat)
3487 		numops = 0;
3488 	else
3489 		numops = fxdr_unsigned(int, *tl);
3490 	/*
3491 	 * Loop around doing the sub ops.
3492 	 */
3493 	for (i = 0; i < numops; i++) {
3494 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3495 		NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3496 		*repp++ = *tl;
3497 		op = fxdr_unsigned(int, *tl);
3498 		nd->nd_procnum = op;
3499 		if (i == 0 && op != NFSV4OP_CBSEQUENCE && minorvers !=
3500 		    NFSV4_MINORVERSION) {
3501 		    nd->nd_repstat = NFSERR_OPNOTINSESS;
3502 		    *repp = nfscl_errmap(nd, minorvers);
3503 		    retops++;
3504 		    break;
3505 		}
3506 		if (op < NFSV4OP_CBGETATTR ||
3507 		   (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3508 		   (op > NFSV4OP_CBNOTIFYDEVID &&
3509 		    minorvers == NFSV41_MINORVERSION)) {
3510 		    nd->nd_repstat = NFSERR_OPILLEGAL;
3511 		    *repp = nfscl_errmap(nd, minorvers);
3512 		    retops++;
3513 		    break;
3514 		}
3515 		if (op < NFSV41_CBNOPS)
3516 			nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
3517 		switch (op) {
3518 		case NFSV4OP_CBGETATTR:
3519 			NFSCL_DEBUG(4, "cbgetattr\n");
3520 			mp = NULL;
3521 			vp = NULL;
3522 			error = nfsm_getfh(nd, &nfhp);
3523 			if (!error)
3524 				error = nfsrv_getattrbits(nd, &attrbits,
3525 				    NULL, NULL);
3526 			if (!error) {
3527 				mp = nfscl_getmnt(minorvers, sessionid, cbident,
3528 				    &clp);
3529 				if (mp == NULL)
3530 					error = NFSERR_SERVERFAULT;
3531 			}
3532 			if (!error) {
3533 				error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3534 				    nfhp->nfh_len, p, &np);
3535 				if (!error)
3536 					vp = NFSTOV(np);
3537 			}
3538 			if (!error) {
3539 				NFSZERO_ATTRBIT(&rattrbits);
3540 				NFSLOCKCLSTATE();
3541 				dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3542 				    nfhp->nfh_len);
3543 				if (dp != NULL) {
3544 					if (NFSISSET_ATTRBIT(&attrbits,
3545 					    NFSATTRBIT_SIZE)) {
3546 						if (vp != NULL)
3547 							va.va_size = np->n_size;
3548 						else
3549 							va.va_size =
3550 							    dp->nfsdl_size;
3551 						NFSSETBIT_ATTRBIT(&rattrbits,
3552 						    NFSATTRBIT_SIZE);
3553 					}
3554 					if (NFSISSET_ATTRBIT(&attrbits,
3555 					    NFSATTRBIT_CHANGE)) {
3556 						va.va_filerev =
3557 						    dp->nfsdl_change;
3558 						if (vp == NULL ||
3559 						    (np->n_flag & NDELEGMOD))
3560 							va.va_filerev++;
3561 						NFSSETBIT_ATTRBIT(&rattrbits,
3562 						    NFSATTRBIT_CHANGE);
3563 					}
3564 				} else
3565 					error = NFSERR_SERVERFAULT;
3566 				NFSUNLOCKCLSTATE();
3567 			}
3568 			if (vp != NULL)
3569 				vrele(vp);
3570 			if (mp != NULL)
3571 				vfs_unbusy(mp);
3572 			if (nfhp != NULL)
3573 				free(nfhp, M_NFSFH);
3574 			if (!error)
3575 				(void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3576 				    NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3577 				    (uint64_t)0, NULL);
3578 			break;
3579 		case NFSV4OP_CBRECALL:
3580 			NFSCL_DEBUG(4, "cbrecall\n");
3581 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3582 			    NFSX_UNSIGNED);
3583 			stateid.seqid = *tl++;
3584 			NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3585 			    NFSX_STATEIDOTHER);
3586 			tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3587 			trunc = fxdr_unsigned(int, *tl);
3588 			error = nfsm_getfh(nd, &nfhp);
3589 			if (!error) {
3590 				NFSLOCKCLSTATE();
3591 				if (minorvers == NFSV4_MINORVERSION)
3592 					clp = nfscl_getclnt(cbident);
3593 				else
3594 					clp = nfscl_getclntsess(sessionid);
3595 				if (clp != NULL) {
3596 					dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3597 					    nfhp->nfh_len);
3598 					if (dp != NULL && (dp->nfsdl_flags &
3599 					    NFSCLDL_DELEGRET) == 0) {
3600 						dp->nfsdl_flags |=
3601 						    NFSCLDL_RECALL;
3602 						wakeup((caddr_t)clp);
3603 					}
3604 				} else {
3605 					error = NFSERR_SERVERFAULT;
3606 				}
3607 				NFSUNLOCKCLSTATE();
3608 			}
3609 			if (nfhp != NULL)
3610 				free(nfhp, M_NFSFH);
3611 			break;
3612 		case NFSV4OP_CBLAYOUTRECALL:
3613 			NFSCL_DEBUG(4, "cblayrec\n");
3614 			nfhp = NULL;
3615 			NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3616 			laytype = fxdr_unsigned(int, *tl++);
3617 			iomode = fxdr_unsigned(uint32_t, *tl++);
3618 			if (newnfs_true == *tl++)
3619 				changed = 1;
3620 			else
3621 				changed = 0;
3622 			recalltype = fxdr_unsigned(int, *tl);
3623 			NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n",
3624 			    laytype, iomode, changed, recalltype);
3625 			recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3626 			    M_WAITOK);
3627 			if (laytype != NFSLAYOUT_NFSV4_1_FILES &&
3628 			    laytype != NFSLAYOUT_FLEXFILE)
3629 				error = NFSERR_NOMATCHLAYOUT;
3630 			else if (recalltype == NFSLAYOUTRETURN_FILE) {
3631 				error = nfsm_getfh(nd, &nfhp);
3632 				NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3633 				if (error != 0)
3634 					goto nfsmout;
3635 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3636 				    NFSX_STATEID);
3637 				off = fxdr_hyper(tl); tl += 2;
3638 				len = fxdr_hyper(tl); tl += 2;
3639 				stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3640 				NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3641 				if (minorvers == NFSV4_MINORVERSION)
3642 					error = NFSERR_NOTSUPP;
3643 				NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n",
3644 				    (uintmax_t)off, (uintmax_t)len,
3645 				    stateid.seqid, error);
3646 				if (error == 0) {
3647 					NFSLOCKCLSTATE();
3648 					clp = nfscl_getclntsess(sessionid);
3649 					NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3650 					if (clp != NULL) {
3651 						lyp = nfscl_findlayout(clp,
3652 						    nfhp->nfh_fh,
3653 						    nfhp->nfh_len);
3654 						NFSCL_DEBUG(4, "cblyp=%p\n",
3655 						    lyp);
3656 						if (lyp != NULL &&
3657 						    (lyp->nfsly_flags &
3658 						     (NFSLY_FILES |
3659 						      NFSLY_FLEXFILE)) != 0 &&
3660 						    !NFSBCMP(stateid.other,
3661 						    lyp->nfsly_stateid.other,
3662 						    NFSX_STATEIDOTHER)) {
3663 							error =
3664 							    nfscl_layoutrecall(
3665 							    recalltype,
3666 							    lyp, iomode, off,
3667 							    len, stateid.seqid,
3668 							    0, 0, NULL,
3669 							    recallp);
3670 							if (error == 0 &&
3671 							    stateid.seqid >
3672 							    lyp->nfsly_stateid.seqid)
3673 								lyp->nfsly_stateid.seqid =
3674 								    stateid.seqid;
3675 							recallp = NULL;
3676 							wakeup(clp);
3677 							NFSCL_DEBUG(4,
3678 							    "aft layrcal=%d "
3679 							    "layseqid=%d\n",
3680 							    error,
3681 							    lyp->nfsly_stateid.seqid);
3682 						} else
3683 							error =
3684 							  NFSERR_NOMATCHLAYOUT;
3685 					} else
3686 						error = NFSERR_NOMATCHLAYOUT;
3687 					NFSUNLOCKCLSTATE();
3688 				}
3689 				free(nfhp, M_NFSFH);
3690 			} else if (recalltype == NFSLAYOUTRETURN_FSID) {
3691 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3692 				filesid[0] = fxdr_hyper(tl); tl += 2;
3693 				filesid[1] = fxdr_hyper(tl); tl += 2;
3694 				gotone = 0;
3695 				NFSLOCKCLSTATE();
3696 				clp = nfscl_getclntsess(sessionid);
3697 				if (clp != NULL) {
3698 					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3699 					    nfsly_list) {
3700 						if (lyp->nfsly_filesid[0] ==
3701 						    filesid[0] &&
3702 						    lyp->nfsly_filesid[1] ==
3703 						    filesid[1]) {
3704 							error =
3705 							    nfscl_layoutrecall(
3706 							    recalltype,
3707 							    lyp, iomode, 0,
3708 							    UINT64_MAX,
3709 							    lyp->nfsly_stateid.seqid,
3710 							    0, 0, NULL,
3711 							    recallp);
3712 							recallp = NULL;
3713 							gotone = 1;
3714 						}
3715 					}
3716 					if (gotone != 0)
3717 						wakeup(clp);
3718 					else
3719 						error = NFSERR_NOMATCHLAYOUT;
3720 				} else
3721 					error = NFSERR_NOMATCHLAYOUT;
3722 				NFSUNLOCKCLSTATE();
3723 			} else if (recalltype == NFSLAYOUTRETURN_ALL) {
3724 				gotone = 0;
3725 				NFSLOCKCLSTATE();
3726 				clp = nfscl_getclntsess(sessionid);
3727 				if (clp != NULL) {
3728 					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3729 					    nfsly_list) {
3730 						error = nfscl_layoutrecall(
3731 						    recalltype, lyp, iomode, 0,
3732 						    UINT64_MAX,
3733 						    lyp->nfsly_stateid.seqid,
3734 						    0, 0, NULL, recallp);
3735 						recallp = NULL;
3736 						gotone = 1;
3737 					}
3738 					if (gotone != 0)
3739 						wakeup(clp);
3740 					else
3741 						error = NFSERR_NOMATCHLAYOUT;
3742 				} else
3743 					error = NFSERR_NOMATCHLAYOUT;
3744 				NFSUNLOCKCLSTATE();
3745 			} else
3746 				error = NFSERR_NOMATCHLAYOUT;
3747 			if (recallp != NULL) {
3748 				free(recallp, M_NFSLAYRECALL);
3749 				recallp = NULL;
3750 			}
3751 			break;
3752 		case NFSV4OP_CBSEQUENCE:
3753 			if (i != 0) {
3754 			    error = NFSERR_SEQUENCEPOS;
3755 			    break;
3756 			}
3757 			NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3758 			    5 * NFSX_UNSIGNED);
3759 			bcopy(tl, sessionid, NFSX_V4SESSIONID);
3760 			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3761 			seqid = fxdr_unsigned(uint32_t, *tl++);
3762 			slotid = fxdr_unsigned(uint32_t, *tl++);
3763 			highslot = fxdr_unsigned(uint32_t, *tl++);
3764 			cachethis = *tl++;
3765 			/* Throw away the referring call stuff. */
3766 			clist = fxdr_unsigned(int, *tl);
3767 			for (j = 0; j < clist; j++) {
3768 				NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3769 				    NFSX_UNSIGNED);
3770 				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3771 				rcalls = fxdr_unsigned(int, *tl);
3772 				for (k = 0; k < rcalls; k++) {
3773 					NFSM_DISSECT(tl, uint32_t *,
3774 					    2 * NFSX_UNSIGNED);
3775 				}
3776 			}
3777 			NFSLOCKCLSTATE();
3778 			clp = nfscl_getclntsess(sessionid);
3779 			if (clp == NULL)
3780 				error = NFSERR_SERVERFAULT;
3781 			if (error == 0) {
3782 				tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3783 				error = nfsv4_seqsession(seqid, slotid,
3784 				    highslot, tsep->nfsess_cbslots, &rep,
3785 				    tsep->nfsess_backslots);
3786 			}
3787 			NFSUNLOCKCLSTATE();
3788 			if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3789 				gotseq_ok = 1;
3790 				if (rep != NULL) {
3791 					/*
3792 					 * Handle a reply for a retried
3793 					 * callback.  The reply will be
3794 					 * re-inserted in the session cache
3795 					 * by the nfsv4_seqsess_cacherep() call
3796 					 * after out:
3797 					 */
3798 					KASSERT(error == NFSERR_REPLYFROMCACHE,
3799 					    ("cbsequence: non-NULL rep"));
3800 					NFSCL_DEBUG(4, "Got cbretry\n");
3801 					m_freem(nd->nd_mreq);
3802 					nd->nd_mreq = rep;
3803 					rep = NULL;
3804 					goto out;
3805 				}
3806 				NFSM_BUILD(tl, uint32_t *,
3807 				    NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3808 				bcopy(sessionid, tl, NFSX_V4SESSIONID);
3809 				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3810 				*tl++ = txdr_unsigned(seqid);
3811 				*tl++ = txdr_unsigned(slotid);
3812 				*tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3813 				*tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3814 			}
3815 			break;
3816 		default:
3817 			if (i == 0 && minorvers == NFSV41_MINORVERSION)
3818 				error = NFSERR_OPNOTINSESS;
3819 			else {
3820 				NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3821 				error = NFSERR_NOTSUPP;
3822 			}
3823 			break;
3824 		}
3825 		if (error) {
3826 			if (error == EBADRPC || error == NFSERR_BADXDR) {
3827 				nd->nd_repstat = NFSERR_BADXDR;
3828 			} else {
3829 				nd->nd_repstat = error;
3830 			}
3831 			error = 0;
3832 		}
3833 		retops++;
3834 		if (nd->nd_repstat) {
3835 			*repp = nfscl_errmap(nd, minorvers);
3836 			break;
3837 		} else
3838 			*repp = 0;	/* NFS4_OK */
3839 	}
3840 nfsmout:
3841 	if (recallp != NULL)
3842 		free(recallp, M_NFSLAYRECALL);
3843 	if (error) {
3844 		if (error == EBADRPC || error == NFSERR_BADXDR)
3845 			nd->nd_repstat = NFSERR_BADXDR;
3846 		else
3847 			printf("nfsv4 comperr1=%d\n", error);
3848 	}
3849 	if (taglen == -1) {
3850 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3851 		*tl++ = 0;
3852 		*tl = 0;
3853 	} else {
3854 		*retopsp = txdr_unsigned(retops);
3855 	}
3856 	*nd->nd_errp = nfscl_errmap(nd, minorvers);
3857 out:
3858 	if (gotseq_ok != 0) {
3859 		rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3860 		NFSLOCKCLSTATE();
3861 		clp = nfscl_getclntsess(sessionid);
3862 		if (clp != NULL) {
3863 			tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3864 			nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3865 			    NFSERR_OK, &rep);
3866 			NFSUNLOCKCLSTATE();
3867 		} else {
3868 			NFSUNLOCKCLSTATE();
3869 			m_freem(rep);
3870 		}
3871 	}
3872 }
3873 
3874 /*
3875  * Generate the next cbident value. Basically just increment a static value
3876  * and then check that it isn't already in the list, if it has wrapped around.
3877  */
3878 static u_int32_t
nfscl_nextcbident(void)3879 nfscl_nextcbident(void)
3880 {
3881 	struct nfsclclient *clp;
3882 	int matched;
3883 	static u_int32_t nextcbident = 0;
3884 	static int haswrapped = 0;
3885 
3886 	nextcbident++;
3887 	if (nextcbident == 0)
3888 		haswrapped = 1;
3889 	if (haswrapped) {
3890 		/*
3891 		 * Search the clientid list for one already using this cbident.
3892 		 */
3893 		do {
3894 			matched = 0;
3895 			NFSLOCKCLSTATE();
3896 			LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3897 				if (clp->nfsc_cbident == nextcbident) {
3898 					matched = 1;
3899 					break;
3900 				}
3901 			}
3902 			NFSUNLOCKCLSTATE();
3903 			if (matched == 1)
3904 				nextcbident++;
3905 		} while (matched);
3906 	}
3907 	return (nextcbident);
3908 }
3909 
3910 /*
3911  * Get the mount point related to a given cbident or session and busy it.
3912  */
3913 static mount_t
nfscl_getmnt(int minorvers,uint8_t * sessionid,u_int32_t cbident,struct nfsclclient ** clpp)3914 nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3915     struct nfsclclient **clpp)
3916 {
3917 	struct nfsclclient *clp;
3918 	mount_t mp;
3919 	int error;
3920 	struct nfsclsession *tsep;
3921 
3922 	*clpp = NULL;
3923 	NFSLOCKCLSTATE();
3924 	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3925 		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3926 		if (minorvers == NFSV4_MINORVERSION) {
3927 			if (clp->nfsc_cbident == cbident)
3928 				break;
3929 		} else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3930 		    NFSX_V4SESSIONID))
3931 			break;
3932 	}
3933 	if (clp == NULL) {
3934 		NFSUNLOCKCLSTATE();
3935 		return (NULL);
3936 	}
3937 	mp = clp->nfsc_nmp->nm_mountp;
3938 	vfs_ref(mp);
3939 	NFSUNLOCKCLSTATE();
3940 	error = vfs_busy(mp, 0);
3941 	vfs_rel(mp);
3942 	if (error != 0)
3943 		return (NULL);
3944 	*clpp = clp;
3945 	return (mp);
3946 }
3947 
3948 /*
3949  * Get the clientid pointer related to a given cbident.
3950  */
3951 static struct nfsclclient *
nfscl_getclnt(u_int32_t cbident)3952 nfscl_getclnt(u_int32_t cbident)
3953 {
3954 	struct nfsclclient *clp;
3955 
3956 	LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3957 		if (clp->nfsc_cbident == cbident)
3958 			break;
3959 	return (clp);
3960 }
3961 
3962 /*
3963  * Get the clientid pointer related to a given sessionid.
3964  */
3965 static struct nfsclclient *
nfscl_getclntsess(uint8_t * sessionid)3966 nfscl_getclntsess(uint8_t *sessionid)
3967 {
3968 	struct nfsclclient *clp;
3969 	struct nfsclsession *tsep;
3970 
3971 	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3972 		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3973 		if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3974 		    NFSX_V4SESSIONID))
3975 			break;
3976 	}
3977 	return (clp);
3978 }
3979 
3980 /*
3981  * Search for a lock conflict locally on the client. A conflict occurs if
3982  * - not same owner and overlapping byte range and at least one of them is
3983  *   a write lock or this is an unlock.
3984  */
3985 static int
nfscl_localconflict(struct nfsclclient * clp,u_int8_t * fhp,int fhlen,struct nfscllock * nlop,u_int8_t * own,struct nfscldeleg * dp,struct nfscllock ** lopp)3986 nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
3987     struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
3988     struct nfscllock **lopp)
3989 {
3990 	struct nfsclowner *owp;
3991 	struct nfsclopen *op;
3992 	int ret;
3993 
3994 	if (dp != NULL) {
3995 		ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
3996 		if (ret)
3997 			return (ret);
3998 	}
3999 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
4000 		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4001 			if (op->nfso_fhlen == fhlen &&
4002 			    !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
4003 				ret = nfscl_checkconflict(&op->nfso_lock, nlop,
4004 				    own, lopp);
4005 				if (ret)
4006 					return (ret);
4007 			}
4008 		}
4009 	}
4010 	return (0);
4011 }
4012 
4013 static int
nfscl_checkconflict(struct nfscllockownerhead * lhp,struct nfscllock * nlop,u_int8_t * own,struct nfscllock ** lopp)4014 nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
4015     u_int8_t *own, struct nfscllock **lopp)
4016 {
4017 	struct nfscllockowner *lp;
4018 	struct nfscllock *lop;
4019 
4020 	LIST_FOREACH(lp, lhp, nfsl_list) {
4021 		if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
4022 			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
4023 				if (lop->nfslo_first >= nlop->nfslo_end)
4024 					break;
4025 				if (lop->nfslo_end <= nlop->nfslo_first)
4026 					continue;
4027 				if (lop->nfslo_type == F_WRLCK ||
4028 				    nlop->nfslo_type == F_WRLCK ||
4029 				    nlop->nfslo_type == F_UNLCK) {
4030 					if (lopp != NULL)
4031 						*lopp = lop;
4032 					return (NFSERR_DENIED);
4033 				}
4034 			}
4035 		}
4036 	}
4037 	return (0);
4038 }
4039 
4040 /*
4041  * Check for a local conflicting lock.
4042  */
4043 int
nfscl_lockt(vnode_t vp,struct nfsclclient * clp,u_int64_t off,u_int64_t len,struct flock * fl,NFSPROC_T * p,void * id,int flags)4044 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
4045     u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
4046 {
4047 	struct nfscllock *lop, nlck;
4048 	struct nfscldeleg *dp;
4049 	struct nfsnode *np;
4050 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
4051 	int error;
4052 
4053 	nlck.nfslo_type = fl->l_type;
4054 	nlck.nfslo_first = off;
4055 	if (len == NFS64BITSSET) {
4056 		nlck.nfslo_end = NFS64BITSSET;
4057 	} else {
4058 		nlck.nfslo_end = off + len;
4059 		if (nlck.nfslo_end <= nlck.nfslo_first)
4060 			return (NFSERR_INVAL);
4061 	}
4062 	np = VTONFS(vp);
4063 	nfscl_filllockowner(id, own, flags);
4064 	NFSLOCKCLSTATE();
4065 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4066 	error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
4067 	    &nlck, own, dp, &lop);
4068 	if (error != 0) {
4069 		fl->l_whence = SEEK_SET;
4070 		fl->l_start = lop->nfslo_first;
4071 		if (lop->nfslo_end == NFS64BITSSET)
4072 			fl->l_len = 0;
4073 		else
4074 			fl->l_len = lop->nfslo_end - lop->nfslo_first;
4075 		fl->l_pid = (pid_t)0;
4076 		fl->l_type = lop->nfslo_type;
4077 		error = -1;			/* no RPC required */
4078 	} else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
4079 	    fl->l_type == F_RDLCK)) {
4080 		/*
4081 		 * The delegation ensures that there isn't a conflicting
4082 		 * lock on the server, so return -1 to indicate an RPC
4083 		 * isn't required.
4084 		 */
4085 		fl->l_type = F_UNLCK;
4086 		error = -1;
4087 	}
4088 	NFSUNLOCKCLSTATE();
4089 	return (error);
4090 }
4091 
/*
 * Handle Recall of a delegation.
 * The clp must be exclusive locked when this is called.
 *
 * The work is done in three steps:
 * 1 - for a write delegation on a modified file, flush the data to the
 *     server
 * 2 - move every open that was done locally against the delegation over
 *     to an openowner on clp, via nfscl_moveopen()
 * 3 - re-acquire every byte range lock that was done locally against the
 *     delegation, via nfscl_relock()
 *
 * If vp is NULL, a vnode for the delegation's file handle is acquired via
 * nfscl_ngetreopen() and handed back in *vpp (vpp must then be non-NULL);
 * *vpp is NULL if that fails, in which case 0 is returned.
 * NOTE(review): this function never releases that vnode, so presumably the
 * caller does - confirm against the callers.
 *
 * Returns 0 on success.  An EIO from the flush (renew thread only) and the
 * NFSERR_STALExxx/NFSERR_BADSESSION errors tested below are returned
 * immediately; for any other failure the first error seen is remembered
 * and returned after all opens and locks have been tried.
 */
static int
nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
    struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
    int called_from_renewthread, vnode_t *vpp)
{
	struct nfsclowner *owp, *lowp, *nowp;
	struct nfsclopen *op, *lop;
	struct nfscllockowner *lp;
	struct nfscllock *lckp;
	struct nfsnode *np;
	int error = 0, ret;

	if (vp == NULL) {
		KASSERT(vpp != NULL, ("nfscl_recalldeleg: vpp NULL"));
		*vpp = NULL;
		/*
		 * First, get a vnode for the file. This is needed to do RPCs.
		 */
		ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
		    dp->nfsdl_fhlen, p, &np);
		if (ret) {
			/*
			 * File isn't open, so nothing to move over to the
			 * server.
			 */
			return (0);
		}
		vp = NFSTOV(np);
		*vpp = vp;
	} else {
		np = VTONFS(vp);
	}
	dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;

	/*
	 * Ok, if it's a write delegation, flush data to the server, so
	 * that close/open consistency is retained.
	 */
	ret = 0;
	NFSLOCKNODE(np);
	if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
		/*
		 * NDELEGRECALL is set only for the duration of the flush and
		 * the node lock is dropped around the ncl_flush() call.
		 */
		np->n_flag |= NDELEGRECALL;
		NFSUNLOCKNODE(np);
		ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
		NFSLOCKNODE(np);
		np->n_flag &= ~NDELEGRECALL;
	}
	NFSINVALATTRCACHE(np);
	NFSUNLOCKNODE(np);
	if (ret == EIO && called_from_renewthread != 0) {
		/*
		 * If the flush failed with EIO for the renew thread,
		 * return now, so that the dirty buffer will be flushed
		 * later.
		 */
		return (ret);
	}

	/*
	 * Now, for each openowner with opens issued locally, move them
	 * over to state against the server.
	 */
	LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
		lop = LIST_FIRST(&lowp->nfsow_open);
		if (lop != NULL) {
			/* A local openowner must have at most one open. */
			if (LIST_NEXT(lop, nfso_list) != NULL)
				panic("nfsdlg mult opens");
			/*
			 * Look for the same openowner against the server.
			 */
			LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
				if (!NFSBCMP(lowp->nfsow_owner,
				    owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
					newnfs_copycred(&dp->nfsdl_cred, cred);
					ret = nfscl_moveopen(vp, clp, nmp, lop,
					    owp, dp, cred, p);
					/* These errors abort the recall. */
					if (ret == NFSERR_STALECLIENTID ||
					    ret == NFSERR_STALEDONTRECOVER ||
					    ret == NFSERR_BADSESSION)
						return (ret);
					if (ret) {
						/*
						 * The move failed, so discard
						 * the local open and remember
						 * the first such error.
						 */
						nfscl_freeopen(lop, 1, true);
						if (!error)
							error = ret;
					}
					break;
				}
			}

			/*
			 * If no openowner found, create one and get an open
			 * for it.
			 * (owp is NULL here only when the loop above ran to
			 * the end of the list without executing its break.)
			 */
			if (owp == NULL) {
				nowp = malloc(
				    sizeof (struct nfsclowner), M_NFSCLOWNER,
				    M_WAITOK);
				nfscl_newopen(clp, NULL, &owp, &nowp, &op,
				    NULL, lowp->nfsow_owner, dp->nfsdl_fh,
				    dp->nfsdl_fhlen, NULL, NULL);
				newnfs_copycred(&dp->nfsdl_cred, cred);
				ret = nfscl_moveopen(vp, clp, nmp, lop,
				    owp, dp, cred, p);
				if (ret) {
					/*
					 * The new openowner is released before
					 * the error is classified.
					 */
					nfscl_freeopenowner(owp, 0);
					if (ret == NFSERR_STALECLIENTID ||
					    ret == NFSERR_STALEDONTRECOVER ||
					    ret == NFSERR_BADSESSION)
						return (ret);
					/*
					 * NOTE(review): ret is always non-zero
					 * at this point, so this inner test is
					 * redundant.
					 */
					if (ret) {
						nfscl_freeopen(lop, 1, true);
						if (!error)
							error = ret;
					}
				}
			}
		}
	}

	/*
	 * Now, get byte range locks for any locks done locally.
	 */
	LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
		LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
			newnfs_copycred(&dp->nfsdl_cred, cred);
			ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
			/* These errors abort the recall. */
			if (ret == NFSERR_STALESTATEID ||
			    ret == NFSERR_STALEDONTRECOVER ||
			    ret == NFSERR_STALECLIENTID ||
			    ret == NFSERR_BADSESSION)
				return (ret);
			/* Otherwise remember the first failure and go on. */
			if (ret && !error)
				error = ret;
		}
	}
	return (error);
}
4233 
4234 /*
4235  * Move a locally issued open over to an owner on the state list.
4236  * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4237  * returns with it unlocked.
4238  */
4239 static int
nfscl_moveopen(vnode_t vp,struct nfsclclient * clp,struct nfsmount * nmp,struct nfsclopen * lop,struct nfsclowner * owp,struct nfscldeleg * dp,struct ucred * cred,NFSPROC_T * p)4240 nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4241     struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4242     struct ucred *cred, NFSPROC_T *p)
4243 {
4244 	struct nfsclopen *op, *nop;
4245 	struct nfscldeleg *ndp;
4246 	struct nfsnode *np;
4247 	int error = 0, newone;
4248 
4249 	/*
4250 	 * First, look for an appropriate open, If found, just increment the
4251 	 * opencnt in it.
4252 	 */
4253 	LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4254 		if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4255 		    op->nfso_fhlen == lop->nfso_fhlen &&
4256 		    !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4257 			op->nfso_opencnt += lop->nfso_opencnt;
4258 			nfscl_freeopen(lop, 1, true);
4259 			return (0);
4260 		}
4261 	}
4262 
4263 	/* No appropriate open, so we have to do one against the server. */
4264 	np = VTONFS(vp);
4265 	nop = malloc(sizeof (struct nfsclopen) +
4266 	    lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4267 	newone = 0;
4268 	nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4269 	    lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4270 	ndp = dp;
4271 	error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4272 	    lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4273 	    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4274 	if (error) {
4275 		if (newone)
4276 			nfscl_freeopen(op, 0, true);
4277 	} else {
4278 		op->nfso_mode |= lop->nfso_mode;
4279 		op->nfso_opencnt += lop->nfso_opencnt;
4280 		nfscl_freeopen(lop, 1, true);
4281 	}
4282 	if (nop != NULL)
4283 		free(nop, M_NFSCLOPEN);
4284 	if (ndp != NULL) {
4285 		/*
4286 		 * What should I do with the returned delegation, since the
4287 		 * delegation is being recalled? For now, just printf and
4288 		 * through it away.
4289 		 */
4290 		printf("Moveopen returned deleg\n");
4291 		free(ndp, M_NFSCLDELEG);
4292 	}
4293 	return (error);
4294 }
4295 
4296 /*
4297  * Recall all delegations on this client.
4298  */
4299 static void
nfscl_totalrecall(struct nfsclclient * clp)4300 nfscl_totalrecall(struct nfsclclient *clp)
4301 {
4302 	struct nfscldeleg *dp;
4303 
4304 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4305 		if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4306 			dp->nfsdl_flags |= NFSCLDL_RECALL;
4307 	}
4308 }
4309 
4310 /*
4311  * Relock byte ranges. Called for delegation recall and state expiry.
4312  */
4313 static int
nfscl_relock(vnode_t vp,struct nfsclclient * clp,struct nfsmount * nmp,struct nfscllockowner * lp,struct nfscllock * lop,struct ucred * cred,NFSPROC_T * p)4314 nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4315     struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4316     NFSPROC_T *p)
4317 {
4318 	struct nfscllockowner *nlp;
4319 	struct nfsfh *nfhp;
4320 	u_int64_t off, len;
4321 	int error, newone, donelocally;
4322 
4323 	off = lop->nfslo_first;
4324 	len = lop->nfslo_end - lop->nfslo_first;
4325 	error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4326 	    clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4327 	    lp->nfsl_openowner, &nlp, &newone, &donelocally);
4328 	if (error || donelocally)
4329 		return (error);
4330 	nfhp = VTONFS(vp)->n_fhp;
4331 	error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4332 	    nfhp->nfh_len, nlp, newone, 0, off,
4333 	    len, lop->nfslo_type, cred, p);
4334 	if (error)
4335 		nfscl_freelockowner(nlp, 0);
4336 	return (error);
4337 }
4338 
4339 /*
4340  * Called to re-open a file. Basically get a vnode for the file handle
4341  * and then call nfsrpc_openrpc() to do the rest.
4342  */
4343 static int
nfsrpc_reopen(struct nfsmount * nmp,u_int8_t * fhp,int fhlen,u_int32_t mode,struct nfsclopen * op,struct nfscldeleg ** dpp,struct ucred * cred,NFSPROC_T * p)4344 nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4345     u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4346     struct ucred *cred, NFSPROC_T *p)
4347 {
4348 	struct nfsnode *np;
4349 	vnode_t vp;
4350 	int error;
4351 
4352 	error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4353 	if (error)
4354 		return (error);
4355 	vp = NFSTOV(np);
4356 	if (np->n_v4 != NULL) {
4357 		error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4358 		    np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4359 		    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4360 		    cred, p);
4361 	} else {
4362 		error = EINVAL;
4363 	}
4364 	vrele(vp);
4365 	return (error);
4366 }
4367 
4368 /*
4369  * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4370  * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4371  * fail.
4372  */
4373 static int
nfscl_tryopen(struct nfsmount * nmp,vnode_t vp,u_int8_t * fhp,int fhlen,u_int8_t * newfhp,int newfhlen,u_int32_t mode,struct nfsclopen * op,u_int8_t * name,int namelen,struct nfscldeleg ** ndpp,int reclaim,u_int32_t delegtype,struct ucred * cred,NFSPROC_T * p)4374 nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4375     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4376     u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4377     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4378 {
4379 	int error;
4380 
4381 	do {
4382 		error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4383 		    mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4384 		    0, 0);
4385 		if (error == NFSERR_DELAY)
4386 			(void) nfs_catnap(PZERO, error, "nfstryop");
4387 	} while (error == NFSERR_DELAY);
4388 	if (error == EAUTH || error == EACCES) {
4389 		/* Try again using system credentials */
4390 		newnfs_setroot(cred);
4391 		do {
4392 		    error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4393 			newfhlen, mode, op, name, namelen, ndpp, reclaim,
4394 			delegtype, cred, p, 1, 0);
4395 		    if (error == NFSERR_DELAY)
4396 			(void) nfs_catnap(PZERO, error, "nfstryop");
4397 		} while (error == NFSERR_DELAY);
4398 	}
4399 	return (error);
4400 }
4401 
4402 /*
4403  * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4404  * NFSERR_DELAY. Also, retry with system credentials, if the provided
4405  * cred don't work.
4406  */
4407 static int
nfscl_trylock(struct nfsmount * nmp,vnode_t vp,u_int8_t * fhp,int fhlen,struct nfscllockowner * nlp,int newone,int reclaim,u_int64_t off,u_int64_t len,short type,struct ucred * cred,NFSPROC_T * p)4408 nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4409     int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4410     u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4411 {
4412 	struct nfsrv_descript nfsd, *nd = &nfsd;
4413 	int error;
4414 
4415 	do {
4416 		error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4417 		    reclaim, off, len, type, cred, p, 0);
4418 		if (!error && nd->nd_repstat == NFSERR_DELAY)
4419 			(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4420 			    "nfstrylck");
4421 	} while (!error && nd->nd_repstat == NFSERR_DELAY);
4422 	if (!error)
4423 		error = nd->nd_repstat;
4424 	if (error == EAUTH || error == EACCES) {
4425 		/* Try again using root credentials */
4426 		newnfs_setroot(cred);
4427 		do {
4428 			error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4429 			    newone, reclaim, off, len, type, cred, p, 1);
4430 			if (!error && nd->nd_repstat == NFSERR_DELAY)
4431 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4432 				    "nfstrylck");
4433 		} while (!error && nd->nd_repstat == NFSERR_DELAY);
4434 		if (!error)
4435 			error = nd->nd_repstat;
4436 	}
4437 	return (error);
4438 }
4439 
4440 /*
4441  * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4442  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4443  * credentials fail.
4444  */
4445 static int
nfscl_trydelegreturn(struct nfscldeleg * dp,struct ucred * cred,struct nfsmount * nmp,NFSPROC_T * p)4446 nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4447     struct nfsmount *nmp, NFSPROC_T *p)
4448 {
4449 	int error;
4450 
4451 	do {
4452 		error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4453 		if (error == NFSERR_DELAY)
4454 			(void) nfs_catnap(PZERO, error, "nfstrydp");
4455 	} while (error == NFSERR_DELAY);
4456 	if (error == EAUTH || error == EACCES) {
4457 		/* Try again using system credentials */
4458 		newnfs_setroot(cred);
4459 		do {
4460 			error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4461 			if (error == NFSERR_DELAY)
4462 				(void) nfs_catnap(PZERO, error, "nfstrydp");
4463 		} while (error == NFSERR_DELAY);
4464 	}
4465 	return (error);
4466 }
4467 
4468 /*
4469  * Try a close against the server. Just call nfsrpc_closerpc(),
4470  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4471  * credentials fail.
4472  */
4473 int
nfscl_tryclose(struct nfsclopen * op,struct ucred * cred,struct nfsmount * nmp,NFSPROC_T * p,bool loop_on_delayed)4474 nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4475     struct nfsmount *nmp, NFSPROC_T *p, bool loop_on_delayed)
4476 {
4477 	struct nfsrv_descript nfsd, *nd = &nfsd;
4478 	int error;
4479 
4480 	do {
4481 		error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4482 		if (loop_on_delayed && error == NFSERR_DELAY)
4483 			(void) nfs_catnap(PZERO, error, "nfstrycl");
4484 	} while (loop_on_delayed && error == NFSERR_DELAY);
4485 	if (error == EAUTH || error == EACCES) {
4486 		/* Try again using system credentials */
4487 		newnfs_setroot(cred);
4488 		do {
4489 			error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4490 			if (loop_on_delayed && error == NFSERR_DELAY)
4491 				(void) nfs_catnap(PZERO, error, "nfstrycl");
4492 		} while (loop_on_delayed && error == NFSERR_DELAY);
4493 	}
4494 	return (error);
4495 }
4496 
4497 /*
4498  * Decide if a delegation on a file permits close without flushing writes
4499  * to the server. This might be a big performance win in some environments.
4500  * (Not useful until the client does caching on local stable storage.)
4501  */
4502 int
nfscl_mustflush(vnode_t vp)4503 nfscl_mustflush(vnode_t vp)
4504 {
4505 	struct nfsclclient *clp;
4506 	struct nfscldeleg *dp;
4507 	struct nfsnode *np;
4508 	struct nfsmount *nmp;
4509 
4510 	np = VTONFS(vp);
4511 	nmp = VFSTONFS(vnode_mount(vp));
4512 	if (!NFSHASNFSV4(nmp))
4513 		return (1);
4514 	NFSLOCKCLSTATE();
4515 	clp = nfscl_findcl(nmp);
4516 	if (clp == NULL) {
4517 		NFSUNLOCKCLSTATE();
4518 		return (1);
4519 	}
4520 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4521 	if (dp != NULL && (dp->nfsdl_flags &
4522 	    (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4523 	     NFSCLDL_WRITE &&
4524 	    (dp->nfsdl_sizelimit >= np->n_size ||
4525 	     !NFSHASSTRICT3530(nmp))) {
4526 		NFSUNLOCKCLSTATE();
4527 		return (0);
4528 	}
4529 	NFSUNLOCKCLSTATE();
4530 	return (1);
4531 }
4532 
4533 /*
4534  * See if a (write) delegation exists for this file.
4535  */
4536 int
nfscl_nodeleg(vnode_t vp,int writedeleg)4537 nfscl_nodeleg(vnode_t vp, int writedeleg)
4538 {
4539 	struct nfsclclient *clp;
4540 	struct nfscldeleg *dp;
4541 	struct nfsnode *np;
4542 	struct nfsmount *nmp;
4543 
4544 	np = VTONFS(vp);
4545 	nmp = VFSTONFS(vnode_mount(vp));
4546 	if (!NFSHASNFSV4(nmp))
4547 		return (1);
4548 	NFSLOCKCLSTATE();
4549 	clp = nfscl_findcl(nmp);
4550 	if (clp == NULL) {
4551 		NFSUNLOCKCLSTATE();
4552 		return (1);
4553 	}
4554 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4555 	if (dp != NULL &&
4556 	    (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4557 	    (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4558 	     NFSCLDL_WRITE)) {
4559 		NFSUNLOCKCLSTATE();
4560 		return (0);
4561 	}
4562 	NFSUNLOCKCLSTATE();
4563 	return (1);
4564 }
4565 
/*
 * Look for an associated delegation that should be DelegReturned.
 *
 * If the file referred to by vp has a delegation, any opens/locks done
 * locally against it are first moved to the server via
 * nfscl_recalldeleg() (attempted at most once), then the delegation's
 * stateid is copied to *stp and the delegation is cleaned up and freed
 * locally.
 *
 * Returns 1 if a stateid was stored in *stp, else 0.  0 is returned
 * without looking for a delegation when the mount has NFSHASPNFS() set,
 * when there is no client structure, or when a forced dismount
 * (NFSCL_FORCEDISM()) is noticed while waiting.
 */
int
nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
{
	struct nfsclclient *clp;
	struct nfscldeleg *dp;
	struct nfsclowner *owp;
	struct nfscllockowner *lp;
	struct nfsmount *nmp;
	struct mount *mp;
	struct ucred *cred;
	struct nfsnode *np;
	int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;

	nmp = VFSTONFS(vnode_mount(vp));
	if (NFSHASPNFS(nmp))
		return (retcnt);
	np = VTONFS(vp);
	mp = nmp->nm_mountp;
	NFSLOCKCLSTATE();
	/*
	 * Loop around waiting for:
	 * - outstanding I/O operations on delegations to complete
	 * - for a delegation on vp that has state, lock the client and
	 *   do a recall
	 * - return delegation with no state
	 */
	while (1) {
		/*
		 * The client and delegation are looked up afresh on every
		 * iteration, since each "continue" below follows a sleep
		 * or a period with the clstate lock released.
		 */
		clp = nfscl_findcl(nmp);
		if (clp == NULL) {
			NFSUNLOCKCLSTATE();
			return (retcnt);
		}
		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
		    np->n_fhp->nfh_len);
		if (dp != NULL) {
		    /*
		     * Wait for outstanding I/O ops to be done.
		     */
		    if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
			/* Don't hold the client lock while sleeping. */
			if (igotlock) {
			    nfsv4_unlock(&clp->nfsc_lock, 0);
			    igotlock = 0;
			}
			dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
			/* Sleep for up to hz ticks, then re-evaluate. */
			msleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO,
			    "nfscld", hz);
			if (NFSCL_FORCEDISM(mp)) {
			    dp->nfsdl_flags &= ~NFSCLDL_DELEGRET;
			    NFSUNLOCKCLSTATE();
			    return (0);
			}
			continue;
		    }
		    /*
		     * A recall is needed if any openowner on the delegation
		     * has an open or any lockowner on it has a lock.
		     */
		    needsrecall = 0;
		    LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
			if (!LIST_EMPTY(&owp->nfsow_open)) {
			    needsrecall = 1;
			    break;
			}
		    }
		    if (!needsrecall) {
			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
				needsrecall = 1;
				break;
			    }
			}
		    }
		    if (needsrecall && !triedrecall) {
			dp->nfsdl_flags |= NFSCLDL_DELEGRET;
			islept = 0;
			/*
			 * Get the client lock via nfsv4_lock() (a non-zero
			 * return is taken as "lock acquired").  If islept
			 * was set by the attempt, start over from the top
			 * of the outer loop.
			 */
			while (!igotlock) {
			    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
				&islept, NFSCLSTATEMUTEXPTR, mp);
			    if (NFSCL_FORCEDISM(mp)) {
				dp->nfsdl_flags &= ~NFSCLDL_DELEGRET;
				if (igotlock)
				    nfsv4_unlock(&clp->nfsc_lock, 0);
				NFSUNLOCKCLSTATE();
				return (0);
			    }
			    if (islept)
				break;
			}
			if (islept)
			    continue;
			/*
			 * The clstate lock is released around the recall,
			 * which is done with a copy of the delegation's
			 * credentials.  The return value of the recall is
			 * ignored.
			 */
			NFSUNLOCKCLSTATE();
			cred = newnfs_getcred();
			newnfs_copycred(&dp->nfsdl_cred, cred);
			nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0, NULL);
			NFSFREECRED(cred);
			/* Only one recall attempt is ever made. */
			triedrecall = 1;
			NFSLOCKCLSTATE();
			nfsv4_unlock(&clp->nfsc_lock, 0);
			igotlock = 0;
			continue;
		    }
		    /*
		     * Either there is no state left on the delegation or a
		     * recall has already been tried: hand the stateid back
		     * to the caller and discard the delegation locally.
		     */
		    *stp = dp->nfsdl_stateid;
		    retcnt = 1;
		    nfscl_cleandeleg(dp);
		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
		}
		if (igotlock)
		    nfsv4_unlock(&clp->nfsc_lock, 0);
		NFSUNLOCKCLSTATE();
		return (retcnt);
	}
}
4677 
4678 /*
4679  * Look for associated delegation(s) that should be DelegReturned.
4680  */
4681 int
nfscl_renamedeleg(vnode_t fvp,nfsv4stateid_t * fstp,int * gotfdp,vnode_t tvp,nfsv4stateid_t * tstp,int * gottdp,NFSPROC_T * p)4682 nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4683     nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4684 {
4685 	struct nfsclclient *clp;
4686 	struct nfscldeleg *dp;
4687 	struct nfsclowner *owp;
4688 	struct nfscllockowner *lp;
4689 	struct nfsmount *nmp;
4690 	struct mount *mp;
4691 	struct ucred *cred;
4692 	struct nfsnode *np;
4693 	int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4694 
4695 	nmp = VFSTONFS(vnode_mount(fvp));
4696 	*gotfdp = 0;
4697 	*gottdp = 0;
4698 	if (NFSHASPNFS(nmp))
4699 		return (retcnt);
4700 	mp = nmp->nm_mountp;
4701 	NFSLOCKCLSTATE();
4702 	/*
4703 	 * Loop around waiting for:
4704 	 * - outstanding I/O operations on delegations to complete
4705 	 * - for a delegation on fvp that has state, lock the client and
4706 	 *   do a recall
4707 	 * - return delegation(s) with no state.
4708 	 */
4709 	while (1) {
4710 		clp = nfscl_findcl(nmp);
4711 		if (clp == NULL) {
4712 			NFSUNLOCKCLSTATE();
4713 			return (retcnt);
4714 		}
4715 		np = VTONFS(fvp);
4716 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4717 		    np->n_fhp->nfh_len);
4718 		if (dp != NULL && *gotfdp == 0) {
4719 		    /*
4720 		     * Wait for outstanding I/O ops to be done.
4721 		     */
4722 		    if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4723 			if (igotlock) {
4724 			    nfsv4_unlock(&clp->nfsc_lock, 0);
4725 			    igotlock = 0;
4726 			}
4727 			dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4728 			msleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO,
4729 			    "nfscld", hz);
4730 			if (NFSCL_FORCEDISM(mp)) {
4731 			    dp->nfsdl_flags &= ~NFSCLDL_DELEGRET;
4732 			    NFSUNLOCKCLSTATE();
4733 			    *gotfdp = 0;
4734 			    *gottdp = 0;
4735 			    return (0);
4736 			}
4737 			continue;
4738 		    }
4739 		    needsrecall = 0;
4740 		    LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4741 			if (!LIST_EMPTY(&owp->nfsow_open)) {
4742 			    needsrecall = 1;
4743 			    break;
4744 			}
4745 		    }
4746 		    if (!needsrecall) {
4747 			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4748 			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4749 				needsrecall = 1;
4750 				break;
4751 			    }
4752 			}
4753 		    }
4754 		    if (needsrecall && !triedrecall) {
4755 			dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4756 			islept = 0;
4757 			while (!igotlock) {
4758 			    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4759 				&islept, NFSCLSTATEMUTEXPTR, mp);
4760 			    if (NFSCL_FORCEDISM(mp)) {
4761 				dp->nfsdl_flags &= ~NFSCLDL_DELEGRET;
4762 				if (igotlock)
4763 				    nfsv4_unlock(&clp->nfsc_lock, 0);
4764 				NFSUNLOCKCLSTATE();
4765 				*gotfdp = 0;
4766 				*gottdp = 0;
4767 				return (0);
4768 			    }
4769 			    if (islept)
4770 				break;
4771 			}
4772 			if (islept)
4773 			    continue;
4774 			NFSUNLOCKCLSTATE();
4775 			cred = newnfs_getcred();
4776 			newnfs_copycred(&dp->nfsdl_cred, cred);
4777 			nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0, NULL);
4778 			NFSFREECRED(cred);
4779 			triedrecall = 1;
4780 			NFSLOCKCLSTATE();
4781 			nfsv4_unlock(&clp->nfsc_lock, 0);
4782 			igotlock = 0;
4783 			continue;
4784 		    }
4785 		    *fstp = dp->nfsdl_stateid;
4786 		    retcnt++;
4787 		    *gotfdp = 1;
4788 		    nfscl_cleandeleg(dp);
4789 		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4790 		}
4791 		if (igotlock) {
4792 		    nfsv4_unlock(&clp->nfsc_lock, 0);
4793 		    igotlock = 0;
4794 		}
4795 		if (tvp != NULL) {
4796 		    np = VTONFS(tvp);
4797 		    dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4798 			np->n_fhp->nfh_len);
4799 		    if (dp != NULL && *gottdp == 0) {
4800 			/*
4801 			 * Wait for outstanding I/O ops to be done.
4802 			 */
4803 			if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4804 			    dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4805 			    msleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO,
4806 				"nfscld", hz);
4807 			    if (NFSCL_FORCEDISM(mp)) {
4808 				NFSUNLOCKCLSTATE();
4809 				*gotfdp = 0;
4810 				*gottdp = 0;
4811 				return (0);
4812 			    }
4813 			    continue;
4814 			}
4815 			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4816 			    if (!LIST_EMPTY(&owp->nfsow_open)) {
4817 				NFSUNLOCKCLSTATE();
4818 				return (retcnt);
4819 			    }
4820 			}
4821 			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4822 			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4823 				NFSUNLOCKCLSTATE();
4824 				return (retcnt);
4825 			    }
4826 			}
4827 			*tstp = dp->nfsdl_stateid;
4828 			retcnt++;
4829 			*gottdp = 1;
4830 			nfscl_cleandeleg(dp);
4831 			nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4832 		    }
4833 		}
4834 		NFSUNLOCKCLSTATE();
4835 		return (retcnt);
4836 	}
4837 }
4838 
4839 /*
4840  * Get a reference on the clientid associated with the mount point.
4841  * Return 1 if success, 0 otherwise.
4842  */
4843 int
nfscl_getref(struct nfsmount * nmp)4844 nfscl_getref(struct nfsmount *nmp)
4845 {
4846 	struct nfsclclient *clp;
4847 	int ret;
4848 
4849 	NFSLOCKCLSTATE();
4850 	clp = nfscl_findcl(nmp);
4851 	if (clp == NULL) {
4852 		NFSUNLOCKCLSTATE();
4853 		return (0);
4854 	}
4855 	nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, nmp->nm_mountp);
4856 	ret = 1;
4857 	if (NFSCL_FORCEDISM(nmp->nm_mountp))
4858 		ret = 0;
4859 	NFSUNLOCKCLSTATE();
4860 	return (ret);
4861 }
4862 
4863 /*
4864  * Release a reference on a clientid acquired with the above call.
4865  */
4866 void
nfscl_relref(struct nfsmount * nmp)4867 nfscl_relref(struct nfsmount *nmp)
4868 {
4869 	struct nfsclclient *clp;
4870 
4871 	NFSLOCKCLSTATE();
4872 	clp = nfscl_findcl(nmp);
4873 	if (clp == NULL) {
4874 		NFSUNLOCKCLSTATE();
4875 		return;
4876 	}
4877 	nfsv4_relref(&clp->nfsc_lock);
4878 	NFSUNLOCKCLSTATE();
4879 }
4880 
4881 /*
4882  * Save the size attribute in the delegation, since the nfsnode
4883  * is going away.
4884  */
4885 void
nfscl_reclaimnode(vnode_t vp)4886 nfscl_reclaimnode(vnode_t vp)
4887 {
4888 	struct nfsclclient *clp;
4889 	struct nfscldeleg *dp;
4890 	struct nfsnode *np = VTONFS(vp);
4891 	struct nfsmount *nmp;
4892 
4893 	nmp = VFSTONFS(vnode_mount(vp));
4894 	if (!NFSHASNFSV4(nmp))
4895 		return;
4896 	NFSLOCKCLSTATE();
4897 	clp = nfscl_findcl(nmp);
4898 	if (clp == NULL) {
4899 		NFSUNLOCKCLSTATE();
4900 		return;
4901 	}
4902 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4903 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4904 		dp->nfsdl_size = np->n_size;
4905 	NFSUNLOCKCLSTATE();
4906 }
4907 
4908 /*
4909  * Get the saved size attribute in the delegation, since it is a
4910  * newly allocated nfsnode.
4911  */
4912 void
nfscl_newnode(vnode_t vp)4913 nfscl_newnode(vnode_t vp)
4914 {
4915 	struct nfsclclient *clp;
4916 	struct nfscldeleg *dp;
4917 	struct nfsnode *np = VTONFS(vp);
4918 	struct nfsmount *nmp;
4919 
4920 	nmp = VFSTONFS(vnode_mount(vp));
4921 	if (!NFSHASNFSV4(nmp))
4922 		return;
4923 	NFSLOCKCLSTATE();
4924 	clp = nfscl_findcl(nmp);
4925 	if (clp == NULL) {
4926 		NFSUNLOCKCLSTATE();
4927 		return;
4928 	}
4929 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4930 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4931 		np->n_size = dp->nfsdl_size;
4932 	NFSUNLOCKCLSTATE();
4933 }
4934 
4935 /*
4936  * If there is a valid write delegation for this file, set the modtime
4937  * to the local clock time.
4938  */
4939 void
nfscl_delegmodtime(vnode_t vp)4940 nfscl_delegmodtime(vnode_t vp)
4941 {
4942 	struct nfsclclient *clp;
4943 	struct nfscldeleg *dp;
4944 	struct nfsnode *np = VTONFS(vp);
4945 	struct nfsmount *nmp;
4946 
4947 	nmp = VFSTONFS(vnode_mount(vp));
4948 	if (!NFSHASNFSV4(nmp))
4949 		return;
4950 	NFSLOCKCLSTATE();
4951 	clp = nfscl_findcl(nmp);
4952 	if (clp == NULL) {
4953 		NFSUNLOCKCLSTATE();
4954 		return;
4955 	}
4956 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4957 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4958 		nanotime(&dp->nfsdl_modtime);
4959 		dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4960 	}
4961 	NFSUNLOCKCLSTATE();
4962 }
4963 
4964 /*
4965  * If there is a valid write delegation for this file with a modtime set,
4966  * put that modtime in mtime.
4967  */
4968 void
nfscl_deleggetmodtime(vnode_t vp,struct timespec * mtime)4969 nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4970 {
4971 	struct nfsclclient *clp;
4972 	struct nfscldeleg *dp;
4973 	struct nfsnode *np = VTONFS(vp);
4974 	struct nfsmount *nmp;
4975 
4976 	nmp = VFSTONFS(vnode_mount(vp));
4977 	if (!NFSHASNFSV4(nmp))
4978 		return;
4979 	NFSLOCKCLSTATE();
4980 	clp = nfscl_findcl(nmp);
4981 	if (clp == NULL) {
4982 		NFSUNLOCKCLSTATE();
4983 		return;
4984 	}
4985 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4986 	if (dp != NULL &&
4987 	    (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4988 	    (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4989 		*mtime = dp->nfsdl_modtime;
4990 	NFSUNLOCKCLSTATE();
4991 }
4992 
4993 static int
nfscl_errmap(struct nfsrv_descript * nd,u_int32_t minorvers)4994 nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
4995 {
4996 	short *defaulterrp, *errp;
4997 
4998 	if (!nd->nd_repstat)
4999 		return (0);
5000 	if (nd->nd_procnum == NFSPROC_NOOP)
5001 		return (txdr_unsigned(nd->nd_repstat & 0xffff));
5002 	if (nd->nd_repstat == EBADRPC)
5003 		return (txdr_unsigned(NFSERR_BADXDR));
5004 	if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
5005 	    nd->nd_repstat == NFSERR_OPILLEGAL)
5006 		return (txdr_unsigned(nd->nd_repstat));
5007 	if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
5008 	    minorvers > NFSV4_MINORVERSION) {
5009 		/* NFSv4.n error. */
5010 		return (txdr_unsigned(nd->nd_repstat));
5011 	}
5012 	if (nd->nd_procnum < NFSV4OP_CBNOPS)
5013 		errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
5014 	else
5015 		return (txdr_unsigned(nd->nd_repstat));
5016 	while (*++errp)
5017 		if (*errp == (short)nd->nd_repstat)
5018 			return (txdr_unsigned(nd->nd_repstat));
5019 	return (txdr_unsigned(*defaulterrp));
5020 }
5021 
5022 /*
5023  * Called to find/add a layout to a client.
5024  * This function returns the layout with a refcnt (shared lock) upon
5025  * success (returns 0) or with no lock/refcnt on the layout when an
5026  * error is returned.
5027  * If a layout is passed in via lypp, it is locked (exclusively locked).
5028  */
5029 int
nfscl_layout(struct nfsmount * nmp,vnode_t vp,u_int8_t * fhp,int fhlen,nfsv4stateid_t * stateidp,int layouttype,int retonclose,struct nfsclflayouthead * fhlp,struct nfscllayout ** lypp,struct ucred * cred,NFSPROC_T * p)5030 nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
5031     nfsv4stateid_t *stateidp, int layouttype, int retonclose,
5032     struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
5033     struct ucred *cred, NFSPROC_T *p)
5034 {
5035 	struct nfsclclient *clp;
5036 	struct nfscllayout *lyp, *tlyp;
5037 	struct nfsclflayout *flp;
5038 	struct nfsnode *np = VTONFS(vp);
5039 	mount_t mp;
5040 	int layout_passed_in;
5041 
5042 	mp = nmp->nm_mountp;
5043 	layout_passed_in = 1;
5044 	tlyp = NULL;
5045 	lyp = *lypp;
5046 	if (lyp == NULL) {
5047 		layout_passed_in = 0;
5048 		tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
5049 		    M_WAITOK | M_ZERO);
5050 	}
5051 
5052 	NFSLOCKCLSTATE();
5053 	clp = nmp->nm_clp;
5054 	if (clp == NULL) {
5055 		if (layout_passed_in != 0)
5056 			nfsv4_unlock(&lyp->nfsly_lock, 0);
5057 		NFSUNLOCKCLSTATE();
5058 		if (tlyp != NULL)
5059 			free(tlyp, M_NFSLAYOUT);
5060 		return (EPERM);
5061 	}
5062 	if (lyp == NULL) {
5063 		/*
5064 		 * Although no lyp was passed in, another thread might have
5065 		 * allocated one. If one is found, just increment it's ref
5066 		 * count and return it.
5067 		 */
5068 		lyp = nfscl_findlayout(clp, fhp, fhlen);
5069 		if (lyp == NULL) {
5070 			lyp = tlyp;
5071 			tlyp = NULL;
5072 			lyp->nfsly_stateid.seqid = stateidp->seqid;
5073 			lyp->nfsly_stateid.other[0] = stateidp->other[0];
5074 			lyp->nfsly_stateid.other[1] = stateidp->other[1];
5075 			lyp->nfsly_stateid.other[2] = stateidp->other[2];
5076 			lyp->nfsly_lastbyte = 0;
5077 			LIST_INIT(&lyp->nfsly_flayread);
5078 			LIST_INIT(&lyp->nfsly_flayrw);
5079 			LIST_INIT(&lyp->nfsly_recall);
5080 			lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
5081 			lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
5082 			lyp->nfsly_clp = clp;
5083 			if (layouttype == NFSLAYOUT_FLEXFILE)
5084 				lyp->nfsly_flags = NFSLY_FLEXFILE;
5085 			else
5086 				lyp->nfsly_flags = NFSLY_FILES;
5087 			if (retonclose != 0)
5088 				lyp->nfsly_flags |= NFSLY_RETONCLOSE;
5089 			lyp->nfsly_fhlen = fhlen;
5090 			NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
5091 			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5092 			LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
5093 			    nfsly_hash);
5094 			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5095 			nfscl_layoutcnt++;
5096 		} else {
5097 			if (retonclose != 0)
5098 				lyp->nfsly_flags |= NFSLY_RETONCLOSE;
5099 			if (stateidp->seqid > lyp->nfsly_stateid.seqid)
5100 				lyp->nfsly_stateid.seqid = stateidp->seqid;
5101 			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
5102 			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5103 			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5104 		}
5105 		nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5106 		if (NFSCL_FORCEDISM(mp)) {
5107 			NFSUNLOCKCLSTATE();
5108 			if (tlyp != NULL)
5109 				free(tlyp, M_NFSLAYOUT);
5110 			return (EPERM);
5111 		}
5112 		*lypp = lyp;
5113 	} else if (stateidp->seqid > lyp->nfsly_stateid.seqid)
5114 		lyp->nfsly_stateid.seqid = stateidp->seqid;
5115 
5116 	/* Merge the new list of File Layouts into the list. */
5117 	flp = LIST_FIRST(fhlp);
5118 	if (flp != NULL) {
5119 		if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
5120 			nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
5121 		else
5122 			nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
5123 	}
5124 	if (layout_passed_in != 0)
5125 		nfsv4_unlock(&lyp->nfsly_lock, 1);
5126 	NFSUNLOCKCLSTATE();
5127 	if (tlyp != NULL)
5128 		free(tlyp, M_NFSLAYOUT);
5129 	return (0);
5130 }
5131 
5132 /*
5133  * Search for a layout by MDS file handle.
5134  * If one is found, it is returned with a refcnt (shared lock) iff
5135  * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
5136  * returned NULL.
5137  */
5138 struct nfscllayout *
nfscl_getlayout(struct nfsclclient * clp,uint8_t * fhp,int fhlen,uint64_t off,uint32_t rwaccess,struct nfsclflayout ** retflpp,int * recalledp)5139 nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
5140     uint64_t off, uint32_t rwaccess, struct nfsclflayout **retflpp,
5141     int *recalledp)
5142 {
5143 	struct nfscllayout *lyp;
5144 	mount_t mp;
5145 	int error, igotlock;
5146 
5147 	mp = clp->nfsc_nmp->nm_mountp;
5148 	*recalledp = 0;
5149 	*retflpp = NULL;
5150 	NFSLOCKCLSTATE();
5151 	lyp = nfscl_findlayout(clp, fhp, fhlen);
5152 	if (lyp != NULL) {
5153 		if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5154 			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
5155 			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5156 			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5157 			error = nfscl_findlayoutforio(lyp, off, rwaccess,
5158 			    retflpp);
5159 			if (error == 0)
5160 				nfsv4_getref(&lyp->nfsly_lock, NULL,
5161 				    NFSCLSTATEMUTEXPTR, mp);
5162 			else {
5163 				do {
5164 					igotlock = nfsv4_lock(&lyp->nfsly_lock,
5165 					    1, NULL, NFSCLSTATEMUTEXPTR, mp);
5166 				} while (igotlock == 0 && !NFSCL_FORCEDISM(mp));
5167 				*retflpp = NULL;
5168 			}
5169 			if (NFSCL_FORCEDISM(mp)) {
5170 				lyp = NULL;
5171 				*recalledp = 1;
5172 			}
5173 		} else {
5174 			lyp = NULL;
5175 			*recalledp = 1;
5176 		}
5177 	}
5178 	NFSUNLOCKCLSTATE();
5179 	return (lyp);
5180 }
5181 
5182 /*
5183  * Search for a layout by MDS file handle. If one is found, mark in to be
5184  * recalled, if it already marked "return on close".
5185  */
5186 static void
nfscl_retoncloselayout(vnode_t vp,struct nfsclclient * clp,uint8_t * fhp,int fhlen,struct nfsclrecalllayout ** recallpp,struct nfscllayout ** lypp)5187 nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
5188     int fhlen, struct nfsclrecalllayout **recallpp, struct nfscllayout **lypp)
5189 {
5190 	struct nfscllayout *lyp;
5191 	uint32_t iomode;
5192 
5193 	*lypp = NULL;
5194 	if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vnode_mount(vp))) ||
5195 	    nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5196 	    (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
5197 		return;
5198 	lyp = nfscl_findlayout(clp, fhp, fhlen);
5199 	if (lyp != NULL && (lyp->nfsly_flags & NFSLY_RETONCLOSE) != 0) {
5200 		if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5201 			iomode = 0;
5202 			if (!LIST_EMPTY(&lyp->nfsly_flayread))
5203 				iomode |= NFSLAYOUTIOMODE_READ;
5204 			if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5205 				iomode |= NFSLAYOUTIOMODE_RW;
5206 			nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5207 			    0, UINT64_MAX, lyp->nfsly_stateid.seqid, 0, 0, NULL,
5208 			    *recallpp);
5209 			NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
5210 			*recallpp = NULL;
5211 		}
5212 
5213 		/* Now, wake up renew thread to do LayoutReturn. */
5214 		wakeup(clp);
5215 		*lypp = lyp;
5216 	}
5217 }
5218 
5219 /*
5220  * Mark the layout to be recalled and with an error.
5221  * Also, disable the dsp from further use.
5222  */
5223 void
nfscl_dserr(uint32_t op,uint32_t stat,struct nfscldevinfo * dp,struct nfscllayout * lyp,struct nfsclds * dsp)5224 nfscl_dserr(uint32_t op, uint32_t stat, struct nfscldevinfo *dp,
5225     struct nfscllayout *lyp, struct nfsclds *dsp)
5226 {
5227 	struct nfsclrecalllayout *recallp;
5228 	uint32_t iomode;
5229 
5230 	printf("DS being disabled, error=%d\n", stat);
5231 	/* Set up the return of the layout. */
5232 	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
5233 	iomode = 0;
5234 	NFSLOCKCLSTATE();
5235 	if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5236 		if (!LIST_EMPTY(&lyp->nfsly_flayread))
5237 			iomode |= NFSLAYOUTIOMODE_READ;
5238 		if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5239 			iomode |= NFSLAYOUTIOMODE_RW;
5240 		(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5241 		    0, UINT64_MAX, lyp->nfsly_stateid.seqid, stat, op,
5242 		    dp->nfsdi_deviceid, recallp);
5243 		NFSUNLOCKCLSTATE();
5244 		NFSCL_DEBUG(4, "nfscl_dserr recall iomode=%d\n", iomode);
5245 	} else {
5246 		NFSUNLOCKCLSTATE();
5247 		free(recallp, M_NFSLAYRECALL);
5248 	}
5249 
5250 	/* And shut the TCP connection down. */
5251 	nfscl_cancelreqs(dsp);
5252 }
5253 
5254 /*
5255  * Cancel all RPCs for this "dsp" by closing the connection.
5256  * Also, mark the session as defunct.
5257  * If NFSCLDS_SAMECONN is set, the connection is shared with other DSs and
5258  * cannot be shut down.
5259  */
5260 void
nfscl_cancelreqs(struct nfsclds * dsp)5261 nfscl_cancelreqs(struct nfsclds *dsp)
5262 {
5263 	struct __rpc_client *cl;
5264 	static int non_event;
5265 
5266 	NFSLOCKDS(dsp);
5267 	if ((dsp->nfsclds_flags & (NFSCLDS_CLOSED | NFSCLDS_SAMECONN)) == 0 &&
5268 	    dsp->nfsclds_sockp != NULL &&
5269 	    dsp->nfsclds_sockp->nr_client != NULL) {
5270 		dsp->nfsclds_flags |= NFSCLDS_CLOSED;
5271 		cl = dsp->nfsclds_sockp->nr_client;
5272 		dsp->nfsclds_sess.nfsess_defunct = 1;
5273 		NFSUNLOCKDS(dsp);
5274 		CLNT_CLOSE(cl);
5275 		/*
5276 		 * This 1sec sleep is done to reduce the number of reconnect
5277 		 * attempts made on the DS while it has failed.
5278 		 */
5279 		tsleep(&non_event, PVFS, "ndscls", hz);
5280 		return;
5281 	}
5282 	NFSUNLOCKDS(dsp);
5283 }
5284 
5285 /*
5286  * Dereference a layout.
5287  */
5288 void
nfscl_rellayout(struct nfscllayout * lyp,int exclocked)5289 nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
5290 {
5291 
5292 	NFSLOCKCLSTATE();
5293 	if (exclocked != 0)
5294 		nfsv4_unlock(&lyp->nfsly_lock, 0);
5295 	else
5296 		nfsv4_relref(&lyp->nfsly_lock);
5297 	NFSUNLOCKCLSTATE();
5298 }
5299 
5300 /*
5301  * Search for a devinfo by deviceid. If one is found, return it after
5302  * acquiring a reference count on it.
5303  */
5304 struct nfscldevinfo *
nfscl_getdevinfo(struct nfsclclient * clp,uint8_t * deviceid,struct nfscldevinfo * dip)5305 nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
5306     struct nfscldevinfo *dip)
5307 {
5308 
5309 	NFSLOCKCLSTATE();
5310 	if (dip == NULL)
5311 		dip = nfscl_finddevinfo(clp, deviceid);
5312 	if (dip != NULL)
5313 		dip->nfsdi_refcnt++;
5314 	NFSUNLOCKCLSTATE();
5315 	return (dip);
5316 }
5317 
5318 /*
5319  * Dereference a devinfo structure.
5320  */
5321 static void
nfscl_reldevinfo_locked(struct nfscldevinfo * dip)5322 nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
5323 {
5324 
5325 	dip->nfsdi_refcnt--;
5326 	if (dip->nfsdi_refcnt == 0)
5327 		wakeup(&dip->nfsdi_refcnt);
5328 }
5329 
/*
 * Drop a reference count on a devinfo structure, acquiring and
 * releasing the state mutex around the operation.
 */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{
	NFSLOCKCLSTATE();
	nfscl_reldevinfo_locked(dip);
	NFSUNLOCKCLSTATE();
}
5341 
5342 /*
5343  * Find a layout for this file handle. Return NULL upon failure.
5344  */
5345 static struct nfscllayout *
nfscl_findlayout(struct nfsclclient * clp,u_int8_t * fhp,int fhlen)5346 nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5347 {
5348 	struct nfscllayout *lyp;
5349 
5350 	LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5351 		if (lyp->nfsly_fhlen == fhlen &&
5352 		    !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5353 			break;
5354 	return (lyp);
5355 }
5356 
5357 /*
5358  * Find a devinfo for this deviceid. Return NULL upon failure.
5359  */
5360 static struct nfscldevinfo *
nfscl_finddevinfo(struct nfsclclient * clp,uint8_t * deviceid)5361 nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5362 {
5363 	struct nfscldevinfo *dip;
5364 
5365 	LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5366 		if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5367 		    == 0)
5368 			break;
5369 	return (dip);
5370 }
5371 
5372 /*
5373  * Merge the new file layout list into the main one, maintaining it in
5374  * increasing offset order.
5375  */
5376 static void
nfscl_mergeflayouts(struct nfsclflayouthead * fhlp,struct nfsclflayouthead * newfhlp)5377 nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
5378     struct nfsclflayouthead *newfhlp)
5379 {
5380 	struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
5381 
5382 	flp = LIST_FIRST(fhlp);
5383 	prevflp = NULL;
5384 	LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
5385 		while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
5386 			prevflp = flp;
5387 			flp = LIST_NEXT(flp, nfsfl_list);
5388 		}
5389 		if (prevflp == NULL)
5390 			LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
5391 		else
5392 			LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
5393 		prevflp = nflp;
5394 	}
5395 }
5396 
5397 /*
5398  * Add this nfscldevinfo to the client, if it doesn't already exist.
5399  * This function consumes the structure pointed at by dip, if not NULL.
5400  */
5401 int
nfscl_adddevinfo(struct nfsmount * nmp,struct nfscldevinfo * dip,int ind,struct nfsclflayout * flp)5402 nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, int ind,
5403     struct nfsclflayout *flp)
5404 {
5405 	struct nfsclclient *clp;
5406 	struct nfscldevinfo *tdip;
5407 	uint8_t *dev;
5408 
5409 	NFSLOCKCLSTATE();
5410 	clp = nmp->nm_clp;
5411 	if (clp == NULL) {
5412 		NFSUNLOCKCLSTATE();
5413 		if (dip != NULL)
5414 			free(dip, M_NFSDEVINFO);
5415 		return (ENODEV);
5416 	}
5417 	if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5418 		dev = flp->nfsfl_dev;
5419 	else
5420 		dev = flp->nfsfl_ffm[ind].dev;
5421 	tdip = nfscl_finddevinfo(clp, dev);
5422 	if (tdip != NULL) {
5423 		tdip->nfsdi_layoutrefs++;
5424 		if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5425 			flp->nfsfl_devp = tdip;
5426 		else
5427 			flp->nfsfl_ffm[ind].devp = tdip;
5428 		nfscl_reldevinfo_locked(tdip);
5429 		NFSUNLOCKCLSTATE();
5430 		if (dip != NULL)
5431 			free(dip, M_NFSDEVINFO);
5432 		return (0);
5433 	}
5434 	if (dip != NULL) {
5435 		LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
5436 		dip->nfsdi_layoutrefs = 1;
5437 		if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5438 			flp->nfsfl_devp = dip;
5439 		else
5440 			flp->nfsfl_ffm[ind].devp = dip;
5441 	}
5442 	NFSUNLOCKCLSTATE();
5443 	if (dip == NULL)
5444 		return (ENODEV);
5445 	return (0);
5446 }
5447 
5448 /*
5449  * Free up a layout structure and associated file layout structure(s).
5450  */
5451 void
nfscl_freelayout(struct nfscllayout * layp)5452 nfscl_freelayout(struct nfscllayout *layp)
5453 {
5454 	struct nfsclflayout *flp, *nflp;
5455 	struct nfsclrecalllayout *rp, *nrp;
5456 
5457 	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5458 		LIST_REMOVE(flp, nfsfl_list);
5459 		nfscl_freeflayout(flp);
5460 	}
5461 	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5462 		LIST_REMOVE(flp, nfsfl_list);
5463 		nfscl_freeflayout(flp);
5464 	}
5465 	LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5466 		LIST_REMOVE(rp, nfsrecly_list);
5467 		free(rp, M_NFSLAYRECALL);
5468 	}
5469 	nfscl_layoutcnt--;
5470 	free(layp, M_NFSLAYOUT);
5471 }
5472 
5473 /*
5474  * Free up a file layout structure.
5475  */
5476 void
nfscl_freeflayout(struct nfsclflayout * flp)5477 nfscl_freeflayout(struct nfsclflayout *flp)
5478 {
5479 	int i, j;
5480 
5481 	if ((flp->nfsfl_flags & NFSFL_FILE) != 0) {
5482 		for (i = 0; i < flp->nfsfl_fhcnt; i++)
5483 			free(flp->nfsfl_fh[i], M_NFSFH);
5484 		if (flp->nfsfl_devp != NULL)
5485 			flp->nfsfl_devp->nfsdi_layoutrefs--;
5486 	}
5487 	if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0)
5488 		for (i = 0; i < flp->nfsfl_mirrorcnt; i++) {
5489 			for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++)
5490 				free(flp->nfsfl_ffm[i].fh[j], M_NFSFH);
5491 			if (flp->nfsfl_ffm[i].devp != NULL)
5492 				flp->nfsfl_ffm[i].devp->nfsdi_layoutrefs--;
5493 		}
5494 	free(flp, M_NFSFLAYOUT);
5495 }
5496 
5497 /*
5498  * Free up a file layout devinfo structure.
5499  */
5500 void
nfscl_freedevinfo(struct nfscldevinfo * dip)5501 nfscl_freedevinfo(struct nfscldevinfo *dip)
5502 {
5503 
5504 	free(dip, M_NFSDEVINFO);
5505 }
5506 
5507 /*
5508  * Mark any layouts that match as recalled.
5509  */
5510 static int
nfscl_layoutrecall(int recalltype,struct nfscllayout * lyp,uint32_t iomode,uint64_t off,uint64_t len,uint32_t stateseqid,uint32_t stat,uint32_t op,char * devid,struct nfsclrecalllayout * recallp)5511 nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
5512     uint64_t off, uint64_t len, uint32_t stateseqid, uint32_t stat, uint32_t op,
5513     char *devid, struct nfsclrecalllayout *recallp)
5514 {
5515 	struct nfsclrecalllayout *rp, *orp;
5516 
5517 	recallp->nfsrecly_recalltype = recalltype;
5518 	recallp->nfsrecly_iomode = iomode;
5519 	recallp->nfsrecly_stateseqid = stateseqid;
5520 	recallp->nfsrecly_off = off;
5521 	recallp->nfsrecly_len = len;
5522 	recallp->nfsrecly_stat = stat;
5523 	recallp->nfsrecly_op = op;
5524 	if (devid != NULL)
5525 		NFSBCOPY(devid, recallp->nfsrecly_devid, NFSX_V4DEVICEID);
5526 	/*
5527 	 * Order the list as file returns first, followed by fsid and any
5528 	 * returns, both in increasing stateseqid order.
5529 	 * Note that the seqids wrap around, so 1 is after 0xffffffff.
5530 	 * (I'm not sure this is correct because I find RFC5661 confusing
5531 	 *  on this, but hopefully it will work ok.)
5532 	 */
5533 	orp = NULL;
5534 	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5535 		orp = rp;
5536 		if ((recalltype == NFSLAYOUTRETURN_FILE &&
5537 		     (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
5538 		      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
5539 		    (recalltype != NFSLAYOUTRETURN_FILE &&
5540 		     rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
5541 		     nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
5542 			LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
5543 			break;
5544 		}
5545 
5546 		/*
5547 		 * Put any error return on all the file returns that will
5548 		 * preceed this one.
5549 		 */
5550 		if (rp->nfsrecly_recalltype == NFSLAYOUTRETURN_FILE &&
5551 		   stat != 0 && rp->nfsrecly_stat == 0) {
5552 			rp->nfsrecly_stat = stat;
5553 			rp->nfsrecly_op = op;
5554 			if (devid != NULL)
5555 				NFSBCOPY(devid, rp->nfsrecly_devid,
5556 				    NFSX_V4DEVICEID);
5557 		}
5558 	}
5559 	if (rp == NULL) {
5560 		if (orp == NULL)
5561 			LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
5562 			    nfsrecly_list);
5563 		else
5564 			LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
5565 	}
5566 	lyp->nfsly_flags |= NFSLY_RECALL;
5567 	wakeup(lyp->nfsly_clp);
5568 	return (0);
5569 }
5570 
/*
 * Compare the two seqids for ordering. The seqids can wrap around from
 * 0xffffffff->0, so a difference of 0x7fffffff or more between them is
 * taken to mean that the numerically smaller one has wrapped around and
 * is therefore the later one.
 * Return 1 if seqid1 comes before (or is equal to) seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{

	if (seqid1 == seqid2)
		return (1);
	if (seqid1 < seqid2) {
		/* A large gap means seqid1 wrapped, so it is the later one. */
		return ((seqid2 - seqid1) >= 0x7fffffff ? 0 : 1);
	}
	/* seqid1 > seqid2: a large gap means seqid2 wrapped and is later. */
	return ((seqid1 - seqid2) >= 0x7fffffff ? 1 : 0);
}
5591 
5592 /*
5593  * Do a layout return for each of the recalls.
5594  */
5595 static void
nfscl_layoutreturn(struct nfsmount * nmp,struct nfscllayout * lyp,struct ucred * cred,NFSPROC_T * p)5596 nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
5597     struct ucred *cred, NFSPROC_T *p)
5598 {
5599 	struct nfsclrecalllayout *rp;
5600 	nfsv4stateid_t stateid;
5601 	int layouttype;
5602 
5603 	NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
5604 	stateid.seqid = lyp->nfsly_stateid.seqid;
5605 	if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5606 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5607 	else
5608 		layouttype = NFSLAYOUT_FLEXFILE;
5609 	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5610 		(void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
5611 		    lyp->nfsly_fhlen, 0, layouttype,
5612 		    rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
5613 		    rp->nfsrecly_off, rp->nfsrecly_len,
5614 		    &stateid, cred, p, rp->nfsrecly_stat, rp->nfsrecly_op,
5615 		    rp->nfsrecly_devid);
5616 	}
5617 }
5618 
5619 /*
5620  * Do the layout commit for a file layout.
5621  */
5622 static void
nfscl_dolayoutcommit(struct nfsmount * nmp,struct nfscllayout * lyp,struct ucred * cred,NFSPROC_T * p)5623 nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
5624     struct ucred *cred, NFSPROC_T *p)
5625 {
5626 	struct nfsclflayout *flp;
5627 	uint64_t len;
5628 	int error, layouttype;
5629 
5630 	if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5631 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5632 	else
5633 		layouttype = NFSLAYOUT_FLEXFILE;
5634 	LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
5635 		if (layouttype == NFSLAYOUT_FLEXFILE &&
5636 		    (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
5637 			NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
5638 			/* If not supported, don't bother doing it. */
5639 			NFSLOCKMNT(nmp);
5640 			nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5641 			NFSUNLOCKMNT(nmp);
5642 			break;
5643 		} else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
5644 			len = flp->nfsfl_end - flp->nfsfl_off;
5645 			error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
5646 			    lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
5647 			    lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
5648 			    layouttype, cred, p, NULL);
5649 			NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
5650 			if (error == NFSERR_NOTSUPP) {
5651 				/* If not supported, don't bother doing it. */
5652 				NFSLOCKMNT(nmp);
5653 				nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5654 				NFSUNLOCKMNT(nmp);
5655 				break;
5656 			}
5657 		}
5658 	}
5659 }
5660 
5661 /*
5662  * Commit all layouts for a file (vnode).
5663  */
5664 int
nfscl_layoutcommit(vnode_t vp,NFSPROC_T * p)5665 nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
5666 {
5667 	struct nfsclclient *clp;
5668 	struct nfscllayout *lyp;
5669 	struct nfsnode *np = VTONFS(vp);
5670 	mount_t mp;
5671 	struct nfsmount *nmp;
5672 
5673 	mp = vnode_mount(vp);
5674 	nmp = VFSTONFS(mp);
5675 	if (NFSHASNOLAYOUTCOMMIT(nmp))
5676 		return (0);
5677 	NFSLOCKCLSTATE();
5678 	clp = nmp->nm_clp;
5679 	if (clp == NULL) {
5680 		NFSUNLOCKCLSTATE();
5681 		return (EPERM);
5682 	}
5683 	lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
5684 	if (lyp == NULL) {
5685 		NFSUNLOCKCLSTATE();
5686 		return (EPERM);
5687 	}
5688 	nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5689 	if (NFSCL_FORCEDISM(mp)) {
5690 		NFSUNLOCKCLSTATE();
5691 		return (EPERM);
5692 	}
5693 tryagain:
5694 	if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
5695 		lyp->nfsly_flags &= ~NFSLY_WRITTEN;
5696 		NFSUNLOCKCLSTATE();
5697 		NFSCL_DEBUG(4, "do layoutcommit2\n");
5698 		nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
5699 		NFSLOCKCLSTATE();
5700 		goto tryagain;
5701 	}
5702 	nfsv4_relref(&lyp->nfsly_lock);
5703 	NFSUNLOCKCLSTATE();
5704 	return (0);
5705 }
5706 
5707