1 /*        $NetBSD: nfs_subs.c,v 1.242 2022/02/09 21:50:24 andvar Exp $          */
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *        The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *        @(#)nfs_subs.c      8.8 (Berkeley) 5/22/95
35  */
36 
37 /*
38  * Copyright 2000 Wasabi Systems, Inc.
39  * All rights reserved.
40  *
41  * Written by Frank van der Linden for Wasabi Systems, Inc.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *      This product includes software developed for the NetBSD Project by
54  *      Wasabi Systems, Inc.
55  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
56  *    or promote products derived from this software without specific prior
57  *    written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69  * POSSIBILITY OF SUCH DAMAGE.
70  */
71 
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.242 2022/02/09 21:50:24 andvar Exp $");
74 
75 #ifdef _KERNEL_OPT
76 #include "opt_nfs.h"
77 #endif
78 
79 /*
80  * These functions support the macros and help fiddle mbuf chains for
81  * the nfs op functions. They do things like create the rpc header and
82  * copy data between mbuf chains and uio lists.
83  */
84 #include <sys/param.h>
85 #include <sys/proc.h>
86 #include <sys/systm.h>
87 #include <sys/kernel.h>
88 #include <sys/kmem.h>
89 #include <sys/mount.h>
90 #include <sys/vnode.h>
91 #include <sys/namei.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/stat.h>
95 #include <sys/filedesc.h>
96 #include <sys/time.h>
97 #include <sys/dirent.h>
98 #include <sys/once.h>
99 #include <sys/kauth.h>
100 #include <sys/atomic.h>
101 #include <sys/cprng.h>
102 
103 #include <uvm/uvm_page.h>
104 #include <uvm/uvm_page_array.h>
105 
106 #include <nfs/rpcv2.h>
107 #include <nfs/nfsproto.h>
108 #include <nfs/nfsnode.h>
109 #include <nfs/nfs.h>
110 #include <nfs/xdr_subs.h>
111 #include <nfs/nfsm_subs.h>
112 #include <nfs/nfsmount.h>
113 #include <nfs/nfsrtt.h>
114 #include <nfs/nfs_var.h>
115 
116 #include <miscfs/specfs/specdev.h>
117 
118 #include <netinet/in.h>
119 
120 static u_int32_t nfs_xid;
121 
122 int nuidhash_max = NFS_MAXUIDHASH;
123 /*
124  * Data items converted to xdr at startup, since they are constant
125  * This is kinda hokey, but may save a little time doing byte swaps
126  */
127 u_int32_t nfs_xdrneg1;
128 u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
129           rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
130           rpc_auth_kerb;
131 u_int32_t nfs_prog, nfs_true, nfs_false;
132 
133 /* And other global data */
134 const nfstype nfsv2_type[9] =
135           { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON };
136 const nfstype nfsv3_type[9] =
137           { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON };
138 const enum vtype nv2tov_type[8] =
139           { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON };
140 const enum vtype nv3tov_type[8] =
141           { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO };
142 int nfs_ticks;
143 
144 /* NFS client/server stats. */
145 struct nfsstats nfsstats;
146 
147 /*
148  * Mapping of old NFS Version 2 RPC numbers to generic numbers.
149  */
150 const int nfsv3_procid[NFS_NPROCS] = {
151           NFSPROC_NULL,
152           NFSPROC_GETATTR,
153           NFSPROC_SETATTR,
154           NFSPROC_NOOP,
155           NFSPROC_LOOKUP,
156           NFSPROC_READLINK,
157           NFSPROC_READ,
158           NFSPROC_NOOP,
159           NFSPROC_WRITE,
160           NFSPROC_CREATE,
161           NFSPROC_REMOVE,
162           NFSPROC_RENAME,
163           NFSPROC_LINK,
164           NFSPROC_SYMLINK,
165           NFSPROC_MKDIR,
166           NFSPROC_RMDIR,
167           NFSPROC_READDIR,
168           NFSPROC_FSSTAT,
169           NFSPROC_NOOP,
170           NFSPROC_NOOP,
171           NFSPROC_NOOP,
172           NFSPROC_NOOP,
173           NFSPROC_NOOP
174 };
175 
176 /*
177  * and the reverse mapping from generic to Version 2 procedure numbers
178  */
179 const int nfsv2_procid[NFS_NPROCS] = {
180           NFSV2PROC_NULL,
181           NFSV2PROC_GETATTR,
182           NFSV2PROC_SETATTR,
183           NFSV2PROC_LOOKUP,
184           NFSV2PROC_NOOP,
185           NFSV2PROC_READLINK,
186           NFSV2PROC_READ,
187           NFSV2PROC_WRITE,
188           NFSV2PROC_CREATE,
189           NFSV2PROC_MKDIR,
190           NFSV2PROC_SYMLINK,
191           NFSV2PROC_CREATE,
192           NFSV2PROC_REMOVE,
193           NFSV2PROC_RMDIR,
194           NFSV2PROC_RENAME,
195           NFSV2PROC_LINK,
196           NFSV2PROC_READDIR,
197           NFSV2PROC_NOOP,
198           NFSV2PROC_STATFS,
199           NFSV2PROC_NOOP,
200           NFSV2PROC_NOOP,
201           NFSV2PROC_NOOP,
202           NFSV2PROC_NOOP,
203 };
204 
205 /*
206  * Maps errno values to nfs error numbers.
207  * Use NFSERR_IO as the catch all for ones not specifically defined in
208  * RFC 1094.
209  */
210 static const u_char nfsrv_v2errmap[] = {
211   NFSERR_PERM,      NFSERR_NOENT,       NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
212   NFSERR_NXIO,      NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
213   NFSERR_IO,        NFSERR_IO,          NFSERR_ACCES,       NFSERR_IO,          NFSERR_IO,
214   NFSERR_IO,        NFSERR_EXIST,       NFSERR_IO,          NFSERR_NODEV,       NFSERR_NOTDIR,
215   NFSERR_ISDIR,     NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
216   NFSERR_IO,        NFSERR_FBIG,        NFSERR_NOSPC,       NFSERR_IO,          NFSERR_ROFS,
217   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
218   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
219   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
220   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
221   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
222   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
223   NFSERR_IO,        NFSERR_IO,          NFSERR_NAMETOL,     NFSERR_IO,          NFSERR_IO,
224   NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO,          NFSERR_DQUOT,       NFSERR_STALE,
225   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
226   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
227   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
228   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
229   NFSERR_IO,        NFSERR_IO,          NFSERR_IO,          NFSERR_IO,          NFSERR_IO,
230   NFSERR_IO,        NFSERR_IO,          NFSERR_IO
231 };
232 __CTASSERT(__arraycount(nfsrv_v2errmap) == ELAST);
233 
234 /*
235  * Maps errno values to nfs error numbers.
236  * Although it is not obvious whether or not NFS clients really care if
237  * a returned error value is in the specified list for the procedure, the
238  * safest thing to do is filter them appropriately. For Version 2, the
239  * X/Open XNFS document is the only specification that defines error values
240  * for each RPC (The RFC simply lists all possible error values for all RPCs),
241  * so I have decided to not do this for Version 2.
242  * The first entry is the default error return and the rest are the valid
243  * errors for that RPC in increasing numeric order.
244  */
245 static const short nfsv3err_null[] = {
246           0,
247           0,
248 };
249 
250 static const short nfsv3err_getattr[] = {
251           NFSERR_IO,
252           NFSERR_IO,
253           NFSERR_STALE,
254           NFSERR_BADHANDLE,
255           NFSERR_SERVERFAULT,
256           0,
257 };
258 
259 static const short nfsv3err_setattr[] = {
260           NFSERR_IO,
261           NFSERR_PERM,
262           NFSERR_IO,
263           NFSERR_ACCES,
264           NFSERR_INVAL,
265           NFSERR_NOSPC,
266           NFSERR_ROFS,
267           NFSERR_DQUOT,
268           NFSERR_STALE,
269           NFSERR_BADHANDLE,
270           NFSERR_NOT_SYNC,
271           NFSERR_SERVERFAULT,
272           0,
273 };
274 
275 static const short nfsv3err_lookup[] = {
276           NFSERR_IO,
277           NFSERR_NOENT,
278           NFSERR_IO,
279           NFSERR_ACCES,
280           NFSERR_NOTDIR,
281           NFSERR_NAMETOL,
282           NFSERR_STALE,
283           NFSERR_BADHANDLE,
284           NFSERR_SERVERFAULT,
285           0,
286 };
287 
288 static const short nfsv3err_access[] = {
289           NFSERR_IO,
290           NFSERR_IO,
291           NFSERR_STALE,
292           NFSERR_BADHANDLE,
293           NFSERR_SERVERFAULT,
294           0,
295 };
296 
297 static const short nfsv3err_readlink[] = {
298           NFSERR_IO,
299           NFSERR_IO,
300           NFSERR_ACCES,
301           NFSERR_INVAL,
302           NFSERR_STALE,
303           NFSERR_BADHANDLE,
304           NFSERR_NOTSUPP,
305           NFSERR_SERVERFAULT,
306           0,
307 };
308 
309 static const short nfsv3err_read[] = {
310           NFSERR_IO,
311           NFSERR_IO,
312           NFSERR_NXIO,
313           NFSERR_ACCES,
314           NFSERR_INVAL,
315           NFSERR_STALE,
316           NFSERR_BADHANDLE,
317           NFSERR_SERVERFAULT,
318           NFSERR_JUKEBOX,
319           0,
320 };
321 
322 static const short nfsv3err_write[] = {
323           NFSERR_IO,
324           NFSERR_IO,
325           NFSERR_ACCES,
326           NFSERR_INVAL,
327           NFSERR_FBIG,
328           NFSERR_NOSPC,
329           NFSERR_ROFS,
330           NFSERR_DQUOT,
331           NFSERR_STALE,
332           NFSERR_BADHANDLE,
333           NFSERR_SERVERFAULT,
334           NFSERR_JUKEBOX,
335           0,
336 };
337 
338 static const short nfsv3err_create[] = {
339           NFSERR_IO,
340           NFSERR_IO,
341           NFSERR_ACCES,
342           NFSERR_EXIST,
343           NFSERR_NOTDIR,
344           NFSERR_NOSPC,
345           NFSERR_ROFS,
346           NFSERR_NAMETOL,
347           NFSERR_DQUOT,
348           NFSERR_STALE,
349           NFSERR_BADHANDLE,
350           NFSERR_NOTSUPP,
351           NFSERR_SERVERFAULT,
352           0,
353 };
354 
355 static const short nfsv3err_mkdir[] = {
356           NFSERR_IO,
357           NFSERR_IO,
358           NFSERR_ACCES,
359           NFSERR_EXIST,
360           NFSERR_NOTDIR,
361           NFSERR_NOSPC,
362           NFSERR_ROFS,
363           NFSERR_NAMETOL,
364           NFSERR_DQUOT,
365           NFSERR_STALE,
366           NFSERR_BADHANDLE,
367           NFSERR_NOTSUPP,
368           NFSERR_SERVERFAULT,
369           0,
370 };
371 
372 static const short nfsv3err_symlink[] = {
373           NFSERR_IO,
374           NFSERR_IO,
375           NFSERR_ACCES,
376           NFSERR_EXIST,
377           NFSERR_NOTDIR,
378           NFSERR_NOSPC,
379           NFSERR_ROFS,
380           NFSERR_NAMETOL,
381           NFSERR_DQUOT,
382           NFSERR_STALE,
383           NFSERR_BADHANDLE,
384           NFSERR_NOTSUPP,
385           NFSERR_SERVERFAULT,
386           0,
387 };
388 
389 static const short nfsv3err_mknod[] = {
390           NFSERR_IO,
391           NFSERR_IO,
392           NFSERR_ACCES,
393           NFSERR_EXIST,
394           NFSERR_NOTDIR,
395           NFSERR_NOSPC,
396           NFSERR_ROFS,
397           NFSERR_NAMETOL,
398           NFSERR_DQUOT,
399           NFSERR_STALE,
400           NFSERR_BADHANDLE,
401           NFSERR_NOTSUPP,
402           NFSERR_SERVERFAULT,
403           NFSERR_BADTYPE,
404           0,
405 };
406 
407 static const short nfsv3err_remove[] = {
408           NFSERR_IO,
409           NFSERR_NOENT,
410           NFSERR_IO,
411           NFSERR_ACCES,
412           NFSERR_NOTDIR,
413           NFSERR_ROFS,
414           NFSERR_NAMETOL,
415           NFSERR_STALE,
416           NFSERR_BADHANDLE,
417           NFSERR_SERVERFAULT,
418           0,
419 };
420 
421 static const short nfsv3err_rmdir[] = {
422           NFSERR_IO,
423           NFSERR_NOENT,
424           NFSERR_IO,
425           NFSERR_ACCES,
426           NFSERR_EXIST,
427           NFSERR_NOTDIR,
428           NFSERR_INVAL,
429           NFSERR_ROFS,
430           NFSERR_NAMETOL,
431           NFSERR_NOTEMPTY,
432           NFSERR_STALE,
433           NFSERR_BADHANDLE,
434           NFSERR_NOTSUPP,
435           NFSERR_SERVERFAULT,
436           0,
437 };
438 
439 static const short nfsv3err_rename[] = {
440           NFSERR_IO,
441           NFSERR_NOENT,
442           NFSERR_IO,
443           NFSERR_ACCES,
444           NFSERR_EXIST,
445           NFSERR_XDEV,
446           NFSERR_NOTDIR,
447           NFSERR_ISDIR,
448           NFSERR_INVAL,
449           NFSERR_NOSPC,
450           NFSERR_ROFS,
451           NFSERR_MLINK,
452           NFSERR_NAMETOL,
453           NFSERR_NOTEMPTY,
454           NFSERR_DQUOT,
455           NFSERR_STALE,
456           NFSERR_BADHANDLE,
457           NFSERR_NOTSUPP,
458           NFSERR_SERVERFAULT,
459           0,
460 };
461 
462 static const short nfsv3err_link[] = {
463           NFSERR_IO,
464           NFSERR_IO,
465           NFSERR_ACCES,
466           NFSERR_EXIST,
467           NFSERR_XDEV,
468           NFSERR_NOTDIR,
469           NFSERR_INVAL,
470           NFSERR_NOSPC,
471           NFSERR_ROFS,
472           NFSERR_MLINK,
473           NFSERR_NAMETOL,
474           NFSERR_DQUOT,
475           NFSERR_STALE,
476           NFSERR_BADHANDLE,
477           NFSERR_NOTSUPP,
478           NFSERR_SERVERFAULT,
479           0,
480 };
481 
482 static const short nfsv3err_readdir[] = {
483           NFSERR_IO,
484           NFSERR_IO,
485           NFSERR_ACCES,
486           NFSERR_NOTDIR,
487           NFSERR_STALE,
488           NFSERR_BADHANDLE,
489           NFSERR_BAD_COOKIE,
490           NFSERR_TOOSMALL,
491           NFSERR_SERVERFAULT,
492           0,
493 };
494 
495 static const short nfsv3err_readdirplus[] = {
496           NFSERR_IO,
497           NFSERR_IO,
498           NFSERR_ACCES,
499           NFSERR_NOTDIR,
500           NFSERR_STALE,
501           NFSERR_BADHANDLE,
502           NFSERR_BAD_COOKIE,
503           NFSERR_NOTSUPP,
504           NFSERR_TOOSMALL,
505           NFSERR_SERVERFAULT,
506           0,
507 };
508 
509 static const short nfsv3err_fsstat[] = {
510           NFSERR_IO,
511           NFSERR_IO,
512           NFSERR_STALE,
513           NFSERR_BADHANDLE,
514           NFSERR_SERVERFAULT,
515           0,
516 };
517 
518 static const short nfsv3err_fsinfo[] = {
519           NFSERR_STALE,
520           NFSERR_STALE,
521           NFSERR_BADHANDLE,
522           NFSERR_SERVERFAULT,
523           0,
524 };
525 
526 static const short nfsv3err_pathconf[] = {
527           NFSERR_STALE,
528           NFSERR_STALE,
529           NFSERR_BADHANDLE,
530           NFSERR_SERVERFAULT,
531           0,
532 };
533 
534 static const short nfsv3err_commit[] = {
535           NFSERR_IO,
536           NFSERR_IO,
537           NFSERR_STALE,
538           NFSERR_BADHANDLE,
539           NFSERR_SERVERFAULT,
540           0,
541 };
542 
543 static const short * const nfsrv_v3errmap[] = {
544           nfsv3err_null,
545           nfsv3err_getattr,
546           nfsv3err_setattr,
547           nfsv3err_lookup,
548           nfsv3err_access,
549           nfsv3err_readlink,
550           nfsv3err_read,
551           nfsv3err_write,
552           nfsv3err_create,
553           nfsv3err_mkdir,
554           nfsv3err_symlink,
555           nfsv3err_mknod,
556           nfsv3err_remove,
557           nfsv3err_rmdir,
558           nfsv3err_rename,
559           nfsv3err_link,
560           nfsv3err_readdir,
561           nfsv3err_readdirplus,
562           nfsv3err_fsstat,
563           nfsv3err_fsinfo,
564           nfsv3err_pathconf,
565           nfsv3err_commit,
566 };
567 
568 extern struct nfsrtt nfsrtt;
569 
570 u_long nfsdirhashmask;
571 
572 int nfs_webnamei(struct nameidata *, struct vnode *, struct proc *);
573 
574 /*
575  * Create the header for an rpc request packet
576  * The hsiz is the size of the rest of the nfs request header.
577  * (just used to decide if a cluster is a good idea)
578  */
579 struct mbuf *
nfsm_reqh(struct nfsnode * np,u_long procid,int hsiz,char ** bposp)580 nfsm_reqh(struct nfsnode *np, u_long procid, int hsiz, char **bposp)
581 {
582           struct mbuf *mb;
583           char *bpos;
584 
585           mb = m_get(M_WAIT, MT_DATA);
586           MCLAIM(mb, &nfs_mowner);
587           if (hsiz >= MINCLSIZE)
588                     m_clget(mb, M_WAIT);
589           mb->m_len = 0;
590           bpos = mtod(mb, void *);
591 
592           /* Finally, return values */
593           *bposp = bpos;
594           return (mb);
595 }
596 
597 /*
598  * Build the RPC header and fill in the authorization info.
599  * The authorization string argument is only used when the credentials
600  * come from outside of the kernel.
601  * Returns the head of the mbuf list.
602  */
603 struct mbuf *
nfsm_rpchead(kauth_cred_t cr,int nmflag,int procid,int auth_type,int auth_len,char * auth_str,int verf_len,char * verf_str,struct mbuf * mrest,int mrest_len,struct mbuf ** mbp,uint32_t * xidp)604 nfsm_rpchead(kauth_cred_t cr, int nmflag, int procid,
605           int auth_type, int auth_len, char *auth_str, int verf_len,
606           char *verf_str, struct mbuf *mrest, int mrest_len,
607           struct mbuf **mbp, uint32_t *xidp)
608 {
609           struct mbuf *mb;
610           u_int32_t *tl;
611           char *bpos;
612           int i;
613           struct mbuf *mreq;
614           int siz, grpsiz, authsiz;
615 
616           authsiz = nfsm_rndup(auth_len);
617           mb = m_gethdr(M_WAIT, MT_DATA);
618           MCLAIM(mb, &nfs_mowner);
619           if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) {
620                     m_clget(mb, M_WAIT);
621           } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) {
622                     m_align(mb, authsiz + 10 * NFSX_UNSIGNED);
623           } else {
624                     m_align(mb, 8 * NFSX_UNSIGNED);
625           }
626           mb->m_len = 0;
627           mreq = mb;
628           bpos = mtod(mb, void *);
629 
630           /*
631            * First the RPC header.
632            */
633           nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
634 
635           *tl++ = *xidp = nfs_getxid();
636           *tl++ = rpc_call;
637           *tl++ = rpc_vers;
638           *tl++ = txdr_unsigned(NFS_PROG);
639           if (nmflag & NFSMNT_NFSV3)
640                     *tl++ = txdr_unsigned(NFS_VER3);
641           else
642                     *tl++ = txdr_unsigned(NFS_VER2);
643           if (nmflag & NFSMNT_NFSV3)
644                     *tl++ = txdr_unsigned(procid);
645           else
646                     *tl++ = txdr_unsigned(nfsv2_procid[procid]);
647 
648           /*
649            * And then the authorization cred.
650            */
651           *tl++ = txdr_unsigned(auth_type);
652           *tl = txdr_unsigned(authsiz);
653           switch (auth_type) {
654           case RPCAUTH_UNIX:
655                     nfsm_build(tl, u_int32_t *, auth_len);
656                     *tl++ = 0;                    /* stamp ?? */
657                     *tl++ = 0;                    /* NULL hostname */
658                     *tl++ = txdr_unsigned(kauth_cred_geteuid(cr));
659                     *tl++ = txdr_unsigned(kauth_cred_getegid(cr));
660                     grpsiz = (auth_len >> 2) - 5;
661                     *tl++ = txdr_unsigned(grpsiz);
662                     for (i = 0; i < grpsiz; i++)
663                               *tl++ = txdr_unsigned(kauth_cred_group(cr, i)); /* XXX elad review */
664                     break;
665           case RPCAUTH_KERB4:
666                     siz = auth_len;
667                     while (siz > 0) {
668                               if (M_TRAILINGSPACE(mb) == 0) {
669                                         struct mbuf *mb2;
670                                         mb2 = m_get(M_WAIT, MT_DATA);
671                                         MCLAIM(mb2, &nfs_mowner);
672                                         if (siz >= MINCLSIZE)
673                                                   m_clget(mb2, M_WAIT);
674                                         mb->m_next = mb2;
675                                         mb = mb2;
676                                         mb->m_len = 0;
677                                         bpos = mtod(mb, void *);
678                               }
679                               i = uimin(siz, M_TRAILINGSPACE(mb));
680                               memcpy(bpos, auth_str, i);
681                               mb->m_len += i;
682                               auth_str += i;
683                               bpos += i;
684                               siz -= i;
685                     }
686                     if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
687                               for (i = 0; i < siz; i++)
688                                         *bpos++ = '\0';
689                               mb->m_len += siz;
690                     }
691                     break;
692           };
693 
694           /*
695            * And the verifier...
696            */
697           nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
698           if (verf_str) {
699                     *tl++ = txdr_unsigned(RPCAUTH_KERB4);
700                     *tl = txdr_unsigned(verf_len);
701                     siz = verf_len;
702                     while (siz > 0) {
703                               if (M_TRAILINGSPACE(mb) == 0) {
704                                         struct mbuf *mb2;
705                                         mb2 = m_get(M_WAIT, MT_DATA);
706                                         MCLAIM(mb2, &nfs_mowner);
707                                         if (siz >= MINCLSIZE)
708                                                   m_clget(mb2, M_WAIT);
709                                         mb->m_next = mb2;
710                                         mb = mb2;
711                                         mb->m_len = 0;
712                                         bpos = mtod(mb, void *);
713                               }
714                               i = uimin(siz, M_TRAILINGSPACE(mb));
715                               memcpy(bpos, verf_str, i);
716                               mb->m_len += i;
717                               verf_str += i;
718                               bpos += i;
719                               siz -= i;
720                     }
721                     if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) {
722                               for (i = 0; i < siz; i++)
723                                         *bpos++ = '\0';
724                               mb->m_len += siz;
725                     }
726           } else {
727                     *tl++ = txdr_unsigned(RPCAUTH_NULL);
728                     *tl = 0;
729           }
730           mb->m_next = mrest;
731           mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len;
732           m_reset_rcvif(mreq);
733           *mbp = mb;
734           return (mreq);
735 }
736 
737 /*
738  * copies mbuf chain to the uio scatter/gather list
739  */
740 int
nfsm_mbuftouio(struct mbuf ** mrep,struct uio * uiop,int siz,char ** dpos)741 nfsm_mbuftouio(struct mbuf **mrep, struct uio *uiop, int siz, char **dpos)
742 {
743           char *mbufcp, *uiocp;
744           int xfer, left, len;
745           struct mbuf *mp;
746           long uiosiz, rem;
747           int error = 0;
748 
749           mp = *mrep;
750           mbufcp = *dpos;
751           len = mtod(mp, char *) + mp->m_len - mbufcp;
752           rem = nfsm_rndup(siz)-siz;
753           while (siz > 0) {
754                     if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
755                               return (EFBIG);
756                     left = uiop->uio_iov->iov_len;
757                     uiocp = uiop->uio_iov->iov_base;
758                     if (left > siz)
759                               left = siz;
760                     uiosiz = left;
761                     while (left > 0) {
762                               while (len == 0) {
763                                         mp = mp->m_next;
764                                         if (mp == NULL)
765                                                   return (EBADRPC);
766                                         mbufcp = mtod(mp, void *);
767                                         len = mp->m_len;
768                               }
769                               xfer = (left > len) ? len : left;
770                               error = copyout_vmspace(uiop->uio_vmspace, mbufcp,
771                                   uiocp, xfer);
772                               if (error) {
773                                         return error;
774                               }
775                               left -= xfer;
776                               len -= xfer;
777                               mbufcp += xfer;
778                               uiocp += xfer;
779                               uiop->uio_offset += xfer;
780                               uiop->uio_resid -= xfer;
781                     }
782                     if (uiop->uio_iov->iov_len <= siz) {
783                               uiop->uio_iovcnt--;
784                               uiop->uio_iov++;
785                     } else {
786                               uiop->uio_iov->iov_base =
787                                   (char *)uiop->uio_iov->iov_base + uiosiz;
788                               uiop->uio_iov->iov_len -= uiosiz;
789                     }
790                     siz -= uiosiz;
791           }
792           *dpos = mbufcp;
793           *mrep = mp;
794           if (rem > 0) {
795                     if (len < rem)
796                               error = nfs_adv(mrep, dpos, rem, len);
797                     else
798                               *dpos += rem;
799           }
800           return (error);
801 }
802 
803 /*
804  * copies a uio scatter/gather list to an mbuf chain.
805  * NOTE: can only handle iovcnt == 1
806  */
807 int
nfsm_uiotombuf(struct uio * uiop,struct mbuf ** mq,int siz,char ** bpos)808 nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, char **bpos)
809 {
810           char *uiocp;
811           struct mbuf *mp, *mp2;
812           int xfer, left, mlen;
813           int uiosiz, clflg, rem;
814           char *cp;
815           int error;
816 
817 #ifdef DIAGNOSTIC
818           if (uiop->uio_iovcnt != 1)
819                     panic("nfsm_uiotombuf: iovcnt != 1");
820 #endif
821 
822           if (siz > MLEN)               /* or should it >= MCLBYTES ?? */
823                     clflg = 1;
824           else
825                     clflg = 0;
826           rem = nfsm_rndup(siz)-siz;
827           mp = mp2 = *mq;
828           while (siz > 0) {
829                     left = uiop->uio_iov->iov_len;
830                     uiocp = uiop->uio_iov->iov_base;
831                     if (left > siz)
832                               left = siz;
833                     uiosiz = left;
834                     while (left > 0) {
835                               mlen = M_TRAILINGSPACE(mp);
836                               if (mlen == 0) {
837                                         mp = m_get(M_WAIT, MT_DATA);
838                                         MCLAIM(mp, &nfs_mowner);
839                                         if (clflg)
840                                                   m_clget(mp, M_WAIT);
841                                         mp->m_len = 0;
842                                         mp2->m_next = mp;
843                                         mp2 = mp;
844                                         mlen = M_TRAILINGSPACE(mp);
845                               }
846                               xfer = (left > mlen) ? mlen : left;
847                               cp = mtod(mp, char *) + mp->m_len;
848                               error = copyin_vmspace(uiop->uio_vmspace, uiocp, cp,
849                                   xfer);
850                               if (error) {
851                                         /* XXX */
852                               }
853                               mp->m_len += xfer;
854                               left -= xfer;
855                               uiocp += xfer;
856                               uiop->uio_offset += xfer;
857                               uiop->uio_resid -= xfer;
858                     }
859                     uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
860                         uiosiz;
861                     uiop->uio_iov->iov_len -= uiosiz;
862                     siz -= uiosiz;
863           }
864           if (rem > 0) {
865                     if (rem > M_TRAILINGSPACE(mp)) {
866                               mp = m_get(M_WAIT, MT_DATA);
867                               MCLAIM(mp, &nfs_mowner);
868                               mp->m_len = 0;
869                               mp2->m_next = mp;
870                     }
871                     cp = mtod(mp, char *) + mp->m_len;
872                     for (left = 0; left < rem; left++)
873                               *cp++ = '\0';
874                     mp->m_len += rem;
875                     *bpos = cp;
876           } else
877                     *bpos = mtod(mp, char *) + mp->m_len;
878           *mq = mp;
879           return (0);
880 }
881 
/*
 * Get at least "siz" bytes of correctly aligned data.
 * When called the mbuf pointers are not necessarily correct,
 * *dposp points to what ought to be in m_data and left contains
 * what ought to be in m_len.
 * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
 * cases. (The macros use the vars. dpos and dpos2)
 *
 * mdp   - in/out: current mbuf; updated to the mbuf that ends up
 *         holding the data.
 * dposp - in/out: current parse position; advanced to just past the
 *         "siz" bytes.
 * siz   - number of contiguous bytes wanted.
 * left  - bytes of *mdp not yet consumed, starting at *dposp.
 * cp2   - out: start of the contiguous data.
 *
 * Returns 0 on success, or EBADRPC if the chain ends while looking for
 * data or fewer than "siz" bytes could be gathered into one mbuf.
 */
int
nfsm_disct(struct mbuf **mdp, char **dposp, int siz, int left, char **cp2)
{
	struct mbuf *m1, *m2;
	struct mbuf *havebuf = NULL;	/* mbuf preceding m1, once we moved on */
	char *src = *dposp;
	char *dst;
	int len;

#ifdef DEBUG
	if (left < 0)
		panic("nfsm_disct: left < 0");
#endif
	m1 = *mdp;
	/*
	 * Skip through the mbuf chain looking for an mbuf with
	 * some data. If the first mbuf found has enough data
	 * and it is correctly aligned return it.
	 */
	while (left == 0) {
		havebuf = m1;
		*mdp = m1 = m1->m_next;
		if (m1 == NULL)
			return (EBADRPC);
		src = mtod(m1, void *);
		left = m1->m_len;
		/*
		 * If we start a new mbuf and it is big enough
		 * and correctly aligned just return it, don't
		 * do any pull up.
		 */
		if (left >= siz && nfsm_aligned(src)) {
			*cp2 = src;
			*dposp = src + siz;
			return (0);
		}
	}
	/*
	 * m1 now has "left" unread bytes at src, but either we were entered
	 * part-way through it or it is too short or misaligned to be used
	 * as is.  Pick (or build) an mbuf to gather the data into; m2 is
	 * left pointing at the first mbuf to pull further data from.
	 */
	if ((m1->m_flags & M_EXT) != 0) {
		if (havebuf && M_TRAILINGSPACE(havebuf) >= siz &&
		    nfsm_aligned(mtod(havebuf, char *) + havebuf->m_len)) {
			/*
			 * If the first mbuf with data has external data
			 * and there is a previous mbuf with some trailing
			 * space, use it to move the data into.
			 */
			m2 = m1;
			*mdp = m1 = havebuf;
			*cp2 = mtod(m1, char *) + m1->m_len;
		} else if (havebuf) {
			/*
			 * If the first mbuf has a external data
			 * and there is no previous empty mbuf
			 * allocate a new mbuf and move the external
			 * data to the new mbuf. Also make the first
			 * mbuf look empty.
			 */
			m2 = m1;
			*mdp = m1 = m_get(M_WAIT, MT_DATA);
			MCLAIM(m1, m2->m_owner);
			if ((m2->m_flags & M_PKTHDR) != 0) {
				m_move_pkthdr(m1, m2);
			}
			/* Always true here: this branch requires havebuf. */
			if (havebuf) {
				havebuf->m_next = m1;
			}
			m1->m_next = m2;
			MRESETDATA(m1);
			m1->m_len = 0;
			m2->m_data = src;
			m2->m_len = left;
			*cp2 = mtod(m1, char *);
		} else {
			/*
			 * No previous mbuf (we were entered with left != 0):
			 * cut the unread tail off m1 and copy it into one or
			 * more new mbufs (with clusters while left >=
			 * MINCLSIZE) linked in directly after m1.  The first
			 * of them becomes the working mbuf.
			 */
			struct mbuf **nextp = &m1->m_next;

			m1->m_len -= left;
			do {
				m2 = m_get(M_WAIT, MT_DATA);
				MCLAIM(m2, m1->m_owner);
				if (left >= MINCLSIZE) {
					MCLGET(m2, M_WAIT);
				}
				m2->m_next = *nextp;
				*nextp = m2;
				nextp = &m2->m_next;
				len = (m2->m_flags & M_EXT) != 0 ?
				    MCLBYTES : MLEN;
				if (len > left) {
					len = left;
				}
				memcpy(mtod(m2, char *), src, len);
				m2->m_len = len;
				src += len;
				left -= len;
			} while (left > 0);
			*mdp = m1 = m1->m_next;
			m2 = m1->m_next;
			*cp2 = mtod(m1, char *);
		}
	} else {
		/*
		 * If the first mbuf has no external data
		 * move the data to the front of the mbuf.
		 */
		MRESETDATA(m1);
		dst = mtod(m1, char *);
		if (dst != src) {
			/* Source and destination may overlap. */
			memmove(dst, src, left);
		}
		m1->m_len = left;
		m2 = m1->m_next;
		*cp2 = m1->m_data;
	}
	*dposp = *cp2 + siz;
	/*
	 * Loop through mbufs pulling data up into first mbuf until
	 * the first mbuf is full or there is no more data to
	 * pullup.
	 */
	dst = mtod(m1, char *) + m1->m_len;
	while ((len = M_TRAILINGSPACE(m1)) != 0 && m2) {
		if ((len = uimin(len, m2->m_len)) != 0) {
			memcpy(dst, mtod(m2, char *), len);
		}
		m1->m_len += len;
		dst += len;
		/* Consume the copied bytes from the donor mbuf. */
		m2->m_data += len;
		m2->m_len -= len;
		m2 = m2->m_next;
	}
	/* Still short after pulling up everything that fits: bad RPC. */
	if (m1->m_len < siz)
		return (EBADRPC);
	return (0);
}
1023 
1024 /*
1025  * Advance the position in the mbuf chain.
1026  */
1027 int
nfs_adv(struct mbuf ** mdp,char ** dposp,int offs,int left)1028 nfs_adv(struct mbuf **mdp, char **dposp, int offs, int left)
1029 {
1030           struct mbuf *m;
1031           int s;
1032 
1033           m = *mdp;
1034           s = left;
1035           while (s < offs) {
1036                     offs -= s;
1037                     m = m->m_next;
1038                     if (m == NULL)
1039                               return (EBADRPC);
1040                     s = m->m_len;
1041           }
1042           *mdp = m;
1043           *dposp = mtod(m, char *) + offs;
1044           return (0);
1045 }
1046 
1047 /*
1048  * Copy a string into mbufs for the hard cases...
1049  */
1050 int
nfsm_strtmbuf(struct mbuf ** mb,char ** bpos,const char * cp,long siz)1051 nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz)
1052 {
1053           struct mbuf *m1 = NULL, *m2;
1054           long left, xfer, len, tlen;
1055           u_int32_t *tl;
1056           int putsize;
1057 
1058           putsize = 1;
1059           m2 = *mb;
1060           left = M_TRAILINGSPACE(m2);
1061           if (left > 0) {
1062                     tl = ((u_int32_t *)(*bpos));
1063                     *tl++ = txdr_unsigned(siz);
1064                     putsize = 0;
1065                     left -= NFSX_UNSIGNED;
1066                     m2->m_len += NFSX_UNSIGNED;
1067                     if (left > 0) {
1068                               memcpy((void *) tl, cp, left);
1069                               siz -= left;
1070                               cp += left;
1071                               m2->m_len += left;
1072                               left = 0;
1073                     }
1074           }
1075           /* Loop around adding mbufs */
1076           while (siz > 0) {
1077                     m1 = m_get(M_WAIT, MT_DATA);
1078                     MCLAIM(m1, &nfs_mowner);
1079                     if (siz > MLEN)
1080                               m_clget(m1, M_WAIT);
1081                     m1->m_len = NFSMSIZ(m1);
1082                     m2->m_next = m1;
1083                     m2 = m1;
1084                     tl = mtod(m1, u_int32_t *);
1085                     tlen = 0;
1086                     if (putsize) {
1087                               *tl++ = txdr_unsigned(siz);
1088                               m1->m_len -= NFSX_UNSIGNED;
1089                               tlen = NFSX_UNSIGNED;
1090                               putsize = 0;
1091                     }
1092                     if (siz < m1->m_len) {
1093                               len = nfsm_rndup(siz);
1094                               xfer = siz;
1095                               if (xfer < len)
1096                                         *(tl+(xfer>>2)) = 0;
1097                     } else {
1098                               xfer = len = m1->m_len;
1099                     }
1100                     memcpy((void *) tl, cp, xfer);
1101                     m1->m_len = len+tlen;
1102                     siz -= xfer;
1103                     cp += xfer;
1104           }
1105           *mb = m1;
1106           *bpos = mtod(m1, char *) + m1->m_len;
1107           return (0);
1108 }
1109 
1110 /*
1111  * Directory caching routines. They work as follows:
1112  * - a cache is maintained per VDIR nfsnode.
1113  * - for each offset cookie that is exported to userspace, and can
1114  *   thus be thrown back at us as an offset to VOP_READDIR, store
1115  *   information in the cache.
1116  * - cached are:
1117  *   - cookie itself
1118  *   - blocknumber (essentially just a search key in the buffer cache)
1119  *   - entry number in block.
1120  *   - offset cookie of block in which this entry is stored
1121  *   - 32 bit cookie if NFSMNT_XLATECOOKIE is used.
1122  * - entries are looked up in a hash table
1123  * - also maintained is an LRU list of entries, used to determine
1124  *   which ones to delete if the cache grows too large.
1125  * - if 32 <-> 64 translation mode is requested for a filesystem,
1126  *   the cache also functions as a translation table
1127  * - in the translation case, invalidating the cache does not mean
1128  *   flushing it, but just marking entries as invalid, except for
1129  *   the <64bit cookie, 32bitcookie> pair which is still valid, to
1130  *   still be able to use the cache as a translation table.
1131  * - 32 bit cookies are uniquely created by combining the hash table
1132  *   entry value, and one generation count per hash table entry,
1133  *   incremented each time an entry is appended to the chain.
 * - the cache is invalidated each time a directory is modified
1135  * - sanity checks are also done; if an entry in a block turns
1136  *   out not to have a matching cookie, the cache is invalidated
1137  *   and a new block starting from the wanted offset is fetched from
1138  *   the server.
1139  * - directory entries as read from the server are extended to contain
1140  *   the 64bit and, optionally, the 32bit cookies, for sanity checking
1141  *   the cache and exporting them to userspace through the cookie
1142  *   argument to VOP_READDIR.
1143  */
1144 
/*
 * Hash a directory offset cookie by summing the bytes of its in-memory
 * representation.
 *
 * The bytes are read as unsigned char so that the result does not depend
 * on whether plain "char" is signed on the target; with a signed char,
 * every byte >= 0x80 was sign-extended before being added.  Each such
 * byte shifts the old result by a multiple of 256, so the low eight bits
 * of the sum -- and therefore the bucket picked by any power-of-two mask
 * up to 0xff -- are the same as before.
 */
u_long
nfs_dirhash(off_t off)
{
	const unsigned char *cp = (const unsigned char *)&off;
	u_long sum = 0;
	size_t i;

	for (i = 0; i < sizeof(off); i++)
		sum += cp[i];

	return sum;
}
1157 
/*
 * Locking for the per-directory cookie cache: the cache of an nfsnode is
 * serialized with the v_interlock of the vnode it belongs to.
 * NFSDC_ASSERT_LOCKED() asserts that the caller owns that lock.
 */
#define	_NFSDC_MTX(np)		(NFSTOV(np)->v_interlock)
#define	NFSDC_LOCK(np)		mutex_enter(_NFSDC_MTX(np))
#define	NFSDC_UNLOCK(np)	mutex_exit(_NFSDC_MTX(np))
#define	NFSDC_ASSERT_LOCKED(np) KASSERT(mutex_owned(_NFSDC_MTX(np)))
1162 
1163 void
nfs_initdircache(struct vnode * vp)1164 nfs_initdircache(struct vnode *vp)
1165 {
1166           struct nfsnode *np = VTONFS(vp);
1167           struct nfsdirhashhead *dircache;
1168 
1169           dircache = hashinit(NFS_DIRHASHSIZ, HASH_LIST, true,
1170               &nfsdirhashmask);
1171 
1172           NFSDC_LOCK(np);
1173           if (np->n_dircache == NULL) {
1174                     np->n_dircachesize = 0;
1175                     np->n_dircache = dircache;
1176                     dircache = NULL;
1177                     TAILQ_INIT(&np->n_dirchain);
1178           }
1179           NFSDC_UNLOCK(np);
1180           if (dircache)
1181                     hashdone(dircache, HASH_LIST, nfsdirhashmask);
1182 }
1183 
1184 void
nfs_initdirxlatecookie(struct vnode * vp)1185 nfs_initdirxlatecookie(struct vnode *vp)
1186 {
1187           struct nfsnode *np = VTONFS(vp);
1188           unsigned *dirgens;
1189 
1190           KASSERT(VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_XLATECOOKIE);
1191 
1192           dirgens = kmem_zalloc(NFS_DIRHASHSIZ * sizeof(unsigned), KM_SLEEP);
1193           NFSDC_LOCK(np);
1194           if (np->n_dirgens == NULL) {
1195                     np->n_dirgens = dirgens;
1196                     dirgens = NULL;
1197           }
1198           NFSDC_UNLOCK(np);
1199           if (dirgens)
1200                     kmem_free(dirgens, NFS_DIRHASHSIZ * sizeof(unsigned));
1201 }
1202 
/*
 * Shared placeholder entry returned for offset cookie 0.  The put
 * routines ignore it and nfs_unlinkdircache() asserts it is never
 * passed in, so it is neither reference counted nor freed.
 */
static const struct nfsdircache dzero;

/* Forward declarations of the cache-internal helpers defined below. */
static void nfs_unlinkdircache(struct nfsnode *np, struct nfsdircache *);
static void nfs_putdircache_unlocked(struct nfsnode *,
    struct nfsdircache *);
1208 
1209 static void
nfs_unlinkdircache(struct nfsnode * np,struct nfsdircache * ndp)1210 nfs_unlinkdircache(struct nfsnode *np, struct nfsdircache *ndp)
1211 {
1212 
1213           NFSDC_ASSERT_LOCKED(np);
1214           KASSERT(ndp != &dzero);
1215 
1216           if (LIST_NEXT(ndp, dc_hash) == (void *)-1)
1217                     return;
1218 
1219           TAILQ_REMOVE(&np->n_dirchain, ndp, dc_chain);
1220           LIST_REMOVE(ndp, dc_hash);
1221           LIST_NEXT(ndp, dc_hash) = (void *)-1; /* mark as unlinked */
1222 
1223           nfs_putdircache_unlocked(np, ndp);
1224 }
1225 
1226 void
nfs_putdircache(struct nfsnode * np,struct nfsdircache * ndp)1227 nfs_putdircache(struct nfsnode *np, struct nfsdircache *ndp)
1228 {
1229           int ref;
1230 
1231           if (ndp == &dzero)
1232                     return;
1233 
1234           KASSERT(ndp->dc_refcnt > 0);
1235           NFSDC_LOCK(np);
1236           ref = --ndp->dc_refcnt;
1237           NFSDC_UNLOCK(np);
1238 
1239           if (ref == 0)
1240                     kmem_free(ndp, sizeof(*ndp));
1241 }
1242 
1243 static void
nfs_putdircache_unlocked(struct nfsnode * np,struct nfsdircache * ndp)1244 nfs_putdircache_unlocked(struct nfsnode *np, struct nfsdircache *ndp)
1245 {
1246           int ref;
1247 
1248           NFSDC_ASSERT_LOCKED(np);
1249 
1250           if (ndp == &dzero)
1251                     return;
1252 
1253           KASSERT(ndp->dc_refcnt > 0);
1254           ref = --ndp->dc_refcnt;
1255           if (ref == 0)
1256                     kmem_free(ndp, sizeof(*ndp));
1257 }
1258 
1259 struct nfsdircache *
nfs_searchdircache(struct vnode * vp,off_t off,int do32,int * hashent)1260 nfs_searchdircache(struct vnode *vp, off_t off, int do32, int *hashent)
1261 {
1262           struct nfsdirhashhead *ndhp;
1263           struct nfsdircache *ndp = NULL;
1264           struct nfsnode *np = VTONFS(vp);
1265           unsigned ent;
1266 
1267           /*
1268            * Zero is always a valid cookie.
1269            */
1270           if (off == 0)
1271                     /* XXXUNCONST */
1272                     return (struct nfsdircache *)__UNCONST(&dzero);
1273 
1274           if (!np->n_dircache)
1275                     return NULL;
1276 
1277           /*
1278            * We use a 32bit cookie as search key, directly reconstruct
1279            * the hashentry. Else use the hashfunction.
1280            */
1281           if (do32) {
1282                     ent = (u_int32_t)off >> 24;
1283                     if (ent >= NFS_DIRHASHSIZ)
1284                               return NULL;
1285                     ndhp = &np->n_dircache[ent];
1286           } else {
1287                     ndhp = NFSDIRHASH(np, off);
1288           }
1289 
1290           if (hashent)
1291                     *hashent = (int)(ndhp - np->n_dircache);
1292 
1293           NFSDC_LOCK(np);
1294           if (do32) {
1295                     LIST_FOREACH(ndp, ndhp, dc_hash) {
1296                               if (ndp->dc_cookie32 == (u_int32_t)off) {
1297                                         /*
1298                                          * An invalidated entry will become the
1299                                          * start of a new block fetched from
1300                                          * the server.
1301                                          */
1302                                         if (ndp->dc_flags & NFSDC_INVALID) {
1303                                                   ndp->dc_blkcookie = ndp->dc_cookie;
1304                                                   ndp->dc_entry = 0;
1305                                                   ndp->dc_flags &= ~NFSDC_INVALID;
1306                                         }
1307                                         break;
1308                               }
1309                     }
1310           } else {
1311                     LIST_FOREACH(ndp, ndhp, dc_hash) {
1312                               if (ndp->dc_cookie == off)
1313                                         break;
1314                     }
1315           }
1316           if (ndp != NULL)
1317                     ndp->dc_refcnt++;
1318           NFSDC_UNLOCK(np);
1319           return ndp;
1320 }
1321 
1322 
/*
 * Enter (or refresh) the cache entry for offset cookie "off" of the
 * directory "vp", recording that it is entry number "en" of the block
 * whose cookie is "blkoff".  "blkno" is not used by this function.
 *
 * Returns the entry with a reference held for the caller, or the static
 * zero entry when "off" is 0.
 *
 * Outline:
 *  - an existing valid entry with the same block cookie and entry number
 *    is returned as is;
 *  - an existing valid entry with different contents is unlinked and
 *    replaced by a new one;
 *  - an existing entry flagged NFSDC_INVALID is overwritten in place;
 *  - otherwise a new entry is allocated (with the lock dropped, followed
 *    by a fresh lookup) and linked in, evicting the head of n_dirchain
 *    when the cache already holds NFS_MAXDIRCACHE entries.
 */
struct nfsdircache *
nfs_enterdircache(struct vnode *vp, off_t off, off_t blkoff, int en,
    daddr_t blkno)
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsdirhashhead *ndhp;
	struct nfsdircache *ndp = NULL;
	struct nfsdircache *newndp = NULL;	/* preallocated spare entry */
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int hashent = 0, gen, overwrite;        /* XXX: GCC */

	/*
	 * XXX refuse entries for offset 0. amd(8) erroneously sets
	 * cookie 0 for the '.' entry, making this necessary. This
	 * isn't so bad, as 0 is a special case anyway.
	 */
	if (off == 0)
		/* XXXUNCONST */
		return (struct nfsdircache *)__UNCONST(&dzero);

	if (!np->n_dircache)
		/*
		 * XXX would like to do this in nfs_nget but vtype
		 * isn't known at that time.
		 */
		nfs_initdircache(vp);

	if ((nmp->nm_flag & NFSMNT_XLATECOOKIE) && !np->n_dirgens)
		nfs_initdirxlatecookie(vp);

retry:
	/* On a hit this takes a reference on ndp and sets hashent. */
	ndp = nfs_searchdircache(vp, off, 0, &hashent);

	NFSDC_LOCK(np);
	if (ndp && (ndp->dc_flags & NFSDC_INVALID) == 0) {
		/*
		 * Overwriting an old entry. Check if it's the same.
		 * If so, just return. If not, remove the old entry.
		 */
		if (ndp->dc_blkcookie == blkoff && ndp->dc_entry == en)
			goto done;
		/*
		 * Unlinking drops one reference; the second put drops the
		 * one taken by the lookup above.
		 */
		nfs_unlinkdircache(np, ndp);
		nfs_putdircache_unlocked(np, ndp);
		ndp = NULL;
	}

	ndhp = &np->n_dircache[hashent];

	if (!ndp) {
		if (newndp == NULL) {
			/*
			 * Allocate with the lock dropped (KM_SLEEP), then
			 * look the cookie up again from scratch.
			 */
			NFSDC_UNLOCK(np);
			newndp = kmem_alloc(sizeof(*newndp), KM_SLEEP);
			newndp->dc_refcnt = 1;
			/* Same "unlinked" marker nfs_unlinkdircache() uses. */
			LIST_NEXT(newndp, dc_hash) = (void *)-1;
			goto retry;
		}
		/* Consume the spare; its single reference is the caller's. */
		ndp = newndp;
		newndp = NULL;
		overwrite = 0;
		if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
			/*
			 * We're allocating a new entry, so bump the
			 * generation number.
			 */
			KASSERT(np->n_dirgens);
			gen = ++np->n_dirgens[hashent];
			/* Skip generation 0 when the counter wraps. */
			if (gen == 0) {
				np->n_dirgens[hashent]++;
				gen++;
			}
			/* Bucket index in the top byte, generation below. */
			ndp->dc_cookie32 = (hashent << 24) | (gen & 0xffffff);
		}
	} else
		/* Found, but flagged NFSDC_INVALID: reuse it in place. */
		overwrite = 1;

	ndp->dc_cookie = off;
	ndp->dc_blkcookie = blkoff;
	ndp->dc_entry = en;
	ndp->dc_flags = 0;

	/* A reused entry is already linked into the hash and the chain. */
	if (overwrite)
		goto done;

	/*
	 * If the maximum directory cookie cache size has been reached
	 * for this node, take one off the front. The idea is that
	 * directories are typically read front-to-back once, so that
	 * the oldest entries can be thrown away without much performance
	 * loss.
	 */
	if (np->n_dircachesize == NFS_MAXDIRCACHE) {
		nfs_unlinkdircache(np, TAILQ_FIRST(&np->n_dirchain));
	} else
		np->n_dircachesize++;

	KASSERT(ndp->dc_refcnt == 1);
	LIST_INSERT_HEAD(ndhp, ndp, dc_hash);
	TAILQ_INSERT_TAIL(&np->n_dirchain, ndp, dc_chain);
	/* Second reference: one for the cache, one for the caller. */
	ndp->dc_refcnt++;
done:
	KASSERT(ndp->dc_refcnt > 0);
	NFSDC_UNLOCK(np);
	/* The spare went unused because the second lookup found an entry. */
	if (newndp)
		nfs_putdircache(np, newndp);
	return ndp;
}
1429 
1430 void
nfs_invaldircache(struct vnode * vp,int flags)1431 nfs_invaldircache(struct vnode *vp, int flags)
1432 {
1433           struct nfsnode *np = VTONFS(vp);
1434           struct nfsdircache *ndp = NULL;
1435           struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1436           const bool forcefree = flags & NFS_INVALDIRCACHE_FORCE;
1437 
1438 #ifdef DIAGNOSTIC
1439           if (vp->v_type != VDIR)
1440                     panic("nfs: invaldircache: not dir");
1441 #endif
1442 
1443           if ((flags & NFS_INVALDIRCACHE_KEEPEOF) == 0)
1444                     np->n_flag &= ~NEOFVALID;
1445 
1446           if (!np->n_dircache)
1447                     return;
1448 
1449           NFSDC_LOCK(np);
1450           if (!(nmp->nm_flag & NFSMNT_XLATECOOKIE) || forcefree) {
1451                     while ((ndp = TAILQ_FIRST(&np->n_dirchain)) != NULL) {
1452                               KASSERT(!forcefree || ndp->dc_refcnt == 1);
1453                               nfs_unlinkdircache(np, ndp);
1454                     }
1455                     np->n_dircachesize = 0;
1456                     if (forcefree && np->n_dirgens) {
1457                               kmem_free(np->n_dirgens,
1458                                   NFS_DIRHASHSIZ * sizeof(unsigned));
1459                               np->n_dirgens = NULL;
1460                     }
1461           } else {
1462                     TAILQ_FOREACH(ndp, &np->n_dirchain, dc_chain)
1463                               ndp->dc_flags |= NFSDC_INVALID;
1464           }
1465 
1466           NFSDC_UNLOCK(np);
1467 }
1468 
1469 /*
1470  * Called once before VFS init to initialize shared and
1471  * server-specific data structures.
1472  */
1473 static int
nfs_init0(void)1474 nfs_init0(void)
1475 {
1476 
1477           nfsrtt.pos = 0;
1478           rpc_vers = txdr_unsigned(RPC_VER2);
1479           rpc_call = txdr_unsigned(RPC_CALL);
1480           rpc_reply = txdr_unsigned(RPC_REPLY);
1481           rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
1482           rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
1483           rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
1484           rpc_autherr = txdr_unsigned(RPC_AUTHERR);
1485           rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
1486           rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
1487           nfs_prog = txdr_unsigned(NFS_PROG);
1488           nfs_true = txdr_unsigned(true);
1489           nfs_false = txdr_unsigned(false);
1490           nfs_xdrneg1 = txdr_unsigned(-1);
1491           nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
1492           if (nfs_ticks < 1)
1493                     nfs_ticks = 1;
1494           nfsdreq_init();
1495 
1496           /*
1497            * Initialize reply list and start timer
1498            */
1499           TAILQ_INIT(&nfs_reqq);
1500           mutex_init(&nfs_reqq_lock, MUTEX_DEFAULT, IPL_NONE);
1501           nfs_timer_init();
1502           MOWNER_ATTACH(&nfs_mowner);
1503 
1504           return 0;
1505 }
1506 
/*
 * Improvised spin lock guarding nfs_refcount (used by nfs_init() and
 * nfs_fini()).  nfs_mutex is 0 when free and 1 when held.
 *
 * atomic_cas_32() returns the value that was in memory before the
 * operation, so the swap took place only if that value equals the
 * expected one.  nfs_p() therefore spins until it gets back 0 (it
 * installed the 1) and nfs_v() until it gets back 1 (it installed the
 * 0).  The loop conditions used to be inverted: nfs_p() fell straight
 * through without taking the lock when it was already held, and nfs_v()
 * went round again after a successful release, so it could clear a lock
 * that another thread had acquired in the meantime.
 *
 * NOTE(review): no explicit memory barriers are issued around the
 * critical section -- confirm whether they are needed here.
 */
static volatile uint32_t nfs_mutex;
static uint32_t nfs_refcount;

#define nfs_p()								\
	do {								\
		while (atomic_cas_32(&nfs_mutex, 0, 1) != 0)		\
			continue;					\
	} while (/*CONSTCOND*/ 0)
#define nfs_v()								\
	do {								\
		while (atomic_cas_32(&nfs_mutex, 1, 0) != 1)		\
			continue;					\
	} while (/*CONSTCOND*/ 0)
1512 
1513 /*
1514  * This is disgusting, but it must support both modular and monolothic
1515  * configurations, plus the code is shared between server and client.
1516  * For monolithic builds NFSSERVER may not imply NFS. Unfortunately we
1517  * can't use regular mutexes here that would require static initialization
1518  * and we can get initialized from multiple places, so we improvise.
1519  *
1520  * Yuck.
1521  */
1522 void
nfs_init(void)1523 nfs_init(void)
1524 {
1525 
1526           nfs_p();
1527           if (nfs_refcount++ == 0)
1528                     nfs_init0();
1529           nfs_v();
1530 }
1531 
1532 void
nfs_fini(void)1533 nfs_fini(void)
1534 {
1535 
1536           nfs_p();
1537           if (--nfs_refcount == 0) {
1538                     MOWNER_DETACH(&nfs_mowner);
1539                     nfs_timer_fini();
1540                     mutex_destroy(&nfs_reqq_lock);
1541                     nfsdreq_fini();
1542           }
1543           nfs_v();
1544 }
1545 
1546 /*
1547  * A fiddled version of m_adj() that ensures null fill to a 32-bit
1548  * boundary and only trims off the back end
1549  *
1550  * 1. trim off 'len' bytes as m_adj(mp, -len).
1551  * 2. add zero-padding 'nul' bytes at the end of the mbuf chain.
1552  */
1553 void
nfs_zeropad(struct mbuf * mp,int len,int nul)1554 nfs_zeropad(struct mbuf *mp, int len, int nul)
1555 {
1556           struct mbuf *m;
1557           int count;
1558 
1559           /*
1560            * Trim from tail.  Scan the mbuf chain,
1561            * calculating its length and finding the last mbuf.
1562            * If the adjustment only affects this mbuf, then just
1563            * adjust and return.  Otherwise, rescan and truncate
1564            * after the remaining size.
1565            */
1566           count = 0;
1567           m = mp;
1568           for (;;) {
1569                     count += m->m_len;
1570                     if (m->m_next == NULL)
1571                               break;
1572                     m = m->m_next;
1573           }
1574 
1575           KDASSERT(count >= len);
1576 
1577           if (m->m_len >= len) {
1578                     m->m_len -= len;
1579           } else {
1580                     count -= len;
1581                     /*
1582                      * Correct length for chain is "count".
1583                      * Find the mbuf with last data, adjust its length,
1584                      * and toss data from remaining mbufs on chain.
1585                      */
1586                     for (m = mp; m; m = m->m_next) {
1587                               if (m->m_len >= count) {
1588                                         m->m_len = count;
1589                                         break;
1590                               }
1591                               count -= m->m_len;
1592                     }
1593                     KASSERT(m && m->m_next);
1594                     m_freem(m->m_next);
1595                     m->m_next = NULL;
1596           }
1597 
1598           KDASSERT(m->m_next == NULL);
1599 
1600           /*
1601            * zero-padding.
1602            */
1603           if (nul > 0) {
1604                     char *cp;
1605                     int i;
1606 
1607                     if (M_READONLY(m) || M_TRAILINGSPACE(m) < nul) {
1608                               struct mbuf *n;
1609 
1610                               KDASSERT(MLEN >= nul);
1611                               n = m_get(M_WAIT, MT_DATA);
1612                               MCLAIM(n, &nfs_mowner);
1613                               n->m_len = nul;
1614                               n->m_next = NULL;
1615                               m->m_next = n;
1616                               cp = mtod(n, void *);
1617                     } else {
1618                               cp = mtod(m, char *) + m->m_len;
1619                               m->m_len += nul;
1620                     }
1621                     for (i = 0; i < nul; i++)
1622                               *cp++ = '\0';
1623           }
1624           return;
1625 }
1626 
1627 /*
1628  * Make these functions instead of macros, so that the kernel text size
1629  * doesn't get too big...
1630  */
1631 void
nfsm_srvwcc(struct nfsrv_descript * nfsd,int before_ret,struct vattr * before_vap,int after_ret,struct vattr * after_vap,struct mbuf ** mbp,char ** bposp)1632 nfsm_srvwcc(struct nfsrv_descript *nfsd, int before_ret, struct vattr *before_vap, int after_ret, struct vattr *after_vap, struct mbuf **mbp, char **bposp)
1633 {
1634           struct mbuf *mb = *mbp;
1635           char *bpos = *bposp;
1636           u_int32_t *tl;
1637 
1638           if (before_ret) {
1639                     nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1640                     *tl = nfs_false;
1641           } else {
1642                     nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
1643                     *tl++ = nfs_true;
1644                     txdr_hyper(before_vap->va_size, tl);
1645                     tl += 2;
1646                     txdr_nfsv3time(&(before_vap->va_mtime), tl);
1647                     tl += 2;
1648                     txdr_nfsv3time(&(before_vap->va_ctime), tl);
1649           }
1650           *bposp = bpos;
1651           *mbp = mb;
1652           nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp);
1653 }
1654 
1655 void
nfsm_srvpostopattr(struct nfsrv_descript * nfsd,int after_ret,struct vattr * after_vap,struct mbuf ** mbp,char ** bposp)1656 nfsm_srvpostopattr(struct nfsrv_descript *nfsd, int after_ret, struct vattr *after_vap, struct mbuf **mbp, char **bposp)
1657 {
1658           struct mbuf *mb = *mbp;
1659           char *bpos = *bposp;
1660           u_int32_t *tl;
1661           struct nfs_fattr *fp;
1662 
1663           if (after_ret) {
1664                     nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1665                     *tl = nfs_false;
1666           } else {
1667                     nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR);
1668                     *tl++ = nfs_true;
1669                     fp = (struct nfs_fattr *)tl;
1670                     nfsm_srvfattr(nfsd, after_vap, fp);
1671           }
1672           *mbp = mb;
1673           *bposp = bpos;
1674 }
1675 
1676 void
nfsm_srvfattr(struct nfsrv_descript * nfsd,struct vattr * vap,struct nfs_fattr * fp)1677 nfsm_srvfattr(struct nfsrv_descript *nfsd, struct vattr *vap, struct nfs_fattr *fp)
1678 {
1679 
1680           fp->fa_nlink = txdr_unsigned(vap->va_nlink);
1681           fp->fa_uid = txdr_unsigned(vap->va_uid);
1682           fp->fa_gid = txdr_unsigned(vap->va_gid);
1683           if (nfsd->nd_flag & ND_NFSV3) {
1684                     fp->fa_type = vtonfsv3_type(vap->va_type);
1685                     fp->fa_mode = vtonfsv3_mode(vap->va_mode);
1686                     txdr_hyper(vap->va_size, &fp->fa3_size);
1687                     txdr_hyper(vap->va_bytes, &fp->fa3_used);
1688                     fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev));
1689                     fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev));
1690                     fp->fa3_fsid.nfsuquad[0] = 0;
1691                     fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid);
1692                     txdr_hyper(vap->va_fileid, &fp->fa3_fileid);
1693                     txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime);
1694                     txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime);
1695                     txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime);
1696           } else {
1697                     fp->fa_type = vtonfsv2_type(vap->va_type);
1698                     fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1699                     fp->fa2_size = txdr_unsigned(NFS_V2CLAMP32(vap->va_size));
1700                     fp->fa2_blocksize = txdr_unsigned(NFS_V2CLAMP16(vap->va_blocksize));
1701                     if (vap->va_type == VFIFO)
1702                               fp->fa2_rdev = 0xffffffff;
1703                     else
1704                               fp->fa2_rdev = txdr_unsigned(vap->va_rdev);
1705                     fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE);
1706                     fp->fa2_fsid = txdr_unsigned(vap->va_fsid);
1707                     fp->fa2_fileid = txdr_unsigned(vap->va_fileid);
1708                     txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime);
1709                     txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime);
1710                     txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime);
1711           }
1712 }
1713 
1714 /*
1715  * This function compares two net addresses by family and returns true
1716  * if they are the same host.
1717  * If there is any doubt, return false.
1718  * The AF_INET family is handled as a special case so that address mbufs
1719  * don't need to be saved to store "struct in_addr", which is only 4 bytes.
1720  */
1721 int
netaddr_match(int family,union nethostaddr * haddr,struct mbuf * nam)1722 netaddr_match(int family, union nethostaddr *haddr, struct mbuf *nam)
1723 {
1724           struct sockaddr_in *inetaddr;
1725 
1726           switch (family) {
1727           case AF_INET:
1728                     inetaddr = mtod(nam, struct sockaddr_in *);
1729                     if (inetaddr->sin_family == AF_INET &&
1730                         inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
1731                               return (1);
1732                     break;
1733           case AF_INET6:
1734               {
1735                     struct sockaddr_in6 *sin6_1, *sin6_2;
1736 
1737                     sin6_1 = mtod(nam, struct sockaddr_in6 *);
1738                     sin6_2 = mtod(haddr->had_nam, struct sockaddr_in6 *);
1739                     if (sin6_1->sin6_family == AF_INET6 &&
1740                         IN6_ARE_ADDR_EQUAL(&sin6_1->sin6_addr, &sin6_2->sin6_addr))
1741                               return 1;
1742               }
1743           default:
1744                     break;
1745           };
1746           return (0);
1747 }
1748 
1749 struct nfs_clearcommit_ctx {
1750           struct mount *mp;
1751 };
1752 
1753 static bool
nfs_clearcommit_selector(void * cl,struct vnode * vp)1754 nfs_clearcommit_selector(void *cl, struct vnode *vp)
1755 {
1756           struct nfs_clearcommit_ctx *c = cl;
1757           struct nfsnode *np;
1758 
1759           KASSERT(mutex_owned(vp->v_interlock));
1760 
1761           /* XXXAD mountpoint check looks like nonsense to me */
1762           np = VTONFS(vp);
1763           if (vp->v_type != VREG || vp->v_mount != c->mp || np == NULL)
1764                     return false;
1765           return false;
1766 }
1767 
1768 /*
1769  * The write verifier has changed (probably due to a server reboot), so all
1770  * PG_NEEDCOMMIT pages will have to be written again. Since they are marked
1771  * as dirty or are being written out just now, all this takes is clearing
1772  * the PG_NEEDCOMMIT flag. Once done the new write verifier can be set for
1773  * the mount point.
1774  */
1775 void
nfs_clearcommit(struct mount * mp)1776 nfs_clearcommit(struct mount *mp)
1777 {
1778           struct vnode *vp;
1779           struct vnode_iterator *marker;
1780           struct nfsmount *nmp = VFSTONFS(mp);
1781           struct nfs_clearcommit_ctx ctx;
1782           struct nfsnode *np;
1783           struct vm_page *pg;
1784           struct uvm_page_array a;
1785           voff_t off;
1786 
1787           rw_enter(&nmp->nm_writeverflock, RW_WRITER);
1788           vfs_vnode_iterator_init(mp, &marker);
1789           ctx.mp = mp;
1790           for (;;) {
1791                     vp = vfs_vnode_iterator_next(marker, nfs_clearcommit_selector,
1792                         &ctx);
1793                     if (vp == NULL)
1794                               break;
1795                     rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
1796                     np = VTONFS(vp);
1797                     np->n_pushlo = np->n_pushhi = np->n_pushedlo =
1798                         np->n_pushedhi = 0;
1799                     np->n_commitflags &=
1800                         ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID);
1801                     uvm_page_array_init(&a, &vp->v_uobj, 0);
1802                     off = 0;
1803                     while ((pg = uvm_page_array_fill_and_peek(&a, off, 0)) !=
1804                         NULL) {
1805                               pg->flags &= ~PG_NEEDCOMMIT;
1806                               uvm_page_array_advance(&a);
1807                               off = pg->offset + PAGE_SIZE;
1808                     }
1809                     uvm_page_array_fini(&a);
1810                     rw_exit(vp->v_uobj.vmobjlock);
1811                     vrele(vp);
1812           }
1813           KASSERT(vp == NULL);
1814           vfs_vnode_iterator_destroy(marker);
1815           mutex_enter(&nmp->nm_lock);
1816           nmp->nm_iflag &= ~NFSMNT_STALEWRITEVERF;
1817           mutex_exit(&nmp->nm_lock);
1818           rw_exit(&nmp->nm_writeverflock);
1819 }
1820 
1821 void
nfs_merge_commit_ranges(struct vnode * vp)1822 nfs_merge_commit_ranges(struct vnode *vp)
1823 {
1824           struct nfsnode *np = VTONFS(vp);
1825 
1826           KASSERT(np->n_commitflags & NFS_COMMIT_PUSH_VALID);
1827 
1828           if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
1829                     np->n_pushedlo = np->n_pushlo;
1830                     np->n_pushedhi = np->n_pushhi;
1831                     np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
1832           } else {
1833                     if (np->n_pushlo < np->n_pushedlo)
1834                               np->n_pushedlo = np->n_pushlo;
1835                     if (np->n_pushhi > np->n_pushedhi)
1836                               np->n_pushedhi = np->n_pushhi;
1837           }
1838 
1839           np->n_pushlo = np->n_pushhi = 0;
1840           np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID;
1841 
1842 #ifdef NFS_DEBUG_COMMIT
1843           printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo,
1844               (unsigned)np->n_pushedhi);
1845 #endif
1846 }
1847 
1848 int
nfs_in_committed_range(struct vnode * vp,off_t off,off_t len)1849 nfs_in_committed_range(struct vnode *vp, off_t off, off_t len)
1850 {
1851           struct nfsnode *np = VTONFS(vp);
1852           off_t lo, hi;
1853 
1854           if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
1855                     return 0;
1856           lo = off;
1857           hi = lo + len;
1858 
1859           return (lo >= np->n_pushedlo && hi <= np->n_pushedhi);
1860 }
1861 
1862 int
nfs_in_tobecommitted_range(struct vnode * vp,off_t off,off_t len)1863 nfs_in_tobecommitted_range(struct vnode *vp, off_t off, off_t len)
1864 {
1865           struct nfsnode *np = VTONFS(vp);
1866           off_t lo, hi;
1867 
1868           if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
1869                     return 0;
1870           lo = off;
1871           hi = lo + len;
1872 
1873           return (lo >= np->n_pushlo && hi <= np->n_pushhi);
1874 }
1875 
1876 void
nfs_add_committed_range(struct vnode * vp,off_t off,off_t len)1877 nfs_add_committed_range(struct vnode *vp, off_t off, off_t len)
1878 {
1879           struct nfsnode *np = VTONFS(vp);
1880           off_t lo, hi;
1881 
1882           lo = off;
1883           hi = lo + len;
1884 
1885           if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
1886                     np->n_pushedlo = lo;
1887                     np->n_pushedhi = hi;
1888                     np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
1889           } else {
1890                     if (hi > np->n_pushedhi)
1891                               np->n_pushedhi = hi;
1892                     if (lo < np->n_pushedlo)
1893                               np->n_pushedlo = lo;
1894           }
1895 #ifdef NFS_DEBUG_COMMIT
1896           printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo,
1897               (unsigned)np->n_pushedhi);
1898 #endif
1899 }
1900 
1901 void
nfs_del_committed_range(struct vnode * vp,off_t off,off_t len)1902 nfs_del_committed_range(struct vnode *vp, off_t off, off_t len)
1903 {
1904           struct nfsnode *np = VTONFS(vp);
1905           off_t lo, hi;
1906 
1907           if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
1908                     return;
1909 
1910           lo = off;
1911           hi = lo + len;
1912 
1913           if (lo > np->n_pushedhi || hi < np->n_pushedlo)
1914                     return;
1915           if (lo <= np->n_pushedlo)
1916                     np->n_pushedlo = hi;
1917           else if (hi >= np->n_pushedhi)
1918                     np->n_pushedhi = lo;
1919           else {
1920                     /*
1921                      * XXX There's only one range. If the deleted range
1922                      * is in the middle, pick the largest of the
1923                      * contiguous ranges that it leaves.
1924                      */
1925                     if ((np->n_pushedlo - lo) > (hi - np->n_pushedhi))
1926                               np->n_pushedhi = lo;
1927                     else
1928                               np->n_pushedlo = hi;
1929           }
1930 #ifdef NFS_DEBUG_COMMIT
1931           printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo,
1932               (unsigned)np->n_pushedhi);
1933 #endif
1934 }
1935 
1936 void
nfs_add_tobecommitted_range(struct vnode * vp,off_t off,off_t len)1937 nfs_add_tobecommitted_range(struct vnode *vp, off_t off, off_t len)
1938 {
1939           struct nfsnode *np = VTONFS(vp);
1940           off_t lo, hi;
1941 
1942           lo = off;
1943           hi = lo + len;
1944 
1945           if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) {
1946                     np->n_pushlo = lo;
1947                     np->n_pushhi = hi;
1948                     np->n_commitflags |= NFS_COMMIT_PUSH_VALID;
1949           } else {
1950                     if (lo < np->n_pushlo)
1951                               np->n_pushlo = lo;
1952                     if (hi > np->n_pushhi)
1953                               np->n_pushhi = hi;
1954           }
1955 #ifdef NFS_DEBUG_COMMIT
1956           printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
1957               (unsigned)np->n_pushhi);
1958 #endif
1959 }
1960 
1961 void
nfs_del_tobecommitted_range(struct vnode * vp,off_t off,off_t len)1962 nfs_del_tobecommitted_range(struct vnode *vp, off_t off, off_t len)
1963 {
1964           struct nfsnode *np = VTONFS(vp);
1965           off_t lo, hi;
1966 
1967           if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
1968                     return;
1969 
1970           lo = off;
1971           hi = lo + len;
1972 
1973           if (lo > np->n_pushhi || hi < np->n_pushlo)
1974                     return;
1975 
1976           if (lo <= np->n_pushlo)
1977                     np->n_pushlo = hi;
1978           else if (hi >= np->n_pushhi)
1979                     np->n_pushhi = lo;
1980           else {
1981                     /*
1982                      * XXX There's only one range. If the deleted range
1983                      * is in the middle, pick the largest of the
1984                      * contiguous ranges that it leaves.
1985                      */
1986                     if ((np->n_pushlo - lo) > (hi - np->n_pushhi))
1987                               np->n_pushhi = lo;
1988                     else
1989                               np->n_pushlo = hi;
1990           }
1991 #ifdef NFS_DEBUG_COMMIT
1992           printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
1993               (unsigned)np->n_pushhi);
1994 #endif
1995 }
1996 
1997 /*
1998  * Map errnos to NFS error numbers. For Version 3 also filter out error
1999  * numbers not specified for the associated procedure.
2000  */
2001 int
nfsrv_errmap(struct nfsrv_descript * nd,int err)2002 nfsrv_errmap(struct nfsrv_descript *nd, int err)
2003 {
2004           const short *defaulterrp, *errp;
2005 
2006           if (nd->nd_flag & ND_NFSV3) {
2007               if (nd->nd_procnum <= NFSPROC_COMMIT) {
2008                     errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum];
2009                     while (*++errp) {
2010                               if (*errp == err)
2011                                         return (err);
2012                               else if (*errp > err)
2013                                         break;
2014                     }
2015                     return ((int)*defaulterrp);
2016               } else
2017                     return (err & 0xffff);
2018           }
2019           if (err <= ELAST)
2020                     return ((int)nfsrv_v2errmap[err - 1]);
2021           return (NFSERR_IO);
2022 }
2023 
2024 u_int32_t
nfs_getxid(void)2025 nfs_getxid(void)
2026 {
2027           u_int32_t newxid;
2028 
2029           if (__predict_false(nfs_xid == 0)) {
2030                     nfs_xid = cprng_fast32();
2031           }
2032 
2033           /* get next xid.  skip 0 */
2034           do {
2035                     newxid = atomic_inc_32_nv(&nfs_xid);
2036           } while (__predict_false(newxid == 0));
2037 
2038           return txdr_unsigned(newxid);
2039 }
2040 
2041 /*
2042  * assign a new xid for existing request.
2043  * used for NFSERR_JUKEBOX handling.
2044  */
2045 void
nfs_renewxid(struct nfsreq * req)2046 nfs_renewxid(struct nfsreq *req)
2047 {
2048           u_int32_t xid;
2049           int off;
2050 
2051           xid = nfs_getxid();
2052           if (req->r_nmp->nm_sotype == SOCK_STREAM)
2053                     off = sizeof(u_int32_t); /* RPC record mark */
2054           else
2055                     off = 0;
2056 
2057           m_copyback(req->r_mreq, off, sizeof(xid), (void *)&xid);
2058           req->r_xid = xid;
2059 }
2060