1 /*        $NetBSD: nfs_clntsubs.c,v 1.7 2023/03/21 15:47:46 christos Exp $      */
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *        The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *        @(#)nfs_subs.c      8.8 (Berkeley) 5/22/95
35  */
36 
37 /*
38  * Copyright 2000 Wasabi Systems, Inc.
39  * All rights reserved.
40  *
41  * Written by Frank van der Linden for Wasabi Systems, Inc.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *      This product includes software developed for the NetBSD Project by
54  *      Wasabi Systems, Inc.
55  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
56  *    or promote products derived from this software without specific prior
57  *    written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69  * POSSIBILITY OF SUCH DAMAGE.
70  */
71 
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: nfs_clntsubs.c,v 1.7 2023/03/21 15:47:46 christos Exp $");
74 
75 #ifdef _KERNEL_OPT
76 #include "opt_nfs.h"
77 #endif
78 
79 /*
80  * These functions support the macros and help fiddle mbuf chains for
81  * the nfs op functions. They do things like create the rpc header and
82  * copy data between mbuf chains and uio lists.
83  */
84 #include <sys/param.h>
85 #include <sys/proc.h>
86 #include <sys/systm.h>
87 #include <sys/kernel.h>
88 #include <sys/kmem.h>
89 #include <sys/mount.h>
90 #include <sys/vnode.h>
91 #include <sys/namei.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/stat.h>
95 #include <sys/filedesc.h>
96 #include <sys/time.h>
97 #include <sys/dirent.h>
98 #include <sys/once.h>
99 #include <sys/kauth.h>
100 #include <sys/atomic.h>
101 
102 #include <uvm/uvm_extern.h>
103 
104 #include <nfs/rpcv2.h>
105 #include <nfs/nfsproto.h>
106 #include <nfs/nfsnode.h>
107 #include <nfs/nfs.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/nfsm_subs.h>
110 #include <nfs/nfsmount.h>
111 #include <nfs/nfsrtt.h>
112 #include <nfs/nfs_var.h>
113 
114 #include <miscfs/specfs/specdev.h>
115 
116 #include <netinet/in.h>
117 
/*
 * Attribute cache routines.
 * nfsm_loadattrcache() - dissects the attributes that are on the mbuf list
 *	and passes them to nfs_loadattrcache()
 * nfs_loadattrcache() - loads or updates the cache contents from the
 *	given attributes
 * nfs_getattrcache() - returns valid attributes if found in cache, returns
 *	error otherwise
 */
125 
126 /*
127  * Load the attribute cache (that lives in the nfsnode entry) with
128  * the values on the mbuf list and
129  * Iff vap not NULL
130  *    copy the attributes to *vaper
131  */
132 int
nfsm_loadattrcache(struct vnode ** vpp,struct mbuf ** mdp,char ** dposp,struct vattr * vaper,int flags)133 nfsm_loadattrcache(struct vnode **vpp, struct mbuf **mdp, char **dposp, struct vattr *vaper, int flags)
134 {
135           int32_t t1;
136           char *cp2;
137           int error = 0;
138           struct mbuf *md;
139           int v3 = NFS_ISV3(*vpp);
140 
141           md = *mdp;
142           t1 = (mtod(md, char *) + md->m_len) - *dposp;
143           error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2);
144           if (error)
145                     return (error);
146           return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper, flags);
147 }
148 
/*
 * Load the attribute cache of the nfsnode behind *vpp from the wire
 * format attributes in *fp.
 *
 * vpp   - only *vpp is read here; this function does not store through it.
 * fp    - attributes as received; decoded with the NFSv3 or the NFSv2
 *         field layout depending on NFS_ISV3() of the vnode.
 * vaper - if not NULL, receives a copy of the resulting attributes, with
 *         atime/mtime replaced by the node's n_atim/n_mtim when NCHG and
 *         NACC/NUPD are set in n_flag.
 * flags - only NAC_NOTRUNC is examined: when set, a size change of a
 *         regular file is recorded as NTRUNCDELAYED instead of flushing
 *         and freeing the pages right away.
 *
 * Returns 0 on success, or EFBIG if the reported size exceeds the mount's
 * nm_maxfilesize.  Note that on the EFBIG path the cached vattr has
 * already been overwritten while n_size and n_attrstamp are left as they
 * were.
 */
int
nfs_loadattrcache(struct vnode **vpp, struct nfs_fattr *fp, struct vattr *vaper, int flags)
{
	struct vnode *vp = *vpp;
	struct vattr *vap;
	int v3 = NFS_ISV3(vp);
	enum vtype vtyp;
	u_short vmode;
	struct timespec mtime;
	struct timespec ctime;
	int32_t rdev;
	struct nfsnode *np;
	extern int (**spec_nfsv2nodeop_p)(void *);
	uid_t uid;
	gid_t gid;

	/*
	 * Decode the fields whose representation differs between the
	 * two protocol versions: type, mode, rdev, mtime and ctime.
	 */
	if (v3) {
		vtyp = nfsv3tov_type(fp->fa_type);
		vmode = fxdr_unsigned(u_short, fp->fa_mode);
		rdev = makedev(fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata1),
			fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata2));
		fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
		fxdr_nfsv3time(&fp->fa3_ctime, &ctime);
	} else {
		vtyp = nfsv2tov_type(fp->fa_type);
		vmode = fxdr_unsigned(u_short, fp->fa_mode);
		/* Fall back to the file type bits carried in the mode. */
		if (vtyp == VNON || vtyp == VREG)
			vtyp = IFTOVT(vmode);
		rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
		fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
		/*
		 * Only the seconds of the v2 ctime are used as a time;
		 * the usec field is stored in va_gen further down.
		 */
		ctime.tv_sec = fxdr_unsigned(u_int32_t,
		    fp->fa2_ctime.nfsv2_sec);
		ctime.tv_nsec = 0;

		/*
		 * Really ugly NFSv2 kludge: a character device whose rdev
		 * is all ones is treated as a FIFO.
		 */
		if (vtyp == VCHR && rdev == 0xffffffff)
			vtyp = VFIFO;
	}

	/* Keep only the permission bits; the type lives in vtyp. */
	vmode &= ALLPERMS;

	/*
	 * If v_type == VNON it is a new node, so fill in the v_type,
	 * n_mtime fields. Check to see if it represents a special
	 * device, and if so, check for a possible alias. Once the
	 * correct vnode has been obtained, fill in the rest of the
	 * information.
	 */
	np = VTONFS(vp);
	if (vp->v_type == VNON) {
		vp->v_type = vtyp;
		if (vp->v_type == VFIFO) {
			extern int (**fifo_nfsv2nodeop_p)(void *);
			vp->v_op = fifo_nfsv2nodeop_p;
		} else if (vp->v_type == VREG) {
			mutex_init(&np->n_commitlock, MUTEX_DEFAULT, IPL_NONE);
		} else if (vp->v_type == VCHR || vp->v_type == VBLK) {
			vp->v_op = spec_nfsv2nodeop_p;
			spec_node_init(vp, (dev_t)rdev);
		}
		np->n_mtime = mtime;
	}
	uid = fxdr_unsigned(uid_t, fp->fa_uid);
	gid = fxdr_unsigned(gid_t, fp->fa_gid);
	vap = np->n_vattr;

	/*
	 * Invalidate access cache if uid, gid, mode or ctime changed.
	 * This must run before the cached values are overwritten below,
	 * since it compares the new values against the old cache contents.
	 */
	if (np->n_accstamp != -1 &&
	    (gid != vap->va_gid || uid != vap->va_uid || vmode != vap->va_mode
	    || timespeccmp(&ctime, &vap->va_ctime, !=)))
		np->n_accstamp = -1;

	vap->va_type = vtyp;
	vap->va_mode = vmode;
	vap->va_rdev = (dev_t)rdev;
	vap->va_mtime = mtime;
	vap->va_ctime = ctime;
	/* No birth time is taken from the attributes. */
	vap->va_birthtime.tv_sec = VNOVAL;
	vap->va_birthtime.tv_nsec = VNOVAL;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
	/* The block size depends on the file type. */
	switch (vtyp) {
	case VDIR:
		vap->va_blocksize = NFS_DIRFRAGSIZ;
		break;
	case VBLK:
		vap->va_blocksize = BLKDEV_IOSIZE;
		break;
	case VCHR:
		vap->va_blocksize = MAXBSIZE;
		break;
	default:
		/* v3 uses the mount's f_iosize; v2 carries a block size. */
		vap->va_blocksize = v3 ? vp->v_mount->mnt_stat.f_iosize :
		    fxdr_unsigned(int32_t, fp->fa2_blocksize);
		break;
	}
	if (v3) {
		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
		vap->va_uid = uid;
		vap->va_gid = gid;
		vap->va_size = fxdr_hyper(&fp->fa3_size);
		vap->va_bytes = fxdr_hyper(&fp->fa3_used);
		vap->va_fileid = fxdr_hyper(&fp->fa3_fileid);
		fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
		vap->va_flags = 0;
		vap->va_filerev = 0;
	} else {
		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
		vap->va_uid = uid;
		vap->va_gid = gid;
		vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
		/* v2 reports usage as a block count. */
		vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks)
		    * NFS_FABLKSIZE;
		vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
		fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
		vap->va_flags = 0;
		/* The usec field of the v2 ctime is used as va_gen. */
		vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
		vap->va_filerev = 0;
	}
	if (vap->va_size > VFSTONFS(vp->v_mount)->nm_maxfilesize) {
		return EFBIG;
	}
	/*
	 * Reconcile the size from the attributes with the size recorded
	 * in the nfsnode.
	 */
	if (vap->va_size != np->n_size) {
		if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) {
			/*
			 * The node is marked modified and the node's size
			 * is larger: keep the node's size.
			 */
			vap->va_size = np->n_size;
		} else {
			np->n_size = vap->va_size;
			if (vap->va_type == VREG) {
				/*
				 * we can't free pages if NAC_NOTRUNC because
				 * the pages can be owned by ourselves.
				 */
				if (flags & NAC_NOTRUNC) {
					/* nfs_delayedtruncate() acts on this. */
					np->n_flag |= NTRUNCDELAYED;
				} else {
					/*
					 * NOTE(review): there is no rw_exit()
					 * for vmobjlock in this function;
					 * presumably VOP_PUTPAGES() releases
					 * it - confirm.
					 */
					genfs_node_wrlock(vp);
					rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
					(void)VOP_PUTPAGES(vp, 0,
					    0, PGO_SYNCIO | PGO_CLEANIT |
					    PGO_FREE | PGO_ALLPAGES);
					uvm_vnp_setsize(vp, np->n_size);
					genfs_node_unlock(vp);
				}
			}
		}
	}
	/* Stamp the cache; nfs_getattrcache() ages entries from here. */
	np->n_attrstamp = time_second;
	if (vaper != NULL) {
		memcpy((void *)vaper, (void *)vap, sizeof(*vap));
		/* Report the node's own atime/mtime if they are flagged. */
		if (np->n_flag & NCHG) {
			if (np->n_flag & NACC)
				vaper->va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vaper->va_mtime = np->n_mtim;
		}
	}
	return (0);
}
310 
311 /*
312  * Check the time stamp
313  * If the cache is valid, copy contents to *vap and return 0
314  * otherwise return an error
315  */
316 int
nfs_getattrcache(struct vnode * vp,struct vattr * vaper)317 nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
318 {
319           struct nfsnode *np = VTONFS(vp);
320           struct nfsmount *nmp = VFSTONFS(vp->v_mount);
321           struct vattr *vap;
322 
323           if (np->n_attrstamp == 0 ||
324               (time_second - np->n_attrstamp) >= nfs_attrtimeo(nmp, np)) {
325                     nfsstats.attrcache_misses++;
326                     return (ENOENT);
327           }
328           nfsstats.attrcache_hits++;
329           vap = np->n_vattr;
330           if (vap->va_size != np->n_size) {
331                     if (vap->va_type == VREG) {
332                               if ((np->n_flag & NMODIFIED) != 0 &&
333                                   vap->va_size < np->n_size) {
334                                         vap->va_size = np->n_size;
335                               } else {
336                                         np->n_size = vap->va_size;
337                               }
338                               genfs_node_wrlock(vp);
339                               uvm_vnp_setsize(vp, np->n_size);
340                               genfs_node_unlock(vp);
341                     } else
342                               np->n_size = vap->va_size;
343           }
344           memcpy((void *)vaper, (void *)vap, sizeof(struct vattr));
345           if (np->n_flag & NCHG) {
346                     if (np->n_flag & NACC)
347                               vaper->va_atime = np->n_atim;
348                     if (np->n_flag & NUPD)
349                               vaper->va_mtime = np->n_mtim;
350           }
351           return (0);
352 }
353 
354 void
nfs_delayedtruncate(struct vnode * vp)355 nfs_delayedtruncate(struct vnode *vp)
356 {
357           struct nfsnode *np = VTONFS(vp);
358 
359           if (np->n_flag & NTRUNCDELAYED) {
360                     np->n_flag &= ~NTRUNCDELAYED;
361                     genfs_node_wrlock(vp);
362                     rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
363                     (void)VOP_PUTPAGES(vp, 0,
364                         0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES);
365                     uvm_vnp_setsize(vp, np->n_size);
366                     genfs_node_unlock(vp);
367           }
368 }
369 
/*
 * Number of seconds the wcc kludge stays in force after it was last
 * stamped in nm_wcckludgetime.
 */
#define	NFS_WCCKLUDGE_TIMEOUT	(24 * 60 * 60)	/* 1 day */
/*
 * Non-zero while the wcc kludge is in force for mount "nmp" at time
 * "now": NFSMNT_WCCKLUDGE is set in nm_iflag and fewer than
 * NFS_WCCKLUDGE_TIMEOUT seconds lie between nm_wcckludgetime and "now".
 * Note that "nmp" is evaluated more than once.
 */
#define	NFS_WCCKLUDGE(nmp, now) \
	(((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) && \
	((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0)
374 
/*
 * nfs_check_wccdata: check inaccurate wcc_data
 *
 * => return non-zero if we shouldn't trust the wcc_data.
 * => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed".
 *
 * np      - node whose cached va_mtime/va_ctime are compared.
 * ctime   - ctime to compare against the cached va_ctime; only used
 *           when the vnode is a directory.
 * mtime   - mtime to compare against the cached va_mtime.
 *           (presumably ctime/mtime are the pre-operation times from the
 *           server's wcc_data - verify against callers)
 * docheck - if false nothing is checked and 0 is returned.
 *
 * Returns 0 if the data can be trusted (always 0 when built with
 * NFS_V2_ONLY), EINVAL if the cached timestamps are not strictly later
 * than the ones passed in, and EPERM while the wcc kludge is still in
 * force for the mount.
 */

int
nfs_check_wccdata(struct nfsnode *np, const struct timespec *ctime,
    struct timespec *mtime, bool docheck)
{
	int error = 0;

#if !defined(NFS_V2_ONLY)

	if (docheck) {
		struct vnode *vp = NFSTOV(np);
		struct nfsmount *nmp;
		time_t now = time_second;
		const struct timespec *omtime = &np->n_vattr->va_mtime;
		const struct timespec *octime = &np->n_vattr->va_ctime;
		const char *reason = NULL; /* XXX: gcc */

		/* The cached mtime must be strictly later than *mtime. */
		if (timespeccmp(omtime, mtime, <=)) {
			reason = "mtime";
			error = EINVAL;
		}

		/*
		 * Same test for ctime, directories only.  If both tests
		 * fail, "ctime" is the reason that gets reported.
		 */
		if (vp->v_type == VDIR && timespeccmp(octime, ctime, <=)) {
			reason = "ctime";
			error = EINVAL;
		}

		nmp = VFSTONFS(vp->v_mount);
		if (error) {

			/*
			 * despite the fact that we've updated the file,
			 * the timestamps of the file were not updated as
			 * we expected.
			 * it means that the server has incompatible
			 * semantics of timestamps or (more likely)
			 * the server time is not precise enough to
			 * track each modification.
			 * in that case, we disable wcc processing.
			 *
			 * yes, strictly speaking, we should disable all
			 * caching.  it's a compromise.
			 */

			mutex_enter(&nmp->nm_lock);
			/*
			 * Only log when the kludge is not already in force,
			 * so repeated detections do not print again.
			 */
			if (!NFS_WCCKLUDGE(nmp, now)) {
				printf("%s: inaccurate wcc data (%s) detected,"
				    " disabling wcc"
				    " (ctime %u.%09u %u.%09u,"
				    " mtime %u.%09u %u.%09u)\n",
				    vp->v_mount->mnt_stat.f_mntfromname,
				    reason,
				    (unsigned int)octime->tv_sec,
				    (unsigned int)octime->tv_nsec,
				    (unsigned int)ctime->tv_sec,
				    (unsigned int)ctime->tv_nsec,
				    (unsigned int)omtime->tv_sec,
				    (unsigned int)omtime->tv_nsec,
				    (unsigned int)mtime->tv_sec,
				    (unsigned int)mtime->tv_nsec);
			}
			/* (Re)start the kludge period from now. */
			nmp->nm_iflag |= NFSMNT_WCCKLUDGE;
			nmp->nm_wcckludgetime = now;
			mutex_exit(&nmp->nm_lock);
		} else if (NFS_WCCKLUDGE(nmp, now)) {
			/*
			 * The timestamps look fine, but the kludge period
			 * has not run out yet (checked without nm_lock).
			 */
			error = EPERM; /* XXX */
		} else if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
			/*
			 * The flag is still set but the period has run out:
			 * clear it.  The flag is tested again under nm_lock
			 * before it is cleared and the message is printed.
			 */
			mutex_enter(&nmp->nm_lock);
			if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
				printf("%s: re-enabling wcc\n",
				    vp->v_mount->mnt_stat.f_mntfromname);
				nmp->nm_iflag &= ~NFSMNT_WCCKLUDGE;
			}
			mutex_exit(&nmp->nm_lock);
		}
	}

#endif /* !defined(NFS_V2_ONLY) */

	return error;
}
462 
463 /*
464  * Heuristic to see if the server XDR encodes directory cookies or not.
465  * it is not supposed to, but a lot of servers may do this. Also, since
466  * most/all servers will implement V2 as well, it is expected that they
467  * may return just 32 bits worth of cookie information, so we need to
468  * find out in which 32 bits this information is available. We do this
469  * to avoid trouble with emulated binaries that can't handle 64 bit
470  * directory offsets.
471  */
472 
473 void
nfs_cookieheuristic(struct vnode * vp,int * flagp,struct lwp * l,kauth_cred_t cred)474 nfs_cookieheuristic(struct vnode *vp, int *flagp, struct lwp *l, kauth_cred_t cred)
475 {
476           struct uio auio;
477           struct iovec aiov;
478           char *tbuf, *cp;
479           struct dirent *dp;
480           off_t *cookies = NULL, *cop;
481           int error, eof, nc, len;
482 
483           tbuf = malloc(NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK);
484 
485           aiov.iov_base = tbuf;
486           aiov.iov_len = NFS_DIRFRAGSIZ;
487           auio.uio_iov = &aiov;
488           auio.uio_iovcnt = 1;
489           auio.uio_rw = UIO_READ;
490           auio.uio_resid = NFS_DIRFRAGSIZ;
491           auio.uio_offset = 0;
492           UIO_SETUP_SYSSPACE(&auio);
493 
494           error = VOP_READDIR(vp, &auio, cred, &eof, &cookies, &nc);
495 
496           len = NFS_DIRFRAGSIZ - auio.uio_resid;
497           if (error || len == 0) {
498                     free(tbuf, M_TEMP);
499                     if (cookies)
500                               free(cookies, M_TEMP);
501                     return;
502           }
503 
504           /*
505            * Find the first valid entry and look at its offset cookie.
506            */
507 
508           cp = tbuf;
509           for (cop = cookies; len > 0; len -= dp->d_reclen) {
510                     dp = (struct dirent *)cp;
511                     if (dp->d_fileno != 0 && len >= dp->d_reclen) {
512                               if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) {
513                                         *flagp |= NFSMNT_SWAPCOOKIE;
514                                         nfs_invaldircache(vp, 0);
515                                         nfs_vinvalbuf(vp, 0, cred, l, 1);
516                               }
517                               break;
518                     }
519                     cop++;
520                     cp += dp->d_reclen;
521           }
522 
523           free(tbuf, M_TEMP);
524           free(cookies, M_TEMP);
525 }
526 
527 /*
528  * Set the attribute timeout based on how recently the file has been modified.
529  */
530 
531 time_t
nfs_attrtimeo(struct nfsmount * nmp,struct nfsnode * np)532 nfs_attrtimeo(struct nfsmount *nmp, struct nfsnode *np)
533 {
534           time_t timeo;
535 
536           if ((nmp->nm_flag & NFSMNT_NOAC) != 0)
537                     return 0;
538 
539           if (((np)->n_flag & NMODIFIED) != 0)
540                     return NFS_MINATTRTIMO;
541 
542           timeo = (time_second - np->n_mtime.tv_sec) / 10;
543           timeo = uimax(timeo, NFS_MINATTRTIMO);
544           timeo = uimin(timeo, NFS_MAXATTRTIMO);
545           return timeo;
546 }
547