1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
25  */
26 
27 /*
28  * ZFS control directory (a.k.a. ".zfs")
29  *
30  * This directory provides a common location for all ZFS meta-objects.
31  * Currently, this is only the 'snapshot' directory, but this may expand in the
32  * future.  The elements are built using the GFS primitives, as the hierarchy
33  * does not actually exist on disk.
34  *
35  * For 'snapshot', we don't want to have all snapshots always mounted, because
36  * this would take up a huge amount of space in /etc/mnttab.  We have three
37  * types of objects:
38  *
39  *        ctldir ------> snapshotdir -------> snapshot
40  *                                             |
41  *                                             |
42  *                                             V
43  *                                         mounted fs
44  *
45  * The 'snapshot' node contains just enough information to lookup '..' and act
46  * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
47  * perform an automount of the underlying filesystem and return the
48  * corresponding vnode.
49  *
50  * All mounts are handled automatically by the kernel, but unmounts are
51  * (currently) handled from user land.  The main reason is that there is no
52  * reliable way to auto-unmount the filesystem when it's "no longer in use".
53  * When the user unmounts a filesystem, we call zfsctl_unmount(), which
54  * unmounts any snapshots within the snapshot directory.
55  *
56  * The '.zfs', '.zfs/snapshot', and all directories created under
57  * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
58  * share the same vfs_t as the head filesystem (what '.zfs' lives under).
59  *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
 * (i.e. snapshots) are ZFS nodes and have their own unique vfs_t.
62  * However, vnodes within these mounted on file systems have their v_vfsp
63  * fields set to the head filesystem to make NFS happy (see
64  * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
65  * so that it cannot be freed until all snapshots have been unmounted.
66  */
67 
68 #ifdef __FreeBSD__
69 
70 #include <sys/zfs_context.h>
71 #include <sys/zfs_ctldir.h>
72 #include <sys/zfs_ioctl.h>
73 #include <sys/zfs_vfsops.h>
74 #include <sys/namei.h>
75 #include <sys/stat.h>
76 #include <sys/dmu.h>
77 #include <sys/dsl_dataset.h>
78 #include <sys/dsl_destroy.h>
79 #include <sys/dsl_deleg.h>
80 #include <sys/mount.h>
81 #include <sys/zap.h>
82 
83 #include "zfs_namecheck.h"
84 
85 /*
86  * "Synthetic" filesystem implementation.
87  */
88 
/*
 * Assert that A implies B, i.e. the assertion fails only when A holds and
 * B does not.  The message is wrapped in parentheses here, so callers pass
 * a bare string literal.  The expansion carries no trailing semicolon; the
 * caller supplies it, which keeps the macro usable as a single statement
 * (for example in an unbraced if/else).
 */
#define	KASSERT_IMPLY(A, B, msg)	KASSERT(!(A) || (B), (msg))
93 
/* Malloc type ("sfs_nodes") that synthetic-fs nodes are allocated from. */
static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes");

/*
 * Private data of a synthetic filesystem node; stored in v_data of the
 * vnode that represents it.  The (sn_parent_id, sn_id) pair is what
 * sfs_compare_ids() uses to tell nodes apart.
 */
typedef struct sfs_node {
	char		sn_name[ZFS_MAX_DATASET_NAME_LEN];	/* node name */
	uint64_t	sn_parent_id;	/* ID of the parent node */
	uint64_t	sn_id;		/* ID of this node */
} sfs_node_t;
101 
102 /*
103  * Check the parent's ID as well as the node's to account for a chance
104  * that IDs originating from different domains (snapshot IDs, artifical
105  * IDs, znode IDs) may clash.
106  */
107 static int
sfs_compare_ids(struct vnode * vp,void * arg)108 sfs_compare_ids(struct vnode *vp, void *arg)
109 {
110           sfs_node_t *n1 = vp->v_data;
111           sfs_node_t *n2 = arg;
112           bool equal;
113 
114           equal = n1->sn_id == n2->sn_id &&
115               n1->sn_parent_id == n2->sn_parent_id;
116 
117           /* Zero means equality. */
118           return (!equal);
119 }
120 
121 static int
sfs_vnode_get(const struct mount * mp,int flags,uint64_t parent_id,uint64_t id,struct vnode ** vpp)122 sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id,
123    uint64_t id, struct vnode **vpp)
124 {
125           sfs_node_t search;
126           int err;
127 
128           search.sn_id = id;
129           search.sn_parent_id = parent_id;
130           err = vfs_hash_get(mp, (u_int)id, flags, curthread, vpp,
131               sfs_compare_ids, &search);
132           return (err);
133 }
134 
135 static int
sfs_vnode_insert(struct vnode * vp,int flags,uint64_t parent_id,uint64_t id,struct vnode ** vpp)136 sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id,
137    uint64_t id, struct vnode **vpp)
138 {
139           int err;
140 
141           KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data"));
142           err = vfs_hash_insert(vp, (u_int)id, flags, curthread, vpp,
143               sfs_compare_ids, vp->v_data);
144           return (err);
145 }
146 
/*
 * Remove vp from the vfs hash; counterpart of sfs_vnode_insert().
 */
static void
sfs_vnode_remove(struct vnode *vp)
{
	vfs_hash_remove(vp);
}
152 
/*
 * Callback used by sfs_vgetx() to initialize a newly created vnode;
 * arg is the opaque argument that was handed to sfs_vgetx().
 */
typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg);
154 
155 static int
sfs_vgetx(struct mount * mp,int flags,uint64_t parent_id,uint64_t id,const char * tag,struct vop_vector * vops,sfs_vnode_setup_fn setup,void * arg,struct vnode ** vpp)156 sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
157     const char *tag, struct vop_vector *vops,
158     sfs_vnode_setup_fn setup, void *arg,
159     struct vnode **vpp)
160 {
161           struct vnode *vp;
162           int error;
163 
164           error = sfs_vnode_get(mp, flags, parent_id, id, vpp);
165           if (error != 0 || *vpp != NULL) {
166                     KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
167                         "sfs vnode with no data");
168                     return (error);
169           }
170 
171           /* Allocate a new vnode/inode. */
172           error = getnewvnode(tag, mp, vops, &vp);
173           if (error != 0) {
174                     *vpp = NULL;
175                     return (error);
176           }
177 
178           /*
179            * Exclusively lock the vnode vnode while it's being constructed.
180            */
181           lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
182           error = insmntque(vp, mp);
183           if (error != 0) {
184                     *vpp = NULL;
185                     return (error);
186           }
187 
188           setup(vp, arg);
189 
190           error = sfs_vnode_insert(vp, flags, parent_id, id, vpp);
191           if (error != 0 || *vpp != NULL) {
192                     KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
193                         "sfs vnode with no data");
194                     return (error);
195           }
196 
197           *vpp = vp;
198           return (0);
199 }
200 
201 static void
sfs_print_node(sfs_node_t * node)202 sfs_print_node(sfs_node_t *node)
203 {
204           printf("\tname = %s\n", node->sn_name);
205           printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id);
206           printf("\tid = %ju\n", (uintmax_t)node->sn_id);
207 }
208 
209 static sfs_node_t *
sfs_alloc_node(size_t size,const char * name,uint64_t parent_id,uint64_t id)210 sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id)
211 {
212           struct sfs_node *node;
213 
214           KASSERT(strlen(name) < sizeof(node->sn_name),
215               ("sfs node name is too long"));
216           KASSERT(size >= sizeof(*node), ("sfs node size is too small"));
217           node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO);
218           strlcpy(node->sn_name, name, sizeof(node->sn_name));
219           node->sn_parent_id = parent_id;
220           node->sn_id = id;
221 
222           return (node);
223 }
224 
/*
 * Free a node that was allocated with sfs_alloc_node().
 */
static void
sfs_destroy_node(sfs_node_t *node)
{
	free(node, M_SFSNODES);
}
230 
231 static void *
sfs_reclaim_vnode(vnode_t * vp)232 sfs_reclaim_vnode(vnode_t *vp)
233 {
234           sfs_node_t *node;
235           void *data;
236 
237           sfs_vnode_remove(vp);
238           data = vp->v_data;
239           vp->v_data = NULL;
240           return (data);
241 }
242 
243 static int
sfs_readdir_common(uint64_t parent_id,uint64_t id,struct vop_readdir_args * ap,uio_t * uio,off_t * offp)244 sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
245     uio_t *uio, off_t *offp)
246 {
247           struct dirent entry;
248           int error;
249 
250           /* Reset ncookies for subsequent use of vfs_read_dirent. */
251           if (ap->a_ncookies != NULL)
252                     *ap->a_ncookies = 0;
253 
254           if (uio->uio_resid < sizeof(entry))
255                     return (SET_ERROR(EINVAL));
256 
257           if (uio->uio_offset < 0)
258                     return (SET_ERROR(EINVAL));
259           if (uio->uio_offset == 0) {
260                     entry.d_fileno = id;
261                     entry.d_type = DT_DIR;
262                     entry.d_name[0] = '.';
263                     entry.d_name[1] = '\0';
264                     entry.d_namlen = 1;
265                     entry.d_reclen = sizeof(entry);
266                     error = vfs_read_dirent(ap, &entry, uio->uio_offset);
267                     if (error != 0)
268                               return (SET_ERROR(error));
269           }
270 
271           if (uio->uio_offset < sizeof(entry))
272                     return (SET_ERROR(EINVAL));
273           if (uio->uio_offset == sizeof(entry)) {
274                     entry.d_fileno = parent_id;
275                     entry.d_type = DT_DIR;
276                     entry.d_name[0] = '.';
277                     entry.d_name[1] = '.';
278                     entry.d_name[2] = '\0';
279                     entry.d_namlen = 2;
280                     entry.d_reclen = sizeof(entry);
281                     error = vfs_read_dirent(ap, &entry, uio->uio_offset);
282                     if (error != 0)
283                               return (SET_ERROR(error));
284           }
285 
286           if (offp != NULL)
287                     *offp = 2 * sizeof(entry);
288           return (0);
289 }
290 
291 
292 /*
293  * .zfs inode namespace
294  *
295  * We need to generate unique inode numbers for all files and directories
296  * within the .zfs pseudo-filesystem.  We use the following scheme:
297  *
298  *        ENTRY                         ZFSCTL_INODE
299  *        .zfs                          1
300  *        .zfs/snapshot                 2
301  *        .zfs/snapshot/<snap>          objectid(snap)
302  */
/* Inode number of '.zfs/snapshot/<snap>': the given ID, used as is. */
#define	ZFSCTL_INO_SNAP(id)	(id)

/*
 * Vnode operation vectors of the control directory nodes.  They are
 * declared up front so that they can be referenced (e.g. by
 * zfsctl_is_node()) ahead of their initializers.
 */
static struct vop_vector zfsctl_ops_root;
static struct vop_vector zfsctl_ops_snapdir;
static struct vop_vector zfsctl_ops_snapshot;
static struct vop_vector zfsctl_ops_shares_dir;
309 
/*
 * Module-level initialization of the control directory code.
 * Intentionally empty: there is nothing to set up here.
 */
void
zfsctl_init(void)
{
}
314 
/*
 * Module-level teardown of the control directory code; counterpart of
 * zfsctl_init().  Intentionally empty.
 */
void
zfsctl_fini(void)
{
}
319 
320 boolean_t
zfsctl_is_node(vnode_t * vp)321 zfsctl_is_node(vnode_t *vp)
322 {
323           return (vn_matchops(vp, zfsctl_ops_root) ||
324               vn_matchops(vp, zfsctl_ops_snapdir) ||
325               vn_matchops(vp, zfsctl_ops_snapshot) ||
326               vn_matchops(vp, zfsctl_ops_shares_dir));
327 
328 }
329 
/*
 * Node of the '.zfs' directory.
 */
typedef struct zfsctl_root {
	/*
	 * Generic node data.  Must stay the first member: the structure is
	 * allocated via sfs_alloc_node() and freed via sfs_destroy_node()
	 * through casts to and from sfs_node_t *.
	 */
	sfs_node_t	node;
	sfs_node_t	*snapdir;	/* node of '.zfs/snapshot' */
	/* Reported as ctime, mtime and birthtime of '.zfs'. */
	timestruc_t	cmtime;
} zfsctl_root_t;
335 
336 
337 /*
338  * Create the '.zfs' directory.
339  */
340 void
zfsctl_create(zfsvfs_t * zfsvfs)341 zfsctl_create(zfsvfs_t *zfsvfs)
342 {
343           zfsctl_root_t *dot_zfs;
344           sfs_node_t *snapdir;
345           vnode_t *rvp;
346           uint64_t crtime[2];
347 
348           ASSERT(zfsvfs->z_ctldir == NULL);
349 
350           snapdir = sfs_alloc_node(sizeof(*snapdir), "snapshot", ZFSCTL_INO_ROOT,
351               ZFSCTL_INO_SNAPDIR);
352           dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof(*dot_zfs), ".zfs", 0,
353               ZFSCTL_INO_ROOT);
354           dot_zfs->snapdir = snapdir;
355 
356           VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0);
357           VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
358               &crtime, sizeof(crtime)));
359           ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime);
360           vput(rvp);
361 
362           zfsvfs->z_ctldir = dot_zfs;
363 }
364 
365 /*
366  * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
367  * The nodes must not have any associated vnodes by now as they should be
368  * vflush-ed.
369  */
370 void
zfsctl_destroy(zfsvfs_t * zfsvfs)371 zfsctl_destroy(zfsvfs_t *zfsvfs)
372 {
373           sfs_destroy_node(zfsvfs->z_ctldir->snapdir);
374           sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir);
375           zfsvfs->z_ctldir = NULL;
376 }
377 
/*
 * Callback for vn_vget_ino_gen(): get the root vnode of the filesystem mp
 * via VFS_ROOT().  The arg parameter is unused.
 */
static int
zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	return (VFS_ROOT(mp, flags, vpp));
}
384 
385 static void
zfsctl_common_vnode_setup(vnode_t * vp,void * arg)386 zfsctl_common_vnode_setup(vnode_t *vp, void *arg)
387 {
388           ASSERT_VOP_ELOCKED(vp, __func__);
389 
390           /* We support shared locking. */
391           VN_LOCK_ASHARE(vp);
392           vp->v_type = VDIR;
393           vp->v_data = arg;
394 }
395 
396 static int
zfsctl_root_vnode(struct mount * mp,void * arg __unused,int flags,struct vnode ** vpp)397 zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags,
398     struct vnode **vpp)
399 {
400           void *node;
401           int err;
402 
403           node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir;
404           err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root,
405               zfsctl_common_vnode_setup, node, vpp);
406           return (err);
407 }
408 
409 static int
zfsctl_snapdir_vnode(struct mount * mp,void * arg __unused,int flags,struct vnode ** vpp)410 zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags,
411     struct vnode **vpp)
412 {
413           void *node;
414           int err;
415 
416           node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir->snapdir;
417           err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs",
418              &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp);
419           return (err);
420 }
421 
422 /*
423  * Given a root znode, retrieve the associated .zfs directory.
424  * Add a hold to the vnode and return it.
425  */
426 int
zfsctl_root(zfsvfs_t * zfsvfs,int flags,vnode_t ** vpp)427 zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp)
428 {
429           vnode_t *vp;
430           int error;
431 
432           error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp);
433           return (error);
434 }
435 
436 /*
437  * Common open routine.  Disallow any write access.
438  */
439 /* ARGSUSED */
440 static int
zfsctl_common_open(struct vop_open_args * ap)441 zfsctl_common_open(struct vop_open_args *ap)
442 {
443           int flags = ap->a_mode;
444 
445           if (flags & FWRITE)
446                     return (SET_ERROR(EACCES));
447 
448           return (0);
449 }
450 
/*
 * Common close routine.  Nothing to do here; always succeeds.
 */
/* ARGSUSED */
static int
zfsctl_common_close(struct vop_close_args *ap)
{
	return (0);
}
460 
461 /*
462  * Common access routine.  Disallow writes.
463  */
464 /* ARGSUSED */
465 static int
zfsctl_common_access(ap)466 zfsctl_common_access(ap)
467           struct vop_access_args /* {
468                     struct vnode *a_vp;
469                     accmode_t a_accmode;
470                     struct ucred *a_cred;
471                     struct thread *a_td;
472           } */ *ap;
473 {
474           accmode_t accmode = ap->a_accmode;
475 
476           if (accmode & VWRITE)
477                     return (SET_ERROR(EACCES));
478           return (0);
479 }
480 
481 /*
482  * Common getattr function.  Fill in basic information.
483  */
484 static void
zfsctl_common_getattr(vnode_t * vp,vattr_t * vap)485 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
486 {
487           timestruc_t         now;
488           sfs_node_t *node;
489 
490           node = vp->v_data;
491 
492           vap->va_uid = 0;
493           vap->va_gid = 0;
494           vap->va_rdev = 0;
495           /*
496            * We are a purely virtual object, so we have no
497            * blocksize or allocated blocks.
498            */
499           vap->va_blksize = 0;
500           vap->va_nblocks = 0;
501           vap->va_seq = 0;
502           vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
503           vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
504               S_IROTH | S_IXOTH;
505           vap->va_type = VDIR;
506           /*
507            * We live in the now (for atime).
508            */
509           gethrestime(&now);
510           vap->va_atime = now;
511           /* FreeBSD: Reset chflags(2) flags. */
512           vap->va_flags = 0;
513 
514           vap->va_nodeid = node->sn_id;
515 
516           /* At least '.' and '..'. */
517           vap->va_nlink = 2;
518 }
519 
520 /*ARGSUSED*/
521 static int
zfsctl_common_fid(ap)522 zfsctl_common_fid(ap)
523           struct vop_fid_args /* {
524                     struct vnode *a_vp;
525                     struct fid *a_fid;
526           } */ *ap;
527 {
528           vnode_t             *vp = ap->a_vp;
529           fid_t               *fidp = (void *)ap->a_fid;
530           sfs_node_t          *node = vp->v_data;
531           uint64_t  object = node->sn_id;
532           zfid_short_t        *zfid;
533           int                 i;
534 
535           zfid = (zfid_short_t *)fidp;
536           zfid->zf_len = SHORT_FID_LEN;
537 
538           for (i = 0; i < sizeof(zfid->zf_object); i++)
539                     zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
540 
541           /* .zfs nodes always have a generation number of 0 */
542           for (i = 0; i < sizeof(zfid->zf_gen); i++)
543                     zfid->zf_gen[i] = 0;
544 
545           return (0);
546 }
547 
548 static int
zfsctl_common_reclaim(ap)549 zfsctl_common_reclaim(ap)
550           struct vop_reclaim_args /* {
551                     struct vnode *a_vp;
552                     struct thread *a_td;
553           } */ *ap;
554 {
555           vnode_t *vp = ap->a_vp;
556 
557           (void) sfs_reclaim_vnode(vp);
558           return (0);
559 }
560 
561 static int
zfsctl_common_print(ap)562 zfsctl_common_print(ap)
563           struct vop_print_args /* {
564                     struct vnode *a_vp;
565           } */ *ap;
566 {
567           sfs_print_node(ap->a_vp->v_data);
568           return (0);
569 }
570 
571 /*
572  * Get root directory attributes.
573  */
574 /* ARGSUSED */
575 static int
zfsctl_root_getattr(ap)576 zfsctl_root_getattr(ap)
577           struct vop_getattr_args /* {
578                     struct vnode *a_vp;
579                     struct vattr *a_vap;
580                     struct ucred *a_cred;
581           } */ *ap;
582 {
583           struct vnode *vp = ap->a_vp;
584           struct vattr *vap = ap->a_vap;
585           zfsctl_root_t *node = vp->v_data;
586 
587           zfsctl_common_getattr(vp, vap);
588           vap->va_ctime = node->cmtime;
589           vap->va_mtime = vap->va_ctime;
590           vap->va_birthtime = vap->va_ctime;
591           vap->va_nlink += 1; /* snapdir */
592           vap->va_size = vap->va_nlink;
593           return (0);
594 }
595 
596 /*
597  * When we lookup "." we still can be asked to lock it
598  * differently, can't we?
599  */
600 int
zfsctl_relock_dot(vnode_t * dvp,int ltype)601 zfsctl_relock_dot(vnode_t *dvp, int ltype)
602 {
603           vref(dvp);
604           if (ltype != VOP_ISLOCKED(dvp)) {
605                     if (ltype == LK_EXCLUSIVE)
606                               vn_lock(dvp, LK_UPGRADE | LK_RETRY);
607                     else /* if (ltype == LK_SHARED) */
608                               vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
609 
610                     /* Relock for the "." case may left us with reclaimed vnode. */
611                     if ((dvp->v_iflag & VI_DOOMED) != 0) {
612                               vrele(dvp);
613                               return (SET_ERROR(ENOENT));
614                     }
615           }
616           return (0);
617 }
618 
619 /*
620  * Special case the handling of "..".
621  */
622 int
zfsctl_root_lookup(ap)623 zfsctl_root_lookup(ap)
624           struct vop_lookup_args /* {
625                     struct vnode *a_dvp;
626                     struct vnode **a_vpp;
627                     struct componentname *a_cnp;
628           } */ *ap;
629 {
630           struct componentname *cnp = ap->a_cnp;
631           vnode_t *dvp = ap->a_dvp;
632           vnode_t **vpp = ap->a_vpp;
633           cred_t *cr = ap->a_cnp->cn_cred;
634           int flags = ap->a_cnp->cn_flags;
635           int lkflags = ap->a_cnp->cn_lkflags;
636           int nameiop = ap->a_cnp->cn_nameiop;
637           int err;
638           int ltype;
639 
640           ASSERT(dvp->v_type == VDIR);
641 
642           if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
643                     return (SET_ERROR(ENOTSUP));
644 
645           if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
646                     err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
647                     if (err == 0)
648                               *vpp = dvp;
649           } else if ((flags & ISDOTDOT) != 0) {
650                     err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL,
651                         lkflags, vpp);
652           } else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) {
653                     err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp);
654           } else {
655                     err = SET_ERROR(ENOENT);
656           }
657           if (err != 0)
658                     *vpp = NULL;
659           return (err);
660 }
661 
662 static int
zfsctl_root_readdir(ap)663 zfsctl_root_readdir(ap)
664           struct vop_readdir_args /* {
665                     struct vnode *a_vp;
666                     struct uio *a_uio;
667                     struct ucred *a_cred;
668                     int *a_eofflag;
669                     int *ncookies;
670                     u_long **a_cookies;
671           } */ *ap;
672 {
673           struct dirent entry;
674           vnode_t *vp = ap->a_vp;
675           zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
676           zfsctl_root_t *node = vp->v_data;
677           uio_t *uio = ap->a_uio;
678           int *eofp = ap->a_eofflag;
679           off_t dots_offset;
680           int error;
681 
682           ASSERT(vp->v_type == VDIR);
683 
684           error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, uio,
685               &dots_offset);
686           if (error != 0) {
687                     if (error == ENAMETOOLONG) /* ran out of destination space */
688                               error = 0;
689                     return (error);
690           }
691           if (uio->uio_offset != dots_offset)
692                     return (SET_ERROR(EINVAL));
693 
694           CTASSERT(sizeof(node->snapdir->sn_name) <= sizeof(entry.d_name));
695           entry.d_fileno = node->snapdir->sn_id;
696           entry.d_type = DT_DIR;
697           strcpy(entry.d_name, node->snapdir->sn_name);
698           entry.d_namlen = strlen(entry.d_name);
699           entry.d_reclen = sizeof(entry);
700           error = vfs_read_dirent(ap, &entry, uio->uio_offset);
701           if (error != 0) {
702                     if (error == ENAMETOOLONG)
703                               error = 0;
704                     return (SET_ERROR(error));
705           }
706           if (eofp != NULL)
707                     *eofp = 1;
708           return (0);
709 }
710 
711 static int
zfsctl_root_vptocnp(struct vop_vptocnp_args * ap)712 zfsctl_root_vptocnp(struct vop_vptocnp_args *ap)
713 {
714           static const char dotzfs_name[4] = ".zfs";
715           vnode_t *dvp;
716           int error;
717 
718           if (*ap->a_buflen < sizeof (dotzfs_name))
719                     return (SET_ERROR(ENOMEM));
720 
721           error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL,
722               LK_SHARED, &dvp);
723           if (error != 0)
724                     return (SET_ERROR(error));
725 
726           VOP_UNLOCK(dvp, 0);
727           *ap->a_vpp = dvp;
728           *ap->a_buflen -= sizeof (dotzfs_name);
729           bcopy(dotzfs_name, ap->a_buf + *ap->a_buflen, sizeof (dotzfs_name));
730           return (0);
731 }
732 
/*
 * Vnode operations of the '.zfs' directory.  Operations shared by all
 * control directory nodes use the zfsctl_common_* routines; getattr,
 * readdir, lookup and vptocnp are specific to '.zfs'.
 */
static struct vop_vector zfsctl_ops_root = {
	.vop_default =	&default_vnodeops,
	.vop_open =	zfsctl_common_open,
	.vop_close =	zfsctl_common_close,
	.vop_ioctl =	VOP_EINVAL,
	.vop_getattr =	zfsctl_root_getattr,
	.vop_access =	zfsctl_common_access,
	.vop_readdir =	zfsctl_root_readdir,
	.vop_lookup =	zfsctl_root_lookup,
	.vop_inactive =	VOP_NULL,
	.vop_reclaim =	zfsctl_common_reclaim,
	.vop_fid =	zfsctl_common_fid,
	.vop_print =	zfsctl_common_print,
	.vop_vptocnp =	zfsctl_root_vptocnp,
};
748 
749 static int
zfsctl_snapshot_zname(vnode_t * vp,const char * name,int len,char * zname)750 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
751 {
752           objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
753 
754           dmu_objset_name(os, zname);
755           if (strlen(zname) + 1 + strlen(name) >= len)
756                     return (SET_ERROR(ENAMETOOLONG));
757           (void) strcat(zname, "@");
758           (void) strcat(zname, name);
759           return (0);
760 }
761 
762 static int
zfsctl_snapshot_lookup(vnode_t * vp,const char * name,uint64_t * id)763 zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id)
764 {
765           objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
766           int err;
767 
768           err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id);
769           return (err);
770 }
771 
772 /*
773  * Given a vnode get a root vnode of a filesystem mounted on top of
774  * the vnode, if any.  The root vnode is referenced and locked.
775  * If no filesystem is mounted then the orinal vnode remains referenced
776  * and locked.  If any error happens the orinal vnode is unlocked and
777  * released.
778  */
779 static int
zfsctl_mounted_here(vnode_t ** vpp,int flags)780 zfsctl_mounted_here(vnode_t **vpp, int flags)
781 {
782           struct mount *mp;
783           int err;
784 
785           ASSERT_VOP_LOCKED(*vpp, __func__);
786           ASSERT3S((*vpp)->v_type, ==, VDIR);
787 
788           if ((mp = (*vpp)->v_mountedhere) != NULL) {
789                     err = vfs_busy(mp, 0);
790                     KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err));
791                     KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint"));
792                     vput(*vpp);
793                     err = VFS_ROOT(mp, flags, vpp);
794                     vfs_unbusy(mp);
795                     return (err);
796           }
797           return (EJUSTRETURN);
798 }
799 
/*
 * Argument of zfsctl_snapshot_vnode_setup(): the name and the ID to give
 * to the node of a newly created snapshot vnode.
 */
typedef struct {
	const char *snap_name;
	uint64_t    snap_id;
} snapshot_setup_arg_t;
804 
805 static void
zfsctl_snapshot_vnode_setup(vnode_t * vp,void * arg)806 zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg)
807 {
808           snapshot_setup_arg_t *ssa = arg;
809           sfs_node_t *node;
810 
811           ASSERT_VOP_ELOCKED(vp, __func__);
812 
813           node = sfs_alloc_node(sizeof(sfs_node_t),
814               ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id);
815           zfsctl_common_vnode_setup(vp, node);
816 
817           /* We have to support recursive locking. */
818           VN_LOCK_AREC(vp);
819 }
820 
821 /*
822  * Lookup entry point for the 'snapshot' directory.  Try to open the
823  * snapshot if it exist, creating the pseudo filesystem vnode as necessary.
824  * Perform a mount of the associated dataset on top of the vnode.
825  */
826 /* ARGSUSED */
827 int
zfsctl_snapdir_lookup(ap)828 zfsctl_snapdir_lookup(ap)
829           struct vop_lookup_args /* {
830                     struct vnode *a_dvp;
831                     struct vnode **a_vpp;
832                     struct componentname *a_cnp;
833           } */ *ap;
834 {
835           vnode_t *dvp = ap->a_dvp;
836           vnode_t **vpp = ap->a_vpp;
837           struct componentname *cnp = ap->a_cnp;
838           char name[NAME_MAX + 1];
839           char fullname[ZFS_MAX_DATASET_NAME_LEN];
840           char *mountpoint;
841           size_t mountpoint_len;
842           zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
843           uint64_t snap_id;
844           int nameiop = cnp->cn_nameiop;
845           int lkflags = cnp->cn_lkflags;
846           int flags = cnp->cn_flags;
847           int err;
848 
849           ASSERT(dvp->v_type == VDIR);
850 
851           if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
852                     return (SET_ERROR(ENOTSUP));
853 
854           if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
855                     err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
856                     if (err == 0)
857                               *vpp = dvp;
858                     return (err);
859           }
860           if (flags & ISDOTDOT) {
861                     err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags,
862                         vpp);
863                     return (err);
864           }
865 
866           if (cnp->cn_namelen >= sizeof(name))
867                     return (SET_ERROR(ENAMETOOLONG));
868 
869           strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
870           err = zfsctl_snapshot_lookup(dvp, name, &snap_id);
871           if (err != 0)
872                     return (SET_ERROR(ENOENT));
873 
874           for (;;) {
875                     snapshot_setup_arg_t ssa;
876 
877                     ssa.snap_name = name;
878                     ssa.snap_id = snap_id;
879                     err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR,
880                        snap_id, "zfs", &zfsctl_ops_snapshot,
881                        zfsctl_snapshot_vnode_setup, &ssa, vpp);
882                     if (err != 0)
883                               return (err);
884 
885                     /* Check if a new vnode has just been created. */
886                     if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)
887                               break;
888 
889                     /*
890                      * The vnode must be referenced at least by this thread and
891                      * the mounted snapshot or the thread doing the mounting.
892                      * There can be more references from concurrent lookups.
893                      */
894                     KASSERT(vrefcnt(*vpp) > 1, ("found unreferenced mountpoint"));
895 
896                     /*
897                      * Check if a snapshot is already mounted on top of the vnode.
898                      */
899                     err = zfsctl_mounted_here(vpp, lkflags);
900                     if (err != EJUSTRETURN)
901                               return (err);
902 
903 #ifdef INVARIANTS
904                     /*
905                      * If the vnode not covered yet, then the mount operation
906                      * must be in progress.
907                      */
908                     VI_LOCK(*vpp);
909                     KASSERT(((*vpp)->v_iflag & VI_MOUNT) != 0,
910                         ("snapshot vnode not covered"));
911                     VI_UNLOCK(*vpp);
912 #endif
913                     vput(*vpp);
914 
915                     /*
916                      * In this situation we can loop on uncontested locks and starve
917                      * the thread doing the lengthy, non-trivial mount operation.
918                      */
919                     kern_yield(PRI_USER);
920           }
921 
922           VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof(fullname), fullname));
923 
924           mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
925               strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1;
926           mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
927           (void) snprintf(mountpoint, mountpoint_len,
928               "%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
929               dvp->v_vfsp->mnt_stat.f_mntonname, name);
930 
931           err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
932           kmem_free(mountpoint, mountpoint_len);
933           if (err == 0) {
934                     /*
935                      * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
936                      *
937                      * This is where we lie about our v_vfsp in order to
938                      * make .zfs/snapshot/<snapname> accessible over NFS
939                      * without requiring manual mounts of <snapname>.
940                      */
941                     ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
942                     VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
943 
944                     /* Clear the root flag (set via VFS_ROOT) as well. */
945                     (*vpp)->v_vflag &= ~VV_ROOT;
946           }
947 
948           if (err != 0)
949                     *vpp = NULL;
950           return (err);
951 }
952 
953 static int
zfsctl_snapdir_readdir(ap)954 zfsctl_snapdir_readdir(ap)
955           struct vop_readdir_args /* {
956                     struct vnode *a_vp;
957                     struct uio *a_uio;
958                     struct ucred *a_cred;
959                     int *a_eofflag;
960                     int *ncookies;
961                     u_long **a_cookies;
962           } */ *ap;
963 {
964           char snapname[ZFS_MAX_DATASET_NAME_LEN];
965           struct dirent entry;
966           vnode_t *vp = ap->a_vp;
967           zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
968           uio_t *uio = ap->a_uio;
969           int *eofp = ap->a_eofflag;
970           off_t dots_offset;
971           int error;
972 
973           ASSERT(vp->v_type == VDIR);
974 
975           error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, uio,
976               &dots_offset);
977           if (error != 0) {
978                     if (error == ENAMETOOLONG) /* ran out of destination space */
979                               error = 0;
980                     return (error);
981           }
982 
983           for (;;) {
984                     uint64_t cookie;
985                     uint64_t id;
986 
987                     cookie = uio->uio_offset - dots_offset;
988 
989                     dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
990                     error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
991                         snapname, &id, &cookie, NULL);
992                     dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
993                     if (error != 0) {
994                               if (error == ENOENT) {
995                                         if (eofp != NULL)
996                                                   *eofp = 1;
997                                         error = 0;
998                               }
999                               return (error);
1000                     }
1001 
1002                     entry.d_fileno = id;
1003                     entry.d_type = DT_DIR;
1004                     strcpy(entry.d_name, snapname);
1005                     entry.d_namlen = strlen(entry.d_name);
1006                     entry.d_reclen = sizeof(entry);
1007                     error = vfs_read_dirent(ap, &entry, uio->uio_offset);
1008                     if (error != 0) {
1009                               if (error == ENAMETOOLONG)
1010                                         error = 0;
1011                               return (SET_ERROR(error));
1012                     }
1013                     uio->uio_offset = cookie + dots_offset;
1014           }
1015           /* NOTREACHED */
1016 }
1017 
1018 /* ARGSUSED */
1019 static int
zfsctl_snapdir_getattr(ap)1020 zfsctl_snapdir_getattr(ap)
1021           struct vop_getattr_args /* {
1022                     struct vnode *a_vp;
1023                     struct vattr *a_vap;
1024                     struct ucred *a_cred;
1025           } */ *ap;
1026 {
1027           vnode_t *vp = ap->a_vp;
1028           vattr_t *vap = ap->a_vap;
1029           zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1030           dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
1031           sfs_node_t *node = vp->v_data;
1032           uint64_t snap_count;
1033           int err;
1034 
1035           zfsctl_common_getattr(vp, vap);
1036           vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os);
1037           vap->va_mtime = vap->va_ctime;
1038           vap->va_birthtime = vap->va_ctime;
1039           if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
1040                     err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
1041                         dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
1042                     if (err != 0)
1043                               return (err);
1044                     vap->va_nlink += snap_count;
1045           }
1046           vap->va_size = vap->va_nlink;
1047 
1048           return (0);
1049 }
1050 
1051 static struct vop_vector zfsctl_ops_snapdir = {
1052           .vop_default =      &default_vnodeops,
1053           .vop_open =         zfsctl_common_open,
1054           .vop_close =        zfsctl_common_close,
1055           .vop_getattr =      zfsctl_snapdir_getattr,
1056           .vop_access =       zfsctl_common_access,
1057           .vop_readdir =      zfsctl_snapdir_readdir,
1058           .vop_lookup =       zfsctl_snapdir_lookup,
1059           .vop_reclaim =      zfsctl_common_reclaim,
1060           .vop_fid =          zfsctl_common_fid,
1061           .vop_print =        zfsctl_common_print,
1062 };
1063 
1064 static int
zfsctl_snapshot_inactive(ap)1065 zfsctl_snapshot_inactive(ap)
1066           struct vop_inactive_args /* {
1067                     struct vnode *a_vp;
1068                     struct thread *a_td;
1069           } */ *ap;
1070 {
1071           vnode_t *vp = ap->a_vp;
1072 
1073           VERIFY(vrecycle(vp) == 1);
1074           return (0);
1075 }
1076 
1077 static int
zfsctl_snapshot_reclaim(ap)1078 zfsctl_snapshot_reclaim(ap)
1079           struct vop_reclaim_args /* {
1080                     struct vnode *a_vp;
1081                     struct thread *a_td;
1082           } */ *ap;
1083 {
1084           vnode_t *vp = ap->a_vp;
1085           void *data = vp->v_data;
1086 
1087           sfs_reclaim_vnode(vp);
1088           sfs_destroy_node(data);
1089           return (0);
1090 }
1091 
1092 static int
zfsctl_snapshot_vptocnp(struct vop_vptocnp_args * ap)1093 zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
1094 {
1095           struct mount *mp;
1096           vnode_t *dvp;
1097           vnode_t *vp;
1098           sfs_node_t *node;
1099           size_t len;
1100           int locked;
1101           int error;
1102 
1103           vp = ap->a_vp;
1104           node = vp->v_data;
1105           len = strlen(node->sn_name);
1106           if (*ap->a_buflen < len)
1107                     return (SET_ERROR(ENOMEM));
1108 
1109           /*
1110            * Prevent unmounting of the snapshot while the vnode lock
1111            * is not held.  That is not strictly required, but allows
1112            * us to assert that an uncovered snapshot vnode is never
1113            * "leaked".
1114            */
1115           mp = vp->v_mountedhere;
1116           if (mp == NULL)
1117                     return (SET_ERROR(ENOENT));
1118           error = vfs_busy(mp, 0);
1119           KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error));
1120 
1121           /*
1122            * We can vput the vnode as we can now depend on the reference owned
1123            * by the busied mp.  But we also need to hold the vnode, because
1124            * the reference may go after vfs_unbusy() which has to be called
1125            * before we can lock the vnode again.
1126            */
1127           locked = VOP_ISLOCKED(vp);
1128           vhold(vp);
1129           vput(vp);
1130 
1131           /* Look up .zfs/snapshot, our parent. */
1132           error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp);
1133           if (error == 0) {
1134                     VOP_UNLOCK(dvp, 0);
1135                     *ap->a_vpp = dvp;
1136                     *ap->a_buflen -= len;
1137                     bcopy(node->sn_name, ap->a_buf + *ap->a_buflen, len);
1138           }
1139           vfs_unbusy(mp);
1140           vget(vp, locked | LK_VNHELD | LK_RETRY, curthread);
1141           return (error);
1142 }
1143 
1144 /*
1145  * These VP's should never see the light of day.  They should always
1146  * be covered.
1147  */
1148 static struct vop_vector zfsctl_ops_snapshot = {
1149           .vop_default =                NULL, /* ensure very restricted access */
1150           .vop_inactive =               zfsctl_snapshot_inactive,
1151           .vop_reclaim =                zfsctl_snapshot_reclaim,
1152           .vop_vptocnp =                zfsctl_snapshot_vptocnp,
1153           .vop_lock1 =                  vop_stdlock,
1154           .vop_unlock =                 vop_stdunlock,
1155           .vop_islocked =               vop_stdislocked,
1156           .vop_advlockpurge = vop_stdadvlockpurge, /* called by vgone */
1157           .vop_print =                  zfsctl_common_print,
1158 };
1159 
1160 int
zfsctl_lookup_objset(vfs_t * vfsp,uint64_t objsetid,zfsvfs_t ** zfsvfsp)1161 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1162 {
1163           struct mount *mp;
1164           zfsvfs_t *zfsvfs = vfsp->vfs_data;
1165           vnode_t *vp;
1166           int error;
1167 
1168           ASSERT(zfsvfs->z_ctldir != NULL);
1169           *zfsvfsp = NULL;
1170           error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
1171               ZFSCTL_INO_SNAPDIR, objsetid, &vp);
1172           if (error == 0 && vp != NULL) {
1173                     /*
1174                      * XXX Probably need to at least reference, if not busy, the mp.
1175                      */
1176                     if (vp->v_mountedhere != NULL)
1177                               *zfsvfsp = vp->v_mountedhere->mnt_data;
1178                     vput(vp);
1179           }
1180           if (*zfsvfsp == NULL)
1181                     return (SET_ERROR(EINVAL));
1182           return (0);
1183 }
1184 
1185 /*
1186  * Unmount any snapshots for the given filesystem.  This is called from
1187  * zfs_umount() - if we have a ctldir, then go through and unmount all the
1188  * snapshots.
1189  */
1190 int
zfsctl_umount_snapshots(vfs_t * vfsp,int fflags,cred_t * cr)1191 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
1192 {
1193           char snapname[ZFS_MAX_DATASET_NAME_LEN];
1194           zfsvfs_t *zfsvfs = vfsp->vfs_data;
1195           struct mount *mp;
1196           vnode_t *dvp;
1197           vnode_t *vp;
1198           sfs_node_t *node;
1199           sfs_node_t *snap;
1200           uint64_t cookie;
1201           int error;
1202 
1203           ASSERT(zfsvfs->z_ctldir != NULL);
1204 
1205           cookie = 0;
1206           for (;;) {
1207                     uint64_t id;
1208 
1209                     dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
1210                     error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
1211                         snapname, &id, &cookie, NULL);
1212                     dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
1213                     if (error != 0) {
1214                               if (error == ENOENT)
1215                                         error = 0;
1216                               break;
1217                     }
1218 
1219                     for (;;) {
1220                               error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
1221                                   ZFSCTL_INO_SNAPDIR, id, &vp);
1222                               if (error != 0 || vp == NULL)
1223                                         break;
1224 
1225                               mp = vp->v_mountedhere;
1226 
1227                               /*
1228                                * v_mountedhere being NULL means that the
1229                                * (uncovered) vnode is in a transient state
1230                                * (mounting or unmounting), so loop until it
1231                                * settles down.
1232                                */
1233                               if (mp != NULL)
1234                                         break;
1235                               vput(vp);
1236                     }
1237                     if (error != 0)
1238                               break;
1239                     if (vp == NULL)
1240                               continue; /* no mountpoint, nothing to do */
1241 
1242                     /*
1243                      * The mount-point vnode is kept locked to avoid spurious EBUSY
1244                      * from a concurrent umount.
1245                      * The vnode lock must have recursive locking enabled.
1246                      */
1247                     vfs_ref(mp);
1248                     error = dounmount(mp, fflags, curthread);
1249                     KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1,
1250                         ("extra references after unmount"));
1251                     vput(vp);
1252                     if (error != 0)
1253                               break;
1254           }
1255           KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0,
1256               ("force unmounting failed"));
1257           return (error);
1258 }
1259 
1260 #endif /* __FreeBSD__ */
1261 
1262 #ifdef __NetBSD__
1263 
1264 #include <sys/malloc.h>
1265 #include <sys/pathname.h>
1266 #include <miscfs/genfs/genfs.h>
1267 #include <sys/zfs_context.h>
1268 #include <sys/zfs_ctldir.h>
1269 #include <sys/dsl_dataset.h>
1270 #include <sys/zap.h>
1271 
1272 struct zfsctl_root {
1273           timestruc_t zc_cmtime;
1274 };
1275 
1276 struct sfs_node_key {
1277           uint64_t parent_id;
1278           uint64_t id;
1279 };
1280 struct sfs_node {
1281           struct sfs_node_key sn_key;
1282 #define sn_parent_id sn_key.parent_id
1283 #define sn_id sn_key.id
1284           lwp_t *sn_mounting;
1285 };
1286 
/* Name of the snapshot directory inside the control directory. */
#define ZFS_SNAPDIR_NAME "snapshot"

/* Private sfs node attached to a control-directory vnode. */
#define VTOSFS(vp) ((struct sfs_node *)((vp)->v_data))

/*
 * Assert that vp is a directory vnode using the sfs operations vector.
 *
 * No locals are declared here on purpose: a helper variable inside the
 * macro would shadow the caller's argument whenever that argument mentions
 * the same identifier, and would be left unused when ASSERT() expands to
 * nothing.
 */
#define SFS_NODE_ASSERT(vp) \
	do { \
		ASSERT((vp)->v_op == zfs_sfsop_p); \
		ASSERT((vp)->v_type == VDIR); \
	} while (/*CONSTCOND*/ 0)

/* Operations vector shared by all sfs vnodes; compared against v_op above. */
static int (**zfs_sfsop_p)(void *);
1299 
1300 /*
1301  * Mount a snapshot.  Cannot use do_sys_umount() as it
1302  * doesn't allow its "path" argument from SYSSPACE.
1303  */
1304 static int
sfs_snapshot_mount(vnode_t * vp,const char * snapname)1305 sfs_snapshot_mount(vnode_t *vp, const char *snapname)
1306 {
1307           struct sfs_node *node = VTOSFS(vp);
1308           zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1309           vfs_t *vfsp;
1310           char *path, *osname;
1311           int error;
1312           extern int zfs_domount(vfs_t *, char *);
1313 
1314           path = PNBUF_GET();
1315           osname = PNBUF_GET();
1316 
1317           dmu_objset_name(zfsvfs->z_os, path);
1318           snprintf(osname, MAXPATHLEN, "%s@%s", path, snapname);
1319           snprintf(path, MAXPATHLEN,
1320               "%s/" ZFS_CTLDIR_NAME "/" ZFS_SNAPDIR_NAME "/%s",
1321               vp->v_vfsp->mnt_stat.f_mntonname, snapname);
1322 
1323           vfsp = vfs_mountalloc(vp->v_vfsp->mnt_op, vp);
1324           if (vfsp == NULL) {
1325                     error = ENOMEM;
1326                     goto out;
1327           }
1328           vfsp->mnt_op->vfs_refcount++;
1329           vfsp->mnt_stat.f_owner = 0;
1330           vfsp->mnt_flag = MNT_RDONLY | MNT_NOSUID | MNT_IGNORE;
1331 
1332           mutex_enter(vfsp->mnt_updating);
1333 
1334           error = zfs_domount(vfsp, osname);
1335           if (error)
1336                     goto out;
1337 
1338           /* Set f_fsidx from parent to cheat NFSD. */
1339           vfsp->mnt_stat.f_fsidx = vp->v_vfsp->mnt_stat.f_fsidx;
1340 
1341           strlcpy(vfsp->mnt_stat.f_mntfromname, osname,
1342               sizeof(vfsp->mnt_stat.f_mntfromname));
1343           set_statvfs_info(path, UIO_SYSSPACE, vfsp->mnt_stat.f_mntfromname,
1344               UIO_SYSSPACE, vfsp->mnt_op->vfs_name, vfsp, curlwp);
1345 
1346           error = vfs_set_lowermount(vfsp, vp->v_vfsp);
1347           if (error)
1348                     goto out;
1349 
1350           mountlist_append(vfsp);
1351           vref(vp);
1352           vp->v_mountedhere = vfsp;
1353 
1354           mutex_exit(vfsp->mnt_updating);
1355           (void) VFS_STATVFS(vfsp, &vfsp->mnt_stat);
1356 
1357 out:;
1358           if (error && vfsp) {
1359                     mutex_exit(vfsp->mnt_updating);
1360                     vfs_rele(vfsp);
1361           }
1362           PNBUF_PUT(osname);
1363           PNBUF_PUT(path);
1364 
1365           return error;
1366 }
1367 
1368 static int
sfs_lookup_snapshot(vnode_t * dvp,struct componentname * cnp,vnode_t ** vpp)1369 sfs_lookup_snapshot(vnode_t *dvp, struct componentname *cnp, vnode_t **vpp)
1370 {
1371           zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
1372           vnode_t *vp;
1373           struct sfs_node *node;
1374           struct sfs_node_key key;
1375           char snapname[ZFS_MAX_DATASET_NAME_LEN];
1376           int error;
1377 
1378           /* Retrieve the snapshot object id and the to be mounted on vnode. */
1379           if (cnp->cn_namelen >= sizeof(snapname))
1380                     return ENOENT;
1381 
1382           strlcpy(snapname, cnp->cn_nameptr, cnp->cn_namelen + 1);
1383           error = dsl_dataset_snap_lookup( dmu_objset_ds(zfsvfs->z_os),
1384               snapname, &key.id);
1385           if (error)
1386                     return error;
1387           key.parent_id = ZFSCTL_INO_SNAPDIR;
1388           error = vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
1389           if (error)
1390                     return error;
1391 
1392           /* Handle case where the vnode is currently mounting. */
1393           vp = *vpp;
1394           mutex_enter(vp->v_interlock);
1395           node = VTOSFS(vp);
1396           if (node->sn_mounting) {
1397                     if (node->sn_mounting == curlwp)
1398                               error = 0;
1399                     else
1400                               error = ERESTART;
1401                     mutex_exit(vp->v_interlock);
1402                     if (error)
1403                               yield();
1404                     return error;
1405           }
1406 
1407           /* If not yet mounted mount the snapshot. */
1408           if (vp->v_mountedhere == NULL) {
1409                     ASSERT(node->sn_mounting == NULL);
1410                     node->sn_mounting = curlwp;
1411                     mutex_exit(vp->v_interlock);
1412 
1413                     VOP_UNLOCK(dvp, 0);
1414                     error = sfs_snapshot_mount(vp, snapname);
1415                     if (vn_lock(dvp, LK_EXCLUSIVE) != 0) {
1416                               vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1417                               error = ENOENT;
1418                     }
1419 
1420                     mutex_enter(vp->v_interlock);
1421                     if ((node = VTOSFS(vp)))
1422                               node->sn_mounting = NULL;
1423                     mutex_exit(vp->v_interlock);
1424 
1425                     if (error) {
1426                               vrele(vp);
1427                               *vpp = NULL;
1428                               return error;
1429                     }
1430           } else
1431                     mutex_exit(vp->v_interlock);
1432 
1433           /* Return the mounted root rather than the covered mount point.  */
1434           ASSERT(vp->v_mountedhere);
1435           error = VFS_ROOT(vp->v_mountedhere, LK_EXCLUSIVE, vpp);
1436           vrele(vp);
1437           if (error)
1438                     return error;
1439 
1440           /*
1441            * Fix up the root vnode mounted on .zfs/snapshot/<snapname>
1442            *
1443            * Here we make .zfs/snapshot/<snapname> accessible over NFS
1444            * without requiring manual mounts of <snapname>.
1445            */
1446           if (((*vpp)->v_vflag & VV_ROOT)) {
1447                     ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
1448                     VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
1449                     (*vpp)->v_vflag &= ~VV_ROOT;
1450           }
1451           VOP_UNLOCK(*vpp, 0);
1452 
1453           return 0;
1454 }
1455 
1456 static int
sfs_lookup(void * v)1457 sfs_lookup(void *v)
1458 {
1459           struct vop_lookup_v2_args /* {
1460                     struct vnode *a_dvp;
1461                     struct vnode **a_vpp;
1462                     struct componentname *a_cnp;
1463           } */ *ap = v;
1464           vnode_t *dvp = ap->a_dvp;
1465           vnode_t **vpp = ap->a_vpp;
1466           struct componentname *cnp = ap->a_cnp;
1467           zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
1468           struct sfs_node *dnode = VTOSFS(dvp);
1469           int error;
1470 
1471           SFS_NODE_ASSERT(dvp);
1472           ZFS_ENTER(zfsvfs);
1473 
1474           /*
1475            * No CREATE, DELETE or RENAME.
1476            */
1477           if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop != LOOKUP) {
1478                     ZFS_EXIT(zfsvfs);
1479 
1480                     return ENOTSUP;
1481           }
1482 
1483           /*
1484            * Handle DOT and DOTDOT.
1485            */
1486           if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
1487                     vref(dvp);
1488                     *vpp = dvp;
1489                     ZFS_EXIT(zfsvfs);
1490 
1491                     return 0;
1492           }
1493           if ((cnp->cn_flags & ISDOTDOT)) {
1494                     if (dnode->sn_parent_id == 0) {
1495                               error = vcache_get(zfsvfs->z_vfs,
1496                                   &zfsvfs->z_root, sizeof(zfsvfs->z_root), vpp);
1497                     } else if (dnode->sn_parent_id == ZFSCTL_INO_ROOT) {
1498                               error = zfsctl_root(zfsvfs, vpp);
1499                     } else if (dnode->sn_parent_id == ZFSCTL_INO_SNAPDIR) {
1500                               error = zfsctl_snapshot(zfsvfs, vpp);
1501                     } else {
1502                               error = ENOENT;
1503                     }
1504                     ZFS_EXIT(zfsvfs);
1505 
1506                     return error;
1507           }
1508 
1509           /*
1510            * Lookup in ".zfs".
1511            */
1512           if (dnode->sn_id == ZFSCTL_INO_ROOT) {
1513                     if (cnp->cn_namelen == strlen(ZFS_SNAPDIR_NAME) &&
1514                         strncmp(cnp->cn_nameptr, ZFS_SNAPDIR_NAME,
1515                         cnp->cn_namelen) == 0) {
1516                               error = zfsctl_snapshot(zfsvfs, vpp);
1517                     } else {
1518                               error = ENOENT;
1519                     }
1520                     ZFS_EXIT(zfsvfs);
1521 
1522                     return error;
1523           }
1524 
1525           /*
1526            * Lookup in ".zfs/snapshot".
1527            */
1528           if (dnode->sn_id == ZFSCTL_INO_SNAPDIR) {
1529                     error = sfs_lookup_snapshot(dvp, cnp, vpp);
1530                     ZFS_EXIT(zfsvfs);
1531 
1532                     return error;
1533           }
1534 
1535           vprint("sfs_lookup: unexpected node for lookup", dvp);
1536           ZFS_EXIT(zfsvfs);
1537 
1538           return ENOENT;
1539 }
1540 
1541 static int
sfs_open(void * v)1542 sfs_open(void *v)
1543 {
1544           struct vop_open_args /* {
1545                     struct vnode *a_vp;
1546                     int a_mode;
1547                     kauth_cred_t a_cred;
1548           } */ *ap = v;
1549           zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1550           int error = 0;
1551 
1552           SFS_NODE_ASSERT(ap->a_vp);
1553           ZFS_ENTER(zfsvfs);
1554 
1555           if (ap->a_mode & FWRITE)
1556                     error = EACCES;
1557 
1558           ZFS_EXIT(zfsvfs);
1559 
1560           return error;
1561 }
1562 
1563 static int
sfs_close(void * v)1564 sfs_close(void *v)
1565 {
1566           struct vop_close_args /* {
1567                     struct vnode *a_vp;
1568                     int a_mode;
1569                     kauth_cred_t a_cred;
1570           } */ *ap = v;
1571           zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1572 
1573           SFS_NODE_ASSERT(ap->a_vp);
1574           ZFS_ENTER(zfsvfs);
1575 
1576           ZFS_EXIT(zfsvfs);
1577 
1578           return 0;
1579 }
1580 
1581 static int
sfs_access(void * v)1582 sfs_access(void *v)
1583 {
1584           struct vop_access_args /* {
1585                     struct vnode *a_vp;
1586                     int a_mode;
1587                     kauth_cred_t a_cred;
1588           } */ *ap = v;
1589           zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1590           int error = 0;
1591 
1592           SFS_NODE_ASSERT(ap->a_vp);
1593           ZFS_ENTER(zfsvfs);
1594 
1595           if (ap->a_accmode & FWRITE)
1596                     error = EACCES;
1597 
1598           ZFS_EXIT(zfsvfs);
1599 
1600           return error;
1601 }
1602 
1603 static int
sfs_getattr(void * v)1604 sfs_getattr(void *v)
1605 {
1606           struct vop_getattr_args /* {
1607                     struct vnode *a_vp;
1608                     struct vattr *a_vap;
1609                     kauth_cred_t a_cred;
1610           } */ *ap = v;
1611           vnode_t *vp = ap->a_vp;
1612           struct sfs_node *node = VTOSFS(vp);
1613           struct vattr *vap = ap->a_vap;
1614           zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1615           dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
1616           timestruc_t now;
1617           uint64_t snap_count;
1618           int error;
1619 
1620           SFS_NODE_ASSERT(vp);
1621           ZFS_ENTER(zfsvfs);
1622 
1623           vap->va_type = VDIR;
1624           vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
1625               S_IROTH | S_IXOTH;
1626           vap->va_nlink = 2;
1627           vap->va_uid = 0;
1628           vap->va_gid = 0;
1629           vap->va_fsid = vp->v_vfsp->mnt_stat.f_fsid;
1630           vap->va_fileid = node->sn_id;
1631           vap->va_size = 0;
1632           vap->va_blocksize = 0;
1633           gethrestime(&now);
1634           vap->va_atime = now;
1635           vap->va_ctime = zfsvfs->z_ctldir->zc_cmtime;
1636           vap->va_mtime = vap->va_ctime;
1637           vap->va_birthtime = vap->va_ctime;
1638           vap->va_gen = 0;
1639           vap->va_flags = 0;
1640           vap->va_rdev = 0;
1641           vap->va_bytes = 0;
1642           vap->va_filerev = 0;
1643 
1644           switch (node->sn_id){
1645           case ZFSCTL_INO_ROOT:
1646                     vap->va_nlink += 1; /* snapdir */
1647                     vap->va_size = vap->va_nlink;
1648                     break;
1649           case ZFSCTL_INO_SNAPDIR:
1650                     if (dsl_dataset_phys(ds)->ds_snapnames_zapobj) {
1651                               error = zap_count(
1652                                   dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
1653                                   dsl_dataset_phys(ds)->ds_snapnames_zapobj,
1654                                   &snap_count);
1655                               if (error)
1656                                         return error;
1657                               vap->va_nlink += snap_count;
1658                     }
1659                     vap->va_size = vap->va_nlink;
1660                     break;
1661           }
1662 
1663           ZFS_EXIT(zfsvfs);
1664 
1665           return 0;
1666 }
1667 
1668 static int
sfs_readdir_one(struct vop_readdir_args * ap,struct dirent * dp,const char * name,ino_t ino,off_t * offp)1669 sfs_readdir_one(struct vop_readdir_args *ap, struct dirent *dp,
1670     const char *name, ino_t ino, off_t *offp)
1671 {
1672           int error;
1673 
1674           dp->d_fileno = ino;
1675           dp->d_type = DT_DIR;
1676           strlcpy(dp->d_name, name, sizeof(dp->d_name));
1677           dp->d_namlen = strlen(dp->d_name);
1678           dp->d_reclen = _DIRENT_SIZE(dp);
1679 
1680           if (ap->a_uio->uio_resid < dp->d_reclen)
1681                     return ENAMETOOLONG;
1682           if (ap->a_uio->uio_offset > *offp) {
1683                     *offp += dp->d_reclen;
1684                     return 0;
1685           }
1686 
1687           error = uiomove(dp, dp->d_reclen, UIO_READ, ap->a_uio);
1688           if (error)
1689                     return error;
1690           if (ap->a_ncookies)
1691                     (*ap->a_cookies)[(*ap->a_ncookies)++] = *offp;
1692           *offp += dp->d_reclen;
1693 
1694           return 0;
1695 }
1696 
1697 static int
sfs_readdir(void * v)1698 sfs_readdir(void *v)
1699 {
1700           struct vop_readdir_args /* {
1701                     struct vnode *a_vp;
1702                     struct uio *a_uio;
1703                     kauth_cred_t a_cred;
1704                     int *a_eofflag;
1705                     off_t **a_cookies;
1706                     int *a_ncookies;
1707           } */ *ap = v;
1708           vnode_t *vp = ap->a_vp;
1709           struct sfs_node *node = VTOSFS(vp);
1710           zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1711           struct dirent *dp;
1712           uint64_t parent;
1713           off_t offset;
1714           int error, ncookies;
1715 
1716           SFS_NODE_ASSERT(ap->a_vp);
1717           ZFS_ENTER(zfsvfs);
1718 
1719           parent = node->sn_parent_id == 0 ? zfsvfs->z_root : node->sn_parent_id;
1720           dp = kmem_alloc(sizeof(*dp), KM_SLEEP);
1721           if (ap->a_ncookies) {
1722                     ncookies = ap->a_uio->uio_resid / _DIRENT_MINSIZE(dp);
1723                     *ap->a_ncookies = 0;
1724                     *ap->a_cookies = malloc(ncookies * sizeof (off_t),
1725                         M_TEMP, M_WAITOK);
1726           }
1727 
1728           offset = 0;
1729           error = sfs_readdir_one(ap, dp, ".", node->sn_id, &offset);
1730           if (error == 0)
1731                     error = sfs_readdir_one(ap, dp, "..", parent, &offset);
1732           if (error == 0 && node->sn_id == ZFSCTL_INO_ROOT) {
1733                     error = sfs_readdir_one(ap, dp, ZFS_SNAPDIR_NAME,
1734                         ZFSCTL_INO_SNAPDIR, &offset);
1735           } else if (error == 0 && node->sn_id == ZFSCTL_INO_SNAPDIR) {
1736                     char snapname[ZFS_MAX_DATASET_NAME_LEN];
1737                     uint64_t cookie, id;
1738 
1739                     cookie = 0;
1740                     for (;;) {
1741                               dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os),
1742                                   FTAG);
1743                               error = dmu_snapshot_list_next(zfsvfs->z_os,
1744                                   sizeof(snapname), snapname, &id, &cookie, NULL);
1745                               dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os),
1746                                   FTAG);
1747                               if (error) {
1748                                         if (error == ENOENT)
1749                                                   error = 0;
1750                                         break;
1751                               }
1752                               error = sfs_readdir_one(ap, dp, snapname, id, &offset);
1753                               if (error)
1754                                         break;
1755                     }
1756           }
1757 
1758           if (ap->a_eofflag && error == 0)
1759                     *ap->a_eofflag = 1;
1760 
1761           if (error == ENAMETOOLONG)
1762                     error = 0;
1763 
1764           if (ap->a_ncookies && error) {
1765                     free(*ap->a_cookies, M_TEMP);
1766                     *ap->a_ncookies = 0;
1767                     *ap->a_cookies = NULL;
1768           }
1769           kmem_free(dp, sizeof(*dp));
1770 
1771           ZFS_EXIT(zfsvfs);
1772 
1773           return error;
1774 }
1775 
1776 static int
sfs_inactive(void * v)1777 sfs_inactive(void *v)
1778 {
1779           struct vop_inactive_v2_args /* {
1780                     struct vnode *a_vp;
1781                     bool *a_recycle;
1782           } */ *ap = v;
1783           vnode_t *vp = ap->a_vp;
1784           struct sfs_node *node = VTOSFS(vp);
1785 
1786           SFS_NODE_ASSERT(vp);
1787 
1788           *ap->a_recycle = (node->sn_parent_id == ZFSCTL_INO_SNAPDIR);
1789 
1790           return 0;
1791 }
1792 
1793 static int
sfs_reclaim(void * v)1794 sfs_reclaim(void *v)
1795 {
1796           struct vop_reclaim_v2_args /* {
1797                     struct vnode *a_vp;
1798           } */ *ap = v;
1799           vnode_t *vp = ap->a_vp;
1800           struct sfs_node *node = VTOSFS(vp);
1801 
1802           SFS_NODE_ASSERT(ap->a_vp);
1803 
1804           vp->v_data = NULL;
1805           VOP_UNLOCK(vp, 0);
1806 
1807           kmem_free(node, sizeof(*node));
1808 
1809           return 0;
1810 }
1811 
1812 static int
sfs_print(void * v)1813 sfs_print(void *v)
1814 {
1815           struct vop_print_args /* {
1816                     struct vnode *a_vp;
1817           } */ *ap = v;
1818           struct sfs_node *node = VTOSFS(ap->a_vp);
1819 
1820           SFS_NODE_ASSERT(ap->a_vp);
1821 
1822           printf("\tid %" PRIu64 ", parent %" PRIu64 "\n",
1823               node->sn_id, node->sn_parent_id);
1824 
1825           return 0;
1826 }
1827 
1828 const struct vnodeopv_entry_desc zfs_sfsop_entries[] = {
1829           { &vop_default_desc,                    vn_default_error },
1830           { &vop_parsepath_desc,                  genfs_parsepath },
1831           { &vop_lookup_desc,           sfs_lookup },
1832           { &vop_open_desc,             sfs_open },
1833           { &vop_close_desc,            sfs_close },
1834           { &vop_access_desc,           sfs_access },
1835           { &vop_getattr_desc,                    sfs_getattr },
1836           { &vop_lock_desc,             genfs_lock },
1837           { &vop_unlock_desc,           genfs_unlock },
1838           { &vop_readdir_desc,                    sfs_readdir },
1839           { &vop_inactive_desc,                   sfs_inactive },
1840           { &vop_reclaim_desc,                    sfs_reclaim },
1841           { &vop_seek_desc,             genfs_seek },
1842           { &vop_putpages_desc,                   genfs_null_putpages },
1843           { &vop_islocked_desc,                   genfs_islocked },
1844           { &vop_print_desc,            sfs_print },
1845           { &vop_pathconf_desc,                   genfs_pathconf },
1846           { NULL, NULL }
1847 };
1848 
1849 const struct vnodeopv_desc zfs_sfsop_opv_desc =
1850           { &zfs_sfsop_p, zfs_sfsop_entries };
1851 
/*
 * Module-wide initialization hook for the control directory code.
 * Intentionally empty in this implementation.
 */
void
zfsctl_init(void)
{
	/* Nothing to do. */
}
1856 
/*
 * Module-wide teardown hook for the control directory code.
 * Intentionally empty in this implementation.
 */
void
zfsctl_fini(void)
{
	/* Nothing to do. */
}
1861 
1862 int
zfsctl_loadvnode(vfs_t * vfsp,vnode_t * vp,const void * key,size_t key_len,const void ** new_key)1863 zfsctl_loadvnode(vfs_t *vfsp, vnode_t *vp,
1864     const void *key, size_t key_len, const void **new_key)
1865 {
1866           struct sfs_node_key node_key;
1867           struct sfs_node *node;
1868 
1869           if (key_len != sizeof(node_key))
1870                     return EINVAL;
1871           if ((vfsp->mnt_iflag & IMNT_UNMOUNT))
1872                     return ENOENT;
1873 
1874           memcpy(&node_key, key, key_len);
1875 
1876           node = kmem_alloc(sizeof(*node), KM_SLEEP);
1877 
1878           node->sn_mounting = NULL;
1879           node->sn_key = node_key;
1880 
1881           vp->v_data = node;
1882           vp->v_op = zfs_sfsop_p;
1883           vp->v_tag = VT_ZFS;
1884           vp->v_type = VDIR;
1885           uvm_vnp_setsize(vp, 0);
1886 
1887           *new_key = &node->sn_key;
1888 
1889           return 0;
1890 }
1891 
1892 int
zfsctl_vptofh(vnode_t * vp,fid_t * fidp,size_t * fh_size)1893 zfsctl_vptofh(vnode_t *vp, fid_t *fidp, size_t *fh_size)
1894 {
1895           struct sfs_node *node = VTOSFS(vp);
1896           uint64_t object = node->sn_id;
1897           zfid_short_t *zfid = (zfid_short_t *)fidp;
1898           int i;
1899 
1900           SFS_NODE_ASSERT(vp);
1901 
1902           if (*fh_size < SHORT_FID_LEN) {
1903                     *fh_size = SHORT_FID_LEN;
1904                     return SET_ERROR(E2BIG);
1905           }
1906           *fh_size = SHORT_FID_LEN;
1907 
1908           zfid->zf_len = SHORT_FID_LEN;
1909           for (i = 0; i < sizeof(zfid->zf_object); i++)
1910                     zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
1911 
1912           /* .zfs nodes always have a generation number of 0 */
1913           for (i = 0; i < sizeof(zfid->zf_gen); i++)
1914                     zfid->zf_gen[i] = 0;
1915 
1916           return 0;
1917 }
1918 
1919 /*
1920  * Return the ".zfs" vnode.
1921  */
1922 int
zfsctl_root(zfsvfs_t * zfsvfs,vnode_t ** vpp)1923 zfsctl_root(zfsvfs_t *zfsvfs, vnode_t **vpp)
1924 {
1925           struct sfs_node_key key = {
1926                     .parent_id = 0,
1927                     .id = ZFSCTL_INO_ROOT
1928           };
1929 
1930           return vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
1931 }
1932 
1933 /*
1934  * Return the ".zfs/snapshot" vnode.
1935  */
1936 int
zfsctl_snapshot(zfsvfs_t * zfsvfs,vnode_t ** vpp)1937 zfsctl_snapshot(zfsvfs_t *zfsvfs, vnode_t **vpp)
1938 {
1939           struct sfs_node_key key = {
1940                     .parent_id = ZFSCTL_INO_ROOT,
1941                     .id = ZFSCTL_INO_SNAPDIR
1942           };
1943 
1944           return vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
1945 }
1946 
1947 void
zfsctl_create(zfsvfs_t * zfsvfs)1948 zfsctl_create(zfsvfs_t *zfsvfs)
1949 {
1950           vnode_t *vp;
1951           struct zfsctl_root *zc;
1952           uint64_t crtime[2];
1953 
1954           zc = kmem_alloc(sizeof(*zc), KM_SLEEP);
1955 
1956           VERIFY(0 == VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &vp));
1957           VERIFY(0 == sa_lookup(VTOZ(vp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
1958               &crtime, sizeof(crtime)));
1959           vput(vp);
1960 
1961           ZFS_TIME_DECODE(&zc->zc_cmtime, crtime);
1962 
1963           ASSERT(zfsvfs->z_ctldir == NULL);
1964           zfsvfs->z_ctldir = zc;
1965 }
1966 
1967 void
zfsctl_destroy(zfsvfs_t * zfsvfs)1968 zfsctl_destroy(zfsvfs_t *zfsvfs)
1969 {
1970           struct zfsctl_root *zc = zfsvfs->z_ctldir;
1971 
1972           ASSERT(zfsvfs->z_ctldir);
1973           zfsvfs->z_ctldir = NULL;
1974           kmem_free(zc, sizeof(*zc));
1975 }
1976 
1977 int
zfsctl_lookup_objset(vfs_t * vfsp,uint64_t objsetid,zfsvfs_t ** zfsvfsp)1978 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1979 {
1980           struct sfs_node_key key = {
1981                     .parent_id = ZFSCTL_INO_SNAPDIR,
1982                     .id = objsetid
1983           };
1984           vnode_t *vp;
1985           int error;
1986 
1987           *zfsvfsp = NULL;
1988           error = vcache_get(vfsp, &key, sizeof(key), &vp);
1989           if (error == 0) {
1990                     if (vp->v_mountedhere)
1991                               *zfsvfsp = vp->v_mountedhere->mnt_data;
1992                     vrele(vp);
1993           }
1994           if (*zfsvfsp == NULL)
1995                     return SET_ERROR(EINVAL);
1996           return 0;
1997 }
1998 
1999 int
zfsctl_umount_snapshots(vfs_t * vfsp,int fflags,cred_t * cr)2000 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
2001 {
2002           char snapname[ZFS_MAX_DATASET_NAME_LEN];
2003           zfsvfs_t *zfsvfs = vfsp->vfs_data;
2004           struct mount *mp;
2005           vnode_t *vp;
2006           struct sfs_node_key key;
2007           uint64_t cookie;
2008           int error;
2009 
2010           ASSERT(zfsvfs->z_ctldir);
2011 
2012           cookie = 0;
2013           key.parent_id = ZFSCTL_INO_SNAPDIR;
2014           for (;;) {
2015                     dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
2016                     error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
2017                         snapname, &key.id, &cookie, NULL);
2018                     dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
2019                     if (error) {
2020                               if (error == ENOENT)
2021                                         error = 0;
2022                               break;
2023                     }
2024 
2025                     error = vcache_get(zfsvfs->z_vfs, &key, sizeof(key), &vp);
2026                     if (error == ENOENT)
2027                               continue;
2028                     else if (error)
2029                               break;
2030 
2031                     mp = vp->v_mountedhere;
2032                     if (mp == NULL) {
2033                               vrele(vp);
2034                               continue;
2035                     }
2036 
2037                     error = dounmount(mp, fflags, curthread);
2038                     vrele(vp);
2039                     if (error)
2040                               break;
2041           }
2042           ASSERT((fflags & MS_FORCE) == 0 || error == 0);
2043 
2044           return (error);
2045 }
2046 
2047 boolean_t
zfsctl_is_node(vnode_t * vp)2048 zfsctl_is_node(vnode_t *vp)
2049 {
2050 
2051           return (vp->v_op == zfs_sfsop_p);
2052 }
2053 #endif /* __NetBSD__ */
2054