1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24  * Copyright (c) 2014 Integros [integros.com]
25  */
26 
27 /* Portions Copyright 2007 Jeremy Teo */
28 /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
29 
30 #ifdef _KERNEL
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/resource.h>
37 #include <sys/mntent.h>
38 #include <sys/u8_textprep.h>
39 #include <sys/dsl_dataset.h>
40 #include <sys/vfs.h>
41 #include <sys/vnode.h>
42 #include <sys/file.h>
43 #include <sys/kmem.h>
44 #include <sys/errno.h>
45 #include <sys/unistd.h>
46 #include <sys/atomic.h>
47 #include <sys/zfs_dir.h>
48 #include <sys/zfs_acl.h>
49 #include <sys/zfs_ioctl.h>
50 #include <sys/zfs_rlock.h>
51 #include <sys/zfs_fuid.h>
52 #include <sys/dnode.h>
53 #include <sys/fs/zfs.h>
54 #include <sys/kidmap.h>
55 
56 #ifdef __NetBSD__
57 #include <sys/zfs_ctldir.h>
58 #include <miscfs/specfs/specdev.h>
59 
60 extern int (**zfs_vnodeop_p)(void *);
61 extern int (**zfs_fifoop_p)(void *);
62 extern int (**zfs_specop_p)(void *);
63 
64 #endif
65 #endif /* _KERNEL */
66 
67 #include <sys/dmu.h>
68 #include <sys/dmu_objset.h>
69 #include <sys/refcount.h>
70 #include <sys/stat.h>
71 #include <sys/zap.h>
72 #include <sys/zfs_znode.h>
73 #include <sys/sa.h>
74 #include <sys/zfs_sa.h>
75 #include <sys/zfs_stat.h>
76 #include <sys/refcount.h>
77 
78 #include "zfs_prop.h"
79 #include "zfs_comutil.h"
80 
81 /* Used by fstat(1). */
82 SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD,
83     SYSCTL_NULL_INT_PTR, sizeof(znode_t), "sizeof(znode_t)");
84 
/*
 * Statistic gathering is compiled in only when ZNODE_STATS is defined.
 * DEBUG builds define it automatically; any other build has to define it
 * explicitly.
 */
#if defined(DEBUG)
#define	ZNODE_STATS
#endif

/*
 * ZNODE_STAT_ADD(stat) increments the given counter by one when statistics
 * are enabled and expands to nothing when they are not.
 */
#if !defined(ZNODE_STATS)
#define	ZNODE_STAT_ADD(stat)	/* nothing */
#else
#define	ZNODE_STAT_ADD(stat)	((stat)++)
#endif
98 
/*
 * Functions needed for userland (i.e. libzpool) are not put under
 * #ifdef _KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */
104 #ifdef _KERNEL
105 /*
106  * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
107  * be freed before it can be safely accessed.
108  */
109 krwlock_t zfsvfs_lock;
110 
111 static kmem_cache_t *znode_cache = NULL;
112 
113 /*ARGSUSED*/
114 static void
znode_evict_error(dmu_buf_t * dbuf,void * user_ptr)115 znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
116 {
117           /*
118            * We should never drop all dbuf refs without first clearing
119            * the eviction callback.
120            */
121           panic("evicting znode %p\n", user_ptr);
122 }
123 
124 extern struct vop_vector zfs_vnodeops;
125 extern struct vop_vector zfs_fifoops;
126 extern struct vop_vector zfs_shareops;
127 
128 static int
zfs_znode_cache_constructor(void * buf,void * arg,int kmflags)129 zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
130 {
131           znode_t *zp = buf;
132 
133           POINTER_INVALIDATE(&zp->z_zfsvfs);
134 
135           list_link_init(&zp->z_link_node);
136 
137           mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
138 
139           mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
140           avl_create(&zp->z_range_avl, zfs_range_compare,
141               sizeof (rl_t), offsetof(rl_t, r_node));
142 
143           zp->z_acl_cached = NULL;
144           zp->z_vnode = NULL;
145           zp->z_moved = 0;
146           return (0);
147 }
148 
149 /*ARGSUSED*/
150 static void
zfs_znode_cache_destructor(void * buf,void * arg)151 zfs_znode_cache_destructor(void *buf, void *arg)
152 {
153           znode_t *zp = buf;
154 
155           ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
156           ASSERT(ZTOV(zp) == NULL);
157 #ifndef __NetBSD__
158           vn_free(ZTOV(zp));
159 #endif
160           ASSERT(!list_link_active(&zp->z_link_node));
161           mutex_destroy(&zp->z_acl_lock);
162           avl_destroy(&zp->z_range_avl);
163           mutex_destroy(&zp->z_range_lock);
164 
165           ASSERT(zp->z_acl_cached == NULL);
166 }
167 
#if defined(ZNODE_STATS)
/*
 * Counters recording why zfs_znode_move() declined to move a znode.  Each
 * counter is bumped through ZNODE_STAT_ADD() on the matching bail-out path.
 */
static struct {
	uint64_t zms_zfsvfs_invalid;	/* zfsvfs pointer was not valid */
	uint64_t zms_zfsvfs_recheck1;	/* zfsvfs changed before zfsvfs_lock */
	uint64_t zms_zfsvfs_unmounted;	/* file system already unmounted */
	uint64_t zms_zfsvfs_recheck2;	/* zfsvfs changed before z_znodes_lock */
	uint64_t zms_obj_held;		/* object hold try-enter failed */
	uint64_t zms_vnode_locked;	/* vnode v_lock try-enter failed */
	uint64_t zms_not_only_dnlc;	/* vnode not held solely by the DNLC */
} znode_move_stats;
#endif	/* ZNODE_STATS */
179 
180 #ifdef illumos
181 static void
zfs_znode_move_impl(znode_t * ozp,znode_t * nzp)182 zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
183 {
184           vnode_t *vp;
185 
186           /* Copy fields. */
187           nzp->z_zfsvfs = ozp->z_zfsvfs;
188 
189           /* Swap vnodes. */
190           vp = nzp->z_vnode;
191           nzp->z_vnode = ozp->z_vnode;
192           ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
193           ZTOV(ozp)->v_data = ozp;
194           ZTOV(nzp)->v_data = nzp;
195 
196           nzp->z_id = ozp->z_id;
197           ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
198           ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
199           nzp->z_unlinked = ozp->z_unlinked;
200           nzp->z_atime_dirty = ozp->z_atime_dirty;
201           nzp->z_zn_prefetch = ozp->z_zn_prefetch;
202           nzp->z_blksz = ozp->z_blksz;
203           nzp->z_seq = ozp->z_seq;
204           nzp->z_mapcnt = ozp->z_mapcnt;
205           nzp->z_gen = ozp->z_gen;
206           nzp->z_sync_cnt = ozp->z_sync_cnt;
207           nzp->z_is_sa = ozp->z_is_sa;
208           nzp->z_sa_hdl = ozp->z_sa_hdl;
209           bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
210           nzp->z_links = ozp->z_links;
211           nzp->z_size = ozp->z_size;
212           nzp->z_pflags = ozp->z_pflags;
213           nzp->z_uid = ozp->z_uid;
214           nzp->z_gid = ozp->z_gid;
215           nzp->z_mode = ozp->z_mode;
216 
217           /*
218            * Since this is just an idle znode and kmem is already dealing with
219            * memory pressure, release any cached ACL.
220            */
221           if (ozp->z_acl_cached) {
222                     zfs_acl_free(ozp->z_acl_cached);
223                     ozp->z_acl_cached = NULL;
224           }
225 
226           sa_set_userp(nzp->z_sa_hdl, nzp);
227 
228           /*
229            * Invalidate the original znode by clearing fields that provide a
230            * pointer back to the znode. Set the low bit of the vfs pointer to
231            * ensure that zfs_znode_move() recognizes the znode as invalid in any
232            * subsequent callback.
233            */
234           ozp->z_sa_hdl = NULL;
235           POINTER_INVALIDATE(&ozp->z_zfsvfs);
236 
237           /*
238            * Mark the znode.
239            */
240           nzp->z_moved = 1;
241           ozp->z_moved = (uint8_t)-1;
242 }
243 
244 /*ARGSUSED*/
245 static kmem_cbrc_t
zfs_znode_move(void * buf,void * newbuf,size_t size,void * arg)246 zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
247 {
248           znode_t *ozp = buf, *nzp = newbuf;
249           zfsvfs_t *zfsvfs;
250           vnode_t *vp;
251 
252           /*
253            * The znode is on the file system's list of known znodes if the vfs
254            * pointer is valid. We set the low bit of the vfs pointer when freeing
255            * the znode to invalidate it, and the memory patterns written by kmem
256            * (baddcafe and deadbeef) set at least one of the two low bits. A newly
257            * created znode sets the vfs pointer last of all to indicate that the
258            * znode is known and in a valid state to be moved by this function.
259            */
260           zfsvfs = ozp->z_zfsvfs;
261           if (!POINTER_IS_VALID(zfsvfs)) {
262                     ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
263                     return (KMEM_CBRC_DONT_KNOW);
264           }
265 
266           /*
267            * Close a small window in which it's possible that the filesystem could
268            * be unmounted and freed, and zfsvfs, though valid in the previous
269            * statement, could point to unrelated memory by the time we try to
270            * prevent the filesystem from being unmounted.
271            */
272           rw_enter(&zfsvfs_lock, RW_WRITER);
273           if (zfsvfs != ozp->z_zfsvfs) {
274                     rw_exit(&zfsvfs_lock);
275                     ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
276                     return (KMEM_CBRC_DONT_KNOW);
277           }
278 
279           /*
280            * If the znode is still valid, then so is the file system. We know that
281            * no valid file system can be freed while we hold zfsvfs_lock, so we
282            * can safely ensure that the filesystem is not and will not be
283            * unmounted. The next statement is equivalent to ZFS_ENTER().
284            */
285           rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
286           if (zfsvfs->z_unmounted) {
287                     ZFS_EXIT(zfsvfs);
288                     rw_exit(&zfsvfs_lock);
289                     ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
290                     return (KMEM_CBRC_DONT_KNOW);
291           }
292           rw_exit(&zfsvfs_lock);
293 
294           mutex_enter(&zfsvfs->z_znodes_lock);
295           /*
296            * Recheck the vfs pointer in case the znode was removed just before
297            * acquiring the lock.
298            */
299           if (zfsvfs != ozp->z_zfsvfs) {
300                     mutex_exit(&zfsvfs->z_znodes_lock);
301                     ZFS_EXIT(zfsvfs);
302                     ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
303                     return (KMEM_CBRC_DONT_KNOW);
304           }
305 
306           /*
307            * At this point we know that as long as we hold z_znodes_lock, the
308            * znode cannot be freed and fields within the znode can be safely
309            * accessed. Now, prevent a race with zfs_zget().
310            */
311           if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
312                     mutex_exit(&zfsvfs->z_znodes_lock);
313                     ZFS_EXIT(zfsvfs);
314                     ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
315                     return (KMEM_CBRC_LATER);
316           }
317 
318           vp = ZTOV(ozp);
319           if (mutex_tryenter(&vp->v_lock) == 0) {
320                     ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
321                     mutex_exit(&zfsvfs->z_znodes_lock);
322                     ZFS_EXIT(zfsvfs);
323                     ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
324                     return (KMEM_CBRC_LATER);
325           }
326 
327           /* Only move znodes that are referenced _only_ by the DNLC. */
328           if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
329                     mutex_exit(&vp->v_lock);
330                     ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
331                     mutex_exit(&zfsvfs->z_znodes_lock);
332                     ZFS_EXIT(zfsvfs);
333                     ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
334                     return (KMEM_CBRC_LATER);
335           }
336 
337           /*
338            * The znode is known and in a valid state to move. We're holding the
339            * locks needed to execute the critical section.
340            */
341           zfs_znode_move_impl(ozp, nzp);
342           mutex_exit(&vp->v_lock);
343           ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
344 
345           list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
346           mutex_exit(&zfsvfs->z_znodes_lock);
347           ZFS_EXIT(zfsvfs);
348 
349           return (KMEM_CBRC_YES);
350 }
351 #endif /* illumos */
352 
353 void
zfs_znode_init(void)354 zfs_znode_init(void)
355 {
356           /*
357            * Initialize zcache
358            */
359           rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
360           ASSERT(znode_cache == NULL);
361           znode_cache = kmem_cache_create("zfs_znode_cache",
362               sizeof (znode_t), 0, zfs_znode_cache_constructor,
363               zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
364           kmem_cache_set_move(znode_cache, zfs_znode_move);
365 }
366 
367 void
zfs_znode_fini(void)368 zfs_znode_fini(void)
369 {
370 #ifdef illumos
371           /*
372            * Cleanup vfs & vnode ops
373            */
374           zfs_remove_op_tables();
375 #endif
376 
377           /*
378            * Cleanup zcache
379            */
380           if (znode_cache)
381                     kmem_cache_destroy(znode_cache);
382           znode_cache = NULL;
383           rw_destroy(&zfsvfs_lock);
384 }
385 
386 #ifdef illumos
/*
 * Vnode operation tables.  zfs_create_op_tables() builds each one from the
 * matching zfs_*vnodeops_template; zfs_remove_op_tables() frees them and
 * resets the pointers to NULL.
 */
struct vnodeops *zfs_dvnodeops;
struct vnodeops *zfs_fvnodeops;
struct vnodeops *zfs_symvnodeops;
struct vnodeops *zfs_xdvnodeops;
struct vnodeops *zfs_evnodeops;
struct vnodeops *zfs_sharevnodeops;
393 
394 void
zfs_remove_op_tables()395 zfs_remove_op_tables()
396 {
397           /*
398            * Remove vfs ops
399            */
400           ASSERT(zfsfstype);
401           (void) vfs_freevfsops_by_type(zfsfstype);
402           zfsfstype = 0;
403 
404           /*
405            * Remove vnode ops
406            */
407           if (zfs_dvnodeops)
408                     vn_freevnodeops(zfs_dvnodeops);
409           if (zfs_fvnodeops)
410                     vn_freevnodeops(zfs_fvnodeops);
411           if (zfs_symvnodeops)
412                     vn_freevnodeops(zfs_symvnodeops);
413           if (zfs_xdvnodeops)
414                     vn_freevnodeops(zfs_xdvnodeops);
415           if (zfs_evnodeops)
416                     vn_freevnodeops(zfs_evnodeops);
417           if (zfs_sharevnodeops)
418                     vn_freevnodeops(zfs_sharevnodeops);
419 
420           zfs_dvnodeops = NULL;
421           zfs_fvnodeops = NULL;
422           zfs_symvnodeops = NULL;
423           zfs_xdvnodeops = NULL;
424           zfs_evnodeops = NULL;
425           zfs_sharevnodeops = NULL;
426 }
427 
428 extern const fs_operation_def_t zfs_dvnodeops_template[];
429 extern const fs_operation_def_t zfs_fvnodeops_template[];
430 extern const fs_operation_def_t zfs_xdvnodeops_template[];
431 extern const fs_operation_def_t zfs_symvnodeops_template[];
432 extern const fs_operation_def_t zfs_evnodeops_template[];
433 extern const fs_operation_def_t zfs_sharevnodeops_template[];
434 
435 int
zfs_create_op_tables()436 zfs_create_op_tables()
437 {
438           int error;
439 
440           /*
441            * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
442            * due to a failure to remove the the 2nd modlinkage (zfs_modldrv).
443            * In this case we just return as the ops vectors are already set up.
444            */
445           if (zfs_dvnodeops)
446                     return (0);
447 
448           error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
449               &zfs_dvnodeops);
450           if (error)
451                     return (error);
452 
453           error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
454               &zfs_fvnodeops);
455           if (error)
456                     return (error);
457 
458           error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
459               &zfs_symvnodeops);
460           if (error)
461                     return (error);
462 
463           error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
464               &zfs_xdvnodeops);
465           if (error)
466                     return (error);
467 
468           error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
469               &zfs_evnodeops);
470           if (error)
471                     return (error);
472 
473           error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
474               &zfs_sharevnodeops);
475 
476           return (error);
477 }
478 #endif    /* illumos */
479 
480 int
zfs_create_share_dir(zfsvfs_t * zfsvfs,dmu_tx_t * tx)481 zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
482 {
483           zfs_acl_ids_t acl_ids;
484           vattr_t vattr;
485           znode_t *sharezp;
486           znode_t *zp;
487           int error;
488 
489           vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
490           vattr.va_type = VDIR;
491           vattr.va_mode = S_IFDIR|0555;
492           vattr.va_uid = crgetuid(kcred);
493           vattr.va_gid = crgetgid(kcred);
494 
495           sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
496           ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
497           sharezp->z_moved = 0;
498           sharezp->z_unlinked = 0;
499           sharezp->z_atime_dirty = 0;
500           sharezp->z_zfsvfs = zfsvfs;
501           sharezp->z_is_sa = zfsvfs->z_use_sa;
502 
503           VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
504               kcred, NULL, &acl_ids));
505           zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
506           ASSERT3P(zp, ==, sharezp);
507           POINTER_INVALIDATE(&sharezp->z_zfsvfs);
508           error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
509               ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
510           zfsvfs->z_shares_dir = sharezp->z_id;
511 
512           zfs_acl_ids_free(&acl_ids);
513           sa_handle_destroy(sharezp->z_sa_hdl);
514           kmem_cache_free(znode_cache, sharezp);
515 
516           return (error);
517 }
518 
/*
 * Layout constants for the 64-bit expanded device number.  They are defined
 * here only when the platform headers do not provide them, so that the same
 * values are available in both 64 and 32 bit environments.
 */
#ifndef	NBITSMINOR64
#define	NBITSMINOR64	32
#endif
#ifndef	MAXMAJ64
#define	MAXMAJ64	0xffffffffUL
#endif
#ifndef	MAXMIN64
#define	MAXMIN64	0xffffffffUL
#endif

/*
 * ZFS-private expldev: expand a dev_t into a 64-bit value that carries the
 * major number above bit NBITSMINOR64 and the minor number below it.
 *
 * The standard expldev() is not usable here because it takes a dev32_t in
 * LP64 and expands it to a long dev_t, whereas ZFS needs an interface that
 * takes a dev32_t in ILP32 and expands it to a long dev_t.
 */
static uint64_t
zfs_expldev(dev_t dev)
{
	uint64_t major_bits = (uint64_t)major(dev) << NBITSMINOR64;

	return (major_bits | minor(dev));
}

/*
 * ZFS-private cmpldev: the inverse of zfs_expldev().  Splits a 64-bit
 * expanded device number into its major and minor parts and rebuilds a
 * dev_t from them with makedev().
 *
 * The standard cmpldev() is not usable here because it takes a long dev_t
 * and compresses it to a dev32_t in LP64, whereas ZFS needs to compact a
 * long dev_t to a dev32_t in ILP32.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
	uint64_t major_part = dev >> NBITSMINOR64;
	uint64_t minor_part = dev & MAXMIN64;

	return (makedev(major_part, minor_part));
}
558 
559 static void
zfs_znode_sa_init(zfsvfs_t * zfsvfs,znode_t * zp,dmu_buf_t * db,dmu_object_type_t obj_type,sa_handle_t * sa_hdl)560 zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
561     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
562 {
563           ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
564           ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
565 
566           ASSERT(zp->z_sa_hdl == NULL);
567           ASSERT(zp->z_acl_cached == NULL);
568           if (sa_hdl == NULL) {
569                     VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
570                         SA_HDL_SHARED, &zp->z_sa_hdl));
571           } else {
572                     zp->z_sa_hdl = sa_hdl;
573                     sa_set_userp(sa_hdl, zp);
574           }
575 
576           zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
577 
578           /*
579            * Slap on VROOT if we are the root znode unless we are the root
580            * node of a snapshot mounted under .zfs.
581            */
582           if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs)
583                     ZTOV(zp)->v_flag |= VROOT;
584 
585           vn_exists(ZTOV(zp));
586 }
587 
588 void
zfs_znode_dmu_fini(znode_t * zp)589 zfs_znode_dmu_fini(znode_t *zp)
590 {
591           ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
592               zp->z_unlinked ||
593               RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
594 
595           sa_handle_destroy(zp->z_sa_hdl);
596           zp->z_sa_hdl = NULL;
597 }
598 
599 #ifdef __FreeBSD__
600 static void
zfs_vnode_forget(vnode_t * vp)601 zfs_vnode_forget(vnode_t *vp)
602 {
603 
604           /* copied from insmntque_stddtr */
605           vp->v_data = NULL;
606           vp->v_op = &dead_vnodeops;
607           vgone(vp);
608           vput(vp);
609 }
610 #endif /* __FreeBSD__ */
611 
612 /*
613  * Construct a new znode/vnode and intialize.
614  *
615  * This does not do a call to dmu_set_user() that is
616  * up to the caller to do, in case you don't want to
617  * return the znode
618  */
619 static znode_t *
620 #ifdef __NetBSD__
zfs_znode_alloc(zfsvfs_t * zfsvfs,dmu_buf_t * db,int blksz,dmu_object_type_t obj_type,sa_handle_t * hdl,vnode_t * vp)621 zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
622     dmu_object_type_t obj_type, sa_handle_t *hdl, vnode_t *vp)
623 #else
624 zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
625     dmu_object_type_t obj_type, sa_handle_t *hdl)
626 #endif
627 {
628           znode_t   *zp;
629 #ifndef __NetBSD__
630           vnode_t *vp;
631 #endif
632           uint64_t mode;
633           uint64_t parent;
634           sa_bulk_attr_t bulk[9];
635           int count = 0;
636           int error;
637 
638           zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
639 
640 #ifndef __NetBSD__
641           KASSERT(curthread->td_vp_reserv > 0,
642               ("zfs_znode_alloc: getnewvnode without any vnodes reserved"));
643           error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp);
644           if (error != 0) {
645                     kmem_cache_free(znode_cache, zp);
646                     return (NULL);
647           }
648 #endif
649           zp->z_vnode = vp;
650           vp->v_data = zp;
651 
652           ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
653           zp->z_moved = 0;
654 
655           /*
656            * Defer setting z_zfsvfs until the znode is ready to be a candidate for
657            * the zfs_znode_move() callback.
658            */
659           zp->z_sa_hdl = NULL;
660           zp->z_unlinked = 0;
661           zp->z_atime_dirty = 0;
662           zp->z_mapcnt = 0;
663           zp->z_id = db->db_object;
664           zp->z_blksz = blksz;
665           zp->z_seq = 0x7A4653;
666           zp->z_sync_cnt = 0;
667 
668 #ifdef __NetBSD__
669           vp->v_op = zfs_vnodeop_p;
670           vp->v_tag = VT_ZFS;
671           zp->z_lockf = NULL;
672 #endif
673 
674           vp = ZTOV(zp);
675 
676           zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
677 
678           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
679           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
680           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
681               &zp->z_size, 8);
682           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
683               &zp->z_links, 8);
684           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
685               &zp->z_pflags, 8);
686           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
687           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
688               &zp->z_atime, 16);
689           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
690               &zp->z_uid, 8);
691           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
692               &zp->z_gid, 8);
693 
694           if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
695                     if (hdl == NULL)
696                               sa_handle_destroy(zp->z_sa_hdl);
697 #ifndef __NetBSD__
698                     zfs_vnode_forget(vp);
699 #endif
700                     zp->z_vnode = NULL;
701                     kmem_cache_free(znode_cache, zp);
702                     return (NULL);
703           }
704 
705           zp->z_mode = mode;
706 
707           vp->v_type = IFTOVT((mode_t)mode);
708 
709           switch (vp->v_type) {
710           case VDIR:
711                     zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
712                     break;
713 #if defined(illumos) || defined(__NetBSD__)
714           case VBLK:
715           case VCHR:
716                     {
717                               uint64_t rdev;
718                               VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
719                                   &rdev, sizeof (rdev)) == 0);
720 
721 #ifdef illumos
722                               vp->v_rdev = zfs_cmpldev(rdev);
723 #else
724                               vp->v_op = zfs_specop_p;
725                     spec_node_init(vp, zfs_cmpldev(rdev));
726 #endif
727                     }
728                     break;
729 #endif
730           case VFIFO:
731 #ifdef __NetBSD__
732                     vp->v_op = zfs_fifoop_p;
733                     break;
734 #else /* __NetBSD__ */
735 #ifdef illumos
736           case VSOCK:
737           case VDOOR:
738 #endif
739                     vp->v_op = &zfs_fifoops;
740                     break;
741           case VREG:
742                     if (parent == zfsvfs->z_shares_dir) {
743                               ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
744                               vp->v_op = &zfs_shareops;
745                     }
746                     break;
747 #ifdef illumos
748           case VLNK:
749                     vn_setops(vp, zfs_symvnodeops);
750                     break;
751           default:
752                     vn_setops(vp, zfs_evnodeops);
753                     break;
754 #endif
755 #endif /* __NetBSD__ */
756           }
757 
758 #ifdef __NetBSD__
759           extern const struct genfs_ops zfs_genfsops;
760           genfs_node_init(vp, &zfs_genfsops);
761           uvm_vnp_setsize(vp, zp->z_size);
762 #endif
763 
764           mutex_enter(&zfsvfs->z_znodes_lock);
765           list_insert_tail(&zfsvfs->z_all_znodes, zp);
766           membar_producer();
767           /*
768            * Everything else must be valid before assigning z_zfsvfs makes the
769            * znode eligible for zfs_znode_move().
770            */
771           zp->z_zfsvfs = zfsvfs;
772           mutex_exit(&zfsvfs->z_znodes_lock);
773 
774 #ifndef __NetBSD__
775           /*
776            * Acquire vnode lock before making it available to the world.
777            */
778           vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
779           VN_LOCK_AREC(vp);
780           if (vp->v_type != VFIFO)
781                     VN_LOCK_ASHARE(vp);
782 #endif
783 
784 #if defined(illumos) || defined(__NetBSD__)
785           VFS_HOLD(zfsvfs->z_vfs);
786 #endif
787           return (zp);
788 }
789 
790 static uint64_t empty_xattr;
791 static uint64_t pad[4];
792 static zfs_acl_phys_t acl_phys;
793 /*
794  * Create a new DMU object to hold a zfs znode.
795  *
796  *        IN:       dzp       - parent directory for new znode
797  *                  vap       - file attributes for new znode
798  *                  tx        - dmu transaction id for zap operations
799  *                  cr        - credentials of caller
800  *                  flag      - flags:
801  *                              IS_ROOT_NODE      - new object will be root
802  *                              IS_XATTR          - new object is an attribute
803  *                  bonuslen - length of bonus buffer
804  *                  setaclp  - File/Dir initial ACL
805  *                  fuidp      - Tracks fuid allocation.
806  *
807  *        OUT:      zpp       - allocated znode
808  *
809  */
810 #ifdef __NetBSD__
811 struct zfs_newvnode_args {
812           dmu_tx_t *tx;
813           uint_t flag;
814           zfs_acl_ids_t *acl_ids;
815 };
816 
817 static void
818 zfs_mknode1(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
819     uint_t, znode_t **, zfs_acl_ids_t *, vnode_t *);
820 
821 int
zfs_loadvnode(struct mount * mp,struct vnode * vp,const void * key,size_t key_len,const void ** new_key)822 zfs_loadvnode(struct mount *mp, struct vnode *vp,
823     const void *key, size_t key_len, const void **new_key)
824 {
825           int err, blksz;
826           uint64_t obj_num;
827           zfsvfs_t *zfsvfs;
828           dmu_buf_t *db;
829           dmu_object_info_t doi;
830           dmu_object_type_t obj_type;
831           sa_handle_t *hdl;
832           znode_t *zp;
833 
834           if (key_len != sizeof(obj_num))
835                     return zfsctl_loadvnode(mp, vp, key, key_len, new_key);
836 
837           memcpy(&obj_num, key, key_len);
838 
839           zfsvfs = mp->mnt_data;
840 
841           ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
842 
843           err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
844           if (err) {
845                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
846                     return (SET_ERROR(err));
847           }
848 
849           dmu_object_info_from_db(db, &doi);
850           if (doi.doi_bonus_type != DMU_OT_SA &&
851               (doi.doi_bonus_type != DMU_OT_ZNODE ||
852               (doi.doi_bonus_type == DMU_OT_ZNODE &&
853               doi.doi_bonus_size < sizeof (znode_phys_t)))) {
854                     sa_buf_rele(db, NULL);
855                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
856                     return (SET_ERROR(EINVAL));
857           }
858           blksz = doi.doi_data_block_size;
859           obj_type = doi.doi_bonus_type;
860           hdl = dmu_buf_get_user(db);
861 
862           if (hdl != NULL) {
863                     sa_buf_rele(db, NULL);
864                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
865                     return (SET_ERROR(ENOENT));
866           }
867 
868           zp = zfs_znode_alloc(zfsvfs, db, blksz, obj_type, hdl, vp);
869           if (zp == NULL) {
870                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
871                     return (SET_ERROR(ENOENT));
872           }
873           ASSERT(zp == VTOZ(vp));
874           cache_enter_id(vp, zp->z_mode, zp->z_uid, zp->z_gid, true);
875 
876           ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
877 
878           *new_key = &zp->z_id;
879 
880           return 0;
881 }
882 
883 int
zfs_newvnode(struct mount * mp,vnode_t * dvp,vnode_t * vp,vattr_t * vap,cred_t * cr,void * extra,size_t * key_len,const void ** new_key)884 zfs_newvnode(struct mount *mp, vnode_t *dvp, vnode_t *vp, vattr_t *vap,
885     cred_t *cr, void *extra, size_t *key_len, const void **new_key)
886 {
887           struct zfs_newvnode_args *args = extra;
888           znode_t *zp, *dzp = VTOZ(dvp);
889           dmu_tx_t *tx = args->tx;
890           uint_t flag = args->flag;
891           zfs_acl_ids_t *acl_ids = args->acl_ids;
892 
893           zfs_mknode1(dzp, vap, tx, cr, flag, &zp, acl_ids, vp);
894           ASSERT(zp == VTOZ(vp));
895           cache_enter_id(vp, zp->z_mode, zp->z_uid, zp->z_gid, true);
896 
897           *key_len = sizeof(zp->z_id);
898           *new_key = &zp->z_id;
899 
900           return 0;
901 }
902 
903 void
zfs_mknode(znode_t * dzp,vattr_t * vap,dmu_tx_t * tx,cred_t * cr,uint_t flag,znode_t ** zpp,zfs_acl_ids_t * acl_ids)904 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
905     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
906 {
907           vnode_t *vp, *dvp = ZTOV(dzp);
908           zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
909           struct zfs_newvnode_args args = { tx, flag, acl_ids };
910 
911           if (flag & IS_ROOT_NODE)
912                     return zfs_mknode1(dzp, vap, tx, cr, flag, zpp, acl_ids, NULL);
913 
914           VERIFY(vcache_new(zfsvfs->z_vfs, dvp, vap, cr, &args, &vp) == 0);
915           vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
916           *zpp = VTOZ(vp);
917 }
918 
919 static void
zfs_mknode1(znode_t * dzp,vattr_t * vap,dmu_tx_t * tx,cred_t * cr,uint_t flag,znode_t ** zpp,zfs_acl_ids_t * acl_ids,vnode_t * vp)920 zfs_mknode1(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
921     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids, vnode_t *vp)
922 #else
923 void
924 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
925     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
926 #endif
927 {
928           uint64_t  crtime[2], atime[2], mtime[2], ctime[2];
929           uint64_t  mode, size, links, parent, pflags;
930           uint64_t  dzp_pflags = 0;
931           uint64_t  rdev = 0;
932           zfsvfs_t  *zfsvfs = dzp->z_zfsvfs;
933           dmu_buf_t *db;
934           timestruc_t         now;
935           uint64_t  gen, obj;
936           int                 err;
937           int                 bonuslen;
938           sa_handle_t         *sa_hdl;
939           dmu_object_type_t obj_type;
940           sa_bulk_attr_t      sa_attrs[ZPL_END];
941           int                 cnt = 0;
942           zfs_acl_locator_cb_t locate = { 0 };
943 
944           ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
945 
946           if (zfsvfs->z_replay) {
947                     obj = vap->va_nodeid;
948                     now = vap->va_ctime;                    /* see zfs_replay_create() */
949                     gen = vap->va_nblocks;                  /* ditto */
950           } else {
951                     obj = 0;
952                     vfs_timestamp(&now);
953                     gen = dmu_tx_get_txg(tx);
954           }
955 
956           obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
957           bonuslen = (obj_type == DMU_OT_SA) ?
958               DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;
959 
960           /*
961            * Create a new DMU object.
962            */
963           /*
964            * There's currently no mechanism for pre-reading the blocks that will
965            * be needed to allocate a new object, so we accept the small chance
966            * that there will be an i/o error and we will fail one of the
967            * assertions below.
968            */
969           if (vap->va_type == VDIR) {
970                     if (zfsvfs->z_replay) {
971                               VERIFY0(zap_create_claim_norm(zfsvfs->z_os, obj,
972                                   zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
973                                   obj_type, bonuslen, tx));
974                     } else {
975                               obj = zap_create_norm(zfsvfs->z_os,
976                                   zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
977                                   obj_type, bonuslen, tx);
978                     }
979           } else {
980                     if (zfsvfs->z_replay) {
981                               VERIFY0(dmu_object_claim(zfsvfs->z_os, obj,
982                                   DMU_OT_PLAIN_FILE_CONTENTS, 0,
983                                   obj_type, bonuslen, tx));
984                     } else {
985                               obj = dmu_object_alloc(zfsvfs->z_os,
986                                   DMU_OT_PLAIN_FILE_CONTENTS, 0,
987                                   obj_type, bonuslen, tx);
988                     }
989           }
990 
991           ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
992           VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
993 
994           /*
995            * If this is the root, fix up the half-initialized parent pointer
996            * to reference the just-allocated physical data area.
997            */
998           if (flag & IS_ROOT_NODE) {
999                     dzp->z_id = obj;
1000           } else {
1001                     dzp_pflags = dzp->z_pflags;
1002           }
1003 
1004           /*
1005            * If parent is an xattr, so am I.
1006            */
1007           if (dzp_pflags & ZFS_XATTR) {
1008                     flag |= IS_XATTR;
1009           }
1010 
1011           if (zfsvfs->z_use_fuids)
1012                     pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
1013           else
1014                     pflags = 0;
1015 
1016           if (vap->va_type == VDIR) {
1017                     size = 2;           /* contents ("." and "..") */
1018                     links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
1019           } else {
1020                     size = links = 0;
1021           }
1022 
1023           if (vap->va_type == VBLK || vap->va_type == VCHR) {
1024                     rdev = zfs_expldev(vap->va_rdev);
1025           }
1026 
1027           parent = dzp->z_id;
1028           mode = acl_ids->z_mode;
1029           if (flag & IS_XATTR)
1030                     pflags |= ZFS_XATTR;
1031 
1032           /*
1033            * No execs denied will be deterimed when zfs_mode_compute() is called.
1034            */
1035           pflags |= acl_ids->z_aclp->z_hints &
1036               (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
1037               ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
1038 
1039           ZFS_TIME_ENCODE(&now, crtime);
1040           ZFS_TIME_ENCODE(&now, ctime);
1041 
1042           if (vap->va_mask & AT_ATIME) {
1043                     ZFS_TIME_ENCODE(&vap->va_atime, atime);
1044           } else {
1045                     ZFS_TIME_ENCODE(&now, atime);
1046           }
1047 
1048           if (vap->va_mask & AT_MTIME) {
1049                     ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
1050           } else {
1051                     ZFS_TIME_ENCODE(&now, mtime);
1052           }
1053 
1054           /* Now add in all of the "SA" attributes */
1055           VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
1056               &sa_hdl));
1057 
1058           /*
1059            * Setup the array of attributes to be replaced/set on the new file
1060            *
1061            * order for  DMU_OT_ZNODE is critical since it needs to be constructed
1062            * in the old znode_phys_t format.  Don't change this ordering
1063            */
1064 
1065           if (obj_type == DMU_OT_ZNODE) {
1066                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
1067                         NULL, &atime, 16);
1068                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
1069                         NULL, &mtime, 16);
1070                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
1071                         NULL, &ctime, 16);
1072                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
1073                         NULL, &crtime, 16);
1074                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
1075                         NULL, &gen, 8);
1076                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
1077                         NULL, &mode, 8);
1078                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
1079                         NULL, &size, 8);
1080                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
1081                         NULL, &parent, 8);
1082           } else {
1083                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
1084                         NULL, &mode, 8);
1085                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
1086                         NULL, &size, 8);
1087                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
1088                         NULL, &gen, 8);
1089                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
1090                         &acl_ids->z_fuid, 8);
1091                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
1092                         &acl_ids->z_fgid, 8);
1093                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
1094                         NULL, &parent, 8);
1095                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
1096                         NULL, &pflags, 8);
1097                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
1098                         NULL, &atime, 16);
1099                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
1100                         NULL, &mtime, 16);
1101                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
1102                         NULL, &ctime, 16);
1103                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
1104                         NULL, &crtime, 16);
1105           }
1106 
1107           SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
1108 
1109           if (obj_type == DMU_OT_ZNODE) {
1110                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
1111                         &empty_xattr, 8);
1112           }
1113           if (obj_type == DMU_OT_ZNODE ||
1114               (vap->va_type == VBLK || vap->va_type == VCHR)) {
1115                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
1116                         NULL, &rdev, 8);
1117 
1118           }
1119           if (obj_type == DMU_OT_ZNODE) {
1120                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
1121                         NULL, &pflags, 8);
1122                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
1123                         &acl_ids->z_fuid, 8);
1124                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
1125                         &acl_ids->z_fgid, 8);
1126                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
1127                         sizeof (uint64_t) * 4);
1128                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
1129                         &acl_phys, sizeof (zfs_acl_phys_t));
1130           } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
1131                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
1132                         &acl_ids->z_aclp->z_acl_count, 8);
1133                     locate.cb_aclp = acl_ids->z_aclp;
1134                     SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
1135                         zfs_acl_data_locator, &locate,
1136                         acl_ids->z_aclp->z_acl_bytes);
1137                     mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
1138                         acl_ids->z_fuid, acl_ids->z_fgid);
1139           }
1140 
1141           VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
1142 
1143           if (!(flag & IS_ROOT_NODE)) {
1144 #ifdef __NetBSD__
1145                     *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl, vp);
1146 #else
1147                     *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
1148 #endif
1149                     ASSERT(*zpp != NULL);
1150           } else {
1151                     /*
1152                      * If we are creating the root node, the "parent" we
1153                      * passed in is the znode for the root.
1154                      */
1155                     *zpp = dzp;
1156 
1157                     (*zpp)->z_sa_hdl = sa_hdl;
1158           }
1159 
1160           (*zpp)->z_pflags = pflags;
1161           (*zpp)->z_mode = mode;
1162 
1163           if (vap->va_mask & AT_XVATTR)
1164                     zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
1165 
1166           if (obj_type == DMU_OT_ZNODE ||
1167               acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
1168                     VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
1169           }
1170 #ifndef __NetBSD__
1171           if (!(flag & IS_ROOT_NODE)) {
1172                     vnode_t *vp;
1173 
1174                     vp = ZTOV(*zpp);
1175                     vp->v_vflag |= VV_FORCEINSMQ;
1176                     err = insmntque(vp, zfsvfs->z_vfs);
1177                     vp->v_vflag &= ~VV_FORCEINSMQ;
1178                     KASSERT(err == 0, ("insmntque() failed: error %d", err));
1179           }
1180 #endif
1181           ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
1182 }
1183 
1184 /*
1185  * Update in-core attributes.  It is assumed the caller will be doing an
1186  * sa_bulk_update to push the changes out.
1187  */
1188 void
zfs_xvattr_set(znode_t * zp,xvattr_t * xvap,dmu_tx_t * tx)1189 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
1190 {
1191           xoptattr_t *xoap;
1192 
1193           xoap = xva_getxoptattr(xvap);
1194           ASSERT(xoap);
1195 
1196           if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
1197                     uint64_t times[2];
1198                     ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
1199                     (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
1200                         &times, sizeof (times), tx);
1201                     XVA_SET_RTN(xvap, XAT_CREATETIME);
1202           }
1203           if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1204                     ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
1205                         zp->z_pflags, tx);
1206                     XVA_SET_RTN(xvap, XAT_READONLY);
1207           }
1208           if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1209                     ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
1210                         zp->z_pflags, tx);
1211                     XVA_SET_RTN(xvap, XAT_HIDDEN);
1212           }
1213           if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1214                     ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
1215                         zp->z_pflags, tx);
1216                     XVA_SET_RTN(xvap, XAT_SYSTEM);
1217           }
1218           if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1219                     ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
1220                         zp->z_pflags, tx);
1221                     XVA_SET_RTN(xvap, XAT_ARCHIVE);
1222           }
1223           if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
1224                     ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
1225                         zp->z_pflags, tx);
1226                     XVA_SET_RTN(xvap, XAT_IMMUTABLE);
1227           }
1228           if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
1229                     ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
1230                         zp->z_pflags, tx);
1231                     XVA_SET_RTN(xvap, XAT_NOUNLINK);
1232           }
1233           if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
1234                     ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
1235                         zp->z_pflags, tx);
1236                     XVA_SET_RTN(xvap, XAT_APPENDONLY);
1237           }
1238           if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1239                     ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
1240                         zp->z_pflags, tx);
1241                     XVA_SET_RTN(xvap, XAT_NODUMP);
1242           }
1243           if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1244                     ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
1245                         zp->z_pflags, tx);
1246                     XVA_SET_RTN(xvap, XAT_OPAQUE);
1247           }
1248           if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1249                     ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
1250                         xoap->xoa_av_quarantined, zp->z_pflags, tx);
1251                     XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1252           }
1253           if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1254                     ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
1255                         zp->z_pflags, tx);
1256                     XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1257           }
1258           if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
1259                     zfs_sa_set_scanstamp(zp, xvap, tx);
1260                     XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
1261           }
1262           if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1263                     ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
1264                         zp->z_pflags, tx);
1265                     XVA_SET_RTN(xvap, XAT_REPARSE);
1266           }
1267           if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1268                     ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
1269                         zp->z_pflags, tx);
1270                     XVA_SET_RTN(xvap, XAT_OFFLINE);
1271           }
1272           if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1273                     ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
1274                         zp->z_pflags, tx);
1275                     XVA_SET_RTN(xvap, XAT_SPARSE);
1276           }
1277 }
1278 
1279 #ifdef __NetBSD__
1280 
1281 int
zfs_zget(zfsvfs_t * zfsvfs,uint64_t obj_num,znode_t ** zpp)1282 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1283 {
1284           int error;
1285           vnode_t *vp;
1286 
1287           error = vcache_get(zfsvfs->z_vfs, &obj_num, sizeof(obj_num), &vp);
1288           if (error == 0)
1289                     *zpp = VTOZ(vp);
1290 
1291           return error;
1292 }
1293 
1294 /*
1295  * Get a known cached znode, to be used from zil_commit()->zfs_get_data()
1296  * to resolve log entries.  Doesn't take a reference, will never fail and
1297  * depends on zfs_vnops.c::zfs_netbsd_reclaim() running a zil_commit()
1298  * before the znode gets freed.
1299  */
1300 int
zfs_zget_cleaner(zfsvfs_t * zfsvfs,uint64_t obj_num,znode_t ** zpp)1301 zfs_zget_cleaner(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1302 {
1303           dmu_buf_t *db;
1304           sa_handle_t *hdl;
1305           dmu_object_info_t doi;
1306           znode_t *zp;
1307 
1308           ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
1309 
1310           VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db));
1311 
1312           dmu_object_info_from_db(db, &doi);
1313           ASSERT(doi.doi_bonus_type == DMU_OT_SA ||
1314               (doi.doi_bonus_type == DMU_OT_ZNODE &&
1315               doi.doi_bonus_size >= sizeof (znode_phys_t)));
1316 
1317           hdl = dmu_buf_get_user(db);
1318           ASSERT3P(hdl, !=, NULL);
1319 
1320           zp = sa_get_userdata(hdl);
1321           ASSERT3U(zp->z_id, ==, obj_num);
1322 
1323           sa_buf_rele(db, NULL);
1324 
1325           ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1326 
1327           *zpp = zp;
1328           return (0);
1329 }
1330 
1331 #else /* __NetBSD__ */
1332 
1333 int
zfs_zget(zfsvfs_t * zfsvfs,uint64_t obj_num,znode_t ** zpp)1334 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1335 {
1336           dmu_object_info_t doi;
1337           dmu_buf_t *db;
1338           znode_t             *zp;
1339           vnode_t             *vp;
1340           sa_handle_t         *hdl;
1341           struct thread       *td;
1342           int locked;
1343           int err;
1344 
1345           td = curthread;
1346           getnewvnode_reserve(1);
1347 again:
1348           *zpp = NULL;
1349           ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
1350 
1351           err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
1352           if (err) {
1353                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1354                     getnewvnode_drop_reserve();
1355                     return (err);
1356           }
1357 
1358           dmu_object_info_from_db(db, &doi);
1359           if (doi.doi_bonus_type != DMU_OT_SA &&
1360               (doi.doi_bonus_type != DMU_OT_ZNODE ||
1361               (doi.doi_bonus_type == DMU_OT_ZNODE &&
1362               doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1363                     sa_buf_rele(db, NULL);
1364                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1365 #ifdef __FreeBSD__
1366                     getnewvnode_drop_reserve();
1367 #endif
1368                     return (SET_ERROR(EINVAL));
1369           }
1370 
1371           hdl = dmu_buf_get_user(db);
1372           if (hdl != NULL) {
1373                     zp  = sa_get_userdata(hdl);
1374 
1375                     /*
1376                      * Since "SA" does immediate eviction we
1377                      * should never find a sa handle that doesn't
1378                      * know about the znode.
1379                      */
1380                     ASSERT3P(zp, !=, NULL);
1381                     ASSERT3U(zp->z_id, ==, obj_num);
1382                     *zpp = zp;
1383                     vp = ZTOV(zp);
1384 
1385                     /* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */
1386                     VN_HOLD(vp);
1387 
1388                     sa_buf_rele(db, NULL);
1389                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1390 
1391                     locked = VOP_ISLOCKED(vp);
1392                     VI_LOCK(vp);
1393                     if ((vp->v_iflag & VI_DOOMED) != 0 &&
1394                         locked != LK_EXCLUSIVE) {
1395                               /*
1396                                * The vnode is doomed and this thread doesn't
1397                                * hold the exclusive lock on it, so the vnode
1398                                * must be being reclaimed by another thread.
1399                                * Otherwise the doomed vnode is being reclaimed
1400                                * by this thread and zfs_zget is called from
1401                                * ZIL internals.
1402                                */
1403                               VI_UNLOCK(vp);
1404 
1405                               /*
1406                                * XXX vrele() locks the vnode when the last reference
1407                                * is dropped.  Although in this case the vnode is
1408                                * doomed / dead and so no inactivation is required,
1409                                * the vnode lock is still acquired.  That could result
1410                                * in a LOR with z_teardown_lock if another thread holds
1411                                * the vnode's lock and tries to take z_teardown_lock.
1412                                * But that is only possible if the other thread peforms
1413                                * a ZFS vnode operation on the vnode.  That either
1414                                * should not happen if the vnode is dead or the thread
1415                                * should also have a refrence to the vnode and thus
1416                                * our reference is not last.
1417                                */
1418                               VN_RELE(vp);
1419                               goto again;
1420                     }
1421                     VI_UNLOCK(vp);
1422                     getnewvnode_drop_reserve();
1423                     return (0);
1424           }
1425 
1426           /*
1427            * Not found create new znode/vnode
1428            * but only if file exists.
1429            *
1430            * There is a small window where zfs_vget() could
1431            * find this object while a file create is still in
1432            * progress.  This is checked for in zfs_znode_alloc()
1433            *
1434            * if zfs_znode_alloc() fails it will drop the hold on the
1435            * bonus buffer.
1436            */
1437           zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
1438               doi.doi_bonus_type, NULL);
1439           if (zp == NULL) {
1440                     err = SET_ERROR(ENOENT);
1441           } else {
1442                     *zpp = zp;
1443           }
1444           if (err == 0) {
1445                     vnode_t *vp = ZTOV(zp);
1446 
1447                     err = insmntque(vp, zfsvfs->z_vfs);
1448                     if (err == 0) {
1449                               vp->v_hash = obj_num;
1450                               VOP_UNLOCK(vp, 0);
1451                     } else {
1452                               zp->z_vnode = NULL;
1453                               zfs_znode_dmu_fini(zp);
1454                               zfs_znode_free(zp);
1455                               *zpp = NULL;
1456                     }
1457           }
1458           ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1459           getnewvnode_drop_reserve();
1460           return (err);
1461 }
1462 
1463 #endif /* __NetBSD__ */
1464 
1465 int
zfs_rezget(znode_t * zp)1466 zfs_rezget(znode_t *zp)
1467 {
1468           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1469           dmu_object_info_t doi;
1470           dmu_buf_t *db;
1471           vnode_t *vp;
1472           uint64_t obj_num = zp->z_id;
1473           uint64_t mode, size;
1474           sa_bulk_attr_t bulk[8];
1475           int err;
1476           int count = 0;
1477           uint64_t gen;
1478 
1479           ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
1480 
1481           mutex_enter(&zp->z_acl_lock);
1482           if (zp->z_acl_cached) {
1483                     zfs_acl_free(zp->z_acl_cached);
1484                     zp->z_acl_cached = NULL;
1485           }
1486 
1487           mutex_exit(&zp->z_acl_lock);
1488           ASSERT(zp->z_sa_hdl == NULL);
1489           err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
1490           if (err) {
1491                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1492                     return (err);
1493           }
1494 
1495           dmu_object_info_from_db(db, &doi);
1496           if (doi.doi_bonus_type != DMU_OT_SA &&
1497               (doi.doi_bonus_type != DMU_OT_ZNODE ||
1498               (doi.doi_bonus_type == DMU_OT_ZNODE &&
1499               doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1500                     sa_buf_rele(db, NULL);
1501                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1502                     return (SET_ERROR(EINVAL));
1503           }
1504 
1505           zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
1506           size = zp->z_size;
1507 
1508           /* reload cached values */
1509           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
1510               &gen, sizeof (gen));
1511           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
1512               &zp->z_size, sizeof (zp->z_size));
1513           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
1514               &zp->z_links, sizeof (zp->z_links));
1515           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
1516               &zp->z_pflags, sizeof (zp->z_pflags));
1517           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
1518               &zp->z_atime, sizeof (zp->z_atime));
1519           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
1520               &zp->z_uid, sizeof (zp->z_uid));
1521           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
1522               &zp->z_gid, sizeof (zp->z_gid));
1523           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
1524               &mode, sizeof (mode));
1525 
1526           if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
1527                     zfs_znode_dmu_fini(zp);
1528                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1529                     return (SET_ERROR(EIO));
1530           }
1531 
1532           zp->z_mode = mode;
1533 
1534           if (gen != zp->z_gen) {
1535                     zfs_znode_dmu_fini(zp);
1536                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1537                     return (SET_ERROR(EIO));
1538           }
1539 
1540           /*
1541            * It is highly improbable but still quite possible that two
1542            * objects in different datasets are created with the same
1543            * object numbers and in transaction groups with the same
1544            * numbers.  znodes corresponding to those objects would
1545            * have the same z_id and z_gen, but their other attributes
1546            * may be different.
1547            * zfs recv -F may replace one of such objects with the other.
1548            * As a result file properties recorded in the replaced
1549            * object's vnode may no longer match the received object's
1550            * properties.  At present the only cached property is the
1551            * files type recorded in v_type.
1552            * So, handle this case by leaving the old vnode and znode
1553            * disassociated from the actual object.  A new vnode and a
1554            * znode will be created if the object is accessed
1555            * (e.g. via a look-up).  The old vnode and znode will be
1556            * recycled when the last vnode reference is dropped.
1557            */
1558           vp = ZTOV(zp);
1559           if (vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
1560                     zfs_znode_dmu_fini(zp);
1561                     ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1562                     return (EIO);
1563           }
1564 
1565           zp->z_unlinked = (zp->z_links == 0);
1566           zp->z_blksz = doi.doi_data_block_size;
1567 #ifdef __NetBSD__
1568           rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
1569           (void)VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO);
1570 #else
1571           vn_pages_remove(vp, 0, 0);
1572 #endif
1573           if (zp->z_size != size)
1574                     vnode_pager_setsize(vp, zp->z_size);
1575           ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1576 
1577           return (0);
1578 }
1579 
1580 void
zfs_znode_delete(znode_t * zp,dmu_tx_t * tx)1581 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1582 {
1583           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1584           objset_t *os = zfsvfs->z_os;
1585           uint64_t obj = zp->z_id;
1586           uint64_t acl_obj = zfs_external_acl(zp);
1587 
1588           ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
1589           if (acl_obj) {
1590                     VERIFY(!zp->z_is_sa);
1591                     VERIFY(0 == dmu_object_free(os, acl_obj, tx));
1592           }
1593           VERIFY(0 == dmu_object_free(os, obj, tx));
1594           zfs_znode_dmu_fini(zp);
1595           ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
1596           zfs_znode_free(zp);
1597 }
1598 
1599 void
zfs_zinactive(znode_t * zp)1600 zfs_zinactive(znode_t *zp)
1601 {
1602           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1603           uint64_t z_id = zp->z_id;
1604 
1605           ASSERT(zp->z_sa_hdl);
1606 
1607           /*
1608            * Don't allow a zfs_zget() while were trying to release this znode
1609            */
1610           ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
1611 
1612           /*
1613            * If this was the last reference to a file with no links,
1614            * remove the file from the file system.
1615            */
1616           if (zp->z_unlinked) {
1617                     ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1618                     zfs_rmnode(zp);
1619                     return;
1620           }
1621 
1622           zfs_znode_dmu_fini(zp);
1623           ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1624           zfs_znode_free(zp);
1625 }
1626 
1627 void
zfs_znode_free(znode_t * zp)1628 zfs_znode_free(znode_t *zp)
1629 {
1630           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1631 
1632 #ifdef __NetBSD__
1633           struct vnode *vp = ZTOV(zp);
1634 
1635           genfs_node_destroy(vp);
1636 
1637           /*
1638            * Interlock with zfs_sync().
1639            */
1640           mutex_enter(vp->v_interlock);
1641           vp->v_data = NULL;
1642           mutex_exit(vp->v_interlock);
1643 #endif
1644 
1645           ASSERT(zp->z_sa_hdl == NULL);
1646           zp->z_vnode = NULL;
1647           mutex_enter(&zfsvfs->z_znodes_lock);
1648           POINTER_INVALIDATE(&zp->z_zfsvfs);
1649           list_remove(&zfsvfs->z_all_znodes, zp);
1650           mutex_exit(&zfsvfs->z_znodes_lock);
1651 
1652           if (zp->z_acl_cached) {
1653                     zfs_acl_free(zp->z_acl_cached);
1654                     zp->z_acl_cached = NULL;
1655           }
1656 
1657           kmem_cache_free(znode_cache, zp);
1658 
1659 #ifdef illumos
1660           VFS_RELE(zfsvfs->z_vfs);
1661 #endif
1662 }
1663 
1664 void
zfs_tstamp_update_setup(znode_t * zp,uint_t flag,uint64_t mtime[2],uint64_t ctime[2],boolean_t have_tx)1665 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
1666     uint64_t ctime[2], boolean_t have_tx)
1667 {
1668           timestruc_t         now;
1669 
1670           vfs_timestamp(&now);
1671 
1672           if (have_tx) {      /* will sa_bulk_update happen really soon? */
1673                     zp->z_atime_dirty = 0;
1674                     zp->z_seq++;
1675           } else {
1676                     zp->z_atime_dirty = 1;
1677           }
1678 
1679           if (flag & AT_ATIME) {
1680                     ZFS_TIME_ENCODE(&now, zp->z_atime);
1681           }
1682 
1683           if (flag & AT_MTIME) {
1684                     ZFS_TIME_ENCODE(&now, mtime);
1685                     if (zp->z_zfsvfs->z_use_fuids) {
1686                               zp->z_pflags |= (ZFS_ARCHIVE |
1687                                   ZFS_AV_MODIFIED);
1688                     }
1689           }
1690 
1691           if (flag & AT_CTIME) {
1692                     ZFS_TIME_ENCODE(&now, ctime);
1693                     if (zp->z_zfsvfs->z_use_fuids)
1694                               zp->z_pflags |= ZFS_ARCHIVE;
1695           }
1696 }
1697 
1698 /*
1699  * Grow the block size for a file.
1700  *
1701  *        IN:       zp        - znode of file to free data in.
1702  *                  size      - requested block size
1703  *                  tx        - open transaction.
1704  *
1705  * NOTE: this function assumes that the znode is write locked.
1706  */
1707 void
zfs_grow_blocksize(znode_t * zp,uint64_t size,dmu_tx_t * tx)1708 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1709 {
1710           int                 error;
1711           u_longlong_t        dummy;
1712 
1713           if (size <= zp->z_blksz)
1714                     return;
1715           /*
1716            * If the file size is already greater than the current blocksize,
1717            * we will not grow.  If there is more than one block in a file,
1718            * the blocksize cannot change.
1719            */
1720           if (zp->z_blksz && zp->z_size > zp->z_blksz)
1721                     return;
1722 
1723           error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
1724               size, 0, tx);
1725 
1726           if (error == ENOTSUP)
1727                     return;
1728           ASSERT0(error);
1729 
1730           /* What blocksize did we actually get? */
1731           dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
1732 }
1733 
1734 #ifdef illumos
/*
 * This is a dummy interface used when pvn_vplist_dirty() should *not*
 * be calling back into the fs for a putpage().  E.g.: when truncating
 * a file, the pages being "thrown away" don't need to be written out.
 *
 * The function is not expected to be reached: it asserts
 * unconditionally and otherwise reports success (0) without touching
 * any of its arguments.
 */
/* ARGSUSED */
static int
zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
    int flags, cred_t *cr)
{
	ASSERT(0);
	return (0);
}
1748 #endif
1749 
1750 /*
1751  * Increase the file length
1752  *
1753  *        IN:       zp        - znode of file to free data in.
1754  *                  end       - new end-of-file
1755  *
1756  *        RETURN:   0 on success, error code on failure
1757  */
1758 static int
zfs_extend(znode_t * zp,uint64_t end)1759 zfs_extend(znode_t *zp, uint64_t end)
1760 {
1761           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1762           dmu_tx_t *tx;
1763           rl_t *rl;
1764           uint64_t newblksz;
1765           int error;
1766 
1767           /*
1768            * We will change zp_size, lock the whole file.
1769            */
1770           rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
1771 
1772           /*
1773            * Nothing to do if file already at desired length.
1774            */
1775           if (end <= zp->z_size) {
1776                     zfs_range_unlock(rl);
1777                     return (0);
1778           }
1779           tx = dmu_tx_create(zfsvfs->z_os);
1780           dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1781           zfs_sa_upgrade_txholds(tx, zp);
1782           if (end > zp->z_blksz &&
1783               (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
1784                     /*
1785                      * We are growing the file past the current block size.
1786                      */
1787                     if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
1788                               /*
1789                                * File's blocksize is already larger than the
1790                                * "recordsize" property.  Only let it grow to
1791                                * the next power of 2.
1792                                */
1793                               ASSERT(!ISP2(zp->z_blksz));
1794                               newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
1795                     } else {
1796                               newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
1797                     }
1798                     dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
1799           } else {
1800                     newblksz = 0;
1801           }
1802 
1803           error = dmu_tx_assign(tx, TXG_WAIT);
1804           if (error) {
1805                     dmu_tx_abort(tx);
1806                     zfs_range_unlock(rl);
1807                     return (error);
1808           }
1809 
1810           if (newblksz)
1811                     zfs_grow_blocksize(zp, newblksz, tx);
1812 
1813           zp->z_size = end;
1814 
1815           VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
1816               &zp->z_size, sizeof (zp->z_size), tx));
1817 
1818           vnode_pager_setsize(ZTOV(zp), end);
1819 
1820           zfs_range_unlock(rl);
1821 
1822           dmu_tx_commit(tx);
1823 
1824           return (0);
1825 }
1826 
1827 /*
1828  * Free space in a file.
1829  *
1830  *        IN:       zp        - znode of file to free data in.
1831  *                  off       - start of section to free.
1832  *                  len       - length of section to free.
1833  *
1834  *        RETURN:   0 on success, error code on failure
1835  */
1836 static int
zfs_free_range(znode_t * zp,uint64_t off,uint64_t len)1837 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1838 {
1839           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1840           rl_t *rl;
1841           int error;
1842 
1843           /*
1844            * Lock the range being freed.
1845            */
1846           rl = zfs_range_lock(zp, off, len, RL_WRITER);
1847 
1848           /*
1849            * Nothing to do if file already at desired length.
1850            */
1851           if (off >= zp->z_size) {
1852                     zfs_range_unlock(rl);
1853                     return (0);
1854           }
1855 
1856           if (off + len > zp->z_size)
1857                     len = zp->z_size - off;
1858 
1859           error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
1860 
1861           if (error == 0) {
1862                     /*
1863                      * In FreeBSD we cannot free block in the middle of a file,
1864                      * but only at the end of a file, so this code path should
1865                      * never happen.
1866                      */
1867                     vnode_pager_setsize(ZTOV(zp), off);
1868           }
1869 
1870           zfs_range_unlock(rl);
1871 
1872           return (error);
1873 }
1874 
1875 /*
1876  * Truncate a file
1877  *
1878  *        IN:       zp        - znode of file to free data in.
1879  *                  end       - new end-of-file.
1880  *
1881  *        RETURN:   0 on success, error code on failure
1882  */
1883 static int
zfs_trunc(znode_t * zp,uint64_t end)1884 zfs_trunc(znode_t *zp, uint64_t end)
1885 {
1886           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1887           vnode_t *vp = ZTOV(zp);
1888           dmu_tx_t *tx;
1889           rl_t *rl;
1890           int error;
1891           sa_bulk_attr_t bulk[2];
1892           int count = 0;
1893 
1894           /*
1895            * We will change zp_size, lock the whole file.
1896            */
1897           rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
1898 
1899           /*
1900            * Nothing to do if file already at desired length.
1901            */
1902           if (end >= zp->z_size) {
1903                     zfs_range_unlock(rl);
1904                     return (0);
1905           }
1906 
1907           error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,  -1);
1908           if (error) {
1909                     zfs_range_unlock(rl);
1910                     return (error);
1911           }
1912           tx = dmu_tx_create(zfsvfs->z_os);
1913           dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1914           zfs_sa_upgrade_txholds(tx, zp);
1915           dmu_tx_mark_netfree(tx);
1916           error = dmu_tx_assign(tx, TXG_WAIT);
1917           if (error) {
1918                     dmu_tx_abort(tx);
1919                     zfs_range_unlock(rl);
1920                     return (error);
1921           }
1922 
1923           zp->z_size = end;
1924           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
1925               NULL, &zp->z_size, sizeof (zp->z_size));
1926 
1927           if (end == 0) {
1928                     zp->z_pflags &= ~ZFS_SPARSE;
1929                     SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1930                         NULL, &zp->z_pflags, 8);
1931           }
1932           VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
1933 
1934           dmu_tx_commit(tx);
1935 
1936           /*
1937            * Clear any mapped pages in the truncated region.  This has to
1938            * happen outside of the transaction to avoid the possibility of
1939            * a deadlock with someone trying to push a page that we are
1940            * about to invalidate.
1941            */
1942           vnode_pager_setsize(vp, end);
1943 
1944           zfs_range_unlock(rl);
1945 
1946           return (0);
1947 }
1948 
1949 /*
1950  * Free space in a file
1951  *
1952  *        IN:       zp        - znode of file to free data in.
1953  *                  off       - start of range
1954  *                  len       - end of range (0 => EOF)
1955  *                  flag      - current file open mode flags.
1956  *                  log       - TRUE if this action should be logged
1957  *
1958  *        RETURN:   0 on success, error code on failure
1959  */
1960 int
zfs_freesp(znode_t * zp,uint64_t off,uint64_t len,int flag,boolean_t log)1961 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
1962 {
1963           vnode_t *vp = ZTOV(zp);
1964           dmu_tx_t *tx;
1965           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1966           zilog_t *zilog = zfsvfs->z_log;
1967           uint64_t mode;
1968           uint64_t mtime[2], ctime[2];
1969           sa_bulk_attr_t bulk[3];
1970           int count = 0;
1971           int error;
1972 
1973           if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
1974               sizeof (mode))) != 0)
1975                     return (error);
1976 
1977           if (off > zp->z_size) {
1978                     error =  zfs_extend(zp, off+len);
1979                     if (error == 0 && log)
1980                               goto log;
1981                     else
1982                               return (error);
1983           }
1984 
1985           /*
1986            * Check for any locks in the region to be freed.
1987            */
1988 
1989           if (MANDLOCK(vp, (mode_t)mode)) {
1990                     uint64_t length = (len ? len : zp->z_size - off);
1991                     if (error = chklock(vp, FWRITE, off, length, flag, NULL))
1992                               return (error);
1993           }
1994 
1995           if (len == 0) {
1996                     error = zfs_trunc(zp, off);
1997           } else {
1998                     if ((error = zfs_free_range(zp, off, len)) == 0 &&
1999                         off + len > zp->z_size)
2000                               error = zfs_extend(zp, off+len);
2001           }
2002           if (error || !log)
2003                     return (error);
2004 log:
2005           tx = dmu_tx_create(zfsvfs->z_os);
2006           dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2007           zfs_sa_upgrade_txholds(tx, zp);
2008           error = dmu_tx_assign(tx, TXG_WAIT);
2009           if (error) {
2010                     dmu_tx_abort(tx);
2011                     return (error);
2012           }
2013 
2014           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
2015           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
2016           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
2017               NULL, &zp->z_pflags, 8);
2018           zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
2019           error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2020           ASSERT(error == 0);
2021 
2022           zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
2023 
2024           dmu_tx_commit(tx);
2025           return (0);
2026 }
2027 
2028 void
zfs_create_fs(objset_t * os,cred_t * cr,nvlist_t * zplprops,dmu_tx_t * tx)2029 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
2030 {
2031           uint64_t  moid, obj, sa_obj, version;
2032           uint64_t  sense = ZFS_CASE_SENSITIVE;
2033           uint64_t  norm = 0;
2034           nvpair_t  *elem;
2035           int                 error;
2036           int                 i;
2037           znode_t             *rootzp = NULL;
2038           zfsvfs_t  *zfsvfs;
2039           vattr_t             vattr;
2040           znode_t             *zp;
2041           zfs_acl_ids_t       acl_ids;
2042 
2043           /*
2044            * First attempt to create master node.
2045            */
2046           /*
2047            * In an empty objset, there are no blocks to read and thus
2048            * there can be no i/o errors (which we assert below).
2049            */
2050           moid = MASTER_NODE_OBJ;
2051           error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
2052               DMU_OT_NONE, 0, tx);
2053           ASSERT(error == 0);
2054 
2055           /*
2056            * Set starting attributes.
2057            */
2058           version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
2059           elem = NULL;
2060           while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
2061                     /* For the moment we expect all zpl props to be uint64_ts */
2062                     uint64_t val;
2063                     char *name;
2064 
2065                     ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
2066                     VERIFY(nvpair_value_uint64(elem, &val) == 0);
2067                     name = nvpair_name(elem);
2068                     if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
2069                               if (val < version)
2070                                         version = val;
2071                     } else {
2072                               error = zap_update(os, moid, name, 8, 1, &val, tx);
2073                     }
2074                     ASSERT(error == 0);
2075                     if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
2076                               norm = val;
2077                     else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
2078                               sense = val;
2079           }
2080           ASSERT(version != 0);
2081           error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
2082 
2083           /*
2084            * Create zap object used for SA attribute registration
2085            */
2086 
2087           if (version >= ZPL_VERSION_SA) {
2088                     sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
2089                         DMU_OT_NONE, 0, tx);
2090                     error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
2091                     ASSERT(error == 0);
2092           } else {
2093                     sa_obj = 0;
2094           }
2095           /*
2096            * Create a delete queue.
2097            */
2098           obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
2099 
2100           error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
2101           ASSERT(error == 0);
2102 
2103           /*
2104            * Create root znode.  Create minimal znode/vnode/zfsvfs
2105            * to allow zfs_mknode to work.
2106            */
2107           VATTR_NULL(&vattr);
2108           vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
2109           vattr.va_type = VDIR;
2110           vattr.va_mode = S_IFDIR|0755;
2111           vattr.va_uid = crgetuid(cr);
2112           vattr.va_gid = crgetgid(cr);
2113 
2114           zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
2115 
2116           rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
2117           ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
2118           rootzp->z_moved = 0;
2119           rootzp->z_unlinked = 0;
2120           rootzp->z_atime_dirty = 0;
2121           rootzp->z_is_sa = USE_SA(version, os);
2122 
2123           zfsvfs->z_os = os;
2124           zfsvfs->z_parent = zfsvfs;
2125           zfsvfs->z_version = version;
2126           zfsvfs->z_use_fuids = USE_FUIDS(version, os);
2127           zfsvfs->z_use_sa = USE_SA(version, os);
2128           zfsvfs->z_norm = norm;
2129 
2130           error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
2131               &zfsvfs->z_attr_table);
2132 
2133           ASSERT(error == 0);
2134 
2135           /*
2136            * Fold case on file systems that are always or sometimes case
2137            * insensitive.
2138            */
2139           if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
2140                     zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
2141 
2142           mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
2143           list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
2144               offsetof(znode_t, z_link_node));
2145 
2146           for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
2147                     mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
2148 
2149           rootzp->z_zfsvfs = zfsvfs;
2150           VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
2151               cr, NULL, &acl_ids));
2152           zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
2153           ASSERT3P(zp, ==, rootzp);
2154           error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
2155           ASSERT(error == 0);
2156           zfs_acl_ids_free(&acl_ids);
2157           POINTER_INVALIDATE(&rootzp->z_zfsvfs);
2158 
2159           sa_handle_destroy(rootzp->z_sa_hdl);
2160           kmem_cache_free(znode_cache, rootzp);
2161 
2162           /*
2163            * Create shares directory
2164            */
2165 
2166           error = zfs_create_share_dir(zfsvfs, tx);
2167 
2168           ASSERT(error == 0);
2169 
2170           for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
2171                     mutex_destroy(&zfsvfs->z_hold_mtx[i]);
2172           mutex_destroy(&zfsvfs->z_znodes_lock);
2173           kmem_free(zfsvfs, sizeof (zfsvfs_t));
2174 }
2175 #endif /* _KERNEL */
2176 
2177 static int
zfs_sa_setup(objset_t * osp,sa_attr_type_t ** sa_table)2178 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
2179 {
2180           uint64_t sa_obj = 0;
2181           int error;
2182 
2183           error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
2184           if (error != 0 && error != ENOENT)
2185                     return (error);
2186 
2187           error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
2188           return (error);
2189 }
2190 
2191 static int
zfs_grab_sa_handle(objset_t * osp,uint64_t obj,sa_handle_t ** hdlp,dmu_buf_t ** db,void * tag)2192 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
2193     dmu_buf_t **db, void *tag)
2194 {
2195           dmu_object_info_t doi;
2196           int error;
2197 
2198           if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
2199                     return (error);
2200 
2201           dmu_object_info_from_db(*db, &doi);
2202           if ((doi.doi_bonus_type != DMU_OT_SA &&
2203               doi.doi_bonus_type != DMU_OT_ZNODE) ||
2204               doi.doi_bonus_type == DMU_OT_ZNODE &&
2205               doi.doi_bonus_size < sizeof (znode_phys_t)) {
2206                     sa_buf_rele(*db, tag);
2207                     return (SET_ERROR(ENOTSUP));
2208           }
2209 
2210           error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
2211           if (error != 0) {
2212                     sa_buf_rele(*db, tag);
2213                     return (error);
2214           }
2215 
2216           return (0);
2217 }
2218 
/*
 * Release an SA handle and buffer hold, such as the pair obtained from
 * zfs_grab_sa_handle().  The handle is destroyed before the buffer hold
 * identified by tag is dropped.
 */
void
zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
{
	sa_handle_destroy(hdl);
	sa_buf_rele(db, tag);
}
2225 
2226 /*
2227  * Given an object number, return its parent object number and whether
2228  * or not the object is an extended attribute directory.
2229  */
2230 static int
zfs_obj_to_pobj(objset_t * osp,sa_handle_t * hdl,sa_attr_type_t * sa_table,uint64_t * pobjp,int * is_xattrdir)2231 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
2232     uint64_t *pobjp, int *is_xattrdir)
2233 {
2234           uint64_t parent;
2235           uint64_t pflags;
2236           uint64_t mode;
2237           uint64_t parent_mode;
2238           sa_bulk_attr_t bulk[3];
2239           sa_handle_t *sa_hdl;
2240           dmu_buf_t *sa_db;
2241           int count = 0;
2242           int error;
2243 
2244           SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
2245               &parent, sizeof (parent));
2246           SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
2247               &pflags, sizeof (pflags));
2248           SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2249               &mode, sizeof (mode));
2250 
2251           if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
2252                     return (error);
2253 
2254           /*
2255            * When a link is removed its parent pointer is not changed and will
2256            * be invalid.  There are two cases where a link is removed but the
2257            * file stays around, when it goes to the delete queue and when there
2258            * are additional links.
2259            */
2260           error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
2261           if (error != 0)
2262                     return (error);
2263 
2264           error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
2265           zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
2266           if (error != 0)
2267                     return (error);
2268 
2269           *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
2270 
2271           /*
2272            * Extended attributes can be applied to files, directories, etc.
2273            * Otherwise the parent must be a directory.
2274            */
2275           if (!*is_xattrdir && !S_ISDIR(parent_mode))
2276                     return (SET_ERROR(EINVAL));
2277 
2278           *pobjp = parent;
2279 
2280           return (0);
2281 }
2282 
2283 /*
2284  * Given an object number, return some zpl level statistics
2285  */
2286 static int
zfs_obj_to_stats_impl(sa_handle_t * hdl,sa_attr_type_t * sa_table,zfs_stat_t * sb)2287 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
2288     zfs_stat_t *sb)
2289 {
2290           sa_bulk_attr_t bulk[4];
2291           int count = 0;
2292 
2293           SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2294               &sb->zs_mode, sizeof (sb->zs_mode));
2295           SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
2296               &sb->zs_gen, sizeof (sb->zs_gen));
2297           SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
2298               &sb->zs_links, sizeof (sb->zs_links));
2299           SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
2300               &sb->zs_ctime, sizeof (sb->zs_ctime));
2301 
2302           return (sa_bulk_lookup(hdl, bulk, count));
2303 }
2304 
2305 static int
zfs_obj_to_path_impl(objset_t * osp,uint64_t obj,sa_handle_t * hdl,sa_attr_type_t * sa_table,char * buf,int len)2306 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
2307     sa_attr_type_t *sa_table, char *buf, int len)
2308 {
2309           sa_handle_t *sa_hdl;
2310           sa_handle_t *prevhdl = NULL;
2311           dmu_buf_t *prevdb = NULL;
2312           dmu_buf_t *sa_db = NULL;
2313           char *path = buf + len - 1;
2314           int error;
2315 
2316           *path = '\0';
2317           sa_hdl = hdl;
2318 
2319           for (;;) {
2320                     uint64_t pobj;
2321                     char component[MAXNAMELEN + 2];
2322                     size_t complen;
2323                     int is_xattrdir;
2324 
2325                     if (prevdb)
2326                               zfs_release_sa_handle(prevhdl, prevdb, FTAG);
2327 
2328                     if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
2329                         &is_xattrdir)) != 0)
2330                               break;
2331 
2332                     if (pobj == obj) {
2333                               if (path[0] != '/')
2334                                         *--path = '/';
2335                               break;
2336                     }
2337 
2338                     component[0] = '/';
2339                     if (is_xattrdir) {
2340                               (void) sprintf(component + 1, "<xattrdir>");
2341                     } else {
2342                               error = zap_value_search(osp, pobj, obj,
2343                                   ZFS_DIRENT_OBJ(-1ULL), component + 1);
2344                               if (error != 0)
2345                                         break;
2346                     }
2347 
2348                     complen = strlen(component);
2349                     path -= complen;
2350                     ASSERT(path >= buf);
2351                     bcopy(component, path, complen);
2352                     obj = pobj;
2353 
2354                     if (sa_hdl != hdl) {
2355                               prevhdl = sa_hdl;
2356                               prevdb = sa_db;
2357                     }
2358                     error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
2359                     if (error != 0) {
2360                               sa_hdl = prevhdl;
2361                               sa_db = prevdb;
2362                               break;
2363                     }
2364           }
2365 
2366           if (sa_hdl != NULL && sa_hdl != hdl) {
2367                     ASSERT(sa_db != NULL);
2368                     zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
2369           }
2370 
2371           if (error == 0)
2372                     (void) memmove(buf, path, buf + len - path);
2373 
2374           return (error);
2375 }
2376 
2377 int
zfs_obj_to_path(objset_t * osp,uint64_t obj,char * buf,int len)2378 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
2379 {
2380           sa_attr_type_t *sa_table;
2381           sa_handle_t *hdl;
2382           dmu_buf_t *db;
2383           int error;
2384 
2385           error = zfs_sa_setup(osp, &sa_table);
2386           if (error != 0)
2387                     return (error);
2388 
2389           error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
2390           if (error != 0)
2391                     return (error);
2392 
2393           error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2394 
2395           zfs_release_sa_handle(hdl, db, FTAG);
2396           return (error);
2397 }
2398 
2399 int
zfs_obj_to_stats(objset_t * osp,uint64_t obj,zfs_stat_t * sb,char * buf,int len)2400 zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
2401     char *buf, int len)
2402 {
2403           char *path = buf + len - 1;
2404           sa_attr_type_t *sa_table;
2405           sa_handle_t *hdl;
2406           dmu_buf_t *db;
2407           int error;
2408 
2409           *path = '\0';
2410 
2411           error = zfs_sa_setup(osp, &sa_table);
2412           if (error != 0)
2413                     return (error);
2414 
2415           error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
2416           if (error != 0)
2417                     return (error);
2418 
2419           error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
2420           if (error != 0) {
2421                     zfs_release_sa_handle(hdl, db, FTAG);
2422                     return (error);
2423           }
2424 
2425           error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2426 
2427           zfs_release_sa_handle(hdl, db, FTAG);
2428           return (error);
2429 }
2430 
2431 #ifdef _KERNEL
2432 int
zfs_znode_parent_and_name(znode_t * zp,znode_t ** dzpp,char * buf)2433 zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf)
2434 {
2435           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2436           uint64_t parent;
2437           int is_xattrdir;
2438           int err;
2439 
2440           /* Extended attributes should not be visible as regular files. */
2441           if ((zp->z_pflags & ZFS_XATTR) != 0)
2442                     return (SET_ERROR(EINVAL));
2443 
2444           err = zfs_obj_to_pobj(zfsvfs->z_os, zp->z_sa_hdl, zfsvfs->z_attr_table,
2445               &parent, &is_xattrdir);
2446           if (err != 0)
2447                     return (err);
2448           ASSERT0(is_xattrdir);
2449 
2450           /* No name as this is a root object. */
2451           if (parent == zp->z_id)
2452                     return (SET_ERROR(EINVAL));
2453 
2454           err = zap_value_search(zfsvfs->z_os, parent, zp->z_id,
2455               ZFS_DIRENT_OBJ(-1ULL), buf);
2456           if (err != 0)
2457                     return (err);
2458           err = zfs_zget(zfsvfs, parent, dzpp);
2459           return (err);
2460 }
2461 #endif /* _KERNEL */
2462