1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Portions Copyright 2011 iXsystems, Inc
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
27 */
28
29 #include <sys/zfs_context.h>
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/dmu.h>
35 #include <sys/dmu_impl.h>
36 #include <sys/dmu_objset.h>
37 #include <sys/dbuf.h>
38 #include <sys/dnode.h>
39 #include <sys/zap.h>
40 #include <sys/sa.h>
41 #include <sys/sunddi.h>
42 #include <sys/sa_impl.h>
43 #include <sys/dnode.h>
44 #include <sys/errno.h>
45 #include <sys/zfs_context.h>
46
47 /*
48 * ZFS System attributes:
49 *
50 * A generic mechanism to allow for arbitrary attributes
51 * to be stored in a dnode. The data will be stored in the bonus buffer of
52 * the dnode and if necessary a special "spill" block will be used to handle
53 * overflow situations. The spill block will be sized to fit the data
54 * from 512 - 128K. When a spill block is used the BP (blkptr_t) for the
55 * spill block is stored at the end of the current bonus buffer. Any
56 * attributes that would be in the way of the blkptr_t will be relocated
57 * into the spill block.
58 *
59 * Attribute registration:
60 *
61 * Stored persistently on a per dataset basis
62 * a mapping between attribute "string" names and their actual attribute
63 * numeric values, length, and byteswap function. The names are only used
64 * during registration. All attributes are known by their unique attribute
65 * id value. If an attribute can have a variable size then the value
66 * 0 will be used to indicate this.
67 *
68 * Attribute Layout:
69 *
70 * Attribute layouts are a way to compactly store multiple attributes, but
71 * without taking the overhead associated with managing each attribute
72 * individually. Since you will typically have the same set of attributes
73 * stored in the same order a single table will be used to represent that
74 * layout. The ZPL for example will usually have only about 10 different
75 * layouts (regular files, device files, symlinks,
76 * regular files + scanstamp, files/dir with extended attributes, and then
77 * you have the possibility of all of those minus ACL, because it would
78 * be kicked out into the spill block)
79 *
80 * Layouts are simply an array of the attributes and their
81 * ordering i.e. [0, 1, 4, 5, 2]
82 *
 * Each distinct layout is given a unique layout number and that is what's
84 * stored in the header at the beginning of the SA data buffer.
85 *
86 * A layout only covers a single dbuf (bonus or spill). If a set of
87 * attributes is split up between the bonus buffer and a spill buffer then
88 * two different layouts will be used. This allows us to byteswap the
89 * spill without looking at the bonus buffer and keeps the on disk format of
90 * the bonus and spill buffer the same.
91 *
92 * Adding a single attribute will cause the entire set of attributes to
93 * be rewritten and could result in a new layout number being constructed
94 * as part of the rewrite if no such layout exists for the new set of
 * attributes. The new attribute will be appended to the end of the already
96 * existing attributes.
97 *
98 * Both the attribute registration and attribute layout information are
 * stored in normal ZAP attributes. There should be a small number of
100 * known layouts and the set of attributes is assumed to typically be quite
101 * small.
102 *
103 * The registered attributes and layout "table" information is maintained
104 * in core and a special "sa_os_t" is attached to the objset_t.
105 *
106 * A special interface is provided to allow for quickly applying
107 * a large set of attributes at once. sa_replace_all_by_template() is
108 * used to set an array of attributes. This is used by the ZPL when
109 * creating a brand new file. The template that is passed into the function
110 * specifies the attribute, size for variable length attributes, location of
111 * data and special "data locator" function if the data isn't in a contiguous
112 * location.
113 *
114 * Byteswap implications:
115 *
116 * Since the SA attributes are not entirely self describing we can't do
117 * the normal byteswap processing. The special ZAP layout attribute and
118 * attribute registration attributes define the byteswap function and the
119 * size of the attributes, unless it is variable sized.
120 * The normal ZFS byteswapping infrastructure assumes you don't need
121 * to read any objects in order to do the necessary byteswapping. Whereas
122 * SA attributes can only be properly byteswapped if the dataset is opened
123 * and the layout/attribute ZAP attributes are available. Because of this
124 * the SA attributes will be byteswapped when they are first accessed by
125 * the SA code that will read the SA data.
126 */
127
/*
 * Callback signature used when iterating over attributes.
 * NOTE(review): no user of this type is visible in this part of the file.
 * Judging only by the parameter names, it is handed the buffer header, the
 * attribute's address, type and length, the index into the header's length
 * array, and an opaque caller cookie (userp) -- confirm against the
 * iterator that invokes it.
 */
typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t,
    uint16_t length, int length_idx, boolean_t, void *userp);
130
131 static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype);
132 static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab);
133 static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype,
134 void *data);
135 static void sa_idx_tab_rele(objset_t *os, void *arg);
136 static void sa_copy_data(sa_data_locator_t *func, void *start, void *target,
137 int buflen);
138 static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
139 sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
140 uint16_t buflen, dmu_tx_t *tx);
141
/*
 * Byteswap routines available to attributes.
 * NOTE(review): presumably indexed by an attribute's sa_byteswap value
 * (SA_UINT64_ARRAY, SA_UINT8_ARRAY, ...), in which case the order of the
 * entries must match that enumeration -- confirm before reordering.
 */
arc_byteswap_func_t *sa_bswap_table[] = {
	byteswap_uint64_array,
	byteswap_uint32_array,
	byteswap_uint16_array,
	byteswap_uint8_array,
	zfs_acl_byteswap,
};
149
/*
 * Copy l bytes of attribute data from s to t.
 *
 * When no data locator (f) is supplied the copy is done inline, with fast
 * paths for 8 and 16 byte attributes and bcopy() for everything else.
 * Otherwise the work is handed to sa_copy_data(), which gathers the data
 * through the locator callback.
 *
 * Every argument is parenthesized in the expansion so that expressions
 * such as "buf + 1" can be passed safely.  Arguments are still evaluated
 * more than once, so they must not have side effects.  The expansion is
 * deliberately left as a plain brace block so existing call sites keep
 * compiling unchanged.
 */
#define	SA_COPY_DATA(f, s, t, l) \
	{ \
		if ((f) == NULL) { \
			if ((l) == 8) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
			} else if ((l) == 16) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
				*(uint64_t *)((uintptr_t)(t) + 8) = \
				    *(uint64_t *)((uintptr_t)(s) + 8); \
			} else { \
				bcopy((s), (t), (l)); \
			} \
		} else \
			sa_copy_data((f), (s), (t), (l)); \
	}
165
/*
 * This table is fixed and cannot be changed.  Its purpose is to
 * allow the SA code to work with both old/new ZPL file systems.
 * It contains the list of legacy attributes.  These attributes aren't
 * stored in the "attribute" registry zap objects, since older ZPL file systems
 * won't have the registry.  Only objsets of type ZFS_TYPE_FILESYSTEM will
 * use this static table.
 *
 * The fields read from each entry elsewhere in this file are sa_name,
 * sa_length, sa_byteswap and sa_attr; the initializers appear to be given
 * in that order (name, length in bytes, byteswap type, attribute number).
 * The attribute numbers 0-15 are the ones listed in sa_legacy_zpl_layout[].
 */
sa_attr_reg_t sa_legacy_attrs[] = {
	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
};
192
/*
 * This is only used for objects of type DMU_OT_ZNODE.  It lists the
 * sixteen legacy attribute numbers (see sa_legacy_attrs[]) in ascending
 * order.
 */
sa_attr_type_t sa_legacy_zpl_layout[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

/*
 * Special dummy layout used for buffers with no attributes.
 */
sa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
204
/*
 * Number of entries in sa_legacy_attrs[]; it bounds every loop over that
 * table and therefore must be kept in sync with it.
 */
static int sa_legacy_attr_count = 16;
/* kmem cache set up by sa_cache_init() and torn down by sa_cache_fini(). */
static kmem_cache_t *sa_cache = NULL;
207
208 /*ARGSUSED*/
209 static int
sa_cache_constructor(void * buf,void * unused,int kmflag)210 sa_cache_constructor(void *buf, void *unused, int kmflag)
211 {
212 sa_handle_t *hdl = buf;
213
214 mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL);
215 return (0);
216 }
217
218 /*ARGSUSED*/
219 static void
sa_cache_destructor(void * buf,void * unused)220 sa_cache_destructor(void *buf, void *unused)
221 {
222 sa_handle_t *hdl = buf;
223 mutex_destroy(&hdl->sa_lock);
224 }
225
226 void
sa_cache_init(void)227 sa_cache_init(void)
228 {
229 sa_cache = kmem_cache_create("sa_cache",
230 sizeof (sa_handle_t), 0, sa_cache_constructor,
231 sa_cache_destructor, NULL, NULL, NULL, 0);
232 }
233
234 void
sa_cache_fini(void)235 sa_cache_fini(void)
236 {
237 if (sa_cache)
238 kmem_cache_destroy(sa_cache);
239 }
240
241 static int
layout_num_compare(const void * arg1,const void * arg2)242 layout_num_compare(const void *arg1, const void *arg2)
243 {
244 const sa_lot_t *node1 = arg1;
245 const sa_lot_t *node2 = arg2;
246
247 if (node1->lot_num > node2->lot_num)
248 return (1);
249 else if (node1->lot_num < node2->lot_num)
250 return (-1);
251 return (0);
252 }
253
254 static int
layout_hash_compare(const void * arg1,const void * arg2)255 layout_hash_compare(const void *arg1, const void *arg2)
256 {
257 const sa_lot_t *node1 = arg1;
258 const sa_lot_t *node2 = arg2;
259
260 if (node1->lot_hash > node2->lot_hash)
261 return (1);
262 if (node1->lot_hash < node2->lot_hash)
263 return (-1);
264 if (node1->lot_instance > node2->lot_instance)
265 return (1);
266 if (node1->lot_instance < node2->lot_instance)
267 return (-1);
268 return (0);
269 }
270
271 boolean_t
sa_layout_equal(sa_lot_t * tbf,sa_attr_type_t * attrs,int count)272 sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count)
273 {
274 int i;
275
276 if (count != tbf->lot_attr_count)
277 return (1);
278
279 for (i = 0; i != count; i++) {
280 if (attrs[i] != tbf->lot_attrs[i])
281 return (1);
282 }
283 return (0);
284 }
285
/*
 * Per-attribute hash contribution: the zfs_crc64_table entry selected by
 * the low byte of the bitwise complement of attr.  The argument is
 * parenthesized so that any expression can be passed.
 */
#define	SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ (attr)) & 0xFF])
287
288 static uint64_t
sa_layout_info_hash(sa_attr_type_t * attrs,int attr_count)289 sa_layout_info_hash(sa_attr_type_t *attrs, int attr_count)
290 {
291 int i;
292 uint64_t crc = -1ULL;
293
294 for (i = 0; i != attr_count; i++)
295 crc ^= SA_ATTR_HASH(attrs[i]);
296
297 return (crc);
298 }
299
300 static int
sa_get_spill(sa_handle_t * hdl)301 sa_get_spill(sa_handle_t *hdl)
302 {
303 int rc;
304 if (hdl->sa_spill == NULL) {
305 if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL,
306 &hdl->sa_spill)) == 0)
307 VERIFY(0 == sa_build_index(hdl, SA_SPILL));
308 } else {
309 rc = 0;
310 }
311
312 return (rc);
313 }
314
315 /*
316 * Main attribute lookup/update function
317 * returns 0 for success or non zero for failures
318 *
319 * Operates on bulk array, first failure will abort further processing
320 */
321 int
sa_attr_op(sa_handle_t * hdl,sa_bulk_attr_t * bulk,int count,sa_data_op_t data_op,dmu_tx_t * tx)322 sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
323 sa_data_op_t data_op, dmu_tx_t *tx)
324 {
325 sa_os_t *sa = hdl->sa_os->os_sa;
326 int i;
327 int error = 0;
328 sa_buf_type_t buftypes;
329
330 buftypes = 0;
331
332 ASSERT(count > 0);
333 for (i = 0; i != count; i++) {
334 ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs);
335
336 bulk[i].sa_addr = NULL;
337 /* First check the bonus buffer */
338
339 if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT(
340 hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) {
341 SA_ATTR_INFO(sa, hdl->sa_bonus_tab,
342 SA_GET_HDR(hdl, SA_BONUS),
343 bulk[i].sa_attr, bulk[i], SA_BONUS, hdl);
344 if (tx && !(buftypes & SA_BONUS)) {
345 dmu_buf_will_dirty(hdl->sa_bonus, tx);
346 buftypes |= SA_BONUS;
347 }
348 }
349 if (bulk[i].sa_addr == NULL &&
350 ((error = sa_get_spill(hdl)) == 0)) {
351 if (TOC_ATTR_PRESENT(
352 hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) {
353 SA_ATTR_INFO(sa, hdl->sa_spill_tab,
354 SA_GET_HDR(hdl, SA_SPILL),
355 bulk[i].sa_attr, bulk[i], SA_SPILL, hdl);
356 if (tx && !(buftypes & SA_SPILL) &&
357 bulk[i].sa_size == bulk[i].sa_length) {
358 dmu_buf_will_dirty(hdl->sa_spill, tx);
359 buftypes |= SA_SPILL;
360 }
361 }
362 }
363 if (error && error != ENOENT) {
364 return ((error == ECKSUM) ? EIO : error);
365 }
366
367 switch (data_op) {
368 case SA_LOOKUP:
369 if (bulk[i].sa_addr == NULL)
370 return (SET_ERROR(ENOENT));
371 if (bulk[i].sa_data) {
372 SA_COPY_DATA(bulk[i].sa_data_func,
373 bulk[i].sa_addr, bulk[i].sa_data,
374 bulk[i].sa_size);
375 }
376 continue;
377
378 case SA_UPDATE:
379 /* existing rewrite of attr */
380 if (bulk[i].sa_addr &&
381 bulk[i].sa_size == bulk[i].sa_length) {
382 SA_COPY_DATA(bulk[i].sa_data_func,
383 bulk[i].sa_data, bulk[i].sa_addr,
384 bulk[i].sa_length);
385 continue;
386 } else if (bulk[i].sa_addr) { /* attr size change */
387 error = sa_modify_attrs(hdl, bulk[i].sa_attr,
388 SA_REPLACE, bulk[i].sa_data_func,
389 bulk[i].sa_data, bulk[i].sa_length, tx);
390 } else { /* adding new attribute */
391 error = sa_modify_attrs(hdl, bulk[i].sa_attr,
392 SA_ADD, bulk[i].sa_data_func,
393 bulk[i].sa_data, bulk[i].sa_length, tx);
394 }
395 if (error)
396 return (error);
397 break;
398 }
399 }
400 return (error);
401 }
402
403 static sa_lot_t *
sa_add_layout_entry(objset_t * os,sa_attr_type_t * attrs,int attr_count,uint64_t lot_num,uint64_t hash,boolean_t zapadd,dmu_tx_t * tx)404 sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
405 uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx)
406 {
407 sa_os_t *sa = os->os_sa;
408 sa_lot_t *tb, *findtb;
409 int i;
410 avl_index_t loc;
411
412 ASSERT(MUTEX_HELD(&sa->sa_lock));
413 tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP);
414 tb->lot_attr_count = attr_count;
415 tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
416 KM_SLEEP);
417 bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count);
418 tb->lot_num = lot_num;
419 tb->lot_hash = hash;
420 tb->lot_instance = 0;
421
422 if (zapadd) {
423 char attr_name[8];
424
425 if (sa->sa_layout_attr_obj == 0) {
426 sa->sa_layout_attr_obj = zap_create_link(os,
427 DMU_OT_SA_ATTR_LAYOUTS,
428 sa->sa_master_obj, SA_LAYOUTS, tx);
429 }
430
431 (void) snprintf(attr_name, sizeof (attr_name),
432 "%d", (int)lot_num);
433 VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj,
434 attr_name, 2, attr_count, attrs, tx));
435 }
436
437 list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t),
438 offsetof(sa_idx_tab_t, sa_next));
439
440 for (i = 0; i != attr_count; i++) {
441 if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0)
442 tb->lot_var_sizes++;
443 }
444
445 avl_add(&sa->sa_layout_num_tree, tb);
446
447 /* verify we don't have a hash collision */
448 if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) {
449 for (; findtb && findtb->lot_hash == hash;
450 findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) {
451 if (findtb->lot_instance != tb->lot_instance)
452 break;
453 tb->lot_instance++;
454 }
455 }
456 avl_add(&sa->sa_layout_hash_tree, tb);
457 return (tb);
458 }
459
460 static void
sa_find_layout(objset_t * os,uint64_t hash,sa_attr_type_t * attrs,int count,dmu_tx_t * tx,sa_lot_t ** lot)461 sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs,
462 int count, dmu_tx_t *tx, sa_lot_t **lot)
463 {
464 sa_lot_t *tb, tbsearch;
465 avl_index_t loc;
466 sa_os_t *sa = os->os_sa;
467 boolean_t found = B_FALSE;
468
469 mutex_enter(&sa->sa_lock);
470 tbsearch.lot_hash = hash;
471 tbsearch.lot_instance = 0;
472 tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc);
473 if (tb) {
474 for (; tb && tb->lot_hash == hash;
475 tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) {
476 if (sa_layout_equal(tb, attrs, count) == 0) {
477 found = B_TRUE;
478 break;
479 }
480 }
481 }
482 if (!found) {
483 tb = sa_add_layout_entry(os, attrs, count,
484 avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx);
485 }
486 mutex_exit(&sa->sa_lock);
487 *lot = tb;
488 }
489
490 static int
sa_resize_spill(sa_handle_t * hdl,uint32_t size,dmu_tx_t * tx)491 sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
492 {
493 int error;
494 uint32_t blocksize;
495
496 if (size == 0) {
497 blocksize = SPA_MINBLOCKSIZE;
498 } else if (size > SPA_OLD_MAXBLOCKSIZE) {
499 ASSERT(0);
500 return (SET_ERROR(EFBIG));
501 } else {
502 blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t);
503 }
504
505 error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx);
506 ASSERT(error == 0);
507 return (error);
508 }
509
510 static void
sa_copy_data(sa_data_locator_t * func,void * datastart,void * target,int buflen)511 sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen)
512 {
513 if (func == NULL) {
514 bcopy(datastart, target, buflen);
515 } else {
516 boolean_t start;
517 int bytes;
518 void *dataptr;
519 void *saptr = target;
520 uint32_t length;
521
522 start = B_TRUE;
523 bytes = 0;
524 while (bytes < buflen) {
525 func(&dataptr, &length, buflen, start, datastart);
526 bcopy(dataptr, saptr, length);
527 saptr = (void *)((caddr_t)saptr + length);
528 bytes += length;
529 start = B_FALSE;
530 }
531 }
532 }
533
534 /*
535 * Determine several different sizes
536 * first the sa header size
537 * the number of bytes to be stored
538 * if spill would occur the index in the attribute array is returned
539 *
540 * the boolean will_spill will be set when spilling is necessary. It
541 * is only set when the buftype is SA_BONUS
542 */
543 static int
sa_find_sizes(sa_os_t * sa,sa_bulk_attr_t * attr_desc,int attr_count,dmu_buf_t * db,sa_buf_type_t buftype,int * index,int * total,boolean_t * will_spill)544 sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
545 dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total,
546 boolean_t *will_spill)
547 {
548 int var_size = 0;
549 int i;
550 int j = -1;
551 int full_space;
552 int hdrsize;
553 boolean_t done = B_FALSE;
554
555 if (buftype == SA_BONUS && sa->sa_force_spill) {
556 *total = 0;
557 *index = 0;
558 *will_spill = B_TRUE;
559 return (0);
560 }
561
562 *index = -1;
563 *total = 0;
564
565 if (buftype == SA_BONUS)
566 *will_spill = B_FALSE;
567
568 hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 :
569 sizeof (sa_hdr_phys_t);
570
571 full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size;
572 ASSERT(IS_P2ALIGNED(full_space, 8));
573
574 for (i = 0; i != attr_count; i++) {
575 boolean_t is_var_sz;
576
577 *total = P2ROUNDUP(*total, 8);
578 *total += attr_desc[i].sa_length;
579 if (done)
580 goto next;
581
582 is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0);
583 if (is_var_sz) {
584 var_size++;
585 }
586
587 if (is_var_sz && var_size > 1) {
588 if (P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) +
589 *total < full_space) {
590 /*
591 * Account for header space used by array of
592 * optional sizes of variable-length attributes.
593 * Record the index in case this increase needs
594 * to be reversed due to spill-over.
595 */
596 hdrsize += sizeof (uint16_t);
597 j = i;
598 } else {
599 done = B_TRUE;
600 *index = i;
601 if (buftype == SA_BONUS)
602 *will_spill = B_TRUE;
603 continue;
604 }
605 }
606
607 /*
608 * find index of where spill *could* occur.
609 * Then continue to count of remainder attribute
610 * space. The sum is used later for sizing bonus
611 * and spill buffer.
612 */
613 if (buftype == SA_BONUS && *index == -1 &&
614 (*total + P2ROUNDUP(hdrsize, 8)) >
615 (full_space - sizeof (blkptr_t))) {
616 *index = i;
617 done = B_TRUE;
618 }
619
620 next:
621 if ((*total + P2ROUNDUP(hdrsize, 8)) > full_space &&
622 buftype == SA_BONUS)
623 *will_spill = B_TRUE;
624 }
625
626 /*
627 * j holds the index of the last variable-sized attribute for
628 * which hdrsize was increased. Reverse the increase if that
629 * attribute will be relocated to the spill block.
630 */
631 if (*will_spill && j == *index)
632 hdrsize -= sizeof (uint16_t);
633
634 hdrsize = P2ROUNDUP(hdrsize, 8);
635 return (hdrsize);
636 }
637
/*
 * Space a buffer needs for "total" bytes of attribute data plus a header
 * of "header" bytes.  Both arguments are parenthesized so that arbitrary
 * expressions can be passed.
 */
#define	BUF_SPACE_NEEDED(total, header) ((total) + (header))
639
640 /*
641 * Find layout that corresponds to ordering of attributes
642 * If not found a new layout number is created and added to
643 * persistent layout tables.
644 */
645 static int
sa_build_layouts(sa_handle_t * hdl,sa_bulk_attr_t * attr_desc,int attr_count,dmu_tx_t * tx)646 sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
647 dmu_tx_t *tx)
648 {
649 sa_os_t *sa = hdl->sa_os->os_sa;
650 uint64_t hash;
651 sa_buf_type_t buftype;
652 sa_hdr_phys_t *sahdr;
653 void *data_start;
654 int buf_space;
655 sa_attr_type_t *attrs, *attrs_start;
656 int i, lot_count;
657 int hdrsize;
658 int spillhdrsize = 0;
659 int used;
660 dmu_object_type_t bonustype;
661 sa_lot_t *lot;
662 int len_idx;
663 int spill_used;
664 boolean_t spilling;
665
666 dmu_buf_will_dirty(hdl->sa_bonus, tx);
667 bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);
668
669 /* first determine bonus header size and sum of all attributes */
670 hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
671 SA_BONUS, &i, &used, &spilling);
672
673 if (used > SPA_OLD_MAXBLOCKSIZE)
674 return (SET_ERROR(EFBIG));
675
676 VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
677 MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
678 used + hdrsize, tx));
679
680 ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
681 bonustype == DMU_OT_SA);
682
683 /* setup and size spill buffer when needed */
684 if (spilling) {
685 boolean_t dummy;
686
687 if (hdl->sa_spill == NULL) {
688 VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL,
689 &hdl->sa_spill) == 0);
690 }
691 dmu_buf_will_dirty(hdl->sa_spill, tx);
692
693 spillhdrsize = sa_find_sizes(sa, &attr_desc[i],
694 attr_count - i, hdl->sa_spill, SA_SPILL, &i,
695 &spill_used, &dummy);
696
697 if (spill_used > SPA_OLD_MAXBLOCKSIZE)
698 return (SET_ERROR(EFBIG));
699
700 buf_space = hdl->sa_spill->db_size - spillhdrsize;
701 if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
702 hdl->sa_spill->db_size)
703 VERIFY(0 == sa_resize_spill(hdl,
704 BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
705 }
706
707 /* setup starting pointers to lay down data */
708 data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize);
709 sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data;
710 buftype = SA_BONUS;
711
712 if (spilling)
713 buf_space = (sa->sa_force_spill) ?
714 0 : SA_BLKPTR_SPACE - hdrsize;
715 else
716 buf_space = hdl->sa_bonus->db_size - hdrsize;
717
718 attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
719 KM_SLEEP);
720 lot_count = 0;
721
722 for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) {
723 uint16_t length;
724
725 ASSERT(IS_P2ALIGNED(data_start, 8));
726 ASSERT(IS_P2ALIGNED(buf_space, 8));
727 attrs[i] = attr_desc[i].sa_attr;
728 length = SA_REGISTERED_LEN(sa, attrs[i]);
729 if (length == 0)
730 length = attr_desc[i].sa_length;
731 else
732 VERIFY(length == attr_desc[i].sa_length);
733
734 if (buf_space < length) { /* switch to spill buffer */
735 VERIFY(spilling);
736 VERIFY(bonustype == DMU_OT_SA);
737 if (buftype == SA_BONUS && !sa->sa_force_spill) {
738 sa_find_layout(hdl->sa_os, hash, attrs_start,
739 lot_count, tx, &lot);
740 SA_SET_HDR(sahdr, lot->lot_num, hdrsize);
741 }
742
743 buftype = SA_SPILL;
744 hash = -1ULL;
745 len_idx = 0;
746
747 sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data;
748 sahdr->sa_magic = SA_MAGIC;
749 data_start = (void *)((uintptr_t)sahdr +
750 spillhdrsize);
751 attrs_start = &attrs[i];
752 buf_space = hdl->sa_spill->db_size - spillhdrsize;
753 lot_count = 0;
754 }
755 hash ^= SA_ATTR_HASH(attrs[i]);
756 attr_desc[i].sa_addr = data_start;
757 attr_desc[i].sa_size = length;
758 SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data,
759 data_start, length);
760 if (sa->sa_attr_table[attrs[i]].sa_length == 0) {
761 sahdr->sa_lengths[len_idx++] = length;
762 }
763 VERIFY((uintptr_t)data_start % 8 == 0);
764 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
765 length), 8);
766 buf_space -= P2ROUNDUP(length, 8);
767 lot_count++;
768 }
769
770 sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot);
771
772 /*
773 * Verify that old znodes always have layout number 0.
774 * Must be DMU_OT_SA for arbitrary layouts
775 */
776 VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) ||
777 (bonustype == DMU_OT_SA && lot->lot_num > 1));
778
779 if (bonustype == DMU_OT_SA) {
780 SA_SET_HDR(sahdr, lot->lot_num,
781 buftype == SA_BONUS ? hdrsize : spillhdrsize);
782 }
783
784 kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count);
785 if (hdl->sa_bonus_tab) {
786 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
787 hdl->sa_bonus_tab = NULL;
788 }
789 if (!sa->sa_force_spill)
790 VERIFY(0 == sa_build_index(hdl, SA_BONUS));
791 if (hdl->sa_spill) {
792 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
793 if (!spilling) {
794 /*
795 * remove spill block that is no longer needed.
796 */
797 dmu_buf_rele(hdl->sa_spill, NULL);
798 hdl->sa_spill = NULL;
799 hdl->sa_spill_tab = NULL;
800 VERIFY(0 == dmu_rm_spill(hdl->sa_os,
801 sa_handle_object(hdl), tx));
802 } else {
803 VERIFY(0 == sa_build_index(hdl, SA_SPILL));
804 }
805 }
806
807 return (0);
808 }
809
810 static void
sa_free_attr_table(sa_os_t * sa)811 sa_free_attr_table(sa_os_t *sa)
812 {
813 int i;
814
815 if (sa->sa_attr_table == NULL)
816 return;
817
818 for (i = 0; i != sa->sa_num_attrs; i++) {
819 if (sa->sa_attr_table[i].sa_name)
820 kmem_free(sa->sa_attr_table[i].sa_name,
821 strlen(sa->sa_attr_table[i].sa_name) + 1);
822 }
823
824 kmem_free(sa->sa_attr_table,
825 sizeof (sa_attr_table_t) * sa->sa_num_attrs);
826
827 sa->sa_attr_table = NULL;
828 }
829
830 static int
sa_attr_table_setup(objset_t * os,sa_attr_reg_t * reg_attrs,int count)831 sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count)
832 {
833 sa_os_t *sa = os->os_sa;
834 uint64_t sa_attr_count = 0;
835 uint64_t sa_reg_count = 0;
836 int error = 0;
837 uint64_t attr_value;
838 sa_attr_table_t *tb;
839 zap_cursor_t zc;
840 zap_attribute_t za;
841 int registered_count = 0;
842 int i;
843 dmu_objset_type_t ostype = dmu_objset_type(os);
844
845 sa->sa_user_table =
846 kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP);
847 sa->sa_user_table_sz = count * sizeof (sa_attr_type_t);
848
849 if (sa->sa_reg_attr_obj != 0) {
850 error = zap_count(os, sa->sa_reg_attr_obj,
851 &sa_attr_count);
852
853 /*
854 * Make sure we retrieved a count and that it isn't zero
855 */
856 if (error || (error == 0 && sa_attr_count == 0)) {
857 if (error == 0)
858 error = SET_ERROR(EINVAL);
859 goto bail;
860 }
861 sa_reg_count = sa_attr_count;
862 }
863
864 if (ostype == DMU_OST_ZFS && sa_attr_count == 0)
865 sa_attr_count += sa_legacy_attr_count;
866
867 /* Allocate attribute numbers for attributes that aren't registered */
868 for (i = 0; i != count; i++) {
869 boolean_t found = B_FALSE;
870 int j;
871
872 if (ostype == DMU_OST_ZFS) {
873 for (j = 0; j != sa_legacy_attr_count; j++) {
874 if (strcmp(reg_attrs[i].sa_name,
875 sa_legacy_attrs[j].sa_name) == 0) {
876 sa->sa_user_table[i] =
877 sa_legacy_attrs[j].sa_attr;
878 found = B_TRUE;
879 }
880 }
881 }
882 if (found)
883 continue;
884
885 if (sa->sa_reg_attr_obj)
886 error = zap_lookup(os, sa->sa_reg_attr_obj,
887 reg_attrs[i].sa_name, 8, 1, &attr_value);
888 else
889 error = SET_ERROR(ENOENT);
890 switch (error) {
891 case ENOENT:
892 sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count;
893 sa_attr_count++;
894 break;
895 case 0:
896 sa->sa_user_table[i] = ATTR_NUM(attr_value);
897 break;
898 default:
899 goto bail;
900 }
901 }
902
903 sa->sa_num_attrs = sa_attr_count;
904 tb = sa->sa_attr_table =
905 kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP);
906
907 /*
908 * Attribute table is constructed from requested attribute list,
909 * previously foreign registered attributes, and also the legacy
910 * ZPL set of attributes.
911 */
912
913 if (sa->sa_reg_attr_obj) {
914 for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj);
915 (error = zap_cursor_retrieve(&zc, &za)) == 0;
916 zap_cursor_advance(&zc)) {
917 uint64_t value;
918 value = za.za_first_integer;
919
920 registered_count++;
921 tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value);
922 tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value);
923 tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value);
924 tb[ATTR_NUM(value)].sa_registered = B_TRUE;
925
926 if (tb[ATTR_NUM(value)].sa_name) {
927 continue;
928 }
929 tb[ATTR_NUM(value)].sa_name =
930 kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP);
931 (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name,
932 strlen(za.za_name) +1);
933 }
934 zap_cursor_fini(&zc);
935 /*
936 * Make sure we processed the correct number of registered
937 * attributes
938 */
939 if (registered_count != sa_reg_count) {
940 ASSERT(error != 0);
941 goto bail;
942 }
943
944 }
945
946 if (ostype == DMU_OST_ZFS) {
947 for (i = 0; i != sa_legacy_attr_count; i++) {
948 if (tb[i].sa_name)
949 continue;
950 tb[i].sa_attr = sa_legacy_attrs[i].sa_attr;
951 tb[i].sa_length = sa_legacy_attrs[i].sa_length;
952 tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap;
953 tb[i].sa_registered = B_FALSE;
954 tb[i].sa_name =
955 kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1,
956 KM_SLEEP);
957 (void) strlcpy(tb[i].sa_name,
958 sa_legacy_attrs[i].sa_name,
959 strlen(sa_legacy_attrs[i].sa_name) + 1);
960 }
961 }
962
963 for (i = 0; i != count; i++) {
964 sa_attr_type_t attr_id;
965
966 attr_id = sa->sa_user_table[i];
967 if (tb[attr_id].sa_name)
968 continue;
969
970 tb[attr_id].sa_length = reg_attrs[i].sa_length;
971 tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap;
972 tb[attr_id].sa_attr = attr_id;
973 tb[attr_id].sa_name =
974 kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP);
975 (void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name,
976 strlen(reg_attrs[i].sa_name) + 1);
977 }
978
979 sa->sa_need_attr_registration =
980 (sa_attr_count != registered_count);
981
982 return (0);
983 bail:
984 kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t));
985 sa->sa_user_table = NULL;
986 sa_free_attr_table(sa);
987 return ((error != 0) ? error : EINVAL);
988 }
989
990 int
sa_setup(objset_t * os,uint64_t sa_obj,sa_attr_reg_t * reg_attrs,int count,sa_attr_type_t ** user_table)991 sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
992 sa_attr_type_t **user_table)
993 {
994 zap_cursor_t zc;
995 zap_attribute_t za;
996 sa_os_t *sa;
997 dmu_objset_type_t ostype = dmu_objset_type(os);
998 sa_attr_type_t *tb;
999 int error;
1000
1001 mutex_enter(&os->os_user_ptr_lock);
1002 if (os->os_sa) {
1003 mutex_enter(&os->os_sa->sa_lock);
1004 mutex_exit(&os->os_user_ptr_lock);
1005 tb = os->os_sa->sa_user_table;
1006 mutex_exit(&os->os_sa->sa_lock);
1007 *user_table = tb;
1008 return (0);
1009 }
1010
1011 sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP);
1012 mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL);
1013 sa->sa_master_obj = sa_obj;
1014
1015 os->os_sa = sa;
1016 mutex_enter(&sa->sa_lock);
1017 mutex_exit(&os->os_user_ptr_lock);
1018 avl_create(&sa->sa_layout_num_tree, layout_num_compare,
1019 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
1020 avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
1021 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node));
1022
1023 if (sa_obj) {
1024 error = zap_lookup(os, sa_obj, SA_LAYOUTS,
1025 8, 1, &sa->sa_layout_attr_obj);
1026 if (error != 0 && error != ENOENT)
1027 goto fail;
1028 error = zap_lookup(os, sa_obj, SA_REGISTRY,
1029 8, 1, &sa->sa_reg_attr_obj);
1030 if (error != 0 && error != ENOENT)
1031 goto fail;
1032 }
1033
1034 if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0)
1035 goto fail;
1036
1037 if (sa->sa_layout_attr_obj != 0) {
1038 uint64_t layout_count;
1039
1040 error = zap_count(os, sa->sa_layout_attr_obj,
1041 &layout_count);
1042
1043 /*
1044 * Layout number count should be > 0
1045 */
1046 if (error || (error == 0 && layout_count == 0)) {
1047 if (error == 0)
1048 error = SET_ERROR(EINVAL);
1049 goto fail;
1050 }
1051
1052 for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj);
1053 (error = zap_cursor_retrieve(&zc, &za)) == 0;
1054 zap_cursor_advance(&zc)) {
1055 sa_attr_type_t *lot_attrs;
1056 uint64_t lot_num;
1057
1058 lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) *
1059 za.za_num_integers, KM_SLEEP);
1060
1061 if ((error = (zap_lookup(os, sa->sa_layout_attr_obj,
1062 za.za_name, 2, za.za_num_integers,
1063 lot_attrs))) != 0) {
1064 kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
1065 za.za_num_integers);
1066 break;
1067 }
1068 VERIFY(ddi_strtoull(za.za_name, NULL, 10,
1069 (unsigned long long *)&lot_num) == 0);
1070
1071 (void) sa_add_layout_entry(os, lot_attrs,
1072 za.za_num_integers, lot_num,
1073 sa_layout_info_hash(lot_attrs,
1074 za.za_num_integers), B_FALSE, NULL);
1075 kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
1076 za.za_num_integers);
1077 }
1078 zap_cursor_fini(&zc);
1079
1080 /*
1081 * Make sure layout count matches number of entries added
1082 * to AVL tree
1083 */
1084 if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) {
1085 ASSERT(error != 0);
1086 goto fail;
1087 }
1088 }
1089
1090 /* Add special layout number for old ZNODES */
1091 if (ostype == DMU_OST_ZFS) {
1092 (void) sa_add_layout_entry(os, sa_legacy_zpl_layout,
1093 sa_legacy_attr_count, 0,
1094 sa_layout_info_hash(sa_legacy_zpl_layout,
1095 sa_legacy_attr_count), B_FALSE, NULL);
1096
1097 (void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1,
1098 0, B_FALSE, NULL);
1099 }
1100 *user_table = os->os_sa->sa_user_table;
1101 mutex_exit(&sa->sa_lock);
1102 return (0);
1103 fail:
1104 os->os_sa = NULL;
1105 sa_free_attr_table(sa);
1106 if (sa->sa_user_table)
1107 kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
1108 mutex_exit(&sa->sa_lock);
1109 avl_destroy(&sa->sa_layout_hash_tree);
1110 avl_destroy(&sa->sa_layout_num_tree);
1111 mutex_destroy(&sa->sa_lock);
1112 kmem_free(sa, sizeof (sa_os_t));
1113 return ((error == ECKSUM) ? EIO : error);
1114 }
1115
1116 void
sa_tear_down(objset_t * os)1117 sa_tear_down(objset_t *os)
1118 {
1119 sa_os_t *sa = os->os_sa;
1120 sa_lot_t *layout;
1121 void *cookie;
1122
1123 kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
1124
1125 /* Free up attr table */
1126
1127 sa_free_attr_table(sa);
1128
1129 cookie = NULL;
1130 while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) {
1131 sa_idx_tab_t *tab;
1132 while (tab = list_head(&layout->lot_idx_tab)) {
1133 ASSERT(refcount_count(&tab->sa_refcount));
1134 sa_idx_tab_rele(os, tab);
1135 }
1136 }
1137
1138 cookie = NULL;
1139 while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) {
1140 kmem_free(layout->lot_attrs,
1141 sizeof (sa_attr_type_t) * layout->lot_attr_count);
1142 kmem_free(layout, sizeof (sa_lot_t));
1143 }
1144
1145 avl_destroy(&sa->sa_layout_hash_tree);
1146 avl_destroy(&sa->sa_layout_num_tree);
1147 mutex_destroy(&sa->sa_lock);
1148
1149 kmem_free(sa, sizeof (sa_os_t));
1150 os->os_sa = NULL;
1151 }
1152
1153 void
sa_build_idx_tab(void * hdr,void * attr_addr,sa_attr_type_t attr,uint16_t length,int length_idx,boolean_t var_length,void * userp)1154 sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr,
1155 uint16_t length, int length_idx, boolean_t var_length, void *userp)
1156 {
1157 sa_idx_tab_t *idx_tab = userp;
1158
1159 if (var_length) {
1160 ASSERT(idx_tab->sa_variable_lengths);
1161 idx_tab->sa_variable_lengths[length_idx] = length;
1162 }
1163 TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx,
1164 (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr));
1165 }
1166
1167 static void
sa_attr_iter(objset_t * os,sa_hdr_phys_t * hdr,dmu_object_type_t type,sa_iterfunc_t func,sa_lot_t * tab,void * userp)1168 sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type,
1169 sa_iterfunc_t func, sa_lot_t *tab, void *userp)
1170 {
1171 void *data_start;
1172 sa_lot_t *tb = tab;
1173 sa_lot_t search;
1174 avl_index_t loc;
1175 sa_os_t *sa = os->os_sa;
1176 int i;
1177 uint16_t *length_start = NULL;
1178 uint8_t length_idx = 0;
1179
1180 if (tab == NULL) {
1181 search.lot_num = SA_LAYOUT_NUM(hdr, type);
1182 tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
1183 ASSERT(tb);
1184 }
1185
1186 if (IS_SA_BONUSTYPE(type)) {
1187 data_start = (void *)P2ROUNDUP(((uintptr_t)hdr +
1188 offsetof(sa_hdr_phys_t, sa_lengths) +
1189 (sizeof (uint16_t) * tb->lot_var_sizes)), 8);
1190 length_start = hdr->sa_lengths;
1191 } else {
1192 data_start = hdr;
1193 }
1194
1195 for (i = 0; i != tb->lot_attr_count; i++) {
1196 int attr_length, reg_length;
1197 uint8_t idx_len;
1198
1199 reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length;
1200 if (reg_length) {
1201 attr_length = reg_length;
1202 idx_len = 0;
1203 } else {
1204 attr_length = length_start[length_idx];
1205 idx_len = length_idx++;
1206 }
1207
1208 func(hdr, data_start, tb->lot_attrs[i], attr_length,
1209 idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp);
1210
1211 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
1212 attr_length), 8);
1213 }
1214 }
1215
1216 /*ARGSUSED*/
1217 void
sa_byteswap_cb(void * hdr,void * attr_addr,sa_attr_type_t attr,uint16_t length,int length_idx,boolean_t variable_length,void * userp)1218 sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr,
1219 uint16_t length, int length_idx, boolean_t variable_length, void *userp)
1220 {
1221 sa_handle_t *hdl = userp;
1222 sa_os_t *sa = hdl->sa_os->os_sa;
1223
1224 sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length);
1225 }
1226
1227 void
sa_byteswap(sa_handle_t * hdl,sa_buf_type_t buftype)1228 sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype)
1229 {
1230 sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype);
1231 dmu_buf_impl_t *db;
1232 sa_os_t *sa = hdl->sa_os->os_sa;
1233 int num_lengths = 1;
1234 int i;
1235
1236 ASSERT(MUTEX_HELD(&sa->sa_lock));
1237 if (sa_hdr_phys->sa_magic == SA_MAGIC)
1238 return;
1239
1240 db = SA_GET_DB(hdl, buftype);
1241
1242 if (buftype == SA_SPILL) {
1243 arc_release(db->db_buf, NULL);
1244 arc_buf_thaw(db->db_buf);
1245 }
1246
1247 sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic);
1248 sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info);
1249
1250 /*
1251 * Determine number of variable lenghts in header
1252 * The standard 8 byte header has one for free and a
1253 * 16 byte header would have 4 + 1;
1254 */
1255 if (SA_HDR_SIZE(sa_hdr_phys) > 8)
1256 num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1;
1257 for (i = 0; i != num_lengths; i++)
1258 sa_hdr_phys->sa_lengths[i] =
1259 BSWAP_16(sa_hdr_phys->sa_lengths[i]);
1260
1261 sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA,
1262 sa_byteswap_cb, NULL, hdl);
1263
1264 if (buftype == SA_SPILL)
1265 arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf);
1266 }
1267
1268 static int
sa_build_index(sa_handle_t * hdl,sa_buf_type_t buftype)1269 sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
1270 {
1271 sa_hdr_phys_t *sa_hdr_phys;
1272 dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype);
1273 dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db);
1274 sa_os_t *sa = hdl->sa_os->os_sa;
1275 sa_idx_tab_t *idx_tab;
1276
1277 sa_hdr_phys = SA_GET_HDR(hdl, buftype);
1278
1279 mutex_enter(&sa->sa_lock);
1280
1281 /* Do we need to byteswap? */
1282
1283 /* only check if not old znode */
1284 if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC &&
1285 sa_hdr_phys->sa_magic != 0) {
1286 VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC);
1287 sa_byteswap(hdl, buftype);
1288 }
1289
1290 idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys);
1291
1292 if (buftype == SA_BONUS)
1293 hdl->sa_bonus_tab = idx_tab;
1294 else
1295 hdl->sa_spill_tab = idx_tab;
1296
1297 mutex_exit(&sa->sa_lock);
1298 return (0);
1299 }
1300
/*
 * Eviction callback registered on the bonus dbuf for shared handles.
 * It is never expected to run: it panics unconditionally.
 */
/*ARGSUSED*/
static void
sa_evict(void *dbu)
{
	panic("evicting sa dbuf\n");
}
1307
1308 static void
sa_idx_tab_rele(objset_t * os,void * arg)1309 sa_idx_tab_rele(objset_t *os, void *arg)
1310 {
1311 sa_os_t *sa = os->os_sa;
1312 sa_idx_tab_t *idx_tab = arg;
1313
1314 if (idx_tab == NULL)
1315 return;
1316
1317 mutex_enter(&sa->sa_lock);
1318 if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) {
1319 list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab);
1320 if (idx_tab->sa_variable_lengths)
1321 kmem_free(idx_tab->sa_variable_lengths,
1322 sizeof (uint16_t) *
1323 idx_tab->sa_layout->lot_var_sizes);
1324 refcount_destroy(&idx_tab->sa_refcount);
1325 kmem_free(idx_tab->sa_idx_tab,
1326 sizeof (uint32_t) * sa->sa_num_attrs);
1327 kmem_free(idx_tab, sizeof (sa_idx_tab_t));
1328 }
1329 mutex_exit(&sa->sa_lock);
1330 }
1331
1332 static void
sa_idx_tab_hold(objset_t * os,sa_idx_tab_t * idx_tab)1333 sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab)
1334 {
1335 sa_os_t *sa = os->os_sa;
1336
1337 ASSERT(MUTEX_HELD(&sa->sa_lock));
1338 (void) refcount_add(&idx_tab->sa_refcount, NULL);
1339 }
1340
1341 void
sa_handle_destroy(sa_handle_t * hdl)1342 sa_handle_destroy(sa_handle_t *hdl)
1343 {
1344 dmu_buf_t *db = hdl->sa_bonus;
1345
1346 mutex_enter(&hdl->sa_lock);
1347 (void) dmu_buf_remove_user(db, &hdl->sa_dbu);
1348
1349 if (hdl->sa_bonus_tab)
1350 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
1351
1352 if (hdl->sa_spill_tab)
1353 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
1354
1355 dmu_buf_rele(hdl->sa_bonus, NULL);
1356
1357 if (hdl->sa_spill)
1358 dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL);
1359 mutex_exit(&hdl->sa_lock);
1360
1361 kmem_cache_free(sa_cache, hdl);
1362 }
1363
1364 int
sa_handle_get_from_db(objset_t * os,dmu_buf_t * db,void * userp,sa_handle_type_t hdl_type,sa_handle_t ** handlepp)1365 sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
1366 sa_handle_type_t hdl_type, sa_handle_t **handlepp)
1367 {
1368 int error = 0;
1369 dmu_object_info_t doi;
1370 sa_handle_t *handle = NULL;
1371
1372 #ifdef ZFS_DEBUG
1373 dmu_object_info_from_db(db, &doi);
1374 ASSERT(doi.doi_bonus_type == DMU_OT_SA ||
1375 doi.doi_bonus_type == DMU_OT_ZNODE);
1376 #endif
1377 /* find handle, if it exists */
1378 /* if one doesn't exist then create a new one, and initialize it */
1379
1380 if (hdl_type == SA_HDL_SHARED)
1381 handle = dmu_buf_get_user(db);
1382
1383 if (handle == NULL) {
1384 sa_handle_t *winner = NULL;
1385
1386 handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
1387 handle->sa_dbu.dbu_evict_func = NULL;
1388 handle->sa_userp = userp;
1389 handle->sa_bonus = db;
1390 handle->sa_os = os;
1391 handle->sa_spill = NULL;
1392 handle->sa_bonus_tab = NULL;
1393 handle->sa_spill_tab = NULL;
1394
1395 error = sa_build_index(handle, SA_BONUS);
1396
1397 if (hdl_type == SA_HDL_SHARED) {
1398 dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL);
1399 winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
1400 }
1401
1402 if (winner != NULL) {
1403 kmem_cache_free(sa_cache, handle);
1404 handle = winner;
1405 }
1406 }
1407 *handlepp = handle;
1408
1409 return (error);
1410 }
1411
1412 int
sa_handle_get(objset_t * objset,uint64_t objid,void * userp,sa_handle_type_t hdl_type,sa_handle_t ** handlepp)1413 sa_handle_get(objset_t *objset, uint64_t objid, void *userp,
1414 sa_handle_type_t hdl_type, sa_handle_t **handlepp)
1415 {
1416 dmu_buf_t *db;
1417 int error;
1418
1419 if (error = dmu_bonus_hold(objset, objid, NULL, &db))
1420 return (error);
1421
1422 return (sa_handle_get_from_db(objset, db, userp, hdl_type,
1423 handlepp));
1424 }
1425
1426 int
sa_buf_hold(objset_t * objset,uint64_t obj_num,void * tag,dmu_buf_t ** db)1427 sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db)
1428 {
1429 return (dmu_bonus_hold(objset, obj_num, tag, db));
1430 }
1431
1432 void
sa_buf_rele(dmu_buf_t * db,void * tag)1433 sa_buf_rele(dmu_buf_t *db, void *tag)
1434 {
1435 dmu_buf_rele(db, tag);
1436 }
1437
1438 int
sa_lookup_impl(sa_handle_t * hdl,sa_bulk_attr_t * bulk,int count)1439 sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count)
1440 {
1441 ASSERT(hdl);
1442 ASSERT(MUTEX_HELD(&hdl->sa_lock));
1443 return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL));
1444 }
1445
1446 int
sa_lookup(sa_handle_t * hdl,sa_attr_type_t attr,void * buf,uint32_t buflen)1447 sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
1448 {
1449 int error;
1450 sa_bulk_attr_t bulk;
1451
1452 bulk.sa_attr = attr;
1453 bulk.sa_data = buf;
1454 bulk.sa_length = buflen;
1455 bulk.sa_data_func = NULL;
1456
1457 ASSERT(hdl);
1458 mutex_enter(&hdl->sa_lock);
1459 error = sa_lookup_impl(hdl, &bulk, 1);
1460 mutex_exit(&hdl->sa_lock);
1461 return (error);
1462 }
1463
#ifdef _KERNEL
/*
 * Look up a single attribute and copy it out through uio.  At most
 * MIN(attribute size, uio->uio_resid) bytes are moved.  Returns the
 * lookup error, or the result of uiomove().
 */
int
sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
{
	sa_bulk_attr_t bulk;
	int error;

	/* No destination buffer: the lookup only locates the attribute. */
	bulk.sa_attr = attr;
	bulk.sa_data = NULL;
	bulk.sa_data_func = NULL;

	ASSERT(hdl);

	mutex_enter(&hdl->sa_lock);
	error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL);
	if (error == 0) {
		error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size,
		    uio->uio_resid), UIO_READ, uio);
	}
	mutex_exit(&hdl->sa_lock);

	return (error);
}
#endif
1487
1488 void *
sa_find_idx_tab(objset_t * os,dmu_object_type_t bonustype,void * data)1489 sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data)
1490 {
1491 sa_idx_tab_t *idx_tab;
1492 sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data;
1493 sa_os_t *sa = os->os_sa;
1494 sa_lot_t *tb, search;
1495 avl_index_t loc;
1496
1497 /*
1498 * Deterimine layout number. If SA node and header == 0 then
1499 * force the index table to the dummy "1" empty layout.
1500 *
1501 * The layout number would only be zero for a newly created file
1502 * that has not added any attributes yet, or with crypto enabled which
1503 * doesn't write any attributes to the bonus buffer.
1504 */
1505
1506 search.lot_num = SA_LAYOUT_NUM(hdr, bonustype);
1507
1508 tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
1509
1510 /* Verify header size is consistent with layout information */
1511 ASSERT(tb);
1512 ASSERT(IS_SA_BONUSTYPE(bonustype) &&
1513 SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) ||
1514 (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0));
1515
1516 /*
1517 * See if any of the already existing TOC entries can be reused?
1518 */
1519
1520 for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab;
1521 idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) {
1522 boolean_t valid_idx = B_TRUE;
1523 int i;
1524
1525 if (tb->lot_var_sizes != 0 &&
1526 idx_tab->sa_variable_lengths != NULL) {
1527 for (i = 0; i != tb->lot_var_sizes; i++) {
1528 if (hdr->sa_lengths[i] !=
1529 idx_tab->sa_variable_lengths[i]) {
1530 valid_idx = B_FALSE;
1531 break;
1532 }
1533 }
1534 }
1535 if (valid_idx) {
1536 sa_idx_tab_hold(os, idx_tab);
1537 return (idx_tab);
1538 }
1539 }
1540
1541 /* No such luck, create a new entry */
1542 idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP);
1543 idx_tab->sa_idx_tab =
1544 kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP);
1545 idx_tab->sa_layout = tb;
1546 refcount_create(&idx_tab->sa_refcount);
1547 if (tb->lot_var_sizes)
1548 idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) *
1549 tb->lot_var_sizes, KM_SLEEP);
1550
1551 sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab,
1552 tb, idx_tab);
1553 sa_idx_tab_hold(os, idx_tab); /* one hold for consumer */
1554 sa_idx_tab_hold(os, idx_tab); /* one for layout */
1555 list_insert_tail(&tb->lot_idx_tab, idx_tab);
1556 return (idx_tab);
1557 }
1558
1559 void
sa_default_locator(void ** dataptr,uint32_t * len,uint32_t total_len,boolean_t start,void * userdata)1560 sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len,
1561 boolean_t start, void *userdata)
1562 {
1563 ASSERT(start);
1564
1565 *dataptr = userdata;
1566 *len = total_len;
1567 }
1568
1569 static void
sa_attr_register_sync(sa_handle_t * hdl,dmu_tx_t * tx)1570 sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
1571 {
1572 uint64_t attr_value = 0;
1573 sa_os_t *sa = hdl->sa_os->os_sa;
1574 sa_attr_table_t *tb = sa->sa_attr_table;
1575 int i;
1576
1577 mutex_enter(&sa->sa_lock);
1578
1579 if (!sa->sa_need_attr_registration || sa->sa_master_obj == 0) {
1580 mutex_exit(&sa->sa_lock);
1581 return;
1582 }
1583
1584 if (sa->sa_reg_attr_obj == 0) {
1585 sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
1586 DMU_OT_SA_ATTR_REGISTRATION,
1587 sa->sa_master_obj, SA_REGISTRY, tx);
1588 }
1589 for (i = 0; i != sa->sa_num_attrs; i++) {
1590 if (sa->sa_attr_table[i].sa_registered)
1591 continue;
1592 ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length,
1593 tb[i].sa_byteswap);
1594 VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
1595 tb[i].sa_name, 8, 1, &attr_value, tx));
1596 tb[i].sa_registered = B_TRUE;
1597 }
1598 sa->sa_need_attr_registration = B_FALSE;
1599 mutex_exit(&sa->sa_lock);
1600 }
1601
1602 /*
1603 * Replace all attributes with attributes specified in template.
1604 * If dnode had a spill buffer then those attributes will be
1605 * also be replaced, possibly with just an empty spill block
1606 *
1607 * This interface is intended to only be used for bulk adding of
1608 * attributes for a new file. It will also be used by the ZPL
1609 * when converting and old formatted znode to native SA support.
1610 */
1611 int
sa_replace_all_by_template_locked(sa_handle_t * hdl,sa_bulk_attr_t * attr_desc,int attr_count,dmu_tx_t * tx)1612 sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
1613 int attr_count, dmu_tx_t *tx)
1614 {
1615 sa_os_t *sa = hdl->sa_os->os_sa;
1616
1617 if (sa->sa_need_attr_registration)
1618 sa_attr_register_sync(hdl, tx);
1619 return (sa_build_layouts(hdl, attr_desc, attr_count, tx));
1620 }
1621
1622 int
sa_replace_all_by_template(sa_handle_t * hdl,sa_bulk_attr_t * attr_desc,int attr_count,dmu_tx_t * tx)1623 sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
1624 int attr_count, dmu_tx_t *tx)
1625 {
1626 int error;
1627
1628 mutex_enter(&hdl->sa_lock);
1629 error = sa_replace_all_by_template_locked(hdl, attr_desc,
1630 attr_count, tx);
1631 mutex_exit(&hdl->sa_lock);
1632 return (error);
1633 }
1634
/*
 * Add/remove a single attribute or replace a variable-sized attribute value
 * with a value of a different size, and then rewrite the entire set
 * of attributes.
 * Same-length attribute value replacement (including fixed-length attributes)
 * is handled more efficiently by the upper layers.
 *
 * hdl		handle being modified; its sa_lock must be held
 * newattr	attribute being added, removed or replaced
 * action	SA_ADD, SA_REMOVE or SA_REPLACE
 * locator	data locator for the new value (may be NULL)
 * datastart	new value, or the locator's user data
 * buflen	length of the new value
 *
 * The current bonus and spill contents are copied aside, a descriptor
 * array covering every attribute that should exist afterwards is built
 * (old values point into the copies), and sa_build_layouts() rewrites the
 * buffers from it.  Returns 0 or an errno.
 *
 * NOTE(review): the descriptor array is sized from the expected result
 * (one more for SA_ADD, one fewer for SA_REMOVE); only the ASSERTs below
 * check that newattr is present for SA_REMOVE and absent for SA_ADD, so
 * callers appear to be responsible for that -- confirm.
 */
static int
sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
    uint16_t buflen, dmu_tx_t *tx)
{
	sa_os_t *sa = hdl->sa_os->os_sa;
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus;
	dnode_t *dn;
	sa_bulk_attr_t *attr_desc;
	void *old_data[2];	/* [0] = bonus copy, [1] = spill copy */
	int bonus_attr_count = 0;
	int bonus_data_size = 0;
	int spill_data_size = 0;
	int spill_attr_count = 0;
	int error;
	uint16_t length;
	int i, j, k, length_idx;
	sa_hdr_phys_t *hdr;
	sa_idx_tab_t *idx_tab;
	int attr_count;
	int count;

	ASSERT(MUTEX_HELD(&hdl->sa_lock));

	/* First make a copy of the old data */

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	if (dn->dn_bonuslen != 0) {
		bonus_data_size = hdl->sa_bonus->db_size;
		old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP);
		bcopy(hdl->sa_bonus->db_data, old_data[0],
		    hdl->sa_bonus->db_size);
		bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count;
	} else {
		old_data[0] = NULL;
	}
	DB_DNODE_EXIT(db);

	/* Bring spill buffer online if it isn't currently */

	if ((error = sa_get_spill(hdl)) == 0) {
		spill_data_size = hdl->sa_spill->db_size;
		old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP);
		bcopy(hdl->sa_spill->db_data, old_data[1],
		    hdl->sa_spill->db_size);
		spill_attr_count =
		    hdl->sa_spill_tab->sa_layout->lot_attr_count;
	} else if (error && error != ENOENT) {
		/* Any failure other than ENOENT aborts the modification. */
		if (old_data[0])
			kmem_free(old_data[0], bonus_data_size);
		return (error);
	} else {
		/* ENOENT from sa_get_spill(): proceed without spill data. */
		old_data[1] = NULL;
	}

	/* build descriptor of all attributes */

	attr_count = bonus_attr_count + spill_attr_count;
	if (action == SA_ADD)
		attr_count++;
	else if (action == SA_REMOVE)
		attr_count--;

	attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP);

	/*
	 * loop through bonus and spill buffer if it exists, and
	 * build up new attr_descriptor to reset the attributes
	 */
	/* k selects the buffer (0 bonus, 1 spill); j indexes attr_desc. */
	k = j = 0;
	count = bonus_attr_count;
	hdr = SA_GET_HDR(hdl, SA_BONUS);
	idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS);
	for (; k != 2; k++) {
		/* iterate over each attribute in layout */
		for (i = 0, length_idx = 0; i != count; i++) {
			sa_attr_type_t attr;

			attr = idx_tab->sa_layout->lot_attrs[i];
			if (attr == newattr) {
				/* duplicate attributes are not allowed */
				ASSERT(action == SA_REPLACE ||
				    action == SA_REMOVE);
				/* must be variable-sized to be replaced here */
				if (action == SA_REPLACE) {
					ASSERT(SA_REGISTERED_LEN(sa, attr) == 0);
					SA_ADD_BULK_ATTR(attr_desc, j, attr,
					    locator, datastart, buflen);
				}
				/* SA_REMOVE: the attribute is simply not carried over. */
			} else {
				/*
				 * Unchanged attribute: describe its old value
				 * in the saved copy of this buffer.
				 */
				length = SA_REGISTERED_LEN(sa, attr);
				if (length == 0) {
					length = hdr->sa_lengths[length_idx];
				}

				SA_ADD_BULK_ATTR(attr_desc, j, attr,
				    NULL, (void *)
				    (TOC_OFF(idx_tab->sa_idx_tab[attr]) +
				    (uintptr_t)old_data[k]), length);
			}
			/*
			 * Every variable-length attribute uses one slot of
			 * hdr->sa_lengths, including one being dropped or
			 * replaced.
			 */
			if (SA_REGISTERED_LEN(sa, attr) == 0)
				length_idx++;
		}
		/* Move on to the spill buffer, if there is one. */
		if (k == 0 && hdl->sa_spill) {
			hdr = SA_GET_HDR(hdl, SA_SPILL);
			idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL);
			count = spill_attr_count;
		} else {
			break;
		}
	}
	if (action == SA_ADD) {
		/*
		 * NOTE(review): length is computed here but buflen is what
		 * gets passed to SA_ADD_BULK_ATTR -- confirm this is intended.
		 */
		length = SA_REGISTERED_LEN(sa, newattr);
		if (length == 0) {
			length = buflen;
		}
		SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator,
		    datastart, buflen);
	}
	ASSERT3U(j, ==, attr_count);

	error = sa_build_layouts(hdl, attr_desc, attr_count, tx);

	/* The saved copies and the descriptor array are no longer needed. */
	if (old_data[0])
		kmem_free(old_data[0], bonus_data_size);
	if (old_data[1])
		kmem_free(old_data[1], spill_data_size);
	kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);

	return (error);
}
1774
1775 static int
sa_bulk_update_impl(sa_handle_t * hdl,sa_bulk_attr_t * bulk,int count,dmu_tx_t * tx)1776 sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
1777 dmu_tx_t *tx)
1778 {
1779 int error;
1780 sa_os_t *sa = hdl->sa_os->os_sa;
1781 dmu_object_type_t bonustype;
1782
1783 bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS));
1784
1785 ASSERT(hdl);
1786 ASSERT(MUTEX_HELD(&hdl->sa_lock));
1787
1788 /* sync out registration table if necessary */
1789 if (sa->sa_need_attr_registration)
1790 sa_attr_register_sync(hdl, tx);
1791
1792 error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx);
1793 if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb)
1794 sa->sa_update_cb(hdl, tx);
1795
1796 return (error);
1797 }
1798
1799 /*
1800 * update or add new attribute
1801 */
1802 int
sa_update(sa_handle_t * hdl,sa_attr_type_t type,void * buf,uint32_t buflen,dmu_tx_t * tx)1803 sa_update(sa_handle_t *hdl, sa_attr_type_t type,
1804 void *buf, uint32_t buflen, dmu_tx_t *tx)
1805 {
1806 int error;
1807 sa_bulk_attr_t bulk;
1808
1809 bulk.sa_attr = type;
1810 bulk.sa_data_func = NULL;
1811 bulk.sa_length = buflen;
1812 bulk.sa_data = buf;
1813
1814 mutex_enter(&hdl->sa_lock);
1815 error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
1816 mutex_exit(&hdl->sa_lock);
1817 return (error);
1818 }
1819
1820 int
sa_update_from_cb(sa_handle_t * hdl,sa_attr_type_t attr,uint32_t buflen,sa_data_locator_t * locator,void * userdata,dmu_tx_t * tx)1821 sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr,
1822 uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx)
1823 {
1824 int error;
1825 sa_bulk_attr_t bulk;
1826
1827 bulk.sa_attr = attr;
1828 bulk.sa_data = userdata;
1829 bulk.sa_data_func = locator;
1830 bulk.sa_length = buflen;
1831
1832 mutex_enter(&hdl->sa_lock);
1833 error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
1834 mutex_exit(&hdl->sa_lock);
1835 return (error);
1836 }
1837
1838 /*
1839 * Return size of an attribute
1840 */
1841
1842 int
sa_size(sa_handle_t * hdl,sa_attr_type_t attr,int * size)1843 sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size)
1844 {
1845 sa_bulk_attr_t bulk;
1846 int error;
1847
1848 bulk.sa_data = NULL;
1849 bulk.sa_attr = attr;
1850 bulk.sa_data_func = NULL;
1851
1852 ASSERT(hdl);
1853 mutex_enter(&hdl->sa_lock);
1854 if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) {
1855 mutex_exit(&hdl->sa_lock);
1856 return (error);
1857 }
1858 *size = bulk.sa_size;
1859
1860 mutex_exit(&hdl->sa_lock);
1861 return (0);
1862 }
1863
1864 int
sa_bulk_lookup_locked(sa_handle_t * hdl,sa_bulk_attr_t * attrs,int count)1865 sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
1866 {
1867 ASSERT(hdl);
1868 ASSERT(MUTEX_HELD(&hdl->sa_lock));
1869 return (sa_lookup_impl(hdl, attrs, count));
1870 }
1871
1872 int
sa_bulk_lookup(sa_handle_t * hdl,sa_bulk_attr_t * attrs,int count)1873 sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
1874 {
1875 int error;
1876
1877 ASSERT(hdl);
1878 mutex_enter(&hdl->sa_lock);
1879 error = sa_bulk_lookup_locked(hdl, attrs, count);
1880 mutex_exit(&hdl->sa_lock);
1881 return (error);
1882 }
1883
1884 int
sa_bulk_update(sa_handle_t * hdl,sa_bulk_attr_t * attrs,int count,dmu_tx_t * tx)1885 sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx)
1886 {
1887 int error;
1888
1889 ASSERT(hdl);
1890 mutex_enter(&hdl->sa_lock);
1891 error = sa_bulk_update_impl(hdl, attrs, count, tx);
1892 mutex_exit(&hdl->sa_lock);
1893 return (error);
1894 }
1895
1896 int
sa_remove(sa_handle_t * hdl,sa_attr_type_t attr,dmu_tx_t * tx)1897 sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx)
1898 {
1899 int error;
1900
1901 mutex_enter(&hdl->sa_lock);
1902 error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL,
1903 NULL, 0, tx);
1904 mutex_exit(&hdl->sa_lock);
1905 return (error);
1906 }
1907
1908 void
sa_object_info(sa_handle_t * hdl,dmu_object_info_t * doi)1909 sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi)
1910 {
1911 dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi);
1912 }
1913
1914 void
sa_object_size(sa_handle_t * hdl,uint32_t * blksize,u_longlong_t * nblocks)1915 sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks)
1916 {
1917 dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus,
1918 blksize, nblocks);
1919 }
1920
1921 void
sa_set_userp(sa_handle_t * hdl,void * ptr)1922 sa_set_userp(sa_handle_t *hdl, void *ptr)
1923 {
1924 hdl->sa_userp = ptr;
1925 }
1926
1927 dmu_buf_t *
sa_get_db(sa_handle_t * hdl)1928 sa_get_db(sa_handle_t *hdl)
1929 {
1930 return ((dmu_buf_t *)hdl->sa_bonus);
1931 }
1932
1933 void *
sa_get_userdata(sa_handle_t * hdl)1934 sa_get_userdata(sa_handle_t *hdl)
1935 {
1936 return (hdl->sa_userp);
1937 }
1938
1939 void
sa_register_update_callback_locked(objset_t * os,sa_update_cb_t * func)1940 sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func)
1941 {
1942 ASSERT(MUTEX_HELD(&os->os_sa->sa_lock));
1943 os->os_sa->sa_update_cb = func;
1944 }
1945
1946 void
sa_register_update_callback(objset_t * os,sa_update_cb_t * func)1947 sa_register_update_callback(objset_t *os, sa_update_cb_t *func)
1948 {
1949
1950 mutex_enter(&os->os_sa->sa_lock);
1951 sa_register_update_callback_locked(os, func);
1952 mutex_exit(&os->os_sa->sa_lock);
1953 }
1954
1955 uint64_t
sa_handle_object(sa_handle_t * hdl)1956 sa_handle_object(sa_handle_t *hdl)
1957 {
1958 return (hdl->sa_bonus->db_object);
1959 }
1960
1961 boolean_t
sa_enabled(objset_t * os)1962 sa_enabled(objset_t *os)
1963 {
1964 return (os->os_sa == NULL);
1965 }
1966
1967 int
sa_set_sa_object(objset_t * os,uint64_t sa_object)1968 sa_set_sa_object(objset_t *os, uint64_t sa_object)
1969 {
1970 sa_os_t *sa = os->os_sa;
1971
1972 if (sa->sa_master_obj)
1973 return (1);
1974
1975 sa->sa_master_obj = sa_object;
1976
1977 return (0);
1978 }
1979
1980 int
sa_hdrsize(void * arg)1981 sa_hdrsize(void *arg)
1982 {
1983 sa_hdr_phys_t *hdr = arg;
1984
1985 return (SA_HDR_SIZE(hdr));
1986 }
1987
1988 void
sa_handle_lock(sa_handle_t * hdl)1989 sa_handle_lock(sa_handle_t *hdl)
1990 {
1991 ASSERT(hdl);
1992 mutex_enter(&hdl->sa_lock);
1993 }
1994
1995 void
sa_handle_unlock(sa_handle_t * hdl)1996 sa_handle_unlock(sa_handle_t *hdl)
1997 {
1998 ASSERT(hdl);
1999 mutex_exit(&hdl->sa_lock);
2000 }
2001