1 /*
2 * CDDL HEADER START
3 *
4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
7 * 1.0 of the CDDL.
8 *
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
12 *
13 * CDDL HEADER END
14 */
15 /*
16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
17 */
18
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
21 #include <sys/trace_zfs.h>
22
/*
 * Tunable override for the number of sublists in each multilist_t.  When
 * this is set to a positive value, multilist_create() uses it as the sublist
 * count.  Otherwise (0 is the default) multilist_create() derives the count
 * from the number of CPUs, using MAX(boot_ncpus, 4).
 */
int zfs_multilist_num_sublists = 0;
28
/*
 * Translate a pointer to an object kept on the multilist into a pointer
 * to the multilist_node_t embedded in that object, by adding the byte
 * offset recorded in ml->ml_offset.
 *
 * Only compiled when ZFS_DEBUG is defined; multilist_insert() and
 * multilist_remove() call it from inside ASSERT() expressions.
 */
#ifdef ZFS_DEBUG
static multilist_node_t *
multilist_d2l(multilist_t *ml, void *obj)
{
	char *base = (char *)obj;

	return ((multilist_node_t *)(base + ml->ml_offset));
}
#endif
40
41 /*
42 * Initialize a new mutlilist using the parameters specified.
43 *
44 * - 'size' denotes the size of the structure containing the
45 * multilist_node_t.
46 * - 'offset' denotes the byte offset of the mutlilist_node_t within
47 * the structure that contains it.
48 * - 'num' specifies the number of internal sublists to create.
49 * - 'index_func' is used to determine which sublist to insert into
50 * when the multilist_insert() function is called; as well as which
51 * sublist to remove from when multilist_remove() is called. The
52 * requirements this function must meet, are the following:
53 *
54 * - It must always return the same value when called on the same
55 * object (to ensure the object is removed from the list it was
56 * inserted into).
57 *
58 * - It must return a value in the range [0, number of sublists).
59 * The multilist_get_num_sublists() function may be used to
60 * determine the number of sublists in the multilist.
61 *
62 * Also, in order to reduce internal contention between the sublists
63 * during insertion and removal, this function should choose evenly
64 * between all available sublists when inserting. This isn't a hard
65 * requirement, but a general rule of thumb in order to garner the
66 * best multi-threaded performance out of the data structure.
67 */
68 static void
multilist_create_impl(multilist_t * ml,size_t size,size_t offset,unsigned int num,multilist_sublist_index_func_t * index_func)69 multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
70 unsigned int num, multilist_sublist_index_func_t *index_func)
71 {
72 ASSERT3U(size, >, 0);
73 ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
74 ASSERT3U(num, >, 0);
75 ASSERT3P(index_func, !=, NULL);
76
77 ml->ml_offset = offset;
78 ml->ml_num_sublists = num;
79 ml->ml_index_func = index_func;
80
81 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
82 ml->ml_num_sublists, KM_SLEEP);
83
84 ASSERT3P(ml->ml_sublists, !=, NULL);
85
86 for (int i = 0; i < ml->ml_num_sublists; i++) {
87 multilist_sublist_t *mls = &ml->ml_sublists[i];
88 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
89 list_create(&mls->mls_list, size, offset);
90 }
91 }
92
93 /*
94 * Allocate a new multilist, using the default number of sublists (the number
95 * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
96 * that the multilists do not expand if more CPUs are hot-added. In that case,
97 * we will have less fanout than boot_ncpus, but we don't want to always
98 * reserve the RAM necessary to create the extra slots for additional CPUs up
99 * front, and dynamically adding them is a complex task.
100 */
101 void
multilist_create(multilist_t * ml,size_t size,size_t offset,multilist_sublist_index_func_t * index_func)102 multilist_create(multilist_t *ml, size_t size, size_t offset,
103 multilist_sublist_index_func_t *index_func)
104 {
105 int num_sublists;
106
107 if (zfs_multilist_num_sublists > 0) {
108 num_sublists = zfs_multilist_num_sublists;
109 } else {
110 num_sublists = MAX(boot_ncpus, 4);
111 }
112
113 multilist_create_impl(ml, size, offset, num_sublists, index_func);
114 }
115
116 /*
117 * Destroy the given multilist object, and free up any memory it holds.
118 */
119 void
multilist_destroy(multilist_t * ml)120 multilist_destroy(multilist_t *ml)
121 {
122 ASSERT(multilist_is_empty(ml));
123
124 for (int i = 0; i < ml->ml_num_sublists; i++) {
125 multilist_sublist_t *mls = &ml->ml_sublists[i];
126
127 ASSERT(list_is_empty(&mls->mls_list));
128
129 list_destroy(&mls->mls_list);
130 mutex_destroy(&mls->mls_lock);
131 }
132
133 ASSERT3P(ml->ml_sublists, !=, NULL);
134 kmem_free(ml->ml_sublists,
135 sizeof (multilist_sublist_t) * ml->ml_num_sublists);
136
137 ml->ml_num_sublists = 0;
138 ml->ml_offset = 0;
139 ml->ml_sublists = NULL;
140 }
141
142 /*
143 * Insert the given object into the multilist.
144 *
145 * This function will insert the object specified into the sublist
146 * determined using the function given at multilist creation time.
147 *
148 * The sublist locks are automatically acquired if not already held, to
149 * ensure consistency when inserting and removing from multiple threads.
150 */
151 void
multilist_insert(multilist_t * ml,void * obj)152 multilist_insert(multilist_t *ml, void *obj)
153 {
154 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
155 multilist_sublist_t *mls;
156 boolean_t need_lock;
157
158 DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
159 unsigned int, sublist_idx, void *, obj);
160
161 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
162
163 mls = &ml->ml_sublists[sublist_idx];
164
165 /*
166 * Note: Callers may already hold the sublist lock by calling
167 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
168 * returning TRUE if and only if the current thread holds the
169 * lock. While it's a little ugly to make the lock recursive in
170 * this way, it works and allows the calling code to be much
171 * simpler -- otherwise it would have to pass around a flag
172 * indicating that it already has the lock.
173 */
174 need_lock = !MUTEX_HELD(&mls->mls_lock);
175
176 if (need_lock)
177 mutex_enter(&mls->mls_lock);
178
179 ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
180
181 multilist_sublist_insert_head(mls, obj);
182
183 if (need_lock)
184 mutex_exit(&mls->mls_lock);
185 }
186
187 /*
188 * Remove the given object from the multilist.
189 *
190 * This function will remove the object specified from the sublist
191 * determined using the function given at multilist creation time.
192 *
193 * The necessary sublist locks are automatically acquired, to ensure
194 * consistency when inserting and removing from multiple threads.
195 */
196 void
multilist_remove(multilist_t * ml,void * obj)197 multilist_remove(multilist_t *ml, void *obj)
198 {
199 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
200 multilist_sublist_t *mls;
201 boolean_t need_lock;
202
203 DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
204 unsigned int, sublist_idx, void *, obj);
205
206 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
207
208 mls = &ml->ml_sublists[sublist_idx];
209 /* See comment in multilist_insert(). */
210 need_lock = !MUTEX_HELD(&mls->mls_lock);
211
212 if (need_lock)
213 mutex_enter(&mls->mls_lock);
214
215 ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
216
217 multilist_sublist_remove(mls, obj);
218
219 if (need_lock)
220 mutex_exit(&mls->mls_lock);
221 }
222
223 /*
224 * Check to see if this multilist object is empty.
225 *
226 * This will return TRUE if it finds all of the sublists of this
227 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
228 * automatically acquired as necessary.
229 *
230 * If concurrent insertions and removals are occurring, the semantics
231 * of this function become a little fuzzy. Instead of locking all
232 * sublists for the entire call time of the function, each sublist is
233 * only locked as it is individually checked for emptiness. Thus, it's
234 * possible for this function to return TRUE with non-empty sublists at
235 * the time the function returns. This would be due to another thread
236 * inserting into a given sublist, after that specific sublist was check
237 * and deemed empty, but before all sublists have been checked.
238 */
239 int
multilist_is_empty(multilist_t * ml)240 multilist_is_empty(multilist_t *ml)
241 {
242 for (int i = 0; i < ml->ml_num_sublists; i++) {
243 multilist_sublist_t *mls = &ml->ml_sublists[i];
244 /* See comment in multilist_insert(). */
245 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
246
247 if (need_lock)
248 mutex_enter(&mls->mls_lock);
249
250 if (!list_is_empty(&mls->mls_list)) {
251 if (need_lock)
252 mutex_exit(&mls->mls_lock);
253
254 return (FALSE);
255 }
256
257 if (need_lock)
258 mutex_exit(&mls->mls_lock);
259 }
260
261 return (TRUE);
262 }
263
264 /* Return the number of sublists composing this multilist */
265 unsigned int
multilist_get_num_sublists(multilist_t * ml)266 multilist_get_num_sublists(multilist_t *ml)
267 {
268 return (ml->ml_num_sublists);
269 }
270
271 /* Return a randomly selected, valid sublist index for this multilist */
272 unsigned int
multilist_get_random_index(multilist_t * ml)273 multilist_get_random_index(multilist_t *ml)
274 {
275 return (random_in_range(ml->ml_num_sublists));
276 }
277
278 /* Lock and return the sublist specified at the given index */
279 multilist_sublist_t *
multilist_sublist_lock(multilist_t * ml,unsigned int sublist_idx)280 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
281 {
282 multilist_sublist_t *mls;
283
284 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
285 mls = &ml->ml_sublists[sublist_idx];
286 mutex_enter(&mls->mls_lock);
287
288 return (mls);
289 }
290
291 /* Lock and return the sublist that would be used to store the specified obj */
292 multilist_sublist_t *
multilist_sublist_lock_obj(multilist_t * ml,void * obj)293 multilist_sublist_lock_obj(multilist_t *ml, void *obj)
294 {
295 return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
296 }
297
298 void
multilist_sublist_unlock(multilist_sublist_t * mls)299 multilist_sublist_unlock(multilist_sublist_t *mls)
300 {
301 mutex_exit(&mls->mls_lock);
302 }
303
304 /*
305 * We're allowing any object to be inserted into this specific sublist,
306 * but this can lead to trouble if multilist_remove() is called to
307 * remove this object. Specifically, if calling ml_index_func on this
308 * object returns an index for sublist different than what is passed as
309 * a parameter here, any call to multilist_remove() with this newly
310 * inserted object is undefined! (the call to multilist_remove() will
311 * remove the object from a list that it isn't contained in)
312 */
313 void
multilist_sublist_insert_head(multilist_sublist_t * mls,void * obj)314 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
315 {
316 ASSERT(MUTEX_HELD(&mls->mls_lock));
317 list_insert_head(&mls->mls_list, obj);
318 }
319
320 /* please see comment above multilist_sublist_insert_head */
321 void
multilist_sublist_insert_tail(multilist_sublist_t * mls,void * obj)322 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
323 {
324 ASSERT(MUTEX_HELD(&mls->mls_lock));
325 list_insert_tail(&mls->mls_list, obj);
326 }
327
328 /*
329 * Move the object one element forward in the list.
330 *
331 * This function will move the given object forward in the list (towards
332 * the head) by one object. So, in essence, it will swap its position in
333 * the list with its "prev" pointer. If the given object is already at the
334 * head of the list, it cannot be moved forward any more than it already
335 * is, so no action is taken.
336 *
337 * NOTE: This function **must not** remove any object from the list other
338 * than the object given as the parameter. This is relied upon in
339 * arc_evict_state_impl().
340 */
341 void
multilist_sublist_move_forward(multilist_sublist_t * mls,void * obj)342 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
343 {
344 void *prev = list_prev(&mls->mls_list, obj);
345
346 ASSERT(MUTEX_HELD(&mls->mls_lock));
347 ASSERT(!list_is_empty(&mls->mls_list));
348
349 /* 'obj' must be at the head of the list, nothing to do */
350 if (prev == NULL)
351 return;
352
353 list_remove(&mls->mls_list, obj);
354 list_insert_before(&mls->mls_list, prev, obj);
355 }
356
357 void
multilist_sublist_remove(multilist_sublist_t * mls,void * obj)358 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
359 {
360 ASSERT(MUTEX_HELD(&mls->mls_lock));
361 list_remove(&mls->mls_list, obj);
362 }
363
364 int
multilist_sublist_is_empty(multilist_sublist_t * mls)365 multilist_sublist_is_empty(multilist_sublist_t *mls)
366 {
367 ASSERT(MUTEX_HELD(&mls->mls_lock));
368 return (list_is_empty(&mls->mls_list));
369 }
370
371 int
multilist_sublist_is_empty_idx(multilist_t * ml,unsigned int sublist_idx)372 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx)
373 {
374 multilist_sublist_t *mls;
375 int empty;
376
377 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
378 mls = &ml->ml_sublists[sublist_idx];
379 ASSERT(!MUTEX_HELD(&mls->mls_lock));
380 mutex_enter(&mls->mls_lock);
381 empty = list_is_empty(&mls->mls_list);
382 mutex_exit(&mls->mls_lock);
383 return (empty);
384 }
385
386 void *
multilist_sublist_head(multilist_sublist_t * mls)387 multilist_sublist_head(multilist_sublist_t *mls)
388 {
389 ASSERT(MUTEX_HELD(&mls->mls_lock));
390 return (list_head(&mls->mls_list));
391 }
392
393 void *
multilist_sublist_tail(multilist_sublist_t * mls)394 multilist_sublist_tail(multilist_sublist_t *mls)
395 {
396 ASSERT(MUTEX_HELD(&mls->mls_lock));
397 return (list_tail(&mls->mls_list));
398 }
399
400 void *
multilist_sublist_next(multilist_sublist_t * mls,void * obj)401 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
402 {
403 ASSERT(MUTEX_HELD(&mls->mls_lock));
404 return (list_next(&mls->mls_list, obj));
405 }
406
407 void *
multilist_sublist_prev(multilist_sublist_t * mls,void * obj)408 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
409 {
410 ASSERT(MUTEX_HELD(&mls->mls_lock));
411 return (list_prev(&mls->mls_list, obj));
412 }
413
414 void
multilist_link_init(multilist_node_t * link)415 multilist_link_init(multilist_node_t *link)
416 {
417 list_link_init(link);
418 }
419
420 int
multilist_link_active(multilist_node_t * link)421 multilist_link_active(multilist_node_t *link)
422 {
423 return (list_link_active(link));
424 }
425
/*
 * Declare the multilist sublist-count tunable as a module parameter of type
 * INT with ZMOD_RW access and the description string below.
 * NOTE(review): presumably the "zfs_" prefix and the "multilist_num_sublists"
 * name combine to refer to zfs_multilist_num_sublists defined near the top of
 * this file -- confirm against the ZFS_MODULE_PARAM definition.
 */
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, INT, ZMOD_RW,
	"Number of sublists used in each multilist");
/* END CSTYLED */
430