1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
23  * All rights reserved.
24  */
25 
26 #include <sys/zfs_context.h>
27 #include <sys/spa_impl.h>
28 #include <sys/vdev_impl.h>
29 #include <sys/trim_map.h>
30 #include <sys/time.h>
31 
/*
 * Calculate the zio end, upgrading based on ashift which would be
 * done by zio_vdev_io_start: the size is rounded up to a multiple of
 * (1 << ashift) of the vdev's top-level vdev before being added to the
 * offset.
 *
 * This makes free range consolidation much more effective
 * than it would otherwise be as well as ensuring that entire
 * blocks are invalidated by writes.
 *
 * Every macro parameter is parenthesized so that compound expressions
 * (e.g. "&dev" or "a ? b : c") expand with the intended precedence.
 */
#define	TRIM_ZIO_END(vd, offset, size)	((offset) +			\
	P2ROUNDUP((size), 1ULL << (vd)->vdev_top->vdev_ashift))
42 
/* Maximal segment size for ATA TRIM (512 << 16). */
#define	TRIM_MAP_SIZE_FACTOR	(512 << 16)

/*
 * Number of TRIM segments accounted for a range of the given size:
 * one, plus one more for every full TRIM_MAP_SIZE_FACTOR it contains.
 */
#define	TRIM_MAP_SEGS(size)	((size) / TRIM_MAP_SIZE_FACTOR + 1)
47 
/*
 * Queue segment ts on tm: charge its size (in TRIM_MAP_SEGS units) to the
 * pending counter and append it to the tail of the segment list.
 * Both arguments are evaluated more than once.
 */
#define	TRIM_MAP_ADD(tm, ts)	do {					\
	(tm)->tm_pending +=						\
	    TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start);		\
	list_insert_tail(&(tm)->tm_head, (ts));				\
} while (0)
52 
/*
 * Dequeue segment ts from tm: credit its current size (in TRIM_MAP_SEGS
 * units) back to the pending counter and unlink it from the segment list.
 * Must be invoked before ts_start/ts_end are modified so that the same
 * amount that was charged is released.
 * Both arguments are evaluated more than once.
 */
#define	TRIM_MAP_REM(tm, ts)	do {					\
	(tm)->tm_pending -=						\
	    TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start);		\
	list_remove(&(tm)->tm_head, (ts));				\
} while (0)
57 
58 typedef struct trim_map {
59           list_t              tm_head;            /* List of segments sorted by txg. */
60           avl_tree_t          tm_queued_frees;    /* AVL tree of segments waiting for TRIM. */
61           avl_tree_t          tm_inflight_frees;  /* AVL tree of in-flight TRIMs. */
62           avl_tree_t          tm_inflight_writes; /* AVL tree of in-flight writes. */
63           list_t              tm_pending_writes;  /* Writes blocked on in-flight frees. */
64           kmutex_t  tm_lock;
65           uint64_t  tm_pending;                   /* Count of pending TRIMs. */
66 } trim_map_t;
67 
68 typedef struct trim_seg {
69           avl_node_t          ts_node;  /* AVL node. */
70           list_node_t         ts_next;  /* List element. */
71           uint64_t  ts_start; /* Starting offset of this segment. */
72           uint64_t  ts_end;             /* Ending offset (non-inclusive). */
73           uint64_t  ts_txg;             /* Segment creation txg. */
74           hrtime_t  ts_time;  /* Segment creation time. */
75 } trim_seg_t;
76 
77 extern boolean_t zfs_trim_enabled;
78 
79 static u_int trim_txg_delay = 32;       /* Keep deleted data up to 32 TXG */
80 static u_int trim_timeout = 30;                   /* Keep deleted data up to 30s */
81 static u_int trim_max_interval = 1;     /* 1s delays between TRIMs */
82 static u_int trim_vdev_max_pending = 10000; /* Keep up to 10K segments */
83 
84 SYSCTL_DECL(_vfs_zfs);
85 SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RD, 0, "ZFS TRIM");
86 
87 SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, txg_delay, CTLFLAG_RWTUN, &trim_txg_delay,
88     0, "Delay TRIMs by up to this many TXGs");
89 SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, timeout, CTLFLAG_RWTUN, &trim_timeout, 0,
90     "Delay TRIMs by up to this many seconds");
91 SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, max_interval, CTLFLAG_RWTUN,
92     &trim_max_interval, 0,
93     "Maximum interval between TRIM queue processing (seconds)");
94 
95 SYSCTL_DECL(_vfs_zfs_vdev);
96 SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, trim_max_pending, CTLFLAG_RWTUN,
97     &trim_vdev_max_pending, 0,
98     "Maximum pending TRIM segments for a vdev");
99 
100 static void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd);
101 
102 static int
trim_map_seg_compare(const void * x1,const void * x2)103 trim_map_seg_compare(const void *x1, const void *x2)
104 {
105           const trim_seg_t *s1 = x1;
106           const trim_seg_t *s2 = x2;
107 
108           if (s1->ts_start < s2->ts_start) {
109                     if (s1->ts_end > s2->ts_start)
110                               return (0);
111                     return (-1);
112           }
113           if (s1->ts_start > s2->ts_start) {
114                     if (s1->ts_start < s2->ts_end)
115                               return (0);
116                     return (1);
117           }
118           return (0);
119 }
120 
121 static int
trim_map_zio_compare(const void * x1,const void * x2)122 trim_map_zio_compare(const void *x1, const void *x2)
123 {
124           const zio_t *z1 = x1;
125           const zio_t *z2 = x2;
126 
127           if (z1->io_offset < z2->io_offset) {
128                     if (z1->io_offset + z1->io_size > z2->io_offset)
129                               return (0);
130                     return (-1);
131           }
132           if (z1->io_offset > z2->io_offset) {
133                     if (z1->io_offset < z2->io_offset + z2->io_size)
134                               return (0);
135                     return (1);
136           }
137           return (0);
138 }
139 
140 void
trim_map_create(vdev_t * vd)141 trim_map_create(vdev_t *vd)
142 {
143           trim_map_t *tm;
144 
145           ASSERT(zfs_trim_enabled && !vd->vdev_notrim &&
146                     vd->vdev_ops->vdev_op_leaf);
147 
148           tm = kmem_zalloc(sizeof (*tm), KM_SLEEP);
149           mutex_init(&tm->tm_lock, NULL, MUTEX_DEFAULT, NULL);
150           list_create(&tm->tm_head, sizeof (trim_seg_t),
151               offsetof(trim_seg_t, ts_next));
152           list_create(&tm->tm_pending_writes, sizeof (zio_t),
153               offsetof(zio_t, io_trim_link));
154           avl_create(&tm->tm_queued_frees, trim_map_seg_compare,
155               sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
156           avl_create(&tm->tm_inflight_frees, trim_map_seg_compare,
157               sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
158           avl_create(&tm->tm_inflight_writes, trim_map_zio_compare,
159               sizeof (zio_t), offsetof(zio_t, io_trim_node));
160           vd->vdev_trimmap = tm;
161 }
162 
163 void
trim_map_destroy(vdev_t * vd)164 trim_map_destroy(vdev_t *vd)
165 {
166           trim_map_t *tm;
167           trim_seg_t *ts;
168 
169           ASSERT(vd->vdev_ops->vdev_op_leaf);
170 
171           if (!zfs_trim_enabled)
172                     return;
173 
174           tm = vd->vdev_trimmap;
175           if (tm == NULL)
176                     return;
177 
178           /*
179            * We may have been called before trim_map_vdev_commit_done()
180            * had a chance to run, so do it now to prune the remaining
181            * inflight frees.
182            */
183           trim_map_vdev_commit_done(vd->vdev_spa, vd);
184 
185           mutex_enter(&tm->tm_lock);
186           while ((ts = list_head(&tm->tm_head)) != NULL) {
187                     avl_remove(&tm->tm_queued_frees, ts);
188                     TRIM_MAP_REM(tm, ts);
189                     kmem_free(ts, sizeof (*ts));
190           }
191           mutex_exit(&tm->tm_lock);
192 
193           avl_destroy(&tm->tm_queued_frees);
194           avl_destroy(&tm->tm_inflight_frees);
195           avl_destroy(&tm->tm_inflight_writes);
196           list_destroy(&tm->tm_pending_writes);
197           list_destroy(&tm->tm_head);
198           mutex_destroy(&tm->tm_lock);
199           kmem_free(tm, sizeof (*tm));
200           vd->vdev_trimmap = NULL;
201 }
202 
203 static void
trim_map_segment_add(trim_map_t * tm,uint64_t start,uint64_t end,uint64_t txg)204 trim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
205 {
206           avl_index_t where;
207           trim_seg_t tsearch, *ts_before, *ts_after, *ts;
208           boolean_t merge_before, merge_after;
209           hrtime_t time;
210 
211           ASSERT(MUTEX_HELD(&tm->tm_lock));
212           VERIFY(start < end);
213 
214           time = gethrtime();
215           tsearch.ts_start = start;
216           tsearch.ts_end = end;
217 
218           ts = avl_find(&tm->tm_queued_frees, &tsearch, &where);
219           if (ts != NULL) {
220                     if (start < ts->ts_start)
221                               trim_map_segment_add(tm, start, ts->ts_start, txg);
222                     if (end > ts->ts_end)
223                               trim_map_segment_add(tm, ts->ts_end, end, txg);
224                     return;
225           }
226 
227           ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE);
228           ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER);
229 
230           merge_before = (ts_before != NULL && ts_before->ts_end == start);
231           merge_after = (ts_after != NULL && ts_after->ts_start == end);
232 
233           if (merge_before && merge_after) {
234                     avl_remove(&tm->tm_queued_frees, ts_before);
235                     TRIM_MAP_REM(tm, ts_before);
236                     TRIM_MAP_REM(tm, ts_after);
237                     ts_after->ts_start = ts_before->ts_start;
238                     ts_after->ts_txg = txg;
239                     ts_after->ts_time = time;
240                     TRIM_MAP_ADD(tm, ts_after);
241                     kmem_free(ts_before, sizeof (*ts_before));
242           } else if (merge_before) {
243                     TRIM_MAP_REM(tm, ts_before);
244                     ts_before->ts_end = end;
245                     ts_before->ts_txg = txg;
246                     ts_before->ts_time = time;
247                     TRIM_MAP_ADD(tm, ts_before);
248           } else if (merge_after) {
249                     TRIM_MAP_REM(tm, ts_after);
250                     ts_after->ts_start = start;
251                     ts_after->ts_txg = txg;
252                     ts_after->ts_time = time;
253                     TRIM_MAP_ADD(tm, ts_after);
254           } else {
255                     ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
256                     ts->ts_start = start;
257                     ts->ts_end = end;
258                     ts->ts_txg = txg;
259                     ts->ts_time = time;
260                     avl_insert(&tm->tm_queued_frees, ts, where);
261                     TRIM_MAP_ADD(tm, ts);
262           }
263 }
264 
265 static void
trim_map_segment_remove(trim_map_t * tm,trim_seg_t * ts,uint64_t start,uint64_t end)266 trim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start,
267     uint64_t end)
268 {
269           trim_seg_t *nts;
270           boolean_t left_over, right_over;
271 
272           ASSERT(MUTEX_HELD(&tm->tm_lock));
273 
274           left_over = (ts->ts_start < start);
275           right_over = (ts->ts_end > end);
276 
277           TRIM_MAP_REM(tm, ts);
278           if (left_over && right_over) {
279                     nts = kmem_alloc(sizeof (*nts), KM_SLEEP);
280                     nts->ts_start = end;
281                     nts->ts_end = ts->ts_end;
282                     nts->ts_txg = ts->ts_txg;
283                     nts->ts_time = ts->ts_time;
284                     ts->ts_end = start;
285                     avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
286                     TRIM_MAP_ADD(tm, ts);
287                     TRIM_MAP_ADD(tm, nts);
288           } else if (left_over) {
289                     ts->ts_end = start;
290                     TRIM_MAP_ADD(tm, ts);
291           } else if (right_over) {
292                     ts->ts_start = end;
293                     TRIM_MAP_ADD(tm, ts);
294           } else {
295                     avl_remove(&tm->tm_queued_frees, ts);
296                     kmem_free(ts, sizeof (*ts));
297           }
298 }
299 
300 static void
trim_map_free_locked(trim_map_t * tm,uint64_t start,uint64_t end,uint64_t txg)301 trim_map_free_locked(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
302 {
303           zio_t zsearch, *zs;
304 
305           ASSERT(MUTEX_HELD(&tm->tm_lock));
306 
307           zsearch.io_offset = start;
308           zsearch.io_size = end - start;
309 
310           zs = avl_find(&tm->tm_inflight_writes, &zsearch, NULL);
311           if (zs == NULL) {
312                     trim_map_segment_add(tm, start, end, txg);
313                     return;
314           }
315           if (start < zs->io_offset)
316                     trim_map_free_locked(tm, start, zs->io_offset, txg);
317           if (zs->io_offset + zs->io_size < end)
318                     trim_map_free_locked(tm, zs->io_offset + zs->io_size, end, txg);
319 }
320 
321 void
trim_map_free(vdev_t * vd,uint64_t offset,uint64_t size,uint64_t txg)322 trim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg)
323 {
324           trim_map_t *tm = vd->vdev_trimmap;
325 
326           if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL)
327                     return;
328 
329           mutex_enter(&tm->tm_lock);
330           trim_map_free_locked(tm, offset, TRIM_ZIO_END(vd, offset, size), txg);
331           mutex_exit(&tm->tm_lock);
332 }
333 
334 boolean_t
trim_map_write_start(zio_t * zio)335 trim_map_write_start(zio_t *zio)
336 {
337           vdev_t *vd = zio->io_vd;
338           trim_map_t *tm = vd->vdev_trimmap;
339           trim_seg_t tsearch, *ts;
340           boolean_t left_over, right_over;
341           uint64_t start, end;
342 
343           if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL)
344                     return (B_TRUE);
345 
346           start = zio->io_offset;
347           end = TRIM_ZIO_END(zio->io_vd, start, zio->io_size);
348           tsearch.ts_start = start;
349           tsearch.ts_end = end;
350 
351           mutex_enter(&tm->tm_lock);
352 
353           /*
354            * Checking for colliding in-flight frees.
355            */
356           ts = avl_find(&tm->tm_inflight_frees, &tsearch, NULL);
357           if (ts != NULL) {
358                     list_insert_tail(&tm->tm_pending_writes, zio);
359                     mutex_exit(&tm->tm_lock);
360                     return (B_FALSE);
361           }
362 
363           ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
364           if (ts != NULL) {
365                     /*
366                      * Loop until all overlapping segments are removed.
367                      */
368                     do {
369                               trim_map_segment_remove(tm, ts, start, end);
370                               ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
371                     } while (ts != NULL);
372           }
373           avl_add(&tm->tm_inflight_writes, zio);
374 
375           mutex_exit(&tm->tm_lock);
376 
377           return (B_TRUE);
378 }
379 
380 void
trim_map_write_done(zio_t * zio)381 trim_map_write_done(zio_t *zio)
382 {
383           vdev_t *vd = zio->io_vd;
384           trim_map_t *tm = vd->vdev_trimmap;
385 
386           /*
387            * Don't check for vdev_notrim, since the write could have
388            * started before vdev_notrim was set.
389            */
390           if (!zfs_trim_enabled || tm == NULL)
391                     return;
392 
393           mutex_enter(&tm->tm_lock);
394           /*
395            * Don't fail if the write isn't in the tree, since the write
396            * could have started after vdev_notrim was set.
397            */
398           if (zio->io_trim_node.avl_child[0] ||
399               zio->io_trim_node.avl_child[1] ||
400               AVL_XPARENT(&zio->io_trim_node) ||
401               tm->tm_inflight_writes.avl_root == &zio->io_trim_node)
402                     avl_remove(&tm->tm_inflight_writes, zio);
403           mutex_exit(&tm->tm_lock);
404 }
405 
406 /*
407  * Return the oldest segment (the one with the lowest txg / time) or NULL if:
408  * 1. The list is empty
409  * 2. The first element's txg is greater than txgsafe
410  * 3. The first element's txg is not greater than the txg argument and the
411  *    the first element's time is not greater than time argument
412  */
413 static trim_seg_t *
trim_map_first(trim_map_t * tm,uint64_t txg,uint64_t txgsafe,hrtime_t time,boolean_t force)414 trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time,
415     boolean_t force)
416 {
417           trim_seg_t *ts;
418 
419           ASSERT(MUTEX_HELD(&tm->tm_lock));
420           VERIFY(txgsafe >= txg);
421 
422           ts = list_head(&tm->tm_head);
423           if (ts != NULL && ts->ts_txg <= txgsafe &&
424               (ts->ts_txg <= txg || ts->ts_time <= time || force))
425                     return (ts);
426           return (NULL);
427 }
428 
429 static void
trim_map_vdev_commit(spa_t * spa,zio_t * zio,vdev_t * vd)430 trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
431 {
432           trim_map_t *tm = vd->vdev_trimmap;
433           trim_seg_t *ts;
434           uint64_t size, offset, txgtarget, txgsafe;
435           int64_t hard, soft;
436           hrtime_t timelimit;
437 
438           ASSERT(vd->vdev_ops->vdev_op_leaf);
439 
440           if (tm == NULL)
441                     return;
442 
443           timelimit = gethrtime() - (hrtime_t)trim_timeout * NANOSEC;
444           if (vd->vdev_isl2cache) {
445                     txgsafe = UINT64_MAX;
446                     txgtarget = UINT64_MAX;
447           } else {
448                     txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
449                     if (txgsafe > trim_txg_delay)
450                               txgtarget = txgsafe - trim_txg_delay;
451                     else
452                               txgtarget = 0;
453           }
454 
455           mutex_enter(&tm->tm_lock);
456           hard = 0;
457           if (tm->tm_pending > trim_vdev_max_pending)
458                     hard = (tm->tm_pending - trim_vdev_max_pending) / 4;
459           soft = P2ROUNDUP(hard + tm->tm_pending / trim_timeout + 1, 64);
460           /* Loop until we have sent all outstanding free's */
461           while (soft > 0 &&
462               (ts = trim_map_first(tm, txgtarget, txgsafe, timelimit, hard > 0))
463               != NULL) {
464                     TRIM_MAP_REM(tm, ts);
465                     avl_remove(&tm->tm_queued_frees, ts);
466                     avl_add(&tm->tm_inflight_frees, ts);
467                     size = ts->ts_end - ts->ts_start;
468                     offset = ts->ts_start;
469                     /*
470                      * We drop the lock while we call zio_nowait as the IO
471                      * scheduler can result in a different IO being run e.g.
472                      * a write which would result in a recursive lock.
473                      */
474                     mutex_exit(&tm->tm_lock);
475 
476                     zio_nowait(zio_trim(zio, spa, vd, offset, size));
477 
478                     soft -= TRIM_MAP_SEGS(size);
479                     hard -= TRIM_MAP_SEGS(size);
480                     mutex_enter(&tm->tm_lock);
481           }
482           mutex_exit(&tm->tm_lock);
483 }
484 
485 static void
trim_map_vdev_commit_done(spa_t * spa,vdev_t * vd)486 trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd)
487 {
488           trim_map_t *tm = vd->vdev_trimmap;
489           trim_seg_t *ts;
490           list_t pending_writes;
491           zio_t *zio;
492           uint64_t start, size;
493           void *cookie;
494 
495           ASSERT(vd->vdev_ops->vdev_op_leaf);
496 
497           if (tm == NULL)
498                     return;
499 
500           mutex_enter(&tm->tm_lock);
501           if (!avl_is_empty(&tm->tm_inflight_frees)) {
502                     cookie = NULL;
503                     while ((ts = avl_destroy_nodes(&tm->tm_inflight_frees,
504                         &cookie)) != NULL) {
505                               kmem_free(ts, sizeof (*ts));
506                     }
507           }
508           list_create(&pending_writes, sizeof (zio_t), offsetof(zio_t,
509               io_trim_link));
510           list_move_tail(&pending_writes, &tm->tm_pending_writes);
511           mutex_exit(&tm->tm_lock);
512 
513           while ((zio = list_remove_head(&pending_writes)) != NULL) {
514                     zio_vdev_io_reissue(zio);
515                     zio_execute(zio);
516           }
517           list_destroy(&pending_writes);
518 }
519 
520 static void
trim_map_commit(spa_t * spa,zio_t * zio,vdev_t * vd)521 trim_map_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
522 {
523           int c;
524 
525           if (vd == NULL)
526                     return;
527 
528           if (vd->vdev_ops->vdev_op_leaf) {
529                     trim_map_vdev_commit(spa, zio, vd);
530           } else {
531                     for (c = 0; c < vd->vdev_children; c++)
532                               trim_map_commit(spa, zio, vd->vdev_child[c]);
533           }
534 }
535 
536 static void
trim_map_commit_done(spa_t * spa,vdev_t * vd)537 trim_map_commit_done(spa_t *spa, vdev_t *vd)
538 {
539           int c;
540 
541           if (vd == NULL)
542                     return;
543 
544           if (vd->vdev_ops->vdev_op_leaf) {
545                     trim_map_vdev_commit_done(spa, vd);
546           } else {
547                     for (c = 0; c < vd->vdev_children; c++)
548                               trim_map_commit_done(spa, vd->vdev_child[c]);
549           }
550 }
551 
552 static void
trim_thread(void * arg)553 trim_thread(void *arg)
554 {
555           spa_t *spa = arg;
556           zio_t *zio;
557 
558 #ifdef __FreeBSD__
559 #ifdef _KERNEL
560           (void) snprintf(curthread->td_name, sizeof(curthread->td_name),
561               "trim %s", spa_name(spa));
562 #endif
563 #endif
564 #ifdef __NetBSD__
565 #ifdef _KERNEL
566           size_t sz;
567           char *name, *oname;
568           struct lwp *l = curlwp;
569 
570           name = kmem_alloc(MAXCOMLEN, KM_SLEEP);
571           snprintf(name, MAXCOMLEN, "trim %s", spa_name(spa));
572           name[MAXCOMLEN - 1] = 0;
573 
574           lwp_lock(l);
575           oname = l->l_name;
576           l->l_name = name;
577           lwp_unlock(l);
578 
579           if (oname != NULL)
580                     kmem_free(oname, MAXCOMLEN);
581 #endif
582 #endif
583 
584           for (;;) {
585                     mutex_enter(&spa->spa_trim_lock);
586                     if (spa->spa_trim_thread == NULL) {
587                               spa->spa_trim_thread = curthread;
588                               cv_signal(&spa->spa_trim_cv);
589                               mutex_exit(&spa->spa_trim_lock);
590                               thread_exit();
591                     }
592 
593                     (void) cv_timedwait(&spa->spa_trim_cv, &spa->spa_trim_lock,
594                         hz * trim_max_interval);
595                     mutex_exit(&spa->spa_trim_lock);
596 
597                     zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
598 
599                     spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
600                     trim_map_commit(spa, zio, spa->spa_root_vdev);
601                     (void) zio_wait(zio);
602                     trim_map_commit_done(spa, spa->spa_root_vdev);
603                     spa_config_exit(spa, SCL_STATE, FTAG);
604           }
605 }
606 
607 void
trim_thread_create(spa_t * spa)608 trim_thread_create(spa_t *spa)
609 {
610 
611           if (!zfs_trim_enabled)
612                     return;
613 
614           mutex_init(&spa->spa_trim_lock, NULL, MUTEX_DEFAULT, NULL);
615           cv_init(&spa->spa_trim_cv, NULL, CV_DEFAULT, NULL);
616           mutex_enter(&spa->spa_trim_lock);
617           spa->spa_trim_thread = thread_create(NULL, 0, trim_thread, spa, 0, &p0,
618               TS_RUN, minclsyspri);
619           mutex_exit(&spa->spa_trim_lock);
620 }
621 
622 void
trim_thread_destroy(spa_t * spa)623 trim_thread_destroy(spa_t *spa)
624 {
625 
626           if (!zfs_trim_enabled)
627                     return;
628           if (spa->spa_trim_thread == NULL)
629                     return;
630 
631           mutex_enter(&spa->spa_trim_lock);
632           /* Setting spa_trim_thread to NULL tells the thread to stop. */
633           spa->spa_trim_thread = NULL;
634           cv_signal(&spa->spa_trim_cv);
635           /* The thread will set it back to != NULL on exit. */
636           while (spa->spa_trim_thread == NULL)
637                     cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
638           spa->spa_trim_thread = NULL;
639           mutex_exit(&spa->spa_trim_lock);
640 
641           cv_destroy(&spa->spa_trim_cv);
642           mutex_destroy(&spa->spa_trim_lock);
643 }
644 
645 void
trim_thread_wakeup(spa_t * spa)646 trim_thread_wakeup(spa_t *spa)
647 {
648 
649           if (!zfs_trim_enabled)
650                     return;
651           if (spa->spa_trim_thread == NULL)
652                     return;
653 
654           mutex_enter(&spa->spa_trim_lock);
655           cv_signal(&spa->spa_trim_cv);
656           mutex_exit(&spa->spa_trim_lock);
657 }
658