1 /*        $NetBSD: chfs_gc.c,v 1.12 2021/12/07 22:13:56 andvar Exp $  */
2 
3 /*-
4  * Copyright (c) 2010 Department of Software Engineering,
5  *                        University of Szeged, Hungary
6  * Copyright (c) 2010 Tamas Toth <ttoth@inf.u-szeged.hu>
7  * Copyright (c) 2010 Adam Hoka <ahoka@NetBSD.org>
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to The NetBSD Foundation
11  * by the Department of Software Engineering, University of Szeged, Hungary
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cprng.h>
36 #include "chfs.h"
37 
/* Inode handling helpers used while collecting. */
void chfs_gc_release_inode(struct chfs_mount *chmp, struct chfs_inode *ip);
struct chfs_inode *chfs_gc_fetch_inode(struct chfs_mount *chmp, ino_t vno,
    uint32_t unlinked);
int chfs_check(struct chfs_mount *chmp, struct chfs_vnode_cache *chvc);
void chfs_clear_inode(struct chfs_mount *chmp, struct chfs_inode *ip);

/* Block selection and per-node collection routines. */
struct chfs_eraseblock *find_gc_block(struct chfs_mount *chmp);
int chfs_gcollect_pristine(struct chfs_mount *chmp,
    struct chfs_eraseblock *cheb, struct chfs_vnode_cache *chvc,
    struct chfs_node_ref *nref);
int chfs_gcollect_live(struct chfs_mount *, struct chfs_eraseblock *,
    struct chfs_node_ref *, struct chfs_inode *);
int chfs_gcollect_vnode(struct chfs_mount *, struct chfs_inode *);
int chfs_gcollect_dirent(struct chfs_mount *, struct chfs_eraseblock *,
    struct chfs_inode *, struct chfs_dirent *);
int chfs_gcollect_deletion_dirent(struct chfs_mount *,
    struct chfs_eraseblock *, struct chfs_inode *, struct chfs_dirent *);
int chfs_gcollect_dnode(struct chfs_mount *, struct chfs_eraseblock *,
    struct chfs_inode *, struct chfs_full_dnode *, uint32_t, uint32_t);
63 
64 /*
65  * chfs_gc_trigger - wakes up GC thread, if it should run
66  * Must be called with chm_lock_mountfields held.
67  */
68 void
chfs_gc_trigger(struct chfs_mount * chmp)69 chfs_gc_trigger(struct chfs_mount *chmp)
70 {
71           struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
72 
73           if (gc->gcth_running &&
74               chfs_gc_thread_should_wake(chmp)) {
75                     cv_signal(&gc->gcth_wakeup);
76           }
77 }
78 
79 
80 /* chfs_gc_thread - garbage collector's thread */
81 void
chfs_gc_thread(void * data)82 chfs_gc_thread(void *data)
83 {
84           struct chfs_mount *chmp = data;
85           struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
86 
87           dbg_gc("[GC THREAD] thread started\n");
88 
89           mutex_enter(&chmp->chm_lock_mountfields);
90           while (gc->gcth_running) {
91                     /* we must call chfs_gc_thread_should_wake with chm_lock_mountfields
92                      * held, which is a bit awkwardly done here, but we can't really
93                      * do it otherway with the current design...
94                      */
95                     if (chfs_gc_thread_should_wake(chmp)) {
96                               if (chfs_gcollect_pass(chmp) == ENOSPC) {
97                                         mutex_exit(&chmp->chm_lock_mountfields);
98                                         panic("No space for garbage collection\n");
99                                         /* XXX why break here? i have added a panic
100                                          * here to see if it gets triggered -ahoka
101                                          */
102                                         break;
103                               }
104                               /* XXX gcollect_pass drops the mutex */
105                     }
106 
107                     cv_timedwait_sig(&gc->gcth_wakeup,
108                         &chmp->chm_lock_mountfields, mstohz(100));
109           }
110           mutex_exit(&chmp->chm_lock_mountfields);
111 
112           dbg_gc("[GC THREAD] thread stopped\n");
113           kthread_exit(0);
114 }
115 
116 /* chfs_gc_thread_start - starts GC */
117 void
chfs_gc_thread_start(struct chfs_mount * chmp)118 chfs_gc_thread_start(struct chfs_mount *chmp)
119 {
120           struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
121 
122           cv_init(&gc->gcth_wakeup, "chfsgccv");
123 
124           gc->gcth_running = true;
125           kthread_create(PRI_NONE, /*KTHREAD_MPSAFE |*/ KTHREAD_MUSTJOIN,
126               NULL, chfs_gc_thread, chmp, &gc->gcth_thread,
127               "chfsgcth");
128 }
129 
130 /* chfs_gc_thread_stop - stops GC */
131 void
chfs_gc_thread_stop(struct chfs_mount * chmp)132 chfs_gc_thread_stop(struct chfs_mount *chmp)
133 {
134           struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
135 
136           /* check if it is actually running */
137           if (gc->gcth_running) {
138                     gc->gcth_running = false;
139           } else {
140                     return;
141           }
142           cv_signal(&gc->gcth_wakeup);
143           dbg_gc("[GC THREAD] stop signal sent\n");
144 
145           kthread_join(gc->gcth_thread);
146 #ifdef BROKEN_KTH_JOIN
147           kpause("chfsthjoin", false, mstohz(1000), NULL);
148 #endif
149 
150           cv_destroy(&gc->gcth_wakeup);
151 }
152 
153 /*
154  * chfs_gc_thread_should_wake - checks if GC thread should wake up
155  * Must be called with chm_lock_mountfields held.
156  * Returns 1, if GC should wake up and 0 else.
157  */
158 int
chfs_gc_thread_should_wake(struct chfs_mount * chmp)159 chfs_gc_thread_should_wake(struct chfs_mount *chmp)
160 {
161           int nr_very_dirty = 0;
162           struct chfs_eraseblock *cheb;
163           uint32_t dirty;
164 
165           KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
166 
167           /* Erase pending queue is not empty. */
168           if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
169                     dbg_gc("erase_pending\n");
170                     return 1;
171           }
172 
173           /* There is something unchecked in the filesystem. */
174           if (chmp->chm_unchecked_size) {
175                     dbg_gc("unchecked\n");
176                     return 1;
177           }
178 
179           dirty = chmp->chm_dirty_size - chmp->chm_nr_erasable_blocks *
180               chmp->chm_ebh->eb_size;
181 
182           /* Number of free and erasable blocks are critical. */
183           if (chmp->chm_nr_free_blocks + chmp->chm_nr_erasable_blocks <
184               chmp->chm_resv_blocks_gctrigger && (dirty > chmp->chm_nospc_dirty)) {
185                     dbg_gc("free: %d + erasable: %d < resv: %d\n",
186                         chmp->chm_nr_free_blocks, chmp->chm_nr_erasable_blocks,
187                         chmp->chm_resv_blocks_gctrigger);
188                     dbg_gc("dirty: %d > nospc_dirty: %d\n",
189                         dirty, chmp->chm_nospc_dirty);
190 
191                     return 1;
192           }
193 
194           /* There are too much very dirty blocks. */
195           TAILQ_FOREACH(cheb, &chmp->chm_very_dirty_queue, queue) {
196                     nr_very_dirty++;
197                     if (nr_very_dirty == chmp->chm_vdirty_blocks_gctrigger) {
198                               dbg_gc("nr_very_dirty\n");
199                               return 1;
200                     }
201           }
202 
203           /* Everything is OK, GC shouldn't run. */
204           return 0;
205 }
206 
/*
 * chfs_gc_release_inode - hand an inode back after it was collected
 *
 * Placeholder: it only emits a debug message, neither argument is used.
 */
void
chfs_gc_release_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
{
	dbg_gc("release inode\n");
}
214 
215 /* chfs_gc_fetch_inode - assign the given inode to the GC */
216 struct chfs_inode *
chfs_gc_fetch_inode(struct chfs_mount * chmp,ino_t vno,uint32_t unlinked)217 chfs_gc_fetch_inode(struct chfs_mount *chmp, ino_t vno,
218     uint32_t unlinked)
219 {
220           struct vnode *vp = NULL;
221           struct chfs_vnode_cache *vc;
222           struct chfs_inode *ip;
223           dbg_gc("fetch inode %llu\n", (unsigned long long)vno);
224 
225           if (unlinked) {
226                     dbg_gc("unlinked\n");
227                     vp = chfs_vnode_lookup(chmp, vno);
228                     if (!vp) {
229                               mutex_enter(&chmp->chm_lock_vnocache);
230                               vc = chfs_vnode_cache_get(chmp, vno);
231                               if (!vc) {
232                                         mutex_exit(&chmp->chm_lock_vnocache);
233                                         return NULL;
234                               }
235                               mutex_exit(&chmp->chm_lock_vnocache);
236                               if (vc->state != VNO_STATE_CHECKEDABSENT) {
237                                         /* XXX why do we need the delay here?! */
238                                         KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
239                                         cv_timedwait_sig(
240                                                   &chmp->chm_gc_thread.gcth_wakeup,
241                                                   &chmp->chm_lock_mountfields, mstohz(50));
242                               }
243                               return NULL;
244                     }
245           } else {
246                     dbg_gc("vnode lookup\n");
247                     vp = chfs_vnode_lookup(chmp, vno);
248           }
249           dbg_gc("vp to ip\n");
250           ip = VTOI(vp);
251           KASSERT(ip);
252           vrele(vp);
253 
254           return ip;
255 }
256 
257 extern rb_tree_ops_t frag_rbtree_ops;
258 
259 /* chfs_check - checks an inode with minimal initialization */
260 int
chfs_check(struct chfs_mount * chmp,struct chfs_vnode_cache * chvc)261 chfs_check(struct chfs_mount *chmp, struct  chfs_vnode_cache *chvc)
262 {
263           KASSERT(mutex_owned(&chmp->chm_lock_vnocache));
264 
265           struct chfs_inode *ip;
266           struct vnode *vp;
267           int ret;
268 
269           /* Get a new inode. */
270           ip = pool_get(&chfs_inode_pool, PR_WAITOK);
271           if (!ip) {
272                     return ENOMEM;
273           }
274 
275           vp = kmem_zalloc(sizeof(struct vnode), KM_SLEEP);
276 
277           /* Minimal initialization. */
278           ip->chvc = chvc;
279           ip->vp = vp;
280 
281           vp->v_data = ip;
282 
283           rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
284           TAILQ_INIT(&ip->dents);
285 
286           /* Build the node. */
287           mutex_exit(&chmp->chm_lock_vnocache);
288           ret = chfs_read_inode_internal(chmp, ip);
289           mutex_enter(&chmp->chm_lock_vnocache);
290           if (!ret) {
291                     chfs_clear_inode(chmp, ip);
292           }
293 
294           /* Release inode. */
295           pool_put(&chfs_inode_pool, ip);
296 
297           return ret;
298 }
299 
300 /* chfs_clear_inode - kills a minimal inode */
301 void
chfs_clear_inode(struct chfs_mount * chmp,struct chfs_inode * ip)302 chfs_clear_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
303 {
304           KASSERT(mutex_owned(&chmp->chm_lock_vnocache));
305 
306           struct chfs_dirent *fd, *tmpfd;
307           struct chfs_vnode_cache *chvc;
308           struct chfs_node_ref *nref;
309 
310           chvc = ip->chvc;
311           /* shouldnt this be: */
312           //bool deleted = (chvc && !(chvc->pvno || chvc->nlink));
313           int deleted = (chvc && !(chvc->pvno | chvc->nlink));
314 
315           /* Set actual state. */
316           if (chvc && chvc->state != VNO_STATE_CHECKING) {
317                     chvc->state = VNO_STATE_CLEARING;
318           }
319 
320           /* Remove vnode information. */
321           while (deleted && chvc->v != (struct chfs_node_ref *)chvc) {
322                     nref = chvc->v;
323                     chfs_remove_and_obsolete(chmp, chvc, nref, &chvc->v);
324           }
325 
326           /* Destroy data. */
327           chfs_kill_fragtree(chmp, &ip->fragtree);
328 
329           /* Clear dirents. */
330           TAILQ_FOREACH_SAFE(fd, &ip->dents, fds, tmpfd) {
331                     chfs_free_dirent(fd);
332           }
333 
334           /* Remove node from vnode cache. */
335           if (chvc && chvc->state == VNO_STATE_CHECKING) {
336                     chvc->state = VNO_STATE_CHECKEDABSENT;
337                     if ((struct chfs_vnode_cache *)chvc->v == chvc &&
338                         (struct chfs_vnode_cache *)chvc->dirents == chvc &&
339                         (struct chfs_vnode_cache *)chvc->dnode == chvc)
340                               chfs_vnode_cache_remove(chmp, chvc);
341           }
342 }
343 
344 /* find_gc_block - finds the next block for GC */
345 struct chfs_eraseblock *
find_gc_block(struct chfs_mount * chmp)346 find_gc_block(struct chfs_mount *chmp)
347 {
348           struct chfs_eraseblock *ret;
349           struct chfs_eraseblock_queue *nextqueue;
350 
351           KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
352 
353           /* Get a random number. */
354           uint32_t n = cprng_fast32() % 128;
355 
356 again:
357           /* Find an eraseblock queue. */
358     if (n<50 && !TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
359                     dbg_gc("Picking block from erase_pending_queue to GC next\n");
360                     nextqueue = &chmp->chm_erase_pending_queue;
361           } else if (n<110 && !TAILQ_EMPTY(&chmp->chm_very_dirty_queue) ) {
362                     dbg_gc("Picking block from very_dirty_queue to GC next\n");
363                     nextqueue = &chmp->chm_very_dirty_queue;
364           } else if (n<126 && !TAILQ_EMPTY(&chmp->chm_dirty_queue) ) {
365                     dbg_gc("Picking block from dirty_queue to GC next\n");
366                     nextqueue = &chmp->chm_dirty_queue;
367           } else if (!TAILQ_EMPTY(&chmp->chm_clean_queue)) {
368                     dbg_gc("Picking block from clean_queue to GC next\n");
369                     nextqueue = &chmp->chm_clean_queue;
370           } else if (!TAILQ_EMPTY(&chmp->chm_dirty_queue)) {
371                     dbg_gc("Picking block from dirty_queue to GC next"
372                         " (clean_queue was empty)\n");
373                     nextqueue = &chmp->chm_dirty_queue;
374           } else if (!TAILQ_EMPTY(&chmp->chm_very_dirty_queue)) {
375                     dbg_gc("Picking block from very_dirty_queue to GC next"
376                         " (clean_queue and dirty_queue were empty)\n");
377                     nextqueue = &chmp->chm_very_dirty_queue;
378           } else if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
379                     dbg_gc("Picking block from erase_pending_queue to GC next"
380                         " (clean_queue and {very_,}dirty_queue were empty)\n");
381                     nextqueue = &chmp->chm_erase_pending_queue;
382           } else if (!TAILQ_EMPTY(&chmp->chm_erasable_pending_wbuf_queue)) {
383                     dbg_gc("Synching wbuf in order to reuse "
384                         "erasable_pendig_wbuf_queue blocks\n");
385                     rw_enter(&chmp->chm_lock_wbuf, RW_WRITER);
386                     chfs_flush_pending_wbuf(chmp);
387                     rw_exit(&chmp->chm_lock_wbuf);
388                     goto again;
389           } else {
390                     dbg_gc("CHFS: no clean, dirty _or_ erasable"
391                         " blocks to GC from! Where are they all?\n");
392                     return NULL;
393           }
394 
395           /* Get the first block of the queue. */
396           ret = TAILQ_FIRST(nextqueue);
397           if (chmp->chm_nextblock) {
398                     dbg_gc("nextblock num: %u - gcblock num: %u\n",
399                         chmp->chm_nextblock->lnr, ret->lnr);
400                     if (ret == chmp->chm_nextblock)
401                               goto again;
402           }
403           TAILQ_REMOVE(nextqueue, ret, queue);
404 
405           /* Set GC block. */
406           chmp->chm_gcblock = ret;
407           /* Set GC node. */
408           ret->gc_node = ret->first_node;
409 
410           if (!ret->gc_node) {
411                     dbg_gc("Oops! ret->gc_node at LEB: %u is NULL\n", ret->lnr);
412                     panic("CHFS BUG - one LEB's gc_node is NULL\n");
413           }
414 
415           /* TODO wasted size? */
416           return ret;
417 }
418 
419 /* chfs_gcollect_pass - this is the main function of GC */
420 int
chfs_gcollect_pass(struct chfs_mount * chmp)421 chfs_gcollect_pass(struct chfs_mount *chmp)
422 {
423           struct chfs_vnode_cache *vc;
424           struct chfs_eraseblock *eb;
425           struct chfs_node_ref *nref;
426           uint32_t gcblock_dirty;
427           struct chfs_inode *ip;
428           ino_t vno, pvno;
429           uint32_t nlink;
430           int ret = 0;
431 
432           KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
433 
434           /* Check all vnodes. */
435           for (;;) {
436                     mutex_enter(&chmp->chm_lock_sizes);
437 
438                     /* Check unchecked size. */
439                     dbg_gc("unchecked size == %u\n", chmp->chm_unchecked_size);
440                     if (!chmp->chm_unchecked_size)
441                               break;
442 
443                     /* Compare vnode number to the maximum. */
444                     if (chmp->chm_checked_vno > chmp->chm_max_vno) {
445                               mutex_exit(&chmp->chm_lock_sizes);
446                               dbg_gc("checked_vno (#%llu) > max_vno (#%llu)\n",
447                                   (unsigned long long)chmp->chm_checked_vno,
448                                   (unsigned long long)chmp->chm_max_vno);
449                               return ENOSPC;
450                     }
451 
452                     mutex_exit(&chmp->chm_lock_sizes);
453 
454                     mutex_enter(&chmp->chm_lock_vnocache);
455                     dbg_gc("checking vno #%llu\n",
456                               (unsigned long long)chmp->chm_checked_vno);
457                     dbg_gc("get vnode cache\n");
458 
459                     /* OK, Get and check the vnode cache. */
460                     vc = chfs_vnode_cache_get(chmp, chmp->chm_checked_vno++);
461 
462                     if (!vc) {
463                               dbg_gc("!vc\n");
464                               mutex_exit(&chmp->chm_lock_vnocache);
465                               continue;
466                     }
467 
468                     if ((vc->pvno | vc->nlink) == 0) {
469                               dbg_gc("(pvno | nlink) == 0\n");
470                               mutex_exit(&chmp->chm_lock_vnocache);
471                               continue;
472                     }
473 
474                     /* Find out the state of the vnode. */
475                     dbg_gc("switch\n");
476                     switch (vc->state) {
477                     case VNO_STATE_CHECKEDABSENT:
478                               /* FALLTHROUGH */
479                     case VNO_STATE_PRESENT:
480                               mutex_exit(&chmp->chm_lock_vnocache);
481                               continue;
482 
483                     case VNO_STATE_GC:
484                               /* FALLTHROUGH */
485                     case VNO_STATE_CHECKING:
486                               mutex_exit(&chmp->chm_lock_vnocache);
487                               dbg_gc("VNO_STATE GC or CHECKING\n");
488                               panic("CHFS BUG - vc state gc or checking\n");
489 
490                     case VNO_STATE_READING:
491                               chmp->chm_checked_vno--;
492                               mutex_exit(&chmp->chm_lock_vnocache);
493                               /* XXX why do we need the delay here?! */
494                               kpause("chvncrea", true, mstohz(50), NULL);
495 
496                               return 0;
497 
498                     default:
499                               mutex_exit(&chmp->chm_lock_vnocache);
500                               dbg_gc("default\n");
501                               panic("CHFS BUG - vc state is other what we"
502                                   " checked\n");
503 
504                     case VNO_STATE_UNCHECKED:
505                               ;
506                     }
507 
508                     /* We found an unchecked vnode. */
509 
510                     vc->state = VNO_STATE_CHECKING;
511 
512                     /* XXX check if this is too heavy to call under
513                      * chm_lock_vnocache
514                      */
515                     ret = chfs_check(chmp, vc);
516                     vc->state = VNO_STATE_CHECKEDABSENT;
517 
518                     mutex_exit(&chmp->chm_lock_vnocache);
519                     return ret;
520           }
521 
522           /* Get GC block. */
523           eb = chmp->chm_gcblock;
524 
525           if (!eb) {
526                     eb = find_gc_block(chmp);
527           }
528 
529           if (!eb) {
530                     dbg_gc("!eb\n");
531                     if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
532                               mutex_exit(&chmp->chm_lock_sizes);
533                               return EAGAIN;
534                     }
535                     mutex_exit(&chmp->chm_lock_sizes);
536                     return EIO;
537           }
538 
539           if (!eb->used_size) {
540                     dbg_gc("!eb->used_size\n");
541                     goto eraseit;
542           }
543 
544           /* Get GC node. */
545           nref = eb->gc_node;
546           gcblock_dirty = eb->dirty_size;
547 
548           /* Find a node which wasn't obsoleted yet.
549            * Obsoleted nodes will be simply deleted after the whole block has checked. */
550           while(CHFS_REF_OBSOLETE(nref)) {
551 #ifdef DBG_MSG_GC
552                     if (nref == chmp->chm_blocks[nref->nref_lnr].last_node) {
553                               dbg_gc("THIS NODE IS THE LAST NODE OF ITS EB\n");
554                     }
555 #endif
556                     nref = node_next(nref);
557                     if (!nref) {
558                               eb->gc_node = nref;
559                               mutex_exit(&chmp->chm_lock_sizes);
560                               panic("CHFS BUG - nref is NULL)\n");
561                     }
562           }
563 
564           /* We found a "not obsoleted" node. */
565           eb->gc_node = nref;
566           KASSERT(nref->nref_lnr == chmp->chm_gcblock->lnr);
567 
568           /* Check if node is in any chain. */
569           if (!nref->nref_next) {
570                     /* This node is not in any chain. Simply collect it, or obsolete. */
571                     mutex_exit(&chmp->chm_lock_sizes);
572                     if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
573                               chfs_gcollect_pristine(chmp, eb, NULL, nref);
574                     } else {
575                               chfs_mark_node_obsolete(chmp, nref);
576                     }
577                     goto lock_size;
578           }
579 
580           mutex_exit(&chmp->chm_lock_sizes);
581 
582           mutex_enter(&chmp->chm_lock_vnocache);
583 
584           dbg_gc("nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset);
585           vc = chfs_nref_to_vc(nref);
586 
587           /* Check the state of the node. */
588           dbg_gc("switch\n");
589           switch(vc->state) {
590         case VNO_STATE_CHECKEDABSENT:
591                               if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
592                                         vc->state = VNO_STATE_GC;
593                               }
594                               break;
595 
596         case VNO_STATE_PRESENT:
597                               break;
598 
599         case VNO_STATE_UNCHECKED:
600                               /* FALLTHROUGH */
601         case VNO_STATE_CHECKING:
602                               /* FALLTHROUGH */
603         case VNO_STATE_GC:
604                               mutex_exit(&chmp->chm_lock_vnocache);
605                               panic("CHFS BUG - vc state unchecked,"
606                                         " checking or gc (vno #%llu, num #%d)\n",
607                                         (unsigned long long)vc->vno, vc->state);
608 
609         case VNO_STATE_READING:
610                               /* Node is in use at this time. */
611                               mutex_exit(&chmp->chm_lock_vnocache);
612                               kpause("chvncrea", true, mstohz(50), NULL);
613                               return 0;
614           }
615 
616           if (vc->state == VNO_STATE_GC) {
617                     dbg_gc("vc->state == VNO_STATE_GC\n");
618                     vc->state = VNO_STATE_CHECKEDABSENT;
619                     mutex_exit(&chmp->chm_lock_vnocache);
620                     ret = chfs_gcollect_pristine(chmp, eb, NULL, nref);
621 
622                     //TODO wake_up(&chmp->chm_vnocache_wq);
623                     if (ret != EBADF)
624                               goto test_gcnode;
625                     mutex_enter(&chmp->chm_lock_vnocache);
626           }
627 
628           /* Collect living node. */
629           vno = vc->vno;
630           pvno = vc->pvno;
631           nlink = vc->nlink;
632           mutex_exit(&chmp->chm_lock_vnocache);
633 
634           ip = chfs_gc_fetch_inode(chmp, vno, !(pvno | nlink));
635 
636           if (!ip) {
637                     dbg_gc("!ip\n");
638                     ret = 0;
639                     goto lock_size;
640           }
641 
642           chfs_gcollect_live(chmp, eb, nref, ip);
643 
644           chfs_gc_release_inode(chmp, ip);
645 
646 test_gcnode:
647           if (eb->dirty_size == gcblock_dirty &&
648               !CHFS_REF_OBSOLETE(eb->gc_node)) {
649                     dbg_gc("ERROR collecting node at %u failed.\n",
650                         CHFS_GET_OFS(eb->gc_node->nref_offset));
651 
652                     ret = ENOSPC;
653           }
654 
655 lock_size:
656           KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
657           mutex_enter(&chmp->chm_lock_sizes);
658 eraseit:
659           dbg_gc("eraseit\n");
660 
661           if (chmp->chm_gcblock) {
662           /* This is only for debugging. */
663                     dbg_gc("eb used size = %u\n", chmp->chm_gcblock->used_size);
664                     dbg_gc("eb free size = %u\n", chmp->chm_gcblock->free_size);
665                     dbg_gc("eb dirty size = %u\n", chmp->chm_gcblock->dirty_size);
666                     dbg_gc("eb unchecked size = %u\n",
667                         chmp->chm_gcblock->unchecked_size);
668                     dbg_gc("eb wasted size = %u\n", chmp->chm_gcblock->wasted_size);
669 
670                     KASSERT(chmp->chm_gcblock->used_size + chmp->chm_gcblock->free_size +
671                         chmp->chm_gcblock->dirty_size +
672                         chmp->chm_gcblock->unchecked_size +
673                         chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size);
674 
675           }
676 
677           /* Check the state of GC block. */
678           if (chmp->chm_gcblock && chmp->chm_gcblock->dirty_size +
679               chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size) {
680                     dbg_gc("Block at leb #%u completely obsoleted by GC, "
681                         "Moving to erase_pending_queue\n", chmp->chm_gcblock->lnr);
682                     TAILQ_INSERT_TAIL(&chmp->chm_erase_pending_queue,
683                         chmp->chm_gcblock, queue);
684                     chmp->chm_gcblock = NULL;
685                     chmp->chm_nr_erasable_blocks++;
686                     if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
687                               ret = chfs_remap_leb(chmp);
688                     }
689           }
690 
691           mutex_exit(&chmp->chm_lock_sizes);
692           dbg_gc("return\n");
693           return ret;
694 }
695 
696 
697 /* chfs_gcollect_pristine - collects a pristine node */
698 int
chfs_gcollect_pristine(struct chfs_mount * chmp,struct chfs_eraseblock * cheb,struct chfs_vnode_cache * chvc,struct chfs_node_ref * nref)699 chfs_gcollect_pristine(struct chfs_mount *chmp, struct chfs_eraseblock *cheb,
700     struct chfs_vnode_cache *chvc, struct chfs_node_ref *nref)
701 {
702           struct chfs_node_ref *newnref;
703           struct chfs_flash_node_hdr *nhdr;
704           struct chfs_flash_vnode *fvnode;
705           struct chfs_flash_dirent_node *fdirent;
706           struct chfs_flash_data_node *fdata;
707           int ret, retries = 0;
708           uint32_t ofs, crc;
709           size_t totlen = chfs_nref_len(chmp, cheb, nref);
710           char *data;
711           struct iovec vec;
712           size_t retlen;
713 
714           dbg_gc("gcollect_pristine\n");
715 
716           data = kmem_alloc(totlen, KM_SLEEP);
717           ofs = CHFS_GET_OFS(nref->nref_offset);
718 
719           /* Read header. */
720           ret = chfs_read_leb(chmp, nref->nref_lnr, data, ofs, totlen, &retlen);
721           if (ret) {
722                     dbg_gc("reading error\n");
723                     goto err_out;
724           }
725           if (retlen != totlen) {
726                     dbg_gc("read size error\n");
727                     ret = EIO;
728                     goto err_out;
729           }
730           nhdr = (struct chfs_flash_node_hdr *)data;
731 
732           /* Check the header. */
733           if (le16toh(nhdr->magic) != CHFS_FS_MAGIC_BITMASK) {
734                     dbg_gc("node header magic number error\n");
735                     ret = EBADF;
736                     goto err_out;
737           }
738           crc = crc32(0, (uint8_t *)nhdr, CHFS_NODE_HDR_SIZE - 4);
739           if (crc != le32toh(nhdr->hdr_crc)) {
740                     dbg_gc("node header crc error\n");
741                     ret = EBADF;
742                     goto err_out;
743           }
744 
745           /* Read the remaining parts. */
746           switch(le16toh(nhdr->type)) {
747         case CHFS_NODETYPE_VNODE:
748                     /* vnode information node */
749                               fvnode = (struct chfs_flash_vnode *)data;
750                   crc = crc32(0, (uint8_t *)fvnode, sizeof(struct chfs_flash_vnode) - 4);
751                   if (crc != le32toh(fvnode->node_crc)) {
752                                         dbg_gc("vnode crc error\n");
753                                         ret = EBADF;
754                                         goto err_out;
755                               }
756                               break;
757         case CHFS_NODETYPE_DIRENT:
758                     /* dirent node */
759                               fdirent = (struct chfs_flash_dirent_node *)data;
760                   crc = crc32(0, (uint8_t *)fdirent, sizeof(struct chfs_flash_dirent_node) - 4);
761                   if (crc != le32toh(fdirent->node_crc)) {
762                                         dbg_gc("dirent crc error\n");
763                                         ret = EBADF;
764                                         goto err_out;
765                               }
766                   crc = crc32(0, fdirent->name, fdirent->nsize);
767                   if (crc != le32toh(fdirent->name_crc)) {
768                                         dbg_gc("dirent name crc error\n");
769                                         ret = EBADF;
770                                         goto err_out;
771                               }
772                               break;
773         case CHFS_NODETYPE_DATA:
774                     /* data node */
775                               fdata = (struct chfs_flash_data_node *)data;
776                   crc = crc32(0, (uint8_t *)fdata, sizeof(struct chfs_flash_data_node) - 4);
777                   if (crc != le32toh(fdata->node_crc)) {
778                                         dbg_gc("data node crc error\n");
779                                         ret = EBADF;
780                                         goto err_out;
781                               }
782                               break;
783         default:
784                     /* unknown node */
785                               if (chvc) {
786                                         dbg_gc("unknown node have vnode cache\n");
787                                         ret = EBADF;
788                                         goto err_out;
789                               }
790           }
791           /* CRC's OK, write node to its new place */
792 retry:
793           ret = chfs_reserve_space_gc(chmp, totlen);
794           if (ret)
795                     goto err_out;
796 
797           newnref = chfs_alloc_node_ref(chmp->chm_nextblock);
798           if (!newnref) {
799                     ret = ENOMEM;
800                     goto err_out;
801           }
802 
803           ofs = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size;
804           newnref->nref_offset = ofs;
805 
806           /* write out the whole node */
807           vec.iov_base = (void *)data;
808           vec.iov_len = totlen;
809           mutex_enter(&chmp->chm_lock_sizes);
810           ret = chfs_write_wbuf(chmp, &vec, 1, ofs, &retlen);
811 
812           if (ret || retlen != totlen) {
813                     /* error while writing */
814                     chfs_err("error while writing out to the media\n");
815                     chfs_err("err: %d | size: %zu | retlen : %zu\n",
816                         ret, totlen, retlen);
817 
818                     chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen);
819                     if (retries) {
820                               mutex_exit(&chmp->chm_lock_sizes);
821                               ret = EIO;
822                               goto err_out;
823                     }
824 
825                     /* try again */
826                     retries++;
827                     mutex_exit(&chmp->chm_lock_sizes);
828                     goto retry;
829           }
830 
831           /* update vnode information */
832           mutex_exit(&chmp->chm_lock_sizes);
833           //TODO should we set free_size?
834           mutex_enter(&chmp->chm_lock_vnocache);
835           chfs_add_vnode_ref_to_vc(chmp, chvc, newnref);
836           mutex_exit(&chmp->chm_lock_vnocache);
837           ret = 0;
838           /* FALLTHROUGH */
839 err_out:
840           kmem_free(data, totlen);
841           return ret;
842 }
843 
844 
845 /* chfs_gcollect_live - collects a living node */
846 int
chfs_gcollect_live(struct chfs_mount * chmp,struct chfs_eraseblock * cheb,struct chfs_node_ref * nref,struct chfs_inode * ip)847 chfs_gcollect_live(struct chfs_mount *chmp,
848     struct chfs_eraseblock *cheb, struct chfs_node_ref *nref,
849     struct chfs_inode *ip)
850 {
851           struct chfs_node_frag *frag;
852           struct chfs_full_dnode *fn = NULL;
853           int start = 0, end = 0, nrfrags = 0;
854           struct chfs_dirent *fd = NULL;
855           int ret = 0;
856           bool is_dirent;
857 
858           dbg_gc("gcollect_live\n");
859 
860           if (chmp->chm_gcblock != cheb) {
861                     dbg_gc("GC block is no longer gcblock. Restart.\n");
862                     goto upnout;
863           }
864 
865           if (CHFS_REF_OBSOLETE(nref)) {
866                     dbg_gc("node to be GC'd was obsoleted in the meantime.\n");
867                     goto upnout;
868           }
869 
870           /* It's a vnode? */
871           if (ip->chvc->v == nref) {
872                     chfs_gcollect_vnode(chmp, ip);
873                     goto upnout;
874           }
875 
876           /* Find data node. */
877           dbg_gc("find full dnode\n");
878           for(frag = frag_first(&ip->fragtree);
879               frag; frag = frag_next(&ip->fragtree, frag)) {
880                     if (frag->node && frag->node->nref == nref) {
881                               fn = frag->node;
882                               end = frag->ofs + frag->size;
883                               if (!nrfrags++)
884                                         start = frag->ofs;
885                               if (nrfrags == frag->node->frags)
886                                         break;
887                     }
888           }
889 
890           /* It's a pristine node, or dnode (or hole? XXX have we hole nodes?) */
891           if (fn) {
892                     if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
893                               ret = chfs_gcollect_pristine(chmp,
894                                   cheb, ip->chvc, nref);
895                               if (!ret) {
896                                         frag->node->nref = ip->chvc->v;
897                               }
898                               if (ret != EBADF)
899                                         goto upnout;
900                     }
901                     ret = chfs_gcollect_dnode(chmp, cheb, ip, fn, start, end);
902                     goto upnout;
903           }
904 
905           /* Is it a dirent? */
906           dbg_gc("find full dirent\n");
907           is_dirent = false;
908           TAILQ_FOREACH(fd, &ip->dents, fds) {
909                     if (fd->nref == nref) {
910                               is_dirent = true;
911                               break;
912                     }
913           }
914 
915           if (is_dirent && fd->vno) {
916                     /* Living dirent. */
917                     ret = chfs_gcollect_dirent(chmp, cheb, ip, fd);
918           } else if (is_dirent) {
919                     /* Already deleted dirent. */
920                     ret = chfs_gcollect_deletion_dirent(chmp, cheb, ip, fd);
921           } else {
922                     dbg_gc("Nref at leb #%u offset 0x%08x wasn't in node list"
923                         " for ino #%llu\n",
924                         nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset),
925                         (unsigned long long)ip->ino);
926                     if (CHFS_REF_OBSOLETE(nref)) {
927                               dbg_gc("But it's obsolete so we don't mind"
928                                   " too much.\n");
929                     }
930           }
931 
932 upnout:
933           return ret;
934 }
935 
936 /* chfs_gcollect_vnode - collects a vnode information node */
937 int
chfs_gcollect_vnode(struct chfs_mount * chmp,struct chfs_inode * ip)938 chfs_gcollect_vnode(struct chfs_mount *chmp, struct chfs_inode *ip)
939 {
940           int ret;
941           dbg_gc("gcollect_vnode\n");
942 
943           /* Simply write the new vnode information to the flash
944            * with GC's space allocation */
945           ret = chfs_write_flash_vnode(chmp, ip, ALLOC_GC);
946 
947           return ret;
948 }
949 
950 /* chfs_gcollect_dirent - collects a dirent */
951 int
chfs_gcollect_dirent(struct chfs_mount * chmp,struct chfs_eraseblock * cheb,struct chfs_inode * parent,struct chfs_dirent * fd)952 chfs_gcollect_dirent(struct chfs_mount *chmp,
953     struct chfs_eraseblock *cheb, struct chfs_inode *parent,
954     struct chfs_dirent *fd)
955 {
956           struct vnode *vnode = NULL;
957           struct chfs_inode *ip;
958           dbg_gc("gcollect_dirent\n");
959 
960           /* Find vnode. */
961           vnode = chfs_vnode_lookup(chmp, fd->vno);
962 
963           /* XXX maybe KASSERT or panic on this? */
964           if (vnode == NULL) {
965                     return ENOENT;
966           }
967 
968           ip = VTOI(vnode);
969           vrele(vnode);
970 
971           /* Remove and obsolete the previous version. */
972           mutex_enter(&chmp->chm_lock_vnocache);
973           chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref,
974                     &parent->chvc->dirents);
975           mutex_exit(&chmp->chm_lock_vnocache);
976 
977           /* Write the new dirent to the flash. */
978           return chfs_write_flash_dirent(chmp,
979               parent, ip, fd, fd->vno, ALLOC_GC);
980 }
981 
982 /*
983  * chfs_gcollect_deletion_dirent -
984  * collects a dirent what was marked as deleted
985  */
986 int
chfs_gcollect_deletion_dirent(struct chfs_mount * chmp,struct chfs_eraseblock * cheb,struct chfs_inode * parent,struct chfs_dirent * fd)987 chfs_gcollect_deletion_dirent(struct chfs_mount *chmp,
988     struct chfs_eraseblock *cheb, struct chfs_inode *parent,
989     struct chfs_dirent *fd)
990 {
991           struct chfs_flash_dirent_node chfdn;
992           struct chfs_node_ref *nref;
993           size_t retlen, name_len, nref_len;
994           uint32_t name_crc;
995 
996           int ret;
997 
998           dbg_gc("gcollect_deletion_dirent\n");
999 
1000           /* Check node. */
1001           name_len = strlen(fd->name);
1002           name_crc = crc32(0, fd->name, name_len);
1003 
1004           nref_len = chfs_nref_len(chmp, cheb, fd->nref);
1005 
1006           /* XXX This was a noop  (void)chfs_vnode_lookup(chmp, fd->vno); */
1007 
1008           /* Find it in parent dirents. */
1009           for (nref = parent->chvc->dirents;
1010                nref != (void*)parent->chvc;
1011                nref = nref->nref_next) {
1012 
1013                     if (!CHFS_REF_OBSOLETE(nref))
1014                               continue;
1015 
1016                     /* if node refs have different length, skip */
1017                     if (chfs_nref_len(chmp, NULL, nref) != nref_len)
1018                               continue;
1019 
1020                     if (CHFS_GET_OFS(nref->nref_offset) ==
1021                         CHFS_GET_OFS(fd->nref->nref_offset)) {
1022                               continue;
1023                     }
1024 
1025                     /* read it from flash */
1026                     ret = chfs_read_leb(chmp,
1027                         nref->nref_lnr, (void*)&chfdn, CHFS_GET_OFS(nref->nref_offset),
1028                         nref_len, &retlen);
1029 
1030                     if (ret) {
1031                               dbg_gc("Read error: %d\n", ret);
1032                               continue;
1033                     }
1034 
1035                     if (retlen != nref_len) {
1036                               dbg_gc("Error reading node:"
1037                                   " read: %zu instead of: %zu\n", retlen, nref_len);
1038                               continue;
1039                     }
1040 
1041                     /* if node type doesn't match, skip */
1042                     if (le16toh(chfdn.type) != CHFS_NODETYPE_DIRENT)
1043                               continue;
1044 
1045                     /* if crc doesn't match, skip */
1046                     if (le32toh(chfdn.name_crc) != name_crc)
1047                               continue;
1048 
1049                     /* if length of name different, or this is an another deletion
1050                      * dirent, skip
1051                      */
1052                     if (chfdn.nsize != name_len || !le64toh(chfdn.vno))
1053                               continue;
1054 
1055                     /* check actual name */
1056                     if (memcmp(chfdn.name, fd->name, name_len))
1057                               continue;
1058 
1059                     mutex_enter(&chmp->chm_lock_vnocache);
1060                     chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref,
1061                               &parent->chvc->dirents);
1062                     mutex_exit(&chmp->chm_lock_vnocache);
1063                     return chfs_write_flash_dirent(chmp,
1064                         parent, NULL, fd, fd->vno, ALLOC_GC);
1065           }
1066 
1067           /* Simply remove it from the parent dirents. */
1068           TAILQ_REMOVE(&parent->dents, fd, fds);
1069           chfs_free_dirent(fd);
1070           return 0;
1071 }
1072 
1073 /* chfs_gcollect_dnode - */
1074 int
chfs_gcollect_dnode(struct chfs_mount * chmp,struct chfs_eraseblock * orig_cheb,struct chfs_inode * ip,struct chfs_full_dnode * fn,uint32_t orig_start,uint32_t orig_end)1075 chfs_gcollect_dnode(struct chfs_mount *chmp,
1076     struct chfs_eraseblock *orig_cheb, struct chfs_inode *ip,
1077     struct chfs_full_dnode *fn, uint32_t orig_start, uint32_t orig_end)
1078 {
1079           struct chfs_node_ref *nref;
1080           struct chfs_full_dnode *newfn;
1081           struct chfs_flash_data_node *fdnode;
1082           int ret = 0, retries = 0;
1083           uint32_t totlen;
1084           char *data = NULL;
1085           struct iovec vec;
1086           size_t retlen;
1087           dbg_gc("gcollect_dnode\n");
1088 
1089           //TODO merge frags
1090 
1091           KASSERT(orig_cheb->lnr == fn->nref->nref_lnr);
1092           totlen = chfs_nref_len(chmp, orig_cheb, fn->nref);
1093           data = kmem_alloc(totlen, KM_SLEEP);
1094 
1095           /* Read the node from the flash. */
1096           ret = chfs_read_leb(chmp, fn->nref->nref_lnr, data, fn->nref->nref_offset,
1097               totlen, &retlen);
1098 
1099           fdnode = (struct chfs_flash_data_node *)data;
1100           fdnode->version = htole64(++ip->chvc->highest_version);
1101           fdnode->node_crc = htole32(crc32(0, (uint8_t *)fdnode,
1102                     sizeof(*fdnode) - 4));
1103 
1104           vec.iov_base = (void *)data;
1105           vec.iov_len = totlen;
1106 
1107 retry:
1108           /* Set the next block where we can write. */
1109           ret = chfs_reserve_space_gc(chmp, totlen);
1110           if (ret)
1111                     goto out;
1112 
1113           nref = chfs_alloc_node_ref(chmp->chm_nextblock);
1114           if (!nref) {
1115                     ret = ENOMEM;
1116                     goto out;
1117           }
1118 
1119           mutex_enter(&chmp->chm_lock_sizes);
1120 
1121           nref->nref_offset = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size;
1122           KASSERT(nref->nref_offset % 4 == 0);
1123           chfs_change_size_free(chmp, chmp->chm_nextblock, -totlen);
1124 
1125           /* Write it to the writebuffer. */
1126           ret = chfs_write_wbuf(chmp, &vec, 1, nref->nref_offset, &retlen);
1127           if (ret || retlen != totlen) {
1128                     /* error during writing */
1129                     chfs_err("error while writing out to the media\n");
1130                     chfs_err("err: %d | size: %d | retlen : %zu\n",
1131                         ret, totlen, retlen);
1132                     chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen);
1133                     if (retries) {
1134                               ret = EIO;
1135                               mutex_exit(&chmp->chm_lock_sizes);
1136                               goto out;
1137                     }
1138 
1139                     /* try again */
1140                     retries++;
1141                     mutex_exit(&chmp->chm_lock_sizes);
1142                     goto retry;
1143           }
1144 
1145           dbg_gc("new nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset);
1146 
1147           chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
1148           mutex_exit(&chmp->chm_lock_sizes);
1149           KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);
1150 
1151           /* Set fields of the new node. */
1152           newfn = chfs_alloc_full_dnode();
1153           newfn->nref = nref;
1154           newfn->ofs = fn->ofs;
1155           newfn->size = fn->size;
1156           newfn->frags = 0;
1157 
1158           mutex_enter(&chmp->chm_lock_vnocache);
1159           /* Remove every part of the old node. */
1160           chfs_remove_frags_of_node(chmp, &ip->fragtree, fn->nref);
1161           chfs_remove_and_obsolete(chmp, ip->chvc, fn->nref, &ip->chvc->dnode);
1162 
1163           /* Add the new nref to inode. */
1164           chfs_add_full_dnode_to_inode(chmp, ip, newfn);
1165           chfs_add_node_to_list(chmp,
1166               ip->chvc, newfn->nref, &ip->chvc->dnode);
1167           mutex_exit(&chmp->chm_lock_vnocache);
1168 
1169 out:
1170           kmem_free(data, totlen);
1171           return ret;
1172 }
1173