xref: /dragonfly/usr.sbin/makefs/hammer2/hammer2_io.c (revision 6b47f3ea0add18fe433924e96d23c8a42f668f93)
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
5  * Copyright (c) 2013-2023 The DragonFly Project.  All rights reserved.
6  *
7  * This code is derived from software contributed to The DragonFly Project
8  * by Matthew Dillon <dillon@dragonflybsd.org>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in
18  *    the documentation and/or other materials provided with the
19  *    distribution.
20  * 3. Neither the name of The DragonFly Project nor the names of its
21  *    contributors may be used to endorse or promote products derived
22  *    from this software without specific, prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
28  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
34  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include "hammer2.h"
39 
/*
 * Operation codes passed as the 'op' argument of _hammer2_io_getblk().
 */
#define HAMMER2_DOP_READ	1	/* read the buffer if it is not good yet */
#define HAMMER2_DOP_NEW		2	/* zero the logical range, mark dirty */
#define HAMMER2_DOP_NEWNZ	3	/* mark dirty without zeroing */
#define HAMMER2_DOP_READQ	4	/* READ, but only if the DIO exists */
44 
/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  It can be used as an OS abstraction, but its main
 * purpose is to allow larger buffers to be used against hammer2_chains
 * that use smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
54 static void dio_write_stats_update(hammer2_io_t *dio, struct m_buf *bp);
55 
56 static hammer2_io_t *hammer2_io_hash_lookup(hammer2_dev_t *hmp,
57                               hammer2_off_t pbase, uint64_t *refsp);
58 static hammer2_io_t *hammer2_io_hash_enter(hammer2_dev_t *hmp,
59                               hammer2_io_t *dio, uint64_t *refsp);
60 static void hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit);
61 
62 void
hammer2_io_hash_init(hammer2_dev_t * hmp)63 hammer2_io_hash_init(hammer2_dev_t *hmp)
64 {
65           hammer2_io_hash_t *hash;
66           int i;
67 
68           for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
69                     hash = &hmp->iohash[i];
70                     hammer2_spin_init(&hash->spin, "h2iohash");
71           }
72 }
73 
#ifdef HAMMER2_IO_DEBUG

/*
 * Debug build: log the caller in the DIO's circular debug arrays.
 *
 * Each call claims the next slot (debug_index is post-incremented
 * atomically and wrapped with HAMMER2_IO_DEBUG_MASK) and stores the
 * caller's file and line, the current dio->refs value and curthread.
 * 'file' and 'line' are presumably declared by HAMMER2_IO_DEBUG_ARGS,
 * which is defined outside this file.
 */
static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int slot;

	slot = atomic_fetchadd_int(&dio->debug_index, 1);
	slot &= HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[slot] = file;
	dio->debug_line[slot] = line;
	dio->debug_refs[slot] = dio->refs;
	dio->debug_td[slot] = curthread;
}

#else

/* Non-debug build: recording compiles away to nothing. */
#define DIO_RECORD(dio)

#endif
94 
95 /*
96  * Returns the DIO corresponding to the data|radix, creating it if necessary.
97  *
98  * If createit is 0, NULL can be returned indicating that the DIO does not
99  * exist.  (btype) is ignored when createit is 0.
100  */
101 static
102 hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t * hmp,hammer2_off_t data_off,uint8_t btype,int createit,int * isgoodp)103 hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_off_t data_off, uint8_t btype,
104                      int createit, int *isgoodp)
105 {
106           hammer2_io_t *dio;
107           hammer2_io_t *xio;
108           hammer2_off_t lbase;
109           hammer2_off_t pbase;
110           hammer2_off_t pmask;
111           hammer2_vfsvolume_t *vol;
112           uint64_t refs;
113           int lsize;
114           int psize;
115 
116           psize = HAMMER2_PBUFSIZE;
117           pmask = ~(hammer2_off_t)(psize - 1);
118           if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
119                     lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
120           else
121                     lsize = 0;
122           lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
123           pbase = lbase & pmask;
124 
125           if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
126                     kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
127                               pbase, lbase, lsize, pmask);
128           }
129           KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
130           *isgoodp = 0;
131 
132           /*
133            * Access/Allocate the DIO, bump dio->refs to prevent destruction.
134            *
135            * If DIO_GOOD is set the ref should prevent it from being cleared
136            * out from under us, we can set *isgoodp, and the caller can operate
137            * on the buffer without any further interaction.
138            */
139           dio = hammer2_io_hash_lookup(hmp, pbase, &refs);
140           if (dio) {
141                     if (refs & HAMMER2_DIO_GOOD)
142                               *isgoodp = 1;
143           } else if (createit) {
144                     refs = 0;
145                     vol = hammer2_get_volume_from_hmp(hmp, pbase);
146                     dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
147                     dio->hmp = hmp;
148                     dio->devvp = vol->dev->devvp;
149                     dio->dbase = vol->offset;
150                     KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
151                     dio->pbase = pbase;
152                     dio->psize = psize;
153                     dio->btype = btype;
154                     dio->refs = refs + 1;
155                     dio->act = 5;
156                     xio = hammer2_io_hash_enter(hmp, dio, &refs);
157                     if (xio == NULL) {
158                               atomic_add_int(&hammer2_dio_count, 1);
159                     } else {
160                               if (refs & HAMMER2_DIO_GOOD)
161                                         *isgoodp = 1;
162                               kfree_obj(dio, hmp->mio);
163                               dio = xio;
164                     }
165           } else {
166                     return NULL;
167           }
168           dio->ticks = ticks;
169           if (dio->act < 10)
170                     ++dio->act;
171 
172           return dio;
173 }
174 
175 /*
176  * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
177  * a buffer.  If set the buffer already exists and is good to go.
178  */
179 hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t * hmp,int btype,off_t lbase,int lsize,int op HAMMER2_IO_DEBUG_ARGS)180 _hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
181                        int lsize, int op HAMMER2_IO_DEBUG_ARGS)
182 {
183           hammer2_io_t *dio;
184           hammer2_off_t dev_pbase;
185           //off_t peof;
186           uint64_t orefs;
187           uint64_t nrefs;
188           int isgood;
189           int error;
190           int hce;
191           //int bflags;
192 
193           //bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
194           //bflags |= B_KVABIO;
195 
196           KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
197 
198           if (op == HAMMER2_DOP_READQ) {
199                     dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
200                     if (dio == NULL)
201                               return NULL;
202                     op = HAMMER2_DOP_READ;
203           } else {
204                     dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
205           }
206 
207           for (;;) {
208                     orefs = dio->refs;
209                     cpu_ccfence();
210 
211                     /*
212                      * Buffer is already good, handle the op and return.
213                      */
214                     if (orefs & HAMMER2_DIO_GOOD) {
215                               if (isgood == 0)
216                                         cpu_mfence();
217                               bkvasync(dio->bp);
218 
219                               switch(op) {
220                               case HAMMER2_DOP_NEW:
221                                         bzero(hammer2_io_data(dio, lbase), lsize);
222                                         /* fall through */
223                               case HAMMER2_DOP_NEWNZ:
224                                         atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
225                                         break;
226                               case HAMMER2_DOP_READ:
227                               default:
228                                         /* nothing to do */
229                                         break;
230                               }
231                               DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
232                               return (dio);
233                     }
234 
235                     /*
236                      * Try to own the DIO
237                      */
238                     if (orefs & HAMMER2_DIO_INPROG) {
239                               nrefs = orefs | HAMMER2_DIO_WAITING;
240                               tsleep_interlock(dio, 0);
241                               if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
242                                         tsleep(dio, PINTERLOCKED, "h2dio", hz);
243                               }
244                               /* retry */
245                     } else {
246                               nrefs = orefs | HAMMER2_DIO_INPROG;
247                               if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
248                                         break;
249                               }
250                     }
251           }
252 
253           /*
254            * We break to here if GOOD is not set and we acquired INPROG for
255            * the I/O.
256            */
257           KKASSERT(dio->bp == NULL);
258           if (btype == HAMMER2_BREF_TYPE_DATA)
259                     hce = hammer2_cluster_data_read;
260           else
261                     hce = hammer2_cluster_meta_read;
262 
263           error = 0;
264           dev_pbase = dio->pbase - dio->dbase;
265           if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
266               dio->psize == lsize) {
267                     switch(op) {
268                     case HAMMER2_DOP_NEW:
269                     case HAMMER2_DOP_NEWNZ:
270                               dio->bp = getblkx(dio->devvp,
271                                                    dev_pbase, dio->psize,
272                                                    GETBLK_KVABIO, 0);
273                               if (op == HAMMER2_DOP_NEW) {
274                                         bkvasync(dio->bp);
275                                         bzero(dio->bp->b_data, dio->psize);
276                               }
277                               atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
278                               break;
279                     case HAMMER2_DOP_READ:
280                     default:
281                               KKASSERT(dio->bp == NULL);
282 #if 0
283                               if (hce > 0) {
284                                         /*
285                                          * Synchronous cluster I/O for now.
286                                          */
287                                         peof = (dio->pbase + HAMMER2_SEGMASK64) &
288                                                ~HAMMER2_SEGMASK64;
289                                         peof -= dio->dbase;
290                                         error = cluster_readx(dio->devvp,
291                                                                  peof, dev_pbase,
292                                                                  dio->psize, bflags,
293                                                                  dio->psize,
294                                                                  HAMMER2_PBUFSIZE*hce,
295                                                                  &dio->bp);
296                               } else {
297                                         error = breadnx(dio->devvp, dev_pbase,
298                                                             dio->psize, bflags,
299                                                           NULL, NULL, 0, &dio->bp);
300                               }
301 #else
302                               error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
303 #endif
304                               break;
305                     }
306           } else {
307 #if 0
308                     if (hce > 0) {
309                               /*
310                                * Synchronous cluster I/O for now.
311                                */
312                               peof = (dio->pbase + HAMMER2_SEGMASK64) &
313                                      ~HAMMER2_SEGMASK64;
314                               peof -= dio->dbase;
315                               error = cluster_readx(dio->devvp,
316                                                         peof, dev_pbase, dio->psize,
317                                                         bflags,
318                                                         dio->psize, HAMMER2_PBUFSIZE*hce,
319                                                         &dio->bp);
320                     } else {
321                               error = breadnx(dio->devvp, dev_pbase,
322                                                 dio->psize, bflags,
323                                                   NULL, NULL, 0, &dio->bp);
324                     }
325 #else
326                     error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
327 #endif
328                     if (dio->bp) {
329                               /*
330                                * Handle NEW flags
331                                */
332                               switch(op) {
333                               case HAMMER2_DOP_NEW:
334                                         bkvasync(dio->bp);
335                                         bzero(hammer2_io_data(dio, lbase), lsize);
336                                         /* fall through */
337                               case HAMMER2_DOP_NEWNZ:
338                                         atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
339                                         break;
340                               case HAMMER2_DOP_READ:
341                               default:
342                                         break;
343                               }
344 
345                               /*
346                                * Tell the kernel that the buffer cache is not
347                                * meta-data based on the btype.  This allows
348                                * swapcache to distinguish between data and
349                                * meta-data.
350                                */
351                               switch(btype) {
352                               case HAMMER2_BREF_TYPE_DATA:
353                                         //dio->bp->b_flags |= B_NOTMETA;
354                                         break;
355                               default:
356                                         break;
357                               }
358                     }
359           }
360 
361           if (dio->bp) {
362                     bkvasync(dio->bp);
363                     BUF_KERNPROC(dio->bp);
364                     //dio->bp->b_flags &= ~B_AGE;
365                     /* dio->bp->b_debug_info2 = dio; */
366           }
367           dio->error = error;
368 
369           /*
370            * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
371            */
372           for (;;) {
373                     orefs = dio->refs;
374                     cpu_ccfence();
375                     nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
376                     if (error == 0)
377                               nrefs |= HAMMER2_DIO_GOOD;
378                     if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
379                               if (orefs & HAMMER2_DIO_WAITING)
380                                         wakeup(dio);
381                               break;
382                     }
383                     cpu_pause();
384           }
385 
386           /* XXX error handling */
387           DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
388 
389           return dio;
390 }
391 
392 /*
393  * Release our ref on *diop.
394  *
395  * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
396  * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
397  */
398 void
_hammer2_io_putblk(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)399 _hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
400 {
401           hammer2_dev_t *hmp;
402           hammer2_io_t *dio;
403           struct m_buf *bp;
404           off_t pbase;
405           int psize;
406           int dio_limit;
407           uint64_t orefs;
408           uint64_t nrefs;
409 
410           dio = *diop;
411           *diop = NULL;
412           hmp = dio->hmp;
413           DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
414 
415           KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);
416 
417           /*
418            * Drop refs.
419            *
420            * On the 1->0 transition clear GOOD and set INPROG, and break.
421            * On any other transition we can return early.
422            */
423           for (;;) {
424                     orefs = dio->refs;
425                     cpu_ccfence();
426 
427                     if ((orefs & HAMMER2_DIO_MASK) == 1 &&
428                         (orefs & HAMMER2_DIO_INPROG) == 0) {
429                               /*
430                                * Lastdrop case, INPROG can be set.  GOOD must be
431                                * cleared to prevent the getblk shortcut.
432                                */
433                               nrefs = orefs - 1;
434                               nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
435                               nrefs |= HAMMER2_DIO_INPROG;
436                               if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
437                                         break;
438                     } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
439                               /*
440                                * Lastdrop case, INPROG already set.  We must
441                                * wait for INPROG to clear.
442                                */
443                               nrefs = orefs | HAMMER2_DIO_WAITING;
444                               tsleep_interlock(dio, 0);
445                               if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
446                                         tsleep(dio, PINTERLOCKED, "h2dio", hz);
447                               }
448                               /* retry */
449                     } else {
450                               /*
451                                * Normal drop case.
452                                */
453                               nrefs = orefs - 1;
454                               if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
455                                         return;
456                               /* retry */
457                     }
458                     cpu_pause();
459                     /* retry */
460           }
461 
462           /*
463            * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
464            * have been cleared.  iofree_count has not yet been incremented,
465            * note that another accessor race will decrement iofree_count so
466            * we have to increment it regardless.
467            * We can now dispose of the buffer.
468            */
469           pbase = dio->pbase;
470           psize = dio->psize;
471           bp = dio->bp;
472           dio->bp = NULL;
473 
474           if ((orefs & HAMMER2_DIO_GOOD) && bp) {
475                     /*
476                      * Non-errored disposal of bp
477                      */
478                     if (orefs & HAMMER2_DIO_DIRTY) {
479                               dio_write_stats_update(dio, bp);
480 
481                               /*
482                                * Allows dirty buffers to accumulate and
483                                * possibly be canceled (e.g. by a 'rm'),
484                                * by default we will burst-write later.
485                                *
486                                * We generally do NOT want to issue an actual
487                                * b[a]write() or cluster_write() here.  Due to
488                                * the way chains are locked, buffers may be cycled
489                                * in and out quite often and disposal here can cause
490                                * multiple writes or write-read stalls.
491                                *
492                                * If FLUSH is set we do want to issue the actual
493                                * write.  This typically occurs in the write-behind
494                                * case when writing to large files.
495                                */
496                               //off_t peof;
497                               //int hce;
498                               if (dio->refs & HAMMER2_DIO_FLUSH) {
499 #if 0
500                                         if ((hce = hammer2_cluster_write) != 0) {
501                                                   peof = (pbase + HAMMER2_SEGMASK64) &
502                                                          ~HAMMER2_SEGMASK64;
503                                                   peof -= dio->dbase;
504                                                   bp->b_flags |= B_CLUSTEROK;
505                                                   cluster_write(bp, peof, psize, hce);
506                                         } else {
507                                                   bp->b_flags &= ~B_CLUSTEROK;
508                                                   bawrite(bp);
509                                         }
510 #else
511                                         bawrite(bp);
512 #endif
513                               } else {
514                                         //bp->b_flags &= ~B_CLUSTEROK;
515                                         bdwrite(bp);
516                               }
517 #if 0
518                     } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
519                               brelse(bp);
520 #endif
521                     } else {
522                               bqrelse(bp);
523                     }
524           } else if (bp) {
525                     /*
526                      * Errored disposal of bp
527                      */
528                     brelse(bp);
529           }
530 
531           /*
532            * Update iofree_count before disposing of the dio
533            */
534           hmp = dio->hmp;
535           atomic_add_int(&hmp->iofree_count, 1);
536 
537           /*
538            * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
539            *
540            * Also clear FLUSH as it was handled above.
541            */
542           for (;;) {
543                     orefs = dio->refs;
544                     cpu_ccfence();
545                     nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
546                                           HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
547                     if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
548                               if (orefs & HAMMER2_DIO_WAITING)
549                                         wakeup(dio);
550                               break;
551                     }
552                     cpu_pause();
553           }
554 
555           /*
556            * We cache free buffers so re-use cases can use a shared lock, but
557            * if too many build up we have to clean them out.
558            */
559           dio_limit = hammer2_dio_limit;
560           if (dio_limit < 256)
561                     dio_limit = 256;
562           if (dio_limit > 1024*1024)
563                     dio_limit = 1024*1024;
564           if (hmp->iofree_count > dio_limit)
565                     hammer2_io_hash_cleanup(hmp, dio_limit);
566 }
567 
568 /*
569  * Returns a pointer to the requested data.
570  */
571 char *
hammer2_io_data(hammer2_io_t * dio,off_t lbase)572 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
573 {
574           struct m_buf *bp;
575           int off;
576 
577           bp = dio->bp;
578           KKASSERT(bp != NULL);
579           bkvasync(bp);
580           lbase -= dio->dbase;
581           off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
582           KKASSERT(off >= 0 && off < bp->b_bufsize);
583           return(bp->b_data + off);
584 }
585 
586 int
hammer2_io_new(hammer2_dev_t * hmp,int btype,off_t lbase,int lsize,hammer2_io_t ** diop)587 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
588                  hammer2_io_t **diop)
589 {
590           *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
591           return ((*diop)->error);
592 }
593 
594 int
hammer2_io_newnz(hammer2_dev_t * hmp,int btype,off_t lbase,int lsize,hammer2_io_t ** diop)595 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
596                      hammer2_io_t **diop)
597 {
598           *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
599           return ((*diop)->error);
600 }
601 
602 int
_hammer2_io_bread(hammer2_dev_t * hmp,int btype,off_t lbase,int lsize,hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)603 _hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
604                     hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
605 {
606 #ifdef HAMMER2_IO_DEBUG
607           hammer2_io_t *dio;
608 #endif
609 
610           *diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
611                                            HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
612 #ifdef HAMMER2_IO_DEBUG
613           if ((dio = *diop) != NULL) {
614 #if 0
615                     int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
616                     dio->debug_data[i] = debug_data;
617 #endif
618           }
619 #endif
620           return ((*diop)->error);
621 }
622 
623 hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t * hmp,off_t lbase,int lsize HAMMER2_IO_DEBUG_ARGS)624 _hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
625                          int lsize HAMMER2_IO_DEBUG_ARGS)
626 {
627           hammer2_io_t *dio;
628 
629           dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
630                                          HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
631           return dio;
632 }
633 
634 void
_hammer2_io_bawrite(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)635 _hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
636 {
637           atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
638                                               HAMMER2_DIO_FLUSH);
639           _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
640 }
641 
642 void
_hammer2_io_bdwrite(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)643 _hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
644 {
645           atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
646           _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
647 }
648 
649 int
_hammer2_io_bwrite(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)650 _hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
651 {
652           atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
653                                               HAMMER2_DIO_FLUSH);
654           _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
655           return (0);         /* XXX */
656 }
657 
658 void
hammer2_io_setdirty(hammer2_io_t * dio)659 hammer2_io_setdirty(hammer2_io_t *dio)
660 {
661           atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
662 }
663 
664 /*
665  * This routine is called when a MODIFIED chain is being DESTROYED,
666  * in an attempt to allow the related buffer cache buffer to be
667  * invalidated and discarded instead of flushing it to disk.
668  *
669  * At the moment this case is only really useful for file meta-data.
670  * File data is already handled via the logical buffer cache associated
671  * with the vnode, and will be discarded if it was never flushed to disk.
672  * File meta-data may include inodes, directory entries, and indirect blocks.
673  *
674  * XXX
675  * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
676  * invalidated might be smaller.  Most of the meta-data structures above
677  * are in the 'smaller' category.  For now, don't try to invalidate the
678  * data areas.
679  */
680 void
hammer2_io_inval(hammer2_io_t * dio,hammer2_off_t data_off,u_int bytes)681 hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
682 {
683           /* NOP */
684 }
685 
686 void
_hammer2_io_brelse(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)687 _hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
688 {
689           _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
690 }
691 
692 void
_hammer2_io_bqrelse(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)693 _hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
694 {
695           _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
696 }
697 
698 /*
699  * Set dedup validation bits in a DIO.  We do not need the buffer cache
700  * buffer for this.  This must be done concurrent with setting bits in
701  * the freemap so as to interlock with bulkfree's clearing of those bits.
702  */
703 void
hammer2_io_dedup_set(hammer2_dev_t * hmp,hammer2_blockref_t * bref)704 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
705 {
706           hammer2_io_t *dio;
707           uint64_t mask;
708           int lsize;
709           int isgood;
710 
711           dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
712           if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
713                     lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
714           else
715                     lsize = 0;
716           mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
717           atomic_clear_64(&dio->dedup_valid, mask);
718           atomic_set_64(&dio->dedup_alloc, mask);
719           hammer2_io_putblk(&dio);
720 }
721 
722 /*
723  * Clear dedup validation bits in a DIO.  This is typically done when
724  * a modified chain is destroyed or by the bulkfree code.  No buffer
725  * is needed for this operation.  If the DIO no longer exists it is
726  * equivalent to the bits not being set.
727  */
728 void
hammer2_io_dedup_delete(hammer2_dev_t * hmp,uint8_t btype,hammer2_off_t data_off,u_int bytes)729 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
730                               hammer2_off_t data_off, u_int bytes)
731 {
732           hammer2_io_t *dio;
733           uint64_t mask;
734           int isgood;
735 
736           if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
737                     return;
738           if (btype != HAMMER2_BREF_TYPE_DATA)
739                     return;
740           dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
741           if (dio) {
742                     if (data_off < dio->pbase ||
743                         (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
744                         dio->pbase + dio->psize) {
745                               panic("hammer2_io_dedup_delete: DATAOFF BAD "
746                                     "%016jx/%d %016jx\n",
747                                     data_off, bytes, dio->pbase);
748                     }
749                     mask = hammer2_dedup_mask(dio, data_off, bytes);
750                     atomic_clear_64(&dio->dedup_alloc, mask);
751                     atomic_clear_64(&dio->dedup_valid, mask);
752                     hammer2_io_putblk(&dio);
753           }
754 }
755 
756 /*
757  * Assert that dedup allocation bits in a DIO are not set.  This operation
758  * does not require a buffer.  The DIO does not need to exist.
759  */
760 void
hammer2_io_dedup_assert(hammer2_dev_t * hmp,hammer2_off_t data_off,u_int bytes)761 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
762 {
763           hammer2_io_t *dio;
764           int isgood;
765 
766           dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
767                                      0, &isgood);
768           if (dio) {
769                     KASSERT((dio->dedup_alloc &
770                                 hammer2_dedup_mask(dio, data_off, bytes)) == 0,
771                               ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
772                               data_off,
773                               bytes,
774                               hammer2_dedup_mask(dio, data_off, bytes),
775                               dio->dedup_alloc));
776                     hammer2_io_putblk(&dio);
777           }
778 }
779 
780 static
781 void
dio_write_stats_update(hammer2_io_t * dio,struct m_buf * bp)782 dio_write_stats_update(hammer2_io_t *dio, struct m_buf *bp)
783 {
784           /*
785           if (bp->b_flags & B_DELWRI)
786                     return;
787           */
788           hammer2_adjwritecounter(dio->btype, dio->psize);
789 }
790 
791 void
hammer2_io_bkvasync(hammer2_io_t * dio)792 hammer2_io_bkvasync(hammer2_io_t *dio)
793 {
794           KKASSERT(dio->bp != NULL);
795           bkvasync(dio->bp);
796 }
797 
798 /*
799  * Ref a dio that is already owned
800  */
801 void
_hammer2_io_ref(hammer2_io_t * dio HAMMER2_IO_DEBUG_ARGS)802 _hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
803 {
804           DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
805           atomic_add_64(&dio->refs, 1);
806 }
807 
808 static __inline hammer2_io_hash_t *
hammer2_io_hashv(hammer2_dev_t * hmp,hammer2_off_t pbase)809 hammer2_io_hashv(hammer2_dev_t *hmp, hammer2_off_t pbase)
810 {
811           int hv;
812 
813           hv = (int)pbase + (int)(pbase >> 16);
814           return (&hmp->iohash[hv & HAMMER2_IOHASH_MASK]);
815 }
816 
817 /*
818  * Lookup and reference the requested dio
819  */
820 static hammer2_io_t *
hammer2_io_hash_lookup(hammer2_dev_t * hmp,hammer2_off_t pbase,uint64_t * refsp)821 hammer2_io_hash_lookup(hammer2_dev_t *hmp, hammer2_off_t pbase, uint64_t *refsp)
822 {
823           hammer2_io_hash_t *hash;
824           hammer2_io_t *dio;
825           uint64_t refs;
826 
827           *refsp = 0;
828           hash = hammer2_io_hashv(hmp, pbase);
829           hammer2_spin_sh(&hash->spin);
830           for (dio = hash->base; dio; dio = dio->next) {
831                     if (dio->pbase == pbase) {
832                               refs = atomic_fetchadd_64(&dio->refs, 1);
833                               if ((refs & HAMMER2_DIO_MASK) == 0)
834                                         atomic_add_int(&dio->hmp->iofree_count, -1);
835                               *refsp = refs;
836                               break;
837                     }
838           }
839           hammer2_spin_unsh(&hash->spin);
840 
841           return dio;
842 }
843 
844 /*
845  * Enter a dio into the hash.  If the pbase already exists in the hash,
846  * the xio in the hash is referenced and returned.  If dio is sucessfully
847  * entered into the hash, NULL is returned.
848  */
849 static hammer2_io_t *
hammer2_io_hash_enter(hammer2_dev_t * hmp,hammer2_io_t * dio,uint64_t * refsp)850 hammer2_io_hash_enter(hammer2_dev_t *hmp, hammer2_io_t *dio, uint64_t *refsp)
851 {
852           hammer2_io_t *xio;
853           hammer2_io_t **xiop;
854           hammer2_io_hash_t *hash;
855           uint64_t refs;
856 
857           *refsp = 0;
858           hash = hammer2_io_hashv(hmp, dio->pbase);
859           hammer2_spin_ex(&hash->spin);
860           for (xiop = &hash->base; (xio = *xiop) != NULL; xiop = &xio->next) {
861                     if (xio->pbase == dio->pbase) {
862                               refs = atomic_fetchadd_64(&xio->refs, 1);
863                               if ((refs & HAMMER2_DIO_MASK) == 0)
864                                         atomic_add_int(&xio->hmp->iofree_count, -1);
865                               *refsp = refs;
866                               goto done;
867                     }
868           }
869           dio->next = NULL;
870           *xiop = dio;
871 done:
872           hammer2_spin_unex(&hash->spin);
873 
874           return xio;
875 }
876 
877 /*
878  * Clean out a limited number of freeable DIOs
879  */
880 static void
hammer2_io_hash_cleanup(hammer2_dev_t * hmp,int dio_limit)881 hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit)
882 {
883           hammer2_io_hash_t *hash;
884           hammer2_io_t *dio;
885           hammer2_io_t **diop;
886           hammer2_io_t **cleanapp;
887           hammer2_io_t *cleanbase;
888           int count;
889           int maxscan;
890           int i;
891 
892           count = hmp->iofree_count - dio_limit + 32;
893           if (count <= 0)
894                     return;
895           cleanbase = NULL;
896           cleanapp = &cleanbase;
897 
898           i = hmp->io_iterator++;
899           maxscan = HAMMER2_IOHASH_SIZE;
900           while (count > 0 && maxscan--) {
901                     hash = &hmp->iohash[i & HAMMER2_IOHASH_MASK];
902                     hammer2_spin_ex(&hash->spin);
903                     diop = &hash->base;
904                     while ((dio = *diop) != NULL) {
905                               if ((dio->refs & (HAMMER2_DIO_MASK |
906                                                     HAMMER2_DIO_INPROG)) != 0)
907                               {
908                                         diop = &dio->next;
909                                         continue;
910                               }
911                               if (dio->act > 0) {
912                                         int act;
913 
914                                         act = dio->act - (ticks - dio->ticks) / hz - 1;
915                                         dio->act = (act < 0) ? 0 : act;
916                               }
917                               if (dio->act) {
918                                         diop = &dio->next;
919                                         continue;
920                               }
921                               KKASSERT(dio->bp == NULL);
922                               *diop = dio->next;
923                               dio->next = NULL;
924                               *cleanapp = dio;
925                               cleanapp = &dio->next;
926                               --count;
927                               /* diop remains unchanged */
928                               atomic_add_int(&hmp->iofree_count, -1);
929                     }
930                     hammer2_spin_unex(&hash->spin);
931                     i = hmp->io_iterator++;
932           }
933 
934           /*
935            * Get rid of dios on clean list without holding any locks
936            */
937           while ((dio = cleanbase) != NULL) {
938                     cleanbase = dio->next;
939                     dio->next = NULL;
940                     KKASSERT(dio->bp == NULL &&
941                         (dio->refs & (HAMMER2_DIO_MASK |
942                                           HAMMER2_DIO_INPROG)) == 0);
943                     if (dio->refs & HAMMER2_DIO_DIRTY) {
944                               kprintf("hammer2_io_cleanup: Dirty buffer "
945                                         "%016jx/%d (bp=%p)\n",
946                                         dio->pbase, dio->psize, dio->bp);
947                     }
948                     kfree_obj(dio, hmp->mio);
949                     atomic_add_int(&hammer2_dio_count, -1);
950           }
951 }
952 
953 /*
954  * Destroy all DIOs associated with the media
955  */
956 void
hammer2_io_hash_cleanup_all(hammer2_dev_t * hmp)957 hammer2_io_hash_cleanup_all(hammer2_dev_t *hmp)
958 {
959           hammer2_io_hash_t *hash;
960           hammer2_io_t *dio;
961           int i;
962 
963           for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
964                     hash = &hmp->iohash[i];
965 
966                     while ((dio = hash->base) != NULL) {
967                               hash->base = dio->next;
968                               dio->next = NULL;
969                               KKASSERT(dio->bp == NULL &&
970                                   (dio->refs & (HAMMER2_DIO_MASK |
971                                                     HAMMER2_DIO_INPROG)) == 0);
972                               if (dio->refs & HAMMER2_DIO_DIRTY) {
973                                         kprintf("hammer2_io_cleanup: Dirty buffer "
974                                                   "%016jx/%d (bp=%p)\n",
975                                                   dio->pbase, dio->psize, dio->bp);
976                               }
977                               kfree_obj(dio, hmp->mio);
978                               atomic_add_int(&hammer2_dio_count, -1);
979                               atomic_add_int(&hmp->iofree_count, -1);
980                     }
981           }
982 }
983