xref: /dragonfly/sbin/hammer/ondisk.c (revision d04bb327d85bc66ac64f2bda8b2eaf482d07beb6)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "hammer_util.h"
36 
37 #include <sys/diskslice.h>
38 #include <sys/diskmbr.h>
39 
/*
 * Forward declarations for file-local helpers.  check_volume() and
 * get_buffer_readahead() are defined further down in this file; the
 * read/write helpers are only declared in this part of the file.
 */
40 static void check_volume(volume_info_t volume);
41 static void get_buffer_readahead(buffer_info_t base);
42 static __inline int readhammervol(volume_info_t volume);
43 static __inline int readhammerbuf(buffer_info_t buffer);
44 static __inline int writehammervol(volume_info_t volume);
45 static __inline int writehammerbuf(buffer_info_t buffer);
46 
/* Filesystem type UUID; __verify_volume() compares each volume against it. */
47 hammer_uuid_t Hammer_FSType;
/* Filesystem id; load_volume() copies it from the first volume loaded. */
48 hammer_uuid_t Hammer_FSId;
/*
 * Read-ahead window used by get_buffer_readahead(), in buffers relative
 * to the buffer just read: [UseReadBehind, UseReadAhead).
 */
49 int UseReadBehind = -4;
50 int UseReadAhead = 4;
/* NOTE(review): not referenced in this part of the file. */
51 int DebugOpt;
/*
 * Volume version passed to the hammer_crc_set_*() helpers.  Starts out as
 * (uint32_t)-1 and is set by load_volume() when the root volume is read.
 */
52 uint32_t HammerVersion = -1;
53 
/*
 * List of all volumes known to this process.  Entries are appended by
 * __add_volume() and looked up by volume number in get_volume().
 */
54 TAILQ_HEAD(volume_list, volume_info);
55 static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
/* Incremented by every load_volume() call; zero means no volume loaded yet. */
56 static int valid_hammer_volumes;
57 
58 static __inline
59 int
buffer_hash(hammer_off_t zone2_offset)60 buffer_hash(hammer_off_t zone2_offset)
61 {
62           int hi;
63 
64           hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
65           return(hi);
66 }
67 
68 static
69 buffer_info_t
find_buffer(hammer_off_t zone2_offset)70 find_buffer(hammer_off_t zone2_offset)
71 {
72           volume_info_t volume;
73           buffer_info_t buffer;
74           int hi;
75 
76           volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
77           assert(volume);
78 
79           hi = buffer_hash(zone2_offset);
80           TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry) {
81                     if (buffer->zone2_offset == zone2_offset)
82                               return(buffer);
83           }
84           return(NULL);
85 }
86 
87 static
88 volume_info_t
__alloc_volume(const char * volname,int oflags)89 __alloc_volume(const char *volname, int oflags)
90 {
91           volume_info_t volume;
92           int i;
93 
94           volume = calloc(1, sizeof(*volume));
95           volume->vol_no = -1;
96           volume->rdonly = (oflags == O_RDONLY);
97           volume->name = strdup(volname);
98           volume->fd = open(volume->name, oflags);
99           if (volume->fd < 0) {
100                     err(1, "alloc_volume: Failed to open %s", volume->name);
101                     /* not reached */
102           }
103           check_volume(volume);
104 
105           volume->ondisk = calloc(1, HAMMER_BUFSIZE);
106 
107           for (i = 0; i < HAMMER_BUFLISTS; ++i)
108                     TAILQ_INIT(&volume->buffer_lists[i]);
109 
110           return(volume);
111 }
112 
113 static
114 void
__add_volume(const volume_info_t volume)115 __add_volume(const volume_info_t volume)
116 {
117           volume_info_t scan;
118           struct stat st1, st2;
119 
120           if (fstat(volume->fd, &st1) != 0) {
121                     errx(1, "add_volume: %s: Failed to stat", volume->name);
122                     /* not reached */
123           }
124 
125           TAILQ_FOREACH(scan, &VolList, entry) {
126                     if (scan->vol_no == volume->vol_no) {
127                               errx(1, "add_volume: %s: Duplicate volume number %d "
128                                         "against %s",
129                                         volume->name, volume->vol_no, scan->name);
130                               /* not reached */
131                     }
132                     if (fstat(scan->fd, &st2) != 0) {
133                               errx(1, "add_volume: %s: Failed to stat %s",
134                                         volume->name, scan->name);
135                               /* not reached */
136                     }
137                     if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
138                               errx(1, "add_volume: %s: Specified more than once",
139                                         volume->name);
140                               /* not reached */
141                     }
142           }
143 
144           TAILQ_INSERT_TAIL(&VolList, volume, entry);
145 }
146 
147 static
148 void
__verify_volume(const volume_info_t volume)149 __verify_volume(const volume_info_t volume)
150 {
151           hammer_volume_ondisk_t ondisk = volume->ondisk;
152           char *fstype;
153 
154           if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
155                     errx(1, "verify_volume: Invalid volume signature %016jx",
156                               ondisk->vol_signature);
157                     /* not reached */
158           }
159           if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) {
160                     errx(1, "verify_volume: Invalid root volume# %d",
161                               ondisk->vol_rootvol);
162                     /* not reached */
163           }
164           hammer_uuid_to_string(&ondisk->vol_fstype, &fstype);
165           if (hammer_uuid_compare(&Hammer_FSType, &ondisk->vol_fstype)) {
166                     errx(1, "verify_volume: %s: fstype %s does not indicate "
167                               "this is a HAMMER volume", volume->name, fstype);
168                     /* not reached */
169           }
170           free(fstype);
171           if (hammer_uuid_compare(&Hammer_FSId, &ondisk->vol_fsid)) {
172                     errx(1, "verify_volume: %s: fsid does not match other volumes!",
173                               volume->name);
174                     /* not reached */
175           }
176           if (ondisk->vol_version < HAMMER_VOL_VERSION_MIN ||
177               ondisk->vol_version >= HAMMER_VOL_VERSION_WIP) {
178                     errx(1, "verify_volume: %s: Invalid volume version %u",
179                               volume->name, ondisk->vol_version);
180                     /* not reached */
181           }
182 }
183 
184 /*
185  * Initialize a volume structure and ondisk vol_no field.
186  */
187 volume_info_t
init_volume(const char * filename,int oflags,int32_t vol_no)188 init_volume(const char *filename, int oflags, int32_t vol_no)
189 {
190           volume_info_t volume;
191 
192           volume = __alloc_volume(filename, oflags);
193           volume->vol_no = volume->ondisk->vol_no = vol_no;
194 
195           __add_volume(volume);
196 
197           return(volume);
198 }
199 
200 /*
201  * Initialize a volume structure and read ondisk volume header.
202  */
203 volume_info_t
load_volume(const char * filename,int oflags,int verify_volume)204 load_volume(const char *filename, int oflags, int verify_volume)
205 {
206           volume_info_t volume;
207           int n;
208 
209           volume = __alloc_volume(filename, oflags);
210 
211           n = readhammervol(volume);
212           if (n == -1) {
213                     err(1, "load_volume: %s: Read failed at offset 0",
214                         volume->name);
215                     /* not reached */
216           }
217           volume->vol_no = volume->ondisk->vol_no;
218           if (volume->vol_no == HAMMER_ROOT_VOLNO)
219                     HammerVersion = volume->ondisk->vol_version;
220 
221           if (valid_hammer_volumes++ == 0)
222                     Hammer_FSId = volume->ondisk->vol_fsid;
223           if (verify_volume)
224                     __verify_volume(volume);
225 
226           __add_volume(volume);
227 
228           return(volume);
229 }
230 
231 /*
232  * Check basic volume characteristics.
233  */
234 static
235 void
check_volume(volume_info_t volume)236 check_volume(volume_info_t volume)
237 {
238           struct partinfo pinfo;
239           struct stat st;
240 
241           /*
242            * Allow the formatting of block devices or regular files
243            */
244           if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) {
245                     if (fstat(volume->fd, &st) < 0) {
246                               err(1, "Unable to stat %s", volume->name);
247                               /* not reached */
248                     }
249                     if (S_ISREG(st.st_mode)) {
250                               volume->size = st.st_size;
251                               volume->type = "REGFILE";
252                     } else {
253                               errx(1, "Unsupported file type for %s", volume->name);
254                               /* not reached */
255                     }
256           } else {
257                     /*
258                      * When formatting a block device as a HAMMER volume the
259                      * sector size must be compatible.  HAMMER uses 16384 byte
260                      * filesystem buffers.
261                      */
262                     if (pinfo.reserved_blocks) {
263                               errx(1, "HAMMER cannot be placed in a partition "
264                                         "which overlaps the disklabel or MBR");
265                               /* not reached */
266                     }
267                     if (pinfo.media_blksize > HAMMER_BUFSIZE ||
268                         HAMMER_BUFSIZE % pinfo.media_blksize) {
269                               errx(1, "A media sector size of %d is not supported",
270                                    pinfo.media_blksize);
271                               /* not reached */
272                     }
273 
274                     volume->size = pinfo.media_size;
275                     volume->device_offset = pinfo.media_offset;
276                     volume->type = "DEVICE";
277           }
278 }
279 
280 int
is_regfile(const volume_info_t volume)281 is_regfile(const volume_info_t volume)
282 {
283           return(strcmp(volume->type, "REGFILE") ? 0 : 1);
284 }
285 
286 void
assert_volume_offset(const volume_info_t volume)287 assert_volume_offset(const volume_info_t volume)
288 {
289           assert(hammer_is_zone_raw_buffer(volume->vol_free_off));
290           assert(hammer_is_zone_raw_buffer(volume->vol_free_end));
291           if (volume->vol_free_off >= volume->vol_free_end) {
292                     errx(1, "Ran out of room, filesystem too small");
293                     /* not reached */
294           }
295 }
296 
297 volume_info_t
get_volume(int32_t vol_no)298 get_volume(int32_t vol_no)
299 {
300           volume_info_t volume;
301 
302           TAILQ_FOREACH(volume, &VolList, entry) {
303                     if (volume->vol_no == vol_no)
304                               break;
305           }
306 
307           return(volume);
308 }
309 
310 volume_info_t
get_root_volume(void)311 get_root_volume(void)
312 {
313           return(get_volume(HAMMER_ROOT_VOLNO));
314 }
315 
316 static
317 hammer_off_t
__blockmap_xlate_to_zone2(hammer_off_t buf_offset)318 __blockmap_xlate_to_zone2(hammer_off_t buf_offset)
319 {
320           hammer_off_t zone2_offset;
321           int error = 0;
322 
323           if (hammer_is_zone_raw_buffer(buf_offset))
324                     zone2_offset = buf_offset;
325           else
326                     zone2_offset = blockmap_lookup(buf_offset, &error);
327 
328           if (error)
329                     return(HAMMER_OFF_BAD);
330           assert(hammer_is_zone_raw_buffer(zone2_offset));
331 
332           return(zone2_offset);
333 }
334 
335 static
336 buffer_info_t
__alloc_buffer(hammer_off_t zone2_offset,int isnew)337 __alloc_buffer(hammer_off_t zone2_offset, int isnew)
338 {
339           volume_info_t volume;
340           buffer_info_t buffer;
341           int hi;
342 
343           volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
344           assert(volume != NULL);
345 
346           buffer = calloc(1, sizeof(*buffer));
347           buffer->zone2_offset = zone2_offset;
348           buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset);
349           buffer->volume = volume;
350           buffer->ondisk = calloc(1, HAMMER_BUFSIZE);
351 
352           if (isnew <= 0) {
353                     if (readhammerbuf(buffer) == -1) {
354                               err(1, "Failed to read %s:%016jx at %016jx",
355                                   volume->name,
356                                   (intmax_t)buffer->zone2_offset,
357                                   (intmax_t)buffer->raw_offset);
358                               /* not reached */
359                     }
360           }
361 
362           hi = buffer_hash(zone2_offset);
363           TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry);
364           hammer_cache_add(&buffer->cache);
365 
366           return(buffer);
367 }
368 
369 /*
370  * Acquire the 16KB buffer for specified zone offset.
371  */
372 static
373 buffer_info_t
get_buffer(hammer_off_t buf_offset,int isnew)374 get_buffer(hammer_off_t buf_offset, int isnew)
375 {
376           buffer_info_t buffer;
377           hammer_off_t zone2_offset;
378           int dora = 0;
379 
380           zone2_offset = __blockmap_xlate_to_zone2(buf_offset);
381           if (zone2_offset == HAMMER_OFF_BAD)
382                     return(NULL);
383 
384           zone2_offset &= ~HAMMER_BUFMASK64;
385           buffer = find_buffer(zone2_offset);
386 
387           if (buffer == NULL) {
388                     buffer = __alloc_buffer(zone2_offset, isnew);
389                     dora = (isnew == 0);
390           } else {
391                     assert(isnew != -1);
392                     hammer_cache_used(&buffer->cache);
393           }
394           assert(buffer->ondisk != NULL);
395 
396           ++buffer->cache.refs;
397           hammer_cache_flush();
398 
399           if (isnew > 0) {
400                     assert(buffer->cache.modified == 0);
401                     bzero(buffer->ondisk, HAMMER_BUFSIZE);
402                     buffer->cache.modified = 1;
403           }
404           if (dora)
405                     get_buffer_readahead(buffer);
406           return(buffer);
407 }
408 
409 static
410 void
get_buffer_readahead(const buffer_info_t base)411 get_buffer_readahead(const buffer_info_t base)
412 {
413           buffer_info_t buffer;
414           volume_info_t volume;
415           hammer_off_t zone2_offset;
416           int64_t raw_offset;
417           int ri = UseReadBehind;
418           int re = UseReadAhead;
419 
420           raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
421           volume = base->volume;
422 
423           while (ri < re) {
424                     if (raw_offset >= volume->ondisk->vol_buf_end)
425                               break;
426                     if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) {
427                               ++ri;
428                               raw_offset += HAMMER_BUFSIZE;
429                               continue;
430                     }
431                     zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
432                               raw_offset - volume->ondisk->vol_buf_beg);
433                     buffer = find_buffer(zone2_offset);
434                     if (buffer == NULL) {
435                               /* call with -1 to prevent another readahead */
436                               buffer = get_buffer(zone2_offset, -1);
437                               rel_buffer(buffer);
438                     }
439                     ++ri;
440                     raw_offset += HAMMER_BUFSIZE;
441           }
442 }
443 
444 void
rel_buffer(buffer_info_t buffer)445 rel_buffer(buffer_info_t buffer)
446 {
447           volume_info_t volume;
448           int hi;
449 
450           if (buffer == NULL)
451                     return;
452           assert(buffer->cache.refs > 0);
453           if (--buffer->cache.refs == 0) {
454                     if (buffer->cache.delete) {
455                               hi = buffer_hash(buffer->zone2_offset);
456                               volume = buffer->volume;
457                               if (buffer->cache.modified)
458                                         flush_buffer(buffer);
459                               TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
460                               hammer_cache_del(&buffer->cache);
461                               free(buffer->ondisk);
462                               free(buffer);
463                     }
464           }
465 }
466 
467 /*
468  * Retrieve a pointer to a buffer data given a zone-X buffer offset.
469  * The underlying bufferp is freed if isnew or the corresponding zone-2
470  * offset is out of range of the cached data.  If bufferp is freed,
471  * a referenced buffer is loaded into it.
472  */
473 void *
get_buffer_data(hammer_off_t buf_offset,buffer_info_t * bufferp,int isnew)474 get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew)
475 {
476           hammer_off_t xor = 0;
477           hammer_volume_ondisk_t ondisk;
478 
479           if (*bufferp != NULL) {
480                     if (hammer_is_zone_undo(buf_offset)) {
481                               ondisk = (*bufferp)->volume->ondisk;
482                               xor = hammer_xlate_to_undo(ondisk, buf_offset) ^
483                                         (*bufferp)->zone2_offset;
484                     } else if (hammer_is_zone_direct_xlated(buf_offset)) {
485                               xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^
486                                     HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset);
487                     } else {
488                               assert(0);
489                     }
490                     if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) {
491                               rel_buffer(*bufferp);
492                               *bufferp = NULL;
493                     } else {
494                               hammer_cache_used(&(*bufferp)->cache);
495                     }
496           }
497 
498           if (*bufferp == NULL) {
499                     *bufferp = get_buffer(buf_offset, isnew);
500                     if (*bufferp == NULL)
501                               return(NULL);
502           }
503 
504           return((char *)(*bufferp)->ondisk +
505                     ((int32_t)buf_offset & HAMMER_BUFMASK));
506 }
507 
508 /*
509  * Allocate HAMMER elements - B-Tree nodes
510  */
511 hammer_node_ondisk_t
alloc_btree_node(hammer_off_t * offp,buffer_info_t * data_bufferp)512 alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp)
513 {
514           hammer_node_ondisk_t node;
515 
516           node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
517                                     offp, data_bufferp);
518           bzero(node, sizeof(*node));
519           return(node);
520 }
521 
522 /*
523  * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc)
524  */
525 void *
alloc_meta_element(hammer_off_t * offp,int32_t data_len,buffer_info_t * data_bufferp)526 alloc_meta_element(hammer_off_t *offp, int32_t data_len,
527                        buffer_info_t *data_bufferp)
528 {
529           void *data;
530 
531           data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
532                                     offp, data_bufferp);
533           bzero(data, data_len);
534           return(data);
535 }
536 
537 /*
538  * Format a new blockmap.  This is mostly a degenerate case because
539  * all allocations are now actually done from the freemap.
540  */
541 void
format_blockmap(volume_info_t root_vol,int zone,hammer_off_t offset)542 format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset)
543 {
544           hammer_blockmap_t blockmap;
545           hammer_off_t zone_base;
546 
547           /* Only root volume needs formatting */
548           assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
549 
550           assert(hammer_is_index_record(zone));
551 
552           blockmap = &root_vol->ondisk->vol0_blockmap[zone];
553           zone_base = HAMMER_ZONE_ENCODE(zone, offset);
554 
555           bzero(blockmap, sizeof(*blockmap));
556           blockmap->phys_offset = 0;
557           blockmap->first_offset = zone_base;
558           blockmap->next_offset = zone_base;
559           blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
560           hammer_crc_set_blockmap(HammerVersion, blockmap);
561 }
562 
563 /*
564  * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
565  * code will load each volume's freemap.
566  */
567 void
format_freemap(volume_info_t root_vol)568 format_freemap(volume_info_t root_vol)
569 {
570           buffer_info_t buffer = NULL;
571           hammer_off_t layer1_offset;
572           hammer_blockmap_t blockmap;
573           hammer_blockmap_layer1_t layer1;
574           int i, isnew;
575 
576           /* Only root volume needs formatting */
577           assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
578 
579           layer1_offset = bootstrap_bigblock(root_vol);
580           for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
581                     isnew = ((i % HAMMER_BUFSIZE) == 0);
582                     layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
583                     bzero(layer1, sizeof(*layer1));
584                     layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
585                     layer1->blocks_free = 0;
586                     hammer_crc_set_layer1(HammerVersion, layer1);
587           }
588           assert(i == HAMMER_BIGBLOCK_SIZE);
589           rel_buffer(buffer);
590 
591           blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
592           bzero(blockmap, sizeof(*blockmap));
593           blockmap->phys_offset = layer1_offset;
594           blockmap->first_offset = 0;
595           blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
596           blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
597           hammer_crc_set_blockmap(HammerVersion, blockmap);
598 }
599 
600 /*
601  * Load the volume's remaining free space into the freemap.
602  *
603  * Returns the number of big-blocks available.
604  */
605 int64_t
initialize_freemap(volume_info_t volume)606 initialize_freemap(volume_info_t volume)
607 {
608           volume_info_t root_vol;
609           buffer_info_t buffer1 = NULL;
610           buffer_info_t buffer2 = NULL;
611           hammer_blockmap_layer1_t layer1;
612           hammer_blockmap_layer2_t layer2;
613           hammer_off_t layer1_offset;
614           hammer_off_t layer2_offset;
615           hammer_off_t phys_offset;
616           hammer_off_t block_offset;
617           hammer_off_t aligned_vol_free_end;
618           hammer_blockmap_t freemap;
619           int64_t count = 0;
620           int64_t layer1_count = 0;
621 
622           root_vol = get_root_volume();
623 
624           assert_volume_offset(volume);
625           aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
626 
627           printf("initialize freemap volume %d\n", volume->vol_no);
628 
629           /*
630            * Initialize the freemap.  First preallocate the big-blocks required
631            * to implement layer2.   This preallocation is a bootstrap allocation
632            * using blocks from the target volume.
633            */
634           freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
635 
636           for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
637                phys_offset < aligned_vol_free_end;
638                phys_offset += HAMMER_BLOCKMAP_LAYER2) {
639                     layer1_offset = freemap->phys_offset +
640                                         HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
641                     layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
642                     if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
643                               layer1->phys_offset = bootstrap_bigblock(volume);
644                               layer1->blocks_free = 0;
645                               buffer1->cache.modified = 1;
646                               hammer_crc_set_layer1(HammerVersion, layer1);
647                     }
648           }
649 
650           /*
651            * Now fill everything in.
652            */
653           for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
654                phys_offset < aligned_vol_free_end;
655                phys_offset += HAMMER_BLOCKMAP_LAYER2) {
656                     layer1_count = 0;
657                     layer1_offset = freemap->phys_offset +
658                                         HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
659                     layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
660                     assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
661 
662                     for (block_offset = 0;
663                          block_offset < HAMMER_BLOCKMAP_LAYER2;
664                          block_offset += HAMMER_BIGBLOCK_SIZE) {
665                               layer2_offset = layer1->phys_offset +
666                                                 HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
667                               layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
668                               bzero(layer2, sizeof(*layer2));
669 
670                               if (phys_offset + block_offset < volume->vol_free_off) {
671                                         /*
672                                          * Big-blocks already allocated as part
673                                          * of the freemap bootstrap.
674                                          */
675                                         layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
676                                         layer2->append_off = HAMMER_BIGBLOCK_SIZE;
677                                         layer2->bytes_free = 0;
678                               } else if (phys_offset + block_offset < volume->vol_free_end) {
679                                         layer2->zone = 0;
680                                         layer2->append_off = 0;
681                                         layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
682                                         ++count;
683                                         ++layer1_count;
684                               } else {
685                                         layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
686                                         layer2->append_off = HAMMER_BIGBLOCK_SIZE;
687                                         layer2->bytes_free = 0;
688                               }
689                               hammer_crc_set_layer2(HammerVersion, layer2);
690                               buffer2->cache.modified = 1;
691                     }
692 
693                     layer1->blocks_free += layer1_count;
694                     hammer_crc_set_layer1(HammerVersion, layer1);
695                     buffer1->cache.modified = 1;
696           }
697 
698           rel_buffer(buffer1);
699           rel_buffer(buffer2);
700           return(count);
701 }
702 
703 /*
704  * Returns the number of big-blocks available for filesystem data and undos
705  * without formatting.
706  */
707 int64_t
count_freemap(const volume_info_t volume)708 count_freemap(const volume_info_t volume)
709 {
710           hammer_off_t phys_offset;
711           hammer_off_t vol_free_off;
712           hammer_off_t aligned_vol_free_end;
713           int64_t count = 0;
714 
715           vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
716 
717           assert_volume_offset(volume);
718           aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
719 
720           if (volume->vol_no == HAMMER_ROOT_VOLNO)
721                     vol_free_off += HAMMER_BIGBLOCK_SIZE;
722 
723           for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
724                phys_offset < aligned_vol_free_end;
725                phys_offset += HAMMER_BLOCKMAP_LAYER2) {
726                     vol_free_off += HAMMER_BIGBLOCK_SIZE;
727           }
728 
729           for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
730                phys_offset < aligned_vol_free_end;
731                phys_offset += HAMMER_BIGBLOCK_SIZE) {
732                     if (phys_offset < vol_free_off)
733                               ;
734                     else if (phys_offset < volume->vol_free_end)
735                               ++count;
736           }
737 
738           return(count);
739 }
740 
741 /*
742  * Format the undomap for the root volume.
743  */
744 void
format_undomap(volume_info_t root_vol,int64_t * undo_buffer_size)745 format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size)
746 {
747           hammer_off_t undo_limit;
748           hammer_blockmap_t blockmap;
749           hammer_volume_ondisk_t ondisk;
750           buffer_info_t buffer = NULL;
751           hammer_off_t scan;
752           int n;
753           int limit_index;
754           uint32_t seqno;
755 
756           /* Only root volume needs formatting */
757           assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
758           ondisk = root_vol->ondisk;
759 
760           /*
761            * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
762            * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks.
763            * Size to approximately 0.1% of the disk.
764            *
765            * The minimum UNDO fifo size is 512MB, or approximately 1% of
766            * the recommended 50G disk.
767            *
768            * Changing this minimum is rather dangerous as complex filesystem
769            * operations can cause the UNDO FIFO to fill up otherwise.
770            */
771           undo_limit = *undo_buffer_size;
772           if (undo_limit == 0) {
773                     undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000;
774                     if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS)
775                               undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS;
776           }
777           undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit);
778           if (undo_limit < HAMMER_BIGBLOCK_SIZE)
779                     undo_limit = HAMMER_BIGBLOCK_SIZE;
780           if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS)
781                     undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS;
782           *undo_buffer_size = undo_limit;
783 
784           blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
785           bzero(blockmap, sizeof(*blockmap));
786           blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
787           blockmap->first_offset = HAMMER_ENCODE_UNDO(0);
788           blockmap->next_offset = blockmap->first_offset;
789           blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit);
790           hammer_crc_set_blockmap(HammerVersion, blockmap);
791 
792           limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
793           assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS);
794 
795           for (n = 0; n < limit_index; ++n)
796                     ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol);
797           while (n < HAMMER_MAX_UNDO_BIGBLOCKS)
798                     ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
799 
800           /*
801            * Pre-initialize the UNDO blocks (HAMMER version 4+)
802            */
803           printf("initializing the undo map (%jd MB)\n",
804                     (intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) /
805                     (1024 * 1024));
806 
807           scan = blockmap->first_offset;
808           seqno = 0;
809 
810           while (scan < blockmap->alloc_offset) {
811                     hammer_fifo_head_t head;
812                     hammer_fifo_tail_t tail;
813                     int bytes = HAMMER_UNDO_ALIGN;
814                     int isnew = ((scan & HAMMER_BUFMASK64) == 0);
815 
816                     head = get_buffer_data(scan, &buffer, isnew);
817                     buffer->cache.modified = 1;
818                     tail = (void *)((char *)head + bytes - sizeof(*tail));
819 
820                     bzero(head, bytes);
821                     head->hdr_signature = HAMMER_HEAD_SIGNATURE;
822                     head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
823                     head->hdr_size = bytes;
824                     head->hdr_seq = seqno++;
825 
826                     tail->tail_signature = HAMMER_TAIL_SIGNATURE;
827                     tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
828                     tail->tail_size = bytes;
829 
830                     hammer_crc_set_fifo_head(HammerVersion, head, bytes);
831 
832                     scan += bytes;
833           }
834           rel_buffer(buffer);
835 }
836 
/*
 * Human readable label for each zone index.  Zones without a label
 * map to the empty string so every entry is printable with %s.
 */
const char *zone_labels[] = {
	[0]  = "",
	[1]  = "raw_volume",
	[2]  = "raw_buffer",
	[3]  = "undo",
	[4]  = "freemap",
	[5]  = "",
	[6]  = "",
	[7]  = "",
	[8]  = "btree",
	[9]  = "meta",
	[10] = "large_data",
	[11] = "small_data",
	[12] = "",
	[13] = "",
	[14] = "",
	[15] = "unavail",
};
855 
856 void
print_blockmap(const volume_info_t volume)857 print_blockmap(const volume_info_t volume)
858 {
859           hammer_blockmap_t blockmap;
860           hammer_volume_ondisk_t ondisk = volume->ondisk;
861           int64_t size, used;
862           int i;
863           char *fstype, *fsid;
864 #define INDENT ""
865 
866           printf(INDENT"vol_label\t%s\n", ondisk->vol_label);
867           printf(INDENT"vol_count\t%d\n", ondisk->vol_count);
868 
869           hammer_uuid_to_string(&ondisk->vol_fstype, &fstype);
870           hammer_uuid_to_string(&ondisk->vol_fsid, &fsid);
871           printf(INDENT"vol_fstype\t%s", fstype);
872           if (strcmp(fstype, "61dc63ac-6e38-11dc-8513-01301bb8a9f5") == 0)
873                     printf(" \"%s\"\n", HAMMER_FSTYPE_STRING);
874           else
875                     printf("\n"); /* invalid UUID */
876           printf(INDENT"vol_fsid\t%s\n", fsid);
877           free(fstype);
878           free(fsid);
879 
880           printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg));
881           printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg));
882           printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg));
883           printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end));
884           printf(INDENT"vol0_next_tid\t%016jx\n",
885                  (uintmax_t)ondisk->vol0_next_tid);
886 
887           blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
888           size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset);
889           if (blockmap->first_offset <= blockmap->next_offset)
890                     used = blockmap->next_offset - blockmap->first_offset;
891           else
892                     used = blockmap->alloc_offset - blockmap->first_offset +
893                               HAMMER_OFF_LONG_ENCODE(blockmap->next_offset);
894           printf(INDENT"undo_size\t%s\n", sizetostr(size));
895           printf(INDENT"undo_used\t%s\n", sizetostr(used));
896 
897           printf(INDENT"zone #             "
898                  "phys             first            next             alloc\n");
899           for (i = 0; i < HAMMER_MAX_ZONES; i++) {
900                     blockmap = &ondisk->vol0_blockmap[i];
901                     printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n",
902                               i, zone_labels[i],
903                               (uintmax_t)blockmap->phys_offset,
904                               (uintmax_t)blockmap->first_offset,
905                               (uintmax_t)blockmap->next_offset,
906                               (uintmax_t)blockmap->alloc_offset);
907           }
908 }
909 
910 /*
911  * Flush various tracking structures to disk
912  */
913 void
flush_all_volumes(void)914 flush_all_volumes(void)
915 {
916           volume_info_t volume;
917 
918           TAILQ_FOREACH(volume, &VolList, entry)
919                     flush_volume(volume);
920 }
921 
922 void
flush_volume(volume_info_t volume)923 flush_volume(volume_info_t volume)
924 {
925           buffer_info_t buffer;
926           int i;
927 
928           for (i = 0; i < HAMMER_BUFLISTS; ++i) {
929                     TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
930                               flush_buffer(buffer);
931           }
932           if (writehammervol(volume) == -1) {
933                     err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
934                     /* not reached */
935           }
936 }
937 
938 void
flush_buffer(buffer_info_t buffer)939 flush_buffer(buffer_info_t buffer)
940 {
941           volume_info_t volume;
942 
943           volume = buffer->volume;
944           if (writehammerbuf(buffer) == -1) {
945                     err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
946                     /* not reached */
947           }
948           buffer->cache.modified = 0;
949 }
950 
951 /*
952  * Core I/O operations
953  */
954 static
955 int
__read(volume_info_t volume,void * data,int64_t offset,int size)956 __read(volume_info_t volume, void *data, int64_t offset, int size)
957 {
958           ssize_t n;
959 
960           n = pread(volume->fd, data, size, offset);
961           if (n != size)
962                     return(-1);
963           return(0);
964 }
965 
966 static __inline
967 int
readhammervol(volume_info_t volume)968 readhammervol(volume_info_t volume)
969 {
970           return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
971 }
972 
973 static __inline
974 int
readhammerbuf(buffer_info_t buffer)975 readhammerbuf(buffer_info_t buffer)
976 {
977           return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset,
978                     HAMMER_BUFSIZE));
979 }
980 
981 static
982 int
__write(volume_info_t volume,const void * data,int64_t offset,int size)983 __write(volume_info_t volume, const void *data, int64_t offset, int size)
984 {
985           ssize_t n;
986 
987           if (volume->rdonly)
988                     return(0);
989 
990           n = pwrite(volume->fd, data, size, offset);
991           if (n != size)
992                     return(-1);
993           return(0);
994 }
995 
996 static __inline
997 int
writehammervol(volume_info_t volume)998 writehammervol(volume_info_t volume)
999 {
1000           return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
1001 }
1002 
1003 static __inline
1004 int
writehammerbuf(buffer_info_t buffer)1005 writehammerbuf(buffer_info_t buffer)
1006 {
1007           return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset,
1008                     HAMMER_BUFSIZE));
1009 }
1010 
1011 int64_t
init_boot_area_size(int64_t value,off_t avg_vol_size)1012 init_boot_area_size(int64_t value, off_t avg_vol_size)
1013 {
1014           if (value == 0) {
1015                     value = HAMMER_BOOT_NOMBYTES;
1016                     while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
1017                               value >>= 1;
1018           }
1019 
1020           if (value < HAMMER_BOOT_MINBYTES)
1021                     value = HAMMER_BOOT_MINBYTES;
1022           else if (value > HAMMER_BOOT_MAXBYTES)
1023                     value = HAMMER_BOOT_MAXBYTES;
1024 
1025           return(value);
1026 }
1027 
1028 int64_t
init_memory_log_size(int64_t value,off_t avg_vol_size)1029 init_memory_log_size(int64_t value, off_t avg_vol_size)
1030 {
1031           if (value == 0) {
1032                     value = HAMMER_MEM_NOMBYTES;
1033                     while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
1034                               value >>= 1;
1035           }
1036 
1037           if (value < HAMMER_MEM_MINBYTES)
1038                     value = HAMMER_MEM_MINBYTES;
1039           else if (value > HAMMER_MEM_MAXBYTES)
1040                     value = HAMMER_MEM_MAXBYTES;
1041 
1042           return(value);
1043 }
1044