xref: /NextBSD/sys/fs/nandfs/bmap.c (revision 287e3b14e9552995def1802ec9c5034f4adf28ec)
1 /*-
2  * Copyright (c) 2012 Semihalf
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/namei.h>
32 #include <sys/kernel.h>
33 #include <sys/stat.h>
34 #include <sys/buf.h>
35 #include <sys/bio.h>
36 #include <sys/proc.h>
37 #include <sys/mount.h>
38 #include <sys/vnode.h>
39 #include <sys/signalvar.h>
40 #include <sys/malloc.h>
41 #include <sys/dirent.h>
42 #include <sys/lockf.h>
43 #include <sys/ktr.h>
44 #include <sys/kdb.h>
45 
46 #include <vm/vm.h>
47 #include <vm/vm_extern.h>
48 #include <vm/vm_object.h>
49 #include <vm/vnode_pager.h>
50 
51 #include <machine/_inttypes.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_object.h>
56 #include <vm/vnode_pager.h>
57 
58 #include "nandfs_mount.h"
59 #include "nandfs.h"
60 #include "nandfs_subr.h"
61 #include "bmap.h"
62 
63 static int bmap_getlbns(struct nandfs_node *, nandfs_lbn_t,
64     struct nandfs_indir *, int *);
65 
66 int
bmap_lookup(struct nandfs_node * node,nandfs_lbn_t lblk,nandfs_daddr_t * vblk)67 bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t *vblk)
68 {
69 	struct nandfs_inode *ip;
70 	struct nandfs_indir a[NIADDR + 1], *ap;
71 	nandfs_daddr_t daddr;
72 	struct buf *bp;
73 	int error;
74 	int num, *nump;
75 
76 	DPRINTF(BMAP, ("%s: node %p lblk %jx enter\n", __func__, node, lblk));
77 	ip = &node->nn_inode;
78 
79 	ap = a;
80 	nump = &num;
81 
82 	error = bmap_getlbns(node, lblk, ap, nump);
83 	if (error)
84 		return (error);
85 
86 	if (num == 0) {
87 		*vblk = ip->i_db[lblk];
88 		return (0);
89 	}
90 
91 	DPRINTF(BMAP, ("%s: node %p lblk=%jx trying ip->i_ib[%x]\n", __func__,
92 	    node, lblk, ap->in_off));
93 	daddr = ip->i_ib[ap->in_off];
94 	for (bp = NULL, ++ap; --num; ap++) {
95 		if (daddr == 0) {
96 			DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with "
97 			    "vblk 0\n", __func__, node, lblk));
98 			*vblk = 0;
99 			return (0);
100 		}
101 		if (ap->in_lbn == lblk) {
102 			DPRINTF(BMAP, ("%s: node %p lblk=%jx ap->in_lbn=%jx "
103 			    "returning address of indirect block (%jx)\n",
104 			    __func__, node, lblk, ap->in_lbn, daddr));
105 			*vblk = daddr;
106 			return (0);
107 		}
108 
109 		DPRINTF(BMAP, ("%s: node %p lblk=%jx reading block "
110 		    "ap->in_lbn=%jx\n", __func__, node, lblk, ap->in_lbn));
111 
112 		error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp);
113 		if (error) {
114 			brelse(bp);
115 			return (error);
116 		}
117 
118 		daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off];
119 		brelse(bp);
120 	}
121 
122 	DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with %jx\n", __func__,
123 	    node, lblk, daddr));
124 	*vblk = daddr;
125 
126 	return (0);
127 }
128 
129 int
bmap_dirty_meta(struct nandfs_node * node,nandfs_lbn_t lblk,int force)130 bmap_dirty_meta(struct nandfs_node *node, nandfs_lbn_t lblk, int force)
131 {
132 	struct nandfs_indir a[NIADDR+1], *ap;
133 #ifdef DEBUG
134 	nandfs_daddr_t daddr;
135 #endif
136 	struct buf *bp;
137 	int error;
138 	int num, *nump;
139 
140 	DPRINTF(BMAP, ("%s: node %p lblk=%jx\n", __func__, node, lblk));
141 
142 	ap = a;
143 	nump = &num;
144 
145 	error = bmap_getlbns(node, lblk, ap, nump);
146 	if (error)
147 		return (error);
148 
149 	/*
150 	 * Direct block, nothing to do
151 	 */
152 	if (num == 0)
153 		return (0);
154 
155 	DPRINTF(BMAP, ("%s: node %p reading blocks\n", __func__, node));
156 
157 	for (bp = NULL, ++ap; --num; ap++) {
158 		error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp);
159 		if (error) {
160 			brelse(bp);
161 			return (error);
162 		}
163 
164 #ifdef DEBUG
165 		daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off];
166 		MPASS(daddr != 0 || node->nn_ino == 3);
167 #endif
168 
169 		error = nandfs_dirty_buf_meta(bp, force);
170 		if (error)
171 			return (error);
172 	}
173 
174 	return (0);
175 }
176 
177 int
bmap_insert_block(struct nandfs_node * node,nandfs_lbn_t lblk,nandfs_daddr_t vblk)178 bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk,
179     nandfs_daddr_t vblk)
180 {
181 	struct nandfs_inode *ip;
182 	struct nandfs_indir a[NIADDR+1], *ap;
183 	struct buf *bp;
184 	nandfs_daddr_t daddr;
185 	int error;
186 	int num, *nump, i;
187 
188 	DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx\n", __func__, node, lblk,
189 	    vblk));
190 
191 	ip = &node->nn_inode;
192 
193 	ap = a;
194 	nump = &num;
195 
196 	error = bmap_getlbns(node, lblk, ap, nump);
197 	if (error)
198 		return (error);
199 
200 	DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx got num=%d\n", __func__,
201 	    node, lblk, vblk, num));
202 
203 	if (num == 0) {
204 		DPRINTF(BMAP, ("%s: node %p lblk=%jx direct block\n", __func__,
205 		    node, lblk));
206 		ip->i_db[lblk] = vblk;
207 		return (0);
208 	}
209 
210 	DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block level %d\n",
211 	    __func__, node, lblk, ap->in_off));
212 
213 	if (num == 1) {
214 		DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block: inserting "
215 		    "%jx as vblk for indirect block %d\n", __func__, node,
216 		    lblk, vblk, ap->in_off));
217 		ip->i_ib[ap->in_off] = vblk;
218 		return (0);
219 	}
220 
221 	bp = NULL;
222 	daddr = ip->i_ib[a[0].in_off];
223 	for (i = 1; i < num; i++) {
224 		if (bp)
225 			brelse(bp);
226 		if (daddr == 0) {
227 			DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx create "
228 			    "block %jx %d\n", __func__, node, lblk, vblk,
229 			    a[i].in_lbn, a[i].in_off));
230 			error = nandfs_bcreate_meta(node, a[i].in_lbn, NOCRED,
231 			    0, &bp);
232 			if (error)
233 				return (error);
234 		} else {
235 			DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx read "
236 			    "block %jx %d\n", __func__, node, daddr, vblk,
237 			    a[i].in_lbn, a[i].in_off));
238 			error = nandfs_bread_meta(node, a[i].in_lbn, NOCRED, 0, &bp);
239 			if (error) {
240 				brelse(bp);
241 				return (error);
242 			}
243 		}
244 		daddr = ((nandfs_daddr_t *)bp->b_data)[a[i].in_off];
245 	}
246 	i--;
247 
248 	DPRINTF(BMAP,
249 	    ("%s: bmap node %p lblk=%jx vblk=%jx inserting vblk level %d at "
250 	    "offset %d at %jx\n", __func__, node, lblk, vblk, i, a[i].in_off,
251 	    daddr));
252 
253 	if (!bp) {
254 		nandfs_error("%s: cannot find indirect block\n", __func__);
255 		return (-1);
256 	}
257 	((nandfs_daddr_t *)bp->b_data)[a[i].in_off] = vblk;
258 
259 	error = nandfs_dirty_buf_meta(bp, 0);
260 	if (error) {
261 		nandfs_warning("%s: dirty failed buf: %p\n", __func__, bp);
262 		return (error);
263 	}
264 	DPRINTF(BMAP, ("%s: exiting node %p lblk=%jx vblk=%jx\n", __func__,
265 	    node, lblk, vblk));
266 
267 	return (error);
268 }
269 
270 CTASSERT(NIADDR <= 3);
271 #define SINGLE	0	/* index of single indirect block */
272 #define DOUBLE	1	/* index of double indirect block */
273 #define TRIPLE	2	/* index of triple indirect block */
274 
275 static __inline nandfs_lbn_t
lbn_offset(struct nandfs_device * fsdev,int level)276 lbn_offset(struct nandfs_device *fsdev, int level)
277 {
278 	nandfs_lbn_t res;
279 
280 	for (res = 1; level > 0; level--)
281 		res *= MNINDIR(fsdev);
282 	return (res);
283 }
284 
285 static nandfs_lbn_t
blocks_inside(struct nandfs_device * fsdev,int level,struct nandfs_indir * nip)286 blocks_inside(struct nandfs_device *fsdev, int level, struct nandfs_indir *nip)
287 {
288 	nandfs_lbn_t blocks;
289 
290 	for (blocks = 1; level >= SINGLE; level--, nip++) {
291 		MPASS(nip->in_off >= 0 && nip->in_off < MNINDIR(fsdev));
292 		blocks += nip->in_off * lbn_offset(fsdev, level);
293 	}
294 
295 	return (blocks);
296 }
297 
298 static int
bmap_truncate_indirect(struct nandfs_node * node,int level,nandfs_lbn_t * left,int * cleaned,struct nandfs_indir * ap,struct nandfs_indir * fp,nandfs_daddr_t * copy)299 bmap_truncate_indirect(struct nandfs_node *node, int level, nandfs_lbn_t *left,
300     int *cleaned, struct nandfs_indir *ap, struct nandfs_indir *fp,
301     nandfs_daddr_t *copy)
302 {
303 	struct buf *bp;
304 	nandfs_lbn_t i, lbn, nlbn, factor, tosub;
305 	struct nandfs_device *fsdev;
306 	int error, lcleaned, modified;
307 
308 	DPRINTF(BMAP, ("%s: node %p level %d left %jx\n", __func__,
309 	    node, level, *left));
310 
311 	fsdev = node->nn_nandfsdev;
312 
313 	MPASS(ap->in_off >= 0 && ap->in_off < MNINDIR(fsdev));
314 
315 	factor = lbn_offset(fsdev, level);
316 	lbn = ap->in_lbn;
317 
318 	error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
319 	if (error) {
320 		if (bp != NULL)
321 			brelse(bp);
322 		return (error);
323 	}
324 
325 	bcopy(bp->b_data, copy, fsdev->nd_blocksize);
326 	bqrelse(bp);
327 
328 	modified = 0;
329 
330 	i = ap->in_off;
331 
332 	if (ap != fp)
333 		ap++;
334 	for (nlbn = lbn + 1 - i * factor; i >= 0 && *left > 0; i--,
335 	    nlbn += factor) {
336 		lcleaned = 0;
337 
338 		DPRINTF(BMAP,
339 		    ("%s: node %p i=%jx nlbn=%jx left=%jx ap=%p vblk %jx\n",
340 		    __func__, node, i, nlbn, *left, ap, copy[i]));
341 
342 		if (copy[i] == 0) {
343 			tosub = blocks_inside(fsdev, level - 1, ap);
344 			if (tosub > *left)
345 				tosub = 0;
346 
347 			*left -= tosub;
348 		} else {
349 			if (level > SINGLE) {
350 				if (ap == fp)
351 					ap->in_lbn = nlbn;
352 
353 				error = bmap_truncate_indirect(node, level - 1,
354 				    left, &lcleaned, ap, fp,
355 				    copy + MNINDIR(fsdev));
356 				if (error)
357 					return (error);
358 			} else {
359 				error = nandfs_bdestroy(node, copy[i]);
360 				if (error)
361 					return (error);
362 				lcleaned = 1;
363 				*left -= 1;
364 			}
365 		}
366 
367 		if (lcleaned) {
368 			if (level > SINGLE) {
369 				error = nandfs_vblock_end(fsdev, copy[i]);
370 				if (error)
371 					return (error);
372 			}
373 			copy[i] = 0;
374 			modified++;
375 		}
376 
377 		ap = fp;
378 	}
379 
380 	if (i == -1)
381 		*cleaned = 1;
382 
383 	error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
384 	if (error) {
385 		brelse(bp);
386 		return (error);
387 	}
388 	if (modified)
389 		bcopy(copy, bp->b_data, fsdev->nd_blocksize);
390 
391 	/* Force success even if we can't dirty the buffer metadata when freeing space */
392 	nandfs_dirty_buf_meta(bp, 1);
393 
394 	return (0);
395 }
396 
397 int
bmap_truncate_mapping(struct nandfs_node * node,nandfs_lbn_t lastblk,nandfs_lbn_t todo)398 bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t lastblk,
399     nandfs_lbn_t todo)
400 {
401 	struct nandfs_inode *ip;
402 	struct nandfs_indir a[NIADDR + 1], f[NIADDR], *ap;
403 	nandfs_daddr_t indir_lbn[NIADDR];
404 	nandfs_daddr_t *copy;
405 	int error, level;
406 	nandfs_lbn_t left, tosub;
407 	struct nandfs_device *fsdev;
408 	int cleaned, i;
409 	int num, *nump;
410 
411 	DPRINTF(BMAP, ("%s: node %p lastblk %jx truncating by %jx\n", __func__,
412 	    node, lastblk, todo));
413 
414 	ip = &node->nn_inode;
415 	fsdev = node->nn_nandfsdev;
416 
417 	ap = a;
418 	nump = &num;
419 
420 	error = bmap_getlbns(node, lastblk, ap, nump);
421 	if (error)
422 		return (error);
423 
424 	indir_lbn[SINGLE] = -NDADDR;
425 	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - MNINDIR(fsdev) - 1;
426 	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - MNINDIR(fsdev)
427 	    * MNINDIR(fsdev) - 1;
428 
429 	for (i = 0; i < NIADDR; i++) {
430 		f[i].in_off = MNINDIR(fsdev) - 1;
431 		f[i].in_lbn = 0xdeadbeef;
432 	}
433 
434 	left = todo;
435 
436 #ifdef DEBUG
437 	a[num].in_off = -1;
438 #endif
439 
440 	ap++;
441 	num -= 2;
442 
443 	if (num < 0)
444 		goto direct;
445 
446 	copy = malloc(MNINDIR(fsdev) * sizeof(nandfs_daddr_t) * (num + 1),
447 	    M_NANDFSTEMP, M_WAITOK);
448 
449 	for (level = num; level >= SINGLE && left > 0; level--) {
450 		cleaned = 0;
451 
452 		if (ip->i_ib[level] == 0) {
453 			tosub = blocks_inside(fsdev, level, ap);
454 			if (tosub > left)
455 				left = 0;
456 			else
457 				left -= tosub;
458 		} else {
459 			if (ap == f)
460 				ap->in_lbn = indir_lbn[level];
461 			error = bmap_truncate_indirect(node, level, &left,
462 			    &cleaned, ap, f, copy);
463 			if (error) {
464 				free(copy, M_NANDFSTEMP);
465 				nandfs_error("%s: error %d when truncate "
466 				    "at level %d\n", __func__, error, level);
467 				return (error);
468 			}
469 		}
470 
471 		if (cleaned) {
472 			nandfs_vblock_end(fsdev, ip->i_ib[level]);
473 			ip->i_ib[level] = 0;
474 		}
475 
476 		ap = f;
477 	}
478 
479 	free(copy, M_NANDFSTEMP);
480 
481 direct:
482 	if (num < 0)
483 		i = lastblk;
484 	else
485 		i = NDADDR - 1;
486 
487 	for (; i >= 0 && left > 0; i--) {
488 		if (ip->i_db[i] != 0) {
489 			error = nandfs_bdestroy(node, ip->i_db[i]);
490 			if (error) {
491 				nandfs_error("%s: cannot destroy "
492 				    "block %jx, error %d\n", __func__,
493 				    (uintmax_t)ip->i_db[i], error);
494 				return (error);
495 			}
496 			ip->i_db[i] = 0;
497 		}
498 
499 		left--;
500 	}
501 
502 	KASSERT(left == 0,
503 	    ("truncated wrong number of blocks (%jd should be 0)", left));
504 
505 	return (error);
506 }
507 
508 nandfs_lbn_t
get_maxfilesize(struct nandfs_device * fsdev)509 get_maxfilesize(struct nandfs_device *fsdev)
510 {
511 	struct nandfs_indir f[NIADDR];
512 	nandfs_lbn_t max;
513 	int i;
514 
515 	max = NDADDR;
516 
517 	for (i = 0; i < NIADDR; i++) {
518 		f[i].in_off = MNINDIR(fsdev) - 1;
519 		max += blocks_inside(fsdev, i, f);
520 	}
521 
522 	max *= fsdev->nd_blocksize;
523 
524 	return (max);
525 }
526 
527 /*
528  * This is ufs_getlbns with minor modifications.
529  */
530 /*
531  * Create an array of logical block number/offset pairs which represent the
532  * path of indirect blocks required to access a data block.  The first "pair"
533  * contains the logical block number of the appropriate single, double or
534  * triple indirect block and the offset into the inode indirect block array.
535  * Note, the logical block number of the inode single/double/triple indirect
536  * block appears twice in the array, once with the offset into the i_ib and
537  * once with the offset into the page itself.
538  */
539 static int
bmap_getlbns(struct nandfs_node * node,nandfs_lbn_t bn,struct nandfs_indir * ap,int * nump)540 bmap_getlbns(struct nandfs_node *node, nandfs_lbn_t bn, struct nandfs_indir *ap, int *nump)
541 {
542 	nandfs_daddr_t blockcnt;
543 	nandfs_lbn_t metalbn, realbn;
544 	struct nandfs_device *fsdev;
545 	int i, numlevels, off;
546 
547 	fsdev = node->nn_nandfsdev;
548 
549 	DPRINTF(BMAP, ("%s: node %p bn=%jx mnindir=%zd enter\n", __func__,
550 	    node, bn, MNINDIR(fsdev)));
551 
552 	if (nump)
553 		*nump = 0;
554 	numlevels = 0;
555 	realbn = bn;
556 
557 	if (bn < 0)
558 		bn = -bn;
559 
560 	/* The first NDADDR blocks are direct blocks. */
561 	if (bn < NDADDR)
562 		return (0);
563 
564 	/*
565 	 * Determine the number of levels of indirection.  After this loop
566 	 * is done, blockcnt indicates the number of data blocks possible
567 	 * at the previous level of indirection, and NIADDR - i is the number
568 	 * of levels of indirection needed to locate the requested block.
569 	 */
570 	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
571 		DPRINTF(BMAP, ("%s: blockcnt=%jd i=%d bn=%jd\n", __func__,
572 		    blockcnt, i, bn));
573 		if (i == 0)
574 			return (EFBIG);
575 		blockcnt *= MNINDIR(fsdev);
576 		if (bn < blockcnt)
577 			break;
578 	}
579 
580 	/* Calculate the address of the first meta-block. */
581 	if (realbn >= 0)
582 		metalbn = -(realbn - bn + NIADDR - i);
583 	else
584 		metalbn = -(-realbn - bn + NIADDR - i);
585 
586 	/*
587 	 * At each iteration, off is the offset into the bap array which is
588 	 * an array of disk addresses at the current level of indirection.
589 	 * The logical block number and the offset in that block are stored
590 	 * into the argument array.
591 	 */
592 	ap->in_lbn = metalbn;
593 	ap->in_off = off = NIADDR - i;
594 
595 	DPRINTF(BMAP, ("%s: initial: ap->in_lbn=%jx ap->in_off=%d\n", __func__,
596 	    metalbn, off));
597 
598 	ap++;
599 	for (++numlevels; i <= NIADDR; i++) {
600 		/* If searching for a meta-data block, quit when found. */
601 		if (metalbn == realbn)
602 			break;
603 
604 		blockcnt /= MNINDIR(fsdev);
605 		off = (bn / blockcnt) % MNINDIR(fsdev);
606 
607 		++numlevels;
608 		ap->in_lbn = metalbn;
609 		ap->in_off = off;
610 
611 		DPRINTF(BMAP, ("%s: in_lbn=%jx in_off=%d\n", __func__,
612 		    ap->in_lbn, ap->in_off));
613 		++ap;
614 
615 		metalbn -= -1 + off * blockcnt;
616 	}
617 	if (nump)
618 		*nump = numlevels;
619 
620 	DPRINTF(BMAP, ("%s: numlevels=%d\n", __func__, numlevels));
621 
622 	return (0);
623 }
624