1 /*-
2  * Copyright (c) 2010-2012 Semihalf
3  * Copyright (c) 2008, 2009 Reinoud Zandijk
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD: stable/10/sys/fs/nandfs/nandfs_subr.c 241844 2012-10-22 03:00:37Z eadler $");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/namei.h>
35 #include <sys/resourcevar.h>
36 #include <sys/kernel.h>
37 #include <sys/file.h>
38 #include <sys/stat.h>
39 #include <sys/buf.h>
40 #include <sys/bio.h>
41 #include <sys/proc.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 #include <sys/signalvar.h>
45 #include <sys/malloc.h>
46 #include <sys/dirent.h>
47 #include <sys/lockf.h>
48 #include <sys/libkern.h>
49 
50 #include <geom/geom.h>
51 #include <geom/geom_vfs.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 
56 #include <machine/_inttypes.h>
57 #include "nandfs_mount.h"
58 #include "nandfs.h"
59 #include "nandfs_subr.h"
60 
/* malloc(9) types declared by this file: "nandfs_mount" and "nandfs_tmt". */
MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount");
MALLOC_DEFINE(M_NANDFSTEMP, "nandfs_tmt", "NANDFS tmp");

/* UMA zone for struct nandfs_node; set up in nandfs_init(). */
uma_zone_t nandfs_node_zone;

/* Buffer-object callbacks referenced by buf_ops_nandfs below. */
void nandfs_bdflush(struct bufobj *bo, struct buf *bp);
int nandfs_bufsync(struct bufobj *bo, int waitfor);

/*
 * Buffer operations vector installed on the bufobj of every vnode created
 * by nandfs_get_node_raw().  Write and strategy use the generic
 * bufwrite()/bufstrategy(); sync and bdflush are NANDFS specific.
 */
struct buf_ops buf_ops_nandfs = {
	.bop_name	=	"buf_ops_nandfs",
	.bop_write	=	bufwrite,
	.bop_strategy	=	bufstrategy,
	.bop_sync	=	nandfs_bufsync,
	.bop_bdflush	=	nandfs_bdflush,
};
76 
77 int
nandfs_bufsync(struct bufobj * bo,int waitfor)78 nandfs_bufsync(struct bufobj *bo, int waitfor)
79 {
80 	struct vnode *vp;
81 	int error = 0;
82 
83 	vp = bo->__bo_vnode;
84 
85 	ASSERT_VOP_LOCKED(vp, __func__);
86 	error = nandfs_sync_file(vp);
87 	if (error)
88 		nandfs_warning("%s: cannot flush buffers err:%d\n",
89 		    __func__, error);
90 
91 	return (error);
92 }
93 
94 void
nandfs_bdflush(bo,bp)95 nandfs_bdflush(bo, bp)
96 	struct bufobj *bo;
97 	struct buf *bp;
98 {
99 	struct vnode *vp;
100 	int error;
101 
102 	if (bo->bo_dirty.bv_cnt <= ((dirtybufthresh * 8) / 10))
103 		return;
104 
105 	vp = bp->b_vp;
106 	if (NANDFS_SYS_NODE(VTON(vp)->nn_ino))
107 		return;
108 
109 	if (NANDFS_IS_INDIRECT(bp))
110 		return;
111 
112 	error = nandfs_sync_file(vp);
113 	if (error)
114 		nandfs_warning("%s: cannot flush buffers err:%d\n",
115 		    __func__, error);
116 }
117 
118 int
nandfs_init(struct vfsconf * vfsp)119 nandfs_init(struct vfsconf *vfsp)
120 {
121 
122 	nandfs_node_zone = uma_zcreate("nandfs node zone",
123 	    sizeof(struct nandfs_node), NULL, NULL, NULL, NULL, 0, 0);
124 
125 	return (0);
126 }
127 
128 int
nandfs_uninit(struct vfsconf * vfsp)129 nandfs_uninit(struct vfsconf *vfsp)
130 {
131 
132 	uma_zdestroy(nandfs_node_zone);
133 	return (0);
134 }
135 
136 /* Basic calculators */
137 uint64_t
nandfs_get_segnum_of_block(struct nandfs_device * nandfsdev,nandfs_daddr_t blocknr)138 nandfs_get_segnum_of_block(struct nandfs_device *nandfsdev,
139     nandfs_daddr_t blocknr)
140 {
141 	uint64_t segnum, blks_per_seg;
142 
143 	MPASS(blocknr >= nandfsdev->nd_fsdata.f_first_data_block);
144 
145 	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
146 
147 	segnum = blocknr / blks_per_seg;
148 	segnum -= nandfsdev->nd_fsdata.f_first_data_block / blks_per_seg;
149 
150 	DPRINTF(SYNC, ("%s: returning blocknr %jx -> segnum %jx\n", __func__,
151 	    blocknr, segnum));
152 
153 	return (segnum);
154 }
155 
156 void
nandfs_get_segment_range(struct nandfs_device * nandfsdev,uint64_t segnum,uint64_t * seg_start,uint64_t * seg_end)157 nandfs_get_segment_range(struct nandfs_device *nandfsdev, uint64_t segnum,
158     uint64_t *seg_start, uint64_t *seg_end)
159 {
160 	uint64_t blks_per_seg;
161 
162 	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
163 	*seg_start = nandfsdev->nd_fsdata.f_first_data_block +
164 	    blks_per_seg * segnum;
165 	if (seg_end != NULL)
166 		*seg_end = *seg_start + blks_per_seg -1;
167 }
168 
nandfs_calc_mdt_consts(struct nandfs_device * nandfsdev,struct nandfs_mdt * mdt,int entry_size)169 void nandfs_calc_mdt_consts(struct nandfs_device *nandfsdev,
170     struct nandfs_mdt *mdt, int entry_size)
171 {
172 	uint32_t blocksize = nandfsdev->nd_blocksize;
173 
174 	mdt->entries_per_group = blocksize * 8;
175 	mdt->entries_per_block = blocksize / entry_size;
176 
177 	mdt->blocks_per_group =
178 	    (mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
179 	mdt->groups_per_desc_block =
180 	    blocksize / sizeof(struct nandfs_block_group_desc);
181 	mdt->blocks_per_desc_block =
182 	    mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
183 }
184 
185 int
nandfs_dev_bread(struct nandfs_device * nandfsdev,nandfs_lbn_t blocknr,struct ucred * cred,int flags,struct buf ** bpp)186 nandfs_dev_bread(struct nandfs_device *nandfsdev, nandfs_lbn_t blocknr,
187     struct ucred *cred, int flags, struct buf **bpp)
188 {
189 	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
190 	int error;
191 
192 	DPRINTF(BLOCK, ("%s: read from block %jx vp %p\n", __func__,
193 	    blocknr * blk2dev, nandfsdev->nd_devvp));
194 	error = bread(nandfsdev->nd_devvp, blocknr * blk2dev,
195 	    nandfsdev->nd_blocksize, NOCRED, bpp);
196 	if (error)
197 		nandfs_error("%s: cannot read from device - blk:%jx\n",
198 		    __func__, blocknr);
199 	return (error);
200 }
201 
202 /* Read on a node */
203 int
nandfs_bread(struct nandfs_node * node,nandfs_lbn_t blocknr,struct ucred * cred,int flags,struct buf ** bpp)204 nandfs_bread(struct nandfs_node *node, nandfs_lbn_t blocknr,
205     struct ucred *cred, int flags, struct buf **bpp)
206 {
207 	nandfs_daddr_t vblk;
208 	int error;
209 
210 	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
211 	    blocknr));
212 
213 	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
214 	    cred, bpp);
215 
216 	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
217 	    NTOV(node), blocknr, error));
218 
219 	if (!nandfs_vblk_get(*bpp) &&
220 	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
221 		nandfs_bmap_lookup(node, blocknr, &vblk);
222 		nandfs_vblk_set(*bpp, vblk);
223 	}
224 	return (error);
225 }
226 
227 int
nandfs_bread_meta(struct nandfs_node * node,nandfs_lbn_t blocknr,struct ucred * cred,int flags,struct buf ** bpp)228 nandfs_bread_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
229     struct ucred *cred, int flags, struct buf **bpp)
230 {
231 	nandfs_daddr_t vblk;
232 	int error;
233 
234 	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
235 	    blocknr));
236 
237 	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
238 	    cred, bpp);
239 
240 	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
241 	    NTOV(node), blocknr, error));
242 
243 	if (!nandfs_vblk_get(*bpp) &&
244 	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
245 		nandfs_bmap_lookup(node, blocknr, &vblk);
246 		nandfs_vblk_set(*bpp, vblk);
247 	}
248 
249 	return (error);
250 }
251 
252 int
nandfs_bdestroy(struct nandfs_node * node,nandfs_daddr_t vblk)253 nandfs_bdestroy(struct nandfs_node *node, nandfs_daddr_t vblk)
254 {
255 	int error;
256 
257 	if (!NANDFS_SYS_NODE(node->nn_ino))
258 		NANDFS_WRITEASSERT(node->nn_nandfsdev);
259 
260 	error = nandfs_vblock_end(node->nn_nandfsdev, vblk);
261 	if (error) {
262 		nandfs_error("%s: ending vblk: %jx failed\n",
263 		    __func__, (uintmax_t)vblk);
264 		return (error);
265 	}
266 	node->nn_inode.i_blocks--;
267 
268 	return (0);
269 }
270 
271 int
nandfs_bcreate(struct nandfs_node * node,nandfs_lbn_t blocknr,struct ucred * cred,int flags,struct buf ** bpp)272 nandfs_bcreate(struct nandfs_node *node, nandfs_lbn_t blocknr,
273     struct ucred *cred, int flags, struct buf **bpp)
274 {
275 	int error;
276 
277 	ASSERT_VOP_LOCKED(NTOV(node), __func__);
278 	if (!NANDFS_SYS_NODE(node->nn_ino))
279 		NANDFS_WRITEASSERT(node->nn_nandfsdev);
280 
281 	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
282 	    blocknr));
283 
284 	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
285 	    0, 0, 0);
286 
287 	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
288 	    NTOV(node), blocknr));
289 
290 	if (*bpp) {
291 		vfs_bio_clrbuf(*bpp);
292 		(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
293 		error = nandfs_bmap_insert_block(node, blocknr, *bpp);
294 		if (error) {
295 			nandfs_warning("%s: failed bmap insert node:%p"
296 			    " blk:%jx\n", __func__, node, blocknr);
297 			brelse(*bpp);
298 			return (error);
299 		}
300 		node->nn_inode.i_blocks++;
301 
302 		return (0);
303 	}
304 
305 	return (-1);
306 }
307 
308 int
nandfs_bcreate_meta(struct nandfs_node * node,nandfs_lbn_t blocknr,struct ucred * cred,int flags,struct buf ** bpp)309 nandfs_bcreate_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
310     struct ucred *cred, int flags, struct buf **bpp)
311 {
312 	struct nandfs_device *fsdev;
313 	nandfs_daddr_t vblk;
314 	int error;
315 
316 	ASSERT_VOP_LOCKED(NTOV(node), __func__);
317 	NANDFS_WRITEASSERT(node->nn_nandfsdev);
318 
319 	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
320 	    blocknr));
321 
322 	fsdev = node->nn_nandfsdev;
323 
324 	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
325 	    0, 0, 0);
326 
327 	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
328 	    NTOV(node), blocknr));
329 
330 	memset((*bpp)->b_data, 0, fsdev->nd_blocksize);
331 
332 	vfs_bio_clrbuf(*bpp);
333 	(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
334 
335 	nandfs_buf_set(*bpp, NANDFS_VBLK_ASSIGNED);
336 
337 	if (node->nn_ino != NANDFS_DAT_INO) {
338 		error = nandfs_vblock_alloc(fsdev, &vblk);
339 		if (error) {
340 			nandfs_buf_clear(*bpp, NANDFS_VBLK_ASSIGNED);
341 			brelse(*bpp);
342 			return (error);
343 		}
344 	} else
345 		vblk = fsdev->nd_fakevblk++;
346 
347 	nandfs_vblk_set(*bpp, vblk);
348 
349 	nandfs_bmap_insert_block(node, blocknr, *bpp);
350 	return (0);
351 }
352 
353 /* Translate index to a file block number and an entry */
354 void
nandfs_mdt_trans(struct nandfs_mdt * mdt,uint64_t index,nandfs_lbn_t * blocknr,uint32_t * entry_in_block)355 nandfs_mdt_trans(struct nandfs_mdt *mdt, uint64_t index,
356     nandfs_lbn_t *blocknr, uint32_t *entry_in_block)
357 {
358 	uint64_t blknr;
359 	uint64_t group, group_offset, blocknr_in_group;
360 	uint64_t desc_block, desc_offset;
361 
362 	/* Calculate our offset in the file */
363 	group = index / mdt->entries_per_group;
364 	group_offset = index % mdt->entries_per_group;
365 	desc_block = group / mdt->groups_per_desc_block;
366 	desc_offset = group % mdt->groups_per_desc_block;
367 	blocknr_in_group = group_offset / mdt->entries_per_block;
368 
369 	/* To descgroup offset */
370 	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
371 
372 	/* To group offset */
373 	blknr += desc_offset * mdt->blocks_per_group;
374 
375 	/* To actual file block */
376 	blknr += 1 + blocknr_in_group;
377 
378 	*blocknr = blknr;
379 	*entry_in_block = group_offset % mdt->entries_per_block;
380 }
381 
382 void
nandfs_mdt_trans_blk(struct nandfs_mdt * mdt,uint64_t index,uint64_t * desc,uint64_t * bitmap,nandfs_lbn_t * blocknr,uint32_t * entry_in_block)383 nandfs_mdt_trans_blk(struct nandfs_mdt *mdt, uint64_t index,
384     uint64_t *desc, uint64_t *bitmap, nandfs_lbn_t *blocknr,
385     uint32_t *entry_in_block)
386 {
387 	uint64_t blknr;
388 	uint64_t group, group_offset, blocknr_in_group;
389 	uint64_t desc_block, desc_offset;
390 
391 	/* Calculate our offset in the file */
392 	group = index / mdt->entries_per_group;
393 	group_offset = index % mdt->entries_per_group;
394 	desc_block = group / mdt->groups_per_desc_block;
395 	desc_offset = group % mdt->groups_per_desc_block;
396 	blocknr_in_group = group_offset / mdt->entries_per_block;
397 
398 	/* To descgroup offset */
399 	*desc = desc_block * mdt->blocks_per_desc_block;
400 	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
401 
402 	/* To group offset */
403 	blknr += desc_offset * mdt->blocks_per_group;
404 	*bitmap = blknr;
405 
406 	/* To actual file block */
407 	blknr += 1 + blocknr_in_group;
408 
409 	*blocknr = blknr;
410 	*entry_in_block = group_offset % mdt->entries_per_block;
411 
412 	DPRINTF(ALLOC,
413 	    ("%s: desc_buf: %jx bitmap_buf: %jx entry_buf: %jx entry: %x\n",
414 	    __func__, (uintmax_t)*desc, (uintmax_t)*bitmap,
415 	    (uintmax_t)*blocknr, *entry_in_block));
416 }
417 
418 int
nandfs_vtop(struct nandfs_node * node,nandfs_daddr_t vblocknr,nandfs_daddr_t * pblocknr)419 nandfs_vtop(struct nandfs_node *node, nandfs_daddr_t vblocknr,
420     nandfs_daddr_t *pblocknr)
421 {
422 	struct nandfs_node *dat_node;
423 	struct nandfs_dat_entry *entry;
424 	struct buf *bp;
425 	nandfs_lbn_t ldatblknr;
426 	uint32_t entry_in_block;
427 	int locked, error;
428 
429 	if (node->nn_ino == NANDFS_DAT_INO || node->nn_ino == NANDFS_GC_INO) {
430 		*pblocknr = vblocknr;
431 		return (0);
432 	}
433 
434 	/* only translate valid vblocknrs */
435 	if (vblocknr == 0)
436 		return (0);
437 
438 	dat_node = node->nn_nandfsdev->nd_dat_node;
439 	nandfs_mdt_trans(&node->nn_nandfsdev->nd_dat_mdt, vblocknr, &ldatblknr,
440 	    &entry_in_block);
441 
442 	locked = NANDFS_VOP_ISLOCKED(NTOV(dat_node));
443 	if (!locked)
444 		VOP_LOCK(NTOV(dat_node), LK_SHARED);
445 	error = nandfs_bread(dat_node, ldatblknr, NOCRED, 0, &bp);
446 	if (error) {
447 		DPRINTF(TRANSLATE, ("vtop: can't read in DAT block %#jx!\n",
448 		    (uintmax_t)ldatblknr));
449 		brelse(bp);
450 		VOP_UNLOCK(NTOV(dat_node), 0);
451 		return (error);
452 	}
453 
454 	/* Get our translation */
455 	entry = ((struct nandfs_dat_entry *) bp->b_data) + entry_in_block;
456 	DPRINTF(TRANSLATE, ("\tentry %p data %p entry_in_block %x\n",
457 	    entry, bp->b_data, entry_in_block))
458 	DPRINTF(TRANSLATE, ("\tvblk %#jx -> %#jx for cp [%#jx-%#jx]\n",
459 	    (uintmax_t)vblocknr, (uintmax_t)entry->de_blocknr,
460 	    (uintmax_t)entry->de_start, (uintmax_t)entry->de_end));
461 
462 	*pblocknr = entry->de_blocknr;
463 	brelse(bp);
464 	if (!locked)
465 		VOP_UNLOCK(NTOV(dat_node), 0);
466 
467 	MPASS(*pblocknr >= node->nn_nandfsdev->nd_fsdata.f_first_data_block ||
468 	    *pblocknr == 0);
469 
470 	return (0);
471 }
472 
473 int
nandfs_segsum_valid(struct nandfs_segment_summary * segsum)474 nandfs_segsum_valid(struct nandfs_segment_summary *segsum)
475 {
476 
477 	return (segsum->ss_magic == NANDFS_SEGSUM_MAGIC);
478 }
479 
480 int
nandfs_load_segsum(struct nandfs_device * fsdev,nandfs_daddr_t blocknr,struct nandfs_segment_summary * segsum)481 nandfs_load_segsum(struct nandfs_device *fsdev, nandfs_daddr_t blocknr,
482     struct nandfs_segment_summary *segsum)
483 {
484 	struct buf *bp;
485 	int error;
486 
487 	DPRINTF(VOLUMES, ("nandfs: try segsum at block %jx\n",
488 	    (uintmax_t)blocknr));
489 
490 	error = nandfs_dev_bread(fsdev, blocknr, NOCRED, 0, &bp);
491 	if (error)
492 		return (error);
493 
494 	memcpy(segsum, bp->b_data, sizeof(struct nandfs_segment_summary));
495 	brelse(bp);
496 
497 	if (!nandfs_segsum_valid(segsum)) {
498 		DPRINTF(VOLUMES, ("%s: bad magic pseg:%jx\n", __func__,
499 		    blocknr));
500 		return (EINVAL);
501 	}
502 
503 	return (error);
504 }
505 
506 static int
nandfs_load_super_root(struct nandfs_device * nandfsdev,struct nandfs_segment_summary * segsum,uint64_t pseg)507 nandfs_load_super_root(struct nandfs_device *nandfsdev,
508     struct nandfs_segment_summary *segsum, uint64_t pseg)
509 {
510 	struct nandfs_super_root super_root;
511 	struct buf *bp;
512 	uint64_t blocknr;
513 	uint32_t super_root_crc, comp_crc;
514 	int off, error;
515 
516 	/* Check if there is a superroot */
517 	if ((segsum->ss_flags & NANDFS_SS_SR) == 0) {
518 		DPRINTF(VOLUMES, ("%s: no super root in pseg:%jx\n", __func__,
519 		    pseg));
520 		return (ENOENT);
521 	}
522 
523 	/* Get our super root, located at the end of the pseg */
524 	blocknr = pseg + segsum->ss_nblocks - 1;
525 	DPRINTF(VOLUMES, ("%s: try at %#jx\n", __func__, (uintmax_t)blocknr));
526 
527 	error = nandfs_dev_bread(nandfsdev, blocknr, NOCRED, 0, &bp);
528 	if (error)
529 		return (error);
530 
531 	memcpy(&super_root, bp->b_data, sizeof(struct nandfs_super_root));
532 	brelse(bp);
533 
534 	/* Check super root CRC */
535 	super_root_crc = super_root.sr_sum;
536 	off = sizeof(super_root.sr_sum);
537 	comp_crc = crc32((uint8_t *)&super_root + off,
538 	    NANDFS_SR_BYTES - off);
539 
540 	if (super_root_crc != comp_crc) {
541 		DPRINTF(VOLUMES, ("%s: invalid crc:%#x [expect:%#x]\n",
542 		    __func__, super_root_crc, comp_crc));
543 		return (EINVAL);
544 	}
545 
546 	nandfsdev->nd_super_root = super_root;
547 	DPRINTF(VOLUMES, ("%s: got valid superroot\n", __func__));
548 
549 	return (0);
550 }
551 
552 /*
553  * Search for the last super root recorded.
554  */
555 int
nandfs_search_super_root(struct nandfs_device * nandfsdev)556 nandfs_search_super_root(struct nandfs_device *nandfsdev)
557 {
558 	struct nandfs_super_block *super;
559 	struct nandfs_segment_summary segsum;
560 	uint64_t seg_start, seg_end, cno, seq, create, pseg;
561 	uint64_t segnum;
562 	int error, found;
563 
564 	error = found = 0;
565 
566 	/* Search for last super root */
567 	pseg = nandfsdev->nd_super.s_last_pseg;
568 	segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
569 
570 	cno = nandfsdev->nd_super.s_last_cno;
571 	create = seq = 0;
572 	DPRINTF(VOLUMES, ("%s: start in pseg %#jx\n", __func__,
573 	    (uintmax_t)pseg));
574 
575 	for (;;) {
576 		error = nandfs_load_segsum(nandfsdev, pseg, &segsum);
577 		if (error)
578 			break;
579 
580 		if (segsum.ss_seq < seq || segsum.ss_create < create)
581 			break;
582 
583 		/* Try to load super root */
584 		if (segsum.ss_flags & NANDFS_SS_SR) {
585 			error = nandfs_load_super_root(nandfsdev, &segsum, pseg);
586 			if (error)
587 				break;	/* confused */
588 			found = 1;
589 
590 			super = &nandfsdev->nd_super;
591 			nandfsdev->nd_last_segsum = segsum;
592 			super->s_last_pseg = pseg;
593 			super->s_last_cno = cno++;
594 			super->s_last_seq = segsum.ss_seq;
595 			super->s_state = NANDFS_VALID_FS;
596 			seq = segsum.ss_seq;
597 			create = segsum.ss_create;
598 		} else {
599 			seq = segsum.ss_seq;
600 			create = segsum.ss_create;
601 		}
602 
603 		/* Calculate next partial segment location */
604 		pseg += segsum.ss_nblocks;
605 		DPRINTF(VOLUMES, ("%s: next partial seg is %jx\n", __func__,
606 		    (uintmax_t)pseg));
607 
608 		/* Did we reach the end of the segment? if so, go to the next */
609 		nandfs_get_segment_range(nandfsdev, segnum, &seg_start,
610 		    &seg_end);
611 		if (pseg >= seg_end) {
612 			pseg = segsum.ss_next;
613 			DPRINTF(VOLUMES,
614 			    (" partial seg oor next is %jx[%jx - %jx]\n",
615 			    (uintmax_t)pseg, (uintmax_t)seg_start,
616 			    (uintmax_t)seg_end));
617 		}
618 		segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
619 	}
620 
621 	if (error && !found)
622 		return (error);
623 
624 	return (0);
625 }
626 
627 int
nandfs_get_node_raw(struct nandfs_device * nandfsdev,struct nandfsmount * nmp,uint64_t ino,struct nandfs_inode * inode,struct nandfs_node ** nodep)628 nandfs_get_node_raw(struct nandfs_device *nandfsdev, struct nandfsmount *nmp,
629     uint64_t ino, struct nandfs_inode *inode, struct nandfs_node **nodep)
630 {
631 	struct nandfs_node *node;
632 	struct vnode *nvp;
633 	struct mount *mp;
634 	int error;
635 
636 	*nodep = NULL;
637 
638 	/* Associate with mountpoint if present */
639 	if (nmp) {
640 		mp = nmp->nm_vfs_mountp;
641 		error = getnewvnode("nandfs", mp, &nandfs_vnodeops, &nvp);
642 		if (error) {
643 			return (error);
644 		}
645 	} else {
646 		mp = NULL;
647 		error = getnewvnode("snandfs", mp, &nandfs_system_vnodeops,
648 		    &nvp);
649 		if (error) {
650 			return (error);
651 		}
652 	}
653 
654 	if (mp)
655 		NANDFS_WRITELOCK(nandfsdev);
656 
657 	DPRINTF(IFILE, ("%s: ino: %#jx -> vp: %p\n",
658 	    __func__, (uintmax_t)ino, nvp));
659 	/* Lock node */
660 	lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL);
661 
662 	if (mp) {
663 		error = insmntque(nvp, mp);
664 		if (error != 0) {
665 			*nodep = NULL;
666 			return (error);
667 		}
668 	}
669 
670 	node = uma_zalloc(nandfs_node_zone, M_WAITOK | M_ZERO);
671 
672 	/* Crosslink */
673 	node->nn_vnode = nvp;
674 	nvp->v_bufobj.bo_ops = &buf_ops_nandfs;
675 	node->nn_nmp = nmp;
676 	node->nn_nandfsdev = nandfsdev;
677 	nvp->v_data = node;
678 
679 	/* Initiase NANDFS node */
680 	node->nn_ino = ino;
681 	if (inode != NULL)
682 		node->nn_inode = *inode;
683 
684 	nandfs_vinit(nvp, ino);
685 
686 	/* Return node */
687 	*nodep = node;
688 	DPRINTF(IFILE, ("%s: ino:%#jx vp:%p node:%p\n",
689 	    __func__, (uintmax_t)ino, nvp, *nodep));
690 
691 	return (0);
692 }
693 
694 int
nandfs_get_node(struct nandfsmount * nmp,uint64_t ino,struct nandfs_node ** nodep)695 nandfs_get_node(struct nandfsmount *nmp, uint64_t ino,
696     struct nandfs_node **nodep)
697 {
698 	struct nandfs_device *nandfsdev;
699 	struct nandfs_inode inode, *entry;
700 	struct vnode *nvp, *vpp;
701 	struct thread *td;
702 	struct buf *bp;
703 	uint64_t ivblocknr;
704 	uint32_t entry_in_block;
705 	int error;
706 
707 	/* Look up node in hash table */
708 	td = curthread;
709 	*nodep = NULL;
710 
711 	if ((ino < NANDFS_ATIME_INO) && (ino != NANDFS_ROOT_INO)) {
712 		printf("nandfs_get_node: system ino %"PRIu64" not in mount "
713 		    "point!\n", ino);
714 		return (ENOENT);
715 	}
716 
717 	error = vfs_hash_get(nmp->nm_vfs_mountp, ino, LK_EXCLUSIVE, td, &nvp,
718 	    NULL, NULL);
719 	if (error)
720 		return (error);
721 
722 	if (nvp != NULL) {
723 		*nodep = (struct nandfs_node *)nvp->v_data;
724 		return (0);
725 	}
726 
727 	/* Look up inode structure in mountpoints ifile */
728 	nandfsdev = nmp->nm_nandfsdev;
729 	nandfs_mdt_trans(&nandfsdev->nd_ifile_mdt, ino, &ivblocknr,
730 	    &entry_in_block);
731 
732 	VOP_LOCK(NTOV(nmp->nm_ifile_node), LK_SHARED);
733 	error = nandfs_bread(nmp->nm_ifile_node, ivblocknr, NOCRED, 0, &bp);
734 	if (error) {
735 		brelse(bp);
736 		VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
737 		return (ENOENT);
738 	}
739 
740 	/* Get inode entry */
741 	entry = (struct nandfs_inode *) bp->b_data + entry_in_block;
742 	memcpy(&inode, entry, sizeof(struct nandfs_inode));
743 	brelse(bp);
744 	VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
745 
746 	/* Get node */
747 	error = nandfs_get_node_raw(nmp->nm_nandfsdev, nmp, ino, &inode, nodep);
748 	if (error) {
749 		*nodep = NULL;
750 		return (error);
751 	}
752 
753 	nvp = (*nodep)->nn_vnode;
754 	error = vfs_hash_insert(nvp, ino, 0, td, &vpp, NULL, NULL);
755 	if (error) {
756 		*nodep = NULL;
757 		return (error);
758 	}
759 
760 	return (error);
761 }
762 
763 void
nandfs_dispose_node(struct nandfs_node ** nodep)764 nandfs_dispose_node(struct nandfs_node **nodep)
765 {
766 	struct nandfs_node *node;
767 	struct vnode *vp;
768 
769 	/* Protect against rogue values */
770 	node = *nodep;
771 	if (!node) {
772 		return;
773 	}
774 	DPRINTF(NODE, ("nandfs_dispose_node: %p\n", *nodep));
775 
776 	vp = NTOV(node);
777 	vp->v_data = NULL;
778 
779 	/* Free our associated memory */
780 	uma_zfree(nandfs_node_zone, node);
781 
782 	*nodep = NULL;
783 }
784 
785 int
nandfs_lookup_name_in_dir(struct vnode * dvp,const char * name,int namelen,uint64_t * ino,int * found,uint64_t * off)786 nandfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
787     uint64_t *ino, int *found, uint64_t *off)
788 {
789 	struct nandfs_node *dir_node = VTON(dvp);
790 	struct nandfs_dir_entry	*ndirent;
791 	struct buf *bp;
792 	uint64_t file_size, diroffset, blkoff;
793 	uint64_t blocknr;
794 	uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize;
795 	uint8_t *pos, name_len;
796 	int error;
797 
798 	*found = 0;
799 
800 	DPRINTF(VNCALL, ("%s: %s file\n", __func__, name));
801 	if (dvp->v_type != VDIR) {
802 		return (ENOTDIR);
803 	}
804 
805 	/* Get directory filesize */
806 	file_size = dir_node->nn_inode.i_size;
807 
808 	/* Walk the directory */
809 	diroffset = 0;
810 	blocknr = 0;
811 	blkoff = 0;
812 	error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
813 	if (error) {
814 		brelse(bp);
815 		return (EIO);
816 	}
817 
818 	while (diroffset < file_size) {
819 		if (blkoff >= blocksize) {
820 			blkoff = 0; blocknr++;
821 			brelse(bp);
822 			error = nandfs_bread(dir_node, blocknr, NOCRED, 0,
823 			    &bp);
824 			if (error) {
825 				brelse(bp);
826 				return (EIO);
827 			}
828 		}
829 
830 		/* Read in one dirent */
831 		pos = (uint8_t *) bp->b_data + blkoff;
832 		ndirent = (struct nandfs_dir_entry *) pos;
833 		name_len = ndirent->name_len;
834 
835 		if ((name_len == namelen) &&
836 		    (strncmp(name, ndirent->name, name_len) == 0) &&
837 		    (ndirent->inode != 0)) {
838 			*ino = ndirent->inode;
839 			*off = diroffset;
840 			DPRINTF(LOOKUP, ("found `%.*s` with ino %"PRIx64"\n",
841 			    name_len, ndirent->name, *ino));
842 			*found = 1;
843 			break;
844 		}
845 
846 		/* Advance */
847 		diroffset += ndirent->rec_len;
848 		blkoff += ndirent->rec_len;
849 	}
850 	brelse(bp);
851 
852 	return (error);
853 }
854 
855 int
nandfs_get_fsinfo(struct nandfsmount * nmp,struct nandfs_fsinfo * fsinfo)856 nandfs_get_fsinfo(struct nandfsmount *nmp, struct nandfs_fsinfo *fsinfo)
857 {
858 	struct nandfs_device *fsdev;
859 
860 	fsdev = nmp->nm_nandfsdev;
861 
862 	memcpy(&fsinfo->fs_fsdata, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata));
863 	memcpy(&fsinfo->fs_super, &fsdev->nd_super, sizeof(fsdev->nd_super));
864 	snprintf(fsinfo->fs_dev, sizeof(fsinfo->fs_dev),
865 	    "%s", nmp->nm_vfs_mountp->mnt_stat.f_mntfromname);
866 
867 	return (0);
868 }
869 
870 void
nandfs_inode_init(struct nandfs_inode * inode,uint16_t mode)871 nandfs_inode_init(struct nandfs_inode *inode, uint16_t mode)
872 {
873 	struct timespec ts;
874 
875 	vfs_timestamp(&ts);
876 
877 	inode->i_blocks = 0;
878 	inode->i_size = 0;
879 	inode->i_ctime = ts.tv_sec;
880 	inode->i_ctime_nsec = ts.tv_nsec;
881 	inode->i_mtime = ts.tv_sec;
882 	inode->i_mtime_nsec = ts.tv_nsec;
883 	inode->i_mode = mode;
884 	inode->i_links_count = 1;
885 	if (S_ISDIR(mode))
886 		inode->i_links_count = 2;
887 	inode->i_flags = 0;
888 
889 	inode->i_special = 0;
890 	memset(inode->i_db, 0, sizeof(inode->i_db));
891 	memset(inode->i_ib, 0, sizeof(inode->i_ib));
892 }
893 
894 void
nandfs_inode_destroy(struct nandfs_inode * inode)895 nandfs_inode_destroy(struct nandfs_inode *inode)
896 {
897 
898 	MPASS(inode->i_blocks == 0);
899 	bzero(inode, sizeof(*inode));
900 }
901 
902 int
nandfs_fs_full(struct nandfs_device * nffsdev)903 nandfs_fs_full(struct nandfs_device *nffsdev)
904 {
905 	uint64_t space, bps;
906 
907 	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
908 	space = (nffsdev->nd_clean_segs - 1) * bps;
909 
910 	DPRINTF(BUF, ("%s: bufs:%jx space:%jx\n", __func__,
911 	    (uintmax_t)nffsdev->nd_dirty_bufs, (uintmax_t)space));
912 
913 	if (nffsdev->nd_dirty_bufs + (10 * bps) >= space)
914 		return (1);
915 
916 	return (0);
917 }
918 
919 static int
_nandfs_dirty_buf(struct buf * bp,int dirty_meta,int force)920 _nandfs_dirty_buf(struct buf *bp, int dirty_meta, int force)
921 {
922 	struct nandfs_device *nffsdev;
923 	struct nandfs_node *node;
924 	uint64_t ino, bps;
925 
926 	if (NANDFS_ISGATHERED(bp)) {
927 		bqrelse(bp);
928 		return (0);
929 	}
930 	if ((bp->b_flags & (B_MANAGED | B_DELWRI)) == (B_MANAGED | B_DELWRI)) {
931 		bqrelse(bp);
932 		return (0);
933 	}
934 
935 	node = VTON(bp->b_vp);
936 	nffsdev = node->nn_nandfsdev;
937 	DPRINTF(BUF, ("%s: buf:%p\n", __func__, bp));
938 	ino = node->nn_ino;
939 
940 	if (nandfs_fs_full(nffsdev) && !NANDFS_SYS_NODE(ino) && !force) {
941 		brelse(bp);
942 		return (ENOSPC);
943 	}
944 
945 	bp->b_flags |= B_MANAGED;
946 	bdwrite(bp);
947 
948 	nandfs_dirty_bufs_increment(nffsdev);
949 
950 	KASSERT((bp->b_vp), ("vp missing for bp"));
951 	KASSERT((nandfs_vblk_get(bp) || ino == NANDFS_DAT_INO),
952 	    ("bp vblk is 0"));
953 
954 	/*
955 	 * To maintain consistency of FS we need to force making
956 	 * meta buffers dirty, even if free space is low.
957 	 */
958 	if (dirty_meta && ino != NANDFS_GC_INO)
959 		nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);
960 
961 	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
962 
963 	if (nffsdev->nd_dirty_bufs >= (bps * nandfs_max_dirty_segs)) {
964 		mtx_lock(&nffsdev->nd_sync_mtx);
965 		if (nffsdev->nd_syncing == 0) {
966 			DPRINTF(SYNC, ("%s: wakeup gc\n", __func__));
967 			nffsdev->nd_syncing = 1;
968 			wakeup(&nffsdev->nd_syncing);
969 		}
970 		mtx_unlock(&nffsdev->nd_sync_mtx);
971 	}
972 
973 	return (0);
974 }
975 
/*
 * Mark 'bp' dirty and also dirty the block map blocks leading to it
 * (calls _nandfs_dirty_buf() with dirty_meta = 1).  'force' bypasses the
 * filesystem-full check.  Returns the worker's result.
 */
int
nandfs_dirty_buf(struct buf *bp, int force)
{
	int error;

	error = _nandfs_dirty_buf(bp, 1, force);
	return (error);
}
982 
/*
 * Mark 'bp' dirty without dirtying the block map blocks leading to it
 * (calls _nandfs_dirty_buf() with dirty_meta = 0).  'force' bypasses the
 * filesystem-full check.  Returns the worker's result.
 */
int
nandfs_dirty_buf_meta(struct buf *bp, int force)
{
	int error;

	error = _nandfs_dirty_buf(bp, 0, force);
	return (error);
}
989 
990 void
nandfs_undirty_buf_fsdev(struct nandfs_device * nffsdev,struct buf * bp)991 nandfs_undirty_buf_fsdev(struct nandfs_device *nffsdev, struct buf *bp)
992 {
993 
994 	BUF_ASSERT_HELD(bp);
995 
996 	if (bp->b_flags & B_DELWRI) {
997 		bp->b_flags &= ~(B_DELWRI|B_MANAGED);
998 		nandfs_dirty_bufs_decrement(nffsdev);
999 	}
1000 	/*
1001 	 * Since it is now being written, we can clear its deferred write flag.
1002 	 */
1003 	bp->b_flags &= ~B_DEFERRED;
1004 
1005 	brelse(bp);
1006 }
1007 
1008 void
nandfs_undirty_buf(struct buf * bp)1009 nandfs_undirty_buf(struct buf *bp)
1010 {
1011 	struct nandfs_node *node;
1012 
1013 	node = VTON(bp->b_vp);
1014 
1015 	nandfs_undirty_buf_fsdev(node->nn_nandfsdev, bp);
1016 }
1017 
1018 void
nandfs_vblk_set(struct buf * bp,nandfs_daddr_t blocknr)1019 nandfs_vblk_set(struct buf *bp, nandfs_daddr_t blocknr)
1020 {
1021 
1022 	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);
1023 	*vblk = blocknr;
1024 }
1025 
1026 nandfs_daddr_t
nandfs_vblk_get(struct buf * bp)1027 nandfs_vblk_get(struct buf *bp)
1028 {
1029 
1030 	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);
1031 	return (*vblk);
1032 }
1033 
1034 void
nandfs_buf_set(struct buf * bp,uint32_t bits)1035 nandfs_buf_set(struct buf *bp, uint32_t bits)
1036 {
1037 	uintptr_t flags;
1038 
1039 	flags = (uintptr_t)bp->b_fsprivate3;
1040 	flags |= (uintptr_t)bits;
1041 	bp->b_fsprivate3 = (void *)flags;
1042 }
1043 
1044 void
nandfs_buf_clear(struct buf * bp,uint32_t bits)1045 nandfs_buf_clear(struct buf *bp, uint32_t bits)
1046 {
1047 	uintptr_t flags;
1048 
1049 	flags = (uintptr_t)bp->b_fsprivate3;
1050 	flags &= ~(uintptr_t)bits;
1051 	bp->b_fsprivate3 = (void *)flags;
1052 }
1053 
1054 int
nandfs_buf_check(struct buf * bp,uint32_t bits)1055 nandfs_buf_check(struct buf *bp, uint32_t bits)
1056 {
1057 	uintptr_t flags;
1058 
1059 	flags = (uintptr_t)bp->b_fsprivate3;
1060 	if (flags & bits)
1061 		return (1);
1062 	return (0);
1063 }
1064 
1065 int
nandfs_erase(struct nandfs_device * fsdev,off_t offset,size_t size)1066 nandfs_erase(struct nandfs_device *fsdev, off_t offset, size_t size)
1067 {
1068 	struct buf *bp;
1069 	int read_size, error, i;
1070 
1071 	DPRINTF(BLOCK, ("%s: performing erase at offset %jx size %zx\n",
1072 	    __func__, offset, size));
1073 
1074 	MPASS(size % fsdev->nd_erasesize == 0);
1075 
1076 	if (fsdev->nd_is_nand) {
1077 		error = g_delete_data(fsdev->nd_gconsumer, offset, size);
1078 		return (error);
1079 	}
1080 
1081 	if (size > MAXBSIZE)
1082 		read_size = MAXBSIZE;
1083 	else
1084 		read_size = size;
1085 
1086 	error = 0;
1087 	for (i = 0; i < size / MAXBSIZE; i++) {
1088 		error = bread(fsdev->nd_devvp, btodb(offset + i * read_size),
1089 		    read_size, NOCRED, &bp);
1090 		if (error) {
1091 			brelse(bp);
1092 			return (error);
1093 		}
1094 		memset(bp->b_data, 0xff, read_size);
1095 		error = bwrite(bp);
1096 		if (error) {
1097 			nandfs_error("%s: err:%d from bwrite\n",
1098 			    __func__, error);
1099 			return (error);
1100 		}
1101 	}
1102 
1103 	return (error);
1104 }
1105 
1106 int
nandfs_vop_islocked(struct vnode * vp)1107 nandfs_vop_islocked(struct vnode *vp)
1108 {
1109 	int islocked;
1110 
1111 	islocked = VOP_ISLOCKED(vp);
1112 	return (islocked == LK_EXCLUSIVE || islocked == LK_SHARED);
1113 }
1114 
1115 nandfs_daddr_t
nandfs_block_to_dblock(struct nandfs_device * fsdev,nandfs_lbn_t block)1116 nandfs_block_to_dblock(struct nandfs_device *fsdev, nandfs_lbn_t block)
1117 {
1118 
1119 	return (btodb(block * fsdev->nd_blocksize));
1120 }
1121