1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h"
31 #include "xfs_da_btree.h"
32 #include "xfs_bmap_btree.h"
33 #include "xfs_alloc_btree.h"
34 #include "xfs_ialloc_btree.h"
35 #include "xfs_dir_sf.h"
36 #include "xfs_dir2_sf.h"
37 #include "xfs_attr_sf.h"
38 #include "xfs_dinode.h"
39 #include "xfs_inode.h"
40 #include "xfs_inode_item.h"
41 #include "xfs_alloc.h"
42 #include "xfs_btree.h"
43 #include "xfs_bmap.h"
44 #include "xfs_attr.h"
45 #include "xfs_attr_leaf.h"
46 #include "xfs_dir_leaf.h"
47 #include "xfs_dir2_data.h"
48 #include "xfs_dir2_leaf.h"
49 #include "xfs_dir2_block.h"
50 #include "xfs_dir2_node.h"
51 #include "xfs_error.h"
52 
53 /*
54  * xfs_da_btree.c
55  *
56  * Routines to implement directories as Btrees of hashed names.
57  */
58 
59 /*========================================================================
60  * Function prototypes for the kernel.
61  *========================================================================*/
62 
63 /*
64  * Routines used for growing the Btree.
65  */
66 STATIC int xfs_da_root_split(xfs_da_state_t *state,
67 					    xfs_da_state_blk_t *existing_root,
68 					    xfs_da_state_blk_t *new_child);
69 STATIC int xfs_da_node_split(xfs_da_state_t *state,
70 					    xfs_da_state_blk_t *existing_blk,
71 					    xfs_da_state_blk_t *split_blk,
72 					    xfs_da_state_blk_t *blk_to_add,
73 					    int treelevel,
74 					    int *result);
75 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
76 					 xfs_da_state_blk_t *node_blk_1,
77 					 xfs_da_state_blk_t *node_blk_2);
78 STATIC void xfs_da_node_add(xfs_da_state_t *state,
79 				   xfs_da_state_blk_t *old_node_blk,
80 				   xfs_da_state_blk_t *new_node_blk);
81 
82 /*
83  * Routines used for shrinking the Btree.
84  */
85 STATIC int xfs_da_root_join(xfs_da_state_t *state,
86 					   xfs_da_state_blk_t *root_blk);
87 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
88 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
89 					      xfs_da_state_blk_t *drop_blk);
90 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
91 					 xfs_da_state_blk_t *src_node_blk,
92 					 xfs_da_state_blk_t *dst_node_blk);
93 
94 /*
95  * Utility routines.
96  */
97 STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
98 STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
99 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
100 STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
101 				  xfs_da_state_blk_t *drop_blk,
102 				  xfs_da_state_blk_t *save_blk);
103 STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);
104 
105 /*========================================================================
106  * Routines used for growing the Btree.
107  *========================================================================*/
108 
109 /*
110  * Create the initial contents of an intermediate node.
111  */
112 int
xfs_da_node_create(xfs_da_args_t * args,xfs_dablk_t blkno,int level,xfs_dabuf_t ** bpp,int whichfork)113 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
114 				 xfs_dabuf_t **bpp, int whichfork)
115 {
116 	xfs_da_intnode_t *node;
117 	xfs_dabuf_t *bp;
118 	int error;
119 	xfs_trans_t *tp;
120 
121 	tp = args->trans;
122 	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
123 	if (error)
124 		return(error);
125 	ASSERT(bp != NULL);
126 	node = bp->data;
127 	node->hdr.info.forw = 0;
128 	node->hdr.info.back = 0;
129 	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
130 	node->hdr.info.pad = 0;
131 	node->hdr.count = 0;
132 	node->hdr.level = cpu_to_be16(level);
133 
134 	xfs_da_log_buf(tp, bp,
135 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
136 
137 	*bpp = bp;
138 	return(0);
139 }
140 
141 /*
142  * Split a leaf node, rebalance, then possibly split
143  * intermediate nodes, rebalance, etc.
144  */
145 int							/* error */
xfs_da_split(xfs_da_state_t * state)146 xfs_da_split(xfs_da_state_t *state)
147 {
148 	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
149 	xfs_da_intnode_t *node;
150 	xfs_dabuf_t *bp;
151 	int max, action, error, i;
152 
153 	/*
154 	 * Walk back up the tree splitting/inserting/adjusting as necessary.
155 	 * If we need to insert and there isn't room, split the node, then
156 	 * decide which fragment to insert the new block from below into.
157 	 * Note that we may split the root this way, but we need more fixup.
158 	 */
159 	max = state->path.active - 1;
160 	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
161 	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
162 	       state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));
163 
164 	addblk = &state->path.blk[max];		/* initial dummy value */
165 	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
166 		oldblk = &state->path.blk[i];
167 		newblk = &state->altpath.blk[i];
168 
169 		/*
170 		 * If a leaf node then
171 		 *     Allocate a new leaf node, then rebalance across them.
172 		 * else if an intermediate node then
173 		 *     We split on the last layer, must we split the node?
174 		 */
175 		switch (oldblk->magic) {
176 		case XFS_ATTR_LEAF_MAGIC:
177 			error = xfs_attr_leaf_split(state, oldblk, newblk);
178 			if ((error != 0) && (error != ENOSPC)) {
179 				return(error);	/* GROT: attr is inconsistent */
180 			}
181 			if (!error) {
182 				addblk = newblk;
183 				break;
184 			}
185 			/*
186 			 * Entry wouldn't fit, split the leaf again.
187 			 */
188 			state->extravalid = 1;
189 			if (state->inleaf) {
190 				state->extraafter = 0;	/* before newblk */
191 				error = xfs_attr_leaf_split(state, oldblk,
192 							    &state->extrablk);
193 			} else {
194 				state->extraafter = 1;	/* after newblk */
195 				error = xfs_attr_leaf_split(state, newblk,
196 							    &state->extrablk);
197 			}
198 			if (error)
199 				return(error);	/* GROT: attr inconsistent */
200 			addblk = newblk;
201 			break;
202 		case XFS_DIR_LEAF_MAGIC:
203 			ASSERT(XFS_DIR_IS_V1(state->mp));
204 			error = xfs_dir_leaf_split(state, oldblk, newblk);
205 			if ((error != 0) && (error != ENOSPC)) {
206 				return(error);	/* GROT: dir is inconsistent */
207 			}
208 			if (!error) {
209 				addblk = newblk;
210 				break;
211 			}
212 			/*
213 			 * Entry wouldn't fit, split the leaf again.
214 			 */
215 			state->extravalid = 1;
216 			if (state->inleaf) {
217 				state->extraafter = 0;	/* before newblk */
218 				error = xfs_dir_leaf_split(state, oldblk,
219 							   &state->extrablk);
220 				if (error)
221 					return(error);	/* GROT: dir incon. */
222 				addblk = newblk;
223 			} else {
224 				state->extraafter = 1;	/* after newblk */
225 				error = xfs_dir_leaf_split(state, newblk,
226 							   &state->extrablk);
227 				if (error)
228 					return(error);	/* GROT: dir incon. */
229 				addblk = newblk;
230 			}
231 			break;
232 		case XFS_DIR2_LEAFN_MAGIC:
233 			ASSERT(XFS_DIR_IS_V2(state->mp));
234 			error = xfs_dir2_leafn_split(state, oldblk, newblk);
235 			if (error)
236 				return error;
237 			addblk = newblk;
238 			break;
239 		case XFS_DA_NODE_MAGIC:
240 			error = xfs_da_node_split(state, oldblk, newblk, addblk,
241 							 max - i, &action);
242 			xfs_da_buf_done(addblk->bp);
243 			addblk->bp = NULL;
244 			if (error)
245 				return(error);	/* GROT: dir is inconsistent */
246 			/*
247 			 * Record the newly split block for the next time thru?
248 			 */
249 			if (action)
250 				addblk = newblk;
251 			else
252 				addblk = NULL;
253 			break;
254 		}
255 
256 		/*
257 		 * Update the btree to show the new hashval for this child.
258 		 */
259 		xfs_da_fixhashpath(state, &state->path);
260 		/*
261 		 * If we won't need this block again, it's getting dropped
262 		 * from the active path by the loop control, so we need
263 		 * to mark it done now.
264 		 */
265 		if (i > 0 || !addblk)
266 			xfs_da_buf_done(oldblk->bp);
267 	}
268 	if (!addblk)
269 		return(0);
270 
271 	/*
272 	 * Split the root node.
273 	 */
274 	ASSERT(state->path.active == 0);
275 	oldblk = &state->path.blk[0];
276 	error = xfs_da_root_split(state, oldblk, addblk);
277 	if (error) {
278 		xfs_da_buf_done(oldblk->bp);
279 		xfs_da_buf_done(addblk->bp);
280 		addblk->bp = NULL;
281 		return(error);	/* GROT: dir is inconsistent */
282 	}
283 
284 	/*
285 	 * Update pointers to the node which used to be block 0 and
286 	 * just got bumped because of the addition of a new root node.
287 	 * There might be three blocks involved if a double split occurred,
288 	 * and the original block 0 could be at any position in the list.
289 	 */
290 
291 	node = oldblk->bp->data;
292 	if (node->hdr.info.forw) {
293 		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
294 			bp = addblk->bp;
295 		} else {
296 			ASSERT(state->extravalid);
297 			bp = state->extrablk.bp;
298 		}
299 		node = bp->data;
300 		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
301 		xfs_da_log_buf(state->args->trans, bp,
302 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
303 		    sizeof(node->hdr.info)));
304 	}
305 	node = oldblk->bp->data;
306 	if (node->hdr.info.back) {
307 		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
308 			bp = addblk->bp;
309 		} else {
310 			ASSERT(state->extravalid);
311 			bp = state->extrablk.bp;
312 		}
313 		node = bp->data;
314 		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
315 		xfs_da_log_buf(state->args->trans, bp,
316 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
317 		    sizeof(node->hdr.info)));
318 	}
319 	xfs_da_buf_done(oldblk->bp);
320 	xfs_da_buf_done(addblk->bp);
321 	addblk->bp = NULL;
322 	return(0);
323 }
324 
325 /*
326  * Split the root.  We have to create a new root and point to the two
327  * parts (the split old root) that we just created.  Copy block zero to
328  * the EOF, extending the inode in process.
329  */
330 STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t * state,xfs_da_state_blk_t * blk1,xfs_da_state_blk_t * blk2)331 xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
332 				 xfs_da_state_blk_t *blk2)
333 {
334 	xfs_da_intnode_t *node, *oldroot;
335 	xfs_da_args_t *args;
336 	xfs_dablk_t blkno;
337 	xfs_dabuf_t *bp;
338 	int error, size;
339 	xfs_inode_t *dp;
340 	xfs_trans_t *tp;
341 	xfs_mount_t *mp;
342 	xfs_dir2_leaf_t *leaf;
343 
344 	/*
345 	 * Copy the existing (incorrect) block from the root node position
346 	 * to a free space somewhere.
347 	 */
348 	args = state->args;
349 	ASSERT(args != NULL);
350 	error = xfs_da_grow_inode(args, &blkno);
351 	if (error)
352 		return(error);
353 	dp = args->dp;
354 	tp = args->trans;
355 	mp = state->mp;
356 	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
357 	if (error)
358 		return(error);
359 	ASSERT(bp != NULL);
360 	node = bp->data;
361 	oldroot = blk1->bp->data;
362 	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
363 		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
364 			     (char *)oldroot);
365 	} else {
366 		ASSERT(XFS_DIR_IS_V2(mp));
367 		ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
368 		leaf = (xfs_dir2_leaf_t *)oldroot;
369 		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
370 			     (char *)leaf);
371 	}
372 	memcpy(node, oldroot, size);
373 	xfs_da_log_buf(tp, bp, 0, size - 1);
374 	xfs_da_buf_done(blk1->bp);
375 	blk1->bp = bp;
376 	blk1->blkno = blkno;
377 
378 	/*
379 	 * Set up the new root node.
380 	 */
381 	error = xfs_da_node_create(args,
382 		args->whichfork == XFS_DATA_FORK &&
383 		XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
384 		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
385 	if (error)
386 		return(error);
387 	node = bp->data;
388 	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
389 	node->btree[0].before = cpu_to_be32(blk1->blkno);
390 	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
391 	node->btree[1].before = cpu_to_be32(blk2->blkno);
392 	node->hdr.count = cpu_to_be16(2);
393 
394 #ifdef DEBUG
395 	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
396 		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
397 		       blk1->blkno < mp->m_dirfreeblk);
398 		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
399 		       blk2->blkno < mp->m_dirfreeblk);
400 	}
401 #endif
402 
403 	/* Header is already logged by xfs_da_node_create */
404 	xfs_da_log_buf(tp, bp,
405 		XFS_DA_LOGRANGE(node, node->btree,
406 			sizeof(xfs_da_node_entry_t) * 2));
407 	xfs_da_buf_done(bp);
408 
409 	return(0);
410 }
411 
412 /*
413  * Split the node, rebalance, then add the new entry.
414  */
415 STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t * state,xfs_da_state_blk_t * oldblk,xfs_da_state_blk_t * newblk,xfs_da_state_blk_t * addblk,int treelevel,int * result)416 xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
417 				 xfs_da_state_blk_t *newblk,
418 				 xfs_da_state_blk_t *addblk,
419 				 int treelevel, int *result)
420 {
421 	xfs_da_intnode_t *node;
422 	xfs_dablk_t blkno;
423 	int newcount, error;
424 	int useextra;
425 
426 	node = oldblk->bp->data;
427 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
428 
429 	/*
430 	 * With V2 the extra block is data or freespace.
431 	 */
432 	useextra = state->extravalid && (XFS_DIR_IS_V1(state->mp) ||
433 			state->args->whichfork == XFS_ATTR_FORK);
434 	newcount = 1 + useextra;
435 	/*
436 	 * Do we have to split the node?
437 	 */
438 	if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) {
439 		/*
440 		 * Allocate a new node, add to the doubly linked chain of
441 		 * nodes, then move some of our excess entries into it.
442 		 */
443 		error = xfs_da_grow_inode(state->args, &blkno);
444 		if (error)
445 			return(error);	/* GROT: dir is inconsistent */
446 
447 		error = xfs_da_node_create(state->args, blkno, treelevel,
448 					   &newblk->bp, state->args->whichfork);
449 		if (error)
450 			return(error);	/* GROT: dir is inconsistent */
451 		newblk->blkno = blkno;
452 		newblk->magic = XFS_DA_NODE_MAGIC;
453 		xfs_da_node_rebalance(state, oldblk, newblk);
454 		error = xfs_da_blk_link(state, oldblk, newblk);
455 		if (error)
456 			return(error);
457 		*result = 1;
458 	} else {
459 		*result = 0;
460 	}
461 
462 	/*
463 	 * Insert the new entry(s) into the correct block
464 	 * (updating last hashval in the process).
465 	 *
466 	 * xfs_da_node_add() inserts BEFORE the given index,
467 	 * and as a result of using node_lookup_int() we always
468 	 * point to a valid entry (not after one), but a split
469 	 * operation always results in a new block whose hashvals
470 	 * FOLLOW the current block.
471 	 *
472 	 * If we had double-split op below us, then add the extra block too.
473 	 */
474 	node = oldblk->bp->data;
475 	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
476 		oldblk->index++;
477 		xfs_da_node_add(state, oldblk, addblk);
478 		if (useextra) {
479 			if (state->extraafter)
480 				oldblk->index++;
481 			xfs_da_node_add(state, oldblk, &state->extrablk);
482 			state->extravalid = 0;
483 		}
484 	} else {
485 		newblk->index++;
486 		xfs_da_node_add(state, newblk, addblk);
487 		if (useextra) {
488 			if (state->extraafter)
489 				newblk->index++;
490 			xfs_da_node_add(state, newblk, &state->extrablk);
491 			state->extravalid = 0;
492 		}
493 	}
494 
495 	return(0);
496 }
497 
498 /*
499  * Balance the btree elements between two intermediate nodes,
500  * usually one full and one empty.
501  *
502  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
503  */
504 STATIC void
xfs_da_node_rebalance(xfs_da_state_t * state,xfs_da_state_blk_t * blk1,xfs_da_state_blk_t * blk2)505 xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
506 				     xfs_da_state_blk_t *blk2)
507 {
508 	xfs_da_intnode_t *node1, *node2, *tmpnode;
509 	xfs_da_node_entry_t *btree_s, *btree_d;
510 	int count, tmp;
511 	xfs_trans_t *tp;
512 
513 	node1 = blk1->bp->data;
514 	node2 = blk2->bp->data;
515 	/*
516 	 * Figure out how many entries need to move, and in which direction.
517 	 * Swap the nodes around if that makes it simpler.
518 	 */
519 	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
520 	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
521 	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
522 	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
523 		tmpnode = node1;
524 		node1 = node2;
525 		node2 = tmpnode;
526 	}
527 	ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
528 	ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
529 	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
530 	if (count == 0)
531 		return;
532 	tp = state->args->trans;
533 	/*
534 	 * Two cases: high-to-low and low-to-high.
535 	 */
536 	if (count > 0) {
537 		/*
538 		 * Move elements in node2 up to make a hole.
539 		 */
540 		if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) {
541 			tmp *= (uint)sizeof(xfs_da_node_entry_t);
542 			btree_s = &node2->btree[0];
543 			btree_d = &node2->btree[count];
544 			memmove(btree_d, btree_s, tmp);
545 		}
546 
547 		/*
548 		 * Move the req'd B-tree elements from high in node1 to
549 		 * low in node2.
550 		 */
551 		be16_add(&node2->hdr.count, count);
552 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
553 		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
554 		btree_d = &node2->btree[0];
555 		memcpy(btree_d, btree_s, tmp);
556 		be16_add(&node1->hdr.count, -count);
557 	} else {
558 		/*
559 		 * Move the req'd B-tree elements from low in node2 to
560 		 * high in node1.
561 		 */
562 		count = -count;
563 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
564 		btree_s = &node2->btree[0];
565 		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
566 		memcpy(btree_d, btree_s, tmp);
567 		be16_add(&node1->hdr.count, count);
568 		xfs_da_log_buf(tp, blk1->bp,
569 			XFS_DA_LOGRANGE(node1, btree_d, tmp));
570 
571 		/*
572 		 * Move elements in node2 down to fill the hole.
573 		 */
574 		tmp  = be16_to_cpu(node2->hdr.count) - count;
575 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
576 		btree_s = &node2->btree[count];
577 		btree_d = &node2->btree[0];
578 		memmove(btree_d, btree_s, tmp);
579 		be16_add(&node2->hdr.count, -count);
580 	}
581 
582 	/*
583 	 * Log header of node 1 and all current bits of node 2.
584 	 */
585 	xfs_da_log_buf(tp, blk1->bp,
586 		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
587 	xfs_da_log_buf(tp, blk2->bp,
588 		XFS_DA_LOGRANGE(node2, &node2->hdr,
589 			sizeof(node2->hdr) +
590 			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));
591 
592 	/*
593 	 * Record the last hashval from each block for upward propagation.
594 	 * (note: don't use the swapped node pointers)
595 	 */
596 	node1 = blk1->bp->data;
597 	node2 = blk2->bp->data;
598 	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
599 	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);
600 
601 	/*
602 	 * Adjust the expected index for insertion.
603 	 */
604 	if (blk1->index >= be16_to_cpu(node1->hdr.count)) {
605 		blk2->index = blk1->index - be16_to_cpu(node1->hdr.count);
606 		blk1->index = be16_to_cpu(node1->hdr.count) + 1;	/* make it invalid */
607 	}
608 }
609 
610 /*
611  * Add a new entry to an intermediate node.
612  */
613 STATIC void
xfs_da_node_add(xfs_da_state_t * state,xfs_da_state_blk_t * oldblk,xfs_da_state_blk_t * newblk)614 xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
615 			       xfs_da_state_blk_t *newblk)
616 {
617 	xfs_da_intnode_t *node;
618 	xfs_da_node_entry_t *btree;
619 	int tmp;
620 	xfs_mount_t *mp;
621 
622 	node = oldblk->bp->data;
623 	mp = state->mp;
624 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
625 	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
626 	ASSERT(newblk->blkno != 0);
627 	if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
628 		ASSERT(newblk->blkno >= mp->m_dirleafblk &&
629 		       newblk->blkno < mp->m_dirfreeblk);
630 
631 	/*
632 	 * We may need to make some room before we insert the new node.
633 	 */
634 	tmp = 0;
635 	btree = &node->btree[ oldblk->index ];
636 	if (oldblk->index < be16_to_cpu(node->hdr.count)) {
637 		tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree);
638 		memmove(btree + 1, btree, tmp);
639 	}
640 	btree->hashval = cpu_to_be32(newblk->hashval);
641 	btree->before = cpu_to_be32(newblk->blkno);
642 	xfs_da_log_buf(state->args->trans, oldblk->bp,
643 		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
644 	be16_add(&node->hdr.count, 1);
645 	xfs_da_log_buf(state->args->trans, oldblk->bp,
646 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
647 
648 	/*
649 	 * Copy the last hash value from the oldblk to propagate upwards.
650 	 */
651 	oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval);
652 }
653 
654 /*========================================================================
655  * Routines used for shrinking the Btree.
656  *========================================================================*/
657 
658 /*
659  * Deallocate an empty leaf node, remove it from its parent,
660  * possibly deallocating that block, etc...
661  */
662 int
xfs_da_join(xfs_da_state_t * state)663 xfs_da_join(xfs_da_state_t *state)
664 {
665 	xfs_da_state_blk_t *drop_blk, *save_blk;
666 	int action, error;
667 
668 	action = 0;
669 	drop_blk = &state->path.blk[ state->path.active-1 ];
670 	save_blk = &state->altpath.blk[ state->path.active-1 ];
671 	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
672 	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
673 	       drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
674 
675 	/*
676 	 * Walk back up the tree joining/deallocating as necessary.
677 	 * When we stop dropping blocks, break out.
678 	 */
679 	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
680 		 state->path.active--) {
681 		/*
682 		 * See if we can combine the block with a neighbor.
683 		 *   (action == 0) => no options, just leave
684 		 *   (action == 1) => coalesce, then unlink
685 		 *   (action == 2) => block empty, unlink it
686 		 */
687 		switch (drop_blk->magic) {
688 		case XFS_ATTR_LEAF_MAGIC:
689 			error = xfs_attr_leaf_toosmall(state, &action);
690 			if (error)
691 				return(error);
692 			if (action == 0)
693 				return(0);
694 			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
695 			break;
696 		case XFS_DIR_LEAF_MAGIC:
697 			ASSERT(XFS_DIR_IS_V1(state->mp));
698 			error = xfs_dir_leaf_toosmall(state, &action);
699 			if (error)
700 				return(error);
701 			if (action == 0)
702 				return(0);
703 			xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
704 			break;
705 		case XFS_DIR2_LEAFN_MAGIC:
706 			ASSERT(XFS_DIR_IS_V2(state->mp));
707 			error = xfs_dir2_leafn_toosmall(state, &action);
708 			if (error)
709 				return error;
710 			if (action == 0)
711 				return 0;
712 			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
713 			break;
714 		case XFS_DA_NODE_MAGIC:
715 			/*
716 			 * Remove the offending node, fixup hashvals,
717 			 * check for a toosmall neighbor.
718 			 */
719 			xfs_da_node_remove(state, drop_blk);
720 			xfs_da_fixhashpath(state, &state->path);
721 			error = xfs_da_node_toosmall(state, &action);
722 			if (error)
723 				return(error);
724 			if (action == 0)
725 				return 0;
726 			xfs_da_node_unbalance(state, drop_blk, save_blk);
727 			break;
728 		}
729 		xfs_da_fixhashpath(state, &state->altpath);
730 		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
731 		xfs_da_state_kill_altpath(state);
732 		if (error)
733 			return(error);
734 		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
735 							 drop_blk->bp);
736 		drop_blk->bp = NULL;
737 		if (error)
738 			return(error);
739 	}
740 	/*
741 	 * We joined all the way to the top.  If it turns out that
742 	 * we only have one entry in the root, make the child block
743 	 * the new root.
744 	 */
745 	xfs_da_node_remove(state, drop_blk);
746 	xfs_da_fixhashpath(state, &state->path);
747 	error = xfs_da_root_join(state, &state->path.blk[0]);
748 	return(error);
749 }
750 
751 /*
752  * We have only one entry in the root.  Copy the only remaining child of
753  * the old root to block 0 as the new root node.
754  */
755 STATIC int
xfs_da_root_join(xfs_da_state_t * state,xfs_da_state_blk_t * root_blk)756 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
757 {
758 	xfs_da_intnode_t *oldroot;
759 	/* REFERENCED */
760 	xfs_da_blkinfo_t *blkinfo;
761 	xfs_da_args_t *args;
762 	xfs_dablk_t child;
763 	xfs_dabuf_t *bp;
764 	int error;
765 
766 	args = state->args;
767 	ASSERT(args != NULL);
768 	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
769 	oldroot = root_blk->bp->data;
770 	ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
771 	ASSERT(!oldroot->hdr.info.forw);
772 	ASSERT(!oldroot->hdr.info.back);
773 
774 	/*
775 	 * If the root has more than one child, then don't do anything.
776 	 */
777 	if (be16_to_cpu(oldroot->hdr.count) > 1)
778 		return(0);
779 
780 	/*
781 	 * Read in the (only) child block, then copy those bytes into
782 	 * the root block's buffer and free the original child block.
783 	 */
784 	child = be32_to_cpu(oldroot->btree[0].before);
785 	ASSERT(child != 0);
786 	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
787 					     args->whichfork);
788 	if (error)
789 		return(error);
790 	ASSERT(bp != NULL);
791 	blkinfo = bp->data;
792 	if (be16_to_cpu(oldroot->hdr.level) == 1) {
793 		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
794 		       be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
795 	} else {
796 		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
797 	}
798 	ASSERT(!blkinfo->forw);
799 	ASSERT(!blkinfo->back);
800 	memcpy(root_blk->bp->data, bp->data, state->blocksize);
801 	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
802 	error = xfs_da_shrink_inode(args, child, bp);
803 	return(error);
804 }
805 
806 /*
807  * Check a node block and its neighbors to see if the block should be
808  * collapsed into one or the other neighbor.  Always keep the block
809  * with the smaller block number.
810  * If the current block is over 50% full, don't try to join it, return 0.
811  * If the block is empty, fill in the state structure and return 2.
812  * If it can be collapsed, fill in the state structure and return 1.
813  * If nothing can be done, return 0.
814  */
815 STATIC int
xfs_da_node_toosmall(xfs_da_state_t * state,int * action)816 xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
817 {
818 	xfs_da_intnode_t *node;
819 	xfs_da_state_blk_t *blk;
820 	xfs_da_blkinfo_t *info;
821 	int count, forward, error, retval, i;
822 	xfs_dablk_t blkno;
823 	xfs_dabuf_t *bp;
824 
825 	/*
826 	 * Check for the degenerate case of the block being over 50% full.
827 	 * If so, it's not worth even looking to see if we might be able
828 	 * to coalesce with a sibling.
829 	 */
830 	blk = &state->path.blk[ state->path.active-1 ];
831 	info = blk->bp->data;
832 	ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
833 	node = (xfs_da_intnode_t *)info;
834 	count = be16_to_cpu(node->hdr.count);
835 	if (count > (state->node_ents >> 1)) {
836 		*action = 0;	/* blk over 50%, don't try to join */
837 		return(0);	/* blk over 50%, don't try to join */
838 	}
839 
840 	/*
841 	 * Check for the degenerate case of the block being empty.
842 	 * If the block is empty, we'll simply delete it, no need to
843 	 * coalesce it with a sibling block.  We choose (arbitrarily)
844 	 * to merge with the forward block unless it is NULL.
845 	 */
846 	if (count == 0) {
847 		/*
848 		 * Make altpath point to the block we want to keep and
849 		 * path point to the block we want to drop (this one).
850 		 */
851 		forward = (info->forw != 0);
852 		memcpy(&state->altpath, &state->path, sizeof(state->path));
853 		error = xfs_da_path_shift(state, &state->altpath, forward,
854 						 0, &retval);
855 		if (error)
856 			return(error);
857 		if (retval) {
858 			*action = 0;
859 		} else {
860 			*action = 2;
861 		}
862 		return(0);
863 	}
864 
865 	/*
866 	 * Examine each sibling block to see if we can coalesce with
867 	 * at least 25% free space to spare.  We need to figure out
868 	 * whether to merge with the forward or the backward block.
869 	 * We prefer coalescing with the lower numbered sibling so as
870 	 * to shrink a directory over time.
871 	 */
872 	/* start with smaller blk num */
873 	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
874 	for (i = 0; i < 2; forward = !forward, i++) {
875 		if (forward)
876 			blkno = be32_to_cpu(info->forw);
877 		else
878 			blkno = be32_to_cpu(info->back);
879 		if (blkno == 0)
880 			continue;
881 		error = xfs_da_read_buf(state->args->trans, state->args->dp,
882 					blkno, -1, &bp, state->args->whichfork);
883 		if (error)
884 			return(error);
885 		ASSERT(bp != NULL);
886 
887 		node = (xfs_da_intnode_t *)info;
888 		count  = state->node_ents;
889 		count -= state->node_ents >> 2;
890 		count -= be16_to_cpu(node->hdr.count);
891 		node = bp->data;
892 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
893 		count -= be16_to_cpu(node->hdr.count);
894 		xfs_da_brelse(state->args->trans, bp);
895 		if (count >= 0)
896 			break;	/* fits with at least 25% to spare */
897 	}
898 	if (i >= 2) {
899 		*action = 0;
900 		return(0);
901 	}
902 
903 	/*
904 	 * Make altpath point to the block we want to keep (the lower
905 	 * numbered block) and path point to the block we want to drop.
906 	 */
907 	memcpy(&state->altpath, &state->path, sizeof(state->path));
908 	if (blkno < blk->blkno) {
909 		error = xfs_da_path_shift(state, &state->altpath, forward,
910 						 0, &retval);
911 		if (error) {
912 			return(error);
913 		}
914 		if (retval) {
915 			*action = 0;
916 			return(0);
917 		}
918 	} else {
919 		error = xfs_da_path_shift(state, &state->path, forward,
920 						 0, &retval);
921 		if (error) {
922 			return(error);
923 		}
924 		if (retval) {
925 			*action = 0;
926 			return(0);
927 		}
928 	}
929 	*action = 1;
930 	return(0);
931 }
932 
933 /*
934  * Walk back up the tree adjusting hash values as necessary,
935  * when we stop making changes, return.
936  */
937 void
xfs_da_fixhashpath(xfs_da_state_t * state,xfs_da_state_path_t * path)938 xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
939 {
940 	xfs_da_state_blk_t *blk;
941 	xfs_da_intnode_t *node;
942 	xfs_da_node_entry_t *btree;
943 	xfs_dahash_t lasthash=0;
944 	int level, count;
945 
946 	level = path->active-1;
947 	blk = &path->blk[ level ];
948 	switch (blk->magic) {
949 	case XFS_ATTR_LEAF_MAGIC:
950 		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
951 		if (count == 0)
952 			return;
953 		break;
954 	case XFS_DIR_LEAF_MAGIC:
955 		ASSERT(XFS_DIR_IS_V1(state->mp));
956 		lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
957 		if (count == 0)
958 			return;
959 		break;
960 	case XFS_DIR2_LEAFN_MAGIC:
961 		ASSERT(XFS_DIR_IS_V2(state->mp));
962 		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
963 		if (count == 0)
964 			return;
965 		break;
966 	case XFS_DA_NODE_MAGIC:
967 		lasthash = xfs_da_node_lasthash(blk->bp, &count);
968 		if (count == 0)
969 			return;
970 		break;
971 	}
972 	for (blk--, level--; level >= 0; blk--, level--) {
973 		node = blk->bp->data;
974 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
975 		btree = &node->btree[ blk->index ];
976 		if (be32_to_cpu(btree->hashval) == lasthash)
977 			break;
978 		blk->hashval = lasthash;
979 		btree->hashval = cpu_to_be32(lasthash);
980 		xfs_da_log_buf(state->args->trans, blk->bp,
981 				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
982 
983 		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
984 	}
985 }
986 
987 /*
988  * Remove an entry from an intermediate node.
989  */
990 STATIC void
xfs_da_node_remove(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk)991 xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
992 {
993 	xfs_da_intnode_t *node;
994 	xfs_da_node_entry_t *btree;
995 	int tmp;
996 
997 	node = drop_blk->bp->data;
998 	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
999 	ASSERT(drop_blk->index >= 0);
1000 
1001 	/*
1002 	 * Copy over the offending entry, or just zero it out.
1003 	 */
1004 	btree = &node->btree[drop_blk->index];
1005 	if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) {
1006 		tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
1007 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
1008 		memmove(btree, btree + 1, tmp);
1009 		xfs_da_log_buf(state->args->trans, drop_blk->bp,
1010 		    XFS_DA_LOGRANGE(node, btree, tmp));
1011 		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
1012 	}
1013 	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
1014 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
1015 	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
1016 	be16_add(&node->hdr.count, -1);
1017 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
1018 	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
1019 
1020 	/*
1021 	 * Copy the last hash value from the block to propagate upwards.
1022 	 */
1023 	btree--;
1024 	drop_blk->hashval = be32_to_cpu(btree->hashval);
1025 }
1026 
1027 /*
1028  * Unbalance the btree elements between two intermediate nodes,
1029  * move all Btree elements from one node into another.
1030  */
1031 STATIC void
xfs_da_node_unbalance(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk,xfs_da_state_blk_t * save_blk)1032 xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1033 				     xfs_da_state_blk_t *save_blk)
1034 {
1035 	xfs_da_intnode_t *drop_node, *save_node;
1036 	xfs_da_node_entry_t *btree;
1037 	int tmp;
1038 	xfs_trans_t *tp;
1039 
1040 	drop_node = drop_blk->bp->data;
1041 	save_node = save_blk->bp->data;
1042 	ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
1043 	ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
1044 	tp = state->args->trans;
1045 
1046 	/*
1047 	 * If the dying block has lower hashvals, then move all the
1048 	 * elements in the remaining block up to make a hole.
1049 	 */
1050 	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
1051 	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
1052 	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
1053 	{
1054 		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
1055 		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
1056 		memmove(btree, &save_node->btree[0], tmp);
1057 		btree = &save_node->btree[0];
1058 		xfs_da_log_buf(tp, save_blk->bp,
1059 			XFS_DA_LOGRANGE(save_node, btree,
1060 				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
1061 				sizeof(xfs_da_node_entry_t)));
1062 	} else {
1063 		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
1064 		xfs_da_log_buf(tp, save_blk->bp,
1065 			XFS_DA_LOGRANGE(save_node, btree,
1066 				be16_to_cpu(drop_node->hdr.count) *
1067 				sizeof(xfs_da_node_entry_t)));
1068 	}
1069 
1070 	/*
1071 	 * Move all the B-tree elements from drop_blk to save_blk.
1072 	 */
1073 	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
1074 	memcpy(btree, &drop_node->btree[0], tmp);
1075 	be16_add(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
1076 
1077 	xfs_da_log_buf(tp, save_blk->bp,
1078 		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1079 			sizeof(save_node->hdr)));
1080 
1081 	/*
1082 	 * Save the last hashval in the remaining block for upward propagation.
1083 	 */
1084 	save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval);
1085 }
1086 
1087 /*========================================================================
1088  * Routines used for finding things in the Btree.
1089  *========================================================================*/
1090 
1091 /*
1092  * Walk down the Btree looking for a particular filename, filling
1093  * in the state structure as we go.
1094  *
1095  * We will set the state structure to point to each of the elements
1096  * in each of the nodes where either the hashval is or should be.
1097  *
1098  * We support duplicate hashval's so for each entry in the current
1099  * node that could contain the desired hashval, descend.  This is a
1100  * pruned depth-first tree search.
1101  */
1102 int							/* error */
xfs_da_node_lookup_int(xfs_da_state_t * state,int * result)1103 xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1104 {
1105 	xfs_da_state_blk_t *blk;
1106 	xfs_da_blkinfo_t *curr;
1107 	xfs_da_intnode_t *node;
1108 	xfs_da_node_entry_t *btree;
1109 	xfs_dablk_t blkno;
1110 	int probe, span, max, error, retval;
1111 	xfs_dahash_t hashval;
1112 	xfs_da_args_t *args;
1113 
1114 	args = state->args;
1115 
1116 	/*
1117 	 * Descend thru the B-tree searching each level for the right
1118 	 * node to use, until the right hashval is found.
1119 	 */
1120 	if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
1121 		blkno = state->mp->m_dirleafblk;
1122 	else
1123 		blkno = 0;
1124 	for (blk = &state->path.blk[0], state->path.active = 1;
1125 			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
1126 			 blk++, state->path.active++) {
1127 		/*
1128 		 * Read the next node down in the tree.
1129 		 */
1130 		blk->blkno = blkno;
1131 		error = xfs_da_read_buf(args->trans, args->dp, blkno,
1132 					-1, &blk->bp, args->whichfork);
1133 		if (error) {
1134 			blk->blkno = 0;
1135 			state->path.active--;
1136 			return(error);
1137 		}
1138 		curr = blk->bp->data;
1139 		ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC ||
1140 		       be16_to_cpu(curr->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1141 		       be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC);
1142 
1143 		/*
1144 		 * Search an intermediate node for a match.
1145 		 */
1146 		blk->magic = be16_to_cpu(curr->magic);
1147 		if (blk->magic == XFS_DA_NODE_MAGIC) {
1148 			node = blk->bp->data;
1149 			blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1150 
1151 			/*
1152 			 * Binary search.  (note: small blocks will skip loop)
1153 			 */
1154 			max = be16_to_cpu(node->hdr.count);
1155 			probe = span = max / 2;
1156 			hashval = args->hashval;
1157 			for (btree = &node->btree[probe]; span > 4;
1158 				   btree = &node->btree[probe]) {
1159 				span /= 2;
1160 				if (be32_to_cpu(btree->hashval) < hashval)
1161 					probe += span;
1162 				else if (be32_to_cpu(btree->hashval) > hashval)
1163 					probe -= span;
1164 				else
1165 					break;
1166 			}
1167 			ASSERT((probe >= 0) && (probe < max));
1168 			ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));
1169 
1170 			/*
1171 			 * Since we may have duplicate hashval's, find the first
1172 			 * matching hashval in the node.
1173 			 */
1174 			while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) {
1175 				btree--;
1176 				probe--;
1177 			}
1178 			while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) {
1179 				btree++;
1180 				probe++;
1181 			}
1182 
1183 			/*
1184 			 * Pick the right block to descend on.
1185 			 */
1186 			if (probe == max) {
1187 				blk->index = max-1;
1188 				blkno = be32_to_cpu(node->btree[max-1].before);
1189 			} else {
1190 				blk->index = probe;
1191 				blkno = be32_to_cpu(btree->before);
1192 			}
1193 		}
1194 		else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) {
1195 			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1196 			break;
1197 		}
1198 		else if (be16_to_cpu(curr->magic) == XFS_DIR_LEAF_MAGIC) {
1199 			blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
1200 			break;
1201 		}
1202 		else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) {
1203 			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1204 			break;
1205 		}
1206 	}
1207 
1208 	/*
1209 	 * A leaf block that ends in the hashval that we are interested in
1210 	 * (final hashval == search hashval) means that the next block may
1211 	 * contain more entries with the same hashval, shift upward to the
1212 	 * next leaf and keep searching.
1213 	 */
1214 	for (;;) {
1215 		if (blk->magic == XFS_DIR_LEAF_MAGIC) {
1216 			ASSERT(XFS_DIR_IS_V1(state->mp));
1217 			retval = xfs_dir_leaf_lookup_int(blk->bp, args,
1218 								  &blk->index);
1219 		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1220 			ASSERT(XFS_DIR_IS_V2(state->mp));
1221 			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1222 							&blk->index, state);
1223 		}
1224 		else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1225 			retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1226 			blk->index = args->index;
1227 			args->blkno = blk->blkno;
1228 		}
1229 		if (((retval == ENOENT) || (retval == ENOATTR)) &&
1230 		    (blk->hashval == args->hashval)) {
1231 			error = xfs_da_path_shift(state, &state->path, 1, 1,
1232 							 &retval);
1233 			if (error)
1234 				return(error);
1235 			if (retval == 0) {
1236 				continue;
1237 			}
1238 			else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1239 				/* path_shift() gives ENOENT */
1240 				retval = XFS_ERROR(ENOATTR);
1241 			}
1242 		}
1243 		break;
1244 	}
1245 	*result = retval;
1246 	return(0);
1247 }
1248 
1249 /*========================================================================
1250  * Utility routines.
1251  *========================================================================*/
1252 
1253 /*
1254  * Link a new block into a doubly linked list of blocks (of whatever type).
1255  */
1256 int							/* error */
xfs_da_blk_link(xfs_da_state_t * state,xfs_da_state_blk_t * old_blk,xfs_da_state_blk_t * new_blk)1257 xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1258 			       xfs_da_state_blk_t *new_blk)
1259 {
1260 	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
1261 	xfs_da_args_t *args;
1262 	int before=0, error;
1263 	xfs_dabuf_t *bp;
1264 
1265 	/*
1266 	 * Set up environment.
1267 	 */
1268 	args = state->args;
1269 	ASSERT(args != NULL);
1270 	old_info = old_blk->bp->data;
1271 	new_info = new_blk->bp->data;
1272 	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1273 	       old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1274 	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1275 	ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
1276 	ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
1277 	ASSERT(old_blk->magic == new_blk->magic);
1278 
1279 	switch (old_blk->magic) {
1280 	case XFS_ATTR_LEAF_MAGIC:
1281 		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1282 		break;
1283 	case XFS_DIR_LEAF_MAGIC:
1284 		ASSERT(XFS_DIR_IS_V1(state->mp));
1285 		before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
1286 		break;
1287 	case XFS_DIR2_LEAFN_MAGIC:
1288 		ASSERT(XFS_DIR_IS_V2(state->mp));
1289 		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1290 		break;
1291 	case XFS_DA_NODE_MAGIC:
1292 		before = xfs_da_node_order(old_blk->bp, new_blk->bp);
1293 		break;
1294 	}
1295 
1296 	/*
1297 	 * Link blocks in appropriate order.
1298 	 */
1299 	if (before) {
1300 		/*
1301 		 * Link new block in before existing block.
1302 		 */
1303 		new_info->forw = cpu_to_be32(old_blk->blkno);
1304 		new_info->back = old_info->back;
1305 		if (old_info->back) {
1306 			error = xfs_da_read_buf(args->trans, args->dp,
1307 						be32_to_cpu(old_info->back),
1308 						-1, &bp, args->whichfork);
1309 			if (error)
1310 				return(error);
1311 			ASSERT(bp != NULL);
1312 			tmp_info = bp->data;
1313 			ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
1314 			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1315 			tmp_info->forw = cpu_to_be32(new_blk->blkno);
1316 			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1317 			xfs_da_buf_done(bp);
1318 		}
1319 		old_info->back = cpu_to_be32(new_blk->blkno);
1320 	} else {
1321 		/*
1322 		 * Link new block in after existing block.
1323 		 */
1324 		new_info->forw = old_info->forw;
1325 		new_info->back = cpu_to_be32(old_blk->blkno);
1326 		if (old_info->forw) {
1327 			error = xfs_da_read_buf(args->trans, args->dp,
1328 						be32_to_cpu(old_info->forw),
1329 						-1, &bp, args->whichfork);
1330 			if (error)
1331 				return(error);
1332 			ASSERT(bp != NULL);
1333 			tmp_info = bp->data;
1334 			ASSERT(tmp_info->magic == old_info->magic);
1335 			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1336 			tmp_info->back = cpu_to_be32(new_blk->blkno);
1337 			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1338 			xfs_da_buf_done(bp);
1339 		}
1340 		old_info->forw = cpu_to_be32(new_blk->blkno);
1341 	}
1342 
1343 	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1344 	xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1345 	return(0);
1346 }
1347 
1348 /*
1349  * Compare two intermediate nodes for "order".
1350  */
1351 STATIC int
xfs_da_node_order(xfs_dabuf_t * node1_bp,xfs_dabuf_t * node2_bp)1352 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1353 {
1354 	xfs_da_intnode_t *node1, *node2;
1355 
1356 	node1 = node1_bp->data;
1357 	node2 = node2_bp->data;
1358 	ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
1359 	       (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
1360 	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
1361 	    ((be32_to_cpu(node2->btree[0].hashval) <
1362 	      be32_to_cpu(node1->btree[0].hashval)) ||
1363 	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
1364 	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
1365 		return(1);
1366 	}
1367 	return(0);
1368 }
1369 
1370 /*
1371  * Pick up the last hashvalue from an intermediate node.
1372  */
1373 STATIC uint
xfs_da_node_lasthash(xfs_dabuf_t * bp,int * count)1374 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1375 {
1376 	xfs_da_intnode_t *node;
1377 
1378 	node = bp->data;
1379 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
1380 	if (count)
1381 		*count = be16_to_cpu(node->hdr.count);
1382 	if (!node->hdr.count)
1383 		return(0);
1384 	return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1385 }
1386 
1387 /*
1388  * Unlink a block from a doubly linked list of blocks.
1389  */
1390 STATIC int						/* error */
xfs_da_blk_unlink(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk,xfs_da_state_blk_t * save_blk)1391 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1392 				 xfs_da_state_blk_t *save_blk)
1393 {
1394 	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
1395 	xfs_da_args_t *args;
1396 	xfs_dabuf_t *bp;
1397 	int error;
1398 
1399 	/*
1400 	 * Set up environment.
1401 	 */
1402 	args = state->args;
1403 	ASSERT(args != NULL);
1404 	save_info = save_blk->bp->data;
1405 	drop_info = drop_blk->bp->data;
1406 	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1407 	       save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1408 	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1409 	ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
1410 	ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
1411 	ASSERT(save_blk->magic == drop_blk->magic);
1412 	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1413 	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
1414 	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
1415 	       (be32_to_cpu(drop_info->back) == save_blk->blkno));
1416 
1417 	/*
1418 	 * Unlink the leaf block from the doubly linked chain of leaves.
1419 	 */
1420 	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1421 		save_info->back = drop_info->back;
1422 		if (drop_info->back) {
1423 			error = xfs_da_read_buf(args->trans, args->dp,
1424 						be32_to_cpu(drop_info->back),
1425 						-1, &bp, args->whichfork);
1426 			if (error)
1427 				return(error);
1428 			ASSERT(bp != NULL);
1429 			tmp_info = bp->data;
1430 			ASSERT(tmp_info->magic == save_info->magic);
1431 			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
1432 			tmp_info->forw = cpu_to_be32(save_blk->blkno);
1433 			xfs_da_log_buf(args->trans, bp, 0,
1434 						    sizeof(*tmp_info) - 1);
1435 			xfs_da_buf_done(bp);
1436 		}
1437 	} else {
1438 		save_info->forw = drop_info->forw;
1439 		if (drop_info->forw) {
1440 			error = xfs_da_read_buf(args->trans, args->dp,
1441 						be32_to_cpu(drop_info->forw),
1442 						-1, &bp, args->whichfork);
1443 			if (error)
1444 				return(error);
1445 			ASSERT(bp != NULL);
1446 			tmp_info = bp->data;
1447 			ASSERT(tmp_info->magic == save_info->magic);
1448 			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
1449 			tmp_info->back = cpu_to_be32(save_blk->blkno);
1450 			xfs_da_log_buf(args->trans, bp, 0,
1451 						    sizeof(*tmp_info) - 1);
1452 			xfs_da_buf_done(bp);
1453 		}
1454 	}
1455 
1456 	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1457 	return(0);
1458 }
1459 
1460 /*
1461  * Move a path "forward" or "!forward" one block at the current level.
1462  *
1463  * This routine will adjust a "path" to point to the next block
1464  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1465  * Btree, including updating pointers to the intermediate nodes between
1466  * the new bottom and the root.
1467  */
1468 int							/* error */
xfs_da_path_shift(xfs_da_state_t * state,xfs_da_state_path_t * path,int forward,int release,int * result)1469 xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1470 				 int forward, int release, int *result)
1471 {
1472 	xfs_da_state_blk_t *blk;
1473 	xfs_da_blkinfo_t *info;
1474 	xfs_da_intnode_t *node;
1475 	xfs_da_args_t *args;
1476 	xfs_dablk_t blkno=0;
1477 	int level, error;
1478 
1479 	/*
1480 	 * Roll up the Btree looking for the first block where our
1481 	 * current index is not at the edge of the block.  Note that
1482 	 * we skip the bottom layer because we want the sibling block.
1483 	 */
1484 	args = state->args;
1485 	ASSERT(args != NULL);
1486 	ASSERT(path != NULL);
1487 	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1488 	level = (path->active-1) - 1;	/* skip bottom layer in path */
1489 	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1490 		ASSERT(blk->bp != NULL);
1491 		node = blk->bp->data;
1492 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
1493 		if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
1494 			blk->index++;
1495 			blkno = be32_to_cpu(node->btree[blk->index].before);
1496 			break;
1497 		} else if (!forward && (blk->index > 0)) {
1498 			blk->index--;
1499 			blkno = be32_to_cpu(node->btree[blk->index].before);
1500 			break;
1501 		}
1502 	}
1503 	if (level < 0) {
1504 		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
1505 		ASSERT(args->oknoent);
1506 		return(0);
1507 	}
1508 
1509 	/*
1510 	 * Roll down the edge of the subtree until we reach the
1511 	 * same depth we were at originally.
1512 	 */
1513 	for (blk++, level++; level < path->active; blk++, level++) {
1514 		/*
1515 		 * Release the old block.
1516 		 * (if it's dirty, trans won't actually let go)
1517 		 */
1518 		if (release)
1519 			xfs_da_brelse(args->trans, blk->bp);
1520 
1521 		/*
1522 		 * Read the next child block.
1523 		 */
1524 		blk->blkno = blkno;
1525 		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
1526 						     &blk->bp, args->whichfork);
1527 		if (error)
1528 			return(error);
1529 		ASSERT(blk->bp != NULL);
1530 		info = blk->bp->data;
1531 		ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
1532 		       be16_to_cpu(info->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1533 		       be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
1534 		blk->magic = be16_to_cpu(info->magic);
1535 		if (blk->magic == XFS_DA_NODE_MAGIC) {
1536 			node = (xfs_da_intnode_t *)info;
1537 			blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1538 			if (forward)
1539 				blk->index = 0;
1540 			else
1541 				blk->index = be16_to_cpu(node->hdr.count)-1;
1542 			blkno = be32_to_cpu(node->btree[blk->index].before);
1543 		} else {
1544 			ASSERT(level == path->active-1);
1545 			blk->index = 0;
1546 			switch(blk->magic) {
1547 			case XFS_ATTR_LEAF_MAGIC:
1548 				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1549 								      NULL);
1550 				break;
1551 			case XFS_DIR_LEAF_MAGIC:
1552 				ASSERT(XFS_DIR_IS_V1(state->mp));
1553 				blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
1554 								     NULL);
1555 				break;
1556 			case XFS_DIR2_LEAFN_MAGIC:
1557 				ASSERT(XFS_DIR_IS_V2(state->mp));
1558 				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1559 								       NULL);
1560 				break;
1561 			default:
1562 				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1563 				       blk->magic ==
1564 				       XFS_DIRX_LEAF_MAGIC(state->mp));
1565 				break;
1566 			}
1567 		}
1568 	}
1569 	*result = 0;
1570 	return(0);
1571 }
1572 
1573 
1574 /*========================================================================
1575  * Utility routines.
1576  *========================================================================*/
1577 
1578 /*
1579  * Implement a simple hash on a character string.
1580  * Rotate the hash value by 7 bits, then XOR each character in.
1581  * This is implemented with some source-level loop unrolling.
1582  */
1583 xfs_dahash_t
xfs_da_hashname(const uchar_t * name,int namelen)1584 xfs_da_hashname(const uchar_t *name, int namelen)
1585 {
1586 	xfs_dahash_t hash;
1587 
1588 	/*
1589 	 * Do four characters at a time as long as we can.
1590 	 */
1591 	for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
1592 		hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
1593 		       (name[3] << 0) ^ rol32(hash, 7 * 4);
1594 
1595 	/*
1596 	 * Now do the rest of the characters.
1597 	 */
1598 	switch (namelen) {
1599 	case 3:
1600 		return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
1601 		       rol32(hash, 7 * 3);
1602 	case 2:
1603 		return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
1604 	case 1:
1605 		return (name[0] << 0) ^ rol32(hash, 7 * 1);
1606 	default: /* case 0: */
1607 		return hash;
1608 	}
1609 }
1610 
1611 /*
1612  * Add a block to the btree ahead of the file.
1613  * Return the new block number to the caller.
1614  */
1615 int
xfs_da_grow_inode(xfs_da_args_t * args,xfs_dablk_t * new_blkno)1616 xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1617 {
1618 	xfs_fileoff_t bno, b;
1619 	xfs_bmbt_irec_t map;
1620 	xfs_bmbt_irec_t	*mapp;
1621 	xfs_inode_t *dp;
1622 	int nmap, error, w, count, c, got, i, mapi;
1623 	xfs_fsize_t size;
1624 	xfs_trans_t *tp;
1625 	xfs_mount_t *mp;
1626 
1627 	dp = args->dp;
1628 	mp = dp->i_mount;
1629 	w = args->whichfork;
1630 	tp = args->trans;
1631 	/*
1632 	 * For new directories adjust the file offset and block count.
1633 	 */
1634 	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
1635 		bno = mp->m_dirleafblk;
1636 		count = mp->m_dirblkfsbs;
1637 	} else {
1638 		bno = 0;
1639 		count = 1;
1640 	}
1641 	/*
1642 	 * Find a spot in the file space to put the new block.
1643 	 */
1644 	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) {
1645 		return error;
1646 	}
1647 	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1648 		ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
1649 	/*
1650 	 * Try mapping it in one filesystem block.
1651 	 */
1652 	nmap = 1;
1653 	ASSERT(args->firstblock != NULL);
1654 	if ((error = xfs_bmapi(tp, dp, bno, count,
1655 			XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1656 			XFS_BMAPI_CONTIG,
1657 			args->firstblock, args->total, &map, &nmap,
1658 			args->flist, NULL))) {
1659 		return error;
1660 	}
1661 	ASSERT(nmap <= 1);
1662 	if (nmap == 1) {
1663 		mapp = &map;
1664 		mapi = 1;
1665 	}
1666 	/*
1667 	 * If we didn't get it and the block might work if fragmented,
1668 	 * try without the CONTIG flag.  Loop until we get it all.
1669 	 */
1670 	else if (nmap == 0 && count > 1) {
1671 		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
1672 		for (b = bno, mapi = 0; b < bno + count; ) {
1673 			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
1674 			c = (int)(bno + count - b);
1675 			if ((error = xfs_bmapi(tp, dp, b, c,
1676 					XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
1677 					XFS_BMAPI_METADATA,
1678 					args->firstblock, args->total,
1679 					&mapp[mapi], &nmap, args->flist,
1680 					NULL))) {
1681 				kmem_free(mapp, sizeof(*mapp) * count);
1682 				return error;
1683 			}
1684 			if (nmap < 1)
1685 				break;
1686 			mapi += nmap;
1687 			b = mapp[mapi - 1].br_startoff +
1688 			    mapp[mapi - 1].br_blockcount;
1689 		}
1690 	} else {
1691 		mapi = 0;
1692 		mapp = NULL;
1693 	}
1694 	/*
1695 	 * Count the blocks we got, make sure it matches the total.
1696 	 */
1697 	for (i = 0, got = 0; i < mapi; i++)
1698 		got += mapp[i].br_blockcount;
1699 	if (got != count || mapp[0].br_startoff != bno ||
1700 	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1701 	    bno + count) {
1702 		if (mapp != &map)
1703 			kmem_free(mapp, sizeof(*mapp) * count);
1704 		return XFS_ERROR(ENOSPC);
1705 	}
1706 	if (mapp != &map)
1707 		kmem_free(mapp, sizeof(*mapp) * count);
1708 	*new_blkno = (xfs_dablk_t)bno;
1709 	/*
1710 	 * For version 1 directories, adjust the file size if it changed.
1711 	 */
1712 	if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
1713 		ASSERT(mapi == 1);
1714 		if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
1715 			return error;
1716 		size = XFS_FSB_TO_B(mp, bno);
1717 		if (size != dp->i_d.di_size) {
1718 			dp->i_d.di_size = size;
1719 			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1720 		}
1721 	}
1722 	return 0;
1723 }
1724 
1725 /*
1726  * Ick.  We need to always be able to remove a btree block, even
1727  * if there's no space reservation because the filesystem is full.
1728  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1729  * It swaps the target block with the last block in the file.  The
1730  * last block in the file can always be removed since it can't cause
1731  * a bmap btree split to do that.
1732  */
1733 STATIC int
xfs_da_swap_lastblock(xfs_da_args_t * args,xfs_dablk_t * dead_blknop,xfs_dabuf_t ** dead_bufp)1734 xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1735 		      xfs_dabuf_t **dead_bufp)
1736 {
1737 	xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
1738 	xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
1739 	xfs_fileoff_t lastoff;
1740 	xfs_inode_t *ip;
1741 	xfs_trans_t *tp;
1742 	xfs_mount_t *mp;
1743 	int error, w, entno, level, dead_level;
1744 	xfs_da_blkinfo_t *dead_info, *sib_info;
1745 	xfs_da_intnode_t *par_node, *dead_node;
1746 	xfs_dir_leafblock_t *dead_leaf;
1747 	xfs_dir2_leaf_t *dead_leaf2;
1748 	xfs_dahash_t dead_hash;
1749 
1750 	dead_buf = *dead_bufp;
1751 	dead_blkno = *dead_blknop;
1752 	tp = args->trans;
1753 	ip = args->dp;
1754 	w = args->whichfork;
1755 	ASSERT(w == XFS_DATA_FORK);
1756 	mp = ip->i_mount;
1757 	if (XFS_DIR_IS_V2(mp)) {
1758 		lastoff = mp->m_dirfreeblk;
1759 		error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1760 	} else
1761 		error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
1762 	if (error)
1763 		return error;
1764 	if (unlikely(lastoff == 0)) {
1765 		XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
1766 				 mp);
1767 		return XFS_ERROR(EFSCORRUPTED);
1768 	}
1769 	/*
1770 	 * Read the last block in the btree space.
1771 	 */
1772 	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1773 	if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
1774 		return error;
1775 	/*
1776 	 * Copy the last block into the dead buffer and log it.
1777 	 */
1778 	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
1779 	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
1780 	dead_info = dead_buf->data;
1781 	/*
1782 	 * Get values from the moved block.
1783 	 */
1784 	if (be16_to_cpu(dead_info->magic) == XFS_DIR_LEAF_MAGIC) {
1785 		ASSERT(XFS_DIR_IS_V1(mp));
1786 		dead_leaf = (xfs_dir_leafblock_t *)dead_info;
1787 		dead_level = 0;
1788 		dead_hash =
1789 			INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1790 	} else if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
1791 		ASSERT(XFS_DIR_IS_V2(mp));
1792 		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1793 		dead_level = 0;
1794 		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
1795 	} else {
1796 		ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
1797 		dead_node = (xfs_da_intnode_t *)dead_info;
1798 		dead_level = be16_to_cpu(dead_node->hdr.level);
1799 		dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
1800 	}
1801 	sib_buf = par_buf = NULL;
1802 	/*
1803 	 * If the moved block has a left sibling, fix up the pointers.
1804 	 */
1805 	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
1806 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1807 			goto done;
1808 		sib_info = sib_buf->data;
1809 		if (unlikely(
1810 		    be32_to_cpu(sib_info->forw) != last_blkno ||
1811 		    sib_info->magic != dead_info->magic)) {
1812 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
1813 					 XFS_ERRLEVEL_LOW, mp);
1814 			error = XFS_ERROR(EFSCORRUPTED);
1815 			goto done;
1816 		}
1817 		sib_info->forw = cpu_to_be32(dead_blkno);
1818 		xfs_da_log_buf(tp, sib_buf,
1819 			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
1820 					sizeof(sib_info->forw)));
1821 		xfs_da_buf_done(sib_buf);
1822 		sib_buf = NULL;
1823 	}
1824 	/*
1825 	 * If the moved block has a right sibling, fix up the pointers.
1826 	 */
1827 	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
1828 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1829 			goto done;
1830 		sib_info = sib_buf->data;
1831 		if (unlikely(
1832 		       be32_to_cpu(sib_info->back) != last_blkno ||
1833 		       sib_info->magic != dead_info->magic)) {
1834 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
1835 					 XFS_ERRLEVEL_LOW, mp);
1836 			error = XFS_ERROR(EFSCORRUPTED);
1837 			goto done;
1838 		}
1839 		sib_info->back = cpu_to_be32(dead_blkno);
1840 		xfs_da_log_buf(tp, sib_buf,
1841 			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
1842 					sizeof(sib_info->back)));
1843 		xfs_da_buf_done(sib_buf);
1844 		sib_buf = NULL;
1845 	}
1846 	par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;
1847 	level = -1;
1848 	/*
1849 	 * Walk down the tree looking for the parent of the moved block.
1850 	 */
1851 	for (;;) {
1852 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1853 			goto done;
1854 		par_node = par_buf->data;
1855 		if (unlikely(
1856 		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
1857 		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
1858 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1859 					 XFS_ERRLEVEL_LOW, mp);
1860 			error = XFS_ERROR(EFSCORRUPTED);
1861 			goto done;
1862 		}
1863 		level = be16_to_cpu(par_node->hdr.level);
1864 		for (entno = 0;
1865 		     entno < be16_to_cpu(par_node->hdr.count) &&
1866 		     be32_to_cpu(par_node->btree[entno].hashval) < dead_hash;
1867 		     entno++)
1868 			continue;
1869 		if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) {
1870 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1871 					 XFS_ERRLEVEL_LOW, mp);
1872 			error = XFS_ERROR(EFSCORRUPTED);
1873 			goto done;
1874 		}
1875 		par_blkno = be32_to_cpu(par_node->btree[entno].before);
1876 		if (level == dead_level + 1)
1877 			break;
1878 		xfs_da_brelse(tp, par_buf);
1879 		par_buf = NULL;
1880 	}
1881 	/*
1882 	 * We're in the right parent block.
1883 	 * Look for the right entry.
1884 	 */
1885 	for (;;) {
1886 		for (;
1887 		     entno < be16_to_cpu(par_node->hdr.count) &&
1888 		     be32_to_cpu(par_node->btree[entno].before) != last_blkno;
1889 		     entno++)
1890 			continue;
1891 		if (entno < be16_to_cpu(par_node->hdr.count))
1892 			break;
1893 		par_blkno = be32_to_cpu(par_node->hdr.info.forw);
1894 		xfs_da_brelse(tp, par_buf);
1895 		par_buf = NULL;
1896 		if (unlikely(par_blkno == 0)) {
1897 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
1898 					 XFS_ERRLEVEL_LOW, mp);
1899 			error = XFS_ERROR(EFSCORRUPTED);
1900 			goto done;
1901 		}
1902 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1903 			goto done;
1904 		par_node = par_buf->data;
1905 		if (unlikely(
1906 		    be16_to_cpu(par_node->hdr.level) != level ||
1907 		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
1908 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1909 					 XFS_ERRLEVEL_LOW, mp);
1910 			error = XFS_ERROR(EFSCORRUPTED);
1911 			goto done;
1912 		}
1913 		entno = 0;
1914 	}
1915 	/*
1916 	 * Update the parent entry pointing to the moved block.
1917 	 */
1918 	par_node->btree[entno].before = cpu_to_be32(dead_blkno);
1919 	xfs_da_log_buf(tp, par_buf,
1920 		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
1921 				sizeof(par_node->btree[entno].before)));
1922 	xfs_da_buf_done(par_buf);
1923 	xfs_da_buf_done(dead_buf);
1924 	*dead_blknop = last_blkno;
1925 	*dead_bufp = last_buf;
1926 	return 0;
1927 done:
1928 	if (par_buf)
1929 		xfs_da_brelse(tp, par_buf);
1930 	if (sib_buf)
1931 		xfs_da_brelse(tp, sib_buf);
1932 	xfs_da_brelse(tp, last_buf);
1933 	return error;
1934 }
1935 
1936 /*
1937  * Remove a btree block from a directory or attribute.
1938  */
1939 int
xfs_da_shrink_inode(xfs_da_args_t * args,xfs_dablk_t dead_blkno,xfs_dabuf_t * dead_buf)1940 xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1941 		    xfs_dabuf_t *dead_buf)
1942 {
1943 	xfs_inode_t *dp;
1944 	int done, error, w, count;
1945 	xfs_fileoff_t bno;
1946 	xfs_fsize_t size;
1947 	xfs_trans_t *tp;
1948 	xfs_mount_t *mp;
1949 
1950 	dp = args->dp;
1951 	w = args->whichfork;
1952 	tp = args->trans;
1953 	mp = dp->i_mount;
1954 	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1955 		count = mp->m_dirblkfsbs;
1956 	else
1957 		count = 1;
1958 	for (;;) {
1959 		/*
1960 		 * Remove extents.  If we get ENOSPC for a dir we have to move
1961 		 * the last block to the place we want to kill.
1962 		 */
1963 		if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1964 				XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
1965 				0, args->firstblock, args->flist, NULL,
1966 				&done)) == ENOSPC) {
1967 			if (w != XFS_DATA_FORK)
1968 				goto done;
1969 			if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
1970 					&dead_buf)))
1971 				goto done;
1972 		} else if (error)
1973 			goto done;
1974 		else
1975 			break;
1976 	}
1977 	ASSERT(done);
1978 	xfs_da_binval(tp, dead_buf);
1979 	/*
1980 	 * Adjust the directory size for version 1.
1981 	 */
1982 	if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
1983 		if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
1984 			return error;
1985 		size = XFS_FSB_TO_B(dp->i_mount, bno);
1986 		if (size != dp->i_d.di_size) {
1987 			dp->i_d.di_size = size;
1988 			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1989 		}
1990 	}
1991 	return 0;
1992 done:
1993 	xfs_da_binval(tp, dead_buf);
1994 	return error;
1995 }
1996 
1997 /*
1998  * See if the mapping(s) for this btree block are valid, i.e.
1999  * don't contain holes, are logically contiguous, and cover the whole range.
2000  */
2001 STATIC int
xfs_da_map_covers_blocks(int nmap,xfs_bmbt_irec_t * mapp,xfs_dablk_t bno,int count)2002 xfs_da_map_covers_blocks(
2003 	int		nmap,
2004 	xfs_bmbt_irec_t	*mapp,
2005 	xfs_dablk_t	bno,
2006 	int		count)
2007 {
2008 	int		i;
2009 	xfs_fileoff_t	off;
2010 
2011 	for (i = 0, off = bno; i < nmap; i++) {
2012 		if (mapp[i].br_startblock == HOLESTARTBLOCK ||
2013 		    mapp[i].br_startblock == DELAYSTARTBLOCK) {
2014 			return 0;
2015 		}
2016 		if (off != mapp[i].br_startoff) {
2017 			return 0;
2018 		}
2019 		off += mapp[i].br_blockcount;
2020 	}
2021 	return off == bno + count;
2022 }
2023 
2024 /*
2025  * Make a dabuf.
2026  * Used for get_buf, read_buf, read_bufr, and reada_buf.
2027  */
2028 STATIC int
xfs_da_do_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t * mappedbnop,xfs_dabuf_t ** bpp,int whichfork,int caller,inst_t * ra)2029 xfs_da_do_buf(
2030 	xfs_trans_t	*trans,
2031 	xfs_inode_t	*dp,
2032 	xfs_dablk_t	bno,
2033 	xfs_daddr_t	*mappedbnop,
2034 	xfs_dabuf_t	**bpp,
2035 	int		whichfork,
2036 	int		caller,
2037 	inst_t		*ra)
2038 {
2039 	xfs_buf_t	*bp = NULL;
2040 	xfs_buf_t	**bplist;
2041 	int		error=0;
2042 	int		i;
2043 	xfs_bmbt_irec_t	map;
2044 	xfs_bmbt_irec_t	*mapp;
2045 	xfs_daddr_t	mappedbno;
2046 	xfs_mount_t	*mp;
2047 	int		nbplist=0;
2048 	int		nfsb;
2049 	int		nmap;
2050 	xfs_dabuf_t	*rbp;
2051 
2052 	mp = dp->i_mount;
2053 	if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
2054 		nfsb = mp->m_dirblkfsbs;
2055 	else
2056 		nfsb = 1;
2057 	mappedbno = *mappedbnop;
2058 	/*
2059 	 * Caller doesn't have a mapping.  -2 means don't complain
2060 	 * if we land in a hole.
2061 	 */
2062 	if (mappedbno == -1 || mappedbno == -2) {
2063 		/*
2064 		 * Optimize the one-block case.
2065 		 */
2066 		if (nfsb == 1) {
2067 			xfs_fsblock_t	fsb;
2068 
2069 			if ((error =
2070 			    xfs_bmapi_single(trans, dp, whichfork, &fsb,
2071 				    (xfs_fileoff_t)bno))) {
2072 				return error;
2073 			}
2074 			mapp = &map;
2075 			if (fsb == NULLFSBLOCK) {
2076 				nmap = 0;
2077 			} else {
2078 				map.br_startblock = fsb;
2079 				map.br_startoff = (xfs_fileoff_t)bno;
2080 				map.br_blockcount = 1;
2081 				nmap = 1;
2082 			}
2083 		} else {
2084 			mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
2085 			nmap = nfsb;
2086 			if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
2087 					nfsb,
2088 					XFS_BMAPI_METADATA |
2089 						XFS_BMAPI_AFLAG(whichfork),
2090 					NULL, 0, mapp, &nmap, NULL, NULL)))
2091 				goto exit0;
2092 		}
2093 	} else {
2094 		map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
2095 		map.br_startoff = (xfs_fileoff_t)bno;
2096 		map.br_blockcount = nfsb;
2097 		mapp = &map;
2098 		nmap = 1;
2099 	}
2100 	if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
2101 		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
2102 		if (unlikely(error == EFSCORRUPTED)) {
2103 			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2104 				int	i;
2105 				cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
2106 					(long long)bno);
2107 				cmn_err(CE_ALERT, "dir: inode %lld\n",
2108 					(long long)dp->i_ino);
2109 				for (i = 0; i < nmap; i++) {
2110 					cmn_err(CE_ALERT,
2111 						"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n",
2112 						i,
2113 						(long long)mapp[i].br_startoff,
2114 						(long long)mapp[i].br_startblock,
2115 						(long long)mapp[i].br_blockcount,
2116 						mapp[i].br_state);
2117 				}
2118 			}
2119 			XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2120 					 XFS_ERRLEVEL_LOW, mp);
2121 		}
2122 		goto exit0;
2123 	}
2124 	if (caller != 3 && nmap > 1) {
2125 		bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
2126 		nbplist = 0;
2127 	} else
2128 		bplist = NULL;
2129 	/*
2130 	 * Turn the mapping(s) into buffer(s).
2131 	 */
2132 	for (i = 0; i < nmap; i++) {
2133 		int	nmapped;
2134 
2135 		mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
2136 		if (i == 0)
2137 			*mappedbnop = mappedbno;
2138 		nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
2139 		switch (caller) {
2140 		case 0:
2141 			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
2142 				mappedbno, nmapped, 0);
2143 			error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
2144 			break;
2145 		case 1:
2146 		case 2:
2147 			bp = NULL;
2148 			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
2149 				mappedbno, nmapped, 0, &bp);
2150 			break;
2151 		case 3:
2152 			xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
2153 			error = 0;
2154 			bp = NULL;
2155 			break;
2156 		}
2157 		if (error) {
2158 			if (bp)
2159 				xfs_trans_brelse(trans, bp);
2160 			goto exit1;
2161 		}
2162 		if (!bp)
2163 			continue;
2164 		if (caller == 1) {
2165 			if (whichfork == XFS_ATTR_FORK) {
2166 				XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
2167 						XFS_ATTR_BTREE_REF);
2168 			} else {
2169 				XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
2170 						XFS_DIR_BTREE_REF);
2171 			}
2172 		}
2173 		if (bplist) {
2174 			bplist[nbplist++] = bp;
2175 		}
2176 	}
2177 	/*
2178 	 * Build a dabuf structure.
2179 	 */
2180 	if (bplist) {
2181 		rbp = xfs_da_buf_make(nbplist, bplist, ra);
2182 	} else if (bp)
2183 		rbp = xfs_da_buf_make(1, &bp, ra);
2184 	else
2185 		rbp = NULL;
2186 	/*
2187 	 * For read_buf, check the magic number.
2188 	 */
2189 	if (caller == 1) {
2190 		xfs_dir2_data_t		*data;
2191 		xfs_dir2_free_t		*free;
2192 		xfs_da_blkinfo_t	*info;
2193 		uint			magic, magic1;
2194 
2195 		info = rbp->data;
2196 		data = rbp->data;
2197 		free = rbp->data;
2198 		magic = be16_to_cpu(info->magic);
2199 		magic1 = be32_to_cpu(data->hdr.magic);
2200 		if (unlikely(
2201 		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2202 				   (magic != XFS_DIR_LEAF_MAGIC) &&
2203 				   (magic != XFS_ATTR_LEAF_MAGIC) &&
2204 				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
2205 				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
2206 				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2207 				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
2208 				   (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
2209 				mp, XFS_ERRTAG_DA_READ_BUF,
2210 				XFS_RANDOM_DA_READ_BUF))) {
2211 			xfs_buftrace("DA READ ERROR", rbp->bps[0]);
2212 			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2213 					     XFS_ERRLEVEL_LOW, mp, info);
2214 			error = XFS_ERROR(EFSCORRUPTED);
2215 			xfs_da_brelse(trans, rbp);
2216 			nbplist = 0;
2217 			goto exit1;
2218 		}
2219 	}
2220 	if (bplist) {
2221 		kmem_free(bplist, sizeof(*bplist) * nmap);
2222 	}
2223 	if (mapp != &map) {
2224 		kmem_free(mapp, sizeof(*mapp) * nfsb);
2225 	}
2226 	if (bpp)
2227 		*bpp = rbp;
2228 	return 0;
2229 exit1:
2230 	if (bplist) {
2231 		for (i = 0; i < nbplist; i++)
2232 			xfs_trans_brelse(trans, bplist[i]);
2233 		kmem_free(bplist, sizeof(*bplist) * nmap);
2234 	}
2235 exit0:
2236 	if (mapp != &map)
2237 		kmem_free(mapp, sizeof(*mapp) * nfsb);
2238 	if (bpp)
2239 		*bpp = NULL;
2240 	return error;
2241 }
2242 
2243 /*
2244  * Get a buffer for the dir/attr block.
2245  */
2246 int
xfs_da_get_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t mappedbno,xfs_dabuf_t ** bpp,int whichfork)2247 xfs_da_get_buf(
2248 	xfs_trans_t	*trans,
2249 	xfs_inode_t	*dp,
2250 	xfs_dablk_t	bno,
2251 	xfs_daddr_t		mappedbno,
2252 	xfs_dabuf_t	**bpp,
2253 	int		whichfork)
2254 {
2255 	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
2256 						 (inst_t *)__return_address);
2257 }
2258 
2259 /*
2260  * Get a buffer for the dir/attr block, fill in the contents.
2261  */
2262 int
xfs_da_read_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t mappedbno,xfs_dabuf_t ** bpp,int whichfork)2263 xfs_da_read_buf(
2264 	xfs_trans_t	*trans,
2265 	xfs_inode_t	*dp,
2266 	xfs_dablk_t	bno,
2267 	xfs_daddr_t		mappedbno,
2268 	xfs_dabuf_t	**bpp,
2269 	int		whichfork)
2270 {
2271 	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
2272 		(inst_t *)__return_address);
2273 }
2274 
2275 /*
2276  * Readahead the dir/attr block.
2277  */
2278 xfs_daddr_t
xfs_da_reada_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,int whichfork)2279 xfs_da_reada_buf(
2280 	xfs_trans_t	*trans,
2281 	xfs_inode_t	*dp,
2282 	xfs_dablk_t	bno,
2283 	int		whichfork)
2284 {
2285 	xfs_daddr_t		rval;
2286 
2287 	rval = -1;
2288 	if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
2289 			(inst_t *)__return_address))
2290 		return -1;
2291 	else
2292 		return rval;
2293 }
2294 
2295 /*
2296  * Calculate the number of bits needed to hold i different values.
2297  */
2298 uint
xfs_da_log2_roundup(uint i)2299 xfs_da_log2_roundup(uint i)
2300 {
2301 	uint rval;
2302 
2303 	for (rval = 0; rval < NBBY * sizeof(i); rval++) {
2304 		if ((1 << rval) >= i)
2305 			break;
2306 	}
2307 	return(rval);
2308 }
2309 
2310 kmem_zone_t *xfs_da_state_zone;	/* anchor for state struct zone */
2311 kmem_zone_t *xfs_dabuf_zone;		/* dabuf zone */
2312 
2313 /*
2314  * Allocate a dir-state structure.
2315  * We don't put them on the stack since they're large.
2316  */
2317 xfs_da_state_t *
xfs_da_state_alloc(void)2318 xfs_da_state_alloc(void)
2319 {
2320 	return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
2321 }
2322 
2323 /*
2324  * Kill the altpath contents of a da-state structure.
2325  */
2326 STATIC void
xfs_da_state_kill_altpath(xfs_da_state_t * state)2327 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2328 {
2329 	int	i;
2330 
2331 	for (i = 0; i < state->altpath.active; i++) {
2332 		if (state->altpath.blk[i].bp) {
2333 			if (state->altpath.blk[i].bp != state->path.blk[i].bp)
2334 				xfs_da_buf_done(state->altpath.blk[i].bp);
2335 			state->altpath.blk[i].bp = NULL;
2336 		}
2337 	}
2338 	state->altpath.active = 0;
2339 }
2340 
2341 /*
2342  * Free a da-state structure.
2343  */
2344 void
xfs_da_state_free(xfs_da_state_t * state)2345 xfs_da_state_free(xfs_da_state_t *state)
2346 {
2347 	int	i;
2348 
2349 	xfs_da_state_kill_altpath(state);
2350 	for (i = 0; i < state->path.active; i++) {
2351 		if (state->path.blk[i].bp)
2352 			xfs_da_buf_done(state->path.blk[i].bp);
2353 	}
2354 	if (state->extravalid && state->extrablk.bp)
2355 		xfs_da_buf_done(state->extrablk.bp);
2356 #ifdef DEBUG
2357 	memset((char *)state, 0, sizeof(*state));
2358 #endif /* DEBUG */
2359 	kmem_zone_free(xfs_da_state_zone, state);
2360 }
2361 
2362 #ifdef XFS_DABUF_DEBUG
2363 xfs_dabuf_t	*xfs_dabuf_global_list;
2364 lock_t		xfs_dabuf_global_lock;
2365 #endif
2366 
2367 /*
2368  * Create a dabuf.
2369  */
2370 /* ARGSUSED */
2371 STATIC xfs_dabuf_t *
xfs_da_buf_make(int nbuf,xfs_buf_t ** bps,inst_t * ra)2372 xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2373 {
2374 	xfs_buf_t	*bp;
2375 	xfs_dabuf_t	*dabuf;
2376 	int		i;
2377 	int		off;
2378 
2379 	if (nbuf == 1)
2380 		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
2381 	else
2382 		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
2383 	dabuf->dirty = 0;
2384 #ifdef XFS_DABUF_DEBUG
2385 	dabuf->ra = ra;
2386 	dabuf->target = XFS_BUF_TARGET(bps[0]);
2387 	dabuf->blkno = XFS_BUF_ADDR(bps[0]);
2388 #endif
2389 	if (nbuf == 1) {
2390 		dabuf->nbuf = 1;
2391 		bp = bps[0];
2392 		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
2393 		dabuf->data = XFS_BUF_PTR(bp);
2394 		dabuf->bps[0] = bp;
2395 	} else {
2396 		dabuf->nbuf = nbuf;
2397 		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
2398 			dabuf->bps[i] = bp = bps[i];
2399 			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
2400 		}
2401 		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
2402 		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2403 			bp = bps[i];
2404 			memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
2405 				XFS_BUF_COUNT(bp));
2406 		}
2407 	}
2408 #ifdef XFS_DABUF_DEBUG
2409 	{
2410 		SPLDECL(s);
2411 		xfs_dabuf_t	*p;
2412 
2413 		s = mutex_spinlock(&xfs_dabuf_global_lock);
2414 		for (p = xfs_dabuf_global_list; p; p = p->next) {
2415 			ASSERT(p->blkno != dabuf->blkno ||
2416 			       p->target != dabuf->target);
2417 		}
2418 		dabuf->prev = NULL;
2419 		if (xfs_dabuf_global_list)
2420 			xfs_dabuf_global_list->prev = dabuf;
2421 		dabuf->next = xfs_dabuf_global_list;
2422 		xfs_dabuf_global_list = dabuf;
2423 		mutex_spinunlock(&xfs_dabuf_global_lock, s);
2424 	}
2425 #endif
2426 	return dabuf;
2427 }
2428 
2429 /*
2430  * Un-dirty a dabuf.
2431  */
2432 STATIC void
xfs_da_buf_clean(xfs_dabuf_t * dabuf)2433 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
2434 {
2435 	xfs_buf_t	*bp;
2436 	int		i;
2437 	int		off;
2438 
2439 	if (dabuf->dirty) {
2440 		ASSERT(dabuf->nbuf > 1);
2441 		dabuf->dirty = 0;
2442 		for (i = off = 0; i < dabuf->nbuf;
2443 				i++, off += XFS_BUF_COUNT(bp)) {
2444 			bp = dabuf->bps[i];
2445 			memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
2446 				XFS_BUF_COUNT(bp));
2447 		}
2448 	}
2449 }
2450 
2451 /*
2452  * Release a dabuf.
2453  */
2454 void
xfs_da_buf_done(xfs_dabuf_t * dabuf)2455 xfs_da_buf_done(xfs_dabuf_t *dabuf)
2456 {
2457 	ASSERT(dabuf);
2458 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2459 	if (dabuf->dirty)
2460 		xfs_da_buf_clean(dabuf);
2461 	if (dabuf->nbuf > 1)
2462 		kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
2463 #ifdef XFS_DABUF_DEBUG
2464 	{
2465 		SPLDECL(s);
2466 
2467 		s = mutex_spinlock(&xfs_dabuf_global_lock);
2468 		if (dabuf->prev)
2469 			dabuf->prev->next = dabuf->next;
2470 		else
2471 			xfs_dabuf_global_list = dabuf->next;
2472 		if (dabuf->next)
2473 			dabuf->next->prev = dabuf->prev;
2474 		mutex_spinunlock(&xfs_dabuf_global_lock, s);
2475 	}
2476 	memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2477 #endif
2478 	if (dabuf->nbuf == 1)
2479 		kmem_zone_free(xfs_dabuf_zone, dabuf);
2480 	else
2481 		kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
2482 }
2483 
2484 /*
2485  * Log transaction from a dabuf.
2486  */
2487 void
xfs_da_log_buf(xfs_trans_t * tp,xfs_dabuf_t * dabuf,uint first,uint last)2488 xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
2489 {
2490 	xfs_buf_t	*bp;
2491 	uint		f;
2492 	int		i;
2493 	uint		l;
2494 	int		off;
2495 
2496 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2497 	if (dabuf->nbuf == 1) {
2498 		ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
2499 		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
2500 		return;
2501 	}
2502 	dabuf->dirty = 1;
2503 	ASSERT(first <= last);
2504 	for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2505 		bp = dabuf->bps[i];
2506 		f = off;
2507 		l = f + XFS_BUF_COUNT(bp) - 1;
2508 		if (f < first)
2509 			f = first;
2510 		if (l > last)
2511 			l = last;
2512 		if (f <= l)
2513 			xfs_trans_log_buf(tp, bp, f - off, l - off);
2514 		/*
2515 		 * B_DONE is set by xfs_trans_log buf.
2516 		 * If we don't set it on a new buffer (get not read)
2517 		 * then if we don't put anything in the buffer it won't
2518 		 * be set, and at commit it it released into the cache,
2519 		 * and then a read will fail.
2520 		 */
2521 		else if (!(XFS_BUF_ISDONE(bp)))
2522 		  XFS_BUF_DONE(bp);
2523 	}
2524 	ASSERT(last < off);
2525 }
2526 
2527 /*
2528  * Release dabuf from a transaction.
2529  * Have to free up the dabuf before the buffers are released,
2530  * since the synchronization on the dabuf is really the lock on the buffer.
2531  */
2532 void
xfs_da_brelse(xfs_trans_t * tp,xfs_dabuf_t * dabuf)2533 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2534 {
2535 	xfs_buf_t	*bp;
2536 	xfs_buf_t	**bplist;
2537 	int		i;
2538 	int		nbuf;
2539 
2540 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2541 	if ((nbuf = dabuf->nbuf) == 1) {
2542 		bplist = &bp;
2543 		bp = dabuf->bps[0];
2544 	} else {
2545 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2546 		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2547 	}
2548 	xfs_da_buf_done(dabuf);
2549 	for (i = 0; i < nbuf; i++)
2550 		xfs_trans_brelse(tp, bplist[i]);
2551 	if (bplist != &bp)
2552 		kmem_free(bplist, nbuf * sizeof(*bplist));
2553 }
2554 
2555 /*
2556  * Invalidate dabuf from a transaction.
2557  */
2558 void
xfs_da_binval(xfs_trans_t * tp,xfs_dabuf_t * dabuf)2559 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2560 {
2561 	xfs_buf_t	*bp;
2562 	xfs_buf_t	**bplist;
2563 	int		i;
2564 	int		nbuf;
2565 
2566 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2567 	if ((nbuf = dabuf->nbuf) == 1) {
2568 		bplist = &bp;
2569 		bp = dabuf->bps[0];
2570 	} else {
2571 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2572 		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2573 	}
2574 	xfs_da_buf_done(dabuf);
2575 	for (i = 0; i < nbuf; i++)
2576 		xfs_trans_binval(tp, bplist[i]);
2577 	if (bplist != &bp)
2578 		kmem_free(bplist, nbuf * sizeof(*bplist));
2579 }
2580 
2581 /*
2582  * Get the first daddr from a dabuf.
2583  */
2584 xfs_daddr_t
xfs_da_blkno(xfs_dabuf_t * dabuf)2585 xfs_da_blkno(xfs_dabuf_t *dabuf)
2586 {
2587 	ASSERT(dabuf->nbuf);
2588 	ASSERT(dabuf->data);
2589 	return XFS_BUF_ADDR(dabuf->bps[0]);
2590 }
2591