1 /*
2  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir_sf.h"
35 #include "xfs_dir2_sf.h"
36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h"
38 #include "xfs_inode.h"
39 #include "xfs_btree.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_alloc.h"
42 #include "xfs_error.h"
43 
44 STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
45 STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46 STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
47 STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
48 STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
49 STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
50 STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
51 STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
52 		xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
53 STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
54 
55 /*
56  * Single level of the xfs_inobt_delete record deletion routine.
57  * Delete record pointed to by cur/level.
58  * Remove the record from its block then rebalance the tree.
59  * Return 0 for error, 1 for done, 2 to go on to the next level.
60  */
61 STATIC int				/* error */
xfs_inobt_delrec(xfs_btree_cur_t * cur,int level,int * stat)62 xfs_inobt_delrec(
63 	xfs_btree_cur_t		*cur,	/* btree cursor */
64 	int			level,	/* level removing record from */
65 	int			*stat)	/* fail/done/go-on */
66 {
67 	xfs_buf_t		*agbp;	/* buffer for a.g. inode header */
68 	xfs_mount_t		*mp;	/* mount structure */
69 	xfs_agi_t		*agi;	/* allocation group inode header */
70 	xfs_inobt_block_t	*block;	/* btree block record/key lives in */
71 	xfs_agblock_t		bno;	/* btree block number */
72 	xfs_buf_t		*bp;	/* buffer for block */
73 	int			error;	/* error return value */
74 	int			i;	/* loop index */
75 	xfs_inobt_key_t		key;	/* kp points here if block is level 0 */
76 	xfs_inobt_key_t		*kp = NULL;	/* pointer to btree keys */
77 	xfs_agblock_t		lbno;	/* left block's block number */
78 	xfs_buf_t		*lbp;	/* left block's buffer pointer */
79 	xfs_inobt_block_t	*left;	/* left btree block */
80 	xfs_inobt_key_t		*lkp;	/* left block key pointer */
81 	xfs_inobt_ptr_t		*lpp;	/* left block address pointer */
82 	int			lrecs = 0;	/* number of records in left block */
83 	xfs_inobt_rec_t		*lrp;	/* left block record pointer */
84 	xfs_inobt_ptr_t		*pp = NULL;	/* pointer to btree addresses */
85 	int			ptr;	/* index in btree block for this rec */
86 	xfs_agblock_t		rbno;	/* right block's block number */
87 	xfs_buf_t		*rbp;	/* right block's buffer pointer */
88 	xfs_inobt_block_t	*right;	/* right btree block */
89 	xfs_inobt_key_t		*rkp;	/* right block key pointer */
90 	xfs_inobt_rec_t		*rp;	/* pointer to btree records */
91 	xfs_inobt_ptr_t		*rpp;	/* right block address pointer */
92 	int			rrecs = 0;	/* number of records in right block */
93 	int			numrecs;
94 	xfs_inobt_rec_t		*rrp;	/* right block record pointer */
95 	xfs_btree_cur_t		*tcur;	/* temporary btree cursor */
96 
97 	mp = cur->bc_mp;
98 
99 	/*
100 	 * Get the index of the entry being deleted, check for nothing there.
101 	 */
102 	ptr = cur->bc_ptrs[level];
103 	if (ptr == 0) {
104 		*stat = 0;
105 		return 0;
106 	}
107 
108 	/*
109 	 * Get the buffer & block containing the record or key/ptr.
110 	 */
111 	bp = cur->bc_bufs[level];
112 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
113 #ifdef DEBUG
114 	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
115 		return error;
116 #endif
117 	/*
118 	 * Fail if we're off the end of the block.
119 	 */
120 
121 	numrecs = be16_to_cpu(block->bb_numrecs);
122 	if (ptr > numrecs) {
123 		*stat = 0;
124 		return 0;
125 	}
126 	/*
127 	 * It's a nonleaf.  Excise the key and ptr being deleted, by
128 	 * sliding the entries past them down one.
129 	 * Log the changed areas of the block.
130 	 */
131 	if (level > 0) {
132 		kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
133 		pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
134 #ifdef DEBUG
135 		for (i = ptr; i < numrecs; i++) {
136 			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
137 				return error;
138 		}
139 #endif
140 		if (ptr < numrecs) {
141 			memmove(&kp[ptr - 1], &kp[ptr],
142 				(numrecs - ptr) * sizeof(*kp));
143 			memmove(&pp[ptr - 1], &pp[ptr],
144 				(numrecs - ptr) * sizeof(*kp));
145 			xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
146 			xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
147 		}
148 	}
149 	/*
150 	 * It's a leaf.  Excise the record being deleted, by sliding the
151 	 * entries past it down one.  Log the changed areas of the block.
152 	 */
153 	else {
154 		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
155 		if (ptr < numrecs) {
156 			memmove(&rp[ptr - 1], &rp[ptr],
157 				(numrecs - ptr) * sizeof(*rp));
158 			xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
159 		}
160 		/*
161 		 * If it's the first record in the block, we'll need a key
162 		 * structure to pass up to the next level (updkey).
163 		 */
164 		if (ptr == 1) {
165 			key.ir_startino = rp->ir_startino;
166 			kp = &key;
167 		}
168 	}
169 	/*
170 	 * Decrement and log the number of entries in the block.
171 	 */
172 	numrecs--;
173 	block->bb_numrecs = cpu_to_be16(numrecs);
174 	xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
175 	/*
176 	 * Is this the root level?  If so, we're almost done.
177 	 */
178 	if (level == cur->bc_nlevels - 1) {
179 		/*
180 		 * If this is the root level,
181 		 * and there's only one entry left,
182 		 * and it's NOT the leaf level,
183 		 * then we can get rid of this level.
184 		 */
185 		if (numrecs == 1 && level > 0) {
186 			agbp = cur->bc_private.i.agbp;
187 			agi = XFS_BUF_TO_AGI(agbp);
188 			/*
189 			 * pp is still set to the first pointer in the block.
190 			 * Make it the new root of the btree.
191 			 */
192 			bno = be32_to_cpu(agi->agi_root);
193 			agi->agi_root = *pp;
194 			be32_add(&agi->agi_level, -1);
195 			/*
196 			 * Free the block.
197 			 */
198 			if ((error = xfs_free_extent(cur->bc_tp,
199 				XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1)))
200 				return error;
201 			xfs_trans_binval(cur->bc_tp, bp);
202 			xfs_ialloc_log_agi(cur->bc_tp, agbp,
203 				XFS_AGI_ROOT | XFS_AGI_LEVEL);
204 			/*
205 			 * Update the cursor so there's one fewer level.
206 			 */
207 			cur->bc_bufs[level] = NULL;
208 			cur->bc_nlevels--;
209 		} else if (level > 0 &&
210 			   (error = xfs_inobt_decrement(cur, level, &i)))
211 			return error;
212 		*stat = 1;
213 		return 0;
214 	}
215 	/*
216 	 * If we deleted the leftmost entry in the block, update the
217 	 * key values above us in the tree.
218 	 */
219 	if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
220 		return error;
221 	/*
222 	 * If the number of records remaining in the block is at least
223 	 * the minimum, we're done.
224 	 */
225 	if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
226 		if (level > 0 &&
227 		    (error = xfs_inobt_decrement(cur, level, &i)))
228 			return error;
229 		*stat = 1;
230 		return 0;
231 	}
232 	/*
233 	 * Otherwise, we have to move some records around to keep the
234 	 * tree balanced.  Look at the left and right sibling blocks to
235 	 * see if we can re-balance by moving only one record.
236 	 */
237 	rbno = be32_to_cpu(block->bb_rightsib);
238 	lbno = be32_to_cpu(block->bb_leftsib);
239 	bno = NULLAGBLOCK;
240 	ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
241 	/*
242 	 * Duplicate the cursor so our btree manipulations here won't
243 	 * disrupt the next level up.
244 	 */
245 	if ((error = xfs_btree_dup_cursor(cur, &tcur)))
246 		return error;
247 	/*
248 	 * If there's a right sibling, see if it's ok to shift an entry
249 	 * out of it.
250 	 */
251 	if (rbno != NULLAGBLOCK) {
252 		/*
253 		 * Move the temp cursor to the last entry in the next block.
254 		 * Actually any entry but the first would suffice.
255 		 */
256 		i = xfs_btree_lastrec(tcur, level);
257 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
258 		if ((error = xfs_inobt_increment(tcur, level, &i)))
259 			goto error0;
260 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
261 		i = xfs_btree_lastrec(tcur, level);
262 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
263 		/*
264 		 * Grab a pointer to the block.
265 		 */
266 		rbp = tcur->bc_bufs[level];
267 		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
268 #ifdef DEBUG
269 		if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
270 			goto error0;
271 #endif
272 		/*
273 		 * Grab the current block number, for future use.
274 		 */
275 		bno = be32_to_cpu(right->bb_leftsib);
276 		/*
277 		 * If right block is full enough so that removing one entry
278 		 * won't make it too empty, and left-shifting an entry out
279 		 * of right to us works, we're done.
280 		 */
281 		if (be16_to_cpu(right->bb_numrecs) - 1 >=
282 		     XFS_INOBT_BLOCK_MINRECS(level, cur)) {
283 			if ((error = xfs_inobt_lshift(tcur, level, &i)))
284 				goto error0;
285 			if (i) {
286 				ASSERT(be16_to_cpu(block->bb_numrecs) >=
287 				       XFS_INOBT_BLOCK_MINRECS(level, cur));
288 				xfs_btree_del_cursor(tcur,
289 						     XFS_BTREE_NOERROR);
290 				if (level > 0 &&
291 				    (error = xfs_inobt_decrement(cur, level,
292 						&i)))
293 					return error;
294 				*stat = 1;
295 				return 0;
296 			}
297 		}
298 		/*
299 		 * Otherwise, grab the number of records in right for
300 		 * future reference, and fix up the temp cursor to point
301 		 * to our block again (last record).
302 		 */
303 		rrecs = be16_to_cpu(right->bb_numrecs);
304 		if (lbno != NULLAGBLOCK) {
305 			xfs_btree_firstrec(tcur, level);
306 			if ((error = xfs_inobt_decrement(tcur, level, &i)))
307 				goto error0;
308 		}
309 	}
310 	/*
311 	 * If there's a left sibling, see if it's ok to shift an entry
312 	 * out of it.
313 	 */
314 	if (lbno != NULLAGBLOCK) {
315 		/*
316 		 * Move the temp cursor to the first entry in the
317 		 * previous block.
318 		 */
319 		xfs_btree_firstrec(tcur, level);
320 		if ((error = xfs_inobt_decrement(tcur, level, &i)))
321 			goto error0;
322 		xfs_btree_firstrec(tcur, level);
323 		/*
324 		 * Grab a pointer to the block.
325 		 */
326 		lbp = tcur->bc_bufs[level];
327 		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
328 #ifdef DEBUG
329 		if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
330 			goto error0;
331 #endif
332 		/*
333 		 * Grab the current block number, for future use.
334 		 */
335 		bno = be32_to_cpu(left->bb_rightsib);
336 		/*
337 		 * If left block is full enough so that removing one entry
338 		 * won't make it too empty, and right-shifting an entry out
339 		 * of left to us works, we're done.
340 		 */
341 		if (be16_to_cpu(left->bb_numrecs) - 1 >=
342 		     XFS_INOBT_BLOCK_MINRECS(level, cur)) {
343 			if ((error = xfs_inobt_rshift(tcur, level, &i)))
344 				goto error0;
345 			if (i) {
346 				ASSERT(be16_to_cpu(block->bb_numrecs) >=
347 				       XFS_INOBT_BLOCK_MINRECS(level, cur));
348 				xfs_btree_del_cursor(tcur,
349 						     XFS_BTREE_NOERROR);
350 				if (level == 0)
351 					cur->bc_ptrs[0]++;
352 				*stat = 1;
353 				return 0;
354 			}
355 		}
356 		/*
357 		 * Otherwise, grab the number of records in right for
358 		 * future reference.
359 		 */
360 		lrecs = be16_to_cpu(left->bb_numrecs);
361 	}
362 	/*
363 	 * Delete the temp cursor, we're done with it.
364 	 */
365 	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
366 	/*
367 	 * If here, we need to do a join to keep the tree balanced.
368 	 */
369 	ASSERT(bno != NULLAGBLOCK);
370 	/*
371 	 * See if we can join with the left neighbor block.
372 	 */
373 	if (lbno != NULLAGBLOCK &&
374 	    lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
375 		/*
376 		 * Set "right" to be the starting block,
377 		 * "left" to be the left neighbor.
378 		 */
379 		rbno = bno;
380 		right = block;
381 		rrecs = be16_to_cpu(right->bb_numrecs);
382 		rbp = bp;
383 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
384 				cur->bc_private.i.agno, lbno, 0, &lbp,
385 				XFS_INO_BTREE_REF)))
386 			return error;
387 		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
388 		lrecs = be16_to_cpu(left->bb_numrecs);
389 		if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
390 			return error;
391 	}
392 	/*
393 	 * If that won't work, see if we can join with the right neighbor block.
394 	 */
395 	else if (rbno != NULLAGBLOCK &&
396 		 rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
397 		/*
398 		 * Set "left" to be the starting block,
399 		 * "right" to be the right neighbor.
400 		 */
401 		lbno = bno;
402 		left = block;
403 		lrecs = be16_to_cpu(left->bb_numrecs);
404 		lbp = bp;
405 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
406 				cur->bc_private.i.agno, rbno, 0, &rbp,
407 				XFS_INO_BTREE_REF)))
408 			return error;
409 		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
410 		rrecs = be16_to_cpu(right->bb_numrecs);
411 		if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
412 			return error;
413 	}
414 	/*
415 	 * Otherwise, we can't fix the imbalance.
416 	 * Just return.  This is probably a logic error, but it's not fatal.
417 	 */
418 	else {
419 		if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
420 			return error;
421 		*stat = 1;
422 		return 0;
423 	}
424 	/*
425 	 * We're now going to join "left" and "right" by moving all the stuff
426 	 * in "right" to "left" and deleting "right".
427 	 */
428 	if (level > 0) {
429 		/*
430 		 * It's a non-leaf.  Move keys and pointers.
431 		 */
432 		lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
433 		lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
434 		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
435 		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
436 #ifdef DEBUG
437 		for (i = 0; i < rrecs; i++) {
438 			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
439 				return error;
440 		}
441 #endif
442 		memcpy(lkp, rkp, rrecs * sizeof(*lkp));
443 		memcpy(lpp, rpp, rrecs * sizeof(*lpp));
444 		xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
445 		xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
446 	} else {
447 		/*
448 		 * It's a leaf.  Move records.
449 		 */
450 		lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
451 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
452 		memcpy(lrp, rrp, rrecs * sizeof(*lrp));
453 		xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
454 	}
455 	/*
456 	 * If we joined with the left neighbor, set the buffer in the
457 	 * cursor to the left block, and fix up the index.
458 	 */
459 	if (bp != lbp) {
460 		xfs_btree_setbuf(cur, level, lbp);
461 		cur->bc_ptrs[level] += lrecs;
462 	}
463 	/*
464 	 * If we joined with the right neighbor and there's a level above
465 	 * us, increment the cursor at that level.
466 	 */
467 	else if (level + 1 < cur->bc_nlevels &&
468 		 (error = xfs_alloc_increment(cur, level + 1, &i)))
469 		return error;
470 	/*
471 	 * Fix up the number of records in the surviving block.
472 	 */
473 	lrecs += rrecs;
474 	left->bb_numrecs = cpu_to_be16(lrecs);
475 	/*
476 	 * Fix up the right block pointer in the surviving block, and log it.
477 	 */
478 	left->bb_rightsib = right->bb_rightsib;
479 	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
480 	/*
481 	 * If there is a right sibling now, make it point to the
482 	 * remaining block.
483 	 */
484 	if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
485 		xfs_inobt_block_t	*rrblock;
486 		xfs_buf_t		*rrbp;
487 
488 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
489 				cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0,
490 				&rrbp, XFS_INO_BTREE_REF)))
491 			return error;
492 		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
493 		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
494 			return error;
495 		rrblock->bb_leftsib = cpu_to_be32(lbno);
496 		xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
497 	}
498 	/*
499 	 * Free the deleting block.
500 	 */
501 	if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
502 				     cur->bc_private.i.agno, rbno), 1)))
503 		return error;
504 	xfs_trans_binval(cur->bc_tp, rbp);
505 	/*
506 	 * Readjust the ptr at this level if it's not a leaf, since it's
507 	 * still pointing at the deletion point, which makes the cursor
508 	 * inconsistent.  If this makes the ptr 0, the caller fixes it up.
509 	 * We can't use decrement because it would change the next level up.
510 	 */
511 	if (level > 0)
512 		cur->bc_ptrs[level]--;
513 	/*
514 	 * Return value means the next level up has something to do.
515 	 */
516 	*stat = 2;
517 	return 0;
518 
519 error0:
520 	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
521 	return error;
522 }
523 
524 /*
525  * Insert one record/level.  Return information to the caller
526  * allowing the next level up to proceed if necessary.
527  */
528 STATIC int				/* error */
xfs_inobt_insrec(xfs_btree_cur_t * cur,int level,xfs_agblock_t * bnop,xfs_inobt_rec_t * recp,xfs_btree_cur_t ** curp,int * stat)529 xfs_inobt_insrec(
530 	xfs_btree_cur_t		*cur,	/* btree cursor */
531 	int			level,	/* level to insert record at */
532 	xfs_agblock_t		*bnop,	/* i/o: block number inserted */
533 	xfs_inobt_rec_t		*recp,	/* i/o: record data inserted */
534 	xfs_btree_cur_t		**curp,	/* output: new cursor replacing cur */
535 	int			*stat)	/* success/failure */
536 {
537 	xfs_inobt_block_t	*block;	/* btree block record/key lives in */
538 	xfs_buf_t		*bp;	/* buffer for block */
539 	int			error;	/* error return value */
540 	int			i;	/* loop index */
541 	xfs_inobt_key_t		key;	/* key value being inserted */
542 	xfs_inobt_key_t		*kp=NULL;	/* pointer to btree keys */
543 	xfs_agblock_t		nbno;	/* block number of allocated block */
544 	xfs_btree_cur_t		*ncur;	/* new cursor to be used at next lvl */
545 	xfs_inobt_key_t		nkey;	/* new key value, from split */
546 	xfs_inobt_rec_t		nrec;	/* new record value, for caller */
547 	int			numrecs;
548 	int			optr;	/* old ptr value */
549 	xfs_inobt_ptr_t		*pp;	/* pointer to btree addresses */
550 	int			ptr;	/* index in btree block for this rec */
551 	xfs_inobt_rec_t		*rp=NULL;	/* pointer to btree records */
552 
553 	/*
554 	 * GCC doesn't understand the (arguably complex) control flow in
555 	 * this function and complains about uninitialized structure fields
556 	 * without this.
557 	 */
558 	memset(&nrec, 0, sizeof(nrec));
559 
560 	/*
561 	 * If we made it to the root level, allocate a new root block
562 	 * and we're done.
563 	 */
564 	if (level >= cur->bc_nlevels) {
565 		error = xfs_inobt_newroot(cur, &i);
566 		*bnop = NULLAGBLOCK;
567 		*stat = i;
568 		return error;
569 	}
570 	/*
571 	 * Make a key out of the record data to be inserted, and save it.
572 	 */
573 	key.ir_startino = recp->ir_startino; /* INT_: direct copy */
574 	optr = ptr = cur->bc_ptrs[level];
575 	/*
576 	 * If we're off the left edge, return failure.
577 	 */
578 	if (ptr == 0) {
579 		*stat = 0;
580 		return 0;
581 	}
582 	/*
583 	 * Get pointers to the btree buffer and block.
584 	 */
585 	bp = cur->bc_bufs[level];
586 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
587 	numrecs = be16_to_cpu(block->bb_numrecs);
588 #ifdef DEBUG
589 	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
590 		return error;
591 	/*
592 	 * Check that the new entry is being inserted in the right place.
593 	 */
594 	if (ptr <= numrecs) {
595 		if (level == 0) {
596 			rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
597 			xfs_btree_check_rec(cur->bc_btnum, recp, rp);
598 		} else {
599 			kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
600 			xfs_btree_check_key(cur->bc_btnum, &key, kp);
601 		}
602 	}
603 #endif
604 	nbno = NULLAGBLOCK;
605 	ncur = (xfs_btree_cur_t *)0;
606 	/*
607 	 * If the block is full, we can't insert the new entry until we
608 	 * make the block un-full.
609 	 */
610 	if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
611 		/*
612 		 * First, try shifting an entry to the right neighbor.
613 		 */
614 		if ((error = xfs_inobt_rshift(cur, level, &i)))
615 			return error;
616 		if (i) {
617 			/* nothing */
618 		}
619 		/*
620 		 * Next, try shifting an entry to the left neighbor.
621 		 */
622 		else {
623 			if ((error = xfs_inobt_lshift(cur, level, &i)))
624 				return error;
625 			if (i) {
626 				optr = ptr = cur->bc_ptrs[level];
627 			} else {
628 				/*
629 				 * Next, try splitting the current block
630 				 * in half. If this works we have to
631 				 * re-set our variables because
632 				 * we could be in a different block now.
633 				 */
634 				if ((error = xfs_inobt_split(cur, level, &nbno,
635 						&nkey, &ncur, &i)))
636 					return error;
637 				if (i) {
638 					bp = cur->bc_bufs[level];
639 					block = XFS_BUF_TO_INOBT_BLOCK(bp);
640 #ifdef DEBUG
641 					if ((error = xfs_btree_check_sblock(cur,
642 							block, level, bp)))
643 						return error;
644 #endif
645 					ptr = cur->bc_ptrs[level];
646 					nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */
647 				} else {
648 					/*
649 					 * Otherwise the insert fails.
650 					 */
651 					*stat = 0;
652 					return 0;
653 				}
654 			}
655 		}
656 	}
657 	/*
658 	 * At this point we know there's room for our new entry in the block
659 	 * we're pointing at.
660 	 */
661 	numrecs = be16_to_cpu(block->bb_numrecs);
662 	if (level > 0) {
663 		/*
664 		 * It's a non-leaf entry.  Make a hole for the new data
665 		 * in the key and ptr regions of the block.
666 		 */
667 		kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
668 		pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
669 #ifdef DEBUG
670 		for (i = numrecs; i >= ptr; i--) {
671 			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
672 				return error;
673 		}
674 #endif
675 		memmove(&kp[ptr], &kp[ptr - 1],
676 			(numrecs - ptr + 1) * sizeof(*kp));
677 		memmove(&pp[ptr], &pp[ptr - 1],
678 			(numrecs - ptr + 1) * sizeof(*pp));
679 		/*
680 		 * Now stuff the new data in, bump numrecs and log the new data.
681 		 */
682 #ifdef DEBUG
683 		if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
684 			return error;
685 #endif
686 		kp[ptr - 1] = key; /* INT_: struct copy */
687 		pp[ptr - 1] = cpu_to_be32(*bnop);
688 		numrecs++;
689 		block->bb_numrecs = cpu_to_be16(numrecs);
690 		xfs_inobt_log_keys(cur, bp, ptr, numrecs);
691 		xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
692 	} else {
693 		/*
694 		 * It's a leaf entry.  Make a hole for the new record.
695 		 */
696 		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
697 		memmove(&rp[ptr], &rp[ptr - 1],
698 			(numrecs - ptr + 1) * sizeof(*rp));
699 		/*
700 		 * Now stuff the new record in, bump numrecs
701 		 * and log the new data.
702 		 */
703 		rp[ptr - 1] = *recp; /* INT_: struct copy */
704 		numrecs++;
705 		block->bb_numrecs = cpu_to_be16(numrecs);
706 		xfs_inobt_log_recs(cur, bp, ptr, numrecs);
707 	}
708 	/*
709 	 * Log the new number of records in the btree header.
710 	 */
711 	xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
712 #ifdef DEBUG
713 	/*
714 	 * Check that the key/record is in the right place, now.
715 	 */
716 	if (ptr < numrecs) {
717 		if (level == 0)
718 			xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
719 				rp + ptr);
720 		else
721 			xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
722 				kp + ptr);
723 	}
724 #endif
725 	/*
726 	 * If we inserted at the start of a block, update the parents' keys.
727 	 */
728 	if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
729 		return error;
730 	/*
731 	 * Return the new block number, if any.
732 	 * If there is one, give back a record value and a cursor too.
733 	 */
734 	*bnop = nbno;
735 	if (nbno != NULLAGBLOCK) {
736 		*recp = nrec; /* INT_: struct copy */
737 		*curp = ncur;
738 	}
739 	*stat = 1;
740 	return 0;
741 }
742 
743 /*
744  * Log header fields from a btree block.
745  */
746 STATIC void
xfs_inobt_log_block(xfs_trans_t * tp,xfs_buf_t * bp,int fields)747 xfs_inobt_log_block(
748 	xfs_trans_t		*tp,	/* transaction pointer */
749 	xfs_buf_t		*bp,	/* buffer containing btree block */
750 	int			fields)	/* mask of fields: XFS_BB_... */
751 {
752 	int			first;	/* first byte offset logged */
753 	int			last;	/* last byte offset logged */
754 	static const short	offsets[] = {	/* table of offsets */
755 		offsetof(xfs_inobt_block_t, bb_magic),
756 		offsetof(xfs_inobt_block_t, bb_level),
757 		offsetof(xfs_inobt_block_t, bb_numrecs),
758 		offsetof(xfs_inobt_block_t, bb_leftsib),
759 		offsetof(xfs_inobt_block_t, bb_rightsib),
760 		sizeof(xfs_inobt_block_t)
761 	};
762 
763 	xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
764 	xfs_trans_log_buf(tp, bp, first, last);
765 }
766 
767 /*
768  * Log keys from a btree block (nonleaf).
769  */
770 STATIC void
xfs_inobt_log_keys(xfs_btree_cur_t * cur,xfs_buf_t * bp,int kfirst,int klast)771 xfs_inobt_log_keys(
772 	xfs_btree_cur_t		*cur,	/* btree cursor */
773 	xfs_buf_t		*bp,	/* buffer containing btree block */
774 	int			kfirst,	/* index of first key to log */
775 	int			klast)	/* index of last key to log */
776 {
777 	xfs_inobt_block_t	*block;	/* btree block to log from */
778 	int			first;	/* first byte offset logged */
779 	xfs_inobt_key_t		*kp;	/* key pointer in btree block */
780 	int			last;	/* last byte offset logged */
781 
782 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
783 	kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
784 	first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
785 	last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
786 	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
787 }
788 
789 /*
790  * Log block pointer fields from a btree block (nonleaf).
791  */
792 STATIC void
xfs_inobt_log_ptrs(xfs_btree_cur_t * cur,xfs_buf_t * bp,int pfirst,int plast)793 xfs_inobt_log_ptrs(
794 	xfs_btree_cur_t		*cur,	/* btree cursor */
795 	xfs_buf_t		*bp,	/* buffer containing btree block */
796 	int			pfirst,	/* index of first pointer to log */
797 	int			plast)	/* index of last pointer to log */
798 {
799 	xfs_inobt_block_t	*block;	/* btree block to log from */
800 	int			first;	/* first byte offset logged */
801 	int			last;	/* last byte offset logged */
802 	xfs_inobt_ptr_t		*pp;	/* block-pointer pointer in btree blk */
803 
804 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
805 	pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
806 	first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
807 	last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
808 	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
809 }
810 
811 /*
812  * Log records from a btree block (leaf).
813  */
814 STATIC void
xfs_inobt_log_recs(xfs_btree_cur_t * cur,xfs_buf_t * bp,int rfirst,int rlast)815 xfs_inobt_log_recs(
816 	xfs_btree_cur_t		*cur,	/* btree cursor */
817 	xfs_buf_t		*bp,	/* buffer containing btree block */
818 	int			rfirst,	/* index of first record to log */
819 	int			rlast)	/* index of last record to log */
820 {
821 	xfs_inobt_block_t	*block;	/* btree block to log from */
822 	int			first;	/* first byte offset logged */
823 	int			last;	/* last byte offset logged */
824 	xfs_inobt_rec_t		*rp;	/* record pointer for btree block */
825 
826 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
827 	rp = XFS_INOBT_REC_ADDR(block, 1, cur);
828 	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
829 	last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
830 	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
831 }
832 
833 /*
834  * Lookup the record.  The cursor is made to point to it, based on dir.
835  * Return 0 if can't find any such record, 1 for success.
836  */
837 STATIC int				/* error */
xfs_inobt_lookup(xfs_btree_cur_t * cur,xfs_lookup_t dir,int * stat)838 xfs_inobt_lookup(
839 	xfs_btree_cur_t		*cur,	/* btree cursor */
840 	xfs_lookup_t		dir,	/* <=, ==, or >= */
841 	int			*stat)	/* success/failure */
842 {
843 	xfs_agblock_t		agbno;	/* a.g. relative btree block number */
844 	xfs_agnumber_t		agno;	/* allocation group number */
845 	xfs_inobt_block_t	*block=NULL;	/* current btree block */
846 	__int64_t		diff;	/* difference for the current key */
847 	int			error;	/* error return value */
848 	int			keyno=0;	/* current key number */
849 	int			level;	/* level in the btree */
850 	xfs_mount_t		*mp;	/* file system mount point */
851 
852 	/*
853 	 * Get the allocation group header, and the root block number.
854 	 */
855 	mp = cur->bc_mp;
856 	{
857 		xfs_agi_t	*agi;	/* a.g. inode header */
858 
859 		agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
860 		agno = be32_to_cpu(agi->agi_seqno);
861 		agbno = be32_to_cpu(agi->agi_root);
862 	}
863 	/*
864 	 * Iterate over each level in the btree, starting at the root.
865 	 * For each level above the leaves, find the key we need, based
866 	 * on the lookup record, then follow the corresponding block
867 	 * pointer down to the next level.
868 	 */
869 	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
870 		xfs_buf_t	*bp;	/* buffer pointer for btree block */
871 		xfs_daddr_t	d;	/* disk address of btree block */
872 
873 		/*
874 		 * Get the disk address we're looking for.
875 		 */
876 		d = XFS_AGB_TO_DADDR(mp, agno, agbno);
877 		/*
878 		 * If the old buffer at this level is for a different block,
879 		 * throw it away, otherwise just use it.
880 		 */
881 		bp = cur->bc_bufs[level];
882 		if (bp && XFS_BUF_ADDR(bp) != d)
883 			bp = (xfs_buf_t *)0;
884 		if (!bp) {
885 			/*
886 			 * Need to get a new buffer.  Read it, then
887 			 * set it in the cursor, releasing the old one.
888 			 */
889 			if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
890 					agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
891 				return error;
892 			xfs_btree_setbuf(cur, level, bp);
893 			/*
894 			 * Point to the btree block, now that we have the buffer
895 			 */
896 			block = XFS_BUF_TO_INOBT_BLOCK(bp);
897 			if ((error = xfs_btree_check_sblock(cur, block, level,
898 					bp)))
899 				return error;
900 		} else
901 			block = XFS_BUF_TO_INOBT_BLOCK(bp);
902 		/*
903 		 * If we already had a key match at a higher level, we know
904 		 * we need to use the first entry in this block.
905 		 */
906 		if (diff == 0)
907 			keyno = 1;
908 		/*
909 		 * Otherwise we need to search this block.  Do a binary search.
910 		 */
911 		else {
912 			int		high;	/* high entry number */
913 			xfs_inobt_key_t	*kkbase=NULL;/* base of keys in block */
914 			xfs_inobt_rec_t	*krbase=NULL;/* base of records in block */
915 			int		low;	/* low entry number */
916 
917 			/*
918 			 * Get a pointer to keys or records.
919 			 */
920 			if (level > 0)
921 				kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
922 			else
923 				krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
924 			/*
925 			 * Set low and high entry numbers, 1-based.
926 			 */
927 			low = 1;
928 			if (!(high = be16_to_cpu(block->bb_numrecs))) {
929 				/*
930 				 * If the block is empty, the tree must
931 				 * be an empty leaf.
932 				 */
933 				ASSERT(level == 0 && cur->bc_nlevels == 1);
934 				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
935 				*stat = 0;
936 				return 0;
937 			}
938 			/*
939 			 * Binary search the block.
940 			 */
941 			while (low <= high) {
942 				xfs_agino_t	startino;	/* key value */
943 
944 				/*
945 				 * keyno is average of low and high.
946 				 */
947 				keyno = (low + high) >> 1;
948 				/*
949 				 * Get startino.
950 				 */
951 				if (level > 0) {
952 					xfs_inobt_key_t	*kkp;
953 
954 					kkp = kkbase + keyno - 1;
955 					startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
956 				} else {
957 					xfs_inobt_rec_t	*krp;
958 
959 					krp = krbase + keyno - 1;
960 					startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
961 				}
962 				/*
963 				 * Compute difference to get next direction.
964 				 */
965 				diff = (__int64_t)
966 					startino - cur->bc_rec.i.ir_startino;
967 				/*
968 				 * Less than, move right.
969 				 */
970 				if (diff < 0)
971 					low = keyno + 1;
972 				/*
973 				 * Greater than, move left.
974 				 */
975 				else if (diff > 0)
976 					high = keyno - 1;
977 				/*
978 				 * Equal, we're done.
979 				 */
980 				else
981 					break;
982 			}
983 		}
984 		/*
985 		 * If there are more levels, set up for the next level
986 		 * by getting the block number and filling in the cursor.
987 		 */
988 		if (level > 0) {
989 			/*
990 			 * If we moved left, need the previous key number,
991 			 * unless there isn't one.
992 			 */
993 			if (diff > 0 && --keyno < 1)
994 				keyno = 1;
995 			agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
996 #ifdef DEBUG
997 			if ((error = xfs_btree_check_sptr(cur, agbno, level)))
998 				return error;
999 #endif
1000 			cur->bc_ptrs[level] = keyno;
1001 		}
1002 	}
1003 	/*
1004 	 * Done with the search.
1005 	 * See if we need to adjust the results.
1006 	 */
1007 	if (dir != XFS_LOOKUP_LE && diff < 0) {
1008 		keyno++;
1009 		/*
1010 		 * If ge search and we went off the end of the block, but it's
1011 		 * not the last block, we're in the wrong block.
1012 		 */
1013 		if (dir == XFS_LOOKUP_GE &&
1014 		    keyno > be16_to_cpu(block->bb_numrecs) &&
1015 		    be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1016 			int	i;
1017 
1018 			cur->bc_ptrs[0] = keyno;
1019 			if ((error = xfs_inobt_increment(cur, 0, &i)))
1020 				return error;
1021 			ASSERT(i == 1);
1022 			*stat = 1;
1023 			return 0;
1024 		}
1025 	}
1026 	else if (dir == XFS_LOOKUP_LE && diff > 0)
1027 		keyno--;
1028 	cur->bc_ptrs[0] = keyno;
1029 	/*
1030 	 * Return if we succeeded or not.
1031 	 */
1032 	if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1033 		*stat = 0;
1034 	else
1035 		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1036 	return 0;
1037 }
1038 
1039 /*
1040  * Move 1 record left from cur/level if possible.
1041  * Update cur to reflect the new path.
1042  */
1043 STATIC int				/* error */
xfs_inobt_lshift(xfs_btree_cur_t * cur,int level,int * stat)1044 xfs_inobt_lshift(
1045 	xfs_btree_cur_t		*cur,	/* btree cursor */
1046 	int			level,	/* level to shift record on */
1047 	int			*stat)	/* success/failure */
1048 {
1049 	int			error;	/* error return value */
1050 #ifdef DEBUG
1051 	int			i;	/* loop index */
1052 #endif
1053 	xfs_inobt_key_t		key;	/* key value for leaf level upward */
1054 	xfs_buf_t		*lbp;	/* buffer for left neighbor block */
1055 	xfs_inobt_block_t	*left;	/* left neighbor btree block */
1056 	xfs_inobt_key_t		*lkp=NULL;	/* key pointer for left block */
1057 	xfs_inobt_ptr_t		*lpp;	/* address pointer for left block */
1058 	xfs_inobt_rec_t		*lrp=NULL;	/* record pointer for left block */
1059 	int			nrec;	/* new number of left block entries */
1060 	xfs_buf_t		*rbp;	/* buffer for right (current) block */
1061 	xfs_inobt_block_t	*right;	/* right (current) btree block */
1062 	xfs_inobt_key_t		*rkp=NULL;	/* key pointer for right block */
1063 	xfs_inobt_ptr_t		*rpp=NULL;	/* address pointer for right block */
1064 	xfs_inobt_rec_t		*rrp=NULL;	/* record pointer for right block */
1065 
1066 	/*
1067 	 * Set up variables for this block as "right".
1068 	 */
1069 	rbp = cur->bc_bufs[level];
1070 	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1071 #ifdef DEBUG
1072 	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1073 		return error;
1074 #endif
1075 	/*
1076 	 * If we've got no left sibling then we can't shift an entry left.
1077 	 */
1078 	if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1079 		*stat = 0;
1080 		return 0;
1081 	}
1082 	/*
1083 	 * If the cursor entry is the one that would be moved, don't
1084 	 * do it... it's too complicated.
1085 	 */
1086 	if (cur->bc_ptrs[level] <= 1) {
1087 		*stat = 0;
1088 		return 0;
1089 	}
1090 	/*
1091 	 * Set up the left neighbor as "left".
1092 	 */
1093 	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1094 			cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib),
1095 			0, &lbp, XFS_INO_BTREE_REF)))
1096 		return error;
1097 	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1098 	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1099 		return error;
1100 	/*
1101 	 * If it's full, it can't take another entry.
1102 	 */
1103 	if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1104 		*stat = 0;
1105 		return 0;
1106 	}
1107 	nrec = be16_to_cpu(left->bb_numrecs) + 1;
1108 	/*
1109 	 * If non-leaf, copy a key and a ptr to the left block.
1110 	 */
1111 	if (level > 0) {
1112 		lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1113 		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1114 		*lkp = *rkp;
1115 		xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1116 		lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1117 		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1118 #ifdef DEBUG
1119 		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1120 			return error;
1121 #endif
1122 		*lpp = *rpp; /* INT_: no-change copy */
1123 		xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1124 	}
1125 	/*
1126 	 * If leaf, copy a record to the left block.
1127 	 */
1128 	else {
1129 		lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1130 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1131 		*lrp = *rrp;
1132 		xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1133 	}
1134 	/*
1135 	 * Bump and log left's numrecs, decrement and log right's numrecs.
1136 	 */
1137 	be16_add(&left->bb_numrecs, 1);
1138 	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1139 #ifdef DEBUG
1140 	if (level > 0)
1141 		xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1142 	else
1143 		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1144 #endif
1145 	be16_add(&right->bb_numrecs, -1);
1146 	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1147 	/*
1148 	 * Slide the contents of right down one entry.
1149 	 */
1150 	if (level > 0) {
1151 #ifdef DEBUG
1152 		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1153 			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1154 					level)))
1155 				return error;
1156 		}
1157 #endif
1158 		memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1159 		memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1160 		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1161 		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1162 	} else {
1163 		memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1164 		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1165 		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1166 		rkp = &key;
1167 	}
1168 	/*
1169 	 * Update the parent key values of right.
1170 	 */
1171 	if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1172 		return error;
1173 	/*
1174 	 * Slide the cursor value left one.
1175 	 */
1176 	cur->bc_ptrs[level]--;
1177 	*stat = 1;
1178 	return 0;
1179 }
1180 
1181 /*
1182  * Allocate a new root block, fill it in.
1183  */
1184 STATIC int				/* error */
xfs_inobt_newroot(xfs_btree_cur_t * cur,int * stat)1185 xfs_inobt_newroot(
1186 	xfs_btree_cur_t		*cur,	/* btree cursor */
1187 	int			*stat)	/* success/failure */
1188 {
1189 	xfs_agi_t		*agi;	/* a.g. inode header */
1190 	xfs_alloc_arg_t		args;	/* allocation argument structure */
1191 	xfs_inobt_block_t	*block;	/* one half of the old root block */
1192 	xfs_buf_t		*bp;	/* buffer containing block */
1193 	int			error;	/* error return value */
1194 	xfs_inobt_key_t		*kp;	/* btree key pointer */
1195 	xfs_agblock_t		lbno;	/* left block number */
1196 	xfs_buf_t		*lbp;	/* left buffer pointer */
1197 	xfs_inobt_block_t	*left;	/* left btree block */
1198 	xfs_buf_t		*nbp;	/* new (root) buffer */
1199 	xfs_inobt_block_t	*new;	/* new (root) btree block */
1200 	int			nptr;	/* new value for key index, 1 or 2 */
1201 	xfs_inobt_ptr_t		*pp;	/* btree address pointer */
1202 	xfs_agblock_t		rbno;	/* right block number */
1203 	xfs_buf_t		*rbp;	/* right buffer pointer */
1204 	xfs_inobt_block_t	*right;	/* right btree block */
1205 	xfs_inobt_rec_t		*rp;	/* btree record pointer */
1206 
1207 	ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
1208 
1209 	/*
1210 	 * Get a block & a buffer.
1211 	 */
1212 	agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
1213 	args.tp = cur->bc_tp;
1214 	args.mp = cur->bc_mp;
1215 	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
1216 		be32_to_cpu(agi->agi_root));
1217 	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1218 		args.isfl = args.userdata = args.minalignslop = 0;
1219 	args.minlen = args.maxlen = args.prod = 1;
1220 	args.type = XFS_ALLOCTYPE_NEAR_BNO;
1221 	if ((error = xfs_alloc_vextent(&args)))
1222 		return error;
1223 	/*
1224 	 * None available, we fail.
1225 	 */
1226 	if (args.fsbno == NULLFSBLOCK) {
1227 		*stat = 0;
1228 		return 0;
1229 	}
1230 	ASSERT(args.len == 1);
1231 	nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1232 	new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1233 	/*
1234 	 * Set the root data in the a.g. inode structure.
1235 	 */
1236 	agi->agi_root = cpu_to_be32(args.agbno);
1237 	be32_add(&agi->agi_level, 1);
1238 	xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
1239 		XFS_AGI_ROOT | XFS_AGI_LEVEL);
1240 	/*
1241 	 * At the previous root level there are now two blocks: the old
1242 	 * root, and the new block generated when it was split.
1243 	 * We don't know which one the cursor is pointing at, so we
1244 	 * set up variables "left" and "right" for each case.
1245 	 */
1246 	bp = cur->bc_bufs[cur->bc_nlevels - 1];
1247 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
1248 #ifdef DEBUG
1249 	if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1250 		return error;
1251 #endif
1252 	if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1253 		/*
1254 		 * Our block is left, pick up the right block.
1255 		 */
1256 		lbp = bp;
1257 		lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1258 		left = block;
1259 		rbno = be32_to_cpu(left->bb_rightsib);
1260 		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1261 				rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1262 			return error;
1263 		bp = rbp;
1264 		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1265 		if ((error = xfs_btree_check_sblock(cur, right,
1266 				cur->bc_nlevels - 1, rbp)))
1267 			return error;
1268 		nptr = 1;
1269 	} else {
1270 		/*
1271 		 * Our block is right, pick up the left block.
1272 		 */
1273 		rbp = bp;
1274 		rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1275 		right = block;
1276 		lbno = be32_to_cpu(right->bb_leftsib);
1277 		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1278 				lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1279 			return error;
1280 		bp = lbp;
1281 		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1282 		if ((error = xfs_btree_check_sblock(cur, left,
1283 				cur->bc_nlevels - 1, lbp)))
1284 			return error;
1285 		nptr = 2;
1286 	}
1287 	/*
1288 	 * Fill in the new block's btree header and log it.
1289 	 */
1290 	new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1291 	new->bb_level = cpu_to_be16(cur->bc_nlevels);
1292 	new->bb_numrecs = cpu_to_be16(2);
1293 	new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1294 	new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1295 	xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1296 	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1297 	/*
1298 	 * Fill in the key data in the new root.
1299 	 */
1300 	kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1301 	if (be16_to_cpu(left->bb_level) > 0) {
1302 		kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
1303 		kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
1304 	} else {
1305 		rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1306 		INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
1307 		rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1308 		INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
1309 	}
1310 	xfs_inobt_log_keys(cur, nbp, 1, 2);
1311 	/*
1312 	 * Fill in the pointer data in the new root.
1313 	 */
1314 	pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1315 	pp[0] = cpu_to_be32(lbno);
1316 	pp[1] = cpu_to_be32(rbno);
1317 	xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1318 	/*
1319 	 * Fix up the cursor.
1320 	 */
1321 	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1322 	cur->bc_ptrs[cur->bc_nlevels] = nptr;
1323 	cur->bc_nlevels++;
1324 	*stat = 1;
1325 	return 0;
1326 }
1327 
1328 /*
1329  * Move 1 record right from cur/level if possible.
1330  * Update cur to reflect the new path.
1331  */
1332 STATIC int				/* error */
xfs_inobt_rshift(xfs_btree_cur_t * cur,int level,int * stat)1333 xfs_inobt_rshift(
1334 	xfs_btree_cur_t		*cur,	/* btree cursor */
1335 	int			level,	/* level to shift record on */
1336 	int			*stat)	/* success/failure */
1337 {
1338 	int			error;	/* error return value */
1339 	int			i;	/* loop index */
1340 	xfs_inobt_key_t		key;	/* key value for leaf level upward */
1341 	xfs_buf_t		*lbp;	/* buffer for left (current) block */
1342 	xfs_inobt_block_t	*left;	/* left (current) btree block */
1343 	xfs_inobt_key_t		*lkp;	/* key pointer for left block */
1344 	xfs_inobt_ptr_t		*lpp;	/* address pointer for left block */
1345 	xfs_inobt_rec_t		*lrp;	/* record pointer for left block */
1346 	xfs_buf_t		*rbp;	/* buffer for right neighbor block */
1347 	xfs_inobt_block_t	*right;	/* right neighbor btree block */
1348 	xfs_inobt_key_t		*rkp;	/* key pointer for right block */
1349 	xfs_inobt_ptr_t		*rpp;	/* address pointer for right block */
1350 	xfs_inobt_rec_t		*rrp=NULL;	/* record pointer for right block */
1351 	xfs_btree_cur_t		*tcur;	/* temporary cursor */
1352 
1353 	/*
1354 	 * Set up variables for this block as "left".
1355 	 */
1356 	lbp = cur->bc_bufs[level];
1357 	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1358 #ifdef DEBUG
1359 	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1360 		return error;
1361 #endif
1362 	/*
1363 	 * If we've got no right sibling then we can't shift an entry right.
1364 	 */
1365 	if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1366 		*stat = 0;
1367 		return 0;
1368 	}
1369 	/*
1370 	 * If the cursor entry is the one that would be moved, don't
1371 	 * do it... it's too complicated.
1372 	 */
1373 	if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1374 		*stat = 0;
1375 		return 0;
1376 	}
1377 	/*
1378 	 * Set up the right neighbor as "right".
1379 	 */
1380 	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1381 			cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib),
1382 			0, &rbp, XFS_INO_BTREE_REF)))
1383 		return error;
1384 	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1385 	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1386 		return error;
1387 	/*
1388 	 * If it's full, it can't take another entry.
1389 	 */
1390 	if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1391 		*stat = 0;
1392 		return 0;
1393 	}
1394 	/*
1395 	 * Make a hole at the start of the right neighbor block, then
1396 	 * copy the last left block entry to the hole.
1397 	 */
1398 	if (level > 0) {
1399 		lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1400 		lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1401 		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1402 		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1403 #ifdef DEBUG
1404 		for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1405 			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1406 				return error;
1407 		}
1408 #endif
1409 		memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1410 		memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1411 #ifdef DEBUG
1412 		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1413 			return error;
1414 #endif
1415 		*rkp = *lkp; /* INT_: no change copy */
1416 		*rpp = *lpp; /* INT_: no change copy */
1417 		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1418 		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1419 	} else {
1420 		lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1421 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1422 		memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1423 		*rrp = *lrp;
1424 		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1425 		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1426 		rkp = &key;
1427 	}
1428 	/*
1429 	 * Decrement and log left's numrecs, bump and log right's numrecs.
1430 	 */
1431 	be16_add(&left->bb_numrecs, -1);
1432 	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1433 	be16_add(&right->bb_numrecs, 1);
1434 #ifdef DEBUG
1435 	if (level > 0)
1436 		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1437 	else
1438 		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1439 #endif
1440 	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1441 	/*
1442 	 * Using a temporary cursor, update the parent key values of the
1443 	 * block on the right.
1444 	 */
1445 	if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1446 		return error;
1447 	xfs_btree_lastrec(tcur, level);
1448 	if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1449 	    (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1450 		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1451 		return error;
1452 	}
1453 	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1454 	*stat = 1;
1455 	return 0;
1456 }
1457 
1458 /*
1459  * Split cur/level block in half.
1460  * Return new block number and its first record (to be inserted into parent).
1461  */
1462 STATIC int				/* error */
xfs_inobt_split(xfs_btree_cur_t * cur,int level,xfs_agblock_t * bnop,xfs_inobt_key_t * keyp,xfs_btree_cur_t ** curp,int * stat)1463 xfs_inobt_split(
1464 	xfs_btree_cur_t		*cur,	/* btree cursor */
1465 	int			level,	/* level to split */
1466 	xfs_agblock_t		*bnop,	/* output: block number allocated */
1467 	xfs_inobt_key_t		*keyp,	/* output: first key of new block */
1468 	xfs_btree_cur_t		**curp,	/* output: new cursor */
1469 	int			*stat)	/* success/failure */
1470 {
1471 	xfs_alloc_arg_t		args;	/* allocation argument structure */
1472 	int			error;	/* error return value */
1473 	int			i;	/* loop index/record number */
1474 	xfs_agblock_t		lbno;	/* left (current) block number */
1475 	xfs_buf_t		*lbp;	/* buffer for left block */
1476 	xfs_inobt_block_t	*left;	/* left (current) btree block */
1477 	xfs_inobt_key_t		*lkp;	/* left btree key pointer */
1478 	xfs_inobt_ptr_t		*lpp;	/* left btree address pointer */
1479 	xfs_inobt_rec_t		*lrp;	/* left btree record pointer */
1480 	xfs_buf_t		*rbp;	/* buffer for right block */
1481 	xfs_inobt_block_t	*right;	/* right (new) btree block */
1482 	xfs_inobt_key_t		*rkp;	/* right btree key pointer */
1483 	xfs_inobt_ptr_t		*rpp;	/* right btree address pointer */
1484 	xfs_inobt_rec_t		*rrp;	/* right btree record pointer */
1485 
1486 	/*
1487 	 * Set up left block (current one).
1488 	 */
1489 	lbp = cur->bc_bufs[level];
1490 	args.tp = cur->bc_tp;
1491 	args.mp = cur->bc_mp;
1492 	lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1493 	/*
1494 	 * Allocate the new block.
1495 	 * If we can't do it, we're toast.  Give up.
1496 	 */
1497 	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
1498 	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1499 		args.isfl = args.userdata = args.minalignslop = 0;
1500 	args.minlen = args.maxlen = args.prod = 1;
1501 	args.type = XFS_ALLOCTYPE_NEAR_BNO;
1502 	if ((error = xfs_alloc_vextent(&args)))
1503 		return error;
1504 	if (args.fsbno == NULLFSBLOCK) {
1505 		*stat = 0;
1506 		return 0;
1507 	}
1508 	ASSERT(args.len == 1);
1509 	rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1510 	/*
1511 	 * Set up the new block as "right".
1512 	 */
1513 	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1514 	/*
1515 	 * "Left" is the current (according to the cursor) block.
1516 	 */
1517 	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1518 #ifdef DEBUG
1519 	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1520 		return error;
1521 #endif
1522 	/*
1523 	 * Fill in the btree header for the new block.
1524 	 */
1525 	right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1526 	right->bb_level = left->bb_level;
1527 	right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1528 	/*
1529 	 * Make sure that if there's an odd number of entries now, that
1530 	 * each new block will have the same number of entries.
1531 	 */
1532 	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1533 	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1534 		be16_add(&right->bb_numrecs, 1);
1535 	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1536 	/*
1537 	 * For non-leaf blocks, copy keys and addresses over to the new block.
1538 	 */
1539 	if (level > 0) {
1540 		lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1541 		lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1542 		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1543 		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1544 #ifdef DEBUG
1545 		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1546 			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1547 				return error;
1548 		}
1549 #endif
1550 		memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1551 		memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1552 		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1553 		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1554 		*keyp = *rkp;
1555 	}
1556 	/*
1557 	 * For leaf blocks, copy records over to the new block.
1558 	 */
1559 	else {
1560 		lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1561 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1562 		memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1563 		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1564 		keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
1565 	}
1566 	/*
1567 	 * Find the left block number by looking in the buffer.
1568 	 * Adjust numrecs, sibling pointers.
1569 	 */
1570 	be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1571 	right->bb_rightsib = left->bb_rightsib;
1572 	left->bb_rightsib = cpu_to_be32(args.agbno);
1573 	right->bb_leftsib = cpu_to_be32(lbno);
1574 	xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1575 	xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1576 	/*
1577 	 * If there's a block to the new block's right, make that block
1578 	 * point back to right instead of to left.
1579 	 */
1580 	if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1581 		xfs_inobt_block_t	*rrblock;	/* rr btree block */
1582 		xfs_buf_t		*rrbp;		/* buffer for rrblock */
1583 
1584 		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1585 				be32_to_cpu(right->bb_rightsib), 0, &rrbp,
1586 				XFS_INO_BTREE_REF)))
1587 			return error;
1588 		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1589 		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1590 			return error;
1591 		rrblock->bb_leftsib = cpu_to_be32(args.agbno);
1592 		xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1593 	}
1594 	/*
1595 	 * If the cursor is really in the right block, move it there.
1596 	 * If it's just pointing past the last entry in left, then we'll
1597 	 * insert there, so don't change anything in that case.
1598 	 */
1599 	if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1600 		xfs_btree_setbuf(cur, level, rbp);
1601 		cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1602 	}
1603 	/*
1604 	 * If there are more levels, we'll need another cursor which refers
1605 	 * the right block, no matter where this cursor was.
1606 	 */
1607 	if (level + 1 < cur->bc_nlevels) {
1608 		if ((error = xfs_btree_dup_cursor(cur, curp)))
1609 			return error;
1610 		(*curp)->bc_ptrs[level + 1]++;
1611 	}
1612 	*bnop = args.agbno;
1613 	*stat = 1;
1614 	return 0;
1615 }
1616 
1617 /*
1618  * Update keys at all levels from here to the root along the cursor's path.
1619  */
1620 STATIC int				/* error */
xfs_inobt_updkey(xfs_btree_cur_t * cur,xfs_inobt_key_t * keyp,int level)1621 xfs_inobt_updkey(
1622 	xfs_btree_cur_t		*cur,	/* btree cursor */
1623 	xfs_inobt_key_t		*keyp,	/* new key value to update to */
1624 	int			level)	/* starting level for update */
1625 {
1626 	int			ptr;	/* index of key in block */
1627 
1628 	/*
1629 	 * Go up the tree from this level toward the root.
1630 	 * At each level, update the key value to the value input.
1631 	 * Stop when we reach a level where the cursor isn't pointing
1632 	 * at the first entry in the block.
1633 	 */
1634 	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1635 		xfs_buf_t		*bp;	/* buffer for block */
1636 		xfs_inobt_block_t	*block;	/* btree block */
1637 #ifdef DEBUG
1638 		int			error;	/* error return value */
1639 #endif
1640 		xfs_inobt_key_t		*kp;	/* ptr to btree block keys */
1641 
1642 		bp = cur->bc_bufs[level];
1643 		block = XFS_BUF_TO_INOBT_BLOCK(bp);
1644 #ifdef DEBUG
1645 		if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1646 			return error;
1647 #endif
1648 		ptr = cur->bc_ptrs[level];
1649 		kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1650 		*kp = *keyp;
1651 		xfs_inobt_log_keys(cur, bp, ptr, ptr);
1652 	}
1653 	return 0;
1654 }
1655 
1656 /*
1657  * Externally visible routines.
1658  */
1659 
1660 /*
1661  * Decrement cursor by one record at the level.
1662  * For nonzero levels the leaf-ward information is untouched.
1663  */
1664 int					/* error */
xfs_inobt_decrement(xfs_btree_cur_t * cur,int level,int * stat)1665 xfs_inobt_decrement(
1666 	xfs_btree_cur_t		*cur,	/* btree cursor */
1667 	int			level,	/* level in btree, 0 is leaf */
1668 	int			*stat)	/* success/failure */
1669 {
1670 	xfs_inobt_block_t	*block;	/* btree block */
1671 	int			error;
1672 	int			lev;	/* btree level */
1673 
1674 	ASSERT(level < cur->bc_nlevels);
1675 	/*
1676 	 * Read-ahead to the left at this level.
1677 	 */
1678 	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1679 	/*
1680 	 * Decrement the ptr at this level.  If we're still in the block
1681 	 * then we're done.
1682 	 */
1683 	if (--cur->bc_ptrs[level] > 0) {
1684 		*stat = 1;
1685 		return 0;
1686 	}
1687 	/*
1688 	 * Get a pointer to the btree block.
1689 	 */
1690 	block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1691 #ifdef DEBUG
1692 	if ((error = xfs_btree_check_sblock(cur, block, level,
1693 			cur->bc_bufs[level])))
1694 		return error;
1695 #endif
1696 	/*
1697 	 * If we just went off the left edge of the tree, return failure.
1698 	 */
1699 	if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1700 		*stat = 0;
1701 		return 0;
1702 	}
1703 	/*
1704 	 * March up the tree decrementing pointers.
1705 	 * Stop when we don't go off the left edge of a block.
1706 	 */
1707 	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1708 		if (--cur->bc_ptrs[lev] > 0)
1709 			break;
1710 		/*
1711 		 * Read-ahead the left block, we're going to read it
1712 		 * in the next loop.
1713 		 */
1714 		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1715 	}
1716 	/*
1717 	 * If we went off the root then we are seriously confused.
1718 	 */
1719 	ASSERT(lev < cur->bc_nlevels);
1720 	/*
1721 	 * Now walk back down the tree, fixing up the cursor's buffer
1722 	 * pointers and key numbers.
1723 	 */
1724 	for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1725 		xfs_agblock_t	agbno;	/* block number of btree block */
1726 		xfs_buf_t	*bp;	/* buffer containing btree block */
1727 
1728 		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1729 		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1730 				cur->bc_private.i.agno, agbno, 0, &bp,
1731 				XFS_INO_BTREE_REF)))
1732 			return error;
1733 		lev--;
1734 		xfs_btree_setbuf(cur, lev, bp);
1735 		block = XFS_BUF_TO_INOBT_BLOCK(bp);
1736 		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1737 			return error;
1738 		cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1739 	}
1740 	*stat = 1;
1741 	return 0;
1742 }
1743 
1744 /*
1745  * Delete the record pointed to by cur.
1746  * The cursor refers to the place where the record was (could be inserted)
1747  * when the operation returns.
1748  */
1749 int					/* error */
xfs_inobt_delete(xfs_btree_cur_t * cur,int * stat)1750 xfs_inobt_delete(
1751 	xfs_btree_cur_t	*cur,		/* btree cursor */
1752 	int		*stat)		/* success/failure */
1753 {
1754 	int		error;
1755 	int		i;		/* result code */
1756 	int		level;		/* btree level */
1757 
1758 	/*
1759 	 * Go up the tree, starting at leaf level.
1760 	 * If 2 is returned then a join was done; go to the next level.
1761 	 * Otherwise we are done.
1762 	 */
1763 	for (level = 0, i = 2; i == 2; level++) {
1764 		if ((error = xfs_inobt_delrec(cur, level, &i)))
1765 			return error;
1766 	}
1767 	if (i == 0) {
1768 		for (level = 1; level < cur->bc_nlevels; level++) {
1769 			if (cur->bc_ptrs[level] == 0) {
1770 				if ((error = xfs_inobt_decrement(cur, level, &i)))
1771 					return error;
1772 				break;
1773 			}
1774 		}
1775 	}
1776 	*stat = i;
1777 	return 0;
1778 }
1779 
1780 
1781 /*
1782  * Get the data from the pointed-to record.
1783  */
1784 int					/* error */
xfs_inobt_get_rec(xfs_btree_cur_t * cur,xfs_agino_t * ino,__int32_t * fcnt,xfs_inofree_t * free,int * stat)1785 xfs_inobt_get_rec(
1786 	xfs_btree_cur_t		*cur,	/* btree cursor */
1787 	xfs_agino_t		*ino,	/* output: starting inode of chunk */
1788 	__int32_t		*fcnt,	/* output: number of free inodes */
1789 	xfs_inofree_t		*free,	/* output: free inode mask */
1790 	int			*stat)	/* output: success/failure */
1791 {
1792 	xfs_inobt_block_t	*block;	/* btree block */
1793 	xfs_buf_t		*bp;	/* buffer containing btree block */
1794 #ifdef DEBUG
1795 	int			error;	/* error return value */
1796 #endif
1797 	int			ptr;	/* record number */
1798 	xfs_inobt_rec_t		*rec;	/* record data */
1799 
1800 	bp = cur->bc_bufs[0];
1801 	ptr = cur->bc_ptrs[0];
1802 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
1803 #ifdef DEBUG
1804 	if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1805 		return error;
1806 #endif
1807 	/*
1808 	 * Off the right end or left end, return failure.
1809 	 */
1810 	if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1811 		*stat = 0;
1812 		return 0;
1813 	}
1814 	/*
1815 	 * Point to the record and extract its data.
1816 	 */
1817 	rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1818 	*ino = INT_GET(rec->ir_startino, ARCH_CONVERT);
1819 	*fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT);
1820 	*free = INT_GET(rec->ir_free, ARCH_CONVERT);
1821 	*stat = 1;
1822 	return 0;
1823 }
1824 
1825 /*
1826  * Increment cursor by one record at the level.
1827  * For nonzero levels the leaf-ward information is untouched.
1828  */
1829 int					/* error */
xfs_inobt_increment(xfs_btree_cur_t * cur,int level,int * stat)1830 xfs_inobt_increment(
1831 	xfs_btree_cur_t		*cur,	/* btree cursor */
1832 	int			level,	/* level in btree, 0 is leaf */
1833 	int			*stat)	/* success/failure */
1834 {
1835 	xfs_inobt_block_t	*block;	/* btree block */
1836 	xfs_buf_t		*bp;	/* buffer containing btree block */
1837 	int			error;	/* error return value */
1838 	int			lev;	/* btree level */
1839 
1840 	ASSERT(level < cur->bc_nlevels);
1841 	/*
1842 	 * Read-ahead to the right at this level.
1843 	 */
1844 	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1845 	/*
1846 	 * Get a pointer to the btree block.
1847 	 */
1848 	bp = cur->bc_bufs[level];
1849 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
1850 #ifdef DEBUG
1851 	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1852 		return error;
1853 #endif
1854 	/*
1855 	 * Increment the ptr at this level.  If we're still in the block
1856 	 * then we're done.
1857 	 */
1858 	if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1859 		*stat = 1;
1860 		return 0;
1861 	}
1862 	/*
1863 	 * If we just went off the right edge of the tree, return failure.
1864 	 */
1865 	if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1866 		*stat = 0;
1867 		return 0;
1868 	}
1869 	/*
1870 	 * March up the tree incrementing pointers.
1871 	 * Stop when we don't go off the right edge of a block.
1872 	 */
1873 	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1874 		bp = cur->bc_bufs[lev];
1875 		block = XFS_BUF_TO_INOBT_BLOCK(bp);
1876 #ifdef DEBUG
1877 		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1878 			return error;
1879 #endif
1880 		if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1881 			break;
1882 		/*
1883 		 * Read-ahead the right block, we're going to read it
1884 		 * in the next loop.
1885 		 */
1886 		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1887 	}
1888 	/*
1889 	 * If we went off the root then we are seriously confused.
1890 	 */
1891 	ASSERT(lev < cur->bc_nlevels);
1892 	/*
1893 	 * Now walk back down the tree, fixing up the cursor's buffer
1894 	 * pointers and key numbers.
1895 	 */
1896 	for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1897 	     lev > level; ) {
1898 		xfs_agblock_t	agbno;	/* block number of btree block */
1899 
1900 		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1901 		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1902 				cur->bc_private.i.agno, agbno, 0, &bp,
1903 				XFS_INO_BTREE_REF)))
1904 			return error;
1905 		lev--;
1906 		xfs_btree_setbuf(cur, lev, bp);
1907 		block = XFS_BUF_TO_INOBT_BLOCK(bp);
1908 		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1909 			return error;
1910 		cur->bc_ptrs[lev] = 1;
1911 	}
1912 	*stat = 1;
1913 	return 0;
1914 }
1915 
1916 /*
1917  * Insert the current record at the point referenced by cur.
1918  * The cursor may be inconsistent on return if splits have been done.
1919  */
1920 int					/* error */
xfs_inobt_insert(xfs_btree_cur_t * cur,int * stat)1921 xfs_inobt_insert(
1922 	xfs_btree_cur_t	*cur,		/* btree cursor */
1923 	int		*stat)		/* success/failure */
1924 {
1925 	int		error;		/* error return value */
1926 	int		i;		/* result value, 0 for failure */
1927 	int		level;		/* current level number in btree */
1928 	xfs_agblock_t	nbno;		/* new block number (split result) */
1929 	xfs_btree_cur_t	*ncur;		/* new cursor (split result) */
1930 	xfs_inobt_rec_t	nrec;		/* record being inserted this level */
1931 	xfs_btree_cur_t	*pcur;		/* previous level's cursor */
1932 
1933 	level = 0;
1934 	nbno = NULLAGBLOCK;
1935 	INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino);
1936 	INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount);
1937 	INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free);
1938 	ncur = (xfs_btree_cur_t *)0;
1939 	pcur = cur;
1940 	/*
1941 	 * Loop going up the tree, starting at the leaf level.
1942 	 * Stop when we don't get a split block, that must mean that
1943 	 * the insert is finished with this level.
1944 	 */
1945 	do {
1946 		/*
1947 		 * Insert nrec/nbno into this level of the tree.
1948 		 * Note if we fail, nbno will be null.
1949 		 */
1950 		if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1951 				&i))) {
1952 			if (pcur != cur)
1953 				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1954 			return error;
1955 		}
1956 		/*
1957 		 * See if the cursor we just used is trash.
1958 		 * Can't trash the caller's cursor, but otherwise we should
1959 		 * if ncur is a new cursor or we're about to be done.
1960 		 */
1961 		if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1962 			cur->bc_nlevels = pcur->bc_nlevels;
1963 			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1964 		}
1965 		/*
1966 		 * If we got a new cursor, switch to it.
1967 		 */
1968 		if (ncur) {
1969 			pcur = ncur;
1970 			ncur = (xfs_btree_cur_t *)0;
1971 		}
1972 	} while (nbno != NULLAGBLOCK);
1973 	*stat = i;
1974 	return 0;
1975 }
1976 
1977 /*
1978  * Lookup the record equal to ino in the btree given by cur.
1979  */
1980 int					/* error */
xfs_inobt_lookup_eq(xfs_btree_cur_t * cur,xfs_agino_t ino,__int32_t fcnt,xfs_inofree_t free,int * stat)1981 xfs_inobt_lookup_eq(
1982 	xfs_btree_cur_t	*cur,		/* btree cursor */
1983 	xfs_agino_t	ino,		/* starting inode of chunk */
1984 	__int32_t	fcnt,		/* free inode count */
1985 	xfs_inofree_t	free,		/* free inode mask */
1986 	int		*stat)		/* success/failure */
1987 {
1988 	cur->bc_rec.i.ir_startino = ino;
1989 	cur->bc_rec.i.ir_freecount = fcnt;
1990 	cur->bc_rec.i.ir_free = free;
1991 	return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
1992 }
1993 
1994 /*
1995  * Lookup the first record greater than or equal to ino
1996  * in the btree given by cur.
1997  */
1998 int					/* error */
xfs_inobt_lookup_ge(xfs_btree_cur_t * cur,xfs_agino_t ino,__int32_t fcnt,xfs_inofree_t free,int * stat)1999 xfs_inobt_lookup_ge(
2000 	xfs_btree_cur_t	*cur,		/* btree cursor */
2001 	xfs_agino_t	ino,		/* starting inode of chunk */
2002 	__int32_t	fcnt,		/* free inode count */
2003 	xfs_inofree_t	free,		/* free inode mask */
2004 	int		*stat)		/* success/failure */
2005 {
2006 	cur->bc_rec.i.ir_startino = ino;
2007 	cur->bc_rec.i.ir_freecount = fcnt;
2008 	cur->bc_rec.i.ir_free = free;
2009 	return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2010 }
2011 
2012 /*
2013  * Lookup the first record less than or equal to ino
2014  * in the btree given by cur.
2015  */
2016 int					/* error */
xfs_inobt_lookup_le(xfs_btree_cur_t * cur,xfs_agino_t ino,__int32_t fcnt,xfs_inofree_t free,int * stat)2017 xfs_inobt_lookup_le(
2018 	xfs_btree_cur_t	*cur,		/* btree cursor */
2019 	xfs_agino_t	ino,		/* starting inode of chunk */
2020 	__int32_t	fcnt,		/* free inode count */
2021 	xfs_inofree_t	free,		/* free inode mask */
2022 	int		*stat)		/* success/failure */
2023 {
2024 	cur->bc_rec.i.ir_startino = ino;
2025 	cur->bc_rec.i.ir_freecount = fcnt;
2026 	cur->bc_rec.i.ir_free = free;
2027 	return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2028 }
2029 
2030 /*
2031  * Update the record referred to by cur, to the value given
2032  * by [ino, fcnt, free].
2033  * This either works (return 0) or gets an EFSCORRUPTED error.
2034  */
2035 int					/* error */
xfs_inobt_update(xfs_btree_cur_t * cur,xfs_agino_t ino,__int32_t fcnt,xfs_inofree_t free)2036 xfs_inobt_update(
2037 	xfs_btree_cur_t		*cur,	/* btree cursor */
2038 	xfs_agino_t		ino,	/* starting inode of chunk */
2039 	__int32_t		fcnt,	/* free inode count */
2040 	xfs_inofree_t		free)	/* free inode mask */
2041 {
2042 	xfs_inobt_block_t	*block;	/* btree block to update */
2043 	xfs_buf_t		*bp;	/* buffer containing btree block */
2044 	int			error;	/* error return value */
2045 	int			ptr;	/* current record number (updating) */
2046 	xfs_inobt_rec_t		*rp;	/* pointer to updated record */
2047 
2048 	/*
2049 	 * Pick up the current block.
2050 	 */
2051 	bp = cur->bc_bufs[0];
2052 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
2053 #ifdef DEBUG
2054 	if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2055 		return error;
2056 #endif
2057 	/*
2058 	 * Get the address of the rec to be updated.
2059 	 */
2060 	ptr = cur->bc_ptrs[0];
2061 	rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2062 	/*
2063 	 * Fill in the new contents and log them.
2064 	 */
2065 	INT_SET(rp->ir_startino, ARCH_CONVERT, ino);
2066 	INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt);
2067 	INT_SET(rp->ir_free, ARCH_CONVERT, free);
2068 	xfs_inobt_log_recs(cur, bp, ptr, ptr);
2069 	/*
2070 	 * Updating first record in leaf. Pass new key value up to our parent.
2071 	 */
2072 	if (ptr == 1) {
2073 		xfs_inobt_key_t	key;	/* key containing [ino] */
2074 
2075 		INT_SET(key.ir_startino, ARCH_CONVERT, ino);
2076 		if ((error = xfs_inobt_updkey(cur, &key, 1)))
2077 			return error;
2078 	}
2079 	return 0;
2080 }
2081