xref: /NextBSD/sys/gnu/fs/reiserfs/reiserfs_stree.c (revision eb1a5f8de9f7ea602c373a710f531abbf81141c4)
1 /*-
2  * Copyright 2000 Hans Reiser
3  * See README for licensing and copyright details
4  *
5  * Ported to FreeBSD by Jean-Sébastien Pédron <jspedron@club-internet.fr>
6  *
7  * $FreeBSD$
8  */
9 
10 #include <gnu/fs/reiserfs/reiserfs_fs.h>
11 
12 /* Minimal possible key. It is never in the tree. */
13 const struct key MIN_KEY = {
14 	0,
15 	0,
16 	{ {0, 0}, }
17 };
18 
19 /* Maximal possible key. It is never in the tree. */
20 const struct key MAX_KEY = {
21 	0xffffffff,
22 	0xffffffff,
23 	{ {0xffffffff, 0xffffffff }, }
24 };
25 
26 /* Does the buffer contain a disk block which is in the tree. */
27 int
B_IS_IN_TREE(const struct buf * p_s_bp)28 B_IS_IN_TREE(const struct buf *p_s_bp)
29 {
30 
31 	return (B_LEVEL(p_s_bp) != FREE_LEVEL);
32 }
33 
34 /* To gets item head in le form */
35 void
copy_item_head(struct item_head * p_v_to,const struct item_head * p_v_from)36 copy_item_head(struct item_head *p_v_to, const struct item_head *p_v_from)
37 {
38 
39 	memcpy(p_v_to, p_v_from, IH_SIZE);
40 }
41 
42 /*
43  * k1 is pointer to on-disk structure which is stored in little-endian
44  * form. k2 is pointer to cpu variable. For key of items of the same
45  * object this returns 0.
46  * Returns: -1 if key1 < key2, 0 if key1 == key2 or 1 if key1 > key2
47  */
48 /*inline*/ int
comp_short_keys(const struct key * le_key,const struct cpu_key * cpu_key)49 comp_short_keys(const struct key *le_key, const struct cpu_key *cpu_key)
50 {
51 	const uint32_t *p_s_le_u32, *p_s_cpu_u32;
52 	int n_key_length = REISERFS_SHORT_KEY_LEN;
53 
54 	p_s_le_u32  = (const uint32_t *)le_key;
55 	p_s_cpu_u32 = (const uint32_t *)&cpu_key->on_disk_key;
56 	for(; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32) {
57 		if (le32toh(*p_s_le_u32) < *p_s_cpu_u32)
58 			return (-1);
59 		if (le32toh(*p_s_le_u32) > *p_s_cpu_u32)
60 			return (1);
61 	}
62 
63 	return (0);
64 }
65 
66 /*
67  * k1 is pointer to on-disk structure which is stored in little-endian
68  * form. k2 is pointer to cpu variable. Compare keys using all 4 key
69  * fields.
70  * Returns: -1 if key1 < key2, 0 if key1 = key2 or 1 if key1 > key2
71  */
72 /*inline*/ int
comp_keys(const struct key * le_key,const struct cpu_key * cpu_key)73 comp_keys(const struct key *le_key, const struct cpu_key *cpu_key)
74 {
75 	int retval;
76 
77 	retval = comp_short_keys(le_key, cpu_key);
78 	if (retval)
79 		return retval;
80 
81 	if (le_key_k_offset(le_key_version(le_key), le_key) <
82 	    cpu_key_k_offset(cpu_key))
83 		return (-1);
84 	if (le_key_k_offset(le_key_version(le_key), le_key) >
85 	    cpu_key_k_offset(cpu_key))
86 		return (1);
87 
88 	if (cpu_key->key_length == 3)
89 		return (0);
90 
91 	/* This part is needed only when tail conversion is in progress */
92 	if (le_key_k_type(le_key_version(le_key), le_key) <
93 	    cpu_key_k_type(cpu_key))
94 		return (-1);
95 
96 	if (le_key_k_type(le_key_version(le_key), le_key) >
97 	    cpu_key_k_type(cpu_key))
98 		return (1);
99 
100 	return (0);
101 }
102 
103 /* Release all buffers in the path. */
104 void
pathrelse(struct path * p_s_search_path)105 pathrelse(struct path *p_s_search_path)
106 {
107 	struct buf *bp;
108 	int n_path_offset = p_s_search_path->path_length;
109 
110 	while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
111 		bp = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
112 		free(bp->b_data, M_REISERFSPATH);
113 		free(bp, M_REISERFSPATH);
114 	}
115 
116 	p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
117 }
118 
119 /*
120  * This does not say which one is bigger, it only returns 1 if keys
121  * are not equal, 0 otherwise
122  */
123 int
comp_le_keys(const struct key * k1,const struct key * k2)124 comp_le_keys(const struct key *k1, const struct key *k2)
125 {
126 
127 	return (memcmp(k1, k2, sizeof(struct key)));
128 }
129 
130 /*
131  * Binary search toolkit function. Search for an item in the array by
132  * the item key.
133  * Returns: 1 if found,  0 if not found;
134  *          *p_n_pos = number of the searched element if found, else the
135  *          number of the first element that is larger than p_v_key.
136  */
137 /*
138  * For those not familiar with binary search: n_lbound is the leftmost
139  * item that it could be, n_rbound the rightmost item that it could be.
140  * We examine the item halfway between n_lbound and n_rbound, and that
141  * tells us either that we can increase n_lbound, or decrease n_rbound,
142  * or that we have found it, or if n_lbound <= n_rbound that there are
143  * no possible items, and we have not found it. With each examination we
144  * cut the number of possible items it could be by one more than half
145  * rounded down, or we find it.
146  */
147 int
bin_search(const void * p_v_key,const void * p_v_base,int p_n_num,int p_n_width,int * p_n_pos)148 bin_search(const void *p_v_key,  /* Key to search for. */
149     const void *p_v_base, /* First item in the array. */
150     int p_n_num,          /* Number of items in the array. */
151     int p_n_width,        /* Item size in the array. searched. Lest the
152 			     reader be confused, note that this is crafted
153 			     as a general function, and when it is applied
154 			     specifically to the array of item headers in
155 			     a node, p_n_width is actually the item header
156 			     size not the item size. */
157     int *p_n_pos)         /* Number of the searched for element. */
158 {
159 	int n_rbound, n_lbound, n_j;
160 
161 	for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
162 	    n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2) {
163 		switch (COMP_KEYS((const struct key *)
164 		    ((const char *)p_v_base + n_j * p_n_width),
165 		    (const struct cpu_key *)p_v_key)) {
166 		case -1:
167 			n_lbound = n_j + 1;
168 			continue;
169 		case 1:
170 			n_rbound = n_j - 1;
171 			continue;
172 		case 0:
173 			*p_n_pos = n_j;
174 			return (ITEM_FOUND); /* Key found in the array. */
175 		}
176 	}
177 
178 	/*
179 	 * bin_search did not find given key, it returns position of key,
180 	 * that is minimal and greater than the given one.
181 	 */
182 	*p_n_pos = n_lbound;
183 	return (ITEM_NOT_FOUND);
184 }
185 
186 /*
187  * Get delimiting key of the buffer by looking for it in the buffers in
188  * the path, starting from the bottom of the path, and going upwards. We
189  * must check the path's validity at each step. If the key is not in the
190  * path, there is no delimiting key in the tree (buffer is first or last
191  * buffer in tree), and in this case we return a special key, either
192  * MIN_KEY or MAX_KEY.
193  */
194 const struct key *
get_lkey(const struct path * p_s_chk_path,const struct reiserfs_sb_info * p_s_sbi)195 get_lkey(const struct path *p_s_chk_path,
196     const struct reiserfs_sb_info *p_s_sbi)
197 {
198 	struct buf *p_s_parent;
199 	int n_position, n_path_offset = p_s_chk_path->path_length;
200 
201 	/* While not higher in path than first element. */
202 	while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
203 		/* Parent at the path is not in the tree now. */
204 		if (!B_IS_IN_TREE(p_s_parent =
205 		    PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
206 			return (&MAX_KEY);
207 
208 		/* Check whether position in the parent is correct. */
209 		if ((n_position = PATH_OFFSET_POSITION(p_s_chk_path,
210 		    n_path_offset)) > B_NR_ITEMS(p_s_parent))
211 			return (&MAX_KEY);
212 
213 		/*
214 		 * Check whether parent at the path really points to
215 		 * the child.
216 		 */
217 		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
218 		    (PATH_OFFSET_PBUFFER(p_s_chk_path,
219 					 n_path_offset + 1)->b_blkno
220 		     / btodb(p_s_sbi->s_blocksize)))
221 			return (&MAX_KEY);
222 
223 		/*
224 		 * Return delimiting key if position in the parent is not
225 		 * equal to zero.
226 		 */
227 		if (n_position)
228 			return (B_N_PDELIM_KEY(p_s_parent, n_position - 1));
229 	}
230 
231 	/* Return MIN_KEY if we are in the root of the buffer tree. */
232 	if ((PATH_OFFSET_PBUFFER(p_s_chk_path,
233 	    FIRST_PATH_ELEMENT_OFFSET)->b_blkno
234 	    / btodb(p_s_sbi->s_blocksize)) == SB_ROOT_BLOCK(p_s_sbi))
235 		return (&MIN_KEY);
236 
237 	return (&MAX_KEY);
238 }
239 
240 /* Get delimiting key of the buffer at the path and its right neighbor. */
241 const struct key *
get_rkey(const struct path * p_s_chk_path,const struct reiserfs_sb_info * p_s_sbi)242 get_rkey(const struct path *p_s_chk_path,
243     const struct reiserfs_sb_info *p_s_sbi)
244 {
245 	struct buf *p_s_parent;
246 	int n_position, n_path_offset = p_s_chk_path->path_length;
247 
248 	while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
249 		/* Parent at the path is not in the tree now. */
250 		if (!B_IS_IN_TREE(p_s_parent =
251 		    PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
252 			return (&MIN_KEY);
253 
254 		/* Check whether position in the parent is correct. */
255 		if ((n_position = PATH_OFFSET_POSITION(p_s_chk_path,
256 		    n_path_offset)) >
257 		    B_NR_ITEMS(p_s_parent))
258 			return (&MIN_KEY);
259 
260 		/*
261 		 * Check whether parent at the path really points to the
262 		 * child.
263 		 */
264 		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
265 		    (PATH_OFFSET_PBUFFER(p_s_chk_path,
266 					 n_path_offset + 1)->b_blkno
267 		     / btodb(p_s_sbi->s_blocksize)))
268 			return (&MIN_KEY);
269 
270 		/*
271 		 * Return delimiting key if position in the parent is not
272 		 * the last one.
273 		 */
274 		if (n_position != B_NR_ITEMS(p_s_parent))
275 			return (B_N_PDELIM_KEY(p_s_parent, n_position));
276 	}
277 
278 	/* Return MAX_KEY if we are in the root of the buffer tree. */
279 	if ((PATH_OFFSET_PBUFFER(p_s_chk_path,
280 	    FIRST_PATH_ELEMENT_OFFSET)->b_blkno
281 	    / btodb(p_s_sbi->s_blocksize)) == SB_ROOT_BLOCK(p_s_sbi))
282 		return (&MAX_KEY);
283 
284 	return (&MIN_KEY);
285 }
286 
287 int
reiserfs_check_path(struct path * p)288 reiserfs_check_path(struct path *p)
289 {
290 
291 	if (p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET)
292 		reiserfs_log(LOG_WARNING, "path not properly relsed\n");
293 	return (0);
294 }
295 
296 /*
297  * Check whether a key is contained in the tree rooted from a buffer at
298  * a path. This works by looking at the left and right delimiting keys
299  * for the buffer in the last path_element in the path. These delimiting
300  * keys are stored at least one level above that buffer in the tree.
301  * If the buffer is the first or last node in the tree order then one
302  * of the delimiting keys may be absent, and in this case get_lkey and
303  * get_rkey return a special key which is MIN_KEY or MAX_KEY.
304  */
305 static inline int
key_in_buffer(struct path * p_s_chk_path,const struct cpu_key * p_s_key,struct reiserfs_sb_info * p_s_sbi)306 key_in_buffer(
307     struct path *p_s_chk_path,         /* Path which should be checked. */
308     const struct cpu_key *p_s_key,     /* Key which should be checked. */
309     struct reiserfs_sb_info  *p_s_sbi) /* Super block pointer. */
310 {
311 
312 	if (COMP_KEYS(get_lkey(p_s_chk_path, p_s_sbi), p_s_key) == 1)
313 		/* left delimiting key is bigger, that the key we look for */
314 		return (0);
315 
316 	if (COMP_KEYS(get_rkey(p_s_chk_path, p_s_sbi), p_s_key) != 1)
317 		/* p_s_key must be less than right delimitiing key */
318 		return (0);
319 
320 	return (1);
321 }
322 
323 #if 0
324 /* XXX Il ne semble pas y avoir de compteur de référence dans struct buf */
325 inline void
326 decrement_bcount(struct buf *p_s_bp)
327 {
328 
329 	if (p_s_bp) {
330 		if (atomic_read(&(p_s_bp->b_count))) {
331 			put_bh(p_s_bp);
332 			return;
333 		}
334 	}
335 }
336 #endif
337 
338 /* Decrement b_count field of the all buffers in the path. */
339 void
decrement_counters_in_path(struct path * p_s_search_path)340 decrement_counters_in_path(struct path *p_s_search_path)
341 {
342 
343 	pathrelse(p_s_search_path);
344 #if 0
345 	int n_path_offset = p_s_search_path->path_length;
346 
347 	while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
348 		struct buf *bp;
349 
350 		bp = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
351 		decrement_bcount(bp);
352 	}
353 
354 	p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
355 #endif
356 }
357 
358 static int
is_leaf(char * buf,int blocksize,struct buf * bp)359 is_leaf(char *buf, int blocksize, struct buf *bp)
360 {
361 	struct item_head *ih;
362 	struct block_head *blkh;
363 	int used_space, prev_location, i, nr;
364 
365 	blkh = (struct block_head *)buf;
366 	if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
367 		reiserfs_log(LOG_WARNING, "this should be caught earlier");
368 		return (0);
369 	}
370 
371 	nr = blkh_nr_item(blkh);
372 	if (nr < 1 || nr >
373 	    ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
374 		/* Item number is too big or too small */
375 		reiserfs_log(LOG_WARNING, "nr_item seems wrong\n");
376 		return (0);
377 	}
378 
379 	ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
380 	used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
381 	if (used_space != blocksize - blkh_free_space(blkh)) {
382 		/*
383 		 * Free space does not match to calculated amount of
384 		 * use space
385 		 */
386 		reiserfs_log(LOG_WARNING, "free space seems wrong\n");
387 		return (0);
388 	}
389 
390 	/* FIXME: it is_leaf will hit performance too much - we may have
391 	 * return 1 here */
392 
393 	/* Check tables of item heads */
394 	ih = (struct item_head *)(buf + BLKH_SIZE);
395 	prev_location = blocksize;
396 	for (i = 0; i < nr; i++, ih++) {
397 		if (le_ih_k_type(ih) == TYPE_ANY) {
398 			reiserfs_log(LOG_WARNING,
399 			    "wrong item type for item\n");
400 			return (0);
401 		}
402 		if (ih_location(ih) >= blocksize ||
403 		    ih_location(ih) < IH_SIZE * nr) {
404 			reiserfs_log(LOG_WARNING,
405 			    "item location seems wrong\n");
406 			return (0);
407 		}
408 		if (ih_item_len(ih) < 1 ||
409 		    ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
410 			reiserfs_log(LOG_WARNING, "item length seems wrong\n");
411 			return (0);
412 		}
413 		if (prev_location - ih_location(ih) != ih_item_len(ih)) {
414 			reiserfs_log(LOG_WARNING,
415 			    "item location seems wrong (second one)\n");
416 			return (0);
417 		}
418 		prev_location = ih_location(ih);
419 	}
420 
421 	/* One may imagine much more checks */
422 	return 1;
423 }
424 
425 /* Returns 1 if buf looks like an internal node, 0 otherwise */
426 static int
is_internal(char * buf,int blocksize,struct buf * bp)427 is_internal(char *buf, int blocksize, struct buf *bp)
428 {
429 	int nr, used_space;
430 	struct block_head *blkh;
431 
432 	blkh = (struct block_head *)buf;
433 	nr   = blkh_level(blkh);
434 	if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
435 		/* This level is not possible for internal nodes */
436 		reiserfs_log(LOG_WARNING, "this should be caught earlier\n");
437 		return (0);
438 	}
439 
440 	nr = blkh_nr_item(blkh);
441 	if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
442 		/*
443 		 * For internal which is not root we might check min
444 		 * number of keys
445 		 */
446 		reiserfs_log(LOG_WARNING, "number of key seems wrong\n");
447 		return (0);
448 	}
449 
450 	used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
451 	if (used_space != blocksize - blkh_free_space(blkh)) {
452 		reiserfs_log(LOG_WARNING,
453 		    "is_internal: free space seems wrong\n");
454 		return (0);
455 	}
456 
457 	/* One may imagine much more checks */
458 	return (1);
459 }
460 
461 /*
462  * Make sure that bh contains formatted node of reiserfs tree of
463  * 'level'-th level
464  */
465 static int
is_tree_node(struct buf * bp,int level)466 is_tree_node(struct buf *bp, int level)
467 {
468 	if (B_LEVEL(bp) != level) {
469 		reiserfs_log(LOG_WARNING,
470 		    "node level (%d) doesn't match to the "
471 		    "expected one (%d)\n", B_LEVEL (bp), level);
472 		return (0);
473 	}
474 
475 	if (level == DISK_LEAF_NODE_LEVEL)
476 		return (is_leaf(bp->b_data, bp->b_bcount, bp));
477 
478 	return (is_internal(bp->b_data, bp->b_bcount, bp));
479 }
480 
481 int
search_by_key(struct reiserfs_sb_info * p_s_sbi,const struct cpu_key * p_s_key,struct path * p_s_search_path,int n_stop_level)482 search_by_key(struct reiserfs_sb_info *p_s_sbi,
483     const struct cpu_key * p_s_key, /* Key to search. */
484     struct path * p_s_search_path,  /* This structure was allocated and
485 				       initialized by the calling function.
486 				       It is filled up by this function. */
487     int n_stop_level)               /* How far down the tree to search. To
488 				       stop at leaf level - set to
489 				       DISK_LEAF_NODE_LEVEL */
490 {
491 	int error;
492 	int n_node_level, n_retval;
493 	int n_block_number, expected_level, fs_gen;
494 	struct path_element *p_s_last_element;
495 	struct buf *p_s_bp, *tmp_bp;
496 
497 	/*
498 	 * As we add each node to a path we increase its count. This means that
499 	 * we must be careful to release all nodes in a path before we either
500 	 * discard the path struct or re-use the path struct, as we do here.
501 	 */
502 	decrement_counters_in_path(p_s_search_path);
503 
504 	/*
505 	 * With each iteration of this loop we search through the items in the
506 	 * current node, and calculate the next current node(next path element)
507 	 * for the next iteration of this loop...
508 	 */
509 	n_block_number = SB_ROOT_BLOCK(p_s_sbi);
510 	expected_level = -1;
511 
512 	reiserfs_log(LOG_DEBUG, "root block: #%d\n", n_block_number);
513 
514 	while (1) {
515 		/* Prep path to have another element added to it. */
516 		reiserfs_log(LOG_DEBUG, "path element #%d\n",
517 		    p_s_search_path->path_length);
518 		p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path,
519 		    ++p_s_search_path->path_length);
520 		fs_gen = get_generation(p_s_sbi);
521 
522 		/*
523 		 * Read the next tree node, and set the last element in the
524 		 * path to have a pointer to it.
525 		 */
526 		reiserfs_log(LOG_DEBUG, "reading block #%d\n",
527 		    n_block_number);
528 		if ((error = bread(p_s_sbi->s_devvp,
529 		    n_block_number * btodb(p_s_sbi->s_blocksize),
530 		    p_s_sbi->s_blocksize, NOCRED, &tmp_bp)) != 0) {
531 			reiserfs_log(LOG_DEBUG, "error reading block\n");
532 			p_s_search_path->path_length--;
533 			pathrelse(p_s_search_path);
534 			return (IO_ERROR);
535 		}
536 		reiserfs_log(LOG_DEBUG, "blkno = %ju, lblkno = %ju\n",
537 		    (intmax_t)tmp_bp->b_blkno, (intmax_t)tmp_bp->b_lblkno);
538 
539 		/*
540 		 * As i didn't found a way to handle the lock correctly,
541 		 * i copy the data into a fake buffer
542 		 */
543 		reiserfs_log(LOG_DEBUG, "allocating p_s_bp\n");
544 		p_s_bp = malloc(sizeof *p_s_bp, M_REISERFSPATH, M_WAITOK);
545 		if (!p_s_bp) {
546 			reiserfs_log(LOG_DEBUG, "error allocating memory\n");
547 			p_s_search_path->path_length--;
548 			pathrelse(p_s_search_path);
549 			brelse(tmp_bp);
550 			return (IO_ERROR);
551 		}
552 		reiserfs_log(LOG_DEBUG, "copying struct buf\n");
553 		bcopy(tmp_bp, p_s_bp, sizeof(struct buf));
554 
555 		reiserfs_log(LOG_DEBUG, "allocating p_s_bp->b_data\n");
556 		p_s_bp->b_data = malloc(p_s_sbi->s_blocksize,
557 		    M_REISERFSPATH, M_WAITOK);
558 		if (!p_s_bp->b_data) {
559 			reiserfs_log(LOG_DEBUG, "error allocating memory\n");
560 			p_s_search_path->path_length--;
561 			pathrelse(p_s_search_path);
562 			free(p_s_bp, M_REISERFSPATH);
563 			brelse(tmp_bp);
564 			return (IO_ERROR);
565 		}
566 		reiserfs_log(LOG_DEBUG, "copying buffer data\n");
567 		bcopy(tmp_bp->b_data, p_s_bp->b_data, p_s_sbi->s_blocksize);
568 		brelse(tmp_bp);
569 		tmp_bp = NULL;
570 
571 		reiserfs_log(LOG_DEBUG, "...done\n");
572 		p_s_last_element->pe_buffer = p_s_bp;
573 
574 		if (expected_level == -1)
575 			expected_level = SB_TREE_HEIGHT(p_s_sbi);
576 		expected_level--;
577 		reiserfs_log(LOG_DEBUG, "expected level: %d (%d)\n",
578 		    expected_level, SB_TREE_HEIGHT(p_s_sbi));
579 
580 		/* XXX */
581 		/*
582 		 * It is possible that schedule occurred. We must check
583 		 * whether the key to search is still in the tree rooted
584 		 * from the current buffer. If not then repeat search
585 		 * from the root.
586 		 */
587 		if (fs_changed(fs_gen, p_s_sbi) &&
588 		    (!B_IS_IN_TREE(p_s_bp) ||
589 		     B_LEVEL(p_s_bp) != expected_level ||
590 		     !key_in_buffer(p_s_search_path, p_s_key, p_s_sbi))) {
591 			reiserfs_log(LOG_DEBUG,
592 			    "the key isn't in the tree anymore\n");
593 			decrement_counters_in_path(p_s_search_path);
594 
595 			/*
596 			 * Get the root block number so that we can repeat
597 			 * the search starting from the root.
598 			 */
599 			n_block_number = SB_ROOT_BLOCK(p_s_sbi);
600 			expected_level = -1;
601 
602 			/* Repeat search from the root */
603 			continue;
604 		}
605 
606 		/*
607 		 * Make sure, that the node contents look like a node of
608 		 * certain level
609 		 */
610 		if (!is_tree_node(p_s_bp, expected_level)) {
611 			reiserfs_log(LOG_WARNING,
612 			    "invalid format found in block %ju. Fsck?",
613 			    (intmax_t)p_s_bp->b_blkno);
614 			pathrelse (p_s_search_path);
615 			return (IO_ERROR);
616 		}
617 
618 		/* Ok, we have acquired next formatted node in the tree */
619 		n_node_level = B_LEVEL(p_s_bp);
620 		reiserfs_log(LOG_DEBUG, "block info:\n");
621 		reiserfs_log(LOG_DEBUG, "  node level:  %d\n",
622 		    n_node_level);
623 		reiserfs_log(LOG_DEBUG, "  nb of items: %d\n",
624 		    B_NR_ITEMS(p_s_bp));
625 		reiserfs_log(LOG_DEBUG, "  free space:  %d bytes\n",
626 		    B_FREE_SPACE(p_s_bp));
627 		reiserfs_log(LOG_DEBUG, "bin_search with :\n"
628 		    "  p_s_key = (objectid=%d, dirid=%d)\n"
629 		    "  B_NR_ITEMS(p_s_bp) = %d\n"
630 		    "  p_s_last_element->pe_position = %d (path_length = %d)\n",
631 		    p_s_key->on_disk_key.k_objectid,
632 		    p_s_key->on_disk_key.k_dir_id,
633 		    B_NR_ITEMS(p_s_bp),
634 		    p_s_last_element->pe_position,
635 		    p_s_search_path->path_length);
636 		n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bp, 0),
637 		    B_NR_ITEMS(p_s_bp),
638 		    (n_node_level == DISK_LEAF_NODE_LEVEL) ? IH_SIZE : KEY_SIZE,
639 		    &(p_s_last_element->pe_position));
640 		reiserfs_log(LOG_DEBUG, "bin_search result: %d\n",
641 		    n_retval);
642 		if (n_node_level == n_stop_level) {
643 			reiserfs_log(LOG_DEBUG, "stop level reached (%s)\n",
644 			    n_retval == ITEM_FOUND ? "found" : "not found");
645 			return (n_retval);
646 		}
647 
648 		/* We are not in the stop level */
649 		if (n_retval == ITEM_FOUND)
650 			/*
651 			 * Item has been found, so we choose the pointer
652 			 * which is to the right of the found one
653 			 */
654 			p_s_last_element->pe_position++;
655 
656 		/*
657 		 * If item was not found we choose the position which is
658 		 * to the left of the found item. This requires no code,
659 		 * bin_search did it already.
660 		 */
661 
662 		/*
663 		 * So we have chosen a position in the current node which
664 		 * is an internal node. Now we calculate child block number
665 		 * by position in the node.
666 		 */
667 		n_block_number = B_N_CHILD_NUM(p_s_bp,
668 		    p_s_last_element->pe_position);
669 	}
670 
671 	reiserfs_log(LOG_DEBUG, "done\n");
672 	return (0);
673 }
674 
675 /*
676  * Form the path to an item and position in this item which contains
677  * file byte defined by p_s_key. If there is no such item corresponding
678  * to the key, we point the path to the item with maximal key less than
679  * p_s_key, and *p_n_pos_in_item is set to one past the last entry/byte
680  * in the item. If searching for entry in a directory item, and it is
681  * not found, *p_n_pos_in_item is set to one entry more than the entry
682  * with maximal key which is less than the sought key.
683  *
684  * Note that if there is no entry in this same node which is one more,
685  * then we point to an imaginary entry. For direct items, the position
686  * is in units of bytes, for indirect items the position is in units
687  * of blocknr entries, for directory items the position is in units of
688  * directory entries.
689  */
690 
691 /* The function is NOT SCHEDULE-SAFE! */
692 int
search_for_position_by_key(struct reiserfs_sb_info * p_s_sbi,const struct cpu_key * p_cpu_key,struct path * p_s_search_path)693 search_for_position_by_key(struct reiserfs_sb_info *p_s_sbi,
694     const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
695     struct path *p_s_search_path)    /* Filled up by this function.  */
696 {
697 	int retval, n_blk_size;
698 	off_t item_offset, offset;
699 	struct item_head *p_le_ih; /* Pointer to on-disk structure */
700 	struct reiserfs_dir_entry de;
701 
702 	/* If searching for directory entry. */
703 	if (is_direntry_cpu_key(p_cpu_key))
704 		return (search_by_entry_key(p_s_sbi, p_cpu_key,
705 		    p_s_search_path, &de));
706 
707 	/* If not searching for directory entry. */
708 
709 	/* If item is found. */
710 	retval = search_item(p_s_sbi, p_cpu_key, p_s_search_path);
711 	if (retval == IO_ERROR)
712 		return (retval);
713 	if (retval == ITEM_FOUND) {
714 		if (ih_item_len(B_N_PITEM_HEAD(
715 		    PATH_PLAST_BUFFER(p_s_search_path),
716 		    PATH_LAST_POSITION(p_s_search_path))) == 0) {
717 			reiserfs_log(LOG_WARNING, "item length equals zero\n");
718 		}
719 
720 		pos_in_item(p_s_search_path) = 0;
721 		return (POSITION_FOUND);
722 	}
723 
724 	if (PATH_LAST_POSITION(p_s_search_path) == 0) {
725 		reiserfs_log(LOG_WARNING, "position equals zero\n");
726 	}
727 
728 	/* Item is not found. Set path to the previous item. */
729 	p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
730 	    --PATH_LAST_POSITION(p_s_search_path));
731 	n_blk_size = p_s_sbi->s_blocksize;
732 
733 	if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
734 		return (FILE_NOT_FOUND);
735 	}
736 
737 	item_offset = le_ih_k_offset(p_le_ih);
738 	offset = cpu_key_k_offset(p_cpu_key);
739 
740 	/* Needed byte is contained in the item pointed to by the path.*/
741 	if (item_offset <= offset &&
742 	    item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
743 		pos_in_item(p_s_search_path) = offset - item_offset;
744 		if (is_indirect_le_ih(p_le_ih)) {
745 			pos_in_item(p_s_search_path) /= n_blk_size;
746 		}
747 		return (POSITION_FOUND);
748 	}
749 
750 	/* Needed byte is not contained in the item pointed to by the
751 	 * path. Set pos_in_item out of the item. */
752 	if (is_indirect_le_ih(p_le_ih))
753 		pos_in_item(p_s_search_path) =
754 		    ih_item_len(p_le_ih) / UNFM_P_SIZE;
755 	else
756 		pos_in_item(p_s_search_path) =
757 		    ih_item_len(p_le_ih);
758 
759 	return (POSITION_NOT_FOUND);
760 }
761