1 /*        $NetBSD: hash_page.c,v 1.29 2016/09/24 20:08:29 christos Exp $        */
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *        The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Margo Seltzer.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #if HAVE_NBTOOL_CONFIG_H
36 #include "nbtool_config.h"
37 #endif
38 
39 #include <sys/cdefs.h>
40 __RCSID("$NetBSD: hash_page.c,v 1.29 2016/09/24 20:08:29 christos Exp $");
41 
42 /*
43  * PACKAGE:  hashing
44  *
45  * DESCRIPTION:
46  *        Page manipulation for hashing package.
47  *
48  * ROUTINES:
49  *
50  * External
51  *        __get_page
52  *        __add_ovflpage
53  * Internal
54  *        overflow_page
55  */
56 
57 #include "namespace.h"
58 
59 #include <sys/types.h>
60 
61 #include <errno.h>
62 #include <fcntl.h>
63 #include <signal.h>
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <unistd.h>
68 #include <paths.h>
69 #include <assert.h>
70 
71 #include <db.h>
72 #include "hash.h"
73 #include "page.h"
74 #include "extern.h"
75 
76 static uint32_t     *fetch_bitmap(HTAB *, int);
77 static uint32_t      first_free(uint32_t);
78 static uint16_t      overflow_page(HTAB *);
79 static void          putpair(char *, const DBT *, const DBT *);
80 static void          squeeze_key(uint16_t *, const DBT *, const DBT *);
81 static int           ugly_split(HTAB *, uint32_t, BUFHEAD *, BUFHEAD *, int, int);
82 
83 #define   PAGE_INIT(P) { \
84           ((uint16_t *)(void *)(P))[0] = 0; \
85           temp = 3 * sizeof(uint16_t); \
86           _DIAGASSERT((size_t)HASH_BSIZE(hashp) >= temp); \
87           ((uint16_t *)(void *)(P))[1] = (uint16_t)(HASH_BSIZE(hashp) - temp); \
88           ((uint16_t *)(void *)(P))[2] = HASH_BSIZE(hashp); \
89 }
90 
91 /*
92  * This is called AFTER we have verified that there is room on the page for
93  * the pair (PAIRFITS has returned true) so we go right ahead and start moving
94  * stuff on.
95  */
96 static void
putpair(char * p,const DBT * key,const DBT * val)97 putpair(char *p, const DBT *key, const DBT *val)
98 {
99           uint16_t *bp, n, off;
100           size_t temp;
101 
102           bp = (uint16_t *)(void *)p;
103 
104           /* Enter the key first. */
105           n = bp[0];
106 
107           temp = OFFSET(bp);
108           _DIAGASSERT(temp >= key->size);
109           off = (uint16_t)(temp - key->size);
110           memmove(p + off, key->data, key->size);
111           bp[++n] = off;
112 
113           /* Now the data. */
114           _DIAGASSERT(off >= val->size);
115           off -= (uint16_t)val->size;
116           memmove(p + off, val->data, val->size);
117           bp[++n] = off;
118 
119           /* Adjust page info. */
120           bp[0] = n;
121           temp = (n + 3) * sizeof(uint16_t);
122           _DIAGASSERT(off >= temp);
123           bp[n + 1] = (uint16_t)(off - temp);
124           bp[n + 2] = off;
125 }
126 
127 /*
128  * Returns:
129  *         0 OK
130  *        -1 error
131  */
132 int
__delpair(HTAB * hashp,BUFHEAD * bufp,int ndx)133 __delpair(HTAB *hashp, BUFHEAD *bufp, int ndx)
134 {
135           uint16_t *bp, newoff;
136           int n;
137           uint16_t pairlen;
138           size_t temp;
139 
140           bp = (uint16_t *)(void *)bufp->page;
141           n = bp[0];
142 
143           if (bp[ndx + 1] < REAL_KEY)
144                     return (__big_delete(hashp, bufp));
145           if (ndx != 1)
146                     newoff = bp[ndx - 1];
147           else
148                     newoff = HASH_BSIZE(hashp);
149           pairlen = newoff - bp[ndx + 1];
150 
151           if (ndx != (n - 1)) {
152                     /* Hard Case -- need to shuffle keys */
153                     int i;
154                     char *src = bufp->page + (int)OFFSET(bp);
155                     char *dst = src + (int)pairlen;
156                     memmove(dst, src, (size_t)(bp[ndx + 1] - OFFSET(bp)));
157 
158                     /* Now adjust the pointers */
159                     for (i = ndx + 2; i <= n; i += 2) {
160                               if (bp[i + 1] == OVFLPAGE) {
161                                         bp[i - 2] = bp[i];
162                                         bp[i - 1] = bp[i + 1];
163                               } else {
164                                         bp[i - 2] = bp[i] + pairlen;
165                                         bp[i - 1] = bp[i + 1] + pairlen;
166                               }
167                     }
168           }
169           /* Finally adjust the page data */
170           bp[n] = OFFSET(bp) + pairlen;
171           temp = bp[n + 1] + pairlen + 2 * sizeof(uint16_t);
172           _DIAGASSERT(temp <= 0xffff);
173           bp[n - 1] = (uint16_t)temp;
174           bp[0] = n - 2;
175           hashp->NKEYS--;
176 
177           bufp->flags |= BUF_MOD;
178           return (0);
179 }
180 /*
181  * Returns:
182  *         0 ==> OK
183  *        -1 ==> Error
184  */
185 int
__split_page(HTAB * hashp,uint32_t obucket,uint32_t nbucket)186 __split_page(HTAB *hashp, uint32_t obucket, uint32_t nbucket)
187 {
188           BUFHEAD *new_bufp, *old_bufp;
189           uint16_t *ino;
190           char *np;
191           DBT key, val;
192           int n, ndx, retval;
193           uint16_t copyto, diff, off, moved;
194           char *op;
195           size_t temp;
196 
197           copyto = HASH_BSIZE(hashp);
198           off = HASH_BSIZE(hashp);
199           old_bufp = __get_buf(hashp, obucket, NULL, 0);
200           if (old_bufp == NULL)
201                     return (-1);
202           new_bufp = __get_buf(hashp, nbucket, NULL, 0);
203           if (new_bufp == NULL)
204                     return (-1);
205 
206           old_bufp->flags |= (BUF_MOD | BUF_PIN);
207           new_bufp->flags |= (BUF_MOD | BUF_PIN);
208 
209           ino = (uint16_t *)(void *)(op = old_bufp->page);
210           np = new_bufp->page;
211 
212           moved = 0;
213 
214           for (n = 1, ndx = 1; n < ino[0]; n += 2) {
215                     if (ino[n + 1] < REAL_KEY) {
216                               retval = ugly_split(hashp, obucket, old_bufp, new_bufp,
217                                   (int)copyto, (int)moved);
218                               old_bufp->flags &= ~BUF_PIN;
219                               new_bufp->flags &= ~BUF_PIN;
220                               return (retval);
221 
222                     }
223                     key.data = (uint8_t *)op + ino[n];
224                     key.size = off - ino[n];
225 
226                     if (__call_hash(hashp, key.data, (int)key.size) == obucket) {
227                               /* Don't switch page */
228                               diff = copyto - off;
229                               if (diff) {
230                                         copyto = ino[n + 1] + diff;
231                                         memmove(op + copyto, op + ino[n + 1],
232                                             (size_t)(off - ino[n + 1]));
233                                         ino[ndx] = copyto + ino[n] - ino[n + 1];
234                                         ino[ndx + 1] = copyto;
235                               } else
236                                         copyto = ino[n + 1];
237                               ndx += 2;
238                     } else {
239                               /* Switch page */
240                               val.data = (uint8_t *)op + ino[n + 1];
241                               val.size = ino[n] - ino[n + 1];
242                               putpair(np, &key, &val);
243                               moved += 2;
244                     }
245 
246                     off = ino[n + 1];
247           }
248 
249           /* Now clean up the page */
250           ino[0] -= moved;
251           temp = sizeof(uint16_t) * (ino[0] + 3);
252           _DIAGASSERT(copyto >= temp);
253           FREESPACE(ino) = (uint16_t)(copyto - temp);
254           OFFSET(ino) = copyto;
255 
256 #ifdef DEBUG3
257           (void)fprintf(stderr, "split %d/%d\n",
258               ((uint16_t *)np)[0] / 2,
259               ((uint16_t *)op)[0] / 2);
260 #endif
261           /* unpin both pages */
262           old_bufp->flags &= ~BUF_PIN;
263           new_bufp->flags &= ~BUF_PIN;
264           return (0);
265 }
266 
267 /*
268  * Called when we encounter an overflow or big key/data page during split
269  * handling.  This is special cased since we have to begin checking whether
270  * the key/data pairs fit on their respective pages and because we may need
271  * overflow pages for both the old and new pages.
272  *
273  * The first page might be a page with regular key/data pairs in which case
274  * we have a regular overflow condition and just need to go on to the next
275  * page or it might be a big key/data pair in which case we need to fix the
276  * big key/data pair.
277  *
278  * Returns:
279  *         0 ==> success
280  *        -1 ==> failure
281  */
282 static int
ugly_split(HTAB * hashp,uint32_t obucket,BUFHEAD * old_bufp,BUFHEAD * new_bufp,int copyto,int moved)283 ugly_split(
284           HTAB *hashp,
285           uint32_t obucket,   /* Same as __split_page. */
286           BUFHEAD *old_bufp,
287           BUFHEAD *new_bufp,
288           int copyto,         /* First byte on page which contains key/data values. */
289           int moved /* Number of pairs moved to new page. */
290 )
291 {
292           BUFHEAD *bufp;      /* Buffer header for ino */
293           uint16_t *ino;      /* Page keys come off of */
294           uint16_t *np;       /* New page */
295           uint16_t *op;       /* Page keys go on to if they aren't moving */
296           size_t temp;
297 
298           BUFHEAD *last_bfp;  /* Last buf header OVFL needing to be freed */
299           DBT key, val;
300           SPLIT_RETURN ret;
301           uint16_t n, off, ov_addr, scopyto;
302           char *cino;                   /* Character value of ino */
303 
304           bufp = old_bufp;
305           ino = (uint16_t *)(void *)old_bufp->page;
306           np = (uint16_t *)(void *)new_bufp->page;
307           op = (uint16_t *)(void *)old_bufp->page;
308           last_bfp = NULL;
309           scopyto = (uint16_t)copyto;   /* ANSI */
310 
311           n = ino[0] - 1;
312           while (n < ino[0]) {
313                     if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
314                               if (__big_split(hashp, old_bufp,
315                                   new_bufp, bufp, (int)bufp->addr, obucket, &ret))
316                                         return (-1);
317                               old_bufp = ret.oldp;
318                               if (!old_bufp)
319                                         return (-1);
320                               op = (uint16_t *)(void *)old_bufp->page;
321                               new_bufp = ret.newp;
322                               if (!new_bufp)
323                                         return (-1);
324                               np = (uint16_t *)(void *)new_bufp->page;
325                               bufp = ret.nextp;
326                               if (!bufp)
327                                         return (0);
328                               cino = (char *)bufp->page;
329                               ino = (uint16_t *)(void *)cino;
330                               last_bfp = ret.nextp;
331                     } else if (ino[n + 1] == OVFLPAGE) {
332                               ov_addr = ino[n];
333                               /*
334                                * Fix up the old page -- the extra 2 are the fields
335                                * which contained the overflow information.
336                                */
337                               ino[0] -= (moved + 2);
338                               temp = sizeof(uint16_t) * (ino[0] + 3);
339                               _DIAGASSERT(scopyto >= temp);
340                               FREESPACE(ino) = (uint16_t)(scopyto - temp);
341                               OFFSET(ino) = scopyto;
342 
343                               bufp = __get_buf(hashp, (uint32_t)ov_addr, bufp, 0);
344                               if (!bufp)
345                                         return (-1);
346 
347                               ino = (uint16_t *)(void *)bufp->page;
348                               n = 1;
349                               scopyto = HASH_BSIZE(hashp);
350                               moved = 0;
351 
352                               if (last_bfp)
353                                         __free_ovflpage(hashp, last_bfp);
354                               last_bfp = bufp;
355                     }
356                     /* Move regular sized pairs of there are any */
357                     off = HASH_BSIZE(hashp);
358                     for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
359                               cino = (char *)(void *)ino;
360                               key.data = (uint8_t *)cino + ino[n];
361                               key.size = off - ino[n];
362                               val.data = (uint8_t *)cino + ino[n + 1];
363                               val.size = ino[n] - ino[n + 1];
364                               off = ino[n + 1];
365 
366                               if (__call_hash(hashp, key.data, (int)key.size) == obucket) {
367                                         /* Keep on old page */
368                                         if (PAIRFITS(op, (&key), (&val)))
369                                                   putpair((char *)(void *)op, &key, &val);
370                                         else {
371                                                   old_bufp =
372                                                       __add_ovflpage(hashp, old_bufp);
373                                                   if (!old_bufp)
374                                                             return (-1);
375                                                   op = (uint16_t *)(void *)old_bufp->page;
376                                                   putpair((char *)(void *)op, &key, &val);
377                                         }
378                                         old_bufp->flags |= BUF_MOD;
379                               } else {
380                                         /* Move to new page */
381                                         if (PAIRFITS(np, (&key), (&val)))
382                                                   putpair((char *)(void *)np, &key, &val);
383                                         else {
384                                                   new_bufp =
385                                                       __add_ovflpage(hashp, new_bufp);
386                                                   if (!new_bufp)
387                                                             return (-1);
388                                                   np = (uint16_t *)(void *)new_bufp->page;
389                                                   putpair((char *)(void *)np, &key, &val);
390                                         }
391                                         new_bufp->flags |= BUF_MOD;
392                               }
393                     }
394           }
395           if (last_bfp)
396                     __free_ovflpage(hashp, last_bfp);
397           return (0);
398 }
399 
400 /*
401  * Add the given pair to the page
402  *
403  * Returns:
404  *        0 ==> OK
405  *        1 ==> failure
406  */
407 int
__addel(HTAB * hashp,BUFHEAD * bufp,const DBT * key,const DBT * val)408 __addel(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val)
409 {
410           uint16_t *bp, *sop;
411           int do_expand;
412 
413           bp = (uint16_t *)(void *)bufp->page;
414           do_expand = 0;
415           while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY))
416                     /* Exception case */
417                     if (bp[2] == FULL_KEY_DATA && bp[0] == 2)
418                               /* This is the last page of a big key/data pair
419                                  and we need to add another page */
420                               break;
421                     else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) {
422                               bufp = __get_buf(hashp, (uint32_t)bp[bp[0] - 1], bufp,
423                                   0);
424                               if (!bufp)
425                                         return (-1);
426                               bp = (uint16_t *)(void *)bufp->page;
427                     } else if (bp[bp[0]] != OVFLPAGE) {
428                               /* Short key/data pairs, no more pages */
429                               break;
430                     } else {
431                               /* Try to squeeze key on this page */
432                               if (bp[2] >= REAL_KEY &&
433                                   FREESPACE(bp) >= PAIRSIZE(key, val)) {
434                                         squeeze_key(bp, key, val);
435                                         goto stats;
436                               } else {
437                                         bufp = __get_buf(hashp,
438                                             (uint32_t)bp[bp[0] - 1], bufp, 0);
439                                         if (!bufp)
440                                                   return (-1);
441                                         bp = (uint16_t *)(void *)bufp->page;
442                               }
443                     }
444 
445           if (PAIRFITS(bp, key, val))
446                     putpair(bufp->page, key, val);
447           else {
448                     do_expand = 1;
449                     bufp = __add_ovflpage(hashp, bufp);
450                     if (!bufp)
451                               return (-1);
452                     sop = (uint16_t *)(void *)bufp->page;
453 
454                     if (PAIRFITS(sop, key, val))
455                               putpair((char *)(void *)sop, key, val);
456                     else
457                               if (__big_insert(hashp, bufp, key, val))
458                                         return (-1);
459           }
460 stats:
461           bufp->flags |= BUF_MOD;
462           /*
463            * If the average number of keys per bucket exceeds the fill factor,
464            * expand the table.
465            */
466           hashp->NKEYS++;
467           if (do_expand ||
468               (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR))
469                     return (__expand_table(hashp));
470           return (0);
471 }
472 
473 /*
474  *
475  * Returns:
476  *        pointer on success
477  *        NULL on error
478  */
479 BUFHEAD *
__add_ovflpage(HTAB * hashp,BUFHEAD * bufp)480 __add_ovflpage(HTAB *hashp, BUFHEAD *bufp)
481 {
482           uint16_t *sp;
483           uint16_t ndx, ovfl_num;
484           size_t temp;
485 #ifdef DEBUG1
486           int tmp1, tmp2;
487 #endif
488           sp = (uint16_t *)(void *)bufp->page;
489 
490           /* Check if we are dynamically determining the fill factor */
491           if (hashp->FFACTOR == DEF_FFACTOR) {
492                     hashp->FFACTOR = (uint32_t)sp[0] >> 1;
493                     if (hashp->FFACTOR < MIN_FFACTOR)
494                               hashp->FFACTOR = MIN_FFACTOR;
495           }
496           bufp->flags |= BUF_MOD;
497           ovfl_num = overflow_page(hashp);
498 #ifdef DEBUG1
499           tmp1 = bufp->addr;
500           tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0;
501 #endif
502           if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, (uint32_t)ovfl_num,
503               bufp, 1)))
504                     return (NULL);
505           bufp->ovfl->flags |= BUF_MOD;
506 #ifdef DEBUG1
507           (void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n",
508               tmp1, tmp2, bufp->ovfl->addr);
509 #endif
510           ndx = sp[0];
511           /*
512            * Since a pair is allocated on a page only if there's room to add
513            * an overflow page, we know that the OVFL information will fit on
514            * the page.
515            */
516           sp[ndx + 4] = OFFSET(sp);
517           temp = FREESPACE(sp);
518           _DIAGASSERT(temp >= OVFLSIZE);
519           sp[ndx + 3] = (uint16_t)(temp - OVFLSIZE);
520           sp[ndx + 1] = ovfl_num;
521           sp[ndx + 2] = OVFLPAGE;
522           sp[0] = ndx + 2;
523 #ifdef HASH_STATISTICS
524           hash_overflows++;
525 #endif
526           return (bufp->ovfl);
527 }
528 
529 /*
530  * Returns:
531  *         0 indicates SUCCESS
532  *        -1 indicates FAILURE
533  */
534 int
__get_page(HTAB * hashp,char * p,uint32_t bucket,int is_bucket,int is_disk,int is_bitmap)535 __get_page(HTAB *hashp, char *p, uint32_t bucket, int is_bucket, int is_disk,
536     int is_bitmap)
537 {
538           int fd, page, size;
539           ssize_t rsize;
540           uint16_t *bp;
541           size_t temp;
542 
543           fd = hashp->fp;
544           size = HASH_BSIZE(hashp);
545 
546           if ((fd == -1) || !is_disk) {
547                     PAGE_INIT(p);
548                     return (0);
549           }
550           if (is_bucket)
551                     page = BUCKET_TO_PAGE(bucket);
552           else
553                     page = OADDR_TO_PAGE(bucket);
554           if ((rsize = pread(fd, p, (size_t)size, (off_t)page << hashp->BSHIFT)) == -1)
555                     return (-1);
556           bp = (uint16_t *)(void *)p;
557           if (!rsize)
558                     bp[0] = 0;          /* We hit the EOF, so initialize a new page */
559           else
560                     if (rsize != size) {
561                               errno = EFTYPE;
562                               return (-1);
563                     }
564           if (!is_bitmap && !bp[0]) {
565                     PAGE_INIT(p);
566           } else
567                     if (hashp->LORDER != BYTE_ORDER) {
568                               int i, max;
569 
570                               if (is_bitmap) {
571                                         max = (uint32_t)hashp->BSIZE >> 2; /* divide by 4 */
572                                         for (i = 0; i < max; i++)
573                                                   M_32_SWAP(((int *)(void *)p)[i]);
574                               } else {
575                                         M_16_SWAP(bp[0]);
576                                         max = bp[0] + 2;
577                                         for (i = 1; i <= max; i++)
578                                                   M_16_SWAP(bp[i]);
579                               }
580                     }
581           return (0);
582 }
583 
584 /*
585  * Write page p to disk
586  *
587  * Returns:
588  *         0 ==> OK
589  *        -1 ==>failure
590  */
591 int
__put_page(HTAB * hashp,char * p,uint32_t bucket,int is_bucket,int is_bitmap)592 __put_page(HTAB *hashp, char *p, uint32_t bucket, int is_bucket, int is_bitmap)
593 {
594           int fd, page, size;
595           ssize_t wsize;
596           char pbuf[MAX_BSIZE];
597 
598           size = HASH_BSIZE(hashp);
599           if ((hashp->fp == -1) && (hashp->fp = __dbtemp("_hash", NULL)) == -1)
600                     return (-1);
601           fd = hashp->fp;
602 
603           if (hashp->LORDER != BYTE_ORDER) {
604                     int i;
605                     int max;
606 
607                     memcpy(pbuf, p, size);
608                     if (is_bitmap) {
609                               max = (uint32_t)hashp->BSIZE >> 2;      /* divide by 4 */
610                               for (i = 0; i < max; i++)
611                                         M_32_SWAP(((int *)(void *)pbuf)[i]);
612                     } else {
613                               uint16_t *bp = (uint16_t *)(void *)pbuf;
614                               max = bp[0] + 2;
615                               for (i = 0; i <= max; i++)
616                                         M_16_SWAP(bp[i]);
617                     }
618                     p = pbuf;
619           }
620           if (is_bucket)
621                     page = BUCKET_TO_PAGE(bucket);
622           else
623                     page = OADDR_TO_PAGE(bucket);
624           if ((wsize = pwrite(fd, p, (size_t)size, (off_t)page << hashp->BSHIFT)) == -1)
625                     /* Errno is set */
626                     return (-1);
627           if (wsize != size) {
628                     errno = EFTYPE;
629                     return (-1);
630           }
631           return (0);
632 }
633 
634 #define BYTE_MASK   ((1 << INT_BYTE_SHIFT) -1)
635 /*
636  * Initialize a new bitmap page.  Bitmap pages are left in memory
637  * once they are read in.
638  */
639 int
__ibitmap(HTAB * hashp,int pnum,int nbits,int ndx)640 __ibitmap(HTAB *hashp, int pnum, int nbits, int ndx)
641 {
642           uint32_t *ip;
643           int clearbytes, clearints;
644 
645           if ((ip = malloc((size_t)hashp->BSIZE)) == NULL)
646                     return (1);
647           hashp->nmaps++;
648           clearints = ((uint32_t)(nbits - 1) >> INT_BYTE_SHIFT) + 1;
649           clearbytes = clearints << INT_TO_BYTE;
650           (void)memset(ip, 0, (size_t)clearbytes);
651           (void)memset(((char *)(void *)ip) + clearbytes, 0xFF,
652               (size_t)(hashp->BSIZE - clearbytes));
653           ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK);
654           SETBIT(ip, 0);
655           hashp->BITMAPS[ndx] = (uint16_t)pnum;
656           hashp->mapp[ndx] = ip;
657           return (0);
658 }
659 
660 static uint32_t
first_free(uint32_t map)661 first_free(uint32_t map)
662 {
663           uint32_t i, mask;
664 
665           mask = 0x1;
666           for (i = 0; i < BITS_PER_MAP; i++) {
667                     if (!(mask & map))
668                               return (i);
669                     mask = mask << 1;
670           }
671           return (i);
672 }
673 
674 static uint16_t
overflow_page(HTAB * hashp)675 overflow_page(HTAB *hashp)
676 {
677           uint32_t *freep = NULL;
678           int max_free, offset, splitnum;
679           uint16_t addr;
680           int bit, first_page, free_bit, free_page, i, in_use_bits, j;
681 #ifdef DEBUG2
682           int tmp1, tmp2;
683 #endif
684           splitnum = hashp->OVFL_POINT;
685           max_free = hashp->SPARES[splitnum];
686 
687           free_page = (uint32_t)(max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT);
688           free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1);
689 
690           /* Look through all the free maps to find the first free block */
691           first_page = (uint32_t)hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT);
692           for ( i = first_page; i <= free_page; i++ ) {
693                     if (!(freep = (uint32_t *)hashp->mapp[i]) &&
694                         !(freep = fetch_bitmap(hashp, i)))
695                               return (0);
696                     if (i == free_page)
697                               in_use_bits = free_bit;
698                     else
699                               in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1;
700 
701                     if (i == first_page) {
702                               bit = hashp->LAST_FREED &
703                                   ((hashp->BSIZE << BYTE_SHIFT) - 1);
704                               j = bit / BITS_PER_MAP;
705                               bit = bit & ~(BITS_PER_MAP - 1);
706                     } else {
707                               bit = 0;
708                               j = 0;
709                     }
710                     for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
711                               if (freep[j] != ALL_SET)
712                                         goto found;
713           }
714 
715           /* No Free Page Found */
716           hashp->LAST_FREED = hashp->SPARES[splitnum];
717           hashp->SPARES[splitnum]++;
718           offset = hashp->SPARES[splitnum] -
719               (splitnum ? hashp->SPARES[splitnum - 1] : 0);
720 
721 #define   OVMSG     "HASH: Out of overflow pages.  Increase page size\n"
722           if (offset > SPLITMASK) {
723                     if (++splitnum >= NCACHED) {
724                               (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
725                               errno = EFBIG;
726                               return (0);
727                     }
728                     hashp->OVFL_POINT = splitnum;
729                     hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
730                     hashp->SPARES[splitnum-1]--;
731                     offset = 1;
732           }
733 
734           /* Check if we need to allocate a new bitmap page */
735           if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) {
736                     free_page++;
737                     if (free_page >= NCACHED) {
738                               (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
739                               errno = EFBIG;
740                               return (0);
741                     }
742                     /*
743                      * This is tricky.  The 1 indicates that you want the new page
744                      * allocated with 1 clear bit.  Actually, you are going to
745                      * allocate 2 pages from this map.  The first is going to be
746                      * the map page, the second is the overflow page we were
747                      * looking for.  The init_bitmap routine automatically, sets
748                      * the first bit of itself to indicate that the bitmap itself
749                      * is in use.  We would explicitly set the second bit, but
750                      * don't have to if we tell init_bitmap not to leave it clear
751                      * in the first place.
752                      */
753                     if (__ibitmap(hashp,
754                         (int)OADDR_OF(splitnum, offset), 1, free_page))
755                               return (0);
756                     hashp->SPARES[splitnum]++;
757 #ifdef DEBUG2
758                     free_bit = 2;
759 #endif
760                     offset++;
761                     if (offset > SPLITMASK) {
762                               if (++splitnum >= NCACHED) {
763                                         (void)write(STDERR_FILENO, OVMSG,
764                                             sizeof(OVMSG) - 1);
765                                         errno = EFBIG;
766                                         return (0);
767                               }
768                               hashp->OVFL_POINT = splitnum;
769                               hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
770                               hashp->SPARES[splitnum-1]--;
771                               offset = 0;
772                     }
773           } else {
774                     /*
775                      * Free_bit addresses the last used bit.  Bump it to address
776                      * the first available bit.
777                      */
778                     free_bit++;
779                     SETBIT(freep, free_bit);
780           }
781 
782           /* Calculate address of the new overflow page */
783           addr = OADDR_OF(splitnum, offset);
784 #ifdef DEBUG2
785           (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
786               addr, free_bit, free_page);
787 #endif
788           return (addr);
789 
790 found:
791           bit = bit + first_free(freep[j]);
792           SETBIT(freep, bit);
793 #ifdef DEBUG2
794           tmp1 = bit;
795           tmp2 = i;
796 #endif
797           /*
798            * Bits are addressed starting with 0, but overflow pages are addressed
799            * beginning at 1. Bit is a bit addressnumber, so we need to increment
800            * it to convert it to a page number.
801            */
802           bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT));
803           if (bit >= hashp->LAST_FREED)
804                     hashp->LAST_FREED = bit - 1;
805 
806           /* Calculate the split number for this page */
807           for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++);
808           offset = (i ? bit - hashp->SPARES[i - 1] : bit);
809           if (offset >= SPLITMASK) {
810                     (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
811                     errno = EFBIG;
812                     return (0);         /* Out of overflow pages */
813           }
814           addr = OADDR_OF(i, offset);
815 #ifdef DEBUG2
816           (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
817               addr, tmp1, tmp2);
818 #endif
819 
820           /* Allocate and return the overflow page */
821           return (addr);
822 }
823 
824 /*
825  * Mark this overflow page as free.
826  */
827 void
__free_ovflpage(HTAB * hashp,BUFHEAD * obufp)828 __free_ovflpage(HTAB *hashp, BUFHEAD *obufp)
829 {
830           uint16_t addr;
831           uint32_t *freep;
832           int bit_address, free_page, free_bit;
833           uint16_t ndx;
834 
835           addr = obufp->addr;
836 #ifdef DEBUG1
837           (void)fprintf(stderr, "Freeing %d\n", addr);
838 #endif
839           ndx = (((uint32_t)addr) >> SPLITSHIFT);
840           bit_address =
841               (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1;
842            if (bit_address < hashp->LAST_FREED)
843                     hashp->LAST_FREED = bit_address;
844           free_page = ((uint32_t)bit_address >> (hashp->BSHIFT + BYTE_SHIFT));
845           free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1);
846 
847           if (!(freep = hashp->mapp[free_page]))
848                     freep = fetch_bitmap(hashp, free_page);
849           /*
850            * This had better never happen.  It means we tried to read a bitmap
851            * that has already had overflow pages allocated off it, and we
852            * failed to read it from the file.
853            */
854           _DIAGASSERT(freep != NULL);
855           CLRBIT(freep, free_bit);
856 #ifdef DEBUG2
857           (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
858               obufp->addr, free_bit, free_page);
859 #endif
860           __reclaim_buf(hashp, obufp);
861 }
862 
863 /*
864  * We have to know that the key will fit, but the last entry on the page is
865  * an overflow pair, so we need to shift things.
866  */
867 static void
squeeze_key(uint16_t * sp,const DBT * key,const DBT * val)868 squeeze_key(uint16_t *sp, const DBT *key, const DBT *val)
869 {
870           char *p;
871           uint16_t free_space, n, off, pageno;
872           size_t temp;
873 
874           p = (char *)(void *)sp;
875           n = sp[0];
876           free_space = FREESPACE(sp);
877           off = OFFSET(sp);
878 
879           pageno = sp[n - 1];
880           _DIAGASSERT(off >= key->size);
881           off -= (uint16_t)key->size;
882           sp[n - 1] = off;
883           memmove(p + off, key->data, key->size);
884           _DIAGASSERT(off >= val->size);
885           off -= (uint16_t)val->size;
886           sp[n] = off;
887           memmove(p + off, val->data, val->size);
888           sp[0] = n + 2;
889           sp[n + 1] = pageno;
890           sp[n + 2] = OVFLPAGE;
891           temp = PAIRSIZE(key, val);
892           _DIAGASSERT(free_space >= temp);
893           FREESPACE(sp) = (uint16_t)(free_space - temp);
894           OFFSET(sp) = off;
895 }
896 
897 static uint32_t *
fetch_bitmap(HTAB * hashp,int ndx)898 fetch_bitmap(HTAB *hashp, int ndx)
899 {
900           if (ndx >= hashp->nmaps)
901                     return (NULL);
902           if ((hashp->mapp[ndx] = malloc((size_t)hashp->BSIZE)) == NULL)
903                     return (NULL);
904           if (__get_page(hashp,
905               (char *)(void *)hashp->mapp[ndx], (uint32_t)hashp->BITMAPS[ndx], 0, 1, 1)) {
906                     free(hashp->mapp[ndx]);
907                     return (NULL);
908           }
909           return (hashp->mapp[ndx]);
910 }
911 
912 #ifdef DEBUG4
913 void print_chain(HTAB *, uint32_t);
914 void
print_chain(HTAB * hashp,uint32_t addr)915 print_chain(HTAB *hashp, uint32_t addr)
916 {
917           BUFHEAD *bufp;
918           uint16_t *bp, oaddr;
919 
920           (void)fprintf(stderr, "%d ", addr);
921           bufp = __get_buf(hashp, addr, NULL, 0);
922           bp = (uint16_t *)bufp->page;
923           while (bp[0] && ((bp[bp[0]] == OVFLPAGE) ||
924                     ((bp[0] > 2) && bp[2] < REAL_KEY))) {
925                     oaddr = bp[bp[0] - 1];
926                     (void)fprintf(stderr, "%d ", (int)oaddr);
927                     bufp = __get_buf(hashp, (uint32_t)oaddr, bufp, 0);
928                     bp = (uint16_t *)bufp->page;
929           }
930           (void)fprintf(stderr, "\n");
931 }
932 #endif
933