1 /*        $NetBSD: hash.h,v 1.17 2020/02/21 22:04:06 kamil Exp $      */
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *        The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Margo Seltzer.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *        @(#)hash.h          8.3 (Berkeley) 5/31/94
35  */
36 
37 #if HAVE_NBTOOL_CONFIG_H
38 #include "nbtool_config.h"
39 #endif
40 
41 /* Operations */
42 typedef enum {
43           HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE, HASH_FIRST, HASH_NEXT
44 } ACTION;
45 
46 /* Buffer Management structures */
47 typedef struct _bufhead BUFHEAD;
48 
49 struct _bufhead {
50           BUFHEAD             *prev;              /* LRU links */
51           BUFHEAD             *next;              /* LRU links */
52           BUFHEAD             *ovfl;              /* Overflow page buffer header */
53           uint32_t   addr;              /* Address of this page */
54           char                *page;              /* Actual page data */
55           char                flags;
56 #define   BUF_MOD             0x0001
57 #define BUF_DISK    0x0002
58 #define   BUF_BUCKET          0x0004
59 #define   BUF_PIN             0x0008
60 };
61 
62 #define IS_BUCKET(X)          ((X) & BUF_BUCKET)
63 
64 typedef BUFHEAD **SEGMENT;
65 
66 /* Hash Table Information */
67 typedef struct hashhdr {                /* Disk resident portion */
68           int32_t             magic;              /* Magic NO for hash tables */
69           int32_t             version;  /* Version ID */
70           uint32_t  lorder;             /* Byte Order */
71           int32_t             bsize;              /* Bucket/Page Size */
72           int32_t             bshift;             /* Bucket shift */
73           int32_t             dsize;              /* Directory Size */
74           int32_t             ssize;              /* Segment Size */
75           int32_t             sshift;             /* Segment shift */
76           int32_t             ovfl_point;         /* Where overflow pages are being
77                                                    * allocated */
78           int32_t             last_freed;         /* Last overflow page freed */
79           int32_t             max_bucket;         /* ID of Maximum bucket in use */
80           int32_t             high_mask;          /* Mask to modulo into entire table */
81           int32_t             low_mask; /* Mask to modulo into lower half of
82                                                    * table */
83           int32_t             ffactor;  /* Fill factor */
84           int32_t             nkeys;              /* Number of keys in hash table */
85           int32_t             hdrpages; /* Size of table header */
86           int32_t             h_charkey;          /* value of hash(CHARKEY) */
87 #define NCACHED     32                            /* number of bit maps and spare
88                                                    * points */
89           int32_t             spares[NCACHED];/* spare pages for overflow */
90           uint16_t  bitmaps[NCACHED];   /* address of overflow page
91                                                              * bitmaps */
92 } HASHHDR;
93 
94 typedef struct htab  {                  /* Memory resident data structure */
95           HASHHDR   hdr;                /* Header */
96           int                 nsegs;              /* Number of allocated segments */
97           int                 exsegs;             /* Number of extra allocated
98                                                    * segments */
99           uint32_t  (*hash)(const void *, size_t);          /* Hash function */
100           int                 flags;              /* Flag values */
101           int                 fp;                 /* File pointer */
102           char                *tmp_buf; /* Temporary Buffer for BIG data */
103           char                *tmp_key; /* Temporary Buffer for BIG keys */
104           BUFHEAD   *cpage;             /* Current page */
105           int                 cbucket;  /* Current bucket */
106           int                 cndx;               /* Index of next item on cpage */
107           int                 err;                /* Error Number -- for DBM
108                                                    * compatibility */
109           int                 new_file; /* Indicates if fd is backing store
110                                                    * or no */
111           int                 save_file;          /* Indicates whether we need to flush
112                                                    * file at
113                                                    * exit */
114           uint32_t  *mapp[NCACHED];     /* Pointers to page maps */
115           int                 nmaps;              /* Initial number of bitmaps */
116           int                 nbufs;              /* Number of buffers left to
117                                                    * allocate */
118           BUFHEAD   bufhead;  /* Header of buffer lru list */
119           SEGMENT   *dir;               /* Hash Bucket directory */
120 } HTAB;
121 
122 /*
123  * Constants
124  */
125 #define   MAX_BSIZE           65536               /* 2^16 */
126 /*
127  * Make it fit in uint16_t; a better way would be to store size - 1, but
128  * then we'd need to bump the version.
129  */
130 #define HASH_BSIZE(hp)        ((hp)->BSIZE == MAX_BSIZE ? MAX_BSIZE - 1 : (hp)->BSIZE)
131 #define MIN_BUFFERS           6
132 #define MINHDRSIZE            512
133 #define DEF_BUFSIZE           65536               /* 64 K */
134 #define DEF_BUCKET_SIZE                 4096
135 #define DEF_BUCKET_SHIFT      12                  /* log2(BUCKET) */
136 #define DEF_SEGSIZE           256
137 #define DEF_SEGSIZE_SHIFT     8                   /* log2(SEGSIZE)     */
138 #define DEF_DIRSIZE           256
139 #define DEF_FFACTOR           65536
140 #define MIN_FFACTOR           4
141 #define SPLTMAX                         8
142 #define CHARKEY                         "%$sniglet^&"
143 #define NUMKEY                          1038583
144 #define BYTE_SHIFT            3
145 #define INT_TO_BYTE           2
146 #define INT_BYTE_SHIFT                  5
147 #define ALL_SET                         ((uint32_t)0xFFFFFFFF)
148 #define ALL_CLEAR             0
149 
150 #define PTROF(X)    ((BUFHEAD *)(void *)((u_long)(X)&~0x3))
151 #define ISMOD(X)    ((uint32_t)(u_long)(X)&0x1)
152 #define DOMOD(X)    ((X) = (char *)(void *)((u_long)(X)|0x1))
153 #define ISDISK(X)   ((uint32_t)(u_long)(X)&0x2)
154 #define DODISK(X)   ((X) = (char *)(void *)((u_long)(X)|0x2))
155 
156 #define BITS_PER_MAP          32
157 
158 /* Given the address of the beginning of a big map, clear/set the nth bit */
159 #define CLRBIT(A, N)          ((A)[(N)/BITS_PER_MAP] &= ~(1U<<((N)%BITS_PER_MAP)))
160 #define SETBIT(A, N)          ((A)[(N)/BITS_PER_MAP] |= (1U<<((N)%BITS_PER_MAP)))
161 #define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1U<<((N)%BITS_PER_MAP)))
162 
163 /* Overflow management */
164 /*
165  * Overflow page numbers are allocated per split point.  At each doubling of
166  * the table, we can allocate extra pages.  So, an overflow page number has
167  * the top 5 bits indicate which split point and the lower 11 bits indicate
168  * which page at that split point is indicated (pages within split points are
169  * numberered starting with 1).
170  */
171 
172 #define SPLITSHIFT  11
173 #define SPLITMASK   0x7FF
174 #define SPLITNUM(N) (((uint32_t)(N)) >> SPLITSHIFT)
175 #define OPAGENUM(N) ((N) & SPLITMASK)
176 #define   OADDR_OF(S,O)       ((uint32_t)((uint32_t)(S) << SPLITSHIFT) + (O))
177 
178 #define BUCKET_TO_PAGE(B) \
179           (B) + hashp->HDRPAGES + \
180           ((B) ? hashp->SPARES[__log2((uint32_t)((B)+1))-1] : 0)
181 #define OADDR_TO_PAGE(B)      \
182           BUCKET_TO_PAGE ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B));
183 
184 /*
185  * page.h contains a detailed description of the page format.
186  *
187  * Normally, keys and data are accessed from offset tables in the top of
188  * each page which point to the beginning of the key and data.  There are
189  * four flag values which may be stored in these offset tables which indicate
190  * the following:
191  *
192  *
193  * OVFLPAGE         Rather than a key data pair, this pair contains
194  *                  the address of an overflow page.  The format of
195  *                  the pair is:
196  *                      OVERFLOW_PAGE_NUMBER OVFLPAGE
197  *
198  * PARTIAL_KEY      This must be the first key/data pair on a page
199  *                  and implies that page contains only a partial key.
200  *                  That is, the key is too big to fit on a single page
201  *                  so it starts on this page and continues on the next.
202  *                  The format of the page is:
203  *                      KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE
204  *
205  *                      KEY_OFF -- offset of the beginning of the key
206  *                      PARTIAL_KEY -- 1
207  *                      OVFL_PAGENO - page number of the next overflow page
208  *                      OVFLPAGE -- 0
209  *
210  * FULL_KEY         This must be the first key/data pair on the page.  It
211  *                  is used in two cases.
212  *
213  *                  Case 1:
214  *                      There is a complete key on the page but no data
215  *                      (because it wouldn't fit).  The next page contains
216  *                      the data.
217  *
218  *                      Page format it:
219  *                      KEY_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
220  *
221  *                      KEY_OFF -- offset of the beginning of the key
222  *                      FULL_KEY -- 2
223  *                      OVFL_PAGENO - page number of the next overflow page
224  *                      OVFLPAGE -- 0
225  *
226  *                  Case 2:
227  *                      This page contains no key, but part of a large
228  *                      data field, which is continued on the next page.
229  *
230  *                      Page format it:
231  *                      DATA_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
232  *
233  *                      KEY_OFF -- offset of the beginning of the data on
234  *                                      this page
235  *                      FULL_KEY -- 2
236  *                      OVFL_PAGENO - page number of the next overflow page
237  *                      OVFLPAGE -- 0
238  *
239  * FULL_KEY_DATA
240  *                  This must be the first key/data pair on the page.
241  *                  There are two cases:
242  *
243  *                  Case 1:
244  *                      This page contains a key and the beginning of the
245  *                      data field, but the data field is continued on the
246  *                      next page.
247  *
248  *                      Page format is:
249  *                      KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF
250  *
251  *                      KEY_OFF -- offset of the beginning of the key
252  *                      FULL_KEY_DATA -- 3
253  *                      OVFL_PAGENO - page number of the next overflow page
254  *                      DATA_OFF -- offset of the beginning of the data
255  *
256  *                  Case 2:
257  *                      This page contains the last page of a big data pair.
258  *                      There is no key, only the  tail end of the data
259  *                      on this page.
260  *
261  *                      Page format is:
262  *                      DATA_OFF FULL_KEY_DATA <OVFL_PAGENO> <OVFLPAGE>
263  *
264  *                      DATA_OFF -- offset of the beginning of the data on
265  *                                      this page
266  *                      FULL_KEY_DATA -- 3
267  *                      OVFL_PAGENO - page number of the next overflow page
268  *                      OVFLPAGE -- 0
269  *
270  *                      OVFL_PAGENO and OVFLPAGE are optional (they are
271  *                      not present if there is no next page).
272  */
273 
274 #define OVFLPAGE    0
275 #define PARTIAL_KEY 1
276 #define FULL_KEY    2
277 #define FULL_KEY_DATA         3
278 #define   REAL_KEY  4
279 
280 /* Short hands for accessing structure */
281 #define BSIZE                 hdr.bsize
282 #define BSHIFT                hdr.bshift
283 #define DSIZE                 hdr.dsize
284 #define SGSIZE                hdr.ssize
285 #define SSHIFT                hdr.sshift
286 #define LORDER                hdr.lorder
287 #define OVFL_POINT  hdr.ovfl_point
288 #define   LAST_FREED          hdr.last_freed
289 #define MAX_BUCKET  hdr.max_bucket
290 #define FFACTOR               hdr.ffactor
291 #define HIGH_MASK   hdr.high_mask
292 #define LOW_MASK    hdr.low_mask
293 #define NKEYS                 hdr.nkeys
294 #define HDRPAGES    hdr.hdrpages
295 #define SPARES                hdr.spares
296 #define BITMAPS               hdr.bitmaps
297 #define VERSION               hdr.version
298 #define MAGIC                 hdr.magic
299 #define NEXT_FREE   hdr.next_free
300 #define H_CHARKEY   hdr.h_charkey
301