1 /*        $NetBSD: binhash.c,v 1.4 2025/02/25 19:15:51 christos Exp $ */
2 
3 /*++
4 /* NAME
5 /*        binhash 3
6 /* SUMMARY
7 /*        hash table manager
8 /* SYNOPSIS
9 /*        #include <binhash.h>
10 /*
11 /*        typedef   struct {
12 /* .in +4
13 /*                  void      *key;
14 /*                  ssize_t   key_len;
15 /*                  void      *value;
16 /*                  /* private fields... */
17 /* .in -4
18 /*        } BINHASH_INFO;
19 /*
20 /*        BINHASH   *binhash_create(size)
21 /*        ssize_t   size;
22 /*
23 /*        BINHASH_INFO *binhash_enter(table, key, key_len, value)
24 /*        BINHASH   *table;
25 /*        const void *key;
26 /*        ssize_t   key_len;
27 /*        void      *value;
28 /*
29 /*        char      *binhash_find(table, key, key_len)
30 /*        BINHASH   *table;
31 /*        const void *key;
32 /*        ssize_t   key_len;
33 /*
34 /*        BINHASH_INFO *binhash_locate(table, key, key_len)
35 /*        BINHASH   *table;
36 /*        const void *key;
37 /*        ssize_t   key_len;
38 /*
39 /*        void      binhash_delete(table, key, key_len, free_fn)
40 /*        BINHASH   *table;
41 /*        const void *key;
42 /*        ssize_t   key_len;
43 /*        void      (*free_fn)(void *);
44 /*
45 /*        void      binhash_free(table, free_fn)
46 /*        BINHASH   *table;
47 /*        void      (*free_fn)(void *);
48 /*
49 /*        void      binhash_walk(table, action, ptr)
50 /*        BINHASH   *table;
51 /*        void      (*action)(BINHASH_INFO *info, void *ptr);
52 /*        void      *ptr;
53 /*
54 /*        BINHASH_INFO **binhash_list(table)
55 /*        BINHASH   *table;
56 /*
57 /*        BINHASH_INFO *binhash_sequence(table, how)
58 /*        BINHASH   *table;
59 /*        int       how;
60 /* DESCRIPTION
61 /*        This module maintains one or more hash tables. Each table entry
62 /*        consists of a unique binary-valued lookup key and a generic
63 /*        character-pointer value.
64 /*        The tables are automatically resized when they fill up. When the
65 /*        values to be remembered are not character pointers, proper casts
66 /*        should be used or the code will not be portable.
67 /*
68 /*        binhash_create() creates a table of the specified size and returns a
69 /*        pointer to the result. The lookup keys are saved with mymemdup().
70 /*
71 /*        binhash_enter() stores a (key, value) pair into the specified table
72 /*        and returns a pointer to the resulting entry. The code does not
73 /*        check if an entry with that key already exists: use binhash_locate()
74 /*        for updating an existing entry. The key is copied; the value is not.
75 /*
76 /*        binhash_find() returns the value that was stored under the given key,
77 /*        or a null pointer if it was not found. In order to distinguish
78 /*        a null value from a non-existent value, use binhash_locate().
79 /*
80 /*        binhash_locate() returns a pointer to the entry that was stored
81 /*        for the given key, or a null pointer if it was not found.
82 /*
83 /*        binhash_delete() removes one entry that was stored under the given key.
84 /*        If the free_fn argument is not a null pointer, the corresponding
85 /*        function is called with as argument the value that was stored under
86 /*        the key.
87 /*
88 /*        binhash_free() destroys a hash table, including contents. If the free_fn
89 /*        argument is not a null pointer, the corresponding function is called
90 /*        for each table entry, with as argument the value that was stored
91 /*        with the entry.
92 /*
93 /*        binhash_walk() invokes the action function for each table entry, with
94 /*        a pointer to the entry as its argument. The ptr argument is passed
95 /*        on to the action function.
96 /*
97 /*        binhash_list() returns a null-terminated list of pointers to
98 /*        all elements in the named table. The list should be passed to
99 /*        myfree().
100 /*
101 /*        binhash_sequence() returns the first or next element
102 /*        depending on the value of the "how" argument. Specify
103 /*        BINHASH_SEQ_FIRST to start a new sequence, BINHASH_SEQ_NEXT
104 /*        to continue, and BINHASH_SEQ_STOP to terminate a sequence
105 /*        early. The caller must not delete an element before it is
106 /*        visited.
107 /* RESTRICTIONS
108 /*        A callback function should not modify the hash table that is
109 /*        specified to its caller.
110 /* DIAGNOSTICS
111 /*        The following conditions are reported and cause the program to
112 /*        terminate immediately: memory allocation failure; an attempt
113 /*        to delete a non-existent entry.
114 /* SEE ALSO
115 /*        mymalloc(3) memory management wrapper
116 /*        hash_fnv(3) Fowler/Noll/Vo hash function
117 /* LICENSE
118 /* .ad
119 /* .fi
120 /*        The Secure Mailer license must be distributed with this software.
121 /* AUTHOR(S)
122 /*        Wietse Venema
123 /*        IBM T.J. Watson Research
124 /*        P.O. Box 704
125 /*        Yorktown Heights, NY 10598, USA
126 /*
127 /*        Wietse Venema
128 /*        Google, Inc.
129 /*        111 8th Avenue
130 /*        New York, NY 10011, USA
131 /*--*/
132 
133 /* C library */
134 
135 #include <sys_defs.h>
136 #include <string.h>
137 
138 /* Local stuff */
139 
140 #include "mymalloc.h"
141 #include "msg.h"
142 #include "binhash.h"
143 
144 /* binhash_hash - hash a string */
145 
146 #ifndef NO_HASH_FNV
147 #include "hash_fnv.h"
148 
149 #define binhash_hash(key, len, size) (hash_fnv((key), (len)) % (size))
150 
151 #else
152 
binhash_hash(const void * key,ssize_t len,size_t size)153 static size_t binhash_hash(const void *key, ssize_t len, size_t size)
154 {
155     size_t  h = 0;
156     size_t  g;
157 
158     /*
159      * From the "Dragon" book by Aho, Sethi and Ullman.
160      */
161 
162     while (len-- > 0) {
163           h = (h << 4U) + *(unsigned const char *) key++;
164           if ((g = (h & 0xf0000000)) != 0) {
165               h ^= (g >> 24U);
166               h ^= g;
167           }
168     }
169     return (h % size);
170 }
171 
172 #endif
173 
174 /* binhash_link - insert element into table */
175 
176 #define binhash_link(table, elm) { \
177     BINHASH_INFO **_h = table->data + binhash_hash(elm->key, elm->key_len, table->size);\
178     elm->prev = 0; \
179     if ((elm->next = *_h) != 0) \
180           (*_h)->prev = elm; \
181     *_h = elm; \
182     table->used++; \
183 }
184 
185 /* binhash_size - allocate and initialize hash table */
186 
binhash_size(BINHASH * table,size_t size)187 static void binhash_size(BINHASH *table, size_t size)
188 {
189     BINHASH_INFO **h;
190 
191     size |= 1;
192 
193     table->data = h = (BINHASH_INFO **) mymalloc(size * sizeof(BINHASH_INFO *));
194     table->size = size;
195     table->used = 0;
196 
197     while (size-- > 0)
198           *h++ = 0;
199 }
200 
201 /* binhash_create - create initial hash table */
202 
binhash_create(ssize_t size)203 BINHASH *binhash_create(ssize_t size)
204 {
205     BINHASH *table;
206 
207     table = (BINHASH *) mymalloc(sizeof(BINHASH));
208     binhash_size(table, size < 13 ? 13 : size);
209     table->seq_bucket = table->seq_element = 0;
210     return (table);
211 }
212 
213 /* binhash_grow - extend existing table */
214 
binhash_grow(BINHASH * table)215 static void binhash_grow(BINHASH *table)
216 {
217     BINHASH_INFO *ht;
218     BINHASH_INFO *next;
219     ssize_t old_size = table->size;
220     BINHASH_INFO **h = table->data;
221     BINHASH_INFO **old_entries = h;
222 
223     binhash_size(table, 2 * old_size);
224 
225     while (old_size-- > 0) {
226           for (ht = *h++; ht; ht = next) {
227               next = ht->next;
228               binhash_link(table, ht);
229           }
230     }
231     myfree((void *) old_entries);
232 }
233 
234 /* binhash_enter - enter (key, value) pair */
235 
binhash_enter(BINHASH * table,const void * key,ssize_t key_len,void * value)236 BINHASH_INFO *binhash_enter(BINHASH *table, const void *key, ssize_t key_len, void *value)
237 {
238     BINHASH_INFO *ht;
239 
240     if (table->used >= table->size)
241           binhash_grow(table);
242     ht = (BINHASH_INFO *) mymalloc(sizeof(BINHASH_INFO));
243     ht->key = mymemdup(key, key_len);
244     ht->key_len = key_len;
245     ht->value = value;
246     binhash_link(table, ht);
247     return (ht);
248 }
249 
250 /* binhash_find - lookup value */
251 
binhash_find(BINHASH * table,const void * key,ssize_t key_len)252 void   *binhash_find(BINHASH *table, const void *key, ssize_t key_len)
253 {
254     BINHASH_INFO *ht;
255 
256 #define   KEY_EQ(x,y,l) (((unsigned char *) x)[0] == ((unsigned char *) y)[0] && memcmp(x,y,l) == 0)
257 
258     if (table != 0)
259           for (ht = table->data[binhash_hash(key, key_len, table->size)]; ht; ht = ht->next)
260               if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len))
261                     return (ht->value);
262     return (0);
263 }
264 
265 /* binhash_locate - lookup entry */
266 
binhash_locate(BINHASH * table,const void * key,ssize_t key_len)267 BINHASH_INFO *binhash_locate(BINHASH *table, const void *key, ssize_t key_len)
268 {
269     BINHASH_INFO *ht;
270 
271     if (table != 0)
272           for (ht = table->data[binhash_hash(key, key_len, table->size)]; ht; ht = ht->next)
273               if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len))
274                     return (ht);
275     return (0);
276 }
277 
278 /* binhash_delete - delete one entry */
279 
binhash_delete(BINHASH * table,const void * key,ssize_t key_len,void (* free_fn)(void *))280 void    binhash_delete(BINHASH *table, const void *key, ssize_t key_len, void (*free_fn) (void *))
281 {
282     if (table != 0) {
283           BINHASH_INFO *ht;
284           BINHASH_INFO **h = table->data + binhash_hash(key, key_len, table->size);
285 
286           for (ht = *h; ht; ht = ht->next) {
287               if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len)) {
288                     if (ht->next)
289                         ht->next->prev = ht->prev;
290                     if (ht->prev)
291                         ht->prev->next = ht->next;
292                     else
293                         *h = ht->next;
294                     table->used--;
295                     myfree(ht->key);
296                     if (free_fn)
297                         (*free_fn) (ht->value);
298                     myfree((void *) ht);
299                     return;
300               }
301           }
302           msg_panic("binhash_delete: unknown_key: \"%s\"", (char *) key);
303     }
304 }
305 
306 /* binhash_free - destroy hash table */
307 
binhash_free(BINHASH * table,void (* free_fn)(void *))308 void    binhash_free(BINHASH *table, void (*free_fn) (void *))
309 {
310     if (table != 0) {
311           ssize_t i = table->size;
312           BINHASH_INFO *ht;
313           BINHASH_INFO *next;
314           BINHASH_INFO **h = table->data;
315 
316           while (i-- > 0) {
317               for (ht = *h++; ht; ht = next) {
318                     next = ht->next;
319                     myfree(ht->key);
320                     if (free_fn)
321                         (*free_fn) (ht->value);
322                     myfree((void *) ht);
323               }
324           }
325           myfree((void *) table->data);
326           table->data = 0;
327           if (table->seq_bucket)
328               myfree((void *) table->seq_bucket);
329           table->seq_bucket = 0;
330           myfree((void *) table);
331     }
332 }
333 
334 /* binhash_walk - iterate over hash table */
335 
binhash_walk(BINHASH * table,void (* action)(BINHASH_INFO *,void *),void * ptr)336 void    binhash_walk(BINHASH *table, void (*action) (BINHASH_INFO *, void *),
337                                  void *ptr) {
338     if (table != 0) {
339           ssize_t i = table->size;
340           BINHASH_INFO **h = table->data;
341           BINHASH_INFO *ht;
342 
343           while (i-- > 0)
344               for (ht = *h++; ht; ht = ht->next)
345                     (*action) (ht, ptr);
346     }
347 }
348 
349 /* binhash_list - list all table members */
350 
binhash_list(BINHASH * table)351 BINHASH_INFO **binhash_list(BINHASH *table)
352 {
353     BINHASH_INFO **list;
354     BINHASH_INFO *member;
355     ssize_t count = 0;
356     ssize_t i;
357 
358     if (table != 0) {
359           list = (BINHASH_INFO **) mymalloc(sizeof(*list) * (table->used + 1));
360           for (i = 0; i < table->size; i++)
361               for (member = table->data[i]; member != 0; member = member->next)
362                     list[count++] = member;
363     } else {
364           list = (BINHASH_INFO **) mymalloc(sizeof(*list));
365     }
366     list[count] = 0;
367     return (list);
368 }
369 
370 /* binhash_sequence - dict(3) compatibility iterator */
371 
binhash_sequence(BINHASH * table,int how)372 BINHASH_INFO *binhash_sequence(BINHASH *table, int how)
373 {
374     if (table == 0)
375           return (0);
376 
377     switch (how) {
378     case BINHASH_SEQ_FIRST:                       /* start new sequence */
379           if (table->seq_bucket)
380               myfree((void *) table->seq_bucket);
381           table->seq_bucket = binhash_list(table);
382           table->seq_element = table->seq_bucket;
383           return (*(table->seq_element)++);
384     case BINHASH_SEQ_NEXT:                        /* next element */
385           if (table->seq_element && *table->seq_element)
386               return (*(table->seq_element)++);
387           /* FALLTHROUGH */
388     default:                                                /* terminate sequence */
389           if (table->seq_bucket) {
390               myfree((void *) table->seq_bucket);
391               table->seq_bucket = table->seq_element = 0;
392           }
393           return (0);
394     }
395 }
396 
397 #ifdef TEST
398 #include <vstring_vstream.h>
399 #include <myrand.h>
400 
main(int unused_argc,char ** unused_argv)401 int     main(int unused_argc, char **unused_argv)
402 {
403     VSTRING *buf = vstring_alloc(10);
404     ssize_t count = 0;
405     BINHASH *hash;
406     BINHASH_INFO **ht_info;
407     BINHASH_INFO **ht;
408     BINHASH_INFO *info;
409     ssize_t i;
410     ssize_t r;
411     int     op;
412 
413     /*
414      * Load a large number of strings including terminator, and delete them
415      * in a random order.
416      */
417     hash = binhash_create(10);
418     while (vstring_get(buf, VSTREAM_IN) != VSTREAM_EOF)
419           binhash_enter(hash, vstring_str(buf), VSTRING_LEN(buf) + 1,
420                           CAST_INT_TO_VOID_PTR(count++));
421     if (count != hash->used)
422           msg_panic("%ld entries stored, but %lu entries exist",
423                       (long) count, (unsigned long) hash->used);
424     for (i = 0, op = BINHASH_SEQ_FIRST; (info = binhash_sequence(hash, op)) != 0;
425            i++, op = BINHASH_SEQ_NEXT)
426           if (memchr(info->key, 0, info->key_len) == 0)
427               msg_panic("no null byte in lookup key");
428     if (i != hash->used)
429           msg_panic("%ld entries found, but %lu entries exist",
430                       (long) i, (unsigned long) hash->used);
431     ht_info = binhash_list(hash);
432     for (i = 0; i < hash->used; i++) {
433           r = myrand() % hash->used;
434           info = ht_info[i];
435           ht_info[i] = ht_info[r];
436           ht_info[r] = info;
437     }
438     for (ht = ht_info; *ht; ht++)
439           binhash_delete(hash, ht[0]->key, ht[0]->key_len, (void (*) (void *)) 0);
440     if (hash->used > 0)
441           msg_panic("%ld entries not deleted", (long) hash->used);
442     myfree((void *) ht_info);
443     binhash_free(hash, (void (*) (void *)) 0);
444     vstring_free(buf);
445     return (0);
446 }
447 
448 #endif
449