1 /*
2 February 2013(Wouter) patch defines for BSD endianness, from Brad Smith.
3 January 2012(Wouter) added randomised initial value, fallout from 28c3.
4 March 2007(Wouter) adapted from lookup3.c original, add config.h include.
5 added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings.
6 added include of lookup3.h to check definitions match declarations.
7 removed include of stdint - config.h takes care of platform independence.
8 url http://burtleburtle.net/bob/hash/index.html.
9 */
10 /*
11 -------------------------------------------------------------------------------
12 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
13
14 These are functions for producing 32-bit hashes for hash table lookup.
15 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
16 are externally useful functions. Routines to test the hash are included
17 if SELF_TEST is defined. You can use this free for any purpose. It's in
18 the public domain. It has no warranty.
19
20 You probably want to use hashlittle(). hashlittle() and hashbig()
21 hash byte arrays. hashlittle() is is faster than hashbig() on
22 little-endian machines. Intel and AMD are little-endian machines.
23 On second thought, you probably want hashlittle2(), which is identical to
24 hashlittle() except it returns two 32-bit hashes for the price of one.
25 You could implement hashbig2() if you wanted but I haven't bothered here.
26
27 If you want to find a hash of, say, exactly 7 integers, do
28 a = i1; b = i2; c = i3;
29 mix(a,b,c);
30 a += i4; b += i5; c += i6;
31 mix(a,b,c);
32 a += i7;
33 final(a,b,c);
34 then use c as the hash value. If you have a variable length array of
35 4-byte integers to hash, use hashword(). If you have a byte array (like
36 a character string), use hashlittle(). If you have several byte arrays, or
37 a mix of things, see the comments above hashlittle().
38
39 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
40 then mix those integers. This is fast (you can do a lot more thorough
41 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
42 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
43 -------------------------------------------------------------------------------
44 */
45 /*#define SELF_TEST 1*/
46
47 #include "config.h"
48 #include "util/storage/lookup3.h"
49 #include <stdio.h> /* defines printf for tests */
50 #include <time.h> /* defines time_t for timings in the test */
51 /*#include <stdint.h> defines uint32_t etc (from config.h) */
52 #include <sys/param.h> /* attempt to define endianness */
53 #ifdef HAVE_SYS_TYPES_H
54 # include <sys/types.h> /* attempt to define endianness (solaris) */
55 #endif
56 #ifdef linux
57 # include <endian.h> /* attempt to define endianness */
58 #endif
59 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
60 #include <sys/endian.h> /* attempt to define endianness */
61 #endif
62 #ifdef __OpenBSD__
63 #include <machine/endian.h> /* attempt to define endianness */
64 #endif
65
66 /* random initial value */
67 static uint32_t raninit = (uint32_t)0xdeadbeef;
68
69 void
hash_set_raninit(uint32_t v)70 hash_set_raninit(uint32_t v)
71 {
72 raninit = v;
73 }
74
75 /*
76 * My best guess at if you are big-endian or little-endian. This may
77 * need adjustment.
78 */
79 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
80 __BYTE_ORDER == __LITTLE_ENDIAN) || \
81 (defined(i386) || defined(__i386__) || defined(__i486__) || \
82 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || defined(__x86))
83 # define HASH_LITTLE_ENDIAN 1
84 # define HASH_BIG_ENDIAN 0
85 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
86 __BYTE_ORDER == __BIG_ENDIAN) || \
87 (defined(sparc) || defined(__sparc) || defined(__sparc__) || defined(POWERPC) || defined(mc68000) || defined(sel))
88 # define HASH_LITTLE_ENDIAN 0
89 # define HASH_BIG_ENDIAN 1
90 #elif defined(_MACHINE_ENDIAN_H_)
91 /* test for machine_endian_h protects failure if some are empty strings */
92 # if defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN
93 # define HASH_LITTLE_ENDIAN 0
94 # define HASH_BIG_ENDIAN 1
95 # endif
96 # if defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN
97 # define HASH_LITTLE_ENDIAN 1
98 # define HASH_BIG_ENDIAN 0
99 # endif /* _MACHINE_ENDIAN_H_ */
100 #else
101 # define HASH_LITTLE_ENDIAN 0
102 # define HASH_BIG_ENDIAN 0
103 #endif
104
105 #define hashsize(n) ((uint32_t)1<<(n))
106 #define hashmask(n) (hashsize(n)-1)
107 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
108
109 /*
110 -------------------------------------------------------------------------------
111 mix -- mix 3 32-bit values reversibly.
112
113 This is reversible, so any information in (a,b,c) before mix() is
114 still in (a,b,c) after mix().
115
116 If four pairs of (a,b,c) inputs are run through mix(), or through
117 mix() in reverse, there are at least 32 bits of the output that
118 are sometimes the same for one pair and different for another pair.
119 This was tested for:
120 * pairs that differed by one bit, by two bits, in any combination
121 of top bits of (a,b,c), or in any combination of bottom bits of
122 (a,b,c).
123 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
124 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
125 is commonly produced by subtraction) look like a single 1-bit
126 difference.
127 * the base values were pseudorandom, all zero but one bit set, or
128 all zero plus a counter that starts at zero.
129
130 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
131 satisfy this are
132 4 6 8 16 19 4
133 9 15 3 18 27 15
134 14 9 3 7 17 3
135 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
136 for "differ" defined as + with a one-bit base and a two-bit delta. I
137 used http://burtleburtle.net/bob/hash/avalanche.html to choose
138 the operations, constants, and arrangements of the variables.
139
140 This does not achieve avalanche. There are input bits of (a,b,c)
141 that fail to affect some output bits of (a,b,c), especially of a. The
142 most thoroughly mixed value is c, but it doesn't really even achieve
143 avalanche in c.
144
145 This allows some parallelism. Read-after-writes are good at doubling
146 the number of bits affected, so the goal of mixing pulls in the opposite
147 direction as the goal of parallelism. I did what I could. Rotates
148 seem to cost as much as shifts on every machine I could lay my hands
149 on, and rotates are much kinder to the top and bottom bits, so I used
150 rotates.
151 -------------------------------------------------------------------------------
152 */
153 #define mix(a,b,c) \
154 { \
155 a -= c; a ^= rot(c, 4); c += b; \
156 b -= a; b ^= rot(a, 6); a += c; \
157 c -= b; c ^= rot(b, 8); b += a; \
158 a -= c; a ^= rot(c,16); c += b; \
159 b -= a; b ^= rot(a,19); a += c; \
160 c -= b; c ^= rot(b, 4); b += a; \
161 }
162
163 /*
164 -------------------------------------------------------------------------------
165 final -- final mixing of 3 32-bit values (a,b,c) into c
166
167 Pairs of (a,b,c) values differing in only a few bits will usually
168 produce values of c that look totally different. This was tested for
169 * pairs that differed by one bit, by two bits, in any combination
170 of top bits of (a,b,c), or in any combination of bottom bits of
171 (a,b,c).
172 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
173 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
174 is commonly produced by subtraction) look like a single 1-bit
175 difference.
176 * the base values were pseudorandom, all zero but one bit set, or
177 all zero plus a counter that starts at zero.
178
179 These constants passed:
180 14 11 25 16 4 14 24
181 12 14 25 16 4 14 24
182 and these came close:
183 4 8 15 26 3 22 24
184 10 8 15 26 3 22 24
185 11 8 15 26 3 22 24
186 -------------------------------------------------------------------------------
187 */
188 #define final(a,b,c) \
189 { \
190 c ^= b; c -= rot(b,14); \
191 a ^= c; a -= rot(c,11); \
192 b ^= a; b -= rot(a,25); \
193 c ^= b; c -= rot(b,16); \
194 a ^= c; a -= rot(c,4); \
195 b ^= a; b -= rot(a,14); \
196 c ^= b; c -= rot(b,24); \
197 }
198
199 /*
200 --------------------------------------------------------------------
201 This works on all machines. To be useful, it requires
202 -- that the key be an array of uint32_t's, and
203 -- that the length be the number of uint32_t's in the key
204
205 The function hashword() is identical to hashlittle() on little-endian
206 machines, and identical to hashbig() on big-endian machines,
207 except that the length has to be measured in uint32_ts rather than in
208 bytes. hashlittle() is more complicated than hashword() only because
209 hashlittle() has to dance around fitting the key bytes into registers.
210 --------------------------------------------------------------------
211 */
hashword(const uint32_t * k,size_t length,uint32_t initval)212 uint32_t hashword(
213 const uint32_t *k, /* the key, an array of uint32_t values */
214 size_t length, /* the length of the key, in uint32_ts */
215 uint32_t initval) /* the previous hash, or an arbitrary value */
216 {
217 uint32_t a,b,c;
218
219 /* Set up the internal state */
220 a = b = c = raninit + (((uint32_t)length)<<2) + initval;
221
222 /*------------------------------------------------- handle most of the key */
223 while (length > 3)
224 {
225 a += k[0];
226 b += k[1];
227 c += k[2];
228 mix(a,b,c);
229 length -= 3;
230 k += 3;
231 }
232
233 /*------------------------------------------- handle the last 3 uint32_t's */
234 switch(length) /* all the case statements fall through */
235 {
236 case 3 : c+=k[2];
237 case 2 : b+=k[1];
238 case 1 : a+=k[0];
239 final(a,b,c);
240 case 0: /* case 0: nothing left to add */
241 break;
242 }
243 /*------------------------------------------------------ report the result */
244 return c;
245 }
246
247
248 #ifdef SELF_TEST
249
250 /*
251 --------------------------------------------------------------------
252 hashword2() -- same as hashword(), but take two seeds and return two
253 32-bit values. pc and pb must both be nonnull, and *pc and *pb must
254 both be initialized with seeds. If you pass in (*pb)==0, the output
255 (*pc) will be the same as the return value from hashword().
256 --------------------------------------------------------------------
257 */
hashword2(const uint32_t * k,size_t length,uint32_t * pc,uint32_t * pb)258 void hashword2 (
259 const uint32_t *k, /* the key, an array of uint32_t values */
260 size_t length, /* the length of the key, in uint32_ts */
261 uint32_t *pc, /* IN: seed OUT: primary hash value */
262 uint32_t *pb) /* IN: more seed OUT: secondary hash value */
263 {
264 uint32_t a,b,c;
265
266 /* Set up the internal state */
267 a = b = c = raninit + ((uint32_t)(length<<2)) + *pc;
268 c += *pb;
269
270 /*------------------------------------------------- handle most of the key */
271 while (length > 3)
272 {
273 a += k[0];
274 b += k[1];
275 c += k[2];
276 mix(a,b,c);
277 length -= 3;
278 k += 3;
279 }
280
281 /*------------------------------------------- handle the last 3 uint32_t's */
282 switch(length) /* all the case statements fall through */
283 {
284 case 3 : c+=k[2];
285 case 2 : b+=k[1];
286 case 1 : a+=k[0];
287 final(a,b,c);
288 case 0: /* case 0: nothing left to add */
289 break;
290 }
291 /*------------------------------------------------------ report the result */
292 *pc=c; *pb=b;
293 }
294
295 #endif /* SELF_TEST */
296
297 /*
298 -------------------------------------------------------------------------------
299 hashlittle() -- hash a variable-length key into a 32-bit value
300 k : the key (the unaligned variable-length array of bytes)
301 length : the length of the key, counting by bytes
302 initval : can be any 4-byte value
303 Returns a 32-bit value. Every bit of the key affects every bit of
304 the return value. Two keys differing by one or two bits will have
305 totally different hash values.
306
307 The best hash table sizes are powers of 2. There is no need to do
308 mod a prime (mod is sooo slow!). If you need less than 32 bits,
309 use a bitmask. For example, if you need only 10 bits, do
310 h = (h & hashmask(10));
311 In which case, the hash table should have hashsize(10) elements.
312
313 If you are hashing n strings (uint8_t **)k, do it like this:
314 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
315
316 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
317 code any way you wish, private, educational, or commercial. It's free.
318
319 Use for hash table lookup, or anything where one collision in 2^^32 is
320 acceptable. Do NOT use for cryptographic purposes.
321 -------------------------------------------------------------------------------
322 */
323
hashlittle(const void * key,size_t length,uint32_t initval)324 uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
325 {
326 uint32_t a,b,c; /* internal state */
327 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
328
329 /* Set up the internal state */
330 a = b = c = raninit + ((uint32_t)length) + initval;
331
332 u.ptr = key;
333 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
334 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
335 #ifdef VALGRIND
336 const uint8_t *k8;
337 #endif
338
339 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
340 while (length > 12)
341 {
342 a += k[0];
343 b += k[1];
344 c += k[2];
345 mix(a,b,c);
346 length -= 12;
347 k += 3;
348 }
349
350 /*----------------------------- handle the last (probably partial) block */
351 /*
352 * "k[2]&0xffffff" actually reads beyond the end of the string, but
353 * then masks off the part it's not allowed to read. Because the
354 * string is aligned, the masked-off tail is in the same word as the
355 * rest of the string. Every machine with memory protection I've seen
356 * does it on word boundaries, so is OK with this. But VALGRIND will
357 * still catch it and complain. The masking trick does make the hash
358 * noticably faster for short strings (like English words).
359 */
360 #ifndef VALGRIND
361
362 switch(length)
363 {
364 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
365 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
366 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
367 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
368 case 8 : b+=k[1]; a+=k[0]; break;
369 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
370 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
371 case 5 : b+=k[1]&0xff; a+=k[0]; break;
372 case 4 : a+=k[0]; break;
373 case 3 : a+=k[0]&0xffffff; break;
374 case 2 : a+=k[0]&0xffff; break;
375 case 1 : a+=k[0]&0xff; break;
376 case 0 : return c; /* zero length strings require no mixing */
377 }
378
379 #else /* make valgrind happy */
380
381 k8 = (const uint8_t *)k;
382 switch(length)
383 {
384 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
385 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
386 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
387 case 9 : c+=k8[8]; /* fall through */
388 case 8 : b+=k[1]; a+=k[0]; break;
389 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
390 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
391 case 5 : b+=k8[4]; /* fall through */
392 case 4 : a+=k[0]; break;
393 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
394 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
395 case 1 : a+=k8[0]; break;
396 case 0 : return c;
397 }
398
399 #endif /* !valgrind */
400
401 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
402 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
403 const uint8_t *k8;
404
405 /*--------------- all but last block: aligned reads and different mixing */
406 while (length > 12)
407 {
408 a += k[0] + (((uint32_t)k[1])<<16);
409 b += k[2] + (((uint32_t)k[3])<<16);
410 c += k[4] + (((uint32_t)k[5])<<16);
411 mix(a,b,c);
412 length -= 12;
413 k += 6;
414 }
415
416 /*----------------------------- handle the last (probably partial) block */
417 k8 = (const uint8_t *)k;
418 switch(length)
419 {
420 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
421 b+=k[2]+(((uint32_t)k[3])<<16);
422 a+=k[0]+(((uint32_t)k[1])<<16);
423 break;
424 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
425 case 10: c+=k[4];
426 b+=k[2]+(((uint32_t)k[3])<<16);
427 a+=k[0]+(((uint32_t)k[1])<<16);
428 break;
429 case 9 : c+=k8[8]; /* fall through */
430 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
431 a+=k[0]+(((uint32_t)k[1])<<16);
432 break;
433 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
434 case 6 : b+=k[2];
435 a+=k[0]+(((uint32_t)k[1])<<16);
436 break;
437 case 5 : b+=k8[4]; /* fall through */
438 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
439 break;
440 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
441 case 2 : a+=k[0];
442 break;
443 case 1 : a+=k8[0];
444 break;
445 case 0 : return c; /* zero length requires no mixing */
446 }
447
448 } else { /* need to read the key one byte at a time */
449 const uint8_t *k = (const uint8_t *)key;
450
451 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
452 while (length > 12)
453 {
454 a += k[0];
455 a += ((uint32_t)k[1])<<8;
456 a += ((uint32_t)k[2])<<16;
457 a += ((uint32_t)k[3])<<24;
458 b += k[4];
459 b += ((uint32_t)k[5])<<8;
460 b += ((uint32_t)k[6])<<16;
461 b += ((uint32_t)k[7])<<24;
462 c += k[8];
463 c += ((uint32_t)k[9])<<8;
464 c += ((uint32_t)k[10])<<16;
465 c += ((uint32_t)k[11])<<24;
466 mix(a,b,c);
467 length -= 12;
468 k += 12;
469 }
470
471 /*-------------------------------- last block: affect all 32 bits of (c) */
472 switch(length) /* all the case statements fall through */
473 {
474 case 12: c+=((uint32_t)k[11])<<24;
475 case 11: c+=((uint32_t)k[10])<<16;
476 case 10: c+=((uint32_t)k[9])<<8;
477 case 9 : c+=k[8];
478 case 8 : b+=((uint32_t)k[7])<<24;
479 case 7 : b+=((uint32_t)k[6])<<16;
480 case 6 : b+=((uint32_t)k[5])<<8;
481 case 5 : b+=k[4];
482 case 4 : a+=((uint32_t)k[3])<<24;
483 case 3 : a+=((uint32_t)k[2])<<16;
484 case 2 : a+=((uint32_t)k[1])<<8;
485 case 1 : a+=k[0];
486 break;
487 case 0 : return c;
488 }
489 }
490
491 final(a,b,c);
492 return c;
493 }
494
495 #ifdef SELF_TEST
496
497 /*
498 * hashlittle2: return 2 32-bit hash values
499 *
500 * This is identical to hashlittle(), except it returns two 32-bit hash
501 * values instead of just one. This is good enough for hash table
502 * lookup with 2^^64 buckets, or if you want a second hash if you're not
503 * happy with the first, or if you want a probably-unique 64-bit ID for
504 * the key. *pc is better mixed than *pb, so use *pc first. If you want
505 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
506 */
hashlittle2(const void * key,size_t length,uint32_t * pc,uint32_t * pb)507 void hashlittle2(
508 const void *key, /* the key to hash */
509 size_t length, /* length of the key */
510 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
511 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
512 {
513 uint32_t a,b,c; /* internal state */
514 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
515
516 /* Set up the internal state */
517 a = b = c = raninit + ((uint32_t)length) + *pc;
518 c += *pb;
519
520 u.ptr = key;
521 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
522 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
523 #ifdef VALGRIND
524 const uint8_t *k8;
525 #endif
526
527 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
528 while (length > 12)
529 {
530 a += k[0];
531 b += k[1];
532 c += k[2];
533 mix(a,b,c);
534 length -= 12;
535 k += 3;
536 }
537
538 /*----------------------------- handle the last (probably partial) block */
539 /*
540 * "k[2]&0xffffff" actually reads beyond the end of the string, but
541 * then masks off the part it's not allowed to read. Because the
542 * string is aligned, the masked-off tail is in the same word as the
543 * rest of the string. Every machine with memory protection I've seen
544 * does it on word boundaries, so is OK with this. But VALGRIND will
545 * still catch it and complain. The masking trick does make the hash
546 * noticably faster for short strings (like English words).
547 */
548 #ifndef VALGRIND
549
550 switch(length)
551 {
552 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
553 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
554 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
555 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
556 case 8 : b+=k[1]; a+=k[0]; break;
557 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
558 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
559 case 5 : b+=k[1]&0xff; a+=k[0]; break;
560 case 4 : a+=k[0]; break;
561 case 3 : a+=k[0]&0xffffff; break;
562 case 2 : a+=k[0]&0xffff; break;
563 case 1 : a+=k[0]&0xff; break;
564 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
565 }
566
567 #else /* make valgrind happy */
568
569 k8 = (const uint8_t *)k;
570 switch(length)
571 {
572 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
573 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
574 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
575 case 9 : c+=k8[8]; /* fall through */
576 case 8 : b+=k[1]; a+=k[0]; break;
577 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
578 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
579 case 5 : b+=k8[4]; /* fall through */
580 case 4 : a+=k[0]; break;
581 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
582 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
583 case 1 : a+=k8[0]; break;
584 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
585 }
586
587 #endif /* !valgrind */
588
589 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
590 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
591 const uint8_t *k8;
592
593 /*--------------- all but last block: aligned reads and different mixing */
594 while (length > 12)
595 {
596 a += k[0] + (((uint32_t)k[1])<<16);
597 b += k[2] + (((uint32_t)k[3])<<16);
598 c += k[4] + (((uint32_t)k[5])<<16);
599 mix(a,b,c);
600 length -= 12;
601 k += 6;
602 }
603
604 /*----------------------------- handle the last (probably partial) block */
605 k8 = (const uint8_t *)k;
606 switch(length)
607 {
608 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
609 b+=k[2]+(((uint32_t)k[3])<<16);
610 a+=k[0]+(((uint32_t)k[1])<<16);
611 break;
612 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
613 case 10: c+=k[4];
614 b+=k[2]+(((uint32_t)k[3])<<16);
615 a+=k[0]+(((uint32_t)k[1])<<16);
616 break;
617 case 9 : c+=k8[8]; /* fall through */
618 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
619 a+=k[0]+(((uint32_t)k[1])<<16);
620 break;
621 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
622 case 6 : b+=k[2];
623 a+=k[0]+(((uint32_t)k[1])<<16);
624 break;
625 case 5 : b+=k8[4]; /* fall through */
626 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
627 break;
628 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
629 case 2 : a+=k[0];
630 break;
631 case 1 : a+=k8[0];
632 break;
633 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
634 }
635
636 } else { /* need to read the key one byte at a time */
637 const uint8_t *k = (const uint8_t *)key;
638
639 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
640 while (length > 12)
641 {
642 a += k[0];
643 a += ((uint32_t)k[1])<<8;
644 a += ((uint32_t)k[2])<<16;
645 a += ((uint32_t)k[3])<<24;
646 b += k[4];
647 b += ((uint32_t)k[5])<<8;
648 b += ((uint32_t)k[6])<<16;
649 b += ((uint32_t)k[7])<<24;
650 c += k[8];
651 c += ((uint32_t)k[9])<<8;
652 c += ((uint32_t)k[10])<<16;
653 c += ((uint32_t)k[11])<<24;
654 mix(a,b,c);
655 length -= 12;
656 k += 12;
657 }
658
659 /*-------------------------------- last block: affect all 32 bits of (c) */
660 switch(length) /* all the case statements fall through */
661 {
662 case 12: c+=((uint32_t)k[11])<<24;
663 case 11: c+=((uint32_t)k[10])<<16;
664 case 10: c+=((uint32_t)k[9])<<8;
665 case 9 : c+=k[8];
666 case 8 : b+=((uint32_t)k[7])<<24;
667 case 7 : b+=((uint32_t)k[6])<<16;
668 case 6 : b+=((uint32_t)k[5])<<8;
669 case 5 : b+=k[4];
670 case 4 : a+=((uint32_t)k[3])<<24;
671 case 3 : a+=((uint32_t)k[2])<<16;
672 case 2 : a+=((uint32_t)k[1])<<8;
673 case 1 : a+=k[0];
674 break;
675 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
676 }
677 }
678
679 final(a,b,c);
680 *pc=c; *pb=b;
681 }
682
683 #endif /* SELF_TEST */
684
685 #if 0 /* currently not used */
686
687 /*
688 * hashbig():
689 * This is the same as hashword() on big-endian machines. It is different
690 * from hashlittle() on all machines. hashbig() takes advantage of
691 * big-endian byte ordering.
692 */
693 uint32_t hashbig( const void *key, size_t length, uint32_t initval)
694 {
695 uint32_t a,b,c;
696 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
697
698 /* Set up the internal state */
699 a = b = c = raninit + ((uint32_t)length) + initval;
700
701 u.ptr = key;
702 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
703 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
704 #ifdef VALGRIND
705 const uint8_t *k8;
706 #endif
707
708 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
709 while (length > 12)
710 {
711 a += k[0];
712 b += k[1];
713 c += k[2];
714 mix(a,b,c);
715 length -= 12;
716 k += 3;
717 }
718
719 /*----------------------------- handle the last (probably partial) block */
720 /*
721 * "k[2]<<8" actually reads beyond the end of the string, but
722 * then shifts out the part it's not allowed to read. Because the
723 * string is aligned, the illegal read is in the same word as the
724 * rest of the string. Every machine with memory protection I've seen
725 * does it on word boundaries, so is OK with this. But VALGRIND will
726 * still catch it and complain. The masking trick does make the hash
727 * noticably faster for short strings (like English words).
728 */
729 #ifndef VALGRIND
730
731 switch(length)
732 {
733 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
734 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
735 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
736 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
737 case 8 : b+=k[1]; a+=k[0]; break;
738 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
739 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
740 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
741 case 4 : a+=k[0]; break;
742 case 3 : a+=k[0]&0xffffff00; break;
743 case 2 : a+=k[0]&0xffff0000; break;
744 case 1 : a+=k[0]&0xff000000; break;
745 case 0 : return c; /* zero length strings require no mixing */
746 }
747
748 #else /* make valgrind happy */
749
750 k8 = (const uint8_t *)k;
751 switch(length) /* all the case statements fall through */
752 {
753 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
754 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
755 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
756 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
757 case 8 : b+=k[1]; a+=k[0]; break;
758 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
759 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
760 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
761 case 4 : a+=k[0]; break;
762 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
763 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
764 case 1 : a+=((uint32_t)k8[0])<<24; break;
765 case 0 : return c;
766 }
767
768 #endif /* !VALGRIND */
769
770 } else { /* need to read the key one byte at a time */
771 const uint8_t *k = (const uint8_t *)key;
772
773 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
774 while (length > 12)
775 {
776 a += ((uint32_t)k[0])<<24;
777 a += ((uint32_t)k[1])<<16;
778 a += ((uint32_t)k[2])<<8;
779 a += ((uint32_t)k[3]);
780 b += ((uint32_t)k[4])<<24;
781 b += ((uint32_t)k[5])<<16;
782 b += ((uint32_t)k[6])<<8;
783 b += ((uint32_t)k[7]);
784 c += ((uint32_t)k[8])<<24;
785 c += ((uint32_t)k[9])<<16;
786 c += ((uint32_t)k[10])<<8;
787 c += ((uint32_t)k[11]);
788 mix(a,b,c);
789 length -= 12;
790 k += 12;
791 }
792
793 /*-------------------------------- last block: affect all 32 bits of (c) */
794 switch(length) /* all the case statements fall through */
795 {
796 case 12: c+=k[11];
797 case 11: c+=((uint32_t)k[10])<<8;
798 case 10: c+=((uint32_t)k[9])<<16;
799 case 9 : c+=((uint32_t)k[8])<<24;
800 case 8 : b+=k[7];
801 case 7 : b+=((uint32_t)k[6])<<8;
802 case 6 : b+=((uint32_t)k[5])<<16;
803 case 5 : b+=((uint32_t)k[4])<<24;
804 case 4 : a+=k[3];
805 case 3 : a+=((uint32_t)k[2])<<8;
806 case 2 : a+=((uint32_t)k[1])<<16;
807 case 1 : a+=((uint32_t)k[0])<<24;
808 break;
809 case 0 : return c;
810 }
811 }
812
813 final(a,b,c);
814 return c;
815 }
816
817 #endif /* 0 == currently not used */
818
819 #ifdef SELF_TEST
820
821 /* used for timings */
driver1()822 void driver1()
823 {
824 uint8_t buf[256];
825 uint32_t i;
826 uint32_t h=0;
827 time_t a,z;
828
829 time(&a);
830 for (i=0; i<256; ++i) buf[i] = 'x';
831 for (i=0; i<1; ++i)
832 {
833 h = hashlittle(&buf[0],1,h);
834 }
835 time(&z);
836 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
837 }
838
839 /* check that every input bit changes every output bit half the time */
840 #define HASHSTATE 1
841 #define HASHLEN 1
842 #define MAXPAIR 60
843 #define MAXLEN 70
driver2()844 void driver2()
845 {
846 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
847 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
848 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
849 uint32_t x[HASHSTATE],y[HASHSTATE];
850 uint32_t hlen;
851
852 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
853 for (hlen=0; hlen < MAXLEN; ++hlen)
854 {
855 z=0;
856 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
857 {
858 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
859 {
860 for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
861 {
862 for (l=0; l<HASHSTATE; ++l)
863 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
864
865 /*---- check that every output bit is affected by that input bit */
866 for (k=0; k<MAXPAIR; k+=2)
867 {
868 uint32_t finished=1;
869 /* keys have one bit different */
870 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
871 /* have a and b be two keys differing in only one bit */
872 a[i] ^= (k<<j);
873 a[i] ^= (k>>(8-j));
874 c[0] = hashlittle(a, hlen, m);
875 b[i] ^= ((k+1)<<j);
876 b[i] ^= ((k+1)>>(8-j));
877 d[0] = hashlittle(b, hlen, m);
878 /* check every bit is 1, 0, set, and not set at least once */
879 for (l=0; l<HASHSTATE; ++l)
880 {
881 e[l] &= (c[l]^d[l]);
882 f[l] &= ~(c[l]^d[l]);
883 g[l] &= c[l];
884 h[l] &= ~c[l];
885 x[l] &= d[l];
886 y[l] &= ~d[l];
887 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
888 }
889 if (finished) break;
890 }
891 if (k>z) z=k;
892 if (k==MAXPAIR)
893 {
894 printf("Some bit didn't change: ");
895 printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
896 e[0],f[0],g[0],h[0],x[0],y[0]);
897 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
898 }
899 if (z==MAXPAIR) goto done;
900 }
901 }
902 }
903 done:
904 if (z < MAXPAIR)
905 {
906 printf("Mix success %2d bytes %2d initvals ",i,m);
907 printf("required %d trials\n", z/2);
908 }
909 }
910 printf("\n");
911 }
912
913 /* Check for reading beyond the end of the buffer and alignment problems */
driver3()914 void driver3()
915 {
916 uint8_t buf[MAXLEN+20], *b;
917 uint32_t len;
918 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
919 uint32_t h;
920 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
921 uint32_t i;
922 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
923 uint32_t j;
924 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
925 uint32_t ref,x,y;
926 uint8_t *p;
927
928 printf("Endianness. These lines should all be the same (for values filled in):\n");
929 printf("%.8x %.8x %.8x\n",
930 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
931 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
932 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
933 p = q;
934 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
935 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
936 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
937 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
938 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
939 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
940 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
941 p = &qq[1];
942 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
943 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
944 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
945 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
946 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
947 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
948 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
949 p = &qqq[2];
950 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
951 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
952 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
953 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
954 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
955 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
956 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
957 p = &qqqq[3];
958 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
959 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
960 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
961 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
962 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
963 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
964 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
965 printf("\n");
966
967 /* check that hashlittle2 and hashlittle produce the same results */
968 i=47; j=0;
969 hashlittle2(q, sizeof(q), &i, &j);
970 if (hashlittle(q, sizeof(q), 47) != i)
971 printf("hashlittle2 and hashlittle mismatch\n");
972
973 /* check that hashword2 and hashword produce the same results */
974 len = raninit;
975 i=47, j=0;
976 hashword2(&len, 1, &i, &j);
977 if (hashword(&len, 1, 47) != i)
978 printf("hashword2 and hashword mismatch %x %x\n",
979 i, hashword(&len, 1, 47));
980
981 /* check hashlittle doesn't read before or after the ends of the string */
982 for (h=0, b=buf+1; h<8; ++h, ++b)
983 {
984 for (i=0; i<MAXLEN; ++i)
985 {
986 len = i;
987 for (j=0; j<i; ++j) *(b+j)=0;
988
989 /* these should all be equal */
990 ref = hashlittle(b, len, (uint32_t)1);
991 *(b+i)=(uint8_t)~0;
992 *(b-1)=(uint8_t)~0;
993 x = hashlittle(b, len, (uint32_t)1);
994 y = hashlittle(b, len, (uint32_t)1);
995 if ((ref != x) || (ref != y))
996 {
997 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
998 h, i);
999 }
1000 }
1001 }
1002 }
1003
1004 /* check for problems with nulls */
driver4()1005 void driver4()
1006 {
1007 uint8_t buf[1];
1008 uint32_t h,i,state[HASHSTATE];
1009
1010
1011 buf[0] = ~0;
1012 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1013 printf("These should all be different\n");
1014 for (i=0, h=0; i<8; ++i)
1015 {
1016 h = hashlittle(buf, 0, h);
1017 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
1018 }
1019 }
1020
1021
main()1022 int main()
1023 {
1024 driver1(); /* test that the key is hashed: used for timings */
1025 driver2(); /* test that whole key is hashed thoroughly */
1026 driver3(); /* test that nothing but the key is hashed */
1027 driver4(); /* test hashing multiple buffers (all buffers are null) */
1028 return 1;
1029 }
1030
1031 #endif /* SELF_TEST */
1032