1 /*
2 * hash.c : dumping and reading hash tables to/from files.
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24
25
26 #include <stdlib.h>
27 #include <limits.h>
28
29 #include <apr_version.h>
30 #include <apr_pools.h>
31 #include <apr_hash.h>
32 #include <apr_file_io.h>
33
34 #ifndef SVN_HASH__GETS_SETS
35 #define SVN_HASH__GETS_SETS
36 #endif
37 #include "svn_hash.h"
38
39 #include "svn_types.h"
40 #include "svn_string.h"
41 #include "svn_error.h"
42 #include "svn_sorts.h"
43 #include "svn_io.h"
44 #include "svn_pools.h"
45
46 #include "private/svn_dep_compat.h"
47 #include "private/svn_sorts_private.h"
48 #include "private/svn_subr_private.h"
49
50 #include "svn_private_config.h"
51
52
53
54 /*
55 * The format of a dumped hash table is:
56 *
57 * K <nlength>
58 * name (a string of <nlength> bytes, followed by a newline)
59 * V <vlength>
60 * val (a string of <vlength> bytes, followed by a newline)
61 * [... etc, etc ...]
62 * END
63 *
64 *
65 * (Yes, there is a newline after END.)
66 *
67 * For example:
68 *
69 * K 5
70 * color
71 * V 3
72 * red
73 * K 11
74 * wine review
75 * V 376
76 * A forthright entrance, yet coquettish on the tongue, its deceptively
77 * fruity exterior hides the warm mahagony undercurrent that is the
78 * hallmark of Chateau Fraisant-Pitre. Connoisseurs of the region will
79 * be pleased to note the familiar, subtle hints of mulberries and
80 * carburator fluid. Its confident finish is marred only by a barely
81 * detectable suggestion of rancid squid ink.
82 * K 5
83 * price
84 * V 8
85 * US $6.50
86 * END
87 *
88 */
89
90
91
92
93 /*** Dumping and loading hash files. */
94
95 /* Implements svn_hash_read2 and svn_hash_read_incremental. */
96 svn_error_t *
svn_hash__read_entry(svn_hash__entry_t * entry,svn_stream_t * stream,const char * terminator,svn_boolean_t incremental,apr_pool_t * pool)97 svn_hash__read_entry(svn_hash__entry_t *entry,
98 svn_stream_t *stream,
99 const char *terminator,
100 svn_boolean_t incremental,
101 apr_pool_t *pool)
102 {
103 svn_stringbuf_t *buf;
104 svn_boolean_t eof;
105 apr_size_t len;
106 char c;
107
108 svn_error_t *err;
109 apr_uint64_t ui64;
110
111 /* Read a key length line. Might be END, though. */
112 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
113
114 /* Check for the end of the hash. */
115 if ((!terminator && eof && buf->len == 0)
116 || (terminator && (strcmp(buf->data, terminator) == 0)))
117 {
118 entry->key = NULL;
119 entry->keylen = 0;
120 entry->val = NULL;
121 entry->vallen = 0;
122
123 return SVN_NO_ERROR;
124 }
125
126 /* Check for unexpected end of stream */
127 if (eof)
128 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
129 _("Serialized hash missing terminator"));
130
131 if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' '))
132 {
133 /* Get the length of the key */
134 err = svn_cstring_strtoui64(&ui64, buf->data + 2,
135 0, APR_SIZE_MAX, 10);
136 if (err)
137 return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
138 _("Serialized hash malformed key length"));
139 entry->keylen = (apr_size_t)ui64;
140
141 /* Now read that much into a buffer. */
142 entry->key = apr_palloc(pool, entry->keylen + 1);
143 SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
144 entry->key[entry->keylen] = '\0';
145
146 /* Suck up extra newline after key data */
147 len = 1;
148 SVN_ERR(svn_stream_read_full(stream, &c, &len));
149 if (c != '\n')
150 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
151 _("Serialized hash malformed key data"));
152
153 /* Read a val length line */
154 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
155
156 if ((buf->data[0] == 'V') && (buf->data[1] == ' '))
157 {
158 /* Get the length of the val */
159 err = svn_cstring_strtoui64(&ui64, buf->data + 2,
160 0, APR_SIZE_MAX, 10);
161 if (err)
162 return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
163 _("Serialized hash malformed value length"));
164 entry->vallen = (apr_size_t)ui64;
165
166 entry->val = apr_palloc(pool, entry->vallen + 1);
167 SVN_ERR(svn_stream_read_full(stream, entry->val, &entry->vallen));
168 entry->val[entry->vallen] = '\0';
169
170 /* Suck up extra newline after val data */
171 len = 1;
172 SVN_ERR(svn_stream_read_full(stream, &c, &len));
173 if (c != '\n')
174 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
175 _("Serialized hash malformed value data"));
176 }
177 else
178 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
179 _("Serialized hash malformed"));
180 }
181 else if (incremental && (buf->len >= 3)
182 && (buf->data[0] == 'D') && (buf->data[1] == ' '))
183 {
184 /* Get the length of the key */
185 err = svn_cstring_strtoui64(&ui64, buf->data + 2,
186 0, APR_SIZE_MAX, 10);
187 if (err)
188 return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
189 _("Serialized hash malformed key length"));
190 entry->keylen = (apr_size_t)ui64;
191
192 /* Now read that much into a buffer. */
193 entry->key = apr_palloc(pool, entry->keylen + 1);
194 SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
195 entry->key[entry->keylen] = '\0';
196
197 /* Suck up extra newline after key data */
198 len = 1;
199 SVN_ERR(svn_stream_read_full(stream, &c, &len));
200 if (c != '\n')
201 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
202 _("Serialized hash malformed key data"));
203
204 /* Remove this hash entry. */
205 entry->vallen = 0;
206 entry->val = NULL;
207 }
208 else
209 {
210 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
211 _("Serialized hash malformed"));
212 }
213
214 return SVN_NO_ERROR;
215 }
216
217 static svn_error_t *
hash_read(apr_hash_t * hash,svn_stream_t * stream,const char * terminator,svn_boolean_t incremental,apr_pool_t * pool)218 hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator,
219 svn_boolean_t incremental, apr_pool_t *pool)
220 {
221 apr_pool_t *iterpool = svn_pool_create(pool);
222
223 while (1)
224 {
225 svn_hash__entry_t entry;
226
227 svn_pool_clear(iterpool);
228 SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
229 incremental, iterpool));
230
231 /* end of hash? */
232 if (entry.key == NULL)
233 break;
234
235 if (entry.val)
236 {
237 /* Add a new hash entry. */
238 apr_hash_set(hash, apr_pstrmemdup(pool, entry.key, entry.keylen),
239 entry.keylen,
240 svn_string_ncreate(entry.val, entry.vallen, pool));
241 }
242 else
243 {
244 /* Remove this hash entry. */
245 apr_hash_set(hash, entry.key, entry.keylen, NULL);
246 }
247 }
248
249 svn_pool_destroy(iterpool);
250 return SVN_NO_ERROR;
251 }
252
253
254 /* Implements svn_hash_write2 and svn_hash_write_incremental. */
255 static svn_error_t *
hash_write(apr_hash_t * hash,apr_hash_t * oldhash,svn_stream_t * stream,const char * terminator,apr_pool_t * pool)256 hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream,
257 const char *terminator, apr_pool_t *pool)
258 {
259 apr_pool_t *subpool;
260 apr_size_t len;
261 apr_array_header_t *list;
262 int i;
263
264 subpool = svn_pool_create(pool);
265
266 list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool);
267 for (i = 0; i < list->nelts; i++)
268 {
269 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
270 svn_string_t *valstr = item->value;
271
272 svn_pool_clear(subpool);
273
274 /* Don't output entries equal to the ones in oldhash, if present. */
275 if (oldhash)
276 {
277 svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen);
278
279 if (oldstr && svn_string_compare(valstr, oldstr))
280 continue;
281 }
282
283 if (item->klen < 0)
284 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
285 _("Cannot serialize negative length"));
286
287 /* Write it out. */
288 SVN_ERR(svn_stream_printf(stream, subpool,
289 "K %" APR_SIZE_T_FMT "\n%s\n"
290 "V %" APR_SIZE_T_FMT "\n",
291 (apr_size_t) item->klen,
292 (const char *) item->key,
293 valstr->len));
294 len = valstr->len;
295 SVN_ERR(svn_stream_write(stream, valstr->data, &len));
296 SVN_ERR(svn_stream_puts(stream, "\n"));
297 }
298
299 if (oldhash)
300 {
301 /* Output a deletion entry for each property in oldhash but not hash. */
302 list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically,
303 pool);
304 for (i = 0; i < list->nelts; i++)
305 {
306 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
307
308 svn_pool_clear(subpool);
309
310 /* If it's not present in the new hash, write out a D entry. */
311 if (! apr_hash_get(hash, item->key, item->klen))
312 SVN_ERR(svn_stream_printf(stream, subpool,
313 "D %" APR_SSIZE_T_FMT "\n%s\n",
314 item->klen, (const char *) item->key));
315 }
316 }
317
318 if (terminator)
319 SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator));
320
321 svn_pool_destroy(subpool);
322 return SVN_NO_ERROR;
323 }
324
325
svn_hash_read2(apr_hash_t * hash,svn_stream_t * stream,const char * terminator,apr_pool_t * pool)326 svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream,
327 const char *terminator, apr_pool_t *pool)
328 {
329 return hash_read(hash, stream, terminator, FALSE, pool);
330 }
331
332
svn_hash_read_incremental(apr_hash_t * hash,svn_stream_t * stream,const char * terminator,apr_pool_t * pool)333 svn_error_t *svn_hash_read_incremental(apr_hash_t *hash,
334 svn_stream_t *stream,
335 const char *terminator,
336 apr_pool_t *pool)
337 {
338 return hash_read(hash, stream, terminator, TRUE, pool);
339 }
340
341
342 svn_error_t *
svn_hash_write2(apr_hash_t * hash,svn_stream_t * stream,const char * terminator,apr_pool_t * pool)343 svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream,
344 const char *terminator, apr_pool_t *pool)
345 {
346 return hash_write(hash, NULL, stream, terminator, pool);
347 }
348
349
350 svn_error_t *
svn_hash_write_incremental(apr_hash_t * hash,apr_hash_t * oldhash,svn_stream_t * stream,const char * terminator,apr_pool_t * pool)351 svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash,
352 svn_stream_t *stream, const char *terminator,
353 apr_pool_t *pool)
354 {
355 SVN_ERR_ASSERT(oldhash != NULL);
356 return hash_write(hash, oldhash, stream, terminator, pool);
357 }
358
359
360 svn_error_t *
svn_hash_write(apr_hash_t * hash,apr_file_t * destfile,apr_pool_t * pool)361 svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool)
362 {
363 return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool),
364 SVN_HASH_TERMINATOR, pool);
365 }
366
367
368 /* There are enough quirks in the deprecated svn_hash_read that we
369 should just preserve its implementation. */
370 svn_error_t *
svn_hash_read(apr_hash_t * hash,apr_file_t * srcfile,apr_pool_t * pool)371 svn_hash_read(apr_hash_t *hash,
372 apr_file_t *srcfile,
373 apr_pool_t *pool)
374 {
375 svn_error_t *err;
376 char buf[SVN_KEYLINE_MAXLEN];
377 apr_size_t num_read;
378 char c;
379 int first_time = 1;
380
381
382 while (1)
383 {
384 /* Read a key length line. Might be END, though. */
385 apr_size_t len = sizeof(buf);
386
387 err = svn_io_read_length_line(srcfile, buf, &len, pool);
388 if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time)
389 {
390 /* We got an EOF on our very first attempt to read, which
391 means it's a zero-byte file. No problem, just go home. */
392 svn_error_clear(err);
393 return SVN_NO_ERROR;
394 }
395 else if (err)
396 /* Any other circumstance is a genuine error. */
397 return err;
398
399 first_time = 0;
400
401 if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D'))
402 || ((len == 9)
403 && (buf[0] == 'P')
404 && (buf[1] == 'R') /* We formerly used just "END" to */
405 && (buf[2] == 'O') /* end a property hash, but later */
406 && (buf[3] == 'P') /* we added "PROPS-END", so that */
407 && (buf[4] == 'S') /* the fs dump format would be */
408 && (buf[5] == '-') /* more human-readable. That's */
409 && (buf[6] == 'E') /* why we accept either way here. */
410 && (buf[7] == 'N')
411 && (buf[8] == 'D')))
412 {
413 /* We've reached the end of the dumped hash table, so leave. */
414 return SVN_NO_ERROR;
415 }
416 else if ((buf[0] == 'K') && (buf[1] == ' '))
417 {
418 size_t keylen;
419 int parsed_len;
420 void *keybuf;
421
422 /* Get the length of the key */
423 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
424 keylen = parsed_len;
425
426 /* Now read that much into a buffer, + 1 byte for null terminator */
427 keybuf = apr_palloc(pool, keylen + 1);
428 SVN_ERR(svn_io_file_read_full2(srcfile,
429 keybuf, keylen,
430 &num_read, NULL, pool));
431 ((char *) keybuf)[keylen] = '\0';
432
433 /* Suck up extra newline after key data */
434 SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
435 if (c != '\n')
436 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
437
438 /* Read a val length line */
439 len = sizeof(buf);
440 SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool));
441
442 if ((buf[0] == 'V') && (buf[1] == ' '))
443 {
444 svn_string_t *value = apr_palloc(pool, sizeof(*value));
445 apr_size_t vallen;
446 void *valbuf;
447
448 /* Get the length of the value */
449 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
450 vallen = parsed_len;
451
452 /* Again, 1 extra byte for the null termination. */
453 valbuf = apr_palloc(pool, vallen + 1);
454 SVN_ERR(svn_io_file_read_full2(srcfile,
455 valbuf, vallen,
456 &num_read, NULL, pool));
457 ((char *) valbuf)[vallen] = '\0';
458
459 /* Suck up extra newline after val data */
460 SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
461 if (c != '\n')
462 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
463
464 value->data = valbuf;
465 value->len = vallen;
466
467 /* The Grand Moment: add a new hash entry! */
468 apr_hash_set(hash, keybuf, keylen, value);
469 }
470 else
471 {
472 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
473 }
474 }
475 else
476 {
477 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
478 }
479 } /* while (1) */
480 }
481
482
483
484 /*** Diffing hashes ***/
485
486 svn_error_t *
svn_hash_diff(apr_hash_t * hash_a,apr_hash_t * hash_b,svn_hash_diff_func_t diff_func,void * diff_func_baton,apr_pool_t * pool)487 svn_hash_diff(apr_hash_t *hash_a,
488 apr_hash_t *hash_b,
489 svn_hash_diff_func_t diff_func,
490 void *diff_func_baton,
491 apr_pool_t *pool)
492 {
493 apr_hash_index_t *hi;
494
495 if (hash_a)
496 for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi))
497 {
498 const void *key;
499 apr_ssize_t klen;
500
501 apr_hash_this(hi, &key, &klen, NULL);
502
503 if (hash_b && (apr_hash_get(hash_b, key, klen)))
504 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both,
505 diff_func_baton));
506 else
507 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a,
508 diff_func_baton));
509 }
510
511 if (hash_b)
512 for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi))
513 {
514 const void *key;
515 apr_ssize_t klen;
516
517 apr_hash_this(hi, &key, &klen, NULL);
518
519 if (! (hash_a && apr_hash_get(hash_a, key, klen)))
520 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b,
521 diff_func_baton));
522 }
523
524 return SVN_NO_ERROR;
525 }
526
527
528 /*** Misc. hash APIs ***/
529
530 svn_error_t *
svn_hash_keys(apr_array_header_t ** array,apr_hash_t * hash,apr_pool_t * pool)531 svn_hash_keys(apr_array_header_t **array,
532 apr_hash_t *hash,
533 apr_pool_t *pool)
534 {
535 apr_hash_index_t *hi;
536
537 *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *));
538
539 for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi))
540 {
541 APR_ARRAY_PUSH(*array, const char *) = apr_hash_this_key(hi);
542 }
543
544 return SVN_NO_ERROR;
545 }
546
547
548 svn_error_t *
svn_hash_from_cstring_keys(apr_hash_t ** hash_p,const apr_array_header_t * keys,apr_pool_t * pool)549 svn_hash_from_cstring_keys(apr_hash_t **hash_p,
550 const apr_array_header_t *keys,
551 apr_pool_t *pool)
552 {
553 int i;
554 apr_hash_t *hash = svn_hash__make(pool);
555 for (i = 0; i < keys->nelts; i++)
556 {
557 const char *key =
558 apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *));
559 svn_hash_sets(hash, key, key);
560 }
561 *hash_p = hash;
562 return SVN_NO_ERROR;
563 }
564
565
566 void *
svn_hash__gets_debug(apr_hash_t * ht,const char * key)567 svn_hash__gets_debug(apr_hash_t *ht, const char *key)
568 {
569 return apr_hash_get(ht, key, APR_HASH_KEY_STRING);
570 }
571
572
573 void
svn_hash__sets_debug(apr_hash_t * ht,const char * key,const void * val)574 svn_hash__sets_debug(apr_hash_t *ht, const char *key, const void *val)
575 {
576 apr_hash_set(ht, key, APR_HASH_KEY_STRING, val);
577 }
578
579
580
581 /*** Specialized getter APIs ***/
582
583 const char *
svn_hash__get_cstring(apr_hash_t * hash,const char * key,const char * default_value)584 svn_hash__get_cstring(apr_hash_t *hash,
585 const char *key,
586 const char *default_value)
587 {
588 if (hash)
589 {
590 const char *value = svn_hash_gets(hash, key);
591 return value ? value : default_value;
592 }
593
594 return default_value;
595 }
596
597
598 svn_boolean_t
svn_hash__get_bool(apr_hash_t * hash,const char * key,svn_boolean_t default_value)599 svn_hash__get_bool(apr_hash_t *hash, const char *key,
600 svn_boolean_t default_value)
601 {
602 const char *tmp_value = svn_hash__get_cstring(hash, key, NULL);
603 svn_tristate_t value = svn_tristate__from_word(tmp_value);
604
605 if (value == svn_tristate_true)
606 return TRUE;
607 else if (value == svn_tristate_false)
608 return FALSE;
609
610 return default_value;
611 }
612
613
614
615 /*** Optimized hash function ***/
616
617 /* apr_hashfunc_t optimized for the key that we use in SVN: paths and
618 * property names. Its primary goal is speed for keys of known length.
619 *
620 * Since strings tend to spawn large value spaces (usually differ in many
621 * bits with differences spanning a larger section of the key), we can be
622 * quite sloppy extracting a hash value. The more keys there are in a
623 * hash container, the more bits of the value returned by this function
624 * will be used. For a small number of string keys, choosing bits from any
625 * any fix location close to the tail of those keys would usually be good
626 * enough to prevent high collision rates.
627 */
628 static unsigned int
hashfunc_compatible(const char * char_key,apr_ssize_t * klen)629 hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
630 {
631 unsigned int hash = 0;
632 const unsigned char *key = (const unsigned char *)char_key;
633 const unsigned char *p;
634 apr_ssize_t i;
635
636 if (*klen == APR_HASH_KEY_STRING)
637 *klen = strlen(char_key);
638
639 #if SVN_UNALIGNED_ACCESS_IS_OK
640 for (p = key, i = *klen; i >= 4; i-=4, p+=4)
641 {
642 apr_uint32_t chunk = *(const apr_uint32_t *)p;
643
644 /* the ">> 17" part gives upper bits in the chunk a chance to make
645 some impact as well */
646 hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17);
647 }
648 #else
649 for (p = key, i = *klen; i >= 4; i-=4, p+=4)
650 {
651 hash = hash * 33 * 33 * 33 * 33
652 + p[0] * 33 * 33 * 33
653 + p[1] * 33 * 33
654 + p[2] * 33
655 + p[3];
656 }
657 #endif
658 for (; i; i--, p++)
659 hash = hash * 33 + *p;
660
661 return hash;
662 }
663
664 apr_hash_t *
svn_hash__make(apr_pool_t * pool)665 svn_hash__make(apr_pool_t *pool)
666 {
667 return apr_hash_make_custom(pool, hashfunc_compatible);
668 }
669