xref: /dragonfly/contrib/xz/src/liblzma/common/index.c (revision b5feb3da7c498482b19d14ac6f2b1901005f7d94)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index.c
4 /// \brief      Handling of .xz Indexes and some other Stream information
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "index.h"
14 #include "stream_flags_common.h"
15 
16 
17 /// \brief      How many Records to allocate at once
18 ///
19 /// This should be big enough to avoid making lots of tiny allocations
20 /// but small enough to avoid too much unused memory at once.
21 #define INDEX_GROUP_SIZE 512
22 
23 
24 /// \brief      How many Records can be allocated at once at maximum
25 #define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record))
26 
27 
28 /// \brief      Base structure for index_stream and index_group structures
29 typedef struct index_tree_node_s index_tree_node;
30 struct index_tree_node_s {
31           /// Uncompressed start offset of this Stream (relative to the
32           /// beginning of the file) or Block (relative to the beginning
33           /// of the Stream)
34           lzma_vli uncompressed_base;
35 
36           /// Compressed start offset of this Stream or Block
37           lzma_vli compressed_base;
38 
39           index_tree_node *parent;
40           index_tree_node *left;
41           index_tree_node *right;
42 };
43 
44 
45 /// \brief      AVL tree to hold index_stream or index_group structures
46 typedef struct {
47           /// Root node
48           index_tree_node *root;
49 
50           /// Leftmost node. Since the tree will be filled sequentially,
51           /// this won't change after the first node has been added to
52           /// the tree.
53           index_tree_node *leftmost;
54 
55           /// The rightmost node in the tree. Since the tree is filled
56           /// sequentially, this is always the node where to add the new data.
57           index_tree_node *rightmost;
58 
59           /// Number of nodes in the tree
60           uint32_t count;
61 
62 } index_tree;
63 
64 
65 typedef struct {
66           lzma_vli uncompressed_sum;
67           lzma_vli unpadded_sum;
68 } index_record;
69 
70 
71 typedef struct {
72           /// Every Record group is part of index_stream.groups tree.
73           index_tree_node node;
74 
75           /// Number of Blocks in this Stream before this group.
76           lzma_vli number_base;
77 
78           /// Number of Records that can be put in records[].
79           size_t allocated;
80 
81           /// Index of the last Record in use.
82           size_t last;
83 
84           /// The sizes in this array are stored as cumulative sums relative
85           /// to the beginning of the Stream. This makes it possible to
86           /// use binary search in lzma_index_locate().
87           ///
88           /// Note that the cumulative summing is done specially for
89           /// unpadded_sum: The previous value is rounded up to the next
90           /// multiple of four before adding the Unpadded Size of the new
91           /// Block. The total encoded size of the Blocks in the Stream
92           /// is records[last].unpadded_sum in the last Record group of
93           /// the Stream.
94           ///
95           /// For example, if the Unpadded Sizes are 39, 57, and 81, the
96           /// stored values are 39, 97 (40 + 57), and 181 (100 + 181).
97           /// The total encoded size of these Blocks is 184.
98           ///
99           /// This is a flexible array, because it makes easy to optimize
100           /// memory usage in case someone concatenates many Streams that
101           /// have only one or few Blocks.
102           index_record records[];
103 
104 } index_group;
105 
106 
107 typedef struct {
108           /// Every index_stream is a node in the tree of Streams.
109           index_tree_node node;
110 
111           /// Number of this Stream (first one is 1)
112           uint32_t number;
113 
114           /// Total number of Blocks before this Stream
115           lzma_vli block_number_base;
116 
117           /// Record groups of this Stream are stored in a tree.
118           /// It's a T-tree with AVL-tree balancing. There are
119           /// INDEX_GROUP_SIZE Records per node by default.
120           /// This keeps the number of memory allocations reasonable
121           /// and finding a Record is fast.
122           index_tree groups;
123 
124           /// Number of Records in this Stream
125           lzma_vli record_count;
126 
127           /// Size of the List of Records field in this Stream. This is used
128           /// together with record_count to calculate the size of the Index
129           /// field and thus the total size of the Stream.
130           lzma_vli index_list_size;
131 
132           /// Stream Flags of this Stream. This is meaningful only if
133           /// the Stream Flags have been told us with lzma_index_stream_flags().
134           /// Initially stream_flags.version is set to UINT32_MAX to indicate
135           /// that the Stream Flags are unknown.
136           lzma_stream_flags stream_flags;
137 
138           /// Amount of Stream Padding after this Stream. This defaults to
139           /// zero and can be set with lzma_index_stream_padding().
140           lzma_vli stream_padding;
141 
142 } index_stream;
143 
144 
145 struct lzma_index_s {
146           /// AVL-tree containing the Stream(s). Often there is just one
147           /// Stream, but using a tree keeps lookups fast even when there
148           /// are many concatenated Streams.
149           index_tree streams;
150 
151           /// Uncompressed size of all the Blocks in the Stream(s)
152           lzma_vli uncompressed_size;
153 
154           /// Total size of all the Blocks in the Stream(s)
155           lzma_vli total_size;
156 
157           /// Total number of Records in all Streams in this lzma_index
158           lzma_vli record_count;
159 
160           /// Size of the List of Records field if all the Streams in this
161           /// lzma_index were packed into a single Stream (makes it simpler to
162           /// take many .xz files and combine them into a single Stream).
163           ///
164           /// This value together with record_count is needed to calculate
165           /// Backward Size that is stored into Stream Footer.
166           lzma_vli index_list_size;
167 
168           /// How many Records to allocate at once in lzma_index_append().
169           /// This defaults to INDEX_GROUP_SIZE but can be overridden with
170           /// lzma_index_prealloc().
171           size_t prealloc;
172 
173           /// Bitmask indicating what integrity check types have been used
174           /// as set by lzma_index_stream_flags(). The bit of the last Stream
175           /// is not included here, since it is possible to change it by
176           /// calling lzma_index_stream_flags() again.
177           uint32_t checks;
178 };
179 
180 
181 static void
index_tree_init(index_tree * tree)182 index_tree_init(index_tree *tree)
183 {
184           tree->root = NULL;
185           tree->leftmost = NULL;
186           tree->rightmost = NULL;
187           tree->count = 0;
188           return;
189 }
190 
191 
192 /// Helper for index_tree_end()
193 static void
index_tree_node_end(index_tree_node * node,const lzma_allocator * allocator,void (* free_func)(void * node,const lzma_allocator * allocator))194 index_tree_node_end(index_tree_node *node, const lzma_allocator *allocator,
195                     void (*free_func)(void *node, const lzma_allocator *allocator))
196 {
197           // The tree won't ever be very huge, so recursion should be fine.
198           // 20 levels in the tree is likely quite a lot already in practice.
199           if (node->left != NULL)
200                     index_tree_node_end(node->left, allocator, free_func);
201 
202           if (node->right != NULL)
203                     index_tree_node_end(node->right, allocator, free_func);
204 
205           free_func(node, allocator);
206           return;
207 }
208 
209 
210 /// Free the memory allocated for a tree. Each node is freed using the
211 /// given free_func which is either &lzma_free or &index_stream_end.
212 /// The latter is used to free the Record groups from each index_stream
213 /// before freeing the index_stream itself.
214 static void
index_tree_end(index_tree * tree,const lzma_allocator * allocator,void (* free_func)(void * node,const lzma_allocator * allocator))215 index_tree_end(index_tree *tree, const lzma_allocator *allocator,
216                     void (*free_func)(void *node, const lzma_allocator *allocator))
217 {
218           assert(free_func != NULL);
219 
220           if (tree->root != NULL)
221                     index_tree_node_end(tree->root, allocator, free_func);
222 
223           return;
224 }
225 
226 
227 /// Add a new node to the tree. node->uncompressed_base and
228 /// node->compressed_base must have been set by the caller already.
229 static void
index_tree_append(index_tree * tree,index_tree_node * node)230 index_tree_append(index_tree *tree, index_tree_node *node)
231 {
232           node->parent = tree->rightmost;
233           node->left = NULL;
234           node->right = NULL;
235 
236           ++tree->count;
237 
238           // Handle the special case of adding the first node.
239           if (tree->root == NULL) {
240                     tree->root = node;
241                     tree->leftmost = node;
242                     tree->rightmost = node;
243                     return;
244           }
245 
246           // The tree is always filled sequentially.
247           assert(tree->rightmost->uncompressed_base <= node->uncompressed_base);
248           assert(tree->rightmost->compressed_base < node->compressed_base);
249 
250           // Add the new node after the rightmost node. It's the correct
251           // place due to the reason above.
252           tree->rightmost->right = node;
253           tree->rightmost = node;
254 
255           // Balance the AVL-tree if needed. We don't need to keep the balance
256           // factors in nodes, because we always fill the tree sequentially,
257           // and thus know the state of the tree just by looking at the node
258           // count. From the node count we can calculate how many steps to go
259           // up in the tree to find the rotation root.
260           uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count));
261           if (up != 0) {
262                     // Locate the root node for the rotation.
263                     up = ctz32(tree->count) + 2;
264                     do {
265                               node = node->parent;
266                     } while (--up > 0);
267 
268                     // Rotate left using node as the rotation root.
269                     index_tree_node *pivot = node->right;
270 
271                     if (node->parent == NULL) {
272                               tree->root = pivot;
273                     } else {
274                               assert(node->parent->right == node);
275                               node->parent->right = pivot;
276                     }
277 
278                     pivot->parent = node->parent;
279 
280                     node->right = pivot->left;
281                     if (node->right != NULL)
282                               node->right->parent = node;
283 
284                     pivot->left = node;
285                     node->parent = pivot;
286           }
287 
288           return;
289 }
290 
291 
292 /// Get the next node in the tree. Return NULL if there are no more nodes.
293 static void *
index_tree_next(const index_tree_node * node)294 index_tree_next(const index_tree_node *node)
295 {
296           if (node->right != NULL) {
297                     node = node->right;
298                     while (node->left != NULL)
299                               node = node->left;
300 
301                     return (void *)(node);
302           }
303 
304           while (node->parent != NULL && node->parent->right == node)
305                     node = node->parent;
306 
307           return (void *)(node->parent);
308 }
309 
310 
311 /// Locate a node that contains the given uncompressed offset. It is
312 /// caller's job to check that target is not bigger than the uncompressed
313 /// size of the tree (the last node would be returned in that case still).
314 static void *
index_tree_locate(const index_tree * tree,lzma_vli target)315 index_tree_locate(const index_tree *tree, lzma_vli target)
316 {
317           const index_tree_node *result = NULL;
318           const index_tree_node *node = tree->root;
319 
320           assert(tree->leftmost == NULL
321                               || tree->leftmost->uncompressed_base == 0);
322 
323           // Consecutive nodes may have the same uncompressed_base.
324           // We must pick the rightmost one.
325           while (node != NULL) {
326                     if (node->uncompressed_base > target) {
327                               node = node->left;
328                     } else {
329                               result = node;
330                               node = node->right;
331                     }
332           }
333 
334           return (void *)(result);
335 }
336 
337 
338 /// Allocate and initialize a new Stream using the given base offsets.
339 static index_stream *
index_stream_init(lzma_vli compressed_base,lzma_vli uncompressed_base,uint32_t stream_number,lzma_vli block_number_base,const lzma_allocator * allocator)340 index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base,
341                     uint32_t stream_number, lzma_vli block_number_base,
342                     const lzma_allocator *allocator)
343 {
344           index_stream *s = lzma_alloc(sizeof(index_stream), allocator);
345           if (s == NULL)
346                     return NULL;
347 
348           s->node.uncompressed_base = uncompressed_base;
349           s->node.compressed_base = compressed_base;
350           s->node.parent = NULL;
351           s->node.left = NULL;
352           s->node.right = NULL;
353 
354           s->number = stream_number;
355           s->block_number_base = block_number_base;
356 
357           index_tree_init(&s->groups);
358 
359           s->record_count = 0;
360           s->index_list_size = 0;
361           s->stream_flags.version = UINT32_MAX;
362           s->stream_padding = 0;
363 
364           return s;
365 }
366 
367 
368 /// Free the memory allocated for a Stream and its Record groups.
369 static void
index_stream_end(void * node,const lzma_allocator * allocator)370 index_stream_end(void *node, const lzma_allocator *allocator)
371 {
372           index_stream *s = node;
373           index_tree_end(&s->groups, allocator, &lzma_free);
374           lzma_free(s, allocator);
375           return;
376 }
377 
378 
379 static lzma_index *
index_init_plain(const lzma_allocator * allocator)380 index_init_plain(const lzma_allocator *allocator)
381 {
382           lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
383           if (i != NULL) {
384                     index_tree_init(&i->streams);
385                     i->uncompressed_size = 0;
386                     i->total_size = 0;
387                     i->record_count = 0;
388                     i->index_list_size = 0;
389                     i->prealloc = INDEX_GROUP_SIZE;
390                     i->checks = 0;
391           }
392 
393           return i;
394 }
395 
396 
397 extern LZMA_API(lzma_index *)
lzma_index_init(const lzma_allocator * allocator)398 lzma_index_init(const lzma_allocator *allocator)
399 {
400           lzma_index *i = index_init_plain(allocator);
401           if (i == NULL)
402                     return NULL;
403 
404           index_stream *s = index_stream_init(0, 0, 1, 0, allocator);
405           if (s == NULL) {
406                     lzma_free(i, allocator);
407                     return NULL;
408           }
409 
410           index_tree_append(&i->streams, &s->node);
411 
412           return i;
413 }
414 
415 
416 extern LZMA_API(void)
lzma_index_end(lzma_index * i,const lzma_allocator * allocator)417 lzma_index_end(lzma_index *i, const lzma_allocator *allocator)
418 {
419           // NOTE: If you modify this function, check also the bottom
420           // of lzma_index_cat().
421           if (i != NULL) {
422                     index_tree_end(&i->streams, allocator, &index_stream_end);
423                     lzma_free(i, allocator);
424           }
425 
426           return;
427 }
428 
429 
430 extern void
lzma_index_prealloc(lzma_index * i,lzma_vli records)431 lzma_index_prealloc(lzma_index *i, lzma_vli records)
432 {
433           if (records > PREALLOC_MAX)
434                     records = PREALLOC_MAX;
435 
436           i->prealloc = (size_t)(records);
437           return;
438 }
439 
440 
441 extern LZMA_API(uint64_t)
lzma_index_memusage(lzma_vli streams,lzma_vli blocks)442 lzma_index_memusage(lzma_vli streams, lzma_vli blocks)
443 {
444           // This calculates an upper bound that is only a little bit
445           // bigger than the exact maximum memory usage with the given
446           // parameters.
447 
448           // Typical malloc() overhead is 2 * sizeof(void *) but we take
449           // a little bit extra just in case. Using LZMA_MEMUSAGE_BASE
450           // instead would give too inaccurate estimate.
451           const size_t alloc_overhead = 4 * sizeof(void *);
452 
453           // Amount of memory needed for each Stream base structures.
454           // We assume that every Stream has at least one Block and
455           // thus at least one group.
456           const size_t stream_base = sizeof(index_stream)
457                               + sizeof(index_group) + 2 * alloc_overhead;
458 
459           // Amount of memory needed per group.
460           const size_t group_base = sizeof(index_group)
461                               + INDEX_GROUP_SIZE * sizeof(index_record)
462                               + alloc_overhead;
463 
464           // Number of groups. There may actually be more, but that overhead
465           // has been taken into account in stream_base already.
466           const lzma_vli groups
467                               = (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE;
468 
469           // Memory used by index_stream and index_group structures.
470           const uint64_t streams_mem = streams * stream_base;
471           const uint64_t groups_mem = groups * group_base;
472 
473           // Memory used by the base structure.
474           const uint64_t index_base = sizeof(lzma_index) + alloc_overhead;
475 
476           // Validate the arguments and catch integer overflows.
477           // Maximum number of Streams is "only" UINT32_MAX, because
478           // that limit is used by the tree containing the Streams.
479           const uint64_t limit = UINT64_MAX - index_base;
480           if (streams == 0 || streams > UINT32_MAX || blocks > LZMA_VLI_MAX
481                               || streams > limit / stream_base
482                               || groups > limit / group_base
483                               || limit - streams_mem < groups_mem)
484                     return UINT64_MAX;
485 
486           return index_base + streams_mem + groups_mem;
487 }
488 
489 
490 extern LZMA_API(uint64_t)
lzma_index_memused(const lzma_index * i)491 lzma_index_memused(const lzma_index *i)
492 {
493           return lzma_index_memusage(i->streams.count, i->record_count);
494 }
495 
496 
497 extern LZMA_API(lzma_vli)
lzma_index_block_count(const lzma_index * i)498 lzma_index_block_count(const lzma_index *i)
499 {
500           return i->record_count;
501 }
502 
503 
504 extern LZMA_API(lzma_vli)
lzma_index_stream_count(const lzma_index * i)505 lzma_index_stream_count(const lzma_index *i)
506 {
507           return i->streams.count;
508 }
509 
510 
511 extern LZMA_API(lzma_vli)
lzma_index_size(const lzma_index * i)512 lzma_index_size(const lzma_index *i)
513 {
514           return index_size(i->record_count, i->index_list_size);
515 }
516 
517 
518 extern LZMA_API(lzma_vli)
lzma_index_total_size(const lzma_index * i)519 lzma_index_total_size(const lzma_index *i)
520 {
521           return i->total_size;
522 }
523 
524 
525 extern LZMA_API(lzma_vli)
lzma_index_stream_size(const lzma_index * i)526 lzma_index_stream_size(const lzma_index *i)
527 {
528           // Stream Header + Blocks + Index + Stream Footer
529           return LZMA_STREAM_HEADER_SIZE + i->total_size
530                               + index_size(i->record_count, i->index_list_size)
531                               + LZMA_STREAM_HEADER_SIZE;
532 }
533 
534 
535 static lzma_vli
index_file_size(lzma_vli compressed_base,lzma_vli unpadded_sum,lzma_vli record_count,lzma_vli index_list_size,lzma_vli stream_padding)536 index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum,
537                     lzma_vli record_count, lzma_vli index_list_size,
538                     lzma_vli stream_padding)
539 {
540           // Earlier Streams and Stream Paddings + Stream Header
541           // + Blocks + Index + Stream Footer + Stream Padding
542           //
543           // This might go over LZMA_VLI_MAX due to too big unpadded_sum
544           // when this function is used in lzma_index_append().
545           lzma_vli file_size = compressed_base + 2 * LZMA_STREAM_HEADER_SIZE
546                               + stream_padding + vli_ceil4(unpadded_sum);
547           if (file_size > LZMA_VLI_MAX)
548                     return LZMA_VLI_UNKNOWN;
549 
550           // The same applies here.
551           file_size += index_size(record_count, index_list_size);
552           if (file_size > LZMA_VLI_MAX)
553                     return LZMA_VLI_UNKNOWN;
554 
555           return file_size;
556 }
557 
558 
559 extern LZMA_API(lzma_vli)
lzma_index_file_size(const lzma_index * i)560 lzma_index_file_size(const lzma_index *i)
561 {
562           const index_stream *s = (const index_stream *)(i->streams.rightmost);
563           const index_group *g = (const index_group *)(s->groups.rightmost);
564           return index_file_size(s->node.compressed_base,
565                               g == NULL ? 0 : g->records[g->last].unpadded_sum,
566                               s->record_count, s->index_list_size,
567                               s->stream_padding);
568 }
569 
570 
571 extern LZMA_API(lzma_vli)
lzma_index_uncompressed_size(const lzma_index * i)572 lzma_index_uncompressed_size(const lzma_index *i)
573 {
574           return i->uncompressed_size;
575 }
576 
577 
578 extern LZMA_API(uint32_t)
lzma_index_checks(const lzma_index * i)579 lzma_index_checks(const lzma_index *i)
580 {
581           uint32_t checks = i->checks;
582 
583           // Get the type of the Check of the last Stream too.
584           const index_stream *s = (const index_stream *)(i->streams.rightmost);
585           if (s->stream_flags.version != UINT32_MAX)
586                     checks |= UINT32_C(1) << s->stream_flags.check;
587 
588           return checks;
589 }
590 
591 
592 extern uint32_t
lzma_index_padding_size(const lzma_index * i)593 lzma_index_padding_size(const lzma_index *i)
594 {
595           return (LZMA_VLI_C(4) - index_size_unpadded(
596                               i->record_count, i->index_list_size)) & 3;
597 }
598 
599 
600 extern LZMA_API(lzma_ret)
lzma_index_stream_flags(lzma_index * i,const lzma_stream_flags * stream_flags)601 lzma_index_stream_flags(lzma_index *i, const lzma_stream_flags *stream_flags)
602 {
603           if (i == NULL || stream_flags == NULL)
604                     return LZMA_PROG_ERROR;
605 
606           // Validate the Stream Flags.
607           return_if_error(lzma_stream_flags_compare(
608                               stream_flags, stream_flags));
609 
610           index_stream *s = (index_stream *)(i->streams.rightmost);
611           s->stream_flags = *stream_flags;
612 
613           return LZMA_OK;
614 }
615 
616 
617 extern LZMA_API(lzma_ret)
lzma_index_stream_padding(lzma_index * i,lzma_vli stream_padding)618 lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding)
619 {
620           if (i == NULL || stream_padding > LZMA_VLI_MAX
621                               || (stream_padding & 3) != 0)
622                     return LZMA_PROG_ERROR;
623 
624           index_stream *s = (index_stream *)(i->streams.rightmost);
625 
626           // Check that the new value won't make the file grow too big.
627           const lzma_vli old_stream_padding = s->stream_padding;
628           s->stream_padding = 0;
629           if (lzma_index_file_size(i) + stream_padding > LZMA_VLI_MAX) {
630                     s->stream_padding = old_stream_padding;
631                     return LZMA_DATA_ERROR;
632           }
633 
634           s->stream_padding = stream_padding;
635           return LZMA_OK;
636 }
637 
638 
639 extern LZMA_API(lzma_ret)
lzma_index_append(lzma_index * i,const lzma_allocator * allocator,lzma_vli unpadded_size,lzma_vli uncompressed_size)640 lzma_index_append(lzma_index *i, const lzma_allocator *allocator,
641                     lzma_vli unpadded_size, lzma_vli uncompressed_size)
642 {
643           // Validate.
644           if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN
645                               || unpadded_size > UNPADDED_SIZE_MAX
646                               || uncompressed_size > LZMA_VLI_MAX)
647                     return LZMA_PROG_ERROR;
648 
649           index_stream *s = (index_stream *)(i->streams.rightmost);
650           index_group *g = (index_group *)(s->groups.rightmost);
651 
652           const lzma_vli compressed_base = g == NULL ? 0
653                               : vli_ceil4(g->records[g->last].unpadded_sum);
654           const lzma_vli uncompressed_base = g == NULL ? 0
655                               : g->records[g->last].uncompressed_sum;
656           const uint32_t index_list_size_add = lzma_vli_size(unpadded_size)
657                               + lzma_vli_size(uncompressed_size);
658 
659           // Check that the file size will stay within limits.
660           if (index_file_size(s->node.compressed_base,
661                               compressed_base + unpadded_size, s->record_count + 1,
662                               s->index_list_size + index_list_size_add,
663                               s->stream_padding) == LZMA_VLI_UNKNOWN)
664                     return LZMA_DATA_ERROR;
665 
666           // The size of the Index field must not exceed the maximum value
667           // that can be stored in the Backward Size field.
668           if (index_size(i->record_count + 1,
669                               i->index_list_size + index_list_size_add)
670                               > LZMA_BACKWARD_SIZE_MAX)
671                     return LZMA_DATA_ERROR;
672 
673           if (g != NULL && g->last + 1 < g->allocated) {
674                     // There is space in the last group at least for one Record.
675                     ++g->last;
676           } else {
677                     // We need to allocate a new group.
678                     g = lzma_alloc(sizeof(index_group)
679                                         + i->prealloc * sizeof(index_record),
680                                         allocator);
681                     if (g == NULL)
682                               return LZMA_MEM_ERROR;
683 
684                     g->last = 0;
685                     g->allocated = i->prealloc;
686 
687                     // Reset prealloc so that if the application happens to
688                     // add new Records, the allocation size will be sane.
689                     i->prealloc = INDEX_GROUP_SIZE;
690 
691                     // Set the start offsets of this group.
692                     g->node.uncompressed_base = uncompressed_base;
693                     g->node.compressed_base = compressed_base;
694                     g->number_base = s->record_count + 1;
695 
696                     // Add the new group to the Stream.
697                     index_tree_append(&s->groups, &g->node);
698           }
699 
700           // Add the new Record to the group.
701           g->records[g->last].uncompressed_sum
702                               = uncompressed_base + uncompressed_size;
703           g->records[g->last].unpadded_sum
704                               = compressed_base + unpadded_size;
705 
706           // Update the totals.
707           ++s->record_count;
708           s->index_list_size += index_list_size_add;
709 
710           i->total_size += vli_ceil4(unpadded_size);
711           i->uncompressed_size += uncompressed_size;
712           ++i->record_count;
713           i->index_list_size += index_list_size_add;
714 
715           return LZMA_OK;
716 }
717 
718 
719 /// Structure to pass info to index_cat_helper()
720 typedef struct {
721           /// Uncompressed size of the destination
722           lzma_vli uncompressed_size;
723 
724           /// Compressed file size of the destination
725           lzma_vli file_size;
726 
727           /// Same as above but for Block numbers
728           lzma_vli block_number_add;
729 
730           /// Number of Streams that were in the destination index before we
731           /// started appending new Streams from the source index. This is
732           /// used to fix the Stream numbering.
733           uint32_t stream_number_add;
734 
735           /// Destination index' Stream tree
736           index_tree *streams;
737 
738 } index_cat_info;
739 
740 
741 /// Add the Stream nodes from the source index to dest using recursion.
742 /// Simplest iterative traversal of the source tree wouldn't work, because
743 /// we update the pointers in nodes when moving them to the destination tree.
744 static void
index_cat_helper(const index_cat_info * info,index_stream * this)745 index_cat_helper(const index_cat_info *info, index_stream *this)
746 {
747           index_stream *left = (index_stream *)(this->node.left);
748           index_stream *right = (index_stream *)(this->node.right);
749 
750           if (left != NULL)
751                     index_cat_helper(info, left);
752 
753           this->node.uncompressed_base += info->uncompressed_size;
754           this->node.compressed_base += info->file_size;
755           this->number += info->stream_number_add;
756           this->block_number_base += info->block_number_add;
757           index_tree_append(info->streams, &this->node);
758 
759           if (right != NULL)
760                     index_cat_helper(info, right);
761 
762           return;
763 }
764 
765 
766 extern LZMA_API(lzma_ret)
lzma_index_cat(lzma_index * restrict dest,lzma_index * restrict src,const lzma_allocator * allocator)767 lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
768                     const lzma_allocator *allocator)
769 {
770           const lzma_vli dest_file_size = lzma_index_file_size(dest);
771 
772           // Check that we don't exceed the file size limits.
773           if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX
774                               || dest->uncompressed_size + src->uncompressed_size
775                                         > LZMA_VLI_MAX)
776                     return LZMA_DATA_ERROR;
777 
778           // Check that the encoded size of the combined lzma_indexes stays
779           // within limits. In theory, this should be done only if we know
780           // that the user plans to actually combine the Streams and thus
781           // construct a single Index (probably rare). However, exceeding
782           // this limit is quite theoretical, so we do this check always
783           // to simplify things elsewhere.
784           {
785                     const lzma_vli dest_size = index_size_unpadded(
786                                         dest->record_count, dest->index_list_size);
787                     const lzma_vli src_size = index_size_unpadded(
788                                         src->record_count, src->index_list_size);
789                     if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
790                               return LZMA_DATA_ERROR;
791           }
792 
793           // Optimize the last group to minimize memory usage. Allocation has
794           // to be done before modifying dest or src.
795           {
796                     index_stream *s = (index_stream *)(dest->streams.rightmost);
797                     index_group *g = (index_group *)(s->groups.rightmost);
798                     if (g != NULL && g->last + 1 < g->allocated) {
799                               assert(g->node.left == NULL);
800                               assert(g->node.right == NULL);
801 
802                               index_group *newg = lzma_alloc(sizeof(index_group)
803                                                   + (g->last + 1)
804                                                   * sizeof(index_record),
805                                                   allocator);
806                               if (newg == NULL)
807                                         return LZMA_MEM_ERROR;
808 
809                               newg->node = g->node;
810                               newg->allocated = g->last + 1;
811                               newg->last = g->last;
812                               newg->number_base = g->number_base;
813 
814                               memcpy(newg->records, g->records, newg->allocated
815                                                   * sizeof(index_record));
816 
817                               if (g->node.parent != NULL) {
818                                         assert(g->node.parent->right == &g->node);
819                                         g->node.parent->right = &newg->node;
820                               }
821 
822                               if (s->groups.leftmost == &g->node) {
823                                         assert(s->groups.root == &g->node);
824                                         s->groups.leftmost = &newg->node;
825                                         s->groups.root = &newg->node;
826                               }
827 
828                               assert(s->groups.rightmost == &g->node);
829                               s->groups.rightmost = &newg->node;
830 
831                               lzma_free(g, allocator);
832 
833                               // NOTE: newg isn't leaked here because
834                               // newg == (void *)&newg->node.
835                     }
836           }
837 
838           // Add all the Streams from src to dest. Update the base offsets
839           // of each Stream from src.
840           const index_cat_info info = {
841                     .uncompressed_size = dest->uncompressed_size,
842                     .file_size = dest_file_size,
843                     .stream_number_add = dest->streams.count,
844                     .block_number_add = dest->record_count,
845                     .streams = &dest->streams,
846           };
847           index_cat_helper(&info, (index_stream *)(src->streams.root));
848 
849           // Update info about all the combined Streams.
850           dest->uncompressed_size += src->uncompressed_size;
851           dest->total_size += src->total_size;
852           dest->record_count += src->record_count;
853           dest->index_list_size += src->index_list_size;
854           dest->checks = lzma_index_checks(dest) | src->checks;
855 
856           // There's nothing else left in src than the base structure.
857           lzma_free(src, allocator);
858 
859           return LZMA_OK;
860 }
861 
862 
863 /// Duplicate an index_stream.
864 static index_stream *
index_dup_stream(const index_stream * src,const lzma_allocator * allocator)865 index_dup_stream(const index_stream *src, const lzma_allocator *allocator)
866 {
867           // Catch a somewhat theoretical integer overflow.
868           if (src->record_count > PREALLOC_MAX)
869                     return NULL;
870 
871           // Allocate and initialize a new Stream.
872           index_stream *dest = index_stream_init(src->node.compressed_base,
873                               src->node.uncompressed_base, src->number,
874                               src->block_number_base, allocator);
875           if (dest == NULL)
876                     return NULL;
877 
878           // Copy the overall information.
879           dest->record_count = src->record_count;
880           dest->index_list_size = src->index_list_size;
881           dest->stream_flags = src->stream_flags;
882           dest->stream_padding = src->stream_padding;
883 
884           // Return if there are no groups to duplicate.
885           if (src->groups.leftmost == NULL)
886                     return dest;
887 
888           // Allocate memory for the Records. We put all the Records into
889           // a single group. It's simplest and also tends to make
890           // lzma_index_locate() a little bit faster with very big Indexes.
891           index_group *destg = lzma_alloc(sizeof(index_group)
892                               + src->record_count * sizeof(index_record),
893                               allocator);
894           if (destg == NULL) {
895                     index_stream_end(dest, allocator);
896                     return NULL;
897           }
898 
899           // Initialize destg.
900           destg->node.uncompressed_base = 0;
901           destg->node.compressed_base = 0;
902           destg->number_base = 1;
903           destg->allocated = src->record_count;
904           destg->last = src->record_count - 1;
905 
906           // Go through all the groups in src and copy the Records into destg.
907           const index_group *srcg = (const index_group *)(src->groups.leftmost);
908           size_t i = 0;
909           do {
910                     memcpy(destg->records + i, srcg->records,
911                                         (srcg->last + 1) * sizeof(index_record));
912                     i += srcg->last + 1;
913                     srcg = index_tree_next(&srcg->node);
914           } while (srcg != NULL);
915 
916           assert(i == destg->allocated);
917 
918           // Add the group to the new Stream.
919           index_tree_append(&dest->groups, &destg->node);
920 
921           return dest;
922 }
923 
924 
925 extern LZMA_API(lzma_index *)
lzma_index_dup(const lzma_index * src,const lzma_allocator * allocator)926 lzma_index_dup(const lzma_index *src, const lzma_allocator *allocator)
927 {
928           // Allocate the base structure (no initial Stream).
929           lzma_index *dest = index_init_plain(allocator);
930           if (dest == NULL)
931                     return NULL;
932 
933           // Copy the totals.
934           dest->uncompressed_size = src->uncompressed_size;
935           dest->total_size = src->total_size;
936           dest->record_count = src->record_count;
937           dest->index_list_size = src->index_list_size;
938 
939           // Copy the Streams and the groups in them.
940           const index_stream *srcstream
941                               = (const index_stream *)(src->streams.leftmost);
942           do {
943                     index_stream *deststream = index_dup_stream(
944                                         srcstream, allocator);
945                     if (deststream == NULL) {
946                               lzma_index_end(dest, allocator);
947                               return NULL;
948                     }
949 
950                     index_tree_append(&dest->streams, &deststream->node);
951 
952                     srcstream = index_tree_next(&srcstream->node);
953           } while (srcstream != NULL);
954 
955           return dest;
956 }
957 
958 
959 /// Indexing for lzma_index_iter.internal[]
960 enum {
961           ITER_INDEX,
962           ITER_STREAM,
963           ITER_GROUP,
964           ITER_RECORD,
965           ITER_METHOD,
966 };
967 
968 
969 /// Values for lzma_index_iter.internal[ITER_METHOD].s
970 enum {
971           ITER_METHOD_NORMAL,
972           ITER_METHOD_NEXT,
973           ITER_METHOD_LEFTMOST,
974 };
975 
976 
977 static void
iter_set_info(lzma_index_iter * iter)978 iter_set_info(lzma_index_iter *iter)
979 {
980           const lzma_index *i = iter->internal[ITER_INDEX].p;
981           const index_stream *stream = iter->internal[ITER_STREAM].p;
982           const index_group *group = iter->internal[ITER_GROUP].p;
983           const size_t record = iter->internal[ITER_RECORD].s;
984 
985           // lzma_index_iter.internal must not contain a pointer to the last
986           // group in the index, because that may be reallocated by
987           // lzma_index_cat().
988           if (group == NULL) {
989                     // There are no groups.
990                     assert(stream->groups.root == NULL);
991                     iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST;
992 
993           } else if (i->streams.rightmost != &stream->node
994                               || stream->groups.rightmost != &group->node) {
995                     // The group is not not the last group in the index.
996                     iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL;
997 
998           } else if (stream->groups.leftmost != &group->node) {
999                     // The group isn't the only group in the Stream, thus we
1000                     // know that it must have a parent group i.e. it's not
1001                     // the root node.
1002                     assert(stream->groups.root != &group->node);
1003                     assert(group->node.parent->right == &group->node);
1004                     iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT;
1005                     iter->internal[ITER_GROUP].p = group->node.parent;
1006 
1007           } else {
1008                     // The Stream has only one group.
1009                     assert(stream->groups.root == &group->node);
1010                     assert(group->node.parent == NULL);
1011                     iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST;
1012                     iter->internal[ITER_GROUP].p = NULL;
1013           }
1014 
1015           // NOTE: lzma_index_iter.stream.number is lzma_vli but we use uint32_t
1016           // internally.
1017           iter->stream.number = stream->number;
1018           iter->stream.block_count = stream->record_count;
1019           iter->stream.compressed_offset = stream->node.compressed_base;
1020           iter->stream.uncompressed_offset = stream->node.uncompressed_base;
1021 
1022           // iter->stream.flags will be NULL if the Stream Flags haven't been
1023           // set with lzma_index_stream_flags().
1024           iter->stream.flags = stream->stream_flags.version == UINT32_MAX
1025                               ? NULL : &stream->stream_flags;
1026           iter->stream.padding = stream->stream_padding;
1027 
1028           if (stream->groups.rightmost == NULL) {
1029                     // Stream has no Blocks.
1030                     iter->stream.compressed_size = index_size(0, 0)
1031                                         + 2 * LZMA_STREAM_HEADER_SIZE;
1032                     iter->stream.uncompressed_size = 0;
1033           } else {
1034                     const index_group *g = (const index_group *)(
1035                                         stream->groups.rightmost);
1036 
1037                     // Stream Header + Stream Footer + Index + Blocks
1038                     iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE
1039                                         + index_size(stream->record_count,
1040                                                   stream->index_list_size)
1041                                         + vli_ceil4(g->records[g->last].unpadded_sum);
1042                     iter->stream.uncompressed_size
1043                                         = g->records[g->last].uncompressed_sum;
1044           }
1045 
1046           if (group != NULL) {
1047                     iter->block.number_in_stream = group->number_base + record;
1048                     iter->block.number_in_file = iter->block.number_in_stream
1049                                         + stream->block_number_base;
1050 
1051                     iter->block.compressed_stream_offset
1052                                         = record == 0 ? group->node.compressed_base
1053                                         : vli_ceil4(group->records[
1054                                                   record - 1].unpadded_sum);
1055                     iter->block.uncompressed_stream_offset
1056                                         = record == 0 ? group->node.uncompressed_base
1057                                         : group->records[record - 1].uncompressed_sum;
1058 
1059                     iter->block.uncompressed_size
1060                                         = group->records[record].uncompressed_sum
1061                                         - iter->block.uncompressed_stream_offset;
1062                     iter->block.unpadded_size
1063                                         = group->records[record].unpadded_sum
1064                                         - iter->block.compressed_stream_offset;
1065                     iter->block.total_size = vli_ceil4(iter->block.unpadded_size);
1066 
1067                     iter->block.compressed_stream_offset
1068                                         += LZMA_STREAM_HEADER_SIZE;
1069 
1070                     iter->block.compressed_file_offset
1071                                         = iter->block.compressed_stream_offset
1072                                         + iter->stream.compressed_offset;
1073                     iter->block.uncompressed_file_offset
1074                                         = iter->block.uncompressed_stream_offset
1075                                         + iter->stream.uncompressed_offset;
1076           }
1077 
1078           return;
1079 }
1080 
1081 
1082 extern LZMA_API(void)
lzma_index_iter_init(lzma_index_iter * iter,const lzma_index * i)1083 lzma_index_iter_init(lzma_index_iter *iter, const lzma_index *i)
1084 {
1085           iter->internal[ITER_INDEX].p = i;
1086           lzma_index_iter_rewind(iter);
1087           return;
1088 }
1089 
1090 
1091 extern LZMA_API(void)
lzma_index_iter_rewind(lzma_index_iter * iter)1092 lzma_index_iter_rewind(lzma_index_iter *iter)
1093 {
1094           iter->internal[ITER_STREAM].p = NULL;
1095           iter->internal[ITER_GROUP].p = NULL;
1096           iter->internal[ITER_RECORD].s = 0;
1097           iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL;
1098           return;
1099 }
1100 
1101 
1102 extern LZMA_API(lzma_bool)
lzma_index_iter_next(lzma_index_iter * iter,lzma_index_iter_mode mode)1103 lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode)
1104 {
1105           // Catch unsupported mode values.
1106           if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK)
1107                     return true;
1108 
1109           const lzma_index *i = iter->internal[ITER_INDEX].p;
1110           const index_stream *stream = iter->internal[ITER_STREAM].p;
1111           const index_group *group = NULL;
1112           size_t record = iter->internal[ITER_RECORD].s;
1113 
1114           // If we are being asked for the next Stream, leave group to NULL
1115           // so that the rest of the this function thinks that this Stream
1116           // has no groups and will thus go to the next Stream.
1117           if (mode != LZMA_INDEX_ITER_STREAM) {
1118                     // Get the pointer to the current group. See iter_set_inf()
1119                     // for explanation.
1120                     switch (iter->internal[ITER_METHOD].s) {
1121                     case ITER_METHOD_NORMAL:
1122                               group = iter->internal[ITER_GROUP].p;
1123                               break;
1124 
1125                     case ITER_METHOD_NEXT:
1126                               group = index_tree_next(iter->internal[ITER_GROUP].p);
1127                               break;
1128 
1129                     case ITER_METHOD_LEFTMOST:
1130                               group = (const index_group *)(
1131                                                   stream->groups.leftmost);
1132                               break;
1133                     }
1134           }
1135 
1136 again:
1137           if (stream == NULL) {
1138                     // We at the beginning of the lzma_index.
1139                     // Locate the first Stream.
1140                     stream = (const index_stream *)(i->streams.leftmost);
1141                     if (mode >= LZMA_INDEX_ITER_BLOCK) {
1142                               // Since we are being asked to return information
1143                               // about the first a Block, skip Streams that have
1144                               // no Blocks.
1145                               while (stream->groups.leftmost == NULL) {
1146                                         stream = index_tree_next(&stream->node);
1147                                         if (stream == NULL)
1148                                                   return true;
1149                               }
1150                     }
1151 
1152                     // Start from the first Record in the Stream.
1153                     group = (const index_group *)(stream->groups.leftmost);
1154                     record = 0;
1155 
1156           } else if (group != NULL && record < group->last) {
1157                     // The next Record is in the same group.
1158                     ++record;
1159 
1160           } else {
1161                     // This group has no more Records or this Stream has
1162                     // no Blocks at all.
1163                     record = 0;
1164 
1165                     // If group is not NULL, this Stream has at least one Block
1166                     // and thus at least one group. Find the next group.
1167                     if (group != NULL)
1168                               group = index_tree_next(&group->node);
1169 
1170                     if (group == NULL) {
1171                               // This Stream has no more Records. Find the next
1172                               // Stream. If we are being asked to return information
1173                               // about a Block, we skip empty Streams.
1174                               do {
1175                                         stream = index_tree_next(&stream->node);
1176                                         if (stream == NULL)
1177                                                   return true;
1178                               } while (mode >= LZMA_INDEX_ITER_BLOCK
1179                                                   && stream->groups.leftmost == NULL);
1180 
1181                               group = (const index_group *)(
1182                                                   stream->groups.leftmost);
1183                     }
1184           }
1185 
1186           if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) {
1187                     // We need to look for the next Block again if this Block
1188                     // is empty.
1189                     if (record == 0) {
1190                               if (group->node.uncompressed_base
1191                                                   == group->records[0].uncompressed_sum)
1192                                         goto again;
1193                     } else if (group->records[record - 1].uncompressed_sum
1194                                         == group->records[record].uncompressed_sum) {
1195                               goto again;
1196                     }
1197           }
1198 
1199           iter->internal[ITER_STREAM].p = stream;
1200           iter->internal[ITER_GROUP].p = group;
1201           iter->internal[ITER_RECORD].s = record;
1202 
1203           iter_set_info(iter);
1204 
1205           return false;
1206 }
1207 
1208 
1209 extern LZMA_API(lzma_bool)
lzma_index_iter_locate(lzma_index_iter * iter,lzma_vli target)1210 lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target)
1211 {
1212           const lzma_index *i = iter->internal[ITER_INDEX].p;
1213 
1214           // If the target is past the end of the file, return immediately.
1215           if (i->uncompressed_size <= target)
1216                     return true;
1217 
1218           // Locate the Stream containing the target offset.
1219           const index_stream *stream = index_tree_locate(&i->streams, target);
1220           assert(stream != NULL);
1221           target -= stream->node.uncompressed_base;
1222 
1223           // Locate the group containing the target offset.
1224           const index_group *group = index_tree_locate(&stream->groups, target);
1225           assert(group != NULL);
1226 
1227           // Use binary search to locate the exact Record. It is the first
1228           // Record whose uncompressed_sum is greater than target.
1229           // This is because we want the rightmost Record that fullfills the
1230           // search criterion. It is possible that there are empty Blocks;
1231           // we don't want to return them.
1232           size_t left = 0;
1233           size_t right = group->last;
1234 
1235           while (left < right) {
1236                     const size_t pos = left + (right - left) / 2;
1237                     if (group->records[pos].uncompressed_sum <= target)
1238                               left = pos + 1;
1239                     else
1240                               right = pos;
1241           }
1242 
1243           iter->internal[ITER_STREAM].p = stream;
1244           iter->internal[ITER_GROUP].p = group;
1245           iter->internal[ITER_RECORD].s = left;
1246 
1247           iter_set_info(iter);
1248 
1249           return false;
1250 }
1251