xref: /dragonfly/contrib/libarchive/libarchive/archive_entry_link_resolver.c (revision d4d8193e041c7781712669d8c7ab029146e56e0f)
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_entry_link_resolver.c 201100 2009-12-28 03:05:31Z kientzle $");
28 
29 #ifdef HAVE_SYS_STAT_H
30 #include <sys/stat.h>
31 #endif
32 #ifdef HAVE_ERRNO_H
33 #include <errno.h>
34 #endif
35 #include <stdio.h>
36 #ifdef HAVE_STDLIB_H
37 #include <stdlib.h>
38 #endif
39 #ifdef HAVE_STRING_H
40 #include <string.h>
41 #endif
42 
43 #include "archive.h"
44 #include "archive_entry.h"
45 
46 /*
47  * This is mostly a pretty straightforward hash table implementation.
48  * The only interesting bit is the different strategies used to
49  * match up links.  These strategies match those used by various
50  * archiving formats:
51  *   tar - content stored with first link, remainder refer back to it.
52  *       This requires us to match each subsequent link up with the
53  *       first appearance.
54  *   cpio - Old cpio just stored body with each link, match-ups were
55  *       implicit.  This is trivial.
56  *   new cpio - New cpio only stores body with last link, match-ups
57  *       are implicit.  This is actually quite tricky; see the notes
58  *       below.
59  */
60 
/*
 * Users pass us a format code, we translate that into a strategy here.
 * archive_entry_linkresolver_set_strategy() stores one of these values
 * in the resolver's strategy field and archive_entry_linkify() switches
 * on it.
 */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR  0
#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3

/*
 * Initial size of link cache.  Must be a nonzero power of two:
 * archive_entry_linkresolver_new() rejects anything else, and bucket
 * indices are computed as hash & (number_buckets - 1).
 */
#define   links_cache_initial_size 1024
69 
/*
 * One node of the links cache: a file that has been seen and still has
 * links outstanding.  Nodes hang off a hash bucket in a doubly linked
 * chain.
 */
struct links_entry {
          /* Next node in the same bucket chain, or NULL at the tail. */
          struct links_entry  *next;
          /* Previous node in the chain, or NULL when this is the head. */
          struct links_entry  *previous;
          /*
           * Clone of the entry made by insert_entry(); its dev/ino are
           * what find_entry() compares against and its pathname is used
           * as the hardlink target.
           */
          struct archive_entry          *canonical;
          /*
           * Entry being held back by the new-cpio strategy; left NULL
           * by the other strategies.
           */
          struct archive_entry          *entry;
          /* Cached hash (dev ^ ino) used for bucket selection. */
          size_t                         hash;
          unsigned int                   links; /* # links not yet seen */
};
78 
/*
 * Link resolver state: a chained hash table of links_entry nodes plus
 * the strategy chosen for the target archive format.
 */
struct archive_entry_linkresolver {
          /* Array of number_buckets chain heads. */
          struct links_entry  **buckets;
          /*
           * Node most recently removed from the table.  It is kept alive
           * so the caller can still read it, and is freed at the start of
           * the next find_entry()/next_entry() call.
           */
          struct links_entry   *spare;
          /* Number of nodes currently linked into the table. */
          unsigned long                   number_entries;
          /* Length of buckets[]; always a power of two. */
          size_t                          number_buckets;
          /* One of the ARCHIVE_ENTRY_LINKIFY_LIKE_* values. */
          int                             strategy;
};
86 
/*
 * Selection mask for next_entry():
 *   NEXT_ENTRY_DEFERRED - accept nodes that hold a deferred entry
 *       (le->entry != NULL).
 *   NEXT_ENTRY_PARTIAL  - accept nodes that hold no deferred entry
 *       (le->entry == NULL).
 *   NEXT_ENTRY_ALL      - accept either kind.
 */
#define   NEXT_ENTRY_DEFERRED 1
#define   NEXT_ENTRY_PARTIAL  2
#define   NEXT_ENTRY_ALL                (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
90 
/* Look up an entry by dev/ino and consume one of its outstanding links. */
static struct links_entry *find_entry(struct archive_entry_linkresolver *,
                        struct archive_entry *);
/* Double the number of hash buckets and redistribute the nodes. */
static void grow_hash(struct archive_entry_linkresolver *);
/* Add a node for an entry that has not been seen before. */
static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
                        struct archive_entry *);
/* Remove and return the first node matching a NEXT_ENTRY_* mask. */
static struct links_entry *next_entry(struct archive_entry_linkresolver *,
    int);
98 
99 struct archive_entry_linkresolver *
archive_entry_linkresolver_new(void)100 archive_entry_linkresolver_new(void)
101 {
102           struct archive_entry_linkresolver *res;
103 
104           /* Check for positive power-of-two */
105           if (links_cache_initial_size == 0 ||
106               (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
107                     return (NULL);
108 
109           res = calloc(1, sizeof(struct archive_entry_linkresolver));
110           if (res == NULL)
111                     return (NULL);
112           res->number_buckets = links_cache_initial_size;
113           res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
114           if (res->buckets == NULL) {
115                     free(res);
116                     return (NULL);
117           }
118           return (res);
119 }
120 
121 void
archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver * res,int fmt)122 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
123     int fmt)
124 {
125           int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
126 
127           switch (fmtbase) {
128           case ARCHIVE_FORMAT_7ZIP:
129           case ARCHIVE_FORMAT_AR:
130           case ARCHIVE_FORMAT_ZIP:
131                     res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
132                     break;
133           case ARCHIVE_FORMAT_CPIO:
134                     switch (fmt) {
135                     case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
136                     case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
137                               res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
138                               break;
139                     default:
140                               res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
141                               break;
142                     }
143                     break;
144           case ARCHIVE_FORMAT_MTREE:
145                     res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
146                     break;
147           case ARCHIVE_FORMAT_ISO9660:
148           case ARCHIVE_FORMAT_SHAR:
149           case ARCHIVE_FORMAT_TAR:
150           case ARCHIVE_FORMAT_XAR:
151                     res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
152                     break;
153           default:
154                     res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
155                     break;
156           }
157 }
158 
159 void
archive_entry_linkresolver_free(struct archive_entry_linkresolver * res)160 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
161 {
162           struct links_entry *le;
163 
164           if (res == NULL)
165                     return;
166 
167           while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
168                     archive_entry_free(le->entry);
169           free(res->buckets);
170           free(res);
171 }
172 
173 void
archive_entry_linkify(struct archive_entry_linkresolver * res,struct archive_entry ** e,struct archive_entry ** f)174 archive_entry_linkify(struct archive_entry_linkresolver *res,
175     struct archive_entry **e, struct archive_entry **f)
176 {
177           struct links_entry *le;
178           struct archive_entry *t;
179 
180           *f = NULL; /* Default: Don't return a second entry. */
181 
182           if (*e == NULL) {
183                     le = next_entry(res, NEXT_ENTRY_DEFERRED);
184                     if (le != NULL) {
185                               *e = le->entry;
186                               le->entry = NULL;
187                     }
188                     return;
189           }
190 
191           /* If it has only one link, then we're done. */
192           if (archive_entry_nlink(*e) == 1)
193                     return;
194           /* Directories, devices never have hardlinks. */
195           if (archive_entry_filetype(*e) == AE_IFDIR
196               || archive_entry_filetype(*e) == AE_IFBLK
197               || archive_entry_filetype(*e) == AE_IFCHR)
198                     return;
199 
200           switch (res->strategy) {
201           case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
202                     le = find_entry(res, *e);
203                     if (le != NULL) {
204                               archive_entry_unset_size(*e);
205                               archive_entry_copy_hardlink(*e,
206                                   archive_entry_pathname(le->canonical));
207                     } else
208                               insert_entry(res, *e);
209                     return;
210           case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
211                     le = find_entry(res, *e);
212                     if (le != NULL) {
213                               archive_entry_copy_hardlink(*e,
214                                   archive_entry_pathname(le->canonical));
215                     } else
216                               insert_entry(res, *e);
217                     return;
218           case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
219                     /* This one is trivial. */
220                     return;
221           case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
222                     le = find_entry(res, *e);
223                     if (le != NULL) {
224                               /*
225                                * Put the new entry in le, return the
226                                * old entry from le.
227                                */
228                               t = *e;
229                               *e = le->entry;
230                               le->entry = t;
231                               /* Make the old entry into a hardlink. */
232                               archive_entry_unset_size(*e);
233                               archive_entry_copy_hardlink(*e,
234                                   archive_entry_pathname(le->canonical));
235                               /* If we ran out of links, return the
236                                * final entry as well. */
237                               if (le->links == 0) {
238                                         *f = le->entry;
239                                         le->entry = NULL;
240                               }
241                     } else {
242                               /*
243                                * If we haven't seen it, tuck it away
244                                * for future use.
245                                */
246                               le = insert_entry(res, *e);
247                               if (le == NULL)
248                                         /* XXX We should return an error code XXX */
249                                         return;
250                               le->entry = *e;
251                               *e = NULL;
252                     }
253                     return;
254           default:
255                     break;
256           }
257           return;
258 }
259 
260 static struct links_entry *
find_entry(struct archive_entry_linkresolver * res,struct archive_entry * entry)261 find_entry(struct archive_entry_linkresolver *res,
262     struct archive_entry *entry)
263 {
264           struct links_entry  *le;
265           size_t                         hash, bucket;
266           dev_t                          dev;
267           int64_t                        ino;
268 
269           /* Free a held entry. */
270           if (res->spare != NULL) {
271                     archive_entry_free(res->spare->canonical);
272                     archive_entry_free(res->spare->entry);
273                     free(res->spare);
274                     res->spare = NULL;
275           }
276 
277           dev = archive_entry_dev(entry);
278           ino = archive_entry_ino64(entry);
279           hash = (size_t)(dev ^ ino);
280 
281           /* Try to locate this entry in the links cache. */
282           bucket = hash & (res->number_buckets - 1);
283           for (le = res->buckets[bucket]; le != NULL; le = le->next) {
284                     if (le->hash == hash
285                         && dev == archive_entry_dev(le->canonical)
286                         && ino == archive_entry_ino64(le->canonical)) {
287                               /*
288                                * Decrement link count each time and release
289                                * the entry if it hits zero.  This saves
290                                * memory and is necessary for detecting
291                                * missed links.
292                                */
293                               --le->links;
294                               if (le->links > 0)
295                                         return (le);
296                               /* Remove it from this hash bucket. */
297                               if (le->previous != NULL)
298                                         le->previous->next = le->next;
299                               if (le->next != NULL)
300                                         le->next->previous = le->previous;
301                               if (res->buckets[bucket] == le)
302                                         res->buckets[bucket] = le->next;
303                               res->number_entries--;
304                               /* Defer freeing this entry. */
305                               res->spare = le;
306                               return (le);
307                     }
308           }
309           return (NULL);
310 }
311 
312 static struct links_entry *
next_entry(struct archive_entry_linkresolver * res,int mode)313 next_entry(struct archive_entry_linkresolver *res, int mode)
314 {
315           struct links_entry  *le;
316           size_t                         bucket;
317 
318           /* Free a held entry. */
319           if (res->spare != NULL) {
320                     archive_entry_free(res->spare->canonical);
321                     archive_entry_free(res->spare->entry);
322                     free(res->spare);
323                     res->spare = NULL;
324           }
325 
326           /* Look for next non-empty bucket in the links cache. */
327           for (bucket = 0; bucket < res->number_buckets; bucket++) {
328                     for (le = res->buckets[bucket]; le != NULL; le = le->next) {
329                               if (le->entry != NULL &&
330                                   (mode & NEXT_ENTRY_DEFERRED) == 0)
331                                         continue;
332                               if (le->entry == NULL &&
333                                   (mode & NEXT_ENTRY_PARTIAL) == 0)
334                                         continue;
335                               /* Remove it from this hash bucket. */
336                               if (le->next != NULL)
337                                         le->next->previous = le->previous;
338                               if (le->previous != NULL)
339                                         le->previous->next = le->next;
340                               else
341                                         res->buckets[bucket] = le->next;
342                               res->number_entries--;
343                               /* Defer freeing this entry. */
344                               res->spare = le;
345                               return (le);
346                     }
347           }
348           return (NULL);
349 }
350 
351 static struct links_entry *
insert_entry(struct archive_entry_linkresolver * res,struct archive_entry * entry)352 insert_entry(struct archive_entry_linkresolver *res,
353     struct archive_entry *entry)
354 {
355           struct links_entry *le;
356           size_t hash, bucket;
357 
358           /* Add this entry to the links cache. */
359           le = calloc(1, sizeof(struct links_entry));
360           if (le == NULL)
361                     return (NULL);
362           le->canonical = archive_entry_clone(entry);
363 
364           /* If the links cache is getting too full, enlarge the hash table. */
365           if (res->number_entries > res->number_buckets * 2)
366                     grow_hash(res);
367 
368           hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));
369           bucket = hash & (res->number_buckets - 1);
370 
371           /* If we could allocate the entry, record it. */
372           if (res->buckets[bucket] != NULL)
373                     res->buckets[bucket]->previous = le;
374           res->number_entries++;
375           le->next = res->buckets[bucket];
376           le->previous = NULL;
377           res->buckets[bucket] = le;
378           le->hash = hash;
379           le->links = archive_entry_nlink(entry) - 1;
380           return (le);
381 }
382 
383 static void
grow_hash(struct archive_entry_linkresolver * res)384 grow_hash(struct archive_entry_linkresolver *res)
385 {
386           struct links_entry *le, **new_buckets;
387           size_t new_size;
388           size_t i, bucket;
389 
390           /* Try to enlarge the bucket list. */
391           new_size = res->number_buckets * 2;
392           if (new_size < res->number_buckets)
393                     return;
394           new_buckets = calloc(new_size, sizeof(struct links_entry *));
395 
396           if (new_buckets == NULL)
397                     return;
398 
399           for (i = 0; i < res->number_buckets; i++) {
400                     while (res->buckets[i] != NULL) {
401                               /* Remove entry from old bucket. */
402                               le = res->buckets[i];
403                               res->buckets[i] = le->next;
404 
405                               /* Add entry to new bucket. */
406                               bucket = le->hash & (new_size - 1);
407 
408                               if (new_buckets[bucket] != NULL)
409                                         new_buckets[bucket]->previous = le;
410                               le->next = new_buckets[bucket];
411                               le->previous = NULL;
412                               new_buckets[bucket] = le;
413                     }
414           }
415           free(res->buckets);
416           res->buckets = new_buckets;
417           res->number_buckets = new_size;
418 }
419 
420 struct archive_entry *
archive_entry_partial_links(struct archive_entry_linkresolver * res,unsigned int * links)421 archive_entry_partial_links(struct archive_entry_linkresolver *res,
422     unsigned int *links)
423 {
424           struct archive_entry          *e;
425           struct links_entry  *le;
426 
427           /* Free a held entry. */
428           if (res->spare != NULL) {
429                     archive_entry_free(res->spare->canonical);
430                     archive_entry_free(res->spare->entry);
431                     free(res->spare);
432                     res->spare = NULL;
433           }
434 
435           le = next_entry(res, NEXT_ENTRY_PARTIAL);
436           if (le != NULL) {
437                     e = le->canonical;
438                     if (links != NULL)
439                               *links = le->links;
440                     le->canonical = NULL;
441           } else {
442                     e = NULL;
443                     if (links != NULL)
444                               *links = 0;
445           }
446           return (e);
447 }
448