1 /*-
2 * Copyright (c) 2014 Juniper Networks, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/mman.h>
31 #include <sys/stat.h>
32 #include <assert.h>
33 #include <err.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <paths.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42
43 #include "image.h"
44 #include "mkimg.h"
45
/*
 * Fallbacks for mmap(2) flags that are not available everywhere.  They
 * are OR-ed into the flags argument, so 0 simply turns them off.
 */
#if !defined(MAP_NOCORE)
#define	MAP_NOCORE	0
#endif
#if !defined(MAP_NOSYNC)
#define	MAP_NOSYNC	0
#endif

/*
 * Fallbacks for the lseek(2) whence values used to probe sparse files.
 * -1 is substituted when the system headers do not provide them.
 */
#if !defined(SEEK_DATA)
#define	SEEK_DATA	-1
#endif
#if !defined(SEEK_HOLE)
#define	SEEK_HOLE	-1
#endif
59
60 struct chunk {
61 TAILQ_ENTRY(chunk) ch_list;
62 size_t ch_size; /* Size of chunk in bytes. */
63 lba_t ch_block; /* Block address in image. */
64 union {
65 struct {
66 off_t ofs; /* Offset in backing file. */
67 int fd; /* FD of backing file. */
68 } file;
69 struct {
70 void *ptr; /* Pointer to data in memory */
71 } mem;
72 } ch_u;
73 u_int ch_type;
74 #define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */
75 #define CH_TYPE_FILE 1 /* File-backed chunk. */
76 #define CH_TYPE_MEMORY 2 /* Memory-backed chunk */
77 };
78
79 static TAILQ_HEAD(chunk_head, chunk) image_chunks;
80 static u_int image_nchunks;
81
82 static char image_swap_file[PATH_MAX];
83 static int image_swap_fd = -1;
84 static u_int image_swap_pgsz;
85 static off_t image_swap_size;
86
87 static lba_t image_size;
88
89 static int
is_empty_sector(void * buf)90 is_empty_sector(void *buf)
91 {
92 uint64_t *p = buf;
93 size_t n, max;
94
95 assert(((uintptr_t)p & 3) == 0);
96
97 max = secsz / sizeof(uint64_t);
98 for (n = 0; n < max; n++) {
99 if (p[n] != 0UL)
100 return (0);
101 }
102 return (1);
103 }
104
/*
 * Swap file handling.
 */
108
109 static off_t
image_swap_alloc(size_t size)110 image_swap_alloc(size_t size)
111 {
112 off_t ofs;
113 size_t unit;
114
115 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
116 assert((unit & (unit - 1)) == 0);
117
118 size = (size + unit - 1) & ~(unit - 1);
119
120 ofs = image_swap_size;
121 image_swap_size += size;
122 if (ftruncate(image_swap_fd, image_swap_size) == -1) {
123 image_swap_size = ofs;
124 ofs = -1LL;
125 }
126 return (ofs);
127 }
128
129 /*
130 * Image chunk handling.
131 */
132
133 static struct chunk *
image_chunk_find(lba_t blk)134 image_chunk_find(lba_t blk)
135 {
136 static struct chunk *last = NULL;
137 struct chunk *ch;
138
139 ch = (last != NULL && last->ch_block <= blk)
140 ? last : TAILQ_FIRST(&image_chunks);
141 while (ch != NULL) {
142 if (ch->ch_block <= blk &&
143 (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) {
144 last = ch;
145 break;
146 }
147 ch = TAILQ_NEXT(ch, ch_list);
148 }
149 return (ch);
150 }
151
152 static size_t
image_chunk_grow(struct chunk * ch,size_t sz)153 image_chunk_grow(struct chunk *ch, size_t sz)
154 {
155 size_t dsz, newsz;
156
157 newsz = ch->ch_size + sz;
158 if (newsz > ch->ch_size) {
159 ch->ch_size = newsz;
160 return (0);
161 }
162 /* We would overflow -- create new chunk for remainder. */
163 dsz = SIZE_MAX - ch->ch_size;
164 assert(dsz < sz);
165 ch->ch_size = SIZE_MAX;
166 return (sz - dsz);
167 }
168
169 static struct chunk *
image_chunk_memory(struct chunk * ch,lba_t blk)170 image_chunk_memory(struct chunk *ch, lba_t blk)
171 {
172 struct chunk *new;
173 void *ptr;
174
175 ptr = calloc(1, secsz);
176 if (ptr == NULL)
177 return (NULL);
178
179 if (ch->ch_block < blk) {
180 new = malloc(sizeof(*new));
181 if (new == NULL) {
182 free(ptr);
183 return (NULL);
184 }
185 memcpy(new, ch, sizeof(*new));
186 ch->ch_size = (blk - ch->ch_block) * secsz;
187 new->ch_block = blk;
188 new->ch_size -= ch->ch_size;
189 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
190 image_nchunks++;
191 ch = new;
192 }
193
194 if (ch->ch_size > secsz) {
195 new = malloc(sizeof(*new));
196 if (new == NULL) {
197 free(ptr);
198 return (NULL);
199 }
200 memcpy(new, ch, sizeof(*new));
201 ch->ch_size = secsz;
202 new->ch_block++;
203 new->ch_size -= secsz;
204 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
205 image_nchunks++;
206 }
207
208 ch->ch_type = CH_TYPE_MEMORY;
209 ch->ch_u.mem.ptr = ptr;
210 return (ch);
211 }
212
213 static int
image_chunk_skipto(lba_t to)214 image_chunk_skipto(lba_t to)
215 {
216 struct chunk *ch;
217 lba_t from;
218 size_t sz;
219
220 ch = TAILQ_LAST(&image_chunks, chunk_head);
221 from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
222
223 assert(from <= to);
224
225 /* Nothing to do? */
226 if (from == to)
227 return (0);
228 /* Avoid bugs due to overflows. */
229 if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
230 return (EFBIG);
231 sz = (to - from) * secsz;
232 if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) {
233 sz = image_chunk_grow(ch, sz);
234 if (sz == 0)
235 return (0);
236 from = ch->ch_block + (ch->ch_size / secsz);
237 }
238 ch = malloc(sizeof(*ch));
239 if (ch == NULL)
240 return (ENOMEM);
241 memset(ch, 0, sizeof(*ch));
242 ch->ch_block = from;
243 ch->ch_size = sz;
244 ch->ch_type = CH_TYPE_ZEROES;
245 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
246 image_nchunks++;
247 return (0);
248 }
249
250 static int
image_chunk_append(lba_t blk,size_t sz,off_t ofs,int fd)251 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
252 {
253 struct chunk *ch;
254
255 ch = TAILQ_LAST(&image_chunks, chunk_head);
256 if (ch != NULL && ch->ch_type == CH_TYPE_FILE) {
257 if (fd == ch->ch_u.file.fd &&
258 blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
259 ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) {
260 sz = image_chunk_grow(ch, sz);
261 if (sz == 0)
262 return (0);
263 blk = ch->ch_block + (ch->ch_size / secsz);
264 ofs = ch->ch_u.file.ofs + ch->ch_size;
265 }
266 }
267 ch = malloc(sizeof(*ch));
268 if (ch == NULL)
269 return (ENOMEM);
270 memset(ch, 0, sizeof(*ch));
271 ch->ch_block = blk;
272 ch->ch_size = sz;
273 ch->ch_type = CH_TYPE_FILE;
274 ch->ch_u.file.ofs = ofs;
275 ch->ch_u.file.fd = fd;
276 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
277 image_nchunks++;
278 return (0);
279 }
280
281 static int
image_chunk_copyin(lba_t blk,void * buf,size_t sz,off_t ofs,int fd)282 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
283 {
284 uint8_t *p = buf;
285 int error;
286
287 error = 0;
288 sz = (sz + secsz - 1) & ~(secsz - 1);
289 while (!error && sz > 0) {
290 if (is_empty_sector(p))
291 error = image_chunk_skipto(blk + 1);
292 else
293 error = image_chunk_append(blk, secsz, ofs, fd);
294 blk++;
295 p += secsz;
296 sz -= secsz;
297 ofs += secsz;
298 }
299 return (error);
300 }
301
302 /*
303 * File mapping support.
304 */
305
306 static void *
image_file_map(int fd,off_t ofs,size_t sz)307 image_file_map(int fd, off_t ofs, size_t sz)
308 {
309 void *ptr;
310 size_t unit;
311 int flags, prot;
312
313 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
314 assert((unit & (unit - 1)) == 0);
315
316 flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
317 /* Allow writing to our swap file only. */
318 prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
319 sz = (sz + unit - 1) & ~(unit - 1);
320 ptr = mmap(NULL, sz, prot, flags, fd, ofs);
321 return ((ptr == MAP_FAILED) ? NULL : ptr);
322 }
323
324 static int
image_file_unmap(void * buffer,size_t sz)325 image_file_unmap(void *buffer, size_t sz)
326 {
327 size_t unit;
328
329 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
330 sz = (sz + unit - 1) & ~(unit - 1);
331 if (madvise(buffer, sz, MADV_DONTNEED) != 0)
332 warn("madvise");
333 munmap(buffer, sz);
334 return (0);
335 }
336
337 /*
338 * Input/source file handling.
339 */
340
341 static int
image_copyin_stream(lba_t blk,int fd,uint64_t * sizep)342 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
343 {
344 char *buffer;
345 uint64_t bytesize;
346 off_t swofs;
347 size_t iosz;
348 ssize_t rdsz;
349 int error;
350
351 /*
352 * This makes sure we're doing I/O in multiples of the page
353 * size as well as of the sector size. 2MB is the minimum
354 * by virtue of secsz at least 512 bytes and the page size
355 * at least 4K bytes.
356 */
357 iosz = secsz * image_swap_pgsz;
358
359 bytesize = 0;
360 do {
361 swofs = image_swap_alloc(iosz);
362 if (swofs == -1LL)
363 return (errno);
364 buffer = image_file_map(image_swap_fd, swofs, iosz);
365 if (buffer == NULL)
366 return (errno);
367 rdsz = read(fd, buffer, iosz);
368 if (rdsz > 0)
369 error = image_chunk_copyin(blk, buffer, rdsz, swofs,
370 image_swap_fd);
371 else if (rdsz < 0)
372 error = errno;
373 else
374 error = 0;
375 image_file_unmap(buffer, iosz);
376 /* XXX should we relinguish unused swap space? */
377 if (error)
378 return (error);
379
380 bytesize += rdsz;
381 blk += (rdsz + secsz - 1) / secsz;
382 } while (rdsz > 0);
383
384 if (sizep != NULL)
385 *sizep = bytesize;
386 return (0);
387 }
388
389 static int
image_copyin_mapped(lba_t blk,int fd,uint64_t * sizep)390 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
391 {
392 off_t cur, data, end, hole, pos;
393 void *buf;
394 uint64_t bytesize;
395 size_t iosz, sz;
396 int error;
397
398 /*
399 * We'd like to know the size of the file and we must
400 * be able to seek in order to mmap(2). If this isn't
401 * possible, then treat the file as a stream/pipe.
402 */
403 end = lseek(fd, 0L, SEEK_END);
404 if (end == -1L)
405 return (image_copyin_stream(blk, fd, sizep));
406
407 /*
408 * We need the file opened for the duration and our
409 * caller is going to close the file. Make a dup(2)
410 * so that control the faith of the descriptor.
411 */
412 fd = dup(fd);
413 if (fd == -1)
414 return (errno);
415
416 iosz = secsz * image_swap_pgsz;
417
418 bytesize = 0;
419 cur = pos = 0;
420 error = 0;
421 while (!error && cur < end) {
422 hole = lseek(fd, cur, SEEK_HOLE);
423 if (hole == -1)
424 hole = end;
425 data = lseek(fd, cur, SEEK_DATA);
426 if (data == -1)
427 data = end;
428
429 /*
430 * Treat the entire file as data if sparse files
431 * are not supported by the underlying file system.
432 */
433 if (hole == end && data == end)
434 data = cur;
435
436 if (cur == hole && data > hole) {
437 hole = pos;
438 pos = data & ~((uint64_t)secsz - 1);
439
440 blk += (pos - hole) / secsz;
441 error = image_chunk_skipto(blk);
442
443 bytesize += pos - hole;
444 cur = data;
445 } else if (cur == data && hole > data) {
446 data = pos;
447 pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1);
448
449 while (data < pos) {
450 sz = (pos - data > (off_t)iosz)
451 ? iosz : (size_t)(pos - data);
452
453 buf = image_file_map(fd, data, sz);
454 if (buf != NULL) {
455 error = image_chunk_copyin(blk, buf,
456 sz, data, fd);
457 image_file_unmap(buf, sz);
458 } else
459 error = errno;
460
461 blk += sz / secsz;
462 bytesize += sz;
463 data += sz;
464 }
465 cur = hole;
466 } else {
467 /*
468 * I don't know what this means or whether it
469 * can happen at all...
470 */
471 assert(0);
472 }
473 }
474 if (error)
475 close(fd);
476 if (!error && sizep != NULL)
477 *sizep = bytesize;
478 return (error);
479 }
480
481 int
image_copyin(lba_t blk,int fd,uint64_t * sizep)482 image_copyin(lba_t blk, int fd, uint64_t *sizep)
483 {
484 struct stat sb;
485 int error;
486
487 error = image_chunk_skipto(blk);
488 if (!error) {
489 if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
490 error = image_copyin_stream(blk, fd, sizep);
491 else
492 error = image_copyin_mapped(blk, fd, sizep);
493 }
494 return (error);
495 }
496
497 /*
498 * Output/sink file handling.
499 */
500
501 int
image_copyout(int fd)502 image_copyout(int fd)
503 {
504 int error;
505
506 error = image_copyout_region(fd, 0, image_size);
507 if (!error)
508 error = image_copyout_done(fd);
509 return (error);
510 }
511
/*
 * Finish writing to fd by setting the length of the file to the current
 * file offset.  Output that can't be seeked is left alone.  Returns 0 on
 * success or the errno value of a failed ftruncate().
 */
int
image_copyout_done(int fd)
{
	off_t pos;

	pos = lseek(fd, 0L, SEEK_CUR);
	if (pos == -1)
		return (0);
	if (ftruncate(fd, pos) == -1)
		return (errno);
	return (0);
}
524
/*
 * Write the size bytes at ptr to fd.
 *
 * write(2) is allowed to transfer less than requested, so keep going
 * until everything has been written.  Interrupted writes are restarted.
 * Returns 0 on success or an errno value on failure; EIO is returned
 * should write(2) stop making progress.
 */
static int
image_copyout_memory(int fd, size_t size, void *ptr)
{
	const char *p = ptr;
	ssize_t n;

	while (size > 0) {
		n = write(fd, p, size);
		if (n == -1) {
			if (errno == EINTR)
				continue;
			return (errno);
		}
		/* Not expected, but don't loop forever if it happens. */
		if (n == 0)
			return (EIO);
		p += n;
		size -= (size_t)n;
	}
	return (0);
}
533
534 int
image_copyout_zeroes(int fd,size_t count)535 image_copyout_zeroes(int fd, size_t count)
536 {
537 static uint8_t *zeroes = NULL;
538 size_t sz;
539 int error;
540
541 if (lseek(fd, (off_t)count, SEEK_CUR) != -1)
542 return (0);
543
544 /*
545 * If we can't seek, we must write.
546 */
547
548 if (zeroes == NULL) {
549 zeroes = calloc(1, secsz);
550 if (zeroes == NULL)
551 return (ENOMEM);
552 }
553
554 while (count > 0) {
555 sz = (count > secsz) ? secsz : count;
556 error = image_copyout_memory(fd, sz, zeroes);
557 if (error)
558 return (error);
559 count -= sz;
560 }
561 return (0);
562 }
563
564 static int
image_copyout_file(int fd,size_t size,int ifd,off_t iofs)565 image_copyout_file(int fd, size_t size, int ifd, off_t iofs)
566 {
567 void *buf;
568 size_t iosz, sz;
569 int error;
570
571 iosz = secsz * image_swap_pgsz;
572
573 while (size > 0) {
574 sz = (size > iosz) ? iosz : size;
575 buf = image_file_map(ifd, iofs, sz);
576 if (buf == NULL)
577 return (errno);
578 error = image_copyout_memory(fd, sz, buf);
579 image_file_unmap(buf, sz);
580 if (error)
581 return (error);
582 size -= sz;
583 iofs += sz;
584 }
585 return (0);
586 }
587
588 int
image_copyout_region(int fd,lba_t blk,lba_t size)589 image_copyout_region(int fd, lba_t blk, lba_t size)
590 {
591 struct chunk *ch;
592 size_t ofs, sz;
593 int error;
594
595 size *= secsz;
596
597 error = 0;
598 while (!error && size > 0) {
599 ch = image_chunk_find(blk);
600 if (ch == NULL) {
601 error = EINVAL;
602 break;
603 }
604 ofs = (blk - ch->ch_block) * secsz;
605 sz = ch->ch_size - ofs;
606 sz = ((lba_t)sz < size) ? sz : (size_t)size;
607 switch (ch->ch_type) {
608 case CH_TYPE_ZEROES:
609 error = image_copyout_zeroes(fd, sz);
610 break;
611 case CH_TYPE_FILE:
612 error = image_copyout_file(fd, sz, ch->ch_u.file.fd,
613 ch->ch_u.file.ofs + ofs);
614 break;
615 case CH_TYPE_MEMORY:
616 error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr);
617 break;
618 default:
619 assert(0);
620 }
621 size -= sz;
622 blk += sz / secsz;
623 }
624 return (error);
625 }
626
627 int
image_data(lba_t blk,lba_t size)628 image_data(lba_t blk, lba_t size)
629 {
630 struct chunk *ch;
631 lba_t lim;
632
633 while (1) {
634 ch = image_chunk_find(blk);
635 if (ch == NULL)
636 return (0);
637 if (ch->ch_type != CH_TYPE_ZEROES)
638 return (1);
639 lim = ch->ch_block + (ch->ch_size / secsz);
640 if (lim >= blk + size)
641 return (0);
642 size -= lim - blk;
643 blk = lim;
644 }
645 /*NOTREACHED*/
646 }
647
648 lba_t
image_get_size(void)649 image_get_size(void)
650 {
651
652 return (image_size);
653 }
654
655 int
image_set_size(lba_t blk)656 image_set_size(lba_t blk)
657 {
658 int error;
659
660 error = image_chunk_skipto(blk);
661 if (!error)
662 image_size = blk;
663 return (error);
664 }
665
666 int
image_write(lba_t blk,void * buf,ssize_t len)667 image_write(lba_t blk, void *buf, ssize_t len)
668 {
669 struct chunk *ch;
670
671 while (len > 0) {
672 if (!is_empty_sector(buf)) {
673 ch = image_chunk_find(blk);
674 if (ch == NULL)
675 return (ENXIO);
676 /* We may not be able to write to files. */
677 if (ch->ch_type == CH_TYPE_FILE)
678 return (EINVAL);
679 if (ch->ch_type == CH_TYPE_ZEROES) {
680 ch = image_chunk_memory(ch, blk);
681 if (ch == NULL)
682 return (ENOMEM);
683 }
684 assert(ch->ch_type == CH_TYPE_MEMORY);
685 memcpy(ch->ch_u.mem.ptr, buf, secsz);
686 }
687 blk++;
688 buf = (char *)buf + secsz;
689 len--;
690 }
691 return (0);
692 }
693
694 static void
image_cleanup(void)695 image_cleanup(void)
696 {
697 struct chunk *ch;
698
699 while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) {
700 switch (ch->ch_type) {
701 case CH_TYPE_FILE:
702 /* We may be closing the same file multiple times. */
703 if (ch->ch_u.file.fd != -1)
704 close(ch->ch_u.file.fd);
705 break;
706 case CH_TYPE_MEMORY:
707 free(ch->ch_u.mem.ptr);
708 break;
709 default:
710 break;
711 }
712 TAILQ_REMOVE(&image_chunks, ch, ch_list);
713 free(ch);
714 }
715 if (image_swap_fd != -1)
716 close(image_swap_fd);
717 unlink(image_swap_file);
718 }
719
720 int
image_init(void)721 image_init(void)
722 {
723 const char *tmpdir;
724
725 TAILQ_INIT(&image_chunks);
726 image_nchunks = 0;
727
728 image_swap_size = 0;
729 image_swap_pgsz = getpagesize();
730
731 if (atexit(image_cleanup) == -1)
732 return (errno);
733 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
734 tmpdir = _PATH_TMP;
735 snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
736 tmpdir);
737 image_swap_fd = mkstemp(image_swap_file);
738 if (image_swap_fd == -1)
739 return (errno);
740 return (0);
741 }
742