1 /*-
2  * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
4  * Copyright (c) 1996 Peter Wemm
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD: stable/9/sys/kern/imgact_elf.c 280966 2015-04-01 19:48:19Z jhb $");
33 
34 #include "opt_capsicum.h"
35 #include "opt_compat.h"
36 #include "opt_core.h"
37 
38 #include <sys/param.h>
39 #include <sys/capability.h>
40 #include <sys/exec.h>
41 #include <sys/fcntl.h>
42 #include <sys/filedesc.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mount.h>
49 #include <sys/mutex.h>
50 #include <sys/mman.h>
51 #include <sys/namei.h>
52 #include <sys/pioctl.h>
53 #include <sys/proc.h>
54 #include <sys/procfs.h>
55 #include <sys/racct.h>
56 #include <sys/resourcevar.h>
57 #include <sys/sbuf.h>
58 #include <sys/sf_buf.h>
59 #include <sys/smp.h>
60 #include <sys/systm.h>
61 #include <sys/signalvar.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/syscall.h>
65 #include <sys/sysctl.h>
66 #include <sys/sysent.h>
67 #include <sys/vnode.h>
68 #include <sys/syslog.h>
69 #include <sys/eventhandler.h>
70 #include <sys/user.h>
71 
72 #include <net/zlib.h>
73 
74 #include <vm/vm.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_param.h>
77 #include <vm/pmap.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_extern.h>
81 
82 #include <machine/elf.h>
83 #include <machine/md_var.h>
84 
/*
 * Granularity, in bytes, to which a note's name size is rounded up when
 * stepping from the note header to its descriptor.
 */
#define ELF_NOTE_ROUNDSIZE	4
/*
 * Index into e_ident[] at which the old-style (FreeBSD 3.x) brand string
 * is looked up when matching against a brand's compat_3_brand.
 */
#define OLD_EI_BRAND	8

/* Forward declarations of file-local helpers. */
static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
    const char *interp, int interp_name_len, int32_t *osrel);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
    vm_prot_t prot, size_t pagesize);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note,
    int32_t *osrel);
static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
static boolean_t __elfN(check_note)(struct image_params *imgp,
    Elf_Brandnote *checknote, int32_t *osrel);
static vm_prot_t __elfN(trans_prot)(Elf_Word);
static Elf_Word __elfN(untrans_prot)(vm_prot_t);
104 
/* Parent sysctl node kern.elf<wordsize> for the knobs declared below. */
SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
    "");

#ifdef COMPRESS_USER_CORES
static int compress_core(gzFile, char *, char *, unsigned int,
    struct thread * td);
#endif
/* NOTE(review): users of this size are not visible here -- confirm. */
#define CORE_BUF_SIZE	(16 * 1024)

/*
 * Brand value used as a last resort by get_brandinfo() when no other
 * branding method matches.  Defaults to -1; settable both as a sysctl
 * and as a loader tunable.
 */
int __elfN(fallback_brand) = -1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
    &__elfN(fallback_brand));

/* Read/write knob exported under the debug sysctl tree; off by default. */
static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
    &elf_legacy_coredump, 0, "");

/*
 * When non-zero, the image activator takes the stack protection from the
 * image's PT_GNU_STACK program header.  Enabled by default only on amd64
 * and powerpc64.
 */
int __elfN(nxstack) =
#if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */
	1;
#else
	0;
#endif
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack");

/*
 * kern.elf32.read_exec: only defined for the 32-bit ELF activator when
 * built on amd64 or ia64.
 */
#if __ELF_WORD_SIZE == 32
#if defined(__amd64__) || defined(__ia64__)
int i386_read_exec = 0;
SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
    "enable execution from readable segments");
#endif
#endif

/*
 * Table of registered brands.  A NULL slot is free; slots are filled by
 * insert_brand_entry() and cleared by remove_brand_entry().
 */
static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
144 
/*
 * Rounding helpers for a caller-supplied power-of-two size "ps":
 *   trunc_page_ps(va, ps) - round "va" down to a multiple of "ps".
 *   round_page_ps(va, ps) - round "va" up to a multiple of "ps".
 *   aligned(a, t)         - true if address "a" is a multiple of sizeof(t).
 *
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. "base << shift") is not torn apart by operator precedence.  Note
 * that "ps" is still evaluated more than once by round_page_ps().
 */
#define	trunc_page_ps(va, ps)	((va) & ~((ps) - 1))
#define	round_page_ps(va, ps)	(((va) + ((ps) - 1)) & ~((ps) - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
148 
/* Vendor name carried by the FreeBSD brand note. */
static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

/*
 * Brand note template for FreeBSD binaries: a type 1 note whose name is
 * the vendor string above (the size includes the terminating NUL) and
 * whose descriptor is a single int32_t.  freebsd_trans_osrel() is
 * registered as the callback that extracts the OS release from it.
 */
Elf_Brandnote __elfN(freebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),
	.hdr.n_descsz	= sizeof(int32_t),
	.hdr.n_type	= 1,
	.vendor		= FREEBSD_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= __elfN(freebsd_trans_osrel)
};
159 
160 static boolean_t
__elfN(freebsd_trans_osrel)161 __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
162 {
163 	uintptr_t p;
164 
165 	p = (uintptr_t)(note + 1);
166 	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
167 	*osrel = *(const int32_t *)(p);
168 
169 	return (TRUE);
170 }
171 
/* Vendor name carried by the GNU ABI note. */
static const char GNU_ABI_VENDOR[] = "GNU";
/* Value the first descriptor word must have for kfreebsd_trans_osrel(). */
static int GNU_KFREEBSD_ABI_DESC = 3;

/*
 * Brand note template for GNU/kFreeBSD binaries: a type 1 note named
 * "GNU".  kfreebsd_trans_osrel() is registered as the callback that
 * validates the descriptor and derives the OS release from it.
 */
Elf_Brandnote __elfN(kfreebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= kfreebsd_trans_osrel
};
183 
184 static boolean_t
kfreebsd_trans_osrel(const Elf_Note * note,int32_t * osrel)185 kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
186 {
187 	const Elf32_Word *desc;
188 	uintptr_t p;
189 
190 	p = (uintptr_t)(note + 1);
191 	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
192 
193 	desc = (const Elf32_Word *)p;
194 	if (desc[0] != GNU_KFREEBSD_ABI_DESC)
195 		return (FALSE);
196 
197 	/*
198 	 * Debian GNU/kFreeBSD embed the earliest compatible kernel version
199 	 * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB way.
200 	 */
201 	*osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];
202 
203 	return (TRUE);
204 }
205 
206 int
__elfN(insert_brand_entry)207 __elfN(insert_brand_entry)(Elf_Brandinfo *entry)
208 {
209 	int i;
210 
211 	for (i = 0; i < MAX_BRANDS; i++) {
212 		if (elf_brand_list[i] == NULL) {
213 			elf_brand_list[i] = entry;
214 			break;
215 		}
216 	}
217 	if (i == MAX_BRANDS) {
218 		printf("WARNING: %s: could not insert brandinfo entry: %p\n",
219 			__func__, entry);
220 		return (-1);
221 	}
222 	return (0);
223 }
224 
225 int
__elfN(remove_brand_entry)226 __elfN(remove_brand_entry)(Elf_Brandinfo *entry)
227 {
228 	int i;
229 
230 	for (i = 0; i < MAX_BRANDS; i++) {
231 		if (elf_brand_list[i] == entry) {
232 			elf_brand_list[i] = NULL;
233 			break;
234 		}
235 	}
236 	if (i == MAX_BRANDS)
237 		return (-1);
238 	return (0);
239 }
240 
241 int
__elfN(brand_inuse)242 __elfN(brand_inuse)(Elf_Brandinfo *entry)
243 {
244 	struct proc *p;
245 	int rval = FALSE;
246 
247 	sx_slock(&allproc_lock);
248 	FOREACH_PROC_IN_SYSTEM(p) {
249 		if (p->p_sysent == entry->sysvec) {
250 			rval = TRUE;
251 			break;
252 		}
253 	}
254 	sx_sunlock(&allproc_lock);
255 
256 	return (rval);
257 }
258 
259 static Elf_Brandinfo *
__elfN(get_brandinfo)260 __elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
261     int interp_name_len, int32_t *osrel)
262 {
263 	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
264 	Elf_Brandinfo *bi;
265 	boolean_t ret;
266 	int i;
267 
268 	/*
269 	 * We support four types of branding -- (1) the ELF EI_OSABI field
270 	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
271 	 * branding w/in the ELF header, (3) path of the `interp_path'
272 	 * field, and (4) the ".note.ABI-tag" ELF section.
273 	 */
274 
275 	/* Look for an ".note.ABI-tag" ELF section */
276 	for (i = 0; i < MAX_BRANDS; i++) {
277 		bi = elf_brand_list[i];
278 		if (bi == NULL)
279 			continue;
280 		if (hdr->e_machine == bi->machine && (bi->flags &
281 		    (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
282 			ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
283 			if (ret)
284 				return (bi);
285 		}
286 	}
287 
288 	/* If the executable has a brand, search for it in the brand list. */
289 	for (i = 0; i < MAX_BRANDS; i++) {
290 		bi = elf_brand_list[i];
291 		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
292 			continue;
293 		if (hdr->e_machine == bi->machine &&
294 		    (hdr->e_ident[EI_OSABI] == bi->brand ||
295 		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
296 		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
297 			return (bi);
298 	}
299 
300 	/* Lacking a known brand, search for a recognized interpreter. */
301 	if (interp != NULL) {
302 		for (i = 0; i < MAX_BRANDS; i++) {
303 			bi = elf_brand_list[i];
304 			if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
305 				continue;
306 			if (hdr->e_machine == bi->machine &&
307 			    /* ELF image p_filesz includes terminating zero */
308 			    strlen(bi->interp_path) + 1 == interp_name_len &&
309 			    strncmp(interp, bi->interp_path, interp_name_len)
310 			    == 0)
311 				return (bi);
312 		}
313 	}
314 
315 	/* Lacking a recognized interpreter, try the default brand */
316 	for (i = 0; i < MAX_BRANDS; i++) {
317 		bi = elf_brand_list[i];
318 		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
319 			continue;
320 		if (hdr->e_machine == bi->machine &&
321 		    __elfN(fallback_brand) == bi->brand)
322 			return (bi);
323 	}
324 	return (NULL);
325 }
326 
327 static int
__elfN(check_header)328 __elfN(check_header)(const Elf_Ehdr *hdr)
329 {
330 	Elf_Brandinfo *bi;
331 	int i;
332 
333 	if (!IS_ELF(*hdr) ||
334 	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
335 	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
336 	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
337 	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
338 	    hdr->e_version != ELF_TARG_VER)
339 		return (ENOEXEC);
340 
341 	/*
342 	 * Make sure we have at least one brand for this machine.
343 	 */
344 
345 	for (i = 0; i < MAX_BRANDS; i++) {
346 		bi = elf_brand_list[i];
347 		if (bi != NULL && bi->machine == hdr->e_machine)
348 			break;
349 	}
350 	if (i == MAX_BRANDS)
351 		return (ENOEXEC);
352 
353 	return (0);
354 }
355 
356 static int
__elfN(map_partial)357 __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
358     vm_offset_t start, vm_offset_t end, vm_prot_t prot)
359 {
360 	struct sf_buf *sf;
361 	int error;
362 	vm_offset_t off;
363 
364 	/*
365 	 * Create the page if it doesn't exist yet. Ignore errors.
366 	 */
367 	vm_map_lock(map);
368 	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
369 	    VM_PROT_ALL, VM_PROT_ALL, 0);
370 	vm_map_unlock(map);
371 
372 	/*
373 	 * Find the page from the underlying object.
374 	 */
375 	if (object) {
376 		sf = vm_imgact_map_page(object, offset);
377 		if (sf == NULL)
378 			return (KERN_FAILURE);
379 		off = offset - trunc_page(offset);
380 		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
381 		    end - start);
382 		vm_imgact_unmap_page(sf);
383 		if (error) {
384 			return (KERN_FAILURE);
385 		}
386 	}
387 
388 	return (KERN_SUCCESS);
389 }
390 
/*
 * Map the range [start, end) of "map" from "object" starting at "offset",
 * coping with boundaries and file offsets that are not page aligned.
 *
 * A partial first or last page is filled through map_partial().  For the
 * page-aligned middle part, an unaligned "offset" forces the data to be
 * copied into anonymous memory; otherwise the object is mapped directly
 * with the given "prot" and "cow" flags.  "object" may be NULL, in which
 * case only anonymous memory is set up.
 *
 * Returns KERN_SUCCESS or a KERN_* failure code.
 */
static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, rv;

	/* Leading partial page: copy it, then advance to the next boundary. */
	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot);
		if (rv)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	/* Trailing partial page: copy it, then pull "end" back. */
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot);
		if (rv)
			return (rv);
		end = trunc_page(end);
	}
	if (end > start) {
		if (offset & PAGE_MASK) {
			/*
			 * The mapping is not page aligned. This means we have
			 * to copy the data. Sigh.
			 */
			/* Writable so that the copyout() below can fill it. */
			rv = vm_map_find(map, NULL, 0, &start, end - start,
			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
			if (rv)
				return (rv);
			if (object == NULL)
				return (KERN_SUCCESS);
			/* Copy at most up to the next source page boundary. */
			for (; start < end; start += sz) {
				sf = vm_imgact_map_page(object, offset);
				if (sf == NULL)
					return (KERN_FAILURE);
				off = offset - trunc_page(offset);
				sz = end - start;
				if (sz > PAGE_SIZE - off)
					sz = PAGE_SIZE - off;
				error = copyout((caddr_t)sf_buf_kva(sf) + off,
				    (caddr_t)start, sz);
				vm_imgact_unmap_page(sf);
				if (error) {
					return (KERN_FAILURE);
				}
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			/*
			 * Take a reference for the new map entry; it is
			 * dropped again if the insertion fails.
			 */
			vm_object_reference(object);
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
			    prot, VM_PROT_ALL, cow);
			vm_map_unlock(map);
			if (rv != KERN_SUCCESS)
				vm_object_deallocate(object);
		}
		return (rv);
	} else {
		/* Nothing left once the partial pages were handled. */
		return (KERN_SUCCESS);
	}
}
458 
/*
 * Load one segment of an ELF file into "vmspace".
 *
 * vmspace   address space receiving the segment.
 * object    VM object of the file; its vnode pager size bounds the read.
 * offset    file offset of the segment.
 * vmaddr    virtual address at which the segment is to appear.
 * memsz     size of the segment in memory.
 * filsz     number of bytes backed by the file; must not exceed memsz.
 * prot      final protection of the segment.
 * pagesize  page size used for the rounding of addresses and offsets.
 *
 * The file-backed part is mapped copy-on-write.  When memsz exceeds filsz
 * the remainder is anonymous memory, and the file bytes that share its
 * first page are copied into it.
 *
 * Returns 0 on success, ENOEXEC for a truncated or inconsistent segment,
 * EINVAL if a mapping could not be established, EIO if the file page
 * could not be mapped for copying, or the error from copyout().
 */
static int
__elfN(load_section)(struct vmspace *vmspace,
	vm_object_t object, vm_offset_t offset,
	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
	size_t pagesize)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_offset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
	    filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second..
	 */
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(&vmspace->vm_map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      cow);
		if (rv != KERN_SUCCESS)
			return (EINVAL);

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return (0);
		}
	}


	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	/* Bytes of file data lying beyond the last fully mapped page. */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	/* The anonymous region starts on the page holding the end of data. */
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		/* Anonymous memory, fully accessible until protected below. */
		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
		    map_addr + map_len, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* send the page fragment to user space */
		/*
		 * "off" is the distance between the "pagesize" boundary and
		 * the machine page boundary below the end of the file data.
		 */
		off = trunc_page_ps(offset + filsz, pagesize) -
		    trunc_page(offset + filsz);
		error = copyout((caddr_t)sf_buf_kva(sf) + off,
		    (caddr_t)map_addr, copy_len);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (error);
		}
	}

	/*
	 * set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	/* The result of vm_map_protect() is not checked. */
	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
	    round_page(map_addr + map_len),  prot, FALSE);

	return (0);
}
570 
/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 *
 * "p" is the process whose address space receives the file, and
 * "pagesize" is handed on to load_section() for each loadable segment.
 *
 * Returns 0 on success or an errno value; "addr" and "entry" are only
 * written on success.  The temporary state, the first-page mapping and
 * the vnode are released on every path that gets past the allocation.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	/* Bulky scratch state, allocated from the heap as one unit. */
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int vfslocked, error, i, numsegs;

#ifdef CAPABILITY_MODE
	/*
	 * XXXJA: This check can go away once we are sufficiently confident
	 * that the checks in namei() are correct.
	 */
	if (IN_CAPABILITY_MODE(curthread))
		return (ECAPMODE);
#endif

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->object = NULL;
	imgp->execlabel = NULL;

	NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
	    curthread);
	vfslocked = 0;
	if ((error = namei(nd)) != 0) {
		/* Keep the cleanup code from releasing a vnode we never got. */
		nd->ni_vp = NULL;
		goto fail;
	}
	vfslocked = NDHASGIANT(nd);
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VV_TEXT flag, too.
	 */
	VOP_SET_TEXT(nd->ni_vp);

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	/* Shared objects are relocated to *addr; executables are not. */
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now      */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	/* Load every non-empty PT_LOAD segment at its relocated address. */
	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
			/* Loadable segment */
			prot = __elfN(trans_prot)(phdr[i].p_flags);
			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    pagesize)) != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
				base_addr = trunc_page(phdr[i].p_vaddr +
				    rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

	/* Common exit: the success path falls through with error == 0. */
fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	if (nd->ni_vp)
		vput(nd->ni_vp);

	VFS_UNLOCK_GIANT(vfslocked);
	free(tempdata, M_TEMP);

	return (error);
}
717 
718 static int
__CONCAT(exec_,__elfN (imgact))719 __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
720 {
721 	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
722 	const Elf_Phdr *phdr;
723 	Elf_Auxargs *elf_auxargs;
724 	struct vmspace *vmspace;
725 	vm_prot_t prot;
726 	u_long text_size = 0, data_size = 0, total_size = 0;
727 	u_long text_addr = 0, data_addr = 0;
728 	u_long seg_size, seg_addr;
729 	u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
730 	int32_t osrel = 0;
731 	int error = 0, i, n, interp_name_len = 0;
732 	const char *interp = NULL, *newinterp = NULL;
733 	Elf_Brandinfo *brand_info;
734 	char *path;
735 	struct sysentvec *sv;
736 
737 	/*
738 	 * Do we have a valid ELF header ?
739 	 *
740 	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
741 	 * if particular brand doesn't support it.
742 	 */
743 	if (__elfN(check_header)(hdr) != 0 ||
744 	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
745 		return (-1);
746 
747 	/*
748 	 * From here on down, we return an errno, not -1, as we've
749 	 * detected an ELF file.
750 	 */
751 
752 	if ((hdr->e_phoff > PAGE_SIZE) ||
753 	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
754 		/* Only support headers in first page for now */
755 		return (ENOEXEC);
756 	}
757 	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
758 	if (!aligned(phdr, Elf_Addr))
759 		return (ENOEXEC);
760 	n = 0;
761 	baddr = 0;
762 	for (i = 0; i < hdr->e_phnum; i++) {
763 		switch (phdr[i].p_type) {
764 		case PT_LOAD:
765 			if (n == 0)
766 				baddr = phdr[i].p_vaddr;
767 			n++;
768 			break;
769 		case PT_INTERP:
770 			/* Path to interpreter */
771 			if (phdr[i].p_filesz > MAXPATHLEN ||
772 			    phdr[i].p_offset > PAGE_SIZE ||
773 			    phdr[i].p_filesz > PAGE_SIZE - phdr[i].p_offset)
774 				return (ENOEXEC);
775 			interp = imgp->image_header + phdr[i].p_offset;
776 			interp_name_len = phdr[i].p_filesz;
777 			break;
778 		case PT_GNU_STACK:
779 			if (__elfN(nxstack))
780 				imgp->stack_prot =
781 				    __elfN(trans_prot)(phdr[i].p_flags);
782 			break;
783 		}
784 	}
785 
786 	brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len,
787 	    &osrel);
788 	if (brand_info == NULL) {
789 		uprintf("ELF binary type \"%u\" not known.\n",
790 		    hdr->e_ident[EI_OSABI]);
791 		return (ENOEXEC);
792 	}
793 	if (hdr->e_type == ET_DYN) {
794 		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0)
795 			return (ENOEXEC);
796 		/*
797 		 * Honour the base load address from the dso if it is
798 		 * non-zero for some reason.
799 		 */
800 		if (baddr == 0)
801 			et_dyn_addr = ET_DYN_LOAD_ADDR;
802 		else
803 			et_dyn_addr = 0;
804 	} else
805 		et_dyn_addr = 0;
806 	sv = brand_info->sysvec;
807 	if (interp != NULL && brand_info->interp_newpath != NULL)
808 		newinterp = brand_info->interp_newpath;
809 
810 	/*
811 	 * Avoid a possible deadlock if the current address space is destroyed
812 	 * and that address space maps the locked vnode.  In the common case,
813 	 * the locked vnode's v_usecount is decremented but remains greater
814 	 * than zero.  Consequently, the vnode lock is not needed by vrele().
815 	 * However, in cases where the vnode lock is external, such as nullfs,
816 	 * v_usecount may become zero.
817 	 */
818 	VOP_UNLOCK(imgp->vp, 0);
819 
820 	error = exec_new_vmspace(imgp, sv);
821 	imgp->proc->p_sysent = sv;
822 
823 	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
824 	if (error)
825 		return (error);
826 
827 	vmspace = imgp->proc->p_vmspace;
828 
829 	for (i = 0; i < hdr->e_phnum; i++) {
830 		switch (phdr[i].p_type) {
831 		case PT_LOAD:	/* Loadable segment */
832 			if (phdr[i].p_memsz == 0)
833 				break;
834 			prot = __elfN(trans_prot)(phdr[i].p_flags);
835 			if ((error = __elfN(load_section)(vmspace,
836 			    imgp->object, phdr[i].p_offset,
837 			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr,
838 			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
839 			    sv->sv_pagesize)) != 0)
840 				return (error);
841 
842 			/*
843 			 * If this segment contains the program headers,
844 			 * remember their virtual address for the AT_PHDR
845 			 * aux entry. Static binaries don't usually include
846 			 * a PT_PHDR entry.
847 			 */
848 			if (phdr[i].p_offset == 0 &&
849 			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
850 				<= phdr[i].p_filesz)
851 				proghdr = phdr[i].p_vaddr + hdr->e_phoff +
852 				    et_dyn_addr;
853 
854 			seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
855 			seg_size = round_page(phdr[i].p_memsz +
856 			    phdr[i].p_vaddr + et_dyn_addr - seg_addr);
857 
858 			/*
859 			 * Make the largest executable segment the official
860 			 * text segment and all others data.
861 			 *
862 			 * Note that obreak() assumes that data_addr +
863 			 * data_size == end of data load area, and the ELF
864 			 * file format expects segments to be sorted by
865 			 * address.  If multiple data segments exist, the
866 			 * last one will be used.
867 			 */
868 
869 			if (phdr[i].p_flags & PF_X && text_size < seg_size) {
870 				text_size = seg_size;
871 				text_addr = seg_addr;
872 			} else {
873 				data_size = seg_size;
874 				data_addr = seg_addr;
875 			}
876 			total_size += seg_size;
877 			break;
878 		case PT_PHDR: 	/* Program header table info */
879 			proghdr = phdr[i].p_vaddr + et_dyn_addr;
880 			break;
881 		default:
882 			break;
883 		}
884 	}
885 
886 	if (data_addr == 0 && data_size == 0) {
887 		data_addr = text_addr;
888 		data_size = text_size;
889 	}
890 
891 	entry = (u_long)hdr->e_entry + et_dyn_addr;
892 
893 	/*
894 	 * Check limits.  It should be safe to check the
895 	 * limits after loading the segments since we do
896 	 * not actually fault in all the segments pages.
897 	 */
898 	PROC_LOCK(imgp->proc);
899 	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
900 	    text_size > maxtsiz ||
901 	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM) ||
902 	    racct_set(imgp->proc, RACCT_DATA, data_size) != 0 ||
903 	    racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) {
904 		PROC_UNLOCK(imgp->proc);
905 		return (ENOMEM);
906 	}
907 
908 	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
909 	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
910 	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
911 	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
912 
913 	/*
914 	 * We load the dynamic linker where a userland call
915 	 * to mmap(0, ...) would put it.  The rationale behind this
916 	 * calculation is that it leaves room for the heap to grow to
917 	 * its maximum allowed size.
918 	 */
919 	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
920 	    lim_max(imgp->proc, RLIMIT_DATA));
921 	PROC_UNLOCK(imgp->proc);
922 
923 	imgp->entry_addr = entry;
924 
925 	if (interp != NULL) {
926 		int have_interp = FALSE;
927 		VOP_UNLOCK(imgp->vp, 0);
928 		if (brand_info->emul_path != NULL &&
929 		    brand_info->emul_path[0] != '\0') {
930 			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
931 			snprintf(path, MAXPATHLEN, "%s%s",
932 			    brand_info->emul_path, interp);
933 			error = __elfN(load_file)(imgp->proc, path, &addr,
934 			    &imgp->entry_addr, sv->sv_pagesize);
935 			free(path, M_TEMP);
936 			if (error == 0)
937 				have_interp = TRUE;
938 		}
939 		if (!have_interp && newinterp != NULL) {
940 			error = __elfN(load_file)(imgp->proc, newinterp, &addr,
941 			    &imgp->entry_addr, sv->sv_pagesize);
942 			if (error == 0)
943 				have_interp = TRUE;
944 		}
945 		if (!have_interp) {
946 			error = __elfN(load_file)(imgp->proc, interp, &addr,
947 			    &imgp->entry_addr, sv->sv_pagesize);
948 		}
949 		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
950 		if (error != 0) {
951 			uprintf("ELF interpreter %s not found\n", interp);
952 			return (error);
953 		}
954 	} else
955 		addr = et_dyn_addr;
956 
957 	/*
958 	 * Construct auxargs table (used by the fixup routine)
959 	 */
960 	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
961 	elf_auxargs->execfd = -1;
962 	elf_auxargs->phdr = proghdr;
963 	elf_auxargs->phent = hdr->e_phentsize;
964 	elf_auxargs->phnum = hdr->e_phnum;
965 	elf_auxargs->pagesz = PAGE_SIZE;
966 	elf_auxargs->base = addr;
967 	elf_auxargs->flags = 0;
968 	elf_auxargs->entry = entry;
969 
970 	imgp->auxargs = elf_auxargs;
971 	imgp->interpreted = 0;
972 	imgp->reloc_base = addr;
973 	imgp->proc->p_osrel = osrel;
974 
975 	return (error);
976 }
977 
978 #define	suword __CONCAT(suword, __ELF_WORD_SIZE)
979 
980 int
__elfN(freebsd_fixup)981 __elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
982 {
983 	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
984 	Elf_Addr *base;
985 	Elf_Addr *pos;
986 
987 	base = (Elf_Addr *)*stack_base;
988 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
989 
990 	if (args->execfd != -1)
991 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
992 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
993 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
994 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
995 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
996 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
997 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
998 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
999 	if (imgp->execpathp != 0)
1000 		AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
1001 	AUXARGS_ENTRY(pos, AT_OSRELDATE, osreldate);
1002 	if (imgp->canary != 0) {
1003 		AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary);
1004 		AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
1005 	}
1006 	AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
1007 	if (imgp->pagesizes != 0) {
1008 		AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes);
1009 		AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
1010 	}
1011 	if (imgp->sysent->sv_timekeep_base != 0) {
1012 		AUXARGS_ENTRY(pos, AT_TIMEKEEP,
1013 		    imgp->sysent->sv_timekeep_base);
1014 	}
1015 	AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
1016 	    != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
1017 	    imgp->sysent->sv_stackprot);
1018 	AUXARGS_ENTRY(pos, AT_NULL, 0);
1019 
1020 	free(imgp->auxargs, M_TEMP);
1021 	imgp->auxargs = NULL;
1022 
1023 	base--;
1024 	suword(base, (long)imgp->args->argc);
1025 	*stack_base = (register_t *)base;
1026 	return (0);
1027 }
1028 
1029 /*
1030  * Code for generating ELF core dumps.
1031  */
1032 
/*
 * Callback invoked by each_writable_segment() for every selected map
 * entry; the second argument is the caller-supplied closure pointer.
 */
typedef void (*segment_callback)(vm_map_entry_t, void *);
1034 
/*
 * Closure for cb_put_phdr().  Both fields are advanced by the callback
 * after each program header entry it fills in.
 */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};
1040 
/*
 * Closure for cb_size_segment().  Accumulates totals over every map
 * entry that each_writable_segment() passes to the callback.
 */
struct sseg_closure {
	int count;		/* Count of segments seen by the callback. */
	size_t size;		/* Total size (end - start) of those segments. */
};
1046 
/*
 * Note output function.  Arguments are the registered argument, the
 * destination sbuf and a pointer to the note size.  register_note()
 * calls it with a NULL sbuf to obtain the size; putnote() calls it with
 * a real sbuf to emit the data.
 */
typedef void (*outfunc_t)(void *, struct sbuf *, size_t *);
1048 
/*
 * One registered core-file note.  Entries are created by register_note()
 * and later emitted, in list order, by putnote().  A type of -1 marks an
 * entry whose output is written without a note header.
 */
struct note_info {
	int		type;		/* Note type. */
	outfunc_t 	outfunc; 	/* Output function. */
	void		*outarg;	/* Argument for the output function. */
	size_t		outsize;	/* Output size. */
	TAILQ_ENTRY(note_info) link;	/* Link to the next note info. */
};

/* List head type for the registered notes. */
TAILQ_HEAD(note_info_list, note_info);
1058 
/* Segment walking, header construction and sbuf drain helpers. */
static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_writable_segment(struct thread *, segment_callback, void *);
static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
    int, void *, size_t, struct note_info_list *, size_t, gzFile);
static void __elfN(prepare_notes)(struct thread *, struct note_info_list *,
    size_t *);
static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t);
static void __elfN(putnote)(struct note_info *, struct sbuf *);
static size_t register_note(struct note_info_list *, int, outfunc_t, void *);
static int sbuf_drain_core_output(void *, const char *, int);
static int sbuf_drain_count(void *arg, const char *data, int len);

/* Note output functions (all share the outfunc_t signature). */
static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *);
static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *);
static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *);
static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *);
static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *);
static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *);
static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *);
static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *);
static void note_procstat_files(void *, struct sbuf *, size_t *);
static void note_procstat_groups(void *, struct sbuf *, size_t *);
static void note_procstat_osrel(void *, struct sbuf *, size_t *);
static void note_procstat_rlimit(void *, struct sbuf *, size_t *);
static void note_procstat_umask(void *, struct sbuf *, size_t *);
static void note_procstat_vmmap(void *, struct sbuf *, size_t *);

#ifdef COMPRESS_USER_CORES
/* Compression settings defined outside this file. */
extern int compress_user_cores;
extern int compress_user_cores_gzlevel;
#endif
1091 
1092 static int
core_output(struct vnode * vp,void * base,size_t len,off_t offset,struct ucred * active_cred,struct ucred * file_cred,struct thread * td,char * core_buf,gzFile gzfile)1093 core_output(struct vnode *vp, void *base, size_t len, off_t offset,
1094     struct ucred *active_cred, struct ucred *file_cred,
1095     struct thread *td, char *core_buf, gzFile gzfile) {
1096 
1097 	int error;
1098 	if (gzfile) {
1099 #ifdef COMPRESS_USER_CORES
1100 		error = compress_core(gzfile, base, core_buf, len, td);
1101 #else
1102 		panic("shouldn't be here");
1103 #endif
1104 	} else {
1105 		error = vn_rdwr_inchunks(UIO_WRITE, vp, base, len, offset,
1106 		    UIO_USERSPACE, IO_UNIT | IO_DIRECT, active_cred, file_cred,
1107 		    NULL, td);
1108 	}
1109 	return (error);
1110 }
1111 
/*
 * Coredump output parameters for sbuf drain routine.
 * Filled in by corehdr() and consumed by sbuf_drain_core_output().
 */
struct sbuf_drain_core_params {
	off_t		offset;		/* File offset of the next write;
					   advanced after each drain. */
	struct ucred	*active_cred;	/* Passed to vn_rdwr_inchunks(). */
	struct ucred	*file_cred;	/* Passed to vn_rdwr_inchunks(). */
	struct thread	*td;		/* Thread doing the I/O; its process
					   lock is dropped around the write. */
	struct vnode	*vp;		/* Core file vnode. */
#ifdef COMPRESS_USER_CORES
	gzFile		gzfile;		/* If not Z_NULL, output goes through
					   compress_core() instead of vp. */
#endif
};
1123 
1124 /*
1125  * Drain into a core file.
1126  */
1127 static int
sbuf_drain_core_output(void * arg,const char * data,int len)1128 sbuf_drain_core_output(void *arg, const char *data, int len)
1129 {
1130 	struct sbuf_drain_core_params *p;
1131 	int error, locked;
1132 
1133 	p = (struct sbuf_drain_core_params *)arg;
1134 
1135 	/*
1136 	 * Some kern_proc out routines that print to this sbuf may
1137 	 * call us with the process lock held. Draining with the
1138 	 * non-sleepable lock held is unsafe. The lock is needed for
1139 	 * those routines when dumping a live process. In our case we
1140 	 * can safely release the lock before draining and acquire
1141 	 * again after.
1142 	 */
1143 	locked = PROC_LOCKED(p->td->td_proc);
1144 	if (locked)
1145 		PROC_UNLOCK(p->td->td_proc);
1146 #ifdef COMPRESS_USER_CORES
1147 	if (p->gzfile != Z_NULL)
1148 		error = compress_core(p->gzfile, NULL, __DECONST(char *, data),
1149 		    len, p->td);
1150 	else
1151 #endif
1152 		error = vn_rdwr_inchunks(UIO_WRITE, p->vp,
1153 		    __DECONST(void *, data), len, p->offset, UIO_SYSSPACE,
1154 		    IO_UNIT | IO_DIRECT, p->active_cred, p->file_cred, NULL,
1155 		    p->td);
1156 	if (locked)
1157 		PROC_LOCK(p->td->td_proc);
1158 	if (error != 0)
1159 		return (-error);
1160 	p->offset += len;
1161 	return (len);
1162 }
1163 
/*
 * sbuf drain routine that discards the data and only tallies its
 * length.  arg points at the size_t running total; the whole chunk is
 * always reported as consumed.
 */
static int
sbuf_drain_count(void *arg, const char *data __unused, int len)
{
	size_t *total = arg;

	*total += len;
	return (len);
}
1176 
/*
 * Write an ELF core file for the process of td to the vnode vp.
 *
 * The file consists of the ELF header and program headers, the notes,
 * padding up to a page boundary, and then the contents of every segment
 * selected by each_writable_segment().
 *
 * limit is the maximum permitted core size; the dump is refused with
 * EFAULT when the computed size is greater than or equal to it.
 * flags may contain IMGACT_CORE_COMPRESS, which (only in kernels built
 * with COMPRESS_USER_CORES) routes all output through a gz stream.
 *
 * Returns 0 on success, otherwise an errno value: EFAULT when gz_open()
 * fails, when racct_add() rejects the size, or when the limit check
 * fails; ENOMEM/EINVAL from the allocation-failure branches; or whatever
 * corehdr()/core_output() report.
 */
int
__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
{
	struct ucred *cred = td->td_ucred;
	int error = 0;
	struct sseg_closure seginfo;
	struct note_info_list notelst;
	struct note_info *ninfo;
	void *hdr;
	size_t hdrsize, notesz, coresize;

	gzFile gzfile = Z_NULL;
	char *core_buf = NULL;
#ifdef COMPRESS_USER_CORES
	char gzopen_flags[8];
	char *p;
	int doing_compress = flags & IMGACT_CORE_COMPRESS;
#endif

	/* Initialise everything the common "done" cleanup path inspects. */
	hdr = NULL;
	TAILQ_INIT(&notelst);

#ifdef COMPRESS_USER_CORES
        if (doing_compress) {
                /*
                 * Build the gz open mode: "w" optionally followed by a
                 * single digit compression level when the configured
                 * level is within 0..9.
                 */
                p = gzopen_flags;
                *p++ = 'w';
                if (compress_user_cores_gzlevel >= 0 &&
                    compress_user_cores_gzlevel <= 9)
                        *p++ = '0' + compress_user_cores_gzlevel;
                *p = 0;
                gzfile = gz_open("", gzopen_flags, vp);
                if (gzfile == Z_NULL) {
                        error = EFAULT;
                        goto done;
                }
                /* Scratch buffer handed to core_output() for compression. */
                core_buf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
                if (!core_buf) {
                        error = ENOMEM;
                        goto done;
                }
        }
#endif

	/* Size the program segments. */
	seginfo.count = 0;
	seginfo.size = 0;
	each_writable_segment(td, cb_size_segment, &seginfo);

	/*
	 * Collect info about the core file header area.
	 * The "1 +" accounts for the PT_NOTE program header that precedes
	 * the per-segment headers.
	 */
	hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count);
	__elfN(prepare_notes)(td, &notelst, &notesz);
	coresize = round_page(hdrsize + notesz) + seginfo.size;

#ifdef RACCT
	/* Charge the core size to the process; any failure becomes EFAULT. */
	PROC_LOCK(td->td_proc);
	error = racct_add(td->td_proc, RACCT_CORE, coresize);
	PROC_UNLOCK(td->td_proc);
	if (error != 0) {
		error = EFAULT;
		goto done;
	}
#endif
	if (coresize >= limit) {
		error = EFAULT;
		goto done;
	}

	/*
	 * Allocate memory for building the header, fill it up,
	 * and write it out following the notes.
	 */
	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
	if (hdr == NULL) {
		error = EINVAL;
		goto done;
	}
	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize,
	    &notelst, notesz, gzfile);

	/* Write the contents of all of the writable segments. */
	if (error == 0) {
		Elf_Phdr *php;
		off_t offset;
		int i;

		/*
		 * Skip the ELF header and the leading PT_NOTE entry; php
		 * then walks the per-segment headers that puthdr() filled
		 * in.
		 */
		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
		offset = round_page(hdrsize + notesz);
		for (i = 0; i < seginfo.count; i++) {
			error = core_output(vp, (caddr_t)(uintptr_t)php->p_vaddr,
			    php->p_filesz, offset, cred, NOCRED, curthread, core_buf, gzfile);
			if (error != 0)
				break;
			offset += php->p_filesz;
			php++;
		}
	}
	if (error) {
		log(LOG_WARNING,
		    "Failed to write core file for process %s (error %d)\n",
		    curproc->p_comm, error);
	}

	/* Common exit: release everything acquired above. */
done:
#ifdef COMPRESS_USER_CORES
	if (core_buf)
		free(core_buf, M_TEMP);
	if (gzfile)
		gzclose(gzfile);
#endif
	/* Drop every note registered by prepare_notes(). */
	while ((ninfo = TAILQ_FIRST(&notelst)) != NULL) {
		TAILQ_REMOVE(&notelst, ninfo, link);
		free(ninfo, M_TEMP);
	}
	if (hdr != NULL)
		free(hdr, M_TEMP);

	return (error);
}
1297 
1298 /*
1299  * A callback for each_writable_segment() to write out the segment's
1300  * program header entry.
1301  */
1302 static void
cb_put_phdr(entry,closure)1303 cb_put_phdr(entry, closure)
1304 	vm_map_entry_t entry;
1305 	void *closure;
1306 {
1307 	struct phdr_closure *phc = (struct phdr_closure *)closure;
1308 	Elf_Phdr *phdr = phc->phdr;
1309 
1310 	phc->offset = round_page(phc->offset);
1311 
1312 	phdr->p_type = PT_LOAD;
1313 	phdr->p_offset = phc->offset;
1314 	phdr->p_vaddr = entry->start;
1315 	phdr->p_paddr = 0;
1316 	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
1317 	phdr->p_align = PAGE_SIZE;
1318 	phdr->p_flags = __elfN(untrans_prot)(entry->protection);
1319 
1320 	phc->offset += phdr->p_filesz;
1321 	phc->phdr++;
1322 }
1323 
1324 /*
1325  * A callback for each_writable_segment() to gather information about
1326  * the number of segments and their total size.
1327  */
1328 static void
cb_size_segment(entry,closure)1329 cb_size_segment(entry, closure)
1330 	vm_map_entry_t entry;
1331 	void *closure;
1332 {
1333 	struct sseg_closure *ssc = (struct sseg_closure *)closure;
1334 
1335 	ssc->count++;
1336 	ssc->size += entry->end - entry->start;
1337 }
1338 
1339 /*
1340  * For each writable segment in the process's memory map, call the given
1341  * function with a pointer to the map entry and some arbitrary
1342  * caller-supplied data.
1343  */
1344 static void
each_writable_segment(td,func,closure)1345 each_writable_segment(td, func, closure)
1346 	struct thread *td;
1347 	segment_callback func;
1348 	void *closure;
1349 {
1350 	struct proc *p = td->td_proc;
1351 	vm_map_t map = &p->p_vmspace->vm_map;
1352 	vm_map_entry_t entry;
1353 	vm_object_t backing_object, object;
1354 	boolean_t ignore_entry;
1355 
1356 	vm_map_lock_read(map);
1357 	for (entry = map->header.next; entry != &map->header;
1358 	    entry = entry->next) {
1359 		/*
1360 		 * Don't dump inaccessible mappings, deal with legacy
1361 		 * coredump mode.
1362 		 *
1363 		 * Note that read-only segments related to the elf binary
1364 		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1365 		 * need to arbitrarily ignore such segments.
1366 		 */
1367 		if (elf_legacy_coredump) {
1368 			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
1369 				continue;
1370 		} else {
1371 			if ((entry->protection & VM_PROT_ALL) == 0)
1372 				continue;
1373 		}
1374 
1375 		/*
1376 		 * Dont include memory segment in the coredump if
1377 		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1378 		 * madvise(2).  Do not dump submaps (i.e. parts of the
1379 		 * kernel map).
1380 		 */
1381 		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
1382 			continue;
1383 
1384 		if ((object = entry->object.vm_object) == NULL)
1385 			continue;
1386 
1387 		/* Ignore memory-mapped devices and such things. */
1388 		VM_OBJECT_LOCK(object);
1389 		while ((backing_object = object->backing_object) != NULL) {
1390 			VM_OBJECT_LOCK(backing_object);
1391 			VM_OBJECT_UNLOCK(object);
1392 			object = backing_object;
1393 		}
1394 		ignore_entry = object->type != OBJT_DEFAULT &&
1395 		    object->type != OBJT_SWAP && object->type != OBJT_VNODE &&
1396 		    object->type != OBJT_PHYS;
1397 		VM_OBJECT_UNLOCK(object);
1398 		if (ignore_entry)
1399 			continue;
1400 
1401 		(*func)(entry, closure);
1402 	}
1403 	vm_map_unlock_read(map);
1404 }
1405 
1406 /*
1407  * Write the core file header to the file, including padding up to
1408  * the page boundary.
1409  */
1410 static int
__elfN(corehdr)1411 __elfN(corehdr)(struct thread *td, struct vnode *vp, struct ucred *cred,
1412     int numsegs, void *hdr, size_t hdrsize, struct note_info_list *notelst,
1413     size_t notesz, gzFile gzfile)
1414 {
1415 	struct sbuf_drain_core_params params;
1416 	struct note_info *ninfo;
1417 	struct sbuf *sb;
1418 	int error;
1419 
1420 	/* Fill in the header. */
1421 	bzero(hdr, hdrsize);
1422 	__elfN(puthdr)(td, hdr, hdrsize, numsegs, notesz);
1423 
1424 	params.offset = 0;
1425 	params.active_cred = cred;
1426 	params.file_cred = NOCRED;
1427 	params.td = td;
1428 	params.vp = vp;
1429 #ifdef COMPRESS_USER_CORES
1430 	params.gzfile = gzfile;
1431 #endif
1432 	sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN);
1433 	sbuf_set_drain(sb, sbuf_drain_core_output, &params);
1434 	sbuf_start_section(sb, NULL);
1435 	sbuf_bcat(sb, hdr, hdrsize);
1436 	TAILQ_FOREACH(ninfo, notelst, link)
1437 	    __elfN(putnote)(ninfo, sb);
1438 	/* Align up to a page boundary for the program segments. */
1439 	sbuf_end_section(sb, -1, PAGE_SIZE, 0);
1440 	error = sbuf_finish(sb);
1441 	sbuf_delete(sb);
1442 
1443 	return (error);
1444 }
1445 
1446 static void
__elfN(prepare_notes)1447 __elfN(prepare_notes)(struct thread *td, struct note_info_list *list,
1448     size_t *sizep)
1449 {
1450 	struct proc *p;
1451 	struct thread *thr;
1452 	size_t size;
1453 
1454 	p = td->td_proc;
1455 	size = 0;
1456 
1457 	size += register_note(list, NT_PRPSINFO, __elfN(note_prpsinfo), p);
1458 
1459 	/*
1460 	 * To have the debugger select the right thread (LWP) as the initial
1461 	 * thread, we dump the state of the thread passed to us in td first.
1462 	 * This is the thread that causes the core dump and thus likely to
1463 	 * be the right thread one wants to have selected in the debugger.
1464 	 */
1465 	thr = td;
1466 	while (thr != NULL) {
1467 		size += register_note(list, NT_PRSTATUS,
1468 		    __elfN(note_prstatus), thr);
1469 		size += register_note(list, NT_FPREGSET,
1470 		    __elfN(note_fpregset), thr);
1471 		size += register_note(list, NT_THRMISC,
1472 		    __elfN(note_thrmisc), thr);
1473 		size += register_note(list, -1,
1474 		    __elfN(note_threadmd), thr);
1475 
1476 		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
1477 		    TAILQ_NEXT(thr, td_plist);
1478 		if (thr == td)
1479 			thr = TAILQ_NEXT(thr, td_plist);
1480 	}
1481 
1482 	size += register_note(list, NT_PROCSTAT_PROC,
1483 	    __elfN(note_procstat_proc), p);
1484 	size += register_note(list, NT_PROCSTAT_FILES,
1485 	    note_procstat_files, p);
1486 	size += register_note(list, NT_PROCSTAT_VMMAP,
1487 	    note_procstat_vmmap, p);
1488 	size += register_note(list, NT_PROCSTAT_GROUPS,
1489 	    note_procstat_groups, p);
1490 	size += register_note(list, NT_PROCSTAT_UMASK,
1491 	    note_procstat_umask, p);
1492 	size += register_note(list, NT_PROCSTAT_RLIMIT,
1493 	    note_procstat_rlimit, p);
1494 	size += register_note(list, NT_PROCSTAT_OSREL,
1495 	    note_procstat_osrel, p);
1496 	size += register_note(list, NT_PROCSTAT_PSSTRINGS,
1497 	    __elfN(note_procstat_psstrings), p);
1498 	size += register_note(list, NT_PROCSTAT_AUXV,
1499 	    __elfN(note_procstat_auxv), p);
1500 
1501 	*sizep = size;
1502 }
1503 
1504 static void
__elfN(puthdr)1505 __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs,
1506     size_t notesz)
1507 {
1508 	Elf_Ehdr *ehdr;
1509 	Elf_Phdr *phdr;
1510 	struct phdr_closure phc;
1511 
1512 	ehdr = (Elf_Ehdr *)hdr;
1513 	phdr = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr));
1514 
1515 	ehdr->e_ident[EI_MAG0] = ELFMAG0;
1516 	ehdr->e_ident[EI_MAG1] = ELFMAG1;
1517 	ehdr->e_ident[EI_MAG2] = ELFMAG2;
1518 	ehdr->e_ident[EI_MAG3] = ELFMAG3;
1519 	ehdr->e_ident[EI_CLASS] = ELF_CLASS;
1520 	ehdr->e_ident[EI_DATA] = ELF_DATA;
1521 	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
1522 	ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
1523 	ehdr->e_ident[EI_ABIVERSION] = 0;
1524 	ehdr->e_ident[EI_PAD] = 0;
1525 	ehdr->e_type = ET_CORE;
1526 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1527 	ehdr->e_machine = ELF_ARCH32;
1528 #else
1529 	ehdr->e_machine = ELF_ARCH;
1530 #endif
1531 	ehdr->e_version = EV_CURRENT;
1532 	ehdr->e_entry = 0;
1533 	ehdr->e_phoff = sizeof(Elf_Ehdr);
1534 	ehdr->e_flags = 0;
1535 	ehdr->e_ehsize = sizeof(Elf_Ehdr);
1536 	ehdr->e_phentsize = sizeof(Elf_Phdr);
1537 	ehdr->e_phnum = numsegs + 1;
1538 	ehdr->e_shentsize = sizeof(Elf_Shdr);
1539 	ehdr->e_shnum = 0;
1540 	ehdr->e_shstrndx = SHN_UNDEF;
1541 
1542 	/*
1543 	 * Fill in the program header entries.
1544 	 */
1545 
1546 	/* The note segement. */
1547 	phdr->p_type = PT_NOTE;
1548 	phdr->p_offset = hdrsize;
1549 	phdr->p_vaddr = 0;
1550 	phdr->p_paddr = 0;
1551 	phdr->p_filesz = notesz;
1552 	phdr->p_memsz = 0;
1553 	phdr->p_flags = PF_R;
1554 	phdr->p_align = ELF_NOTE_ROUNDSIZE;
1555 	phdr++;
1556 
1557 	/* All the writable segments from the program. */
1558 	phc.phdr = phdr;
1559 	phc.offset = round_page(hdrsize + notesz);
1560 	each_writable_segment(td, cb_put_phdr, &phc);
1561 }
1562 
1563 static size_t
register_note(struct note_info_list * list,int type,outfunc_t out,void * arg)1564 register_note(struct note_info_list *list, int type, outfunc_t out, void *arg)
1565 {
1566 	struct note_info *ninfo;
1567 	size_t size, notesize;
1568 
1569 	size = 0;
1570 	out(arg, NULL, &size);
1571 	ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK);
1572 	ninfo->type = type;
1573 	ninfo->outfunc = out;
1574 	ninfo->outarg = arg;
1575 	ninfo->outsize = size;
1576 	TAILQ_INSERT_TAIL(list, ninfo, link);
1577 
1578 	if (type == -1)
1579 		return (size);
1580 
1581 	notesize = sizeof(Elf_Note) +		/* note header */
1582 	    roundup2(8, ELF_NOTE_ROUNDSIZE) +	/* note name ("FreeBSD") */
1583 	    roundup2(size, ELF_NOTE_ROUNDSIZE);	/* note description */
1584 
1585 	return (notesize);
1586 }
1587 
1588 static void
__elfN(putnote)1589 __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
1590 {
1591 	Elf_Note note;
1592 	ssize_t old_len;
1593 
1594 	if (ninfo->type == -1) {
1595 		ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
1596 		return;
1597 	}
1598 
1599 	note.n_namesz = 8; /* strlen("FreeBSD") + 1 */
1600 	note.n_descsz = ninfo->outsize;
1601 	note.n_type = ninfo->type;
1602 
1603 	sbuf_bcat(sb, &note, sizeof(note));
1604 	sbuf_start_section(sb, &old_len);
1605 	sbuf_bcat(sb, "FreeBSD", note.n_namesz);
1606 	sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
1607 	if (note.n_descsz == 0)
1608 		return;
1609 	sbuf_start_section(sb, &old_len);
1610 	ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
1611 	sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
1612 }
1613 
1614 /*
1615  * Miscellaneous note out functions.
1616  */
1617 
/*
 * Select the structure layouts used by the note output functions below.
 * When this file is compiled for 32-bit images with COMPAT_FREEBSD32,
 * the *32 variants are used; otherwise the native types are used.
 */
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
#include <compat/freebsd32/freebsd32.h>

typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
typedef struct thrmisc32 elf_thrmisc_t;
/* Flag passed to kern_proc_out() by note_procstat_proc(). */
#define ELF_KERN_PROC_MASK	KERN_PROC_MASK32
typedef struct kinfo_proc32 elf_kinfo_proc_t;
typedef uint32_t elf_ps_strings_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
typedef thrmisc_t elf_thrmisc_t;
/* Flag passed to kern_proc_out() by note_procstat_proc(). */
#define ELF_KERN_PROC_MASK	0
typedef struct kinfo_proc elf_kinfo_proc_t;
typedef vm_offset_t elf_ps_strings_t;
#endif
1641 
1642 static void
__elfN(note_prpsinfo)1643 __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep)
1644 {
1645 	struct proc *p;
1646 	elf_prpsinfo_t *psinfo;
1647 
1648 	p = (struct proc *)arg;
1649 	if (sb != NULL) {
1650 		KASSERT(*sizep == sizeof(*psinfo), ("invalid size"));
1651 		psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK);
1652 		psinfo->pr_version = PRPSINFO_VERSION;
1653 		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
1654 		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
1655 		/*
1656 		 * XXX - We don't fill in the command line arguments properly
1657 		 * yet.
1658 		 */
1659 		strlcpy(psinfo->pr_psargs, p->p_comm,
1660 		    sizeof(psinfo->pr_psargs));
1661 
1662 		sbuf_bcat(sb, psinfo, sizeof(*psinfo));
1663 		free(psinfo, M_TEMP);
1664 	}
1665 	*sizep = sizeof(*psinfo);
1666 }
1667 
1668 static void
__elfN(note_prstatus)1669 __elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep)
1670 {
1671 	struct thread *td;
1672 	elf_prstatus_t *status;
1673 
1674 	td = (struct thread *)arg;
1675 	if (sb != NULL) {
1676 		KASSERT(*sizep == sizeof(*status), ("invalid size"));
1677 		status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK);
1678 		status->pr_version = PRSTATUS_VERSION;
1679 		status->pr_statussz = sizeof(elf_prstatus_t);
1680 		status->pr_gregsetsz = sizeof(elf_gregset_t);
1681 		status->pr_fpregsetsz = sizeof(elf_fpregset_t);
1682 		status->pr_osreldate = osreldate;
1683 		status->pr_cursig = td->td_proc->p_sig;
1684 		status->pr_pid = td->td_tid;
1685 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1686 		fill_regs32(td, &status->pr_reg);
1687 #else
1688 		fill_regs(td, &status->pr_reg);
1689 #endif
1690 		sbuf_bcat(sb, status, sizeof(*status));
1691 		free(status, M_TEMP);
1692 	}
1693 	*sizep = sizeof(*status);
1694 }
1695 
1696 static void
__elfN(note_fpregset)1697 __elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep)
1698 {
1699 	struct thread *td;
1700 	elf_prfpregset_t *fpregset;
1701 
1702 	td = (struct thread *)arg;
1703 	if (sb != NULL) {
1704 		KASSERT(*sizep == sizeof(*fpregset), ("invalid size"));
1705 		fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK);
1706 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1707 		fill_fpregs32(td, fpregset);
1708 #else
1709 		fill_fpregs(td, fpregset);
1710 #endif
1711 		sbuf_bcat(sb, fpregset, sizeof(*fpregset));
1712 		free(fpregset, M_TEMP);
1713 	}
1714 	*sizep = sizeof(*fpregset);
1715 }
1716 
1717 static void
__elfN(note_thrmisc)1718 __elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep)
1719 {
1720 	struct thread *td;
1721 	elf_thrmisc_t thrmisc;
1722 
1723 	td = (struct thread *)arg;
1724 	if (sb != NULL) {
1725 		KASSERT(*sizep == sizeof(thrmisc), ("invalid size"));
1726 		bzero(&thrmisc._pad, sizeof(thrmisc._pad));
1727 		strcpy(thrmisc.pr_tname, td->td_name);
1728 		sbuf_bcat(sb, &thrmisc, sizeof(thrmisc));
1729 	}
1730 	*sizep = sizeof(thrmisc);
1731 }
1732 
1733 /*
1734  * Allow for MD specific notes, as well as any MD
1735  * specific preparations for writing MI notes.
1736  */
1737 static void
__elfN(note_threadmd)1738 __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep)
1739 {
1740 	struct thread *td;
1741 	void *buf;
1742 	size_t size;
1743 
1744 	td = (struct thread *)arg;
1745 	size = *sizep;
1746 	if (size != 0 && sb != NULL)
1747 		buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK);
1748 	else
1749 		buf = NULL;
1750 	size = 0;
1751 	__elfN(dump_thread)(td, buf, &size);
1752 	KASSERT(*sizep == size, ("invalid size"));
1753 	if (size != 0 && sb != NULL)
1754 		sbuf_bcat(sb, buf, size);
1755 	free(buf, M_TEMP);
1756 	*sizep = size;
1757 }
1758 
1759 #ifdef KINFO_PROC_SIZE
1760 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
1761 #endif
1762 
1763 static void
__elfN(note_procstat_proc)1764 __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep)
1765 {
1766 	struct proc *p;
1767 	size_t size;
1768 	int structsize;
1769 
1770 	p = (struct proc *)arg;
1771 	size = sizeof(structsize) + p->p_numthreads *
1772 	    sizeof(elf_kinfo_proc_t);
1773 
1774 	if (sb != NULL) {
1775 		KASSERT(*sizep == size, ("invalid size"));
1776 		structsize = sizeof(elf_kinfo_proc_t);
1777 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1778 		PROC_LOCK(p);
1779 		kern_proc_out(p, sb, ELF_KERN_PROC_MASK);
1780 	}
1781 	*sizep = size;
1782 }
1783 
1784 #ifdef KINFO_FILE_SIZE
1785 CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
1786 #endif
1787 
1788 static void
note_procstat_files(void * arg,struct sbuf * sb,size_t * sizep)1789 note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
1790 {
1791 	struct proc *p;
1792 	size_t size;
1793 	int structsize;
1794 
1795 	p = (struct proc *)arg;
1796 	if (sb == NULL) {
1797 		size = 0;
1798 		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1799 		sbuf_set_drain(sb, sbuf_drain_count, &size);
1800 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1801 		PROC_LOCK(p);
1802 		kern_proc_filedesc_out(p, sb, -1);
1803 		sbuf_finish(sb);
1804 		sbuf_delete(sb);
1805 		*sizep = size;
1806 	} else {
1807 		structsize = sizeof(struct kinfo_file);
1808 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1809 		PROC_LOCK(p);
1810 		kern_proc_filedesc_out(p, sb, -1);
1811 	}
1812 }
1813 
1814 #ifdef KINFO_VMENTRY_SIZE
1815 CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
1816 #endif
1817 
1818 static void
note_procstat_vmmap(void * arg,struct sbuf * sb,size_t * sizep)1819 note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
1820 {
1821 	struct proc *p;
1822 	size_t size;
1823 	int structsize;
1824 
1825 	p = (struct proc *)arg;
1826 	if (sb == NULL) {
1827 		size = 0;
1828 		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1829 		sbuf_set_drain(sb, sbuf_drain_count, &size);
1830 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1831 		PROC_LOCK(p);
1832 		kern_proc_vmmap_out(p, sb);
1833 		sbuf_finish(sb);
1834 		sbuf_delete(sb);
1835 		*sizep = size;
1836 	} else {
1837 		structsize = sizeof(struct kinfo_vmentry);
1838 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1839 		PROC_LOCK(p);
1840 		kern_proc_vmmap_out(p, sb);
1841 	}
1842 }
1843 
1844 static void
note_procstat_groups(void * arg,struct sbuf * sb,size_t * sizep)1845 note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep)
1846 {
1847 	struct proc *p;
1848 	size_t size;
1849 	int structsize;
1850 
1851 	p = (struct proc *)arg;
1852 	size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t);
1853 	if (sb != NULL) {
1854 		KASSERT(*sizep == size, ("invalid size"));
1855 		structsize = sizeof(gid_t);
1856 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1857 		sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups *
1858 		    sizeof(gid_t));
1859 	}
1860 	*sizep = size;
1861 }
1862 
1863 static void
note_procstat_umask(void * arg,struct sbuf * sb,size_t * sizep)1864 note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep)
1865 {
1866 	struct proc *p;
1867 	size_t size;
1868 	int structsize;
1869 
1870 	p = (struct proc *)arg;
1871 	size = sizeof(structsize) + sizeof(p->p_fd->fd_cmask);
1872 	if (sb != NULL) {
1873 		KASSERT(*sizep == size, ("invalid size"));
1874 		structsize = sizeof(p->p_fd->fd_cmask);
1875 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1876 		sbuf_bcat(sb, &p->p_fd->fd_cmask, sizeof(p->p_fd->fd_cmask));
1877 	}
1878 	*sizep = size;
1879 }
1880 
1881 static void
note_procstat_rlimit(void * arg,struct sbuf * sb,size_t * sizep)1882 note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep)
1883 {
1884 	struct proc *p;
1885 	struct rlimit rlim[RLIM_NLIMITS];
1886 	size_t size;
1887 	int structsize, i;
1888 
1889 	p = (struct proc *)arg;
1890 	size = sizeof(structsize) + sizeof(rlim);
1891 	if (sb != NULL) {
1892 		KASSERT(*sizep == size, ("invalid size"));
1893 		structsize = sizeof(rlim);
1894 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1895 		PROC_LOCK(p);
1896 		for (i = 0; i < RLIM_NLIMITS; i++)
1897 			lim_rlimit(p, i, &rlim[i]);
1898 		PROC_UNLOCK(p);
1899 		sbuf_bcat(sb, rlim, sizeof(rlim));
1900 	}
1901 	*sizep = size;
1902 }
1903 
1904 static void
note_procstat_osrel(void * arg,struct sbuf * sb,size_t * sizep)1905 note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep)
1906 {
1907 	struct proc *p;
1908 	size_t size;
1909 	int structsize;
1910 
1911 	p = (struct proc *)arg;
1912 	size = sizeof(structsize) + sizeof(p->p_osrel);
1913 	if (sb != NULL) {
1914 		KASSERT(*sizep == size, ("invalid size"));
1915 		structsize = sizeof(p->p_osrel);
1916 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1917 		sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel));
1918 	}
1919 	*sizep = size;
1920 }
1921 
1922 static void
__elfN(note_procstat_psstrings)1923 __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep)
1924 {
1925 	struct proc *p;
1926 	elf_ps_strings_t ps_strings;
1927 	size_t size;
1928 	int structsize;
1929 
1930 	p = (struct proc *)arg;
1931 	size = sizeof(structsize) + sizeof(ps_strings);
1932 	if (sb != NULL) {
1933 		KASSERT(*sizep == size, ("invalid size"));
1934 		structsize = sizeof(ps_strings);
1935 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1936 		ps_strings = PTROUT(p->p_sysent->sv_psstrings);
1937 #else
1938 		ps_strings = p->p_sysent->sv_psstrings;
1939 #endif
1940 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1941 		sbuf_bcat(sb, &ps_strings, sizeof(ps_strings));
1942 	}
1943 	*sizep = size;
1944 }
1945 
1946 static void
__elfN(note_procstat_auxv)1947 __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep)
1948 {
1949 	struct proc *p;
1950 	size_t size;
1951 	int structsize;
1952 
1953 	p = (struct proc *)arg;
1954 	if (sb == NULL) {
1955 		size = 0;
1956 		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1957 		sbuf_set_drain(sb, sbuf_drain_count, &size);
1958 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1959 		PHOLD(p);
1960 		proc_getauxv(curthread, p, sb);
1961 		PRELE(p);
1962 		sbuf_finish(sb);
1963 		sbuf_delete(sb);
1964 		*sizep = size;
1965 	} else {
1966 		structsize = sizeof(Elf_Auxinfo);
1967 		sbuf_bcat(sb, &structsize, sizeof(structsize));
1968 		PHOLD(p);
1969 		proc_getauxv(curthread, p, sb);
1970 		PRELE(p);
1971 	}
1972 }
1973 
/*
 * Search one PT_NOTE segment of the image header for a note matching
 * checknote.
 *
 * pnote is the program header of the note segment.  The segment must
 * lie entirely within the first PAGE_SIZE bytes of the file, since only
 * imgp->image_header is examined.  A note matches when its name size,
 * description size and type equal those in checknote->hdr and its name
 * equals checknote->vendor.
 *
 * Returns FALSE when pnote is NULL or out of range, when a note is
 * misaligned or truncated, or when nothing matches.  On a match,
 * returns TRUE, or -- if checknote requests osrel translation and
 * provides a trans_osrel callback -- the result of that callback, which
 * is handed the note and osrel.
 */
static boolean_t
__elfN(parse_notes)(struct image_params *imgp, Elf_Brandnote *checknote,
    int32_t *osrel, const Elf_Phdr *pnote)
{
	const Elf_Note *note, *note0, *note_end;
	const char *note_name;
	int i;

	/* Written so that offset + filesz cannot overflow the check. */
	if (pnote == NULL || pnote->p_offset > PAGE_SIZE ||
	    pnote->p_filesz > PAGE_SIZE - pnote->p_offset)
		return (FALSE);

	note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
	note_end = (const Elf_Note *)(imgp->image_header +
	    pnote->p_offset + pnote->p_filesz);
	/*
	 * Examine at most 100 notes.  The note >= note0 test stops the
	 * walk if advancing ever moves the pointer backwards.
	 */
	for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
		/* Reject misaligned notes and ones with a truncated header. */
		if (!aligned(note, Elf32_Addr) || (const char *)note_end -
		    (const char *)note < sizeof(Elf_Note))
			return (FALSE);
		if (note->n_namesz != checknote->hdr.n_namesz ||
		    note->n_descsz != checknote->hdr.n_descsz ||
		    note->n_type != checknote->hdr.n_type)
			goto nextnote;
		/* The name immediately follows the fixed-size header. */
		note_name = (const char *)(note + 1);
		if (note_name + checknote->hdr.n_namesz >=
		    (const char *)note_end || strncmp(checknote->vendor,
		    note_name, checknote->hdr.n_namesz) != 0)
			goto nextnote;

		/*
		 * Fetch the osreldate for binary
		 * from the ELF OSABI-note if necessary.
		 */
		if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 &&
		    checknote->trans_osrel != NULL)
			return (checknote->trans_osrel(note, osrel));
		return (TRUE);

nextnote:
		/* Step over the header and the padded name and description. */
		note = (const Elf_Note *)((const char *)(note + 1) +
		    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
		    roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE));
	}

	return (FALSE);
}
2020 
2021 /*
2022  * Try to find the appropriate ABI-note section for checknote,
2023  * fetch the osreldate for binary from the ELF OSABI-note. Only the
2024  * first page of the image is searched, the same as for headers.
2025  */
2026 static boolean_t
__elfN(check_note)2027 __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
2028     int32_t *osrel)
2029 {
2030 	const Elf_Phdr *phdr;
2031 	const Elf_Ehdr *hdr;
2032 	int i;
2033 
2034 	hdr = (const Elf_Ehdr *)imgp->image_header;
2035 	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
2036 
2037 	for (i = 0; i < hdr->e_phnum; i++) {
2038 		if (phdr[i].p_type == PT_NOTE &&
2039 		    __elfN(parse_notes)(imgp, checknote, osrel, &phdr[i]))
2040 			return (TRUE);
2041 	}
2042 	return (FALSE);
2043 
2044 }
2045 
/*
 * Tell kern_execve.c about it, with a little help from the linker.
 *
 * The entry below is built per ELF word size: its first member is the
 * function named exec_ followed by the expansion of __elfN(imgact), its
 * second member is the string formed from ELF followed by
 * __ELF_WORD_SIZE.  EXEC_SET is defined outside this file; presumably it
 * places the entry where the exec code can find it -- confirm against
 * its definition.
 */
static struct execsw __elfN(execsw) = {
	__CONCAT(exec_, __elfN(imgact)),
	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
};
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
2054 
2055 #ifdef COMPRESS_USER_CORES
2056 /*
2057  * Compress and write out a core segment for a user process.
2058  *
2059  * 'inbuf' is the starting address of a VM segment in the process' address
2060  * space that is to be compressed and written out to the core file.  'dest_buf'
2061  * is a buffer in the kernel's address space.  The segment is copied from
2062  * 'inbuf' to 'dest_buf' first before being processed by the compression
2063  * routine gzwrite().  This copying is necessary because the content of the VM
2064  * segment may change between the compression pass and the crc-computation pass
2065  * in gzwrite().  This is because realtime threads may preempt the UNIX kernel.
2066  *
2067  * If inbuf is NULL it is assumed that data is already copied to 'dest_buf'.
2068  */
2069 static int
compress_core(gzFile file,char * inbuf,char * dest_buf,unsigned int len,struct thread * td)2070 compress_core (gzFile file, char *inbuf, char *dest_buf, unsigned int len,
2071     struct thread *td)
2072 {
2073 	int len_compressed;
2074 	int error = 0;
2075 	unsigned int chunk_len;
2076 
2077 	while (len) {
2078 		if (inbuf != NULL) {
2079 			chunk_len = (len > CORE_BUF_SIZE) ? CORE_BUF_SIZE : len;
2080 			copyin(inbuf, dest_buf, chunk_len);
2081 			inbuf += chunk_len;
2082 		} else {
2083 			chunk_len = len;
2084 		}
2085 		len_compressed = gzwrite(file, dest_buf, chunk_len);
2086 
2087 		EVENTHANDLER_INVOKE(app_coredump_progress, td, len_compressed);
2088 
2089 		if ((unsigned int)len_compressed != chunk_len) {
2090 			log(LOG_WARNING,
2091 			    "compress_core: length mismatch (0x%x returned, "
2092 			    "0x%x expected)\n", len_compressed, chunk_len);
2093 			EVENTHANDLER_INVOKE(app_coredump_error, td,
2094 			    "compress_core: length mismatch %x -> %x",
2095 			    chunk_len, len_compressed);
2096 			error = EFAULT;
2097 			break;
2098 		}
2099 		len -= chunk_len;
2100 		maybe_yield();
2101 	}
2102 
2103 	return (error);
2104 }
2105 #endif /* COMPRESS_USER_CORES */
2106 
2107 static vm_prot_t
__elfN(trans_prot)2108 __elfN(trans_prot)(Elf_Word flags)
2109 {
2110 	vm_prot_t prot;
2111 
2112 	prot = 0;
2113 	if (flags & PF_X)
2114 		prot |= VM_PROT_EXECUTE;
2115 	if (flags & PF_W)
2116 		prot |= VM_PROT_WRITE;
2117 	if (flags & PF_R)
2118 		prot |= VM_PROT_READ;
2119 #if __ELF_WORD_SIZE == 32
2120 #if defined(__amd64__) || defined(__ia64__)
2121 	if (i386_read_exec && (flags & PF_R))
2122 		prot |= VM_PROT_EXECUTE;
2123 #endif
2124 #endif
2125 	return (prot);
2126 }
2127 
2128 static Elf_Word
__elfN(untrans_prot)2129 __elfN(untrans_prot)(vm_prot_t prot)
2130 {
2131 	Elf_Word flags;
2132 
2133 	flags = 0;
2134 	if (prot & VM_PROT_EXECUTE)
2135 		flags |= PF_X;
2136 	if (prot & VM_PROT_READ)
2137 		flags |= PF_R;
2138 	if (prot & VM_PROT_WRITE)
2139 		flags |= PF_W;
2140 	return (flags);
2141 }
2142