xref: /freebsd-11-stable/sys/kern/kern_shutdown.c (revision 1b63c8348ca48ead7da98be3d502e619e7dd98f0)
1 /*-
2  * Copyright (c) 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_ddb.h"
41 #include "opt_kdb.h"
42 #include "opt_panic.h"
43 #include "opt_sched.h"
44 #include "opt_watchdog.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/cons.h>
52 #include <sys/eventhandler.h>
53 #include <sys/filedesc.h>
54 #include <sys/jail.h>
55 #include <sys/kdb.h>
56 #include <sys/kernel.h>
57 #include <sys/kerneldump.h>
58 #include <sys/kthread.h>
59 #include <sys/ktr.h>
60 #include <sys/malloc.h>
61 #include <sys/mount.h>
62 #include <sys/priv.h>
63 #include <sys/proc.h>
64 #include <sys/reboot.h>
65 #include <sys/resourcevar.h>
66 #include <sys/rwlock.h>
67 #include <sys/sched.h>
68 #include <sys/smp.h>
69 #include <sys/sysctl.h>
70 #include <sys/sysproto.h>
71 #include <sys/taskqueue.h>
72 #include <sys/vnode.h>
73 #include <sys/watchdog.h>
74 
75 #include <ddb/ddb.h>
76 
77 #include <machine/cpu.h>
78 #include <machine/dump.h>
79 #include <machine/pcb.h>
80 #include <machine/smp.h>
81 
82 #include <security/mac/mac_framework.h>
83 
84 #include <vm/vm.h>
85 #include <vm/vm_object.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_pager.h>
88 #include <vm/swap_pager.h>
89 
90 #include <sys/signalvar.h>
91 
/* Malloc type used for the dump block buffer allocated in set_dumper(). */
static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");

/*
 * Seconds that shutdown_panic() counts down after a panic before letting
 * the reboot proceed.  A value of 0 skips the wait entirely; -1 skips the
 * countdown and waits for a console key press instead.  The built-in
 * default applies only when PANIC_REBOOT_WAIT_TIME is not already defined.
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif
static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
    &panic_reboot_wait_time, 0,
    "Seconds to wait before rebooting after a panic");
101 
102 /*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compilers.
105  */
106 #include <machine/stdarg.h>
107 
#ifdef KDB
/*
 * When non-zero, vpanic() enters the debugger via kdb_enter().  Defaults
 * to off when the kernel is built with KDB_UNATTENDED, on otherwise.
 */
#ifdef KDB_UNATTENDED
static int debugger_on_panic = 0;
#else
static int debugger_on_panic = 1;
#endif
SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_panic, 0, "Run debugger on kernel panic");

/*
 * Exported (non-static) knob; it is only declared and published here and
 * is not consulted anywhere else in this file.
 */
int debugger_on_trap = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_trap, 0, "Run debugger on kernel trap before panic");

/*
 * When non-zero, vpanic() calls kdb_backtrace() for the first panic.
 * Defaults to on when the kernel is built with KDB_TRACE.
 */
#ifdef KDB_TRACE
static int trace_on_panic = 1;
#else
static int trace_on_panic = 0;
#endif
SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &trace_on_panic, 0, "Print stack trace on kernel panic");
#endif /* KDB */
132 
/* When zero, vpanic() adds RB_NOSYNC so kern_reboot() skips the sync. */
static int sync_on_panic = 0;
SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
	&sync_on_panic, 0, "Do a sync before rebooting from a panic");

/* Parent node for the kern.shutdown.* sysctls declared in this file. */
static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
    "Shutdown environment");

/*
 * Passed to bufshutdown() by kern_reboot().  Defaults to 1 on DIAGNOSTIC
 * kernels and to 0 (zero-initialized) otherwise.
 */
#ifndef DIAGNOSTIC
static int show_busybufs;
#else
static int show_busybufs = 1;
#endif
SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
	&show_busybufs, 0, "");

/*
 * Exported (non-static) flag; it is only declared and published here and
 * is not consulted anywhere else in this file.
 */
int suspend_blocked = 0;
SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
	&suspend_blocked, 0, "Block suspend due to a pending shutdown");
151 
/*
 * Variable panicstr contains the message of the first call to panic; it is
 * also used as a flag to indicate that the kernel has already panicked.
 * vpanic() points it at its formatted message buffer.
 */
const char *panicstr;

int dumping;				/* system is dumping; see doadump() */
int rebooting;				/* system is rebooting; set by kern_reboot() */
static struct dumperinfo dumper;	/* our selected dumper; see set_dumper() */

/* Context information for dump-debuggers, captured by doadump(). */
static struct pcb dumppcb;		/* Registers. */
lwpid_t dumptid;			/* Thread ID. */

/*
 * Device switch for the "reroot/reroot" node created by reroot_conf().
 * Only the version and name are filled in; no methods are provided.
 */
static struct cdevsw reroot_cdevsw = {
     .d_version = D_VERSION,
     .d_name    = "reroot",
};
170 
/* Forward declarations: shutdown_final handlers and the reroot helper. */
static void poweroff_wait(void *, int);
static void shutdown_halt(void *junk, int howto);
static void shutdown_panic(void *junk, int howto);
static void shutdown_reset(void *junk, int howto);
static int kern_reroot(void);
176 
/*
 * Register the local shutdown_final event handlers.
 *
 * poweroff_wait() is registered at SHUTDOWN_PRI_FIRST; shutdown_halt() and
 * shutdown_panic() share SHUTDOWN_PRI_LAST + 100; shutdown_reset() is
 * registered at SHUTDOWN_PRI_LAST + 200.  The argument is unused.
 */
static void
shutdown_conf(void *unused)
{

	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
	    SHUTDOWN_PRI_FIRST);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
	    SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
	    SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
	    SHUTDOWN_PRI_LAST + 200);
}

SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);
193 
194 /*
195  * The only reason this exists is to create the /dev/reroot/ directory,
196  * used by reroot code in init(8) as a mountpoint for tmpfs.
197  */
198 static void
reroot_conf(void * unused)199 reroot_conf(void *unused)
200 {
201 	int error;
202 	struct cdev *cdev;
203 
204 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev,
205 	    &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot");
206 	if (error != 0) {
207 		printf("%s: failed to create device node, error %d",
208 		    __func__, error);
209 	}
210 }
211 
212 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL);
213 
214 /*
215  * The system call that results in a reboot.
216  */
217 /* ARGSUSED */
218 int
sys_reboot(struct thread * td,struct reboot_args * uap)219 sys_reboot(struct thread *td, struct reboot_args *uap)
220 {
221 	int error;
222 
223 	error = 0;
224 #ifdef MAC
225 	error = mac_system_check_reboot(td->td_ucred, uap->opt);
226 #endif
227 	if (error == 0)
228 		error = priv_check(td, PRIV_REBOOT);
229 	if (error == 0) {
230 		if (uap->opt & RB_REROOT) {
231 			error = kern_reroot();
232 		} else {
233 			mtx_lock(&Giant);
234 			kern_reboot(uap->opt);
235 			mtx_unlock(&Giant);
236 		}
237 	}
238 	return (error);
239 }
240 
241 static void
shutdown_nice_task_fn(void * arg,int pending __unused)242 shutdown_nice_task_fn(void *arg, int pending __unused)
243 {
244 	int howto;
245 
246 	howto = (uintptr_t)arg;
247 	/* Send a signal to init(8) and have it shutdown the world. */
248 	PROC_LOCK(initproc);
249 	if (howto & RB_POWEROFF)
250 		kern_psignal(initproc, SIGUSR2);
251 	else if (howto & RB_HALT)
252 		kern_psignal(initproc, SIGUSR1);
253 	else
254 		kern_psignal(initproc, SIGINT);
255 	PROC_UNLOCK(initproc);
256 }
257 
258 static struct task shutdown_nice_task = TASK_INITIALIZER(0,
259     &shutdown_nice_task_fn, NULL);
260 
261 /*
262  * Called by events that want to shut down.. e.g  <CTL><ALT><DEL> on a PC
263  */
264 void
shutdown_nice(int howto)265 shutdown_nice(int howto)
266 {
267 
268 	if (initproc != NULL && !SCHEDULER_STOPPED()) {
269 		shutdown_nice_task.ta_context = (void *)(uintptr_t)howto;
270 		taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task);
271 	} else {
272 		/*
273 		 * No init(8) running, or scheduler would not allow it
274 		 * to run, so simply reboot.
275 		 */
276 		kern_reboot(howto | RB_NOSYNC);
277 	}
278 }
279 
280 static void
print_uptime(void)281 print_uptime(void)
282 {
283 	int f;
284 	struct timespec ts;
285 
286 	getnanouptime(&ts);
287 	printf("Uptime: ");
288 	f = 0;
289 	if (ts.tv_sec >= 86400) {
290 		printf("%ldd", (long)ts.tv_sec / 86400);
291 		ts.tv_sec %= 86400;
292 		f = 1;
293 	}
294 	if (f || ts.tv_sec >= 3600) {
295 		printf("%ldh", (long)ts.tv_sec / 3600);
296 		ts.tv_sec %= 3600;
297 		f = 1;
298 	}
299 	if (f || ts.tv_sec >= 60) {
300 		printf("%ldm", (long)ts.tv_sec / 60);
301 		ts.tv_sec %= 60;
302 		f = 1;
303 	}
304 	printf("%lds\n", (long)ts.tv_sec);
305 }
306 
/*
 * Write a kernel dump using the registered dumper.
 *
 * When DDB is compiled in and both textdump and textdump_pending are set,
 * a textdump is written via textdump_dumpsys(); otherwise dumpsys() writes
 * a regular dump.
 *
 * Returns EBUSY if a dump is already in progress, ENXIO if no dumper is
 * configured, and otherwise the result of dumpsys() (0 when a textdump was
 * written instead).
 */
int
doadump(boolean_t textdump)
{
	boolean_t coredump;
	int error;

	error = 0;
	if (dumping)
		return (EBUSY);
	if (dumper.dumper == NULL)
		return (ENXIO);

	/* Record the register context and thread ID for dump-debuggers. */
	savectx(&dumppcb);
	dumptid = curthread->td_tid;
	dumping++;

	coredump = TRUE;
#ifdef DDB
	if (textdump && textdump_pending) {
		coredump = FALSE;
		textdump_dumpsys(&dumper);
	}
#endif
	if (coredump)
		error = dumpsys(&dumper);

	dumping--;
	return (error);
}
336 
/*
 * Shutdown the system cleanly to prepare for reboot, halt, or power off.
 *
 * howto is a mask of RB_* flags.  In order: bind to CPU 0 (SMP kernels,
 * scheduler still running), run the shutdown_pre_sync handlers, flush
 * buffers with bufshutdown() unless RB_NOSYNC is set, run the
 * shutdown_post_sync handlers, optionally take a dump, and finally run the
 * shutdown_final handlers.  This function does not return.
 */
void
kern_reboot(int howto)
{
	static int once = 0;	/* bufshutdown() is attempted at most once */

#if defined(SMP)
	/*
	 * Bind us to CPU 0 so that all shutdown code runs there.  Some
	 * systems don't shutdown properly (i.e., ACPI power off) if we
	 * run on another processor.
	 */
	if (!SCHEDULER_STOPPED()) {
		thread_lock(curthread);
		sched_bind(curthread, 0);
		thread_unlock(curthread);
		KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0"));
	}
#endif
	/* We're in the process of rebooting. */
	rebooting = 1;

	/* We are out of the debugger now. */
	kdb_active = 0;

	/*
	 * Do any callouts that should be done BEFORE syncing the filesystems.
	 */
	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);

	/*
	 * Now sync filesystems
	 */
	if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) {
		once = 1;
		bufshutdown(show_busybufs);
	}

	print_uptime();

	cngrab();

	/*
	 * Ok, now do things that assume all filesystem activity has
	 * been completed.
	 */
	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);

	/* Dump only when RB_DUMP is requested without RB_HALT. */
	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
		doadump(TRUE);

	/* Now that we're going to really halt the system... */
	EVENTHANDLER_INVOKE(shutdown_final, howto);

	for(;;) ;	/* safety against shutdown_reset not working */
	/* NOTREACHED */
}
396 
/*
 * Back end of the RB_REROOT request: replace the root filesystem.
 *
 * Only initproc may call this (EPERM otherwise).  The filesystem holding
 * the caller's executable and the filesystem recorded in rootdevmp are
 * taken off the mount list, everything else is unmounted, and
 * vfs_mountroot() mounts the new root.  Returns 0 on success, the
 * vn_lock() error, or ENOENT if the executable's filesystem could not be
 * busied or its vnode was doomed in the meantime.
 */
static int
kern_reroot(void)
{
	struct vnode *oldrootvnode, *vp;
	struct mount *mp, *devmp;
	int error;

	if (curproc != initproc)
		return (EPERM);

	/*
	 * Mark the filesystem containing currently-running executable
	 * (the temporary copy of init(8)) busy.
	 */
	vp = curproc->p_textvp;
	error = vn_lock(vp, LK_SHARED);
	if (error != 0)
		return (error);
	mp = vp->v_mount;
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error != 0) {
		/*
		 * The non-blocking attempt failed.  Hold a reference on mp,
		 * drop the vnode lock, and retry vfs_busy() without
		 * MBF_NOWAIT; then relock the vnode and recheck it.
		 */
		vfs_ref(mp);
		VOP_UNLOCK(vp, 0);
		error = vfs_busy(mp, 0);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		vfs_rel(mp);
		if (error != 0) {
			VOP_UNLOCK(vp, 0);
			return (ENOENT);
		}
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			vfs_unbusy(mp);
			return (ENOENT);
		}
	}
	VOP_UNLOCK(vp, 0);

	/*
	 * Remove the filesystem containing currently-running executable
	 * from the mount list, to prevent it from being unmounted
	 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
	 *
	 * Also preserve /dev - forcibly unmounting it could cause driver
	 * reinitialization.
	 */

	/* Hold a reference on /dev while rootdevmp is cleared. */
	vfs_ref(rootdevmp);
	devmp = rootdevmp;
	rootdevmp = NULL;

	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	TAILQ_REMOVE(&mountlist, devmp, mnt_list);
	mtx_unlock(&mountlist_mtx);

	/* Remember the old root so references to it can be updated below. */
	oldrootvnode = rootvnode;

	/*
	 * Unmount everything except for the two filesystems preserved above.
	 */
	vfs_unmountall();

	/*
	 * Add /dev back; vfs_mountroot() will move it into its new place.
	 */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	rootdevmp = devmp;
	vfs_rel(rootdevmp);

	/*
	 * Mount the new rootfs.
	 */
	vfs_mountroot();

	/*
	 * Update all references to the old rootvnode.
	 */
	mountcheckdirs(oldrootvnode, rootvnode);

	/*
	 * Add the temporary filesystem back and unbusy it.
	 */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	vfs_unbusy(mp);

	return (0);
}
492 
493 /*
494  * If the shutdown was a clean halt, behave accordingly.
495  */
496 static void
shutdown_halt(void * junk,int howto)497 shutdown_halt(void *junk, int howto)
498 {
499 
500 	if (howto & RB_HALT) {
501 		printf("\n");
502 		printf("The operating system has halted.\n");
503 		printf("Please press any key to reboot.\n\n");
504 
505 		wdog_kern_pat(WD_TO_NEVER);
506 
507 		switch (cngetc()) {
508 		case -1:		/* No console, just die */
509 			cpu_halt();
510 			/* NOTREACHED */
511 		default:
512 			howto &= ~RB_HALT;
513 			break;
514 		}
515 	}
516 }
517 
518 /*
519  * Check to see if the system paniced, pause and then reboot
520  * according to the specified delay.
521  */
522 static void
shutdown_panic(void * junk,int howto)523 shutdown_panic(void *junk, int howto)
524 {
525 	int loop;
526 
527 	if (howto & RB_DUMP) {
528 		if (panic_reboot_wait_time != 0) {
529 			if (panic_reboot_wait_time != -1) {
530 				printf("Automatic reboot in %d seconds - "
531 				       "press a key on the console to abort\n",
532 					panic_reboot_wait_time);
533 				for (loop = panic_reboot_wait_time * 10;
534 				     loop > 0; --loop) {
535 					DELAY(1000 * 100); /* 1/10th second */
536 					/* Did user type a key? */
537 					if (cncheckc() != -1)
538 						break;
539 				}
540 				if (!loop)
541 					return;
542 			}
543 		} else { /* zero time specified - reboot NOW */
544 			return;
545 		}
546 		printf("--> Press a key on the console to reboot,\n");
547 		printf("--> or switch off the system now.\n");
548 		cngetc();
549 	}
550 }
551 
/*
 * Everything done, now reset.
 *
 * shutdown_final handler registered at the latest priority used in this
 * file.  Neither argument is used.  Not expected to return if cpu_reset()
 * works.
 */
static void
shutdown_reset(void *junk, int howto)
{

	printf("Rebooting...\n");
	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */

	/*
	 * Acquiring smp_ipi_mtx here has a double effect:
	 * - it disables interrupts avoiding CPU0 preemption
	 *   by fast handlers (thus deadlocking  against other CPUs)
	 * - it avoids deadlocks against smp_rendezvous() or, more
	 *   generally, threads busy-waiting, with this spinlock held,
	 *   and waiting for responses by threads on other CPUs
	 *   (ie. smp_tlb_shootdown()).
	 *
	 * For the !SMP case it just needs to handle the former problem.
	 */
#ifdef SMP
	mtx_lock_spin(&smp_ipi_mtx);
#else
	spinlock_enter();
#endif

	/* cpu_boot(howto); */ /* doesn't do anything at the moment */
	cpu_reset();
	/* NOTREACHED */ /* assuming reset worked */
}
583 
584 #if defined(WITNESS) || defined(INVARIANT_SUPPORT)
585 static int kassert_warn_only = 0;
586 #ifdef KDB
587 static int kassert_do_kdb = 0;
588 #endif
589 #ifdef KTR
590 static int kassert_do_ktr = 0;
591 #endif
592 static int kassert_do_log = 1;
593 static int kassert_log_pps_limit = 4;
594 static int kassert_log_mute_at = 0;
595 static int kassert_log_panic_at = 0;
596 static int kassert_warnings = 0;
597 
598 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options");
599 
600 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RWTUN,
601     &kassert_warn_only, 0,
602     "KASSERT triggers a panic (1) or just a warning (0)");
603 
604 #ifdef KDB
605 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RWTUN,
606     &kassert_do_kdb, 0, "KASSERT will enter the debugger");
607 #endif
608 
609 #ifdef KTR
610 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RWTUN,
611     &kassert_do_ktr, 0,
612     "KASSERT does a KTR, set this to the KTRMASK you want");
613 #endif
614 
615 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RWTUN,
616     &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)");
617 
618 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RWTUN,
619     &kassert_warnings, 0, "number of KASSERTs that have been triggered");
620 
621 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RWTUN,
622     &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic");
623 
624 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RWTUN,
625     &kassert_log_pps_limit, 0, "limit number of log messages per second");
626 
627 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RWTUN,
628     &kassert_log_mute_at, 0, "max number of KASSERTS to log");
629 
630 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);
631 
632 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
633     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
634     kassert_sysctl_kassert, "I", "set to trigger a test kassert");
635 
636 static int
kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)637 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
638 {
639 	int error, i;
640 
641 	error = sysctl_wire_old_buffer(req, sizeof(int));
642 	if (error == 0) {
643 		i = 0;
644 		error = sysctl_handle_int(oidp, &i, 0, req);
645 	}
646 	if (error != 0 || req->newptr == NULL)
647 		return (error);
648 	KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
649 	return (0);
650 }
651 
/*
 * Called by KASSERT, this decides if we will panic
 * or if we will log via printf and/or ktr.
 *
 * The message is formatted into a static buffer first.  Unless
 * kassert_warn_only is set (and the kassert_log_panic_at limit, if any,
 * has not been reached) the call ends in vpanic().  Otherwise the failure
 * is optionally traced, logged and handed to the debugger, and
 * kassert_warnings is incremented.
 */
void
kassert_panic(const char *fmt, ...)
{
	static char buf[256];	/* shared by all callers; not per-invocation */
	va_list ap;

	va_start(ap, fmt);
	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/*
	 * panic if we're not just warning, or if we've exceeded
	 * kassert_log_panic_at warnings.
	 */
	if (!kassert_warn_only ||
	    (kassert_log_panic_at > 0 &&
	     kassert_warnings >= kassert_log_panic_at)) {
		/* Restart the argument list; the first pass consumed it. */
		va_start(ap, fmt);
		vpanic(fmt, ap);
		/* NORETURN */
	}
#ifdef KTR
	if (kassert_do_ktr)
		CTR0(ktr_mask, buf);
#endif /* KTR */
	/*
	 * log if we've not yet met the mute limit.
	 */
	if (kassert_do_log &&
	    (kassert_log_mute_at == 0 ||
	     kassert_warnings < kassert_log_mute_at)) {
		static  struct timeval lasterr;
		static  int curerr;

		/* Rate-limit console output to kassert_log_pps_limit. */
		if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
			printf("KASSERT failed: %s\n", buf);
			kdb_backtrace();
		}
	}
#ifdef KDB
	if (kassert_do_kdb) {
		kdb_enter(KDB_WHY_KASSERT, buf);
	}
#endif
	atomic_add_int(&kassert_warnings, 1);
}
702 #endif
703 
/*
 * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
 * and then reboots.  If we are called twice, then we avoid trying to sync
 * the disks as this often leads to recursive panics.
 *
 * This is the variadic front end; all of the work is done by vpanic().
 */
void
panic(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	/* No va_end(): vpanic() ends in kern_reboot(), which never returns. */
	vpanic(fmt, ap);
}
717 
/*
 * Common back end of panic() and kassert_panic().
 *
 * On the first panic the message is formatted into a static buffer that
 * panicstr is pointed at, and RB_DUMP is requested.  On a nested panic the
 * message is only printed and RB_NOSYNC is requested.  The function ends
 * by calling kern_reboot() and therefore does not return.
 */
void
vpanic(const char *fmt, va_list ap)
{
#ifdef SMP
	cpuset_t other_cpus;
#endif
	struct thread *td = curthread;
	int bootopt, newpanic;
	static char buf[256];	/* backing store for panicstr */

	spinlock_enter();

#ifdef SMP
	/*
	 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
	 * concurrently entering panic.  Only the winner will proceed
	 * further.
	 */
	if (panicstr == NULL && !kdb_active) {
		other_cpus = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
		stop_cpus_hard(other_cpus);
	}
#endif

	/*
	 * Ensure that the scheduler is stopped while panicking, even if panic
	 * has been entered from kdb.
	 */
	td->td_stopsched = 1;

	bootopt = RB_AUTOBOOT;
	newpanic = 0;
	if (panicstr)
		bootopt |= RB_NOSYNC;	/* nested panic: do not sync again */
	else {
		bootopt |= RB_DUMP;
		panicstr = fmt;
		newpanic = 1;
	}

	if (newpanic) {
		/* Replace the raw format with the fully formatted message. */
		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
		panicstr = buf;
		cngrab();
		printf("panic: %s\n", buf);
	} else {
		printf("panic: ");
		vprintf(fmt, ap);
		printf("\n");
	}
#ifdef SMP
	printf("cpuid = %d\n", PCPU_GET(cpuid));
#endif

#ifdef KDB
	if (newpanic && trace_on_panic)
		kdb_backtrace();
	if (debugger_on_panic)
		kdb_enter(KDB_WHY_PANIC, "panic");
#endif
	/*thread_lock(td); */
	td->td_flags |= TDF_INPANIC;
	/* thread_unlock(td); */
	if (!sync_on_panic)
		bootopt |= RB_NOSYNC;
	kern_reboot(bootopt);
}
786 
787 /*
788  * Support for poweroff delay.
789  *
790  * Please note that setting this delay too short might power off your machine
791  * before the write cache on your hard disk has been flushed, leading to
792  * soft-updates inconsistencies.
793  */
794 #ifndef POWEROFF_DELAY
795 # define POWEROFF_DELAY 5000
796 #endif
797 static int poweroff_delay = POWEROFF_DELAY;
798 
799 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
800     &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");
801 
802 static void
poweroff_wait(void * junk,int howto)803 poweroff_wait(void *junk, int howto)
804 {
805 
806 	if (!(howto & RB_POWEROFF) || poweroff_delay <= 0)
807 		return;
808 	DELAY(poweroff_delay * 1000);
809 }
810 
811 /*
812  * Some system processes (e.g. syncer) need to be stopped at appropriate
813  * points in their main loops prior to a system shutdown, so that they
814  * won't interfere with the shutdown process (e.g. by holding a disk buf
815  * to cause sync to fail).  For each of these system processes, register
816  * shutdown_kproc() as a handler for one of shutdown events.
817  */
818 static int kproc_shutdown_wait = 60;
819 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
820     &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process");
821 
822 void
kproc_shutdown(void * arg,int howto)823 kproc_shutdown(void *arg, int howto)
824 {
825 	struct proc *p;
826 	int error;
827 
828 	if (panicstr)
829 		return;
830 
831 	p = (struct proc *)arg;
832 	printf("Waiting (max %d seconds) for system process `%s' to stop... ",
833 	    kproc_shutdown_wait, p->p_comm);
834 	error = kproc_suspend(p, kproc_shutdown_wait * hz);
835 
836 	if (error == EWOULDBLOCK)
837 		printf("timed out\n");
838 	else
839 		printf("done\n");
840 }
841 
842 void
kthread_shutdown(void * arg,int howto)843 kthread_shutdown(void *arg, int howto)
844 {
845 	struct thread *td;
846 	int error;
847 
848 	if (panicstr)
849 		return;
850 
851 	td = (struct thread *)arg;
852 	printf("Waiting (max %d seconds) for system thread `%s' to stop... ",
853 	    kproc_shutdown_wait, td->td_name);
854 	error = kthread_suspend(td, kproc_shutdown_wait * hz);
855 
856 	if (error == EWOULDBLOCK)
857 		printf("timed out\n");
858 	else
859 		printf("done\n");
860 }
861 
/* Name of the configured dump device; empty when no dumper is set. */
static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)];
SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD,
    dumpdevname, 0, "Device for kernel dumps");

/*
 * Registration of dumpers.
 *
 * With di == NULL the current dumper is cleared: its block buffer is
 * freed, the dumper state is zeroed and the device name is emptied.
 * Otherwise *di is copied into the global dumper, devname is recorded
 * (a truncated name is reported on the console) and a zeroed block
 * buffer of di->blocksize bytes is allocated.
 *
 * Returns 0 on success, the priv_check() error if td lacks
 * PRIV_SETDUMPER, or EBUSY if a dumper is already registered.
 */
int
set_dumper(struct dumperinfo *di, const char *devname, struct thread *td)
{
	size_t wantcopy;
	int error;

	error = priv_check(td, PRIV_SETDUMPER);
	if (error != 0)
		return (error);

	if (di == NULL) {
		if (dumper.blockbuf != NULL)
			free(dumper.blockbuf, M_DUMPER);
		bzero(&dumper, sizeof(dumper));
		dumpdevname[0] = '\0';
		return (0);
	}
	if (dumper.dumper != NULL)
		return (EBUSY);
	dumper = *di;
	/* strlcpy() returns the source length, so >= size means truncation. */
	wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname));
	if (wantcopy >= sizeof(dumpdevname)) {
		printf("set_dumper: device name truncated from '%s' -> '%s'\n",
			devname, dumpdevname);
	}
	dumper.blockbuf = malloc(di->blocksize, M_DUMPER, M_WAITOK | M_ZERO);
	return (0);
}
895 
/*
 * Call dumper with bounds checking.
 *
 * A non-empty write must start at or after di->mediaoffset and end within
 * di->mediasize bytes of it; otherwise a diagnostic is printed and ENOSPC
 * is returned.  Zero-length writes skip the check.  On success the return
 * value is whatever the di->dumper callback returns.
 */
int
dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical,
    off_t offset, size_t length)
{

	if (length != 0 && (offset < di->mediaoffset ||
	    offset - di->mediaoffset + length > di->mediasize)) {
		printf("Attempt to write outside dump device boundaries.\n"
	    "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n",
		    (intmax_t)offset, (intmax_t)di->mediaoffset,
		    (uintmax_t)length, (intmax_t)di->mediasize);
		return (ENOSPC);
	}
	return (di->dumper(di->priv, virtual, physical, offset, length));
}
912 
913 /* Call dumper with bounds checking. */
914 int
dump_write_pad(struct dumperinfo * di,void * virtual,vm_offset_t physical,off_t offset,size_t length,size_t * size)915 dump_write_pad(struct dumperinfo *di, void *virtual, vm_offset_t physical,
916     off_t offset, size_t length, size_t *size)
917 {
918 	char *temp;
919 	int ret;
920 
921 	if (length > di->blocksize)
922 		return (ENOMEM);
923 
924 	*size = di->blocksize;
925 	if (length == di->blocksize)
926 		temp = virtual;
927 	else {
928 		temp = di->blockbuf;
929 		memset(temp + length, 0, di->blocksize - length);
930 		memcpy(temp, virtual, length);
931 	}
932 	ret = dump_write(di, temp, physical, offset, *size);
933 
934 	return (ret);
935 }
936 
937 
/*
 * Fill in a kernel dump header.
 *
 * The header is zeroed and then populated with the caller's magic string,
 * architecture version, dump length and block size, plus MACHINE_ARCH,
 * KERNELDUMPVERSION, the current time_second, prison0's hostname, the
 * kernel version string and, if set, the panic string.  Numeric fields are
 * converted with htod32()/htod64().  The parity field is computed last,
 * over the completed header.
 */
void
mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver,
    uint64_t dumplen, uint32_t blksz)
{
	size_t dstsize;

	bzero(kdh, sizeof(*kdh));
	strlcpy(kdh->magic, magic, sizeof(kdh->magic));
	strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
	kdh->version = htod32(KERNELDUMPVERSION);
	kdh->architectureversion = htod32(archver);
	kdh->dumplength = htod64(dumplen);
	kdh->dumptime = htod64(time_second);
	kdh->blocksize = htod32(blksz);
	strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
	dstsize = sizeof(kdh->versionstring);
	/* If the version string was truncated, end it with a newline. */
	if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize)
		kdh->versionstring[dstsize - 2] = '\n';
	if (panicstr != NULL)
		strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
	kdh->parity = kerneldump_parity(kdh);
}
960 
#ifdef DDB
/* DDB "show panic": print the recorded panic message, if there is one. */
DB_SHOW_COMMAND(panic, db_show_panic)
{

	if (panicstr != NULL)
		db_printf("panic: %s\n", panicstr);
	else
		db_printf("panicstr not set\n");
}
#endif
971