xref: /NextBSD/sys/kern/kern_thread.c (revision 79ec12eeb6f427ceaaaa94ca98073316d8c24e47)
1 /*-
2  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
3  *  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28 
29 #include "opt_witness.h"
30 #include "opt_hwpmc_hooks.h"
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mutex.h>
40 #include <sys/proc.h>
41 #include <sys/rangelock.h>
42 #include <sys/resourcevar.h>
43 #include <sys/sdt.h>
44 #include <sys/smp.h>
45 #include <sys/sched.h>
46 #include <sys/sleepqueue.h>
47 #include <sys/selinfo.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/turnstile.h>
51 #include <sys/ktr.h>
52 #include <sys/rwlock.h>
53 #include <sys/umtx.h>
54 #include <sys/cpuset.h>
55 #ifdef	HWPMC_HOOKS
56 #include <sys/pmckern.h>
57 #endif
58 
59 #include <security/audit/audit.h>
60 
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64 #include <vm/vm_domain.h>
65 #include <sys/eventhandler.h>
66 
/* DTrace: the "proc" provider is declared here and the lwp__exit probe is defined. */
67 SDT_PROVIDER_DECLARE(proc);
68 SDT_PROBE_DEFINE(proc, , , lwp__exit);
69 
70 /*
 * Thread related storage.
 */
/* UMA zone that struct thread is allocated from; created in threadinit(). */
73 static uma_zone_t thread_zone;
74 
/*
 * Exited threads waiting to be reclaimed.  thread_zombie() inserts under
 * zombie_lock (a spin mutex); thread_reap() drains the list.
 */
75 TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
76 static struct mtx zombie_lock;
77 MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);
78 
/* Prototypes for helpers that are used before their definitions. */
79 static void thread_zombie(struct thread *);
80 static int thread_unsuspend_one(struct thread *td, struct proc *p,
81     bool boundary);
82 
/* Number of slots in the ring of released thread IDs (tid_buffer). */
83 #define TID_BUFFER_SIZE	1024
84 
/*
 * Thread ID allocation state.  tid_unrhdr is the unit-number allocator set
 * up in threadinit(); tid_buffer is a ring of released IDs indexed by
 * tid_head (next to reuse) and tid_tail (next free slot), manipulated by
 * tid_alloc()/tid_free() with tid_lock held.
 */
85 struct mtx tid_lock;
86 static struct unrhdr *tid_unrhdr;
87 static lwpid_t tid_buffer[TID_BUFFER_SIZE];
88 static int tid_head, tid_tail;
89 static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");
90 
/*
 * Thread ID hash.  tidhashtbl and tidhash are filled in by hashinit() in
 * threadinit(), which also initialises tidhash_lock.
 * NOTE(review): tidhash is presumably the hash mask returned by hashinit();
 * confirm against hashinit(9).
 */
91 struct	tidhashhead *tidhashtbl;
92 u_long	tidhash;
93 struct	rwlock tidhash_lock;
94 
95 static lwpid_t
tid_alloc(void)96 tid_alloc(void)
97 {
98 	lwpid_t	tid;
99 
100 	tid = alloc_unr(tid_unrhdr);
101 	if (tid != -1)
102 		return (tid);
103 	mtx_lock(&tid_lock);
104 	if (tid_head == tid_tail) {
105 		mtx_unlock(&tid_lock);
106 		return (-1);
107 	}
108 	tid = tid_buffer[tid_head];
109 	tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
110 	mtx_unlock(&tid_lock);
111 	return (tid);
112 }
113 
114 static void
tid_free(lwpid_t tid)115 tid_free(lwpid_t tid)
116 {
117 	lwpid_t tmp_tid = -1;
118 
119 	mtx_lock(&tid_lock);
120 	if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) {
121 		tmp_tid = tid_buffer[tid_head];
122 		tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
123 	}
124 	tid_buffer[tid_tail] = tid;
125 	tid_tail = (tid_tail + 1) % TID_BUFFER_SIZE;
126 	mtx_unlock(&tid_lock);
127 	if (tmp_tid != -1)
128 		free_unr(tid_unrhdr, tmp_tid);
129 }
130 
131 /*
132  * Prepare a thread for use.
133  */
134 static int
thread_ctor(void * mem,int size,void * arg,int flags)135 thread_ctor(void *mem, int size, void *arg, int flags)
136 {
137 	struct thread	*td;
138 
139 	td = (struct thread *)mem;
140 	td->td_state = TDS_INACTIVE;
141 	td->td_oncpu = NOCPU;
142 
143 	td->td_tid = tid_alloc();
144 
145 	td->td_cswitchcb = NULL;
146 	td->td_threadlist = NULL;
147 	td->td_reuse_stack = NULL;
148 	/*
149 	 * Note that td_critnest begins life as 1 because the thread is not
150 	 * running and is thereby implicitly waiting to be on the receiving
151 	 * end of a context switch.
152 	 */
153 	td->td_critnest = 1;
154 	td->td_lend_user_pri = PRI_MAX;
155 	EVENTHANDLER_INVOKE(thread_ctor, td);
156 #ifdef AUDIT
157 	audit_thread_alloc(td);
158 #endif
159 	umtx_thread_alloc(td);
160 	return (0);
161 }
162 
163 /*
164  * Reclaim a thread after use.
165  */
166 static void
thread_dtor(void * mem,int size,void * arg)167 thread_dtor(void *mem, int size, void *arg)
168 {
169 	struct thread *td;
170 
171 	td = (struct thread *)mem;
172 
173 #ifdef INVARIANTS
174 	/* Verify that this thread is in a safe state to free. */
175 	switch (td->td_state) {
176 	case TDS_INHIBITED:
177 	case TDS_RUNNING:
178 	case TDS_CAN_RUN:
179 	case TDS_RUNQ:
180 		/*
181 		 * We must never unlink a thread that is in one of
182 		 * these states, because it is currently active.
183 		 */
184 		panic("bad state for thread unlinking");
185 		/* NOTREACHED */
186 	case TDS_INACTIVE:
187 		break;
188 	default:
189 		panic("bad thread state");
190 		/* NOTREACHED */
191 	}
192 #endif
193 #ifdef AUDIT
194 	audit_thread_free(td);
195 #endif
196 	/* Free all OSD associated to this thread. */
197 	osd_thread_exit(td);
198 
199 	EVENTHANDLER_INVOKE(thread_dtor, td);
200 	tid_free(td->td_tid);
201 }
202 
203 /*
204  * Initialize type-stable parts of a thread (when newly created).
205  */
206 static int
thread_init(void * mem,int size,int flags)207 thread_init(void *mem, int size, int flags)
208 {
209 	struct thread *td;
210 
211 	td = (struct thread *)mem;
212 
213 	td->td_sleepqueue = sleepq_alloc();
214 	td->td_turnstile = turnstile_alloc();
215 	td->td_rlqe = NULL;
216 	EVENTHANDLER_INVOKE(thread_init, td);
217 	td->td_sched = (struct td_sched *)&td[1];
218 	umtx_thread_init(td);
219 	td->td_kstack = 0;
220 	td->td_sel = NULL;
221 	return (0);
222 }
223 
224 /*
225  * Tear down type-stable parts of a thread (just before being discarded).
226  */
227 static void
thread_fini(void * mem,int size)228 thread_fini(void *mem, int size)
229 {
230 	struct thread *td;
231 
232 	td = (struct thread *)mem;
233 	EVENTHANDLER_INVOKE(thread_fini, td);
234 	rlqentry_free(td->td_rlqe);
235 	turnstile_free(td->td_turnstile);
236 	sleepq_free(td->td_sleepqueue);
237 	umtx_thread_fini(td);
238 	seltdfini(td);
239 }
240 
241 /*
242  * For a newly created process,
243  * link up all the structures and its initial threads etc.
244  * called from:
245  * {arch}/{arch}/machdep.c   {arch}_init(), init386() etc.
246  * proc_dtor() (should go away)
247  * proc_init()
248  */
249 void
proc_linkup0(struct proc * p,struct thread * td)250 proc_linkup0(struct proc *p, struct thread *td)
251 {
252 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
253 	proc_linkup(p, td);
254 }
255 
256 void
proc_linkup(struct proc * p,struct thread * td)257 proc_linkup(struct proc *p, struct thread *td)
258 {
259 
260 	sigqueue_init(&p->p_sigqueue, p);
261 	p->p_ksi = ksiginfo_alloc(1);
262 	if (p->p_ksi != NULL) {
263 		/* XXX p_ksi may be null if ksiginfo zone is not ready */
264 		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
265 	}
266 	LIST_INIT(&p->p_mqnotifier);
267 	p->p_numthreads = 0;
268 	thread_link(td, p);
269 }
270 
271 /*
272  * Initialize global thread allocation resources.
273  */
274 void
threadinit(void)275 threadinit(void)
276 {
277 
278 	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
279 
280 	/*
281 	 * pid_max cannot be greater than PID_MAX.
282 	 * leave one number for thread0.
283 	 */
284 	tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);
285 
286 	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
287 	    thread_ctor, thread_dtor, thread_init, thread_fini,
288 	    16 - 1, UMA_ZONE_NOFREE);
289 	tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
290 	rw_init(&tidhash_lock, "tidhash");
291 }
292 
293 /*
294  * Place an unused thread on the zombie list.
295  * Use the slpq as that must be unused by now.
296  */
297 void
thread_zombie(struct thread * td)298 thread_zombie(struct thread *td)
299 {
300 	mtx_lock_spin(&zombie_lock);
301 	TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
302 	mtx_unlock_spin(&zombie_lock);
303 }
304 
305 /*
306  * Release a thread that has exited after cpu_throw().
307  */
308 void
thread_stash(struct thread * td)309 thread_stash(struct thread *td)
310 {
311 	atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
312 	thread_zombie(td);
313 }
314 
315 /*
316  * Reap zombie resources.
317  */
318 void
thread_reap(void)319 thread_reap(void)
320 {
321 	struct thread *td_first, *td_next;
322 
323 	/*
324 	 * Don't even bother to lock if none at this instant,
325 	 * we really don't care about the next instant..
326 	 */
327 	if (!TAILQ_EMPTY(&zombie_threads)) {
328 		mtx_lock_spin(&zombie_lock);
329 		td_first = TAILQ_FIRST(&zombie_threads);
330 		if (td_first)
331 			TAILQ_INIT(&zombie_threads);
332 		mtx_unlock_spin(&zombie_lock);
333 		while (td_first) {
334 			td_next = TAILQ_NEXT(td_first, td_slpq);
335 			thread_cow_free(td_first);
336 			thread_free(td_first);
337 			td_first = td_next;
338 		}
339 	}
340 }
341 
342 /*
343  * Allocate a thread.
344  */
345 struct thread *
thread_alloc(int pages)346 thread_alloc(int pages)
347 {
348 	struct thread *td;
349 
350 	thread_reap(); /* check if any zombies to get */
351 
352 	td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
353 	KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
354 	if (!vm_thread_new(td, pages)) {
355 		uma_zfree(thread_zone, td);
356 		return (NULL);
357 	}
358 	cpu_thread_alloc(td);
359 	vm_domain_policy_init(&td->td_vm_dom_policy);
360 	return (td);
361 }
362 
363 int
thread_alloc_stack(struct thread * td,int pages)364 thread_alloc_stack(struct thread *td, int pages)
365 {
366 
367 	KASSERT(td->td_kstack == 0,
368 	    ("thread_alloc_stack called on a thread with kstack"));
369 	if (!vm_thread_new(td, pages))
370 		return (0);
371 	cpu_thread_alloc(td);
372 	return (1);
373 }
374 
375 /*
376  * Deallocate a thread.
377  */
378 void
thread_free(struct thread * td)379 thread_free(struct thread *td)
380 {
381 
382 	lock_profile_thread_exit(td);
383 	if (td->td_cpuset)
384 		cpuset_rel(td->td_cpuset);
385 	td->td_cpuset = NULL;
386 	cpu_thread_free(td);
387 	if (td->td_kstack != 0)
388 		vm_thread_dispose(td);
389 	vm_domain_policy_cleanup(&td->td_vm_dom_policy);
390 	uma_zfree(thread_zone, td);
391 }
392 
393 void
thread_cow_get_proc(struct thread * newtd,struct proc * p)394 thread_cow_get_proc(struct thread *newtd, struct proc *p)
395 {
396 
397 	PROC_LOCK_ASSERT(p, MA_OWNED);
398 	newtd->td_ucred = crhold(p->p_ucred);
399 	newtd->td_limit = lim_hold(p->p_limit);
400 	newtd->td_cowgen = p->p_cowgen;
401 }
402 
403 void
thread_cow_get(struct thread * newtd,struct thread * td)404 thread_cow_get(struct thread *newtd, struct thread *td)
405 {
406 
407 	newtd->td_ucred = crhold(td->td_ucred);
408 	newtd->td_limit = lim_hold(td->td_limit);
409 	newtd->td_cowgen = td->td_cowgen;
410 }
411 
412 void
thread_cow_free(struct thread * td)413 thread_cow_free(struct thread *td)
414 {
415 
416 	if (td->td_ucred != NULL)
417 		crfree(td->td_ucred);
418 	if (td->td_limit != NULL)
419 		lim_free(td->td_limit);
420 }
421 
422 void
thread_cow_update(struct thread * td)423 thread_cow_update(struct thread *td)
424 {
425 	struct proc *p;
426 	struct ucred *oldcred;
427 	struct plimit *oldlimit;
428 
429 	p = td->td_proc;
430 	oldcred = NULL;
431 	oldlimit = NULL;
432 	PROC_LOCK(p);
433 	if (td->td_ucred != p->p_ucred) {
434 		oldcred = td->td_ucred;
435 		td->td_ucred = crhold(p->p_ucred);
436 	}
437 	if (td->td_limit != p->p_limit) {
438 		oldlimit = td->td_limit;
439 		td->td_limit = lim_hold(p->p_limit);
440 	}
441 	td->td_cowgen = p->p_cowgen;
442 	PROC_UNLOCK(p);
443 	if (oldcred != NULL)
444 		crfree(oldcred);
445 	if (oldlimit != NULL)
446 		lim_free(oldlimit);
447 }
448 
449 /*
450  * Discard the current thread and exit from its context.
451  * Always called with scheduler locked.
452  *
453  * Because we can't free a thread while we're operating under its context,
454  * push the current thread into our CPU's deadthread holder. This means
455  * we needn't worry about someone else grabbing our context before we
456  * do a cpu_throw().
457  */
458 void
thread_exit(void)459 thread_exit(void)
460 {
461 	uint64_t runtime, new_switchtime;
462 	struct thread *td;
463 	struct thread *td2;
464 	struct proc *p;
465 	int wakeup_swapper;
466 
467 	td = curthread;
468 	p = td->td_proc;
469 
470 	PROC_SLOCK_ASSERT(p, MA_OWNED);
471 	mtx_assert(&Giant, MA_NOTOWNED);
472 
473 	PROC_LOCK_ASSERT(p, MA_OWNED);
474 	KASSERT(p != NULL, ("thread exiting without a process"));
475 	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
476 	    (long)p->p_pid, td->td_name);
477 	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
478 
479 #ifdef AUDIT
480 	AUDIT_SYSCALL_EXIT(0, td);
481 #endif
482 	/*
483 	 * drop FPU & debug register state storage, or any other
484 	 * architecture specific resources that
485 	 * would not be on a new untouched process.
486 	 */
487 	cpu_thread_exit(td);	/* XXXSMP */
488 
489 	/*
490 	 * The last thread is left attached to the process
491 	 * So that the whole bundle gets recycled. Skip
492 	 * all this stuff if we never had threads.
493 	 * EXIT clears all sign of other threads when
494 	 * it goes to single threading, so the last thread always
495 	 * takes the short path.
496 	 */
497 	if (p->p_flag & P_HADTHREADS) {
498 		if (p->p_numthreads > 1) {
499 			atomic_add_int(&td->td_proc->p_exitthreads, 1);
500 			thread_unlink(td);
501 			td2 = FIRST_THREAD_IN_PROC(p);
502 			sched_exit_thread(td2, td);
503 
504 			/*
505 			 * The test below is NOT true if we are the
506 			 * sole exiting thread. P_STOPPED_SINGLE is unset
507 			 * in exit1() after it is the only survivor.
508 			 */
509 			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
510 				if (p->p_numthreads == p->p_suspcount) {
511 					thread_lock(p->p_singlethread);
512 					wakeup_swapper = thread_unsuspend_one(
513 						p->p_singlethread, p, false);
514 					thread_unlock(p->p_singlethread);
515 					if (wakeup_swapper)
516 						kick_proc0();
517 				}
518 			}
519 
520 			PCPU_SET(deadthread, td);
521 		} else {
522 			/*
523 			 * The last thread is exiting.. but not through exit()
524 			 */
525 			panic ("thread_exit: Last thread exiting on its own");
526 		}
527 	}
528 #ifdef	HWPMC_HOOKS
529 	/*
530 	 * If this thread is part of a process that is being tracked by hwpmc(4),
531 	 * inform the module of the thread's impending exit.
532 	 */
533 	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
534 		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
535 #endif
536 	PROC_UNLOCK(p);
537 	PROC_STATLOCK(p);
538 	thread_lock(td);
539 	PROC_SUNLOCK(p);
540 
541 	/* Do the same timestamp bookkeeping that mi_switch() would do. */
542 	new_switchtime = cpu_ticks();
543 	runtime = new_switchtime - PCPU_GET(switchtime);
544 	td->td_runtime += runtime;
545 	td->td_incruntime += runtime;
546 	PCPU_SET(switchtime, new_switchtime);
547 	PCPU_SET(switchticks, ticks);
548 	PCPU_INC(cnt.v_swtch);
549 
550 	/* Save our resource usage in our process. */
551 	td->td_ru.ru_nvcsw++;
552 	ruxagg(p, td);
553 	rucollect(&p->p_ru, &td->td_ru);
554 	PROC_STATUNLOCK(p);
555 
556 	td->td_state = TDS_INACTIVE;
557 #ifdef WITNESS
558 	witness_thread_exit(td);
559 #endif
560 	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
561 	sched_throw(td);
562 	panic("I'm a teapot!");
563 	/* NOTREACHED */
564 }
565 
566 /*
567  * Do any thread specific cleanups that may be needed in wait()
568  * called with Giant, proc and schedlock not held.
569  */
570 void
thread_wait(struct proc * p)571 thread_wait(struct proc *p)
572 {
573 	struct thread *td;
574 
575 	mtx_assert(&Giant, MA_NOTOWNED);
576 	KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
577 	KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
578 	td = FIRST_THREAD_IN_PROC(p);
579 	/* Lock the last thread so we spin until it exits cpu_throw(). */
580 	thread_lock(td);
581 	thread_unlock(td);
582 	lock_profile_thread_exit(td);
583 	cpuset_rel(td->td_cpuset);
584 	td->td_cpuset = NULL;
585 	cpu_thread_clean(td);
586 	thread_cow_free(td);
587 	thread_reap();	/* check for zombie threads etc. */
588 }
589 
590 /*
591  * Link a thread to a process.
592  * set up anything that needs to be initialized for it to
593  * be used by the process.
594  */
595 void
thread_link(struct thread * td,struct proc * p)596 thread_link(struct thread *td, struct proc *p)
597 {
598 
599 	/*
600 	 * XXX This can't be enabled because it's called for proc0 before
601 	 * its lock has been created.
602 	 * PROC_LOCK_ASSERT(p, MA_OWNED);
603 	 */
604 	td->td_state    = TDS_INACTIVE;
605 	td->td_proc     = p;
606 	td->td_flags    = TDF_INMEM;
607 
608 	LIST_INIT(&td->td_contested);
609 	LIST_INIT(&td->td_lprof[0]);
610 	LIST_INIT(&td->td_lprof[1]);
611 	sigqueue_init(&td->td_sigqueue, p);
612 	callout_init(&td->td_slpcallout, 1);
613 	TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
614 	p->p_numthreads++;
615 }
616 
617 /*
618  * Called from:
619  *  thread_exit()
620  */
621 void
thread_unlink(struct thread * td)622 thread_unlink(struct thread *td)
623 {
624 	struct proc *p = td->td_proc;
625 
626 	PROC_LOCK_ASSERT(p, MA_OWNED);
627 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
628 	p->p_numthreads--;
629 	/* could clear a few other things here */
630 	/* Must  NOT clear links to proc! */
631 }
632 
633 static int
calc_remaining(struct proc * p,int mode)634 calc_remaining(struct proc *p, int mode)
635 {
636 	int remaining;
637 
638 	PROC_LOCK_ASSERT(p, MA_OWNED);
639 	PROC_SLOCK_ASSERT(p, MA_OWNED);
640 	if (mode == SINGLE_EXIT)
641 		remaining = p->p_numthreads;
642 	else if (mode == SINGLE_BOUNDARY)
643 		remaining = p->p_numthreads - p->p_boundary_count;
644 	else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC)
645 		remaining = p->p_numthreads - p->p_suspcount;
646 	else
647 		panic("calc_remaining: wrong mode %d", mode);
648 	return (remaining);
649 }
650 
651 static int
remain_for_mode(int mode)652 remain_for_mode(int mode)
653 {
654 
655 	return (mode == SINGLE_ALLPROC ? 0 : 1);
656 }
657 
658 static int
weed_inhib(int mode,struct thread * td2,struct proc * p)659 weed_inhib(int mode, struct thread *td2, struct proc *p)
660 {
661 	int wakeup_swapper;
662 
663 	PROC_LOCK_ASSERT(p, MA_OWNED);
664 	PROC_SLOCK_ASSERT(p, MA_OWNED);
665 	THREAD_LOCK_ASSERT(td2, MA_OWNED);
666 
667 	wakeup_swapper = 0;
668 	switch (mode) {
669 	case SINGLE_EXIT:
670 		if (TD_IS_SUSPENDED(td2))
671 			wakeup_swapper |= thread_unsuspend_one(td2, p, true);
672 		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
673 			wakeup_swapper |= sleepq_abort(td2, EINTR);
674 		break;
675 	case SINGLE_BOUNDARY:
676 		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
677 			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
678 		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
679 			wakeup_swapper |= sleepq_abort(td2, ERESTART);
680 		break;
681 	case SINGLE_NO_EXIT:
682 		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
683 			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
684 		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
685 			wakeup_swapper |= sleepq_abort(td2, ERESTART);
686 		break;
687 	case SINGLE_ALLPROC:
688 		/*
689 		 * ALLPROC suspend tries to avoid spurious EINTR for
690 		 * threads sleeping interruptable, by suspending the
691 		 * thread directly, similarly to sig_suspend_threads().
692 		 * Since such sleep is not performed at the user
693 		 * boundary, TDF_BOUNDARY flag is not set, and TDF_ALLPROCSUSP
694 		 * is used to avoid immediate un-suspend.
695 		 */
696 		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
697 		    TDF_ALLPROCSUSP)) == 0)
698 			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
699 		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) {
700 			if ((td2->td_flags & TDF_SBDRY) == 0) {
701 				thread_suspend_one(td2);
702 				td2->td_flags |= TDF_ALLPROCSUSP;
703 			} else {
704 				wakeup_swapper |= sleepq_abort(td2, ERESTART);
705 			}
706 		}
707 		break;
708 	}
709 	return (wakeup_swapper);
710 }
711 
712 /*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * There are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however be
 * accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptable. (PCATCH).
 *
 * "mode" is one of SINGLE_EXIT, SINGLE_BOUNDARY, SINGLE_NO_EXIT or
 * SINGLE_ALLPROC.  SINGLE_ALLPROC is only accepted for a process other
 * than the caller's, the other modes only for the caller's own process
 * (asserted below).  The process lock must be held on entry; it is
 * dropped and re-taken while waiting.  The process spin lock is taken
 * here and released before returning 0 from the end of the function.
 */
725 int
thread_single(struct proc * p,int mode)726 thread_single(struct proc *p, int mode)
727 {
728 	struct thread *td;
729 	struct thread *td2;
730 	int remaining, wakeup_swapper;
731 
732 	td = curthread;
733 	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
734 	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
735 	    ("invalid mode %d", mode));
736 	/*
737 	 * If allowing non-ALLPROC singlethreading for non-curproc
738 	 * callers, calc_remaining() and remain_for_mode() should be
739 	 * adjusted to also account for td->td_proc != p.  For now
740 	 * this is not implemented because it is not used.
741 	 */
742 	KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) ||
743 	    (mode != SINGLE_ALLPROC && td->td_proc == p),
744 	    ("mode %d proc %p curproc %p", mode, p, td->td_proc));
745 	mtx_assert(&Giant, MA_NOTOWNED);
746 	PROC_LOCK_ASSERT(p, MA_OWNED);
747 
	/*
	 * A process that never had threads is trivially single-threaded,
	 * except in ALLPROC mode where the caller is not part of it.
	 */
748 	if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC)
749 		return (0);
750 
751 	/* Is someone already single threading? */
752 	if (p->p_singlethread != NULL && p->p_singlethread != td)
753 		return (1);
754 
	/* Record the requested mode in the process flags. */
755 	if (mode == SINGLE_EXIT) {
756 		p->p_flag |= P_SINGLE_EXIT;
757 		p->p_flag &= ~P_SINGLE_BOUNDARY;
758 	} else {
759 		p->p_flag &= ~P_SINGLE_EXIT;
760 		if (mode == SINGLE_BOUNDARY)
761 			p->p_flag |= P_SINGLE_BOUNDARY;
762 		else
763 			p->p_flag &= ~P_SINGLE_BOUNDARY;
764 	}
765 	if (mode == SINGLE_ALLPROC)
766 		p->p_flag |= P_TOTAL_STOP;
767 	p->p_flag |= P_STOPPED_SINGLE;
768 	PROC_SLOCK(p);
769 	p->p_singlethread = td;
770 	remaining = calc_remaining(p, mode);
	/*
	 * Keep prodding the other threads, and suspending ourselves in
	 * between, until only the expected number remain.
	 */
771 	while (remaining != remain_for_mode(mode)) {
		/*
		 * P_SHOULDSTOP() reports something other than just the
		 * single-threading stop: go straight to suspending
		 * ourselves without prodding anyone.
		 */
772 		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
773 			goto stopme;
774 		wakeup_swapper = 0;
775 		FOREACH_THREAD_IN_PROC(p, td2) {
776 			if (td2 == td)
777 				continue;
778 			thread_lock(td2);
779 			/* a workq thread may not actually be runnable */
780 			if (td2->td_state == TDS_INACTIVE && (td2->td_flags & TDF_WORKQ)) {
781 				thread_unlock(td2);
782 				thread_stopped(p);
783 				continue;
784 			}
			/* Ask the thread to run its suspend check. */
785 			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
786 			if (TD_IS_INHIBITED(td2)) {
787 				wakeup_swapper |= weed_inhib(mode, td2, p);
788 #ifdef SMP
			/*
			 * NOTE(review): td2 == td was already skipped by the
			 * "continue" above, so the "td != td2" half of this
			 * test is always true.
			 */
789 			} else if (TD_IS_RUNNING(td2) && td != td2) {
790 				forward_signal(td2);
791 #endif
792 			}
793 			thread_unlock(td2);
794 		}
795 		if (wakeup_swapper)
796 			kick_proc0();
797 		remaining = calc_remaining(p, mode);
798 
799 		/*
800 		 * Maybe we suspended some threads.. was it enough?
801 		 */
802 		if (remaining == remain_for_mode(mode))
803 			break;
804 
805 stopme:
806 		/*
807 		 * Wake us up when everyone else has suspended.
808 		 * In the mean time we suspend as well.
809 		 */
810 		thread_suspend_switch(td, p);
811 		remaining = calc_remaining(p, mode);
812 	}
813 	if (mode == SINGLE_EXIT) {
814 		/*
815 		 * Convert the process to an unthreaded process.  The
816 		 * SINGLE_EXIT is called by exit1() or execve(), in
817 		 * both cases other threads must be retired.
818 		 */
819 		KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
820 		p->p_singlethread = NULL;
821 		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);
822 
823 		/*
824 		 * Wait for any remaining threads to exit cpu_throw().
825 		 */
		/*
		 * Both locks are dropped around sched_relinquish() and then
		 * re-taken, process lock first.
		 */
826 		while (p->p_exitthreads != 0) {
827 			PROC_SUNLOCK(p);
828 			PROC_UNLOCK(p);
829 			sched_relinquish(td);
830 			PROC_LOCK(p);
831 			PROC_SLOCK(p);
832 		}
833 	} else if (mode == SINGLE_BOUNDARY) {
834 		/*
835 		 * Wait until all suspended threads are removed from
836 		 * the processors.  The thread_suspend_check()
837 		 * increments p_boundary_count while it is still
838 		 * running, which makes it possible for the execve()
839 		 * to destroy vmspace while our other threads are
840 		 * still using the address space.
841 		 *
842 		 * We lock the thread, which is only allowed to
843 		 * succeed after context switch code finished using
844 		 * the address space.
845 		 */
846 		FOREACH_THREAD_IN_PROC(p, td2) {
847 			if (td2 == td)
848 				continue;
849 			thread_lock(td2);
850 			KASSERT((td2->td_flags & TDF_BOUNDARY) != 0,
851 			    ("td %p not on boundary", td2));
852 			KASSERT(TD_IS_SUSPENDED(td2),
853 			    ("td %p is not suspended", td2));
854 			thread_unlock(td2);
855 		}
856 	}
857 	PROC_SUNLOCK(p);
858 	return (0);
859 }
860 
861 bool
thread_suspend_check_needed(void)862 thread_suspend_check_needed(void)
863 {
864 	struct proc *p;
865 	struct thread *td;
866 
867 	td = curthread;
868 	p = td->td_proc;
869 	PROC_LOCK_ASSERT(p, MA_OWNED);
870 	return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
871 	    (td->td_dbgflags & TDB_SUSPEND) != 0));
872 }
873 
874 /*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non zero, the thread must be able to
 * accept 0 (caller may continue), or a non-zero error (caller must abort)
 * as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or
 *               | when ST ends       |   ERESTART
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns EINTR
 *               |                    |  immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * In detail, as long as a stop is pending the loop below:
 *  - returns 0 if the caller is the single-threading thread;
 *  - returns EINTR if P_SINGLE_EXIT is set and return_instead is non-zero;
 *  - returns ERESTART if a P_SINGLE_BOUNDARY single-threading request is
 *    active and return_instead is non-zero;
 *  - returns 0 if the thread has TDF_SBDRY set;
 *  - exits the thread if P_SINGLE_EXIT is set (no return);
 *  - otherwise suspends the thread and re-checks once it runs again.
 *
 * The process lock must be held on entry and is held again on return.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
907 int
thread_suspend_check(int return_instead)908 thread_suspend_check(int return_instead)
909 {
910 	struct thread *td;
911 	struct proc *p;
912 	int wakeup_swapper;
913 
914 	td = curthread;
915 	p = td->td_proc;
916 	mtx_assert(&Giant, MA_NOTOWNED);
917 	PROC_LOCK_ASSERT(p, MA_OWNED);
918 	while (thread_suspend_check_needed()) {
919 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
920 			KASSERT(p->p_singlethread != NULL,
921 			    ("singlethread not set"));
922 			/*
923 			 * The only suspension in action is a
924 			 * single-threading. Single threader need not stop.
925 			 * XXX Should be safe to access unlocked
926 			 * as it can only be set to be true by us.
927 			 */
928 			if (p->p_singlethread == td)
929 				return (0);	/* Exempt from stopping. */
930 		}
		/* The process is exiting and the caller wants to back out. */
931 		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
932 			return (EINTR);
933 
934 		/* Should we goto user boundary if we didn't come from there? */
935 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
936 		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
937 			return (ERESTART);
938 
939 		/*
940 		 * Ignore suspend requests if they are deferred.
941 		 */
942 		if ((td->td_flags & TDF_SBDRY) != 0) {
943 			KASSERT(return_instead,
944 			    ("TDF_SBDRY set for unsafe thread_suspend_check"));
945 			return (0);
946 		}
947 
948 		/*
949 		 * If the process is waiting for us to exit,
950 		 * this thread should just suicide.
951 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
952 		 */
953 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
954 			PROC_UNLOCK(p);
955 
956 			/*
957 			 * Allow Linux emulation layer to do some work
958 			 * before thread suicide.
959 			 */
960 			if (__predict_false(p->p_sysent->sv_thread_detach != NULL))
961 				(p->p_sysent->sv_thread_detach)(td);
962 			kern_thr_exit(td);
963 			panic("stopped thread did not exit");
964 		}
965 
966 		PROC_SLOCK(p);
967 		thread_stopped(p);
968 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			/*
			 * We are about to become the last thread to suspend:
			 * let the single-threading thread run.
			 */
969 			if (p->p_numthreads == p->p_suspcount + 1) {
970 				thread_lock(p->p_singlethread);
971 				wakeup_swapper = thread_unsuspend_one(
972 				    p->p_singlethread, p, false);
973 				thread_unlock(p->p_singlethread);
974 				if (wakeup_swapper)
975 					kick_proc0();
976 			}
977 		}
		/*
		 * The process lock is dropped before the thread lock is
		 * taken; the process spin lock stays held until the thread
		 * has been marked suspended.
		 */
978 		PROC_UNLOCK(p);
979 		thread_lock(td);
980 		/*
981 		 * When a thread suspends, it just
982 		 * gets taken off all queues.
983 		 */
984 		thread_suspend_one(td);
		/* Only a caller that may stop here counts as "at the boundary". */
985 		if (return_instead == 0) {
986 			p->p_boundary_count++;
987 			td->td_flags |= TDF_BOUNDARY;
988 		}
989 		PROC_SUNLOCK(p);
990 		mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
991 		thread_unlock(td);
		/* Running again: re-take the process lock and re-check. */
992 		PROC_LOCK(p);
993 	}
994 	return (0);
995 }
996 
/*
 * Mark thread td as suspended and switch away from the current context.
 *
 * Entered with the process lock and the process spin lock of p held; both
 * are released before the context switch and both are held again when the
 * function returns.  p_suspcount is only adjusted when td belongs to p.
 */
997 void
thread_suspend_switch(struct thread * td,struct proc * p)998 thread_suspend_switch(struct thread *td, struct proc *p)
999 {
1000 
1001 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
1002 	PROC_LOCK_ASSERT(p, MA_OWNED);
1003 	PROC_SLOCK_ASSERT(p, MA_OWNED);
1004 	/*
1005 	 * We implement thread_suspend_one in stages here to avoid
1006 	 * dropping the proc lock while the thread lock is owned.
1007 	 */
1008 	if (p == td->td_proc) {
1009 		thread_stopped(p);
1010 		p->p_suspcount++;
1011 	}
	/* Process lock goes first, then the thread lock is taken. */
1012 	PROC_UNLOCK(p);
1013 	thread_lock(td);
1014 	td->td_flags &= ~TDF_NEEDSUSPCHK;
1015 	TD_SET_SUSPENDED(td);
1016 	sched_sleep(td, 0);
1017 	PROC_SUNLOCK(p);
	/* Giant is released for the duration of the switch. */
1018 	DROP_GIANT();
1019 	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
1020 	thread_unlock(td);
1021 	PICKUP_GIANT();
	/* Restore the lock state the caller entered with. */
1022 	PROC_LOCK(p);
1023 	PROC_SLOCK(p);
1024 }
1025 
1026 void
thread_suspend_one(struct thread * td)1027 thread_suspend_one(struct thread *td)
1028 {
1029 	struct proc *p;
1030 
1031 	p = td->td_proc;
1032 	PROC_SLOCK_ASSERT(p, MA_OWNED);
1033 	THREAD_LOCK_ASSERT(td, MA_OWNED);
1034 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
1035 	p->p_suspcount++;
1036 	td->td_flags &= ~TDF_NEEDSUSPCHK;
1037 	TD_SET_SUSPENDED(td);
1038 	sched_sleep(td, 0);
1039 }
1040 
1041 static int
thread_unsuspend_one(struct thread * td,struct proc * p,bool boundary)1042 thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
1043 {
1044 
1045 	THREAD_LOCK_ASSERT(td, MA_OWNED);
1046 	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
1047 	TD_CLR_SUSPENDED(td);
1048 	td->td_flags &= ~TDF_ALLPROCSUSP;
1049 	if (td->td_proc == p) {
1050 		PROC_SLOCK_ASSERT(p, MA_OWNED);
1051 		p->p_suspcount--;
1052 		if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) {
1053 			td->td_flags &= ~TDF_BOUNDARY;
1054 			p->p_boundary_count--;
1055 		}
1056 	}
1057 	return (setrunnable(td));
1058 }
1059 
1060 /*
1061  * Allow all threads blocked by single threading to continue running.
1062  */
1063 void
thread_unsuspend(struct proc * p)1064 thread_unsuspend(struct proc *p)
1065 {
1066 	struct thread *td;
1067 	int wakeup_swapper;
1068 
1069 	PROC_LOCK_ASSERT(p, MA_OWNED);
1070 	PROC_SLOCK_ASSERT(p, MA_OWNED);
1071 	wakeup_swapper = 0;
1072 	if (!P_SHOULDSTOP(p)) {
1073                 FOREACH_THREAD_IN_PROC(p, td) {
1074 			thread_lock(td);
1075 			if (TD_IS_SUSPENDED(td)) {
1076 				wakeup_swapper |= thread_unsuspend_one(td, p,
1077 				    true);
1078 			}
1079 			thread_unlock(td);
1080 		}
1081 	} else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
1082 	    p->p_numthreads == p->p_suspcount) {
1083 		/*
1084 		 * Stopping everything also did the job for the single
1085 		 * threading request. Now we've downgraded to single-threaded,
1086 		 * let it continue.
1087 		 */
1088 		if (p->p_singlethread->td_proc == p) {
1089 			thread_lock(p->p_singlethread);
1090 			wakeup_swapper = thread_unsuspend_one(
1091 			    p->p_singlethread, p, false);
1092 			thread_unlock(p->p_singlethread);
1093 		}
1094 	}
1095 	if (wakeup_swapper)
1096 		kick_proc0();
1097 }
1098 
1099 /*
1100  * End the single threading mode..
1101  */
1102 void
thread_single_end(struct proc * p,int mode)1103 thread_single_end(struct proc *p, int mode)
1104 {
1105 	struct thread *td;
1106 	int wakeup_swapper;
1107 
1108 	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
1109 	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
1110 	    ("invalid mode %d", mode));
1111 	PROC_LOCK_ASSERT(p, MA_OWNED);
1112 	KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) ||
1113 	    (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0),
1114 	    ("mode %d does not match P_TOTAL_STOP", mode));
1115 	KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread,
1116 	    ("thread_single_end from other thread %p %p",
1117 	    curthread, p->p_singlethread));
1118 	KASSERT(mode != SINGLE_BOUNDARY ||
1119 	    (p->p_flag & P_SINGLE_BOUNDARY) != 0,
1120 	    ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag));
1121 	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
1122 	    P_TOTAL_STOP);
1123 	PROC_SLOCK(p);
1124 	p->p_singlethread = NULL;
1125 	wakeup_swapper = 0;
1126 	/*
1127 	 * If there are other threads they may now run,
1128 	 * unless of course there is a blanket 'stop order'
1129 	 * on the process. The single threader must be allowed
1130 	 * to continue however as this is a bad place to stop.
1131 	 */
1132 	if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
1133                 FOREACH_THREAD_IN_PROC(p, td) {
1134 			thread_lock(td);
1135 			if (TD_IS_SUSPENDED(td)) {
1136 				wakeup_swapper |= thread_unsuspend_one(td, p,
1137 				    mode == SINGLE_BOUNDARY);
1138 			}
1139 			thread_unlock(td);
1140 		}
1141 	}
1142 	KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
1143 	    ("inconsistent boundary count %d", p->p_boundary_count));
1144 	PROC_SUNLOCK(p);
1145 	if (wakeup_swapper)
1146 		kick_proc0();
1147 }
1148 
1149 struct thread *
thread_find(struct proc * p,lwpid_t tid)1150 thread_find(struct proc *p, lwpid_t tid)
1151 {
1152 	struct thread *td;
1153 
1154 	PROC_LOCK_ASSERT(p, MA_OWNED);
1155 	FOREACH_THREAD_IN_PROC(p, td) {
1156 		if (td->td_tid == tid)
1157 			break;
1158 	}
1159 	return (td);
1160 }
1161 
1162 /* Locate a thread by number; return with proc lock held. */
1163 struct thread *
tdfind(lwpid_t tid,pid_t pid)1164 tdfind(lwpid_t tid, pid_t pid)
1165 {
1166 #define RUN_THRESH	16
1167 	struct thread *td;
1168 	int run = 0;
1169 
1170 	rw_rlock(&tidhash_lock);
1171 	LIST_FOREACH(td, TIDHASH(tid), td_hash) {
1172 		if (td->td_tid == tid) {
1173 			if (pid != -1 && td->td_proc->p_pid != pid) {
1174 				td = NULL;
1175 				break;
1176 			}
1177 			PROC_LOCK(td->td_proc);
1178 			if (td->td_proc->p_state == PRS_NEW) {
1179 				PROC_UNLOCK(td->td_proc);
1180 				td = NULL;
1181 				break;
1182 			}
1183 			if (run > RUN_THRESH) {
1184 				if (rw_try_upgrade(&tidhash_lock)) {
1185 					LIST_REMOVE(td, td_hash);
1186 					LIST_INSERT_HEAD(TIDHASH(td->td_tid),
1187 						td, td_hash);
1188 					rw_wunlock(&tidhash_lock);
1189 					return (td);
1190 				}
1191 			}
1192 			break;
1193 		}
1194 		run++;
1195 	}
1196 	rw_runlock(&tidhash_lock);
1197 	return (td);
1198 }
1199 
1200 void
tidhash_add(struct thread * td)1201 tidhash_add(struct thread *td)
1202 {
1203 	rw_wlock(&tidhash_lock);
1204 	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
1205 	rw_wunlock(&tidhash_lock);
1206 }
1207 
1208 void
tidhash_remove(struct thread * td)1209 tidhash_remove(struct thread *td)
1210 {
1211 	rw_wlock(&tidhash_lock);
1212 	LIST_REMOVE(td, td_hash);
1213 	rw_wunlock(&tidhash_lock);
1214 }
1215