1 /*-
2 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 * DAMAGE.
27 */
28
29 #include "opt_witness.h"
30 #include "opt_hwpmc_hooks.h"
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mutex.h>
40 #include <sys/proc.h>
41 #include <sys/rangelock.h>
42 #include <sys/resourcevar.h>
43 #include <sys/sdt.h>
44 #include <sys/smp.h>
45 #include <sys/sched.h>
46 #include <sys/sleepqueue.h>
47 #include <sys/selinfo.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/turnstile.h>
51 #include <sys/ktr.h>
52 #include <sys/rwlock.h>
53 #include <sys/umtx.h>
54 #include <sys/cpuset.h>
55 #ifdef HWPMC_HOOKS
56 #include <sys/pmckern.h>
57 #endif
58
59 #include <security/audit/audit.h>
60
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64 #include <vm/vm_domain.h>
65 #include <sys/eventhandler.h>
66
SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE(proc, , , lwp__exit);

/*
 * Thread-related storage.
 */
/* Zone created in threadinit(); thread_alloc()/thread_free() use it. */
static uma_zone_t thread_zone;

/*
 * Threads queued by thread_zombie() and later released by thread_reap().
 * Insertions and the detach in thread_reap() happen under zombie_lock,
 * which is set up as a spin mutex.
 */
TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
static struct mtx zombie_lock;
MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);

static void thread_zombie(struct thread *);
static int thread_unsuspend_one(struct thread *td, struct proc *p,
    bool boundary);

/* Number of slots in the ring of recently freed thread IDs. */
#define TID_BUFFER_SIZE 1024

/*
 * tid_lock is held around every tid_buffer/tid_head/tid_tail update in
 * tid_alloc() and tid_free(), and is also handed to new_unrhdr() in
 * threadinit().
 */
struct mtx tid_lock;
/* Unit-number allocator that tid_alloc() tries first. */
static struct unrhdr *tid_unrhdr;
/*
 * Ring of freed thread IDs: tid_free() stores at tid_tail, tid_alloc()
 * takes from tid_head; the ring is empty when the two indices are equal.
 */
static lwpid_t tid_buffer[TID_BUFFER_SIZE];
static int tid_head, tid_tail;
static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");

/*
 * Thread-ID hash table.  tidhashtbl and tidhash are both produced by the
 * hashinit() call in threadinit(); tidhash_lock is read-locked by tdfind()
 * and write-locked by tidhash_add()/tidhash_remove().
 */
struct tidhashhead *tidhashtbl;
u_long tidhash;
struct rwlock tidhash_lock;
94
95 static lwpid_t
tid_alloc(void)96 tid_alloc(void)
97 {
98 lwpid_t tid;
99
100 tid = alloc_unr(tid_unrhdr);
101 if (tid != -1)
102 return (tid);
103 mtx_lock(&tid_lock);
104 if (tid_head == tid_tail) {
105 mtx_unlock(&tid_lock);
106 return (-1);
107 }
108 tid = tid_buffer[tid_head];
109 tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
110 mtx_unlock(&tid_lock);
111 return (tid);
112 }
113
114 static void
tid_free(lwpid_t tid)115 tid_free(lwpid_t tid)
116 {
117 lwpid_t tmp_tid = -1;
118
119 mtx_lock(&tid_lock);
120 if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) {
121 tmp_tid = tid_buffer[tid_head];
122 tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
123 }
124 tid_buffer[tid_tail] = tid;
125 tid_tail = (tid_tail + 1) % TID_BUFFER_SIZE;
126 mtx_unlock(&tid_lock);
127 if (tmp_tid != -1)
128 free_unr(tid_unrhdr, tmp_tid);
129 }
130
131 /*
132 * Prepare a thread for use.
133 */
134 static int
thread_ctor(void * mem,int size,void * arg,int flags)135 thread_ctor(void *mem, int size, void *arg, int flags)
136 {
137 struct thread *td;
138
139 td = (struct thread *)mem;
140 td->td_state = TDS_INACTIVE;
141 td->td_oncpu = NOCPU;
142
143 td->td_tid = tid_alloc();
144
145 td->td_cswitchcb = NULL;
146 td->td_threadlist = NULL;
147 td->td_reuse_stack = NULL;
148 /*
149 * Note that td_critnest begins life as 1 because the thread is not
150 * running and is thereby implicitly waiting to be on the receiving
151 * end of a context switch.
152 */
153 td->td_critnest = 1;
154 td->td_lend_user_pri = PRI_MAX;
155 EVENTHANDLER_INVOKE(thread_ctor, td);
156 #ifdef AUDIT
157 audit_thread_alloc(td);
158 #endif
159 umtx_thread_alloc(td);
160 return (0);
161 }
162
163 /*
164 * Reclaim a thread after use.
165 */
166 static void
thread_dtor(void * mem,int size,void * arg)167 thread_dtor(void *mem, int size, void *arg)
168 {
169 struct thread *td;
170
171 td = (struct thread *)mem;
172
173 #ifdef INVARIANTS
174 /* Verify that this thread is in a safe state to free. */
175 switch (td->td_state) {
176 case TDS_INHIBITED:
177 case TDS_RUNNING:
178 case TDS_CAN_RUN:
179 case TDS_RUNQ:
180 /*
181 * We must never unlink a thread that is in one of
182 * these states, because it is currently active.
183 */
184 panic("bad state for thread unlinking");
185 /* NOTREACHED */
186 case TDS_INACTIVE:
187 break;
188 default:
189 panic("bad thread state");
190 /* NOTREACHED */
191 }
192 #endif
193 #ifdef AUDIT
194 audit_thread_free(td);
195 #endif
196 /* Free all OSD associated to this thread. */
197 osd_thread_exit(td);
198
199 EVENTHANDLER_INVOKE(thread_dtor, td);
200 tid_free(td->td_tid);
201 }
202
203 /*
204 * Initialize type-stable parts of a thread (when newly created).
205 */
206 static int
thread_init(void * mem,int size,int flags)207 thread_init(void *mem, int size, int flags)
208 {
209 struct thread *td;
210
211 td = (struct thread *)mem;
212
213 td->td_sleepqueue = sleepq_alloc();
214 td->td_turnstile = turnstile_alloc();
215 td->td_rlqe = NULL;
216 EVENTHANDLER_INVOKE(thread_init, td);
217 td->td_sched = (struct td_sched *)&td[1];
218 umtx_thread_init(td);
219 td->td_kstack = 0;
220 td->td_sel = NULL;
221 return (0);
222 }
223
224 /*
225 * Tear down type-stable parts of a thread (just before being discarded).
226 */
227 static void
thread_fini(void * mem,int size)228 thread_fini(void *mem, int size)
229 {
230 struct thread *td;
231
232 td = (struct thread *)mem;
233 EVENTHANDLER_INVOKE(thread_fini, td);
234 rlqentry_free(td->td_rlqe);
235 turnstile_free(td->td_turnstile);
236 sleepq_free(td->td_sleepqueue);
237 umtx_thread_fini(td);
238 seltdfini(td);
239 }
240
241 /*
242 * For a newly created process,
243 * link up all the structures and its initial threads etc.
244 * called from:
245 * {arch}/{arch}/machdep.c {arch}_init(), init386() etc.
246 * proc_dtor() (should go away)
247 * proc_init()
248 */
249 void
proc_linkup0(struct proc * p,struct thread * td)250 proc_linkup0(struct proc *p, struct thread *td)
251 {
252 TAILQ_INIT(&p->p_threads); /* all threads in proc */
253 proc_linkup(p, td);
254 }
255
256 void
proc_linkup(struct proc * p,struct thread * td)257 proc_linkup(struct proc *p, struct thread *td)
258 {
259
260 sigqueue_init(&p->p_sigqueue, p);
261 p->p_ksi = ksiginfo_alloc(1);
262 if (p->p_ksi != NULL) {
263 /* XXX p_ksi may be null if ksiginfo zone is not ready */
264 p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
265 }
266 LIST_INIT(&p->p_mqnotifier);
267 p->p_numthreads = 0;
268 thread_link(td, p);
269 }
270
271 /*
272 * Initialize global thread allocation resources.
273 */
274 void
threadinit(void)275 threadinit(void)
276 {
277
278 mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
279
280 /*
281 * pid_max cannot be greater than PID_MAX.
282 * leave one number for thread0.
283 */
284 tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);
285
286 thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
287 thread_ctor, thread_dtor, thread_init, thread_fini,
288 16 - 1, UMA_ZONE_NOFREE);
289 tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
290 rw_init(&tidhash_lock, "tidhash");
291 }
292
293 /*
294 * Place an unused thread on the zombie list.
295 * Use the slpq as that must be unused by now.
296 */
297 void
thread_zombie(struct thread * td)298 thread_zombie(struct thread *td)
299 {
300 mtx_lock_spin(&zombie_lock);
301 TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
302 mtx_unlock_spin(&zombie_lock);
303 }
304
305 /*
306 * Release a thread that has exited after cpu_throw().
307 */
308 void
thread_stash(struct thread * td)309 thread_stash(struct thread *td)
310 {
311 atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
312 thread_zombie(td);
313 }
314
315 /*
316 * Reap zombie resources.
317 */
318 void
thread_reap(void)319 thread_reap(void)
320 {
321 struct thread *td_first, *td_next;
322
323 /*
324 * Don't even bother to lock if none at this instant,
325 * we really don't care about the next instant..
326 */
327 if (!TAILQ_EMPTY(&zombie_threads)) {
328 mtx_lock_spin(&zombie_lock);
329 td_first = TAILQ_FIRST(&zombie_threads);
330 if (td_first)
331 TAILQ_INIT(&zombie_threads);
332 mtx_unlock_spin(&zombie_lock);
333 while (td_first) {
334 td_next = TAILQ_NEXT(td_first, td_slpq);
335 thread_cow_free(td_first);
336 thread_free(td_first);
337 td_first = td_next;
338 }
339 }
340 }
341
342 /*
343 * Allocate a thread.
344 */
345 struct thread *
thread_alloc(int pages)346 thread_alloc(int pages)
347 {
348 struct thread *td;
349
350 thread_reap(); /* check if any zombies to get */
351
352 td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
353 KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
354 if (!vm_thread_new(td, pages)) {
355 uma_zfree(thread_zone, td);
356 return (NULL);
357 }
358 cpu_thread_alloc(td);
359 vm_domain_policy_init(&td->td_vm_dom_policy);
360 return (td);
361 }
362
363 int
thread_alloc_stack(struct thread * td,int pages)364 thread_alloc_stack(struct thread *td, int pages)
365 {
366
367 KASSERT(td->td_kstack == 0,
368 ("thread_alloc_stack called on a thread with kstack"));
369 if (!vm_thread_new(td, pages))
370 return (0);
371 cpu_thread_alloc(td);
372 return (1);
373 }
374
375 /*
376 * Deallocate a thread.
377 */
378 void
thread_free(struct thread * td)379 thread_free(struct thread *td)
380 {
381
382 lock_profile_thread_exit(td);
383 if (td->td_cpuset)
384 cpuset_rel(td->td_cpuset);
385 td->td_cpuset = NULL;
386 cpu_thread_free(td);
387 if (td->td_kstack != 0)
388 vm_thread_dispose(td);
389 vm_domain_policy_cleanup(&td->td_vm_dom_policy);
390 uma_zfree(thread_zone, td);
391 }
392
393 void
thread_cow_get_proc(struct thread * newtd,struct proc * p)394 thread_cow_get_proc(struct thread *newtd, struct proc *p)
395 {
396
397 PROC_LOCK_ASSERT(p, MA_OWNED);
398 newtd->td_ucred = crhold(p->p_ucred);
399 newtd->td_limit = lim_hold(p->p_limit);
400 newtd->td_cowgen = p->p_cowgen;
401 }
402
403 void
thread_cow_get(struct thread * newtd,struct thread * td)404 thread_cow_get(struct thread *newtd, struct thread *td)
405 {
406
407 newtd->td_ucred = crhold(td->td_ucred);
408 newtd->td_limit = lim_hold(td->td_limit);
409 newtd->td_cowgen = td->td_cowgen;
410 }
411
412 void
thread_cow_free(struct thread * td)413 thread_cow_free(struct thread *td)
414 {
415
416 if (td->td_ucred != NULL)
417 crfree(td->td_ucred);
418 if (td->td_limit != NULL)
419 lim_free(td->td_limit);
420 }
421
422 void
thread_cow_update(struct thread * td)423 thread_cow_update(struct thread *td)
424 {
425 struct proc *p;
426 struct ucred *oldcred;
427 struct plimit *oldlimit;
428
429 p = td->td_proc;
430 oldcred = NULL;
431 oldlimit = NULL;
432 PROC_LOCK(p);
433 if (td->td_ucred != p->p_ucred) {
434 oldcred = td->td_ucred;
435 td->td_ucred = crhold(p->p_ucred);
436 }
437 if (td->td_limit != p->p_limit) {
438 oldlimit = td->td_limit;
439 td->td_limit = lim_hold(p->p_limit);
440 }
441 td->td_cowgen = p->p_cowgen;
442 PROC_UNLOCK(p);
443 if (oldcred != NULL)
444 crfree(oldcred);
445 if (oldlimit != NULL)
446 lim_free(oldlimit);
447 }
448
449 /*
450 * Discard the current thread and exit from its context.
451 * Always called with scheduler locked.
452 *
453 * Because we can't free a thread while we're operating under its context,
454 * push the current thread into our CPU's deadthread holder. This means
455 * we needn't worry about someone else grabbing our context before we
456 * do a cpu_throw().
457 */
458 void
thread_exit(void)459 thread_exit(void)
460 {
461 uint64_t runtime, new_switchtime;
462 struct thread *td;
463 struct thread *td2;
464 struct proc *p;
465 int wakeup_swapper;
466
467 td = curthread;
468 p = td->td_proc;
469
470 PROC_SLOCK_ASSERT(p, MA_OWNED);
471 mtx_assert(&Giant, MA_NOTOWNED);
472
473 PROC_LOCK_ASSERT(p, MA_OWNED);
474 KASSERT(p != NULL, ("thread exiting without a process"));
475 CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
476 (long)p->p_pid, td->td_name);
477 KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
478
479 #ifdef AUDIT
480 AUDIT_SYSCALL_EXIT(0, td);
481 #endif
482 /*
483 * drop FPU & debug register state storage, or any other
484 * architecture specific resources that
485 * would not be on a new untouched process.
486 */
487 cpu_thread_exit(td); /* XXXSMP */
488
489 /*
490 * The last thread is left attached to the process
491 * So that the whole bundle gets recycled. Skip
492 * all this stuff if we never had threads.
493 * EXIT clears all sign of other threads when
494 * it goes to single threading, so the last thread always
495 * takes the short path.
496 */
497 if (p->p_flag & P_HADTHREADS) {
498 if (p->p_numthreads > 1) {
499 atomic_add_int(&td->td_proc->p_exitthreads, 1);
500 thread_unlink(td);
501 td2 = FIRST_THREAD_IN_PROC(p);
502 sched_exit_thread(td2, td);
503
504 /*
505 * The test below is NOT true if we are the
506 * sole exiting thread. P_STOPPED_SINGLE is unset
507 * in exit1() after it is the only survivor.
508 */
509 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
510 if (p->p_numthreads == p->p_suspcount) {
511 thread_lock(p->p_singlethread);
512 wakeup_swapper = thread_unsuspend_one(
513 p->p_singlethread, p, false);
514 thread_unlock(p->p_singlethread);
515 if (wakeup_swapper)
516 kick_proc0();
517 }
518 }
519
520 PCPU_SET(deadthread, td);
521 } else {
522 /*
523 * The last thread is exiting.. but not through exit()
524 */
525 panic ("thread_exit: Last thread exiting on its own");
526 }
527 }
528 #ifdef HWPMC_HOOKS
529 /*
530 * If this thread is part of a process that is being tracked by hwpmc(4),
531 * inform the module of the thread's impending exit.
532 */
533 if (PMC_PROC_IS_USING_PMCS(td->td_proc))
534 PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
535 #endif
536 PROC_UNLOCK(p);
537 PROC_STATLOCK(p);
538 thread_lock(td);
539 PROC_SUNLOCK(p);
540
541 /* Do the same timestamp bookkeeping that mi_switch() would do. */
542 new_switchtime = cpu_ticks();
543 runtime = new_switchtime - PCPU_GET(switchtime);
544 td->td_runtime += runtime;
545 td->td_incruntime += runtime;
546 PCPU_SET(switchtime, new_switchtime);
547 PCPU_SET(switchticks, ticks);
548 PCPU_INC(cnt.v_swtch);
549
550 /* Save our resource usage in our process. */
551 td->td_ru.ru_nvcsw++;
552 ruxagg(p, td);
553 rucollect(&p->p_ru, &td->td_ru);
554 PROC_STATUNLOCK(p);
555
556 td->td_state = TDS_INACTIVE;
557 #ifdef WITNESS
558 witness_thread_exit(td);
559 #endif
560 CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
561 sched_throw(td);
562 panic("I'm a teapot!");
563 /* NOTREACHED */
564 }
565
566 /*
567 * Do any thread specific cleanups that may be needed in wait()
568 * called with Giant, proc and schedlock not held.
569 */
570 void
thread_wait(struct proc * p)571 thread_wait(struct proc *p)
572 {
573 struct thread *td;
574
575 mtx_assert(&Giant, MA_NOTOWNED);
576 KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
577 KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
578 td = FIRST_THREAD_IN_PROC(p);
579 /* Lock the last thread so we spin until it exits cpu_throw(). */
580 thread_lock(td);
581 thread_unlock(td);
582 lock_profile_thread_exit(td);
583 cpuset_rel(td->td_cpuset);
584 td->td_cpuset = NULL;
585 cpu_thread_clean(td);
586 thread_cow_free(td);
587 thread_reap(); /* check for zombie threads etc. */
588 }
589
590 /*
591 * Link a thread to a process.
592 * set up anything that needs to be initialized for it to
593 * be used by the process.
594 */
595 void
thread_link(struct thread * td,struct proc * p)596 thread_link(struct thread *td, struct proc *p)
597 {
598
599 /*
600 * XXX This can't be enabled because it's called for proc0 before
601 * its lock has been created.
602 * PROC_LOCK_ASSERT(p, MA_OWNED);
603 */
604 td->td_state = TDS_INACTIVE;
605 td->td_proc = p;
606 td->td_flags = TDF_INMEM;
607
608 LIST_INIT(&td->td_contested);
609 LIST_INIT(&td->td_lprof[0]);
610 LIST_INIT(&td->td_lprof[1]);
611 sigqueue_init(&td->td_sigqueue, p);
612 callout_init(&td->td_slpcallout, 1);
613 TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
614 p->p_numthreads++;
615 }
616
617 /*
618 * Called from:
619 * thread_exit()
620 */
621 void
thread_unlink(struct thread * td)622 thread_unlink(struct thread *td)
623 {
624 struct proc *p = td->td_proc;
625
626 PROC_LOCK_ASSERT(p, MA_OWNED);
627 TAILQ_REMOVE(&p->p_threads, td, td_plist);
628 p->p_numthreads--;
629 /* could clear a few other things here */
630 /* Must NOT clear links to proc! */
631 }
632
633 static int
calc_remaining(struct proc * p,int mode)634 calc_remaining(struct proc *p, int mode)
635 {
636 int remaining;
637
638 PROC_LOCK_ASSERT(p, MA_OWNED);
639 PROC_SLOCK_ASSERT(p, MA_OWNED);
640 if (mode == SINGLE_EXIT)
641 remaining = p->p_numthreads;
642 else if (mode == SINGLE_BOUNDARY)
643 remaining = p->p_numthreads - p->p_boundary_count;
644 else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC)
645 remaining = p->p_numthreads - p->p_suspcount;
646 else
647 panic("calc_remaining: wrong mode %d", mode);
648 return (remaining);
649 }
650
651 static int
remain_for_mode(int mode)652 remain_for_mode(int mode)
653 {
654
655 return (mode == SINGLE_ALLPROC ? 0 : 1);
656 }
657
658 static int
weed_inhib(int mode,struct thread * td2,struct proc * p)659 weed_inhib(int mode, struct thread *td2, struct proc *p)
660 {
661 int wakeup_swapper;
662
663 PROC_LOCK_ASSERT(p, MA_OWNED);
664 PROC_SLOCK_ASSERT(p, MA_OWNED);
665 THREAD_LOCK_ASSERT(td2, MA_OWNED);
666
667 wakeup_swapper = 0;
668 switch (mode) {
669 case SINGLE_EXIT:
670 if (TD_IS_SUSPENDED(td2))
671 wakeup_swapper |= thread_unsuspend_one(td2, p, true);
672 if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
673 wakeup_swapper |= sleepq_abort(td2, EINTR);
674 break;
675 case SINGLE_BOUNDARY:
676 if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
677 wakeup_swapper |= thread_unsuspend_one(td2, p, false);
678 if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
679 wakeup_swapper |= sleepq_abort(td2, ERESTART);
680 break;
681 case SINGLE_NO_EXIT:
682 if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
683 wakeup_swapper |= thread_unsuspend_one(td2, p, false);
684 if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
685 wakeup_swapper |= sleepq_abort(td2, ERESTART);
686 break;
687 case SINGLE_ALLPROC:
688 /*
689 * ALLPROC suspend tries to avoid spurious EINTR for
690 * threads sleeping interruptable, by suspending the
691 * thread directly, similarly to sig_suspend_threads().
692 * Since such sleep is not performed at the user
693 * boundary, TDF_BOUNDARY flag is not set, and TDF_ALLPROCSUSP
694 * is used to avoid immediate un-suspend.
695 */
696 if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
697 TDF_ALLPROCSUSP)) == 0)
698 wakeup_swapper |= thread_unsuspend_one(td2, p, false);
699 if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) {
700 if ((td2->td_flags & TDF_SBDRY) == 0) {
701 thread_suspend_one(td2);
702 td2->td_flags |= TDF_ALLPROCSUSP;
703 } else {
704 wakeup_swapper |= sleepq_abort(td2, ERESTART);
705 }
706 }
707 break;
708 }
709 return (wakeup_swapper);
710 }
711
/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is already
 * single-threading this process). Process is locked!
 * Returns 0 when you are successfully the only thread running; this is
 * also returned immediately for a process that never had threads unless
 * the mode is SINGLE_ALLPROC.
 *
 * A process has successfully single threaded in the suspend mode when
 * There are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however be
 * accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptable. (PCATCH).
 *
 * The process lock must be held on entry and is held again on return,
 * although it may be dropped and retaken in between.  SINGLE_ALLPROC is
 * only accepted for a process other than the caller's; every other mode
 * only for the caller's own process (asserted).
 */
int
thread_single(struct proc *p, int mode)
{
	struct thread *td;
	struct thread *td2;
	int remaining, wakeup_swapper;

	td = curthread;
	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
	    ("invalid mode %d", mode));
	/*
	 * If allowing non-ALLPROC singlethreading for non-curproc
	 * callers, calc_remaining() and remain_for_mode() should be
	 * adjusted to also account for td->td_proc != p.  For now
	 * this is not implemented because it is not used.
	 */
	KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) ||
	    (mode != SINGLE_ALLPROC && td->td_proc == p),
	    ("mode %d proc %p curproc %p", mode, p, td->td_proc));
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);

	/* Nothing to do for a process that never had more than one thread. */
	if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread != NULL && p->p_singlethread != td)
		return (1);

	/* Record the requested mode in the process flags. */
	if (mode == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		p->p_flag &= ~P_SINGLE_BOUNDARY;
	} else {
		p->p_flag &= ~P_SINGLE_EXIT;
		if (mode == SINGLE_BOUNDARY)
			p->p_flag |= P_SINGLE_BOUNDARY;
		else
			p->p_flag &= ~P_SINGLE_BOUNDARY;
	}
	if (mode == SINGLE_ALLPROC)
		p->p_flag |= P_TOTAL_STOP;
	p->p_flag |= P_STOPPED_SINGLE;
	PROC_SLOCK(p);
	p->p_singlethread = td;
	remaining = calc_remaining(p, mode);
	while (remaining != remain_for_mode(mode)) {
		/*
		 * Some other stop condition is in force as well: skip the
		 * sweep and go straight to suspending ourselves.
		 */
		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
			goto stopme;
		wakeup_swapper = 0;
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			thread_lock(td2);
			/* a workq thread may not actually be runnable */
			if (td2->td_state == TDS_INACTIVE && (td2->td_flags & TDF_WORKQ)) {
				thread_unlock(td2);
				thread_stopped(p);
				continue;
			}
			/* Ask the thread to run its suspension check. */
			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
			if (TD_IS_INHIBITED(td2)) {
				wakeup_swapper |= weed_inhib(mode, td2, p);
#ifdef SMP
			} else if (TD_IS_RUNNING(td2) && td != td2) {
				forward_signal(td2);
#endif
			}
			thread_unlock(td2);
		}
		if (wakeup_swapper)
			kick_proc0();
		remaining = calc_remaining(p, mode);

		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if (remaining == remain_for_mode(mode))
			break;

stopme:
		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_switch(td, p);
		remaining = calc_remaining(p, mode);
	}
	if (mode == SINGLE_EXIT) {
		/*
		 * Convert the process to an unthreaded process.  The
		 * SINGLE_EXIT is called by exit1() or execve(), in
		 * both cases other threads must be retired.
		 */
		KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
		p->p_singlethread = NULL;
		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);

		/*
		 * Wait for any remaining threads to exit cpu_throw().
		 * Both process locks are dropped around each yield.
		 */
		while (p->p_exitthreads != 0) {
			PROC_SUNLOCK(p);
			PROC_UNLOCK(p);
			sched_relinquish(td);
			PROC_LOCK(p);
			PROC_SLOCK(p);
		}
	} else if (mode == SINGLE_BOUNDARY) {
		/*
		 * Wait until all suspended threads are removed from
		 * the processors.  The thread_suspend_check()
		 * increments p_boundary_count while it is still
		 * running, which makes it possible for the execve()
		 * to destroy vmspace while our other threads are
		 * still using the address space.
		 *
		 * We lock the thread, which is only allowed to
		 * succeed after context switch code finished using
		 * the address space.
		 */
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			thread_lock(td2);
			KASSERT((td2->td_flags & TDF_BOUNDARY) != 0,
			    ("td %p not on boundary", td2));
			KASSERT(TD_IS_SUSPENDED(td2),
			    ("td %p is not suspended", td2));
			thread_unlock(td2);
		}
	}
	PROC_SUNLOCK(p);
	return (0);
}
860
861 bool
thread_suspend_check_needed(void)862 thread_suspend_check_needed(void)
863 {
864 struct proc *p;
865 struct thread *td;
866
867 td = curthread;
868 p = td->td_proc;
869 PROC_LOCK_ASSERT(p, MA_OWNED);
870 return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
871 (td->td_dbgflags & TDB_SUSPEND) != 0));
872 }
873
/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non zero, the thread must be able to
 * accept 0 (caller may continue), or a non-zero errno value (caller must
 * abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or
 *               | when ST ends       |   ERESTART immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns EINTR
 *               |                    |  immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 *
 * The process lock must be held on entry and is held on every return
 * path; it is dropped while the thread is suspended.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/* Re-evaluate after every wakeup: the stop may still be in force. */
	while (thread_suspend_check_needed()) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
			return (EINTR);

		/* Should we goto user boundary if we didn't come from there? */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
			return (ERESTART);

		/*
		 * Ignore suspend requests if they are deferred.
		 */
		if ((td->td_flags & TDF_SBDRY) != 0) {
			KASSERT(return_instead,
			    ("TDF_SBDRY set for unsafe thread_suspend_check"));
			return (0);
		}

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			PROC_UNLOCK(p);

			/*
			 * Allow Linux emulation layer to do some work
			 * before thread suicide.
			 */
			if (__predict_false(p->p_sysent->sv_thread_detach != NULL))
				(p->p_sysent->sv_thread_detach)(td);
			kern_thr_exit(td);
			panic("stopped thread did not exit");
		}

		PROC_SLOCK(p);
		thread_stopped(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			/*
			 * Once this thread suspends, every thread will be
			 * accounted for in p_suspcount: wake the single-
			 * threading thread.
			 */
			if (p->p_numthreads == p->p_suspcount + 1) {
				thread_lock(p->p_singlethread);
				wakeup_swapper = thread_unsuspend_one(
				    p->p_singlethread, p, false);
				thread_unlock(p->p_singlethread);
				if (wakeup_swapper)
					kick_proc0();
			}
		}
		PROC_UNLOCK(p);
		thread_lock(td);
		/*
		 * When a thread suspends, it just
		 * gets taken off all queues.
		 */
		thread_suspend_one(td);
		/* Only suspensions that may stop here count as "on boundary". */
		if (return_instead == 0) {
			p->p_boundary_count++;
			td->td_flags |= TDF_BOUNDARY;
		}
		PROC_SUNLOCK(p);
		mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
		thread_unlock(td);
		PROC_LOCK(p);
	}
	return (0);
}
996
/*
 * Suspend thread td and switch away, on behalf of a single-threading
 * request against process p.
 *
 * Both the process lock and the process spin lock of p must be held on
 * entry; both are released before the switch and held again on return.
 * p_suspcount is only adjusted when td belongs to p.
 */
void
thread_suspend_switch(struct thread *td, struct proc *p)
{

	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * We implement thread_suspend_one in stages here to avoid
	 * dropping the proc lock while the thread lock is owned.
	 */
	if (p == td->td_proc) {
		thread_stopped(p);
		p->p_suspcount++;
	}
	PROC_UNLOCK(p);
	thread_lock(td);
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
	PROC_SUNLOCK(p);
	DROP_GIANT();
	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
	thread_unlock(td);
	PICKUP_GIANT();
	/* Re-take both process locks, in the order the caller held them. */
	PROC_LOCK(p);
	PROC_SLOCK(p);
}
1025
1026 void
thread_suspend_one(struct thread * td)1027 thread_suspend_one(struct thread *td)
1028 {
1029 struct proc *p;
1030
1031 p = td->td_proc;
1032 PROC_SLOCK_ASSERT(p, MA_OWNED);
1033 THREAD_LOCK_ASSERT(td, MA_OWNED);
1034 KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
1035 p->p_suspcount++;
1036 td->td_flags &= ~TDF_NEEDSUSPCHK;
1037 TD_SET_SUSPENDED(td);
1038 sched_sleep(td, 0);
1039 }
1040
1041 static int
thread_unsuspend_one(struct thread * td,struct proc * p,bool boundary)1042 thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
1043 {
1044
1045 THREAD_LOCK_ASSERT(td, MA_OWNED);
1046 KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
1047 TD_CLR_SUSPENDED(td);
1048 td->td_flags &= ~TDF_ALLPROCSUSP;
1049 if (td->td_proc == p) {
1050 PROC_SLOCK_ASSERT(p, MA_OWNED);
1051 p->p_suspcount--;
1052 if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) {
1053 td->td_flags &= ~TDF_BOUNDARY;
1054 p->p_boundary_count--;
1055 }
1056 }
1057 return (setrunnable(td));
1058 }
1059
1060 /*
1061 * Allow all threads blocked by single threading to continue running.
1062 */
1063 void
thread_unsuspend(struct proc * p)1064 thread_unsuspend(struct proc *p)
1065 {
1066 struct thread *td;
1067 int wakeup_swapper;
1068
1069 PROC_LOCK_ASSERT(p, MA_OWNED);
1070 PROC_SLOCK_ASSERT(p, MA_OWNED);
1071 wakeup_swapper = 0;
1072 if (!P_SHOULDSTOP(p)) {
1073 FOREACH_THREAD_IN_PROC(p, td) {
1074 thread_lock(td);
1075 if (TD_IS_SUSPENDED(td)) {
1076 wakeup_swapper |= thread_unsuspend_one(td, p,
1077 true);
1078 }
1079 thread_unlock(td);
1080 }
1081 } else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
1082 p->p_numthreads == p->p_suspcount) {
1083 /*
1084 * Stopping everything also did the job for the single
1085 * threading request. Now we've downgraded to single-threaded,
1086 * let it continue.
1087 */
1088 if (p->p_singlethread->td_proc == p) {
1089 thread_lock(p->p_singlethread);
1090 wakeup_swapper = thread_unsuspend_one(
1091 p->p_singlethread, p, false);
1092 thread_unlock(p->p_singlethread);
1093 }
1094 }
1095 if (wakeup_swapper)
1096 kick_proc0();
1097 }
1098
1099 /*
1100 * End the single threading mode..
1101 */
1102 void
thread_single_end(struct proc * p,int mode)1103 thread_single_end(struct proc *p, int mode)
1104 {
1105 struct thread *td;
1106 int wakeup_swapper;
1107
1108 KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
1109 mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
1110 ("invalid mode %d", mode));
1111 PROC_LOCK_ASSERT(p, MA_OWNED);
1112 KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) ||
1113 (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0),
1114 ("mode %d does not match P_TOTAL_STOP", mode));
1115 KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread,
1116 ("thread_single_end from other thread %p %p",
1117 curthread, p->p_singlethread));
1118 KASSERT(mode != SINGLE_BOUNDARY ||
1119 (p->p_flag & P_SINGLE_BOUNDARY) != 0,
1120 ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag));
1121 p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
1122 P_TOTAL_STOP);
1123 PROC_SLOCK(p);
1124 p->p_singlethread = NULL;
1125 wakeup_swapper = 0;
1126 /*
1127 * If there are other threads they may now run,
1128 * unless of course there is a blanket 'stop order'
1129 * on the process. The single threader must be allowed
1130 * to continue however as this is a bad place to stop.
1131 */
1132 if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
1133 FOREACH_THREAD_IN_PROC(p, td) {
1134 thread_lock(td);
1135 if (TD_IS_SUSPENDED(td)) {
1136 wakeup_swapper |= thread_unsuspend_one(td, p,
1137 mode == SINGLE_BOUNDARY);
1138 }
1139 thread_unlock(td);
1140 }
1141 }
1142 KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
1143 ("inconsistent boundary count %d", p->p_boundary_count));
1144 PROC_SUNLOCK(p);
1145 if (wakeup_swapper)
1146 kick_proc0();
1147 }
1148
1149 struct thread *
thread_find(struct proc * p,lwpid_t tid)1150 thread_find(struct proc *p, lwpid_t tid)
1151 {
1152 struct thread *td;
1153
1154 PROC_LOCK_ASSERT(p, MA_OWNED);
1155 FOREACH_THREAD_IN_PROC(p, td) {
1156 if (td->td_tid == tid)
1157 break;
1158 }
1159 return (td);
1160 }
1161
/*
 * Locate a thread by number; return with proc lock held.
 *
 * `pid' restricts the match to that process; -1 accepts any process.
 * Returns NULL, with no process lock held, when the ID is not in the
 * hash, when the owning process does not match `pid', or when the owning
 * process is still in state PRS_NEW.
 */
struct thread *
tdfind(lwpid_t tid, pid_t pid)
{
/* Chain depth beyond which a hit is moved to the front of its chain. */
#define RUN_THRESH 16
	struct thread *td;
	int run = 0;

	rw_rlock(&tidhash_lock);
	LIST_FOREACH(td, TIDHASH(tid), td_hash) {
		if (td->td_tid == tid) {
			if (pid != -1 && td->td_proc->p_pid != pid) {
				td = NULL;
				break;
			}
			PROC_LOCK(td->td_proc);
			if (td->td_proc->p_state == PRS_NEW) {
				PROC_UNLOCK(td->td_proc);
				td = NULL;
				break;
			}
			/*
			 * The entry was found deep in its chain: if the
			 * read lock can be upgraded, move the entry to the
			 * head.  Failure to upgrade is not an error; the
			 * thread is returned either way.
			 */
			if (run > RUN_THRESH) {
				if (rw_try_upgrade(&tidhash_lock)) {
					LIST_REMOVE(td, td_hash);
					LIST_INSERT_HEAD(TIDHASH(td->td_tid),
					    td, td_hash);
					rw_wunlock(&tidhash_lock);
					return (td);
				}
			}
			break;
		}
		/* Count the non-matching entries walked so far. */
		run++;
	}
	rw_runlock(&tidhash_lock);
	return (td);
}
1199
1200 void
tidhash_add(struct thread * td)1201 tidhash_add(struct thread *td)
1202 {
1203 rw_wlock(&tidhash_lock);
1204 LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
1205 rw_wunlock(&tidhash_lock);
1206 }
1207
1208 void
tidhash_remove(struct thread * td)1209 tidhash_remove(struct thread *td)
1210 {
1211 rw_wlock(&tidhash_lock);
1212 LIST_REMOVE(td, td_hash);
1213 rw_wunlock(&tidhash_lock);
1214 }
1215