xref: /trueos/lib/libkse/thread/thr_kern.c (revision 420be2d1af153983145ce323e4e2d26cc85e0505)
1 /*
2  * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
3  * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
4  * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by John Birrell.
18  * 4. Neither the name of the author nor the names of any co-contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include <sys/types.h>
39 #include <sys/kse.h>
40 #include <sys/ptrace.h>
41 #include <sys/signalvar.h>
42 #include <sys/queue.h>
43 #include <machine/atomic.h>
44 #include <machine/sigframe.h>
45 
46 #include <assert.h>
47 #include <errno.h>
48 #include <signal.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <time.h>
52 #include <ucontext.h>
53 #include <unistd.h>
54 
55 #include "atomic_ops.h"
56 #include "thr_private.h"
57 #include "libc_private.h"
58 #ifdef NOTYET
59 #include "spinlock.h"
60 #endif
61 
/*
 * Debug tracing for this file.  Uncomment DEBUG_THREAD_KERN to route
 * DBG_MSG() to stdout_debug(); otherwise DBG_MSG() expands to nothing
 * and its arguments are discarded at preprocessing time.
 */
/* #define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG		stdout_debug
#else
#define DBG_MSG(...)
#endif

/*
 * Define a high water mark for the maximum number of threads that
 * will be cached.  Once this level is reached, any extra threads
 * will be free()'d.
 */
#define	MAX_CACHED_THREADS	100
/*
 * Define high water marks for the maximum number of KSEs and KSE groups
 * that will be cached.  Because we support 1:1 threading, there could be
 * as many KSEs and KSE groups as there are threads.  Once these levels
 * are reached, any extra KSEs and KSE groups will be free()'d.
 *
 * The limit is 50 when _thread_scope_system <= 0 and 100 otherwise.
 */
#define	MAX_CACHED_KSES		((_thread_scope_system <= 0) ? 50 : 100)
#define	MAX_CACHED_KSEGS	((_thread_scope_system <= 0) ? 50 : 100)

/*
 * Point the KSE mailbox's current-thread slot at the given thread's
 * thread mailbox.  The expansion is fully parenthesized so it is safe
 * in any expression context.
 */
#define	KSE_SET_MBOX(kse, thrd) \
	((kse)->k_kcb->kcb_kmbx.km_curthread = &(thrd)->tcb->tcb_tmbx)

/* Set the KF_EXITED bit in the KSE's flags. */
#define	KSE_SET_EXITED(kse)	((kse)->k_flags |= KF_EXITED)

/*
 * Macros for manipulating the run queues.  The priority queue
 * routines use the thread's pqe link and also handle the setting
 * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
 *
 * KSE_RUNQ_FIRST() uses _pq_first_debug() instead of _pq_first()
 * whenever _libkse_debug is non-zero.
 */
#define	KSE_RUNQ_INSERT_HEAD(kse, thrd)			\
	_pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_INSERT_TAIL(kse, thrd)			\
	_pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_REMOVE(kse, thrd)			\
	_pq_remove(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_FIRST(kse)				\
	((_libkse_debug == 0) ?				\
	 _pq_first(&(kse)->k_schedq->sq_runq) :		\
	 _pq_first_debug(&(kse)->k_schedq->sq_runq))

/* Value of the pq_threads field of the KSE's run queue. */
#define KSE_RUNQ_THREADS(kse)	((kse)->k_schedq->sq_runq.pq_threads)

/*
 * True when a cancellation request is pending (THR_CANCELLING),
 * cancellation is not disabled, and the thread is either at a
 * cancellation point or has asynchronous cancellation enabled.
 */
#define THR_NEED_CANCEL(thrd)						\
	 (((thrd)->cancelflags & THR_CANCELLING) != 0 &&		\
	  ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&	\
	  (((thrd)->cancelflags & THR_AT_CANCEL_POINT) != 0 ||		\
	   ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))

/*
 * True when a cancellation request is pending, cancellation is not
 * disabled, the thread is NOT at a cancellation point, and asynchronous
 * cancellation is enabled.
 */
#define THR_NEED_ASYNC_CANCEL(thrd)					\
	 (((thrd)->cancelflags & THR_CANCELLING) != 0 &&		\
	  ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&	\
	  (((thrd)->cancelflags & THR_AT_CANCEL_POINT) == 0 &&		\
	   ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))

119 /*
120  * We've got to keep track of everything that is allocated, not only
121  * to have a speedy free list, but also so they can be deallocated
122  * after a fork().
123  */
124 static TAILQ_HEAD(, kse)	active_kseq;
125 static TAILQ_HEAD(, kse)	free_kseq;
126 static TAILQ_HEAD(, kse_group)	free_kse_groupq;
127 static TAILQ_HEAD(, kse_group)	active_kse_groupq;
128 static TAILQ_HEAD(, kse_group)	gc_ksegq;
129 static struct lock		kse_lock;	/* also used for kseg queue */
130 static int			free_kse_count = 0;
131 static int			free_kseg_count = 0;
132 static TAILQ_HEAD(, pthread)	free_threadq;
133 static struct lock		thread_lock;
134 static int			free_thread_count = 0;
135 static int			inited = 0;
136 static int			active_kse_count = 0;
137 static int			active_kseg_count = 0;
138 static u_int64_t		next_uniqueid = 1;
139 
140 LIST_HEAD(thread_hash_head, pthread);
141 #define THREAD_HASH_QUEUES	127
142 static struct thread_hash_head	thr_hashtable[THREAD_HASH_QUEUES];
143 #define	THREAD_HASH(thrd)	((unsigned long)thrd % THREAD_HASH_QUEUES)
144 
145 /* Lock for thread tcb constructor/destructor */
146 static pthread_mutex_t		_tcb_mutex;
147 
148 #ifdef DEBUG_THREAD_KERN
149 static void	dump_queues(struct kse *curkse);
150 #endif
151 static void	kse_check_completed(struct kse *kse);
152 static void	kse_check_waitq(struct kse *kse);
153 static void	kse_fini(struct kse *curkse);
154 static void	kse_reinit(struct kse *kse, int sys_scope);
155 static void	kse_sched_multi(struct kse_mailbox *kmbx);
156 static void	kse_sched_single(struct kse_mailbox *kmbx);
157 static void	kse_switchout_thread(struct kse *kse, struct pthread *thread);
158 static void	kse_wait(struct kse *kse, struct pthread *td_wait, int sigseq);
159 static void	kse_free_unlocked(struct kse *kse);
160 static void	kse_destroy(struct kse *kse);
161 static void	kseg_free_unlocked(struct kse_group *kseg);
162 static void	kseg_init(struct kse_group *kseg);
163 static void	kseg_reinit(struct kse_group *kseg);
164 static void	kseg_destroy(struct kse_group *kseg);
165 static void	kse_waitq_insert(struct pthread *thread);
166 static void	kse_wakeup_multi(struct kse *curkse);
167 static struct kse_mailbox *kse_wakeup_one(struct pthread *thread);
168 static void	thr_cleanup(struct kse *kse, struct pthread *curthread);
169 static void	thr_link(struct pthread *thread);
170 static void	thr_resume_wrapper(int sig, siginfo_t *, ucontext_t *);
171 static void	thr_resume_check(struct pthread *curthread, ucontext_t *ucp);
172 static int	thr_timedout(struct pthread *thread, struct timespec *curtime);
173 static void	thr_unlink(struct pthread *thread);
174 static void	thr_destroy(struct pthread *curthread, struct pthread *thread);
175 static void	thread_gc(struct pthread *thread);
176 static void	kse_gc(struct pthread *thread);
177 static void	kseg_gc(struct pthread *thread);
178 
179 static __inline void
thr_accounting(struct pthread * thread)180 thr_accounting(struct pthread *thread)
181 {
182 	if ((thread->slice_usec != -1) &&
183 	    (thread->slice_usec <= TIMESLICE_USEC) &&
184 	    (thread->attr.sched_policy != SCHED_FIFO)) {
185 		thread->slice_usec += (thread->tcb->tcb_tmbx.tm_uticks
186 		    + thread->tcb->tcb_tmbx.tm_sticks) * _clock_res_usec;
187 		/* Check for time quantum exceeded: */
188 		if (thread->slice_usec > TIMESLICE_USEC)
189 			thread->slice_usec = -1;
190 	}
191 	thread->tcb->tcb_tmbx.tm_uticks = 0;
192 	thread->tcb->tcb_tmbx.tm_sticks = 0;
193 }
194 
195 /*
196  * This is called after a fork().
197  * No locks need to be taken here since we are guaranteed to be
198  * single threaded.
199  *
200  * XXX
201  * POSIX says for threaded process, fork() function is used
202  * only to run new programs, and the effects of calling functions
203  * that require certain resources between the call to fork() and
204  * the call to an exec function are undefined.
205  *
206  * It is not safe to free memory after fork(), because these data
207  * structures may be in inconsistent state.
208  */
209 void
_kse_single_thread(struct pthread * curthread)210 _kse_single_thread(struct pthread *curthread)
211 {
212 #ifdef NOTYET
213 	struct kse *kse;
214 	struct kse_group *kseg;
215 	struct pthread *thread;
216 
217 	_thr_spinlock_init();
218 	*__malloc_lock = (spinlock_t)_SPINLOCK_INITIALIZER;
219 	if (__isthreaded) {
220 		_thr_rtld_fini();
221 		_thr_signal_deinit();
222 	}
223 	__isthreaded = 0;
224 	/*
225 	 * Restore signal mask early, so any memory problems could
226 	 * dump core.
227 	 */
228 	__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
229 	_thread_active_threads = 1;
230 
231 	curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
232 	curthread->attr.flags &= ~PTHREAD_SCOPE_PROCESS;
233 	curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
234 
235 	/*
236 	 * Enter a loop to remove and free all threads other than
237 	 * the running thread from the active thread list:
238 	 */
239 	while ((thread = TAILQ_FIRST(&_thread_list)) != NULL) {
240 		THR_GCLIST_REMOVE(thread);
241 		/*
242 		 * Remove this thread from the list (the current
243 		 * thread will be removed but re-added by libpthread
244 		 * initialization.
245 		 */
246 		TAILQ_REMOVE(&_thread_list, thread, tle);
247 		/* Make sure this isn't the running thread: */
248 		if (thread != curthread) {
249 			_thr_stack_free(&thread->attr);
250 			if (thread->specific != NULL)
251 				free(thread->specific);
252 			thr_destroy(curthread, thread);
253 		}
254 	}
255 
256 	TAILQ_INIT(&curthread->mutexq);		/* initialize mutex queue */
257 	curthread->joiner = NULL;		/* no joining threads yet */
258 	curthread->refcount = 0;
259 	SIGEMPTYSET(curthread->sigpend);	/* clear pending signals */
260 
261 	/* Don't free thread-specific data as the caller may require it */
262 
263 	/* Free the free KSEs: */
264 	while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
265 		TAILQ_REMOVE(&free_kseq, kse, k_qe);
266 		kse_destroy(kse);
267 	}
268 	free_kse_count = 0;
269 
270 	/* Free the active KSEs: */
271 	while ((kse = TAILQ_FIRST(&active_kseq)) != NULL) {
272 		TAILQ_REMOVE(&active_kseq, kse, k_qe);
273 		kse_destroy(kse);
274 	}
275 	active_kse_count = 0;
276 
277 	/* Free the free KSEGs: */
278 	while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
279 		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
280 		kseg_destroy(kseg);
281 	}
282 	free_kseg_count = 0;
283 
284 	/* Free the active KSEGs: */
285 	while ((kseg = TAILQ_FIRST(&active_kse_groupq)) != NULL) {
286 		TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
287 		kseg_destroy(kseg);
288 	}
289 	active_kseg_count = 0;
290 
291 	/* Free the free threads. */
292 	while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
293 		TAILQ_REMOVE(&free_threadq, thread, tle);
294 		thr_destroy(curthread, thread);
295 	}
296 	free_thread_count = 0;
297 
298 	/* Free the to-be-gc'd threads. */
299 	while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
300 		TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
301 		thr_destroy(curthread, thread);
302 	}
303 	TAILQ_INIT(&gc_ksegq);
304 	_gc_count = 0;
305 
306 	if (inited != 0) {
307 		/*
308 		 * Destroy these locks; they'll be recreated to assure they
309 		 * are in the unlocked state.
310 		 */
311 		_lock_destroy(&kse_lock);
312 		_lock_destroy(&thread_lock);
313 		_lock_destroy(&_thread_list_lock);
314 		inited = 0;
315 	}
316 
317 	/* We're no longer part of any lists */
318 	curthread->tlflags = 0;
319 
320 	/*
321 	 * After a fork, we are still operating on the thread's original
322 	 * stack.  Don't clear the THR_FLAGS_USER from the thread's
323 	 * attribute flags.
324 	 */
325 
326 	/* Initialize the threads library. */
327 	curthread->kse = NULL;
328 	curthread->kseg = NULL;
329 	_kse_initial = NULL;
330 	_libpthread_init(curthread);
331 #else
332 	int i;
333 
334 	/* Reset the current thread and KSE lock data. */
335 	for (i = 0; i < curthread->locklevel; i++) {
336 		_lockuser_reinit(&curthread->lockusers[i], (void *)curthread);
337 	}
338 	curthread->locklevel = 0;
339 	for (i = 0; i < curthread->kse->k_locklevel; i++) {
340 		_lockuser_reinit(&curthread->kse->k_lockusers[i],
341 		    (void *)curthread->kse);
342 		_LCK_SET_PRIVATE2(&curthread->kse->k_lockusers[i], NULL);
343 	}
344 	curthread->kse->k_locklevel = 0;
345 
346 	/*
347 	 * Reinitialize the thread and signal locks so that
348 	 * sigaction() will work after a fork().
349 	 */
350 	_lock_reinit(&curthread->lock, LCK_ADAPTIVE, _thr_lock_wait,
351 	    _thr_lock_wakeup);
352 	_lock_reinit(&_thread_signal_lock, LCK_ADAPTIVE, _kse_lock_wait,
353 	    _kse_lock_wakeup);
354 
355 	_thr_spinlock_init();
356 	if (__isthreaded) {
357 		_thr_rtld_fini();
358 		_thr_signal_deinit();
359 	}
360 	__isthreaded = 0;
361 	curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
362 	curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
363 
364 	/*
365 	 * After a fork, it is possible that an upcall occurs in
366 	 * the parent KSE that fork()'d before the child process
367 	 * is fully created and before its vm space is copied.
368 	 * During the upcall, the tcb is set to null or to another
369 	 * thread, and this is what gets copied in the child process
370 	 * when the vm space is cloned sometime after the upcall
371 	 * occurs.  Note that we shouldn't have to set the kcb, but
372 	 * we do it for completeness.
373 	 */
374 	_kcb_set(curthread->kse->k_kcb);
375 	_tcb_set(curthread->kse->k_kcb, curthread->tcb);
376 
377 	/* After a fork(), there child should have no pending signals. */
378 	sigemptyset(&curthread->sigpend);
379 
380 	/*
381 	 * Restore signal mask early, so any memory problems could
382 	 * dump core.
383 	 */
384 	sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
385 	_thread_active_threads = 1;
386 #endif
387 }
388 
389 /*
390  * This is used to initialize housekeeping and to initialize the
391  * KSD for the KSE.
392  */
393 void
_kse_init(void)394 _kse_init(void)
395 {
396 	if (inited == 0) {
397 		TAILQ_INIT(&active_kseq);
398 		TAILQ_INIT(&active_kse_groupq);
399 		TAILQ_INIT(&free_kseq);
400 		TAILQ_INIT(&free_kse_groupq);
401 		TAILQ_INIT(&free_threadq);
402 		TAILQ_INIT(&gc_ksegq);
403 		if (_lock_init(&kse_lock, LCK_ADAPTIVE,
404 		    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
405 			PANIC("Unable to initialize free KSE queue lock");
406 		if (_lock_init(&thread_lock, LCK_ADAPTIVE,
407 		    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
408 			PANIC("Unable to initialize free thread queue lock");
409 		if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
410 		    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
411 			PANIC("Unable to initialize thread list lock");
412 		_pthread_mutex_init(&_tcb_mutex, NULL);
413 		active_kse_count = 0;
414 		active_kseg_count = 0;
415 		_gc_count = 0;
416 		inited = 1;
417 	}
418 }
419 
420 /*
421  * This is called when the first thread (other than the initial
422  * thread) is created.
423  */
424 int
_kse_setthreaded(int threaded)425 _kse_setthreaded(int threaded)
426 {
427 	sigset_t sigset;
428 
429 	if ((threaded != 0) && (__isthreaded == 0)) {
430 		SIGFILLSET(sigset);
431 		__sys_sigprocmask(SIG_SETMASK, &sigset, &_thr_initial->sigmask);
432 
433 		/*
434 		 * Tell the kernel to create a KSE for the initial thread
435 		 * and enable upcalls in it.
436 		 */
437 		_kse_initial->k_flags |= KF_STARTED;
438 
439 		if (_thread_scope_system <= 0) {
440 			_thr_initial->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
441 			_kse_initial->k_kseg->kg_flags &= ~KGF_SINGLE_THREAD;
442 			_kse_initial->k_kcb->kcb_kmbx.km_curthread = NULL;
443 		}
444 		else {
445 			/*
446 			 * For bound thread, kernel reads mailbox pointer
447 			 * once, we'd set it here before calling kse_create.
448 			 */
449 			_tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
450 			KSE_SET_MBOX(_kse_initial, _thr_initial);
451 			_kse_initial->k_kcb->kcb_kmbx.km_flags |= KMF_BOUND;
452 		}
453 
454 		/*
455 		 * Locking functions in libc are required when there are
456 		 * threads other than the initial thread.
457 		 */
458 		_thr_rtld_init();
459 
460 		__isthreaded = 1;
461 		if (kse_create(&_kse_initial->k_kcb->kcb_kmbx, 0) != 0) {
462 			_kse_initial->k_flags &= ~KF_STARTED;
463 			__isthreaded = 0;
464 			PANIC("kse_create() failed\n");
465 			return (-1);
466 		}
467 		_thr_initial->tcb->tcb_tmbx.tm_lwp =
468 			_kse_initial->k_kcb->kcb_kmbx.km_lwp;
469 		_thread_activated = 1;
470 
471 #ifndef SYSTEM_SCOPE_ONLY
472 		if (_thread_scope_system <= 0) {
473 			/* Set current thread to initial thread */
474 			_tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
475 			KSE_SET_MBOX(_kse_initial, _thr_initial);
476 			_thr_start_sig_daemon();
477 			_thr_setmaxconcurrency();
478 		}
479 		else
480 #endif
481 			__sys_sigprocmask(SIG_SETMASK, &_thr_initial->sigmask,
482 			    NULL);
483 	}
484 	return (0);
485 }
486 
487 /*
488  * Lock wait and wakeup handlers for KSE locks.  These are only used by
489  * KSEs, and should never be used by threads.  KSE locks include the
490  * KSE group lock (used for locking the scheduling queue) and the
491  * kse_lock defined above.
492  *
493  * When a KSE lock attempt blocks, the entire KSE blocks allowing another
494  * KSE to run.  For the most part, it doesn't make much sense to try and
495  * schedule another thread because you need to lock the scheduling queue
496  * in order to do that.  And since the KSE lock is used to lock the scheduling
497  * queue, you would just end up blocking again.
498  */
499 void
_kse_lock_wait(struct lock * lock __unused,struct lockuser * lu)500 _kse_lock_wait(struct lock *lock __unused, struct lockuser *lu)
501 {
502 	struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
503 	struct timespec ts;
504 	int saved_flags;
505 
506 	if (curkse->k_kcb->kcb_kmbx.km_curthread != NULL)
507 		PANIC("kse_lock_wait does not disable upcall.\n");
508 	/*
509 	 * Enter a loop to wait until we get the lock.
510 	 */
511 	ts.tv_sec = 0;
512 	ts.tv_nsec = 1000000;  /* 1 sec */
513 	while (!_LCK_GRANTED(lu)) {
514 		/*
515 		 * Yield the kse and wait to be notified when the lock
516 		 * is granted.
517 		 */
518 		saved_flags = curkse->k_kcb->kcb_kmbx.km_flags;
519 		curkse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL |
520 		    KMF_NOCOMPLETED;
521 		kse_release(&ts);
522 		curkse->k_kcb->kcb_kmbx.km_flags = saved_flags;
523 	}
524 }
525 
526 void
_kse_lock_wakeup(struct lock * lock,struct lockuser * lu)527 _kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
528 {
529 	struct kse *curkse;
530 	struct kse *kse;
531 	struct kse_mailbox *mbx;
532 
533 	curkse = _get_curkse();
534 	kse = (struct kse *)_LCK_GET_PRIVATE(lu);
535 
536 	if (kse == curkse)
537 		PANIC("KSE trying to wake itself up in lock");
538 	else {
539 		mbx = &kse->k_kcb->kcb_kmbx;
540 		_lock_grant(lock, lu);
541 		/*
542 		 * Notify the owning kse that it has the lock.
543 		 * It is safe to pass invalid address to kse_wakeup
544 		 * even if the mailbox is not in kernel at all,
545 		 * and waking up a wrong kse is also harmless.
546 		 */
547 		kse_wakeup(mbx);
548 	}
549 }
550 
551 /*
552  * Thread wait and wakeup handlers for thread locks.  These are only used
553  * by threads, never by KSEs.  Thread locks include the per-thread lock
554  * (defined in its structure), and condition variable and mutex locks.
555  */
556 void
_thr_lock_wait(struct lock * lock __unused,struct lockuser * lu)557 _thr_lock_wait(struct lock *lock __unused, struct lockuser *lu)
558 {
559 	struct pthread *curthread = (struct pthread *)lu->lu_private;
560 
561 	do {
562 		THR_LOCK_SWITCH(curthread);
563 		THR_SET_STATE(curthread, PS_LOCKWAIT);
564 		_thr_sched_switch_unlocked(curthread);
565 	} while (!_LCK_GRANTED(lu));
566 }
567 
568 void
_thr_lock_wakeup(struct lock * lock __unused,struct lockuser * lu)569 _thr_lock_wakeup(struct lock *lock __unused, struct lockuser *lu)
570 {
571 	struct pthread *thread;
572 	struct pthread *curthread;
573 	struct kse_mailbox *kmbx;
574 
575 	curthread = _get_curthread();
576 	thread = (struct pthread *)_LCK_GET_PRIVATE(lu);
577 
578 	THR_SCHED_LOCK(curthread, thread);
579 	_lock_grant(lock, lu);
580 	kmbx = _thr_setrunnable_unlocked(thread);
581 	THR_SCHED_UNLOCK(curthread, thread);
582 	if (kmbx != NULL)
583 		kse_wakeup(kmbx);
584 }
585 
586 kse_critical_t
_kse_critical_enter(void)587 _kse_critical_enter(void)
588 {
589 	kse_critical_t crit;
590 
591 	crit = (kse_critical_t)_kcb_critical_enter();
592 	return (crit);
593 }
594 
595 void
_kse_critical_leave(kse_critical_t crit)596 _kse_critical_leave(kse_critical_t crit)
597 {
598 	struct pthread *curthread;
599 
600 	_kcb_critical_leave((struct kse_thr_mailbox *)crit);
601 	if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
602 		THR_YIELD_CHECK(curthread);
603 }
604 
/* Return the result of _kcb_in_critical() for the caller. */
int
_kse_in_critical(void)
{
	return (_kcb_in_critical());
}

611 void
_thr_critical_enter(struct pthread * thread)612 _thr_critical_enter(struct pthread *thread)
613 {
614 	thread->critical_count++;
615 }
616 
617 void
_thr_critical_leave(struct pthread * thread)618 _thr_critical_leave(struct pthread *thread)
619 {
620 	thread->critical_count--;
621 	THR_YIELD_CHECK(thread);
622 }
623 
624 void
_thr_sched_switch(struct pthread * curthread)625 _thr_sched_switch(struct pthread *curthread)
626 {
627 	struct kse *curkse;
628 
629 	(void)_kse_critical_enter();
630 	curkse = _get_curkse();
631 	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
632 	_thr_sched_switch_unlocked(curthread);
633 }
634 
635 /*
636  * XXX - We may need to take the scheduling lock before calling
637  *       this, or perhaps take the lock within here before
638  *       doing anything else.
639  */
640 void
_thr_sched_switch_unlocked(struct pthread * curthread)641 _thr_sched_switch_unlocked(struct pthread *curthread)
642 {
643 	struct kse *curkse;
644 	volatile int resume_once = 0;
645 	ucontext_t *uc;
646 
647 	/* We're in the scheduler, 5 by 5: */
648 	curkse = curthread->kse;
649 
650 	curthread->need_switchout = 1;	/* The thread yielded on its own. */
651 	curthread->critical_yield = 0;	/* No need to yield anymore. */
652 
653 	/* Thread can unlock the scheduler lock. */
654 	curthread->lock_switch = 1;
655 
656 	if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
657 		kse_sched_single(&curkse->k_kcb->kcb_kmbx);
658 	else {
659 		if (__predict_false(_libkse_debug != 0)) {
660 			/*
661 			 * Because debugger saves single step status in thread
662 			 * mailbox's tm_dflags, we can safely clear single
663 			 * step status here. the single step status will be
664 			 * restored by kse_switchin when the thread is
665 			 * switched in again. This also lets uts run in full
666 			 * speed.
667 			 */
668 			 ptrace(PT_CLEARSTEP, curkse->k_kcb->kcb_kmbx.km_lwp,
669 				(caddr_t) 1, 0);
670 		}
671 
672 		KSE_SET_SWITCH(curkse);
673 		_thread_enter_uts(curthread->tcb, curkse->k_kcb);
674 	}
675 
676 	/*
677 	 * Unlock the scheduling queue and leave the
678 	 * critical region.
679 	 */
680 	/* Don't trust this after a switch! */
681 	curkse = curthread->kse;
682 
683 	curthread->lock_switch = 0;
684 	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
685 	_kse_critical_leave(&curthread->tcb->tcb_tmbx);
686 
687 	/*
688 	 * This thread is being resumed; check for cancellations.
689 	 */
690 	if (THR_NEED_ASYNC_CANCEL(curthread) && !THR_IN_CRITICAL(curthread)) {
691 		uc = alloca(sizeof(ucontext_t));
692 		resume_once = 0;
693 		THR_GETCONTEXT(uc);
694 		if (resume_once == 0) {
695 			resume_once = 1;
696 			curthread->check_pending = 0;
697 			thr_resume_check(curthread, uc);
698 		}
699 	}
700 	THR_ACTIVATE_LAST_LOCK(curthread);
701 }
702 
703 /*
704  * This is the scheduler for a KSE which runs a scope system thread.
705  * The multi-thread KSE scheduler should also work for a single threaded
706  * KSE, but we use a separate scheduler so that it can be fine-tuned
707  * to be more efficient (and perhaps not need a separate stack for
708  * the KSE, allowing it to use the thread's stack).
709  */
710 
711 static void
kse_sched_single(struct kse_mailbox * kmbx)712 kse_sched_single(struct kse_mailbox *kmbx)
713 {
714 	struct kse *curkse;
715 	struct pthread *curthread;
716 	struct timespec ts;
717 	sigset_t sigmask;
718 	int i, sigseqno, level, first = 0;
719 
720 	curkse = (struct kse *)kmbx->km_udata;
721 	curthread = curkse->k_curthread;
722 
723 	if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
724 		/* Setup this KSEs specific data. */
725 		_kcb_set(curkse->k_kcb);
726 		_tcb_set(curkse->k_kcb, curthread->tcb);
727 		curkse->k_flags |= KF_INITIALIZED;
728 		first = 1;
729 		curthread->active = 1;
730 
731 		/* Setup kernel signal masks for new thread. */
732 		__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
733 		/*
734 		 * Enter critical region, this is meanless for bound thread,
735 		 * It is used to let other code work, those code want mailbox
736 		 * to be cleared.
737 		 */
738 		(void)_kse_critical_enter();
739  	} else {
740 		/*
741 		 * Bound thread always has tcb set, this prevent some
742 		 * code from blindly setting bound thread tcb to NULL,
743 		 * buggy code ?
744 		 */
745 		_tcb_set(curkse->k_kcb, curthread->tcb);
746 	}
747 
748 	curthread->critical_yield = 0;
749 	curthread->need_switchout = 0;
750 
751 	/*
752 	 * Lock the scheduling queue.
753 	 *
754 	 * There is no scheduling queue for single threaded KSEs,
755 	 * but we need a lock for protection regardless.
756 	 */
757 	if (curthread->lock_switch == 0)
758 		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
759 
760 	/*
761 	 * This has to do the job of kse_switchout_thread(), only
762 	 * for a single threaded KSE/KSEG.
763 	 */
764 
765 	switch (curthread->state) {
766 	case PS_MUTEX_WAIT:
767 	case PS_COND_WAIT:
768 		if (THR_NEED_CANCEL(curthread)) {
769 			curthread->interrupted = 1;
770 			curthread->continuation = _thr_finish_cancellation;
771 			THR_SET_STATE(curthread, PS_RUNNING);
772 		}
773 		break;
774 
775 	case PS_LOCKWAIT:
776 		/*
777 		 * This state doesn't timeout.
778 		 */
779 		curthread->wakeup_time.tv_sec = -1;
780 		curthread->wakeup_time.tv_nsec = -1;
781 		level = curthread->locklevel - 1;
782 		if (_LCK_GRANTED(&curthread->lockusers[level]))
783 			THR_SET_STATE(curthread, PS_RUNNING);
784 		break;
785 
786 	case PS_DEAD:
787 		/* Unlock the scheduling queue and exit the KSE and thread. */
788 		thr_cleanup(curkse, curthread);
789 		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
790 		PANIC("bound thread shouldn't get here\n");
791 		break;
792 
793 	case PS_JOIN:
794 		if (THR_NEED_CANCEL(curthread)) {
795 			curthread->join_status.thread = NULL;
796 			THR_SET_STATE(curthread, PS_RUNNING);
797 		} else {
798 			/*
799 			 * This state doesn't timeout.
800 			 */
801 			curthread->wakeup_time.tv_sec = -1;
802 			curthread->wakeup_time.tv_nsec = -1;
803 		}
804 		break;
805 
806 	case PS_SUSPENDED:
807 		if (THR_NEED_CANCEL(curthread)) {
808 			curthread->interrupted = 1;
809 			THR_SET_STATE(curthread, PS_RUNNING);
810 		} else {
811 			/*
812 			 * These states don't timeout.
813 			 */
814 			curthread->wakeup_time.tv_sec = -1;
815 			curthread->wakeup_time.tv_nsec = -1;
816 		}
817 		break;
818 
819 	case PS_RUNNING:
820 		if ((curthread->flags & THR_FLAGS_SUSPENDED) != 0 &&
821 		    !THR_NEED_CANCEL(curthread)) {
822 			THR_SET_STATE(curthread, PS_SUSPENDED);
823 			/*
824 			 * These states don't timeout.
825 			 */
826 			curthread->wakeup_time.tv_sec = -1;
827 			curthread->wakeup_time.tv_nsec = -1;
828 		}
829 		break;
830 
831 	case PS_SIGWAIT:
832 		PANIC("bound thread does not have SIGWAIT state\n");
833 
834 	case PS_SLEEP_WAIT:
835 		PANIC("bound thread does not have SLEEP_WAIT state\n");
836 
837 	case PS_SIGSUSPEND:
838 		PANIC("bound thread does not have SIGSUSPEND state\n");
839 
840 	case PS_DEADLOCK:
841 		/*
842 		 * These states don't timeout and don't need
843 		 * to be in the waiting queue.
844 		 */
845 		curthread->wakeup_time.tv_sec = -1;
846 		curthread->wakeup_time.tv_nsec = -1;
847 		break;
848 
849 	default:
850 		PANIC("Unknown state\n");
851 		break;
852 	}
853 
854 	while (curthread->state != PS_RUNNING) {
855 		sigseqno = curkse->k_sigseqno;
856 		if (curthread->check_pending != 0) {
857 			/*
858 			 * Install pending signals into the frame, possible
859 			 * cause mutex or condvar backout.
860 			 */
861 			curthread->check_pending = 0;
862 			SIGFILLSET(sigmask);
863 
864 			/*
865 			 * Lock out kernel signal code when we are processing
866 			 * signals, and get a fresh copy of signal mask.
867 			 */
868 			__sys_sigprocmask(SIG_SETMASK, &sigmask,
869 					  &curthread->sigmask);
870 			for (i = 1; i <= _SIG_MAXSIG; i++) {
871 				if (SIGISMEMBER(curthread->sigmask, i))
872 					continue;
873 				if (SIGISMEMBER(curthread->sigpend, i))
874 					(void)_thr_sig_add(curthread, i,
875 					    &curthread->siginfo[i-1]);
876 			}
877 			__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask,
878 				NULL);
879 			/* The above code might make thread runnable */
880 			if (curthread->state == PS_RUNNING)
881 				break;
882 		}
883 		THR_DEACTIVATE_LAST_LOCK(curthread);
884 		kse_wait(curkse, curthread, sigseqno);
885 		THR_ACTIVATE_LAST_LOCK(curthread);
886 		if (curthread->wakeup_time.tv_sec >= 0) {
887 			KSE_GET_TOD(curkse, &ts);
888 			if (thr_timedout(curthread, &ts)) {
889 				/* Indicate the thread timedout: */
890 				curthread->timeout = 1;
891 				/* Make the thread runnable. */
892 				THR_SET_STATE(curthread, PS_RUNNING);
893 			}
894 		}
895 	}
896 
897 	if (curthread->lock_switch == 0) {
898 		/* Unlock the scheduling queue. */
899 		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
900 	}
901 
902 	DBG_MSG("Continuing bound thread %p\n", curthread);
903 	if (first) {
904 		_kse_critical_leave(&curthread->tcb->tcb_tmbx);
905 		pthread_exit(curthread->start_routine(curthread->arg));
906 	}
907 }
908 
#ifdef DEBUG_THREAD_KERN
/*
 * Debug aid: print every thread on the waiting queue of the given
 * KSE's group, along with its state and blocked flag.
 */
static void
dump_queues(struct kse *curkse)
{
	struct pthread *thread;

	DBG_MSG("Threads in waiting queue:\n");
	TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
		DBG_MSG("  thread %p, state %d, blocked %d\n",
		    thread, thread->state, thread->blocked);
	}
}
#endif

/*
 * This is the scheduler for a KSE which runs multiple threads.
 *
 * kmbx is the mailbox of the KSE being scheduled; the KSE itself is
 * recovered from kmbx->km_udata.  The scheduling lock is taken here
 * unless the KSE is flagged as arriving from a scheduler switch
 * (KSE_IS_SWITCH).  With the lock held, completed and timed-out threads
 * are folded back into the run queue, the previously current thread is
 * switched out, and the first thread in the run queue is resumed with
 * _thread_switch().  When the group has no threads left, or the KSE is
 * flagged KF_TERMINATED, kse_fini() is called instead.
 */
static void
kse_sched_multi(struct kse_mailbox *kmbx)
{
	struct kse *curkse;
	struct pthread *curthread, *td_wait;
	int ret;

	curkse = (struct kse *)kmbx->km_udata;
	THR_ASSERT(curkse->k_kcb->kcb_kmbx.km_curthread == NULL,
	    "Mailbox not null in kse_sched_multi");

	/* Check for first time initialization: */
	if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
		/* Setup this KSEs specific data. */
		_kcb_set(curkse->k_kcb);

		/* Set this before grabbing the context. */
		curkse->k_flags |= KF_INITIALIZED;
	}

	/*
	 * No current thread anymore, calling _get_curthread in UTS
	 * should dump core
	 */
	_tcb_set(curkse->k_kcb, NULL);

	/*
	 * If this is an upcall; take the scheduler lock.  Otherwise only
	 * the switch flag is cleared and no lock is taken here.
	 */
	if (!KSE_IS_SWITCH(curkse))
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
	else
		KSE_CLEAR_SWITCH(curkse);

	/* A KSE that was idle is no longer counted as idle in its group. */
	if (KSE_IS_IDLE(curkse)) {
		KSE_CLEAR_IDLE(curkse);
		curkse->k_kseg->kg_idle_kses--;
	}

	/*
	 * Now that the scheduler lock is held, get the current
	 * thread.  The KSE's current thread cannot be safely
	 * examined without the lock because it could have returned
	 * as completed on another KSE.  See kse_check_completed().
	 */
	curthread = curkse->k_curthread;

	/*
	 * If the current thread was completed in another KSE, then
	 * it will be in the run queue.  Don't mark it as being blocked.
	 */
	if ((curthread != NULL) &&
	    ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
	    (curthread->need_switchout == 0)) {
		/*
		 * Assume the current thread is blocked; when the
		 * completed threads are checked and if the current
		 * thread is among the completed, the blocked flag
		 * will be cleared.
		 */
		curthread->blocked = 1;
		DBG_MSG("Running thread %p is now blocked in kernel.\n",
		    curthread);
	}

	/* Check for any unblocked threads in the kernel. */
	kse_check_completed(curkse);

	/*
	 * Check for threads that have timed-out.
	 */
	kse_check_waitq(curkse);

	/*
	 * Switchout the current thread, if necessary, as the last step
	 * so that it is inserted into the run queue (if it's runnable)
	 * _after_ any other threads that were added to it above.
	 */
	if (curthread == NULL)
		;  /* Nothing to do here. */
	else if ((curthread->need_switchout == 0) && DBG_CAN_RUN(curthread) &&
	    (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
		/*
		 * Resume the thread and tell it to yield when
		 * it leaves the critical region.
		 */
		curthread->critical_yield = 1;
		curthread->active = 1;
		/* It may have been queued by kse_check_completed() above. */
		if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
			KSE_RUNQ_REMOVE(curkse, curthread);
		curkse->k_curthread = curthread;
		curthread->kse = curkse;
		DBG_MSG("Continuing thread %p in critical region\n",
		    curthread);
		kse_wakeup_multi(curkse);
		/* The lock is released here, before switching to the thread. */
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
		if (ret != 0)
			PANIC("Can't resume thread in critical region\n");
	}
	else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) {
		/* Not already queued: clear its LWP id and switch it out. */
		curthread->tcb->tcb_tmbx.tm_lwp = 0;
		kse_switchout_thread(curkse, curthread);
	}
	curkse->k_curthread = NULL;

#ifdef DEBUG_THREAD_KERN
	dump_queues(curkse);
#endif

	/* Check if there are no threads ready to run: */
	while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
	    (curkse->k_kseg->kg_threadcount != 0) &&
	    ((curkse->k_flags & KF_TERMINATED) == 0)) {
		/*
		 * Wait for a thread to become active or until there are
		 * no more threads.
		 */
		td_wait = KSE_WAITQ_FIRST(curkse);
		kse_wait(curkse, td_wait, 0);
		/* Re-scan for completed and timed-out threads after waking. */
		kse_check_completed(curkse);
		kse_check_waitq(curkse);
	}

	/* Check for no more threads: */
	if ((curkse->k_kseg->kg_threadcount == 0) ||
	    ((curkse->k_flags & KF_TERMINATED) != 0)) {
		/*
		 * Normally this shouldn't return, but it will if there
		 * are other KSEs running that create new threads that
		 * are assigned to this KSE[G].  For instance, if a scope
		 * system thread were to create a scope process thread
		 * and this kse[g] is the initial kse[g], then that newly
		 * created thread would be assigned to us (the initial
		 * kse[g]).
		 */
		kse_wakeup_multi(curkse);
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		kse_fini(curkse);
		/* never returns */
	}

	THR_ASSERT(curthread != NULL,
	    "Return from kse_wait/fini without thread.");
	THR_ASSERT(curthread->state != PS_DEAD,
	    "Trying to resume dead thread!");
	KSE_RUNQ_REMOVE(curkse, curthread);

	/*
	 * Make the selected thread the current thread.
	 */
	curkse->k_curthread = curthread;

	/*
	 * Make sure the current thread's kse points to this kse.
	 */
	curthread->kse = curkse;

	/*
	 * Reset the time slice if this thread is running for the first
	 * time or running again after using its full time slice allocation.
	 */
	if (curthread->slice_usec == -1)
		curthread->slice_usec = 0;

	/* Mark the thread active. */
	curthread->active = 1;

	/*
	 * The thread's current signal frame will only be NULL if it
	 * is being resumed after being blocked in the kernel.  In
	 * this case, and if the thread needs to run down pending
	 * signals or needs a cancellation check, we need to add a
	 * signal frame to the thread's context.
	 *
	 * The frame added here makes the thread resume in
	 * thr_resume_wrapper().
	 */
	if (curthread->lock_switch == 0 && curthread->state == PS_RUNNING &&
	    (curthread->check_pending != 0 ||
	     THR_NEED_ASYNC_CANCEL(curthread)) &&
	    !THR_IN_CRITICAL(curthread)) {
		curthread->check_pending = 0;
		signalcontext(&curthread->tcb->tcb_tmbx.tm_context, 0,
		    (__sighandler_t *)thr_resume_wrapper);
	}
	kse_wakeup_multi(curkse);
	/*
	 * Continue the thread at its current frame:
	 */
	if (curthread->lock_switch != 0) {
		/*
		 * This thread came from a scheduler switch; it will
		 * unlock the scheduler lock and set the mailbox.
		 */
		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 0);
	} else {
		/* This thread won't unlock the scheduler lock. */
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
	}
	if (ret != 0)
		PANIC("Thread has returned from _thread_switch");

	/* This point should not be reached. */
	PANIC("Thread has returned from _thread_switch");
}
1128 
1129 static void
thr_resume_wrapper(int sig __unused,siginfo_t * siginfo __unused,ucontext_t * ucp)1130 thr_resume_wrapper(int sig __unused, siginfo_t *siginfo __unused,
1131     ucontext_t *ucp)
1132 {
1133 	struct pthread *curthread = _get_curthread();
1134 	struct kse *curkse;
1135 	int ret, err_save = errno;
1136 
1137 	DBG_MSG(">>> sig wrapper\n");
1138 	if (curthread->lock_switch)
1139 		PANIC("thr_resume_wrapper, lock_switch != 0\n");
1140 	thr_resume_check(curthread, ucp);
1141 	errno = err_save;
1142 	_kse_critical_enter();
1143 	curkse = curthread->kse;
1144 	curthread->tcb->tcb_tmbx.tm_context = *ucp;
1145 	ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1146 	if (ret != 0)
1147 		PANIC("thr_resume_wrapper: thread has returned "
1148 		      "from _thread_switch");
1149 	/* THR_SETCONTEXT(ucp); */ /* not work, why ? */
1150 }
1151 
/*
 * Hand the thread and its saved context to _thr_sig_rundown(), then
 * call pthread_testcancel() if the thread needs an asynchronous
 * cancellation check.
 */
static void
thr_resume_check(struct pthread *curthread, ucontext_t *ucp)
{
	/* Signals first; the cancellation test comes afterwards. */
	_thr_sig_rundown(curthread, ucp);

	if (!THR_NEED_ASYNC_CANCEL(curthread))
		return;
	pthread_testcancel();
}
1160 
/*
 * Clean up a thread.  This must be called with the thread's KSE
 * scheduling lock held.  The thread must be a thread from the
 * KSE's group.
 *
 * The thread's scheduling flags are cleared, its return value is
 * handed to a waiting joiner (if there is one) and the joiner is made
 * runnable, and the thread is put on the GC list.  The scheduling lock
 * is dropped and re-acquired along the way and is held again on
 * return.  For a system scope thread this function does not return
 * normally: it ends in kse_exit().
 */
static void
thr_cleanup(struct kse *curkse, struct pthread *thread)
{
	struct pthread *joiner;
	struct kse_mailbox *kmbx = NULL;
	int sys_scope;

	thread->active = 0;
	thread->need_switchout = 0;
	thread->lock_switch = 0;
	thread->check_pending = 0;

	if ((joiner = thread->joiner) != NULL) {
		/* Joinee scheduler lock held; joiner won't leave. */
		if (joiner->kseg == curkse->k_kseg) {
			/* Same group: the lock already held covers the joiner. */
			if (joiner->join_status.thread == thread) {
				joiner->join_status.thread = NULL;
				joiner->join_status.ret = thread->ret;
				(void)_thr_setrunnable_unlocked(joiner);
			}
		} else {
			/*
			 * The joiner belongs to another group: release our
			 * scheduling lock before taking the joiner's.
			 */
			KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
			/* The joiner may have removed itself and exited. */
			if (_thr_ref_add(thread, joiner, 0) == 0) {
				KSE_SCHED_LOCK(curkse, joiner->kseg);
				if (joiner->join_status.thread == thread) {
					joiner->join_status.thread = NULL;
					joiner->join_status.ret = thread->ret;
					kmbx = _thr_setrunnable_unlocked(joiner);
				}
				KSE_SCHED_UNLOCK(curkse, joiner->kseg);
				_thr_ref_delete(thread, joiner);
				/* Wake the returned mailbox only after unlocking. */
				if (kmbx != NULL)
					kse_wakeup(kmbx);
			}
			KSE_SCHED_LOCK(curkse, curkse->k_kseg);
		}
		/* A thread that had a joiner is treated as detached from here on. */
		thread->attr.flags |= PTHREAD_DETACHED;
	}

	if (!(sys_scope = (thread->attr.flags & PTHREAD_SCOPE_SYSTEM))) {
		/*
		 * Remove the thread from the KSEG's list of threads.
		 */
		KSEG_THRQ_REMOVE(thread->kseg, thread);
		/*
		 * Migrate the thread to the main KSE so that this
		 * KSE and KSEG can be cleaned when their last thread
		 * exits.
		 */
		thread->kseg = _kse_initial->k_kseg;
		thread->kse = _kse_initial;
	}

	/*
	 * We can't hold the thread list lock while holding the
	 * scheduler lock.
	 */
	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
	DBG_MSG("Adding thread %p to GC list\n", thread);
	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
	/* thread_gc() skips GC-list entries that lack TLFLAGS_GC_SAFE. */
	thread->tlflags |= TLFLAGS_GC_SAFE;
	THR_GCLIST_ADD(thread);
	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
	if (sys_scope) {
		/*
		 * System scope thread is single thread group,
		 * when thread is exited, its kse and ksegrp should
		 * be recycled as well.
		 * kse upcall stack belongs to thread, clear it here.
		 */
		curkse->k_stack.ss_sp = 0;
		curkse->k_stack.ss_size = 0;
		kse_exit();
		PANIC("kse_exit() failed for system scope thread");
	}
	/* Re-take the scheduling lock so the caller's locking state is restored. */
	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
}
1244 
/*
 * Run the three garbage-collection passes on behalf of curthread:
 * threads on the GC list first, then surplus cached KSEs, then surplus
 * cached KSE groups.
 */
void
_thr_gc(struct pthread *curthread)
{
	thread_gc(curthread);
	kse_gc(curthread);
	kseg_gc(curthread);
}
1252 
1253 static void
thread_gc(struct pthread * curthread)1254 thread_gc(struct pthread *curthread)
1255 {
1256 	struct pthread *td, *td_next;
1257 	kse_critical_t crit;
1258 	TAILQ_HEAD(, pthread) worklist;
1259 
1260 	TAILQ_INIT(&worklist);
1261 	crit = _kse_critical_enter();
1262 	KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1263 
1264 	/* Check the threads waiting for GC. */
1265 	for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
1266 		td_next = TAILQ_NEXT(td, gcle);
1267 		if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
1268 			continue;
1269 		else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
1270 		    ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
1271 			/*
1272 			 * The thread and KSE are operating on the same
1273 			 * stack.  Wait for the KSE to exit before freeing
1274 			 * the thread's stack as well as everything else.
1275 			 */
1276 			continue;
1277 		}
1278 		/*
1279 		 * Remove the thread from the GC list.  If the thread
1280 		 * isn't yet detached, it will get added back to the
1281 		 * GC list at a later time.
1282 		 */
1283 		THR_GCLIST_REMOVE(td);
1284 		DBG_MSG("Freeing thread %p stack\n", td);
1285 		/*
1286 		 * We can free the thread stack since it's no longer
1287 		 * in use.
1288 		 */
1289 		_thr_stack_free(&td->attr);
1290 		if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
1291 		    (td->refcount == 0)) {
1292 			/*
1293 			 * The thread has detached and is no longer
1294 			 * referenced.  It is safe to remove all
1295 			 * remnants of the thread.
1296 			 */
1297 			THR_LIST_REMOVE(td);
1298 			TAILQ_INSERT_HEAD(&worklist, td, gcle);
1299 		}
1300 	}
1301 	KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1302 	_kse_critical_leave(crit);
1303 
1304 	while ((td = TAILQ_FIRST(&worklist)) != NULL) {
1305 		TAILQ_REMOVE(&worklist, td, gcle);
1306 		/*
1307 		 * XXX we don't free initial thread and its kse
1308 		 * (if thread is a bound thread), because there might
1309 		 * have some code referencing initial thread and kse.
1310 		 */
1311 		if (td == _thr_initial) {
1312 			DBG_MSG("Initial thread won't be freed\n");
1313 			continue;
1314 		}
1315 
1316 		if ((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1317 			crit = _kse_critical_enter();
1318 			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1319 			kse_free_unlocked(td->kse);
1320 			kseg_free_unlocked(td->kseg);
1321 			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1322 			_kse_critical_leave(crit);
1323 		}
1324 		DBG_MSG("Freeing thread %p\n", td);
1325 		_thr_free(curthread, td);
1326 	}
1327 }
1328 
1329 static void
kse_gc(struct pthread * curthread)1330 kse_gc(struct pthread *curthread)
1331 {
1332 	kse_critical_t crit;
1333 	TAILQ_HEAD(, kse) worklist;
1334 	struct kse *kse;
1335 
1336 	if (free_kse_count <= MAX_CACHED_KSES)
1337 		return;
1338 	TAILQ_INIT(&worklist);
1339 	crit = _kse_critical_enter();
1340 	KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1341 	while (free_kse_count > MAX_CACHED_KSES) {
1342 		kse = TAILQ_FIRST(&free_kseq);
1343 		TAILQ_REMOVE(&free_kseq, kse, k_qe);
1344 		TAILQ_INSERT_HEAD(&worklist, kse, k_qe);
1345 		free_kse_count--;
1346 	}
1347 	KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1348 	_kse_critical_leave(crit);
1349 
1350 	while ((kse = TAILQ_FIRST(&worklist))) {
1351 		TAILQ_REMOVE(&worklist, kse, k_qe);
1352 		kse_destroy(kse);
1353 	}
1354 }
1355 
1356 static void
kseg_gc(struct pthread * curthread)1357 kseg_gc(struct pthread *curthread)
1358 {
1359 	kse_critical_t crit;
1360 	TAILQ_HEAD(, kse_group) worklist;
1361 	struct kse_group *kseg;
1362 
1363 	if (free_kseg_count <= MAX_CACHED_KSEGS)
1364 		return;
1365 	TAILQ_INIT(&worklist);
1366 	crit = _kse_critical_enter();
1367 	KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1368 	while (free_kseg_count > MAX_CACHED_KSEGS) {
1369 		kseg = TAILQ_FIRST(&free_kse_groupq);
1370 		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1371 		free_kseg_count--;
1372 		TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
1373 	}
1374 	KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1375 	_kse_critical_leave(crit);
1376 
1377 	while ((kseg = TAILQ_FIRST(&worklist))) {
1378 		TAILQ_REMOVE(&worklist, kseg, kg_qe);
1379 		kseg_destroy(kseg);
1380 	}
1381 }
1382 
1383 /*
1384  * Only new threads that are running or suspended may be scheduled.
1385  */
1386 int
_thr_schedule_add(struct pthread * curthread,struct pthread * newthread)1387 _thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1388 {
1389 	kse_critical_t crit;
1390 	int ret;
1391 
1392 	/* Add the new thread. */
1393 	thr_link(newthread);
1394 
1395 	/*
1396 	 * If this is the first time creating a thread, make sure
1397 	 * the mailbox is set for the current thread.
1398 	 */
1399 	if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1400 		/* We use the thread's stack as the KSE's stack. */
1401 		newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_sp =
1402 		    newthread->attr.stackaddr_attr;
1403 		newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_size =
1404 		    newthread->attr.stacksize_attr;
1405 
1406 		/*
1407 		 * No need to lock the scheduling queue since the
1408 		 * KSE/KSEG pair have not yet been started.
1409 		 */
1410 		KSEG_THRQ_ADD(newthread->kseg, newthread);
1411 		/* this thread never gives up kse */
1412 		newthread->active = 1;
1413 		newthread->kse->k_curthread = newthread;
1414 		newthread->kse->k_kcb->kcb_kmbx.km_flags = KMF_BOUND;
1415 		newthread->kse->k_kcb->kcb_kmbx.km_func =
1416 		    (kse_func_t *)kse_sched_single;
1417 		newthread->kse->k_kcb->kcb_kmbx.km_quantum = 0;
1418 		KSE_SET_MBOX(newthread->kse, newthread);
1419 		/*
1420 		 * This thread needs a new KSE and KSEG.
1421 		 */
1422 		newthread->kse->k_flags &= ~KF_INITIALIZED;
1423 		newthread->kse->k_flags |= KF_STARTED;
1424 		/* Fire up! */
1425 		ret = kse_create(&newthread->kse->k_kcb->kcb_kmbx, 1);
1426 		if (ret != 0)
1427 			ret = errno;
1428 	}
1429 	else {
1430 		/*
1431 		 * Lock the KSE and add the new thread to its list of
1432 		 * assigned threads.  If the new thread is runnable, also
1433 		 * add it to the KSE's run queue.
1434 		 */
1435 		crit = _kse_critical_enter();
1436 		KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1437 		KSEG_THRQ_ADD(newthread->kseg, newthread);
1438 		if (newthread->state == PS_RUNNING)
1439 			THR_RUNQ_INSERT_TAIL(newthread);
1440 		if ((newthread->kse->k_flags & KF_STARTED) == 0) {
1441 			/*
1442 			 * This KSE hasn't been started yet.  Start it
1443 			 * outside of holding the lock.
1444 			 */
1445 			newthread->kse->k_flags |= KF_STARTED;
1446 			newthread->kse->k_kcb->kcb_kmbx.km_func =
1447 			    (kse_func_t *)kse_sched_multi;
1448 			newthread->kse->k_kcb->kcb_kmbx.km_flags = 0;
1449 			kse_create(&newthread->kse->k_kcb->kcb_kmbx, 0);
1450 		 } else if ((newthread->state == PS_RUNNING) &&
1451 		     KSE_IS_IDLE(newthread->kse)) {
1452 			/*
1453 			 * The thread is being scheduled on another KSEG.
1454 			 */
1455 			kse_wakeup_one(newthread);
1456 		}
1457 		KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1458 		_kse_critical_leave(crit);
1459 		ret = 0;
1460 	}
1461 	if (ret != 0)
1462 		thr_unlink(newthread);
1463 
1464 	return (ret);
1465 }
1466 
1467 void
kse_waitq_insert(struct pthread * thread)1468 kse_waitq_insert(struct pthread *thread)
1469 {
1470 	struct pthread *td;
1471 
1472 	if (thread->wakeup_time.tv_sec == -1)
1473 		TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1474 		    pqe);
1475 	else {
1476 		td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1477 		while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1478 		    ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1479 		    ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1480 		    (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1481 			td = TAILQ_NEXT(td, pqe);
1482 		if (td == NULL)
1483 			TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1484 			    thread, pqe);
1485 		else
1486 			TAILQ_INSERT_BEFORE(td, thread, pqe);
1487 	}
1488 	thread->flags |= THR_FLAGS_IN_WAITQ;
1489 }
1490 
1491 /*
1492  * This must be called with the scheduling lock held.
1493  */
1494 static void
kse_check_completed(struct kse * kse)1495 kse_check_completed(struct kse *kse)
1496 {
1497 	struct pthread *thread;
1498 	struct kse_thr_mailbox *completed;
1499 	int sig;
1500 
1501 	if ((completed = kse->k_kcb->kcb_kmbx.km_completed) != NULL) {
1502 		kse->k_kcb->kcb_kmbx.km_completed = NULL;
1503 		while (completed != NULL) {
1504 			thread = completed->tm_udata;
1505 			DBG_MSG("Found completed thread %p, name %s\n",
1506 			    thread,
1507 			    (thread->name == NULL) ? "none" : thread->name);
1508 			thread->blocked = 0;
1509 			if (thread != kse->k_curthread) {
1510 				thr_accounting(thread);
1511 				if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1512 					THR_SET_STATE(thread, PS_SUSPENDED);
1513 				else
1514 					KSE_RUNQ_INSERT_TAIL(kse, thread);
1515 				if ((thread->kse != kse) &&
1516 				    (thread->kse->k_curthread == thread)) {
1517 					/*
1518 					 * Remove this thread from its
1519 					 * previous KSE so that it (the KSE)
1520 					 * doesn't think it is still active.
1521 					 */
1522 					thread->kse->k_curthread = NULL;
1523 					thread->active = 0;
1524 				}
1525 			}
1526 			if ((sig = thread->tcb->tcb_tmbx.tm_syncsig.si_signo)
1527 			    != 0) {
1528 				if (SIGISMEMBER(thread->sigmask, sig))
1529 					SIGADDSET(thread->sigpend, sig);
1530 				else if (THR_IN_CRITICAL(thread))
1531 					kse_thr_interrupt(NULL, KSE_INTR_SIGEXIT, sig);
1532 				else
1533 					(void)_thr_sig_add(thread, sig,
1534 					    &thread->tcb->tcb_tmbx.tm_syncsig);
1535 				thread->tcb->tcb_tmbx.tm_syncsig.si_signo = 0;
1536 			}
1537 			completed = completed->tm_next;
1538 		}
1539 	}
1540 }
1541 
1542 /*
1543  * This must be called with the scheduling lock held.
1544  */
1545 static void
kse_check_waitq(struct kse * kse)1546 kse_check_waitq(struct kse *kse)
1547 {
1548 	struct pthread	*pthread;
1549 	struct timespec ts;
1550 
1551 	KSE_GET_TOD(kse, &ts);
1552 
1553 	/*
1554 	 * Wake up threads that have timedout.  This has to be
1555 	 * done before adding the current thread to the run queue
1556 	 * so that a CPU intensive thread doesn't get preference
1557 	 * over waiting threads.
1558 	 */
1559 	while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1560 	    thr_timedout(pthread, &ts)) {
1561 		/* Remove the thread from the wait queue: */
1562 		KSE_WAITQ_REMOVE(kse, pthread);
1563 		DBG_MSG("Found timedout thread %p in waitq\n", pthread);
1564 
1565 		/* Indicate the thread timedout: */
1566 		pthread->timeout = 1;
1567 
1568 		/* Add the thread to the priority queue: */
1569 		if ((pthread->flags & THR_FLAGS_SUSPENDED) != 0)
1570 			THR_SET_STATE(pthread, PS_SUSPENDED);
1571 		else {
1572 			THR_SET_STATE(pthread, PS_RUNNING);
1573 			KSE_RUNQ_INSERT_TAIL(kse, pthread);
1574 		}
1575 	}
1576 }
1577 
1578 static int
thr_timedout(struct pthread * thread,struct timespec * curtime)1579 thr_timedout(struct pthread *thread, struct timespec *curtime)
1580 {
1581 	if (thread->wakeup_time.tv_sec < 0)
1582 		return (0);
1583 	else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1584 		return (0);
1585 	else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1586 	    (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1587 		return (0);
1588 	else
1589 		return (1);
1590 }
1591 
/*
 * This must be called with the scheduling lock held.
 *
 * Each thread has a time slice, a wakeup time (used when it wants
 * to wait for a specified amount of time), a run state, and an
 * active flag.
 *
 * When a thread gets run by the scheduler, the active flag is
 * set to non-zero (1).  When a thread performs an explicit yield
 * or schedules a state change, it enters the scheduler and the
 * active flag is cleared.  When the active flag is still seen
 * set in the scheduler, that means that the thread is blocked in
 * the kernel (because it is cleared before entering the scheduler
 * in all other instances).
 *
 * The wakeup time is only set for those states that can timeout.
 * It is set to (-1, -1) for all other instances.
 *
 * The thread's run state, aside from being useful when debugging,
 * is used to place the thread in an appropriate queue.  There
 * are 2 basic queues:
 *
 *   o run queue - queue ordered by priority for all threads
 *                 that are runnable
 *   o waiting queue - queue sorted by wakeup time for all threads
 *                     that are not otherwise runnable (not blocked
 *                     in kernel, not waiting for locks)
 *
 * The thread's time slice is used for round-robin scheduling
 * (the default scheduling policy).  While a SCHED_RR thread
 * is runnable its time slice accumulates.  When it reaches
 * the time slice interval, it gets reset and added to the end
 * of the queue of threads at its priority.  When a thread is no
 * longer runnable (blocks in kernel, waits, etc), its
 * time slice is reset.
 *
 * The job of kse_switchout_thread() is to handle all of the above.
 *
 * kse is the KSE the thread was running on and whose queues are
 * updated; thread is the thread being switched out.  A thread in
 * state PS_DEAD is handed to thr_cleanup() and nothing further is
 * done with it here.
 */
static void
kse_switchout_thread(struct kse *kse, struct pthread *thread)
{
	int level;
	int i;
	int restart;
	siginfo_t siginfo;

	/*
	 * Place the currently running thread into the
	 * appropriate queue(s).
	 */
	DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);

	THR_DEACTIVATE_LAST_LOCK(thread);
	if (thread->blocked != 0) {
		thread->active = 0;
		thread->need_switchout = 0;
		/* This thread must have blocked in the kernel. */
		/*
		 * Check for pending signals and cancellation for
		 * this thread to see if we need to interrupt it
		 * in the kernel.
		 */
		if (THR_NEED_CANCEL(thread)) {
			kse_thr_interrupt(&thread->tcb->tcb_tmbx,
					  KSE_INTR_INTERRUPT, 0);
		} else if (thread->check_pending != 0) {
			/*
			 * Interrupt for the first pending, unmasked signal
			 * only; SA_RESTART on its action selects restart
			 * rather than plain interruption.
			 */
			for (i = 1; i <= _SIG_MAXSIG; ++i) {
				if (SIGISMEMBER(thread->sigpend, i) &&
				    !SIGISMEMBER(thread->sigmask, i)) {
					restart = _thread_sigact[i - 1].sa_flags & SA_RESTART;
					kse_thr_interrupt(&thread->tcb->tcb_tmbx,
					    restart ? KSE_INTR_RESTART : KSE_INTR_INTERRUPT, 0);
					break;
				}
			}
		}
	}
	else {
		/*
		 * Not blocked in the kernel: queue the thread according
		 * to its state.  A waiting thread with a cancellation
		 * request is made runnable instead of being queued.
		 */
		switch (thread->state) {
		case PS_MUTEX_WAIT:
		case PS_COND_WAIT:
			if (THR_NEED_CANCEL(thread)) {
				thread->interrupted = 1;
				thread->continuation = _thr_finish_cancellation;
				THR_SET_STATE(thread, PS_RUNNING);
			} else {
				/* Insert into the waiting queue: */
				KSE_WAITQ_INSERT(kse, thread);
			}
			break;

		case PS_LOCKWAIT:
			/*
			 * This state doesn't timeout.
			 */
			thread->wakeup_time.tv_sec = -1;
			thread->wakeup_time.tv_nsec = -1;
			/*
			 * Only wait if the innermost lock has not been
			 * granted yet; otherwise the thread can run.
			 */
			level = thread->locklevel - 1;
			if (!_LCK_GRANTED(&thread->lockusers[level]))
				KSE_WAITQ_INSERT(kse, thread);
			else
				THR_SET_STATE(thread, PS_RUNNING);
			break;

		case PS_SLEEP_WAIT:
		case PS_SIGWAIT:
			if (THR_NEED_CANCEL(thread)) {
				thread->interrupted = 1;
				THR_SET_STATE(thread, PS_RUNNING);
			} else {
				KSE_WAITQ_INSERT(kse, thread);
			}
			break;

		case PS_JOIN:
			if (THR_NEED_CANCEL(thread)) {
				thread->join_status.thread = NULL;
				THR_SET_STATE(thread, PS_RUNNING);
			} else {
				/*
				 * This state doesn't timeout.
				 */
				thread->wakeup_time.tv_sec = -1;
				thread->wakeup_time.tv_nsec = -1;

				/* Insert into the waiting queue: */
				KSE_WAITQ_INSERT(kse, thread);
			}
			break;

		case PS_SIGSUSPEND:
		case PS_SUSPENDED:
			if (THR_NEED_CANCEL(thread)) {
				thread->interrupted = 1;
				THR_SET_STATE(thread, PS_RUNNING);
			} else {
				/*
				 * These states don't timeout.
				 */
				thread->wakeup_time.tv_sec = -1;
				thread->wakeup_time.tv_nsec = -1;

				/* Insert into the waiting queue: */
				KSE_WAITQ_INSERT(kse, thread);
			}
			break;

		case PS_DEAD:
			/*
			 * The scheduler is operating on a different
			 * stack.  It is safe to do garbage collecting
			 * here.
			 */
			thr_cleanup(kse, thread);
			return;
			break;

		case PS_RUNNING:
			/* A pending suspend wins unless the thread must cancel. */
			if ((thread->flags & THR_FLAGS_SUSPENDED) != 0 &&
			    !THR_NEED_CANCEL(thread))
				THR_SET_STATE(thread, PS_SUSPENDED);
			break;

		case PS_DEADLOCK:
			/*
			 * This state doesn't timeout.
			 */
			thread->wakeup_time.tv_sec = -1;
			thread->wakeup_time.tv_nsec = -1;

			/* Insert into the waiting queue: */
			KSE_WAITQ_INSERT(kse, thread);
			break;

		default:
			PANIC("Unknown state\n");
			break;
		}

		thr_accounting(thread);
		/* The state may have been changed to PS_RUNNING above. */
		if (thread->state == PS_RUNNING) {
			if (thread->slice_usec == -1) {
				/*
				 * The thread exceeded its time quantum or
				 * it yielded the CPU; place it at the tail
				 * of the queue for its priority.
				 */
				KSE_RUNQ_INSERT_TAIL(kse, thread);
			} else {
				/*
				 * The thread hasn't exceeded its interval
				 * Place it at the head of the queue for its
				 * priority.
				 */
				KSE_RUNQ_INSERT_HEAD(kse, thread);
			}
		}
	}
	thread->active = 0;
	thread->need_switchout = 0;
	if (thread->check_pending != 0) {
		/* Install pending signals into the frame. */
		thread->check_pending = 0;
		KSE_LOCK_ACQUIRE(kse, &_thread_signal_lock);
		for (i = 1; i <= _SIG_MAXSIG; i++) {
			/* Masked signals stay pending. */
			if (SIGISMEMBER(thread->sigmask, i))
				continue;
			/*
			 * Thread-pending signals are added first; failing
			 * that, a process-pending signal is claimed.
			 */
			if (SIGISMEMBER(thread->sigpend, i))
				(void)_thr_sig_add(thread, i,
				    &thread->siginfo[i-1]);
			else if (SIGISMEMBER(_thr_proc_sigpending, i) &&
				_thr_getprocsig_unlocked(i, &siginfo)) {
				(void)_thr_sig_add(thread, i, &siginfo);
			}
		}
		KSE_LOCK_RELEASE(kse, &_thread_signal_lock);
	}
}
1810 
1811 /*
1812  * This function waits for the smallest timeout value of any waiting
1813  * thread, or until it receives a message from another KSE.
1814  *
1815  * This must be called with the scheduling lock held.
1816  */
1817 static void
kse_wait(struct kse * kse,struct pthread * td_wait,int sigseqno)1818 kse_wait(struct kse *kse, struct pthread *td_wait, int sigseqno)
1819 {
1820 	struct timespec ts, ts_sleep;
1821 	int saved_flags;
1822 
1823 	if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1824 		/* Limit sleep to no more than 1 minute. */
1825 		ts_sleep.tv_sec = 60;
1826 		ts_sleep.tv_nsec = 0;
1827 	} else {
1828 		KSE_GET_TOD(kse, &ts);
1829 		TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1830 		if (ts_sleep.tv_sec > 60) {
1831 			ts_sleep.tv_sec = 60;
1832 			ts_sleep.tv_nsec = 0;
1833 		}
1834 	}
1835 	/* Don't sleep for negative times. */
1836 	if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1837 		KSE_SET_IDLE(kse);
1838 		kse->k_kseg->kg_idle_kses++;
1839 		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1840 		if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) &&
1841 		    (kse->k_sigseqno != sigseqno))
1842 			; /* don't sleep */
1843 		else {
1844 			saved_flags = kse->k_kcb->kcb_kmbx.km_flags;
1845 			kse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL;
1846 			kse_release(&ts_sleep);
1847 			kse->k_kcb->kcb_kmbx.km_flags = saved_flags;
1848 		}
1849 		KSE_SCHED_LOCK(kse, kse->k_kseg);
1850 		if (KSE_IS_IDLE(kse)) {
1851 			KSE_CLEAR_IDLE(kse);
1852 			kse->k_kseg->kg_idle_kses--;
1853 		}
1854 	}
1855 }
1856 
/*
 * Avoid calling this kse_exit() so as not to confuse it with the
 * system call of the same name.
 *
 * Called by the scheduler when the KSE has nothing left to run or has
 * been flagged KF_TERMINATED.  Only KSEs of the initial KSE group are
 * supported; any other KSE panics.  A terminated KSE other than
 * _kse_initial is removed from its group, freed and exited.  Otherwise
 * the KSE is marked idle and released to the kernel with a 120 second
 * timeout.
 */
static void
kse_fini(struct kse *kse)
{
	/* struct kse_group *free_kseg = NULL; */
	struct timespec ts;
	struct pthread *td;

	/*
	 * Check to see if this is one of the main kses.
	 */
	if (kse->k_kseg != _kse_initial->k_kseg) {
		PANIC("shouldn't get here");
		/* This is for supporting thread groups. */
#ifdef NOT_YET
		/* Remove this KSE from the KSEG's list of KSEs. */
		KSE_SCHED_LOCK(kse, kse->k_kseg);
		TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
		kse->k_kseg->kg_ksecount--;
		if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
			free_kseg = kse->k_kseg;
		KSE_SCHED_UNLOCK(kse, kse->k_kseg);

		/*
		 * Add this KSE to the list of free KSEs along with
		 * the KSEG if it is now orphaned.
		 */
		KSE_LOCK_ACQUIRE(kse, &kse_lock);
		if (free_kseg != NULL)
			kseg_free_unlocked(free_kseg);
		kse_free_unlocked(kse);
		KSE_LOCK_RELEASE(kse, &kse_lock);
		kse_exit();
		/* Never returns. */
		PANIC("kse_exit()");
#endif
	} else {
		/*
		 * We allow the program to kill a kse in the initial group
		 * (by lowering the concurrency).
		 */
		if ((kse != _kse_initial) &&
		    ((kse->k_flags & KF_TERMINATED) != 0)) {
			KSE_SCHED_LOCK(kse, kse->k_kseg);
			TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
			kse->k_kseg->kg_ksecount--;
			/*
			 * Migrate each thread to _kse_initial if the
			 * latest kse it ran on is this kse.
			 */
			td = TAILQ_FIRST(&kse->k_kseg->kg_threadq);
			while (td != NULL) {
				if (td->kse == kse)
					td->kse = _kse_initial;
				td = TAILQ_NEXT(td, kle);
			}
			KSE_SCHED_UNLOCK(kse, kse->k_kseg);
			KSE_LOCK_ACQUIRE(kse, &kse_lock);
			kse_free_unlocked(kse);
			KSE_LOCK_RELEASE(kse, &kse_lock);
			/* Make sure there is always at least one kse awake. */
			KSE_WAKEUP(_kse_initial);
			kse_exit();
                        /* Never returns. */
                        PANIC("kse_exit() failed for initial kseg");
                }
		/* Not terminating: mark this kse idle and give it back. */
		KSE_SCHED_LOCK(kse, kse->k_kseg);
		KSE_SET_IDLE(kse);
		kse->k_kseg->kg_idle_kses++;
		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
		ts.tv_sec = 120;
		ts.tv_nsec = 0;
		/* All mailbox flags are cleared before the release. */
		kse->k_kcb->kcb_kmbx.km_flags = 0;
		kse_release(&ts);
		/* Never reached. */
	}
}
1937 
1938 void
_thr_set_timeout(const struct timespec * timeout)1939 _thr_set_timeout(const struct timespec *timeout)
1940 {
1941 	struct pthread	*curthread = _get_curthread();
1942 	struct timespec ts;
1943 
1944 	/* Reset the timeout flag for the running thread: */
1945 	curthread->timeout = 0;
1946 
1947 	/* Check if the thread is to wait forever: */
1948 	if (timeout == NULL) {
1949 		/*
1950 		 * Set the wakeup time to something that can be recognised as
1951 		 * different to an actual time of day:
1952 		 */
1953 		curthread->wakeup_time.tv_sec = -1;
1954 		curthread->wakeup_time.tv_nsec = -1;
1955 	}
1956 	/* Check if no waiting is required: */
1957 	else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
1958 		/* Set the wake up time to 'immediately': */
1959 		curthread->wakeup_time.tv_sec = 0;
1960 		curthread->wakeup_time.tv_nsec = 0;
1961 	} else {
1962 		/* Calculate the time for the current thread to wakeup: */
1963 		KSE_GET_TOD(curthread->kse, &ts);
1964 		TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1965 	}
1966 }
1967 
/*
 * Report a fatal library error and abort.
 *
 * "(file:line) msg" plus a newline is formatted into a 256-byte buffer
 * (snprintf() truncates anything longer), written to file descriptor 2
 * with __sys_write() and followed by abort().
 */
void
_thr_panic_exit(char *file, int line, char *msg)
{
	char text[256];
	size_t len;

	snprintf(text, sizeof(text), "(%s:%d) %s\n", file, line, msg);
	len = strlen(text);
	__sys_write(2, text, len);
	abort();
}
1977 
1978 void
_thr_setrunnable(struct pthread * curthread,struct pthread * thread)1979 _thr_setrunnable(struct pthread *curthread, struct pthread *thread)
1980 {
1981 	kse_critical_t crit;
1982 	struct kse_mailbox *kmbx;
1983 
1984 	crit = _kse_critical_enter();
1985 	KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1986 	kmbx = _thr_setrunnable_unlocked(thread);
1987 	KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1988 	_kse_critical_leave(crit);
1989 	if ((kmbx != NULL) && (__isthreaded != 0))
1990 		kse_wakeup(kmbx);
1991 }
1992 
1993 struct kse_mailbox *
_thr_setrunnable_unlocked(struct pthread * thread)1994 _thr_setrunnable_unlocked(struct pthread *thread)
1995 {
1996 	struct kse_mailbox *kmbx = NULL;
1997 
1998 	if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0) {
1999 		/* No silly queues for these threads. */
2000 		if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2001 			THR_SET_STATE(thread, PS_SUSPENDED);
2002 		else {
2003 			THR_SET_STATE(thread, PS_RUNNING);
2004 			kmbx = kse_wakeup_one(thread);
2005 		}
2006 
2007 	} else if (thread->state != PS_RUNNING) {
2008 		if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
2009 			KSE_WAITQ_REMOVE(thread->kse, thread);
2010 		if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2011 			THR_SET_STATE(thread, PS_SUSPENDED);
2012 		else {
2013 			THR_SET_STATE(thread, PS_RUNNING);
2014 			if ((thread->blocked == 0) && (thread->active == 0) &&
2015 			    (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
2016 				THR_RUNQ_INSERT_TAIL(thread);
2017 			/*
2018 			 * XXX - Threads are not yet assigned to specific
2019 			 *       KSEs; they are assigned to the KSEG.  So
2020 			 *       the fact that a thread's KSE is waiting
2021 			 *       doesn't necessarily mean that it will be
2022 			 *       the KSE that runs the thread after the
2023 			 *       lock is granted.  But we don't know if the
2024 			 *       other KSEs within the same KSEG are also
2025 			 *       in a waiting state or not so we err on the
2026 			 *       side of caution and wakeup the thread's
2027 			 *       last known KSE.  We ensure that the
2028 			 *       threads KSE doesn't change while it's
2029 			 *       scheduling lock is held so it is safe to
2030 			 *       reference it (the KSE).  If the KSE wakes
2031 			 *       up and doesn't find any more work it will
2032 			 *       again go back to waiting so no harm is
2033 			 *       done.
2034 			 */
2035 			kmbx = kse_wakeup_one(thread);
2036 		}
2037 	}
2038 	return (kmbx);
2039 }
2040 
2041 static struct kse_mailbox *
kse_wakeup_one(struct pthread * thread)2042 kse_wakeup_one(struct pthread *thread)
2043 {
2044 	struct kse *ke;
2045 
2046 	if (KSE_IS_IDLE(thread->kse)) {
2047 		KSE_CLEAR_IDLE(thread->kse);
2048 		thread->kseg->kg_idle_kses--;
2049 		return (&thread->kse->k_kcb->kcb_kmbx);
2050 	} else {
2051 		TAILQ_FOREACH(ke, &thread->kseg->kg_kseq, k_kgqe) {
2052 			if (KSE_IS_IDLE(ke)) {
2053 				KSE_CLEAR_IDLE(ke);
2054 				ke->k_kseg->kg_idle_kses--;
2055 				return (&ke->k_kcb->kcb_kmbx);
2056 			}
2057 		}
2058 	}
2059 	return (NULL);
2060 }
2061 
2062 static void
kse_wakeup_multi(struct kse * curkse)2063 kse_wakeup_multi(struct kse *curkse)
2064 {
2065 	struct kse *ke;
2066 	int tmp;
2067 
2068 	if ((tmp = KSE_RUNQ_THREADS(curkse)) && curkse->k_kseg->kg_idle_kses) {
2069 		TAILQ_FOREACH(ke, &curkse->k_kseg->kg_kseq, k_kgqe) {
2070 			if (KSE_IS_IDLE(ke)) {
2071 				KSE_CLEAR_IDLE(ke);
2072 				ke->k_kseg->kg_idle_kses--;
2073 				KSE_WAKEUP(ke);
2074 				if (--tmp == 0)
2075 					break;
2076 			}
2077 		}
2078 	}
2079 }
2080 
2081 /*
2082  * Allocate a new KSEG.
2083  *
2084  * We allow the current thread to be NULL in the case that this
2085  * is the first time a KSEG is being created (library initialization).
2086  * In this case, we don't need to (and can't) take any locks.
2087  */
2088 struct kse_group *
_kseg_alloc(struct pthread * curthread)2089 _kseg_alloc(struct pthread *curthread)
2090 {
2091 	struct kse_group *kseg = NULL;
2092 	kse_critical_t crit;
2093 
2094 	if ((curthread != NULL) && (free_kseg_count > 0)) {
2095 		/* Use the kse lock for the kseg queue. */
2096 		crit = _kse_critical_enter();
2097 		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2098 		if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
2099 			TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
2100 			free_kseg_count--;
2101 			active_kseg_count++;
2102 			TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
2103 		}
2104 		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2105 		_kse_critical_leave(crit);
2106 		if (kseg)
2107 			kseg_reinit(kseg);
2108 	}
2109 
2110 	/*
2111 	 * If requested, attempt to allocate a new KSE group only if the
2112 	 * KSE allocation was successful and a KSE group wasn't found in
2113 	 * the free list.
2114 	 */
2115 	if ((kseg == NULL) &&
2116 	    ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
2117 		if (_pq_alloc(&kseg->kg_schedq.sq_runq,
2118 		    THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
2119 			free(kseg);
2120 			kseg = NULL;
2121 		} else {
2122 			kseg_init(kseg);
2123 			/* Add the KSEG to the list of active KSEGs. */
2124 			if (curthread != NULL) {
2125 				crit = _kse_critical_enter();
2126 				KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2127 				active_kseg_count++;
2128 				TAILQ_INSERT_TAIL(&active_kse_groupq,
2129 				    kseg, kg_qe);
2130 				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2131 				_kse_critical_leave(crit);
2132 			} else {
2133 				active_kseg_count++;
2134 				TAILQ_INSERT_TAIL(&active_kse_groupq,
2135 				    kseg, kg_qe);
2136 			}
2137 		}
2138 	}
2139 	return (kseg);
2140 }
2141 
2142 static void
kseg_init(struct kse_group * kseg)2143 kseg_init(struct kse_group *kseg)
2144 {
2145 	kseg_reinit(kseg);
2146 	_lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
2147 	    _kse_lock_wakeup, calloc);
2148 }
2149 
2150 static void
kseg_reinit(struct kse_group * kseg)2151 kseg_reinit(struct kse_group *kseg)
2152 {
2153 	TAILQ_INIT(&kseg->kg_kseq);
2154 	TAILQ_INIT(&kseg->kg_threadq);
2155 	TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
2156 	kseg->kg_threadcount = 0;
2157 	kseg->kg_ksecount = 0;
2158 	kseg->kg_idle_kses = 0;
2159 	kseg->kg_flags = 0;
2160 }
2161 
2162 /*
2163  * This must be called with the kse lock held and when there are
2164  * no more threads that reference it.
2165  */
2166 static void
kseg_free_unlocked(struct kse_group * kseg)2167 kseg_free_unlocked(struct kse_group *kseg)
2168 {
2169 	TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
2170 	TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
2171 	free_kseg_count++;
2172 	active_kseg_count--;
2173 }
2174 
2175 void
_kseg_free(struct kse_group * kseg)2176 _kseg_free(struct kse_group *kseg)
2177 {
2178 	struct kse *curkse;
2179 	kse_critical_t crit;
2180 
2181 	crit = _kse_critical_enter();
2182 	curkse = _get_curkse();
2183 	KSE_LOCK_ACQUIRE(curkse, &kse_lock);
2184 	kseg_free_unlocked(kseg);
2185 	KSE_LOCK_RELEASE(curkse, &kse_lock);
2186 	_kse_critical_leave(crit);
2187 }
2188 
2189 static void
kseg_destroy(struct kse_group * kseg)2190 kseg_destroy(struct kse_group *kseg)
2191 {
2192 	_lock_destroy(&kseg->kg_lock);
2193 	_pq_free(&kseg->kg_schedq.sq_runq);
2194 	free(kseg);
2195 }
2196 
2197 /*
2198  * Allocate a new KSE.
2199  *
2200  * We allow the current thread to be NULL in the case that this
2201  * is the first time a KSE is being created (library initialization).
2202  * In this case, we don't need to (and can't) take any locks.
2203  */
2204 struct kse *
_kse_alloc(struct pthread * curthread,int sys_scope)2205 _kse_alloc(struct pthread *curthread, int sys_scope)
2206 {
2207 	struct kse *kse = NULL;
2208 	char *stack;
2209 	kse_critical_t crit;
2210 	int i;
2211 
2212 	if ((curthread != NULL) && (free_kse_count > 0)) {
2213 		crit = _kse_critical_enter();
2214 		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2215 		/* Search for a finished KSE. */
2216 		kse = TAILQ_FIRST(&free_kseq);
2217 		while ((kse != NULL) &&
2218 		    ((kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
2219 			kse = TAILQ_NEXT(kse, k_qe);
2220 		}
2221 		if (kse != NULL) {
2222 			DBG_MSG("found an unused kse.\n");
2223 			TAILQ_REMOVE(&free_kseq, kse, k_qe);
2224 			free_kse_count--;
2225 			TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2226 			active_kse_count++;
2227 		}
2228 		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2229 		_kse_critical_leave(crit);
2230 		if (kse != NULL)
2231 			kse_reinit(kse, sys_scope);
2232 	}
2233 	if ((kse == NULL) &&
2234 	    ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
2235 		if (sys_scope != 0)
2236 			stack = NULL;
2237 		else if ((stack = malloc(KSE_STACKSIZE)) == NULL) {
2238 			free(kse);
2239 			return (NULL);
2240 		}
2241 		bzero(kse, sizeof(*kse));
2242 
2243 		/* Initialize KCB without the lock. */
2244 		if ((kse->k_kcb = _kcb_ctor(kse)) == NULL) {
2245 			if (stack != NULL)
2246 				free(stack);
2247 			free(kse);
2248 			return (NULL);
2249 		}
2250 
2251 		/* Initialize the lockusers. */
2252 		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
2253 			_lockuser_init(&kse->k_lockusers[i], (void *)kse);
2254 			_LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
2255 		}
2256 		/* _lock_init(kse->k_lock, ...) */
2257 
2258 		if (curthread != NULL) {
2259 			crit = _kse_critical_enter();
2260 			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2261 		}
2262 		kse->k_flags = 0;
2263 		TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2264 		active_kse_count++;
2265 		if (curthread != NULL) {
2266 			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2267 			_kse_critical_leave(crit);
2268 		}
2269 		/*
2270 		 * Create the KSE context.
2271 		 * Scope system threads (one thread per KSE) are not required
2272 		 * to have a stack for an unneeded kse upcall.
2273 		 */
2274 		if (!sys_scope) {
2275 			kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2276 			kse->k_stack.ss_sp = stack;
2277 			kse->k_stack.ss_size = KSE_STACKSIZE;
2278 		} else {
2279 			kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2280 			kse->k_stack.ss_sp = NULL;
2281 			kse->k_stack.ss_size = 0;
2282 		}
2283 		kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2284 		kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2285 		/*
2286 		 * We need to keep a copy of the stack in case it
2287 		 * doesn't get used; a KSE running a scope system
2288 		 * thread will use that thread's stack.
2289 		 */
2290 		kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2291 	}
2292 	return (kse);
2293 }
2294 
2295 static void
kse_reinit(struct kse * kse,int sys_scope)2296 kse_reinit(struct kse *kse, int sys_scope)
2297 {
2298 	if (!sys_scope) {
2299 		kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2300 		if (kse->k_stack.ss_sp == NULL) {
2301 			/* XXX check allocation failure */
2302 			kse->k_stack.ss_sp = (char *) malloc(KSE_STACKSIZE);
2303 			kse->k_stack.ss_size = KSE_STACKSIZE;
2304 		}
2305 		kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2306 	} else {
2307 		kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2308 		if (kse->k_stack.ss_sp)
2309 			free(kse->k_stack.ss_sp);
2310 		kse->k_stack.ss_sp = NULL;
2311 		kse->k_stack.ss_size = 0;
2312 		kse->k_kcb->kcb_kmbx.km_quantum = 0;
2313 	}
2314 	kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2315 	kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2316 	kse->k_kcb->kcb_kmbx.km_curthread = NULL;
2317 	kse->k_kcb->kcb_kmbx.km_flags = 0;
2318 	kse->k_curthread = NULL;
2319 	kse->k_kseg = 0;
2320 	kse->k_schedq = 0;
2321 	kse->k_locklevel = 0;
2322 	kse->k_flags = 0;
2323 	kse->k_error = 0;
2324 	kse->k_cpu = 0;
2325 	kse->k_sigseqno = 0;
2326 }
2327 
2328 void
kse_free_unlocked(struct kse * kse)2329 kse_free_unlocked(struct kse *kse)
2330 {
2331 	TAILQ_REMOVE(&active_kseq, kse, k_qe);
2332 	active_kse_count--;
2333 	kse->k_kseg = NULL;
2334 	kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2335 	kse->k_flags = 0;
2336 	TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
2337 	free_kse_count++;
2338 }
2339 
2340 void
_kse_free(struct pthread * curthread,struct kse * kse)2341 _kse_free(struct pthread *curthread, struct kse *kse)
2342 {
2343 	kse_critical_t crit;
2344 
2345 	if (curthread == NULL)
2346 		kse_free_unlocked(kse);
2347 	else {
2348 		crit = _kse_critical_enter();
2349 		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2350 		kse_free_unlocked(kse);
2351 		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2352 		_kse_critical_leave(crit);
2353 	}
2354 }
2355 
2356 static void
kse_destroy(struct kse * kse)2357 kse_destroy(struct kse *kse)
2358 {
2359 	int i;
2360 
2361 	if (kse->k_stack.ss_sp != NULL)
2362 		free(kse->k_stack.ss_sp);
2363 	_kcb_dtor(kse->k_kcb);
2364 	for (i = 0; i < MAX_KSE_LOCKLEVEL; ++i)
2365 		_lockuser_destroy(&kse->k_lockusers[i]);
2366 	_lock_destroy(&kse->k_lock);
2367 	free(kse);
2368 }
2369 
2370 struct pthread *
_thr_alloc(struct pthread * curthread)2371 _thr_alloc(struct pthread *curthread)
2372 {
2373 	kse_critical_t	crit;
2374 	struct pthread	*thread = NULL;
2375 	int i;
2376 
2377 	if (curthread != NULL) {
2378 		if (GC_NEEDED())
2379 			_thr_gc(curthread);
2380 		if (free_thread_count > 0) {
2381 			crit = _kse_critical_enter();
2382 			KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2383 			if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
2384 				TAILQ_REMOVE(&free_threadq, thread, tle);
2385 				free_thread_count--;
2386 			}
2387 			KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2388 			_kse_critical_leave(crit);
2389 		}
2390 	}
2391 	if ((thread == NULL) &&
2392 	    ((thread = malloc(sizeof(struct pthread))) != NULL)) {
2393 		bzero(thread, sizeof(struct pthread));
2394 		thread->siginfo = calloc(_SIG_MAXSIG, sizeof(siginfo_t));
2395 		if (thread->siginfo == NULL) {
2396 			free(thread);
2397 			return (NULL);
2398 		}
2399 		if (curthread) {
2400 			_pthread_mutex_lock(&_tcb_mutex);
2401 			thread->tcb = _tcb_ctor(thread, 0 /* not initial tls */);
2402 			_pthread_mutex_unlock(&_tcb_mutex);
2403 		} else {
2404 			thread->tcb = _tcb_ctor(thread, 1 /* initial tls */);
2405 		}
2406 		if (thread->tcb == NULL) {
2407 			free(thread->siginfo);
2408 			free(thread);
2409 			return (NULL);
2410 		}
2411 		/*
2412 		 * Initialize thread locking.
2413 		 * Lock initializing needs malloc, so don't
2414 		 * enter critical region before doing this!
2415 		 */
2416 		if (_lock_init(&thread->lock, LCK_ADAPTIVE,
2417 		    _thr_lock_wait, _thr_lock_wakeup, calloc) != 0)
2418 			PANIC("Cannot initialize thread lock");
2419 		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
2420 			_lockuser_init(&thread->lockusers[i], (void *)thread);
2421 			_LCK_SET_PRIVATE2(&thread->lockusers[i],
2422 			    (void *)thread);
2423 		}
2424 	}
2425 	return (thread);
2426 }
2427 
2428 void
_thr_free(struct pthread * curthread,struct pthread * thread)2429 _thr_free(struct pthread *curthread, struct pthread *thread)
2430 {
2431 	kse_critical_t crit;
2432 
2433 	DBG_MSG("Freeing thread %p\n", thread);
2434 	if (thread->name) {
2435 		free(thread->name);
2436 		thread->name = NULL;
2437 	}
2438 	if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
2439 		thr_destroy(curthread, thread);
2440 	} else {
2441 		/* Add the thread to the free thread list. */
2442 		crit = _kse_critical_enter();
2443 		KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2444 		TAILQ_INSERT_TAIL(&free_threadq, thread, tle);
2445 		free_thread_count++;
2446 		KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2447 		_kse_critical_leave(crit);
2448 	}
2449 }
2450 
2451 static void
thr_destroy(struct pthread * curthread,struct pthread * thread)2452 thr_destroy(struct pthread *curthread, struct pthread *thread)
2453 {
2454 	int i;
2455 
2456 	for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
2457 		_lockuser_destroy(&thread->lockusers[i]);
2458 	_lock_destroy(&thread->lock);
2459 	if (curthread) {
2460 		_pthread_mutex_lock(&_tcb_mutex);
2461 		_tcb_dtor(thread->tcb);
2462 		_pthread_mutex_unlock(&_tcb_mutex);
2463 	} else {
2464 		_tcb_dtor(thread->tcb);
2465 	}
2466 	free(thread->siginfo);
2467 	free(thread);
2468 }
2469 
2470 /*
2471  * Add an active thread:
2472  *
2473  *   o Assign the thread a unique id (which GDB uses to track
2474  *     threads.
2475  *   o Add the thread to the list of all threads and increment
2476  *     number of active threads.
2477  */
2478 static void
thr_link(struct pthread * thread)2479 thr_link(struct pthread *thread)
2480 {
2481 	kse_critical_t crit;
2482 	struct kse *curkse;
2483 
2484 	crit = _kse_critical_enter();
2485 	curkse = _get_curkse();
2486 	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2487 	/*
2488 	 * Initialize the unique id (which GDB uses to track
2489 	 * threads), add the thread to the list of all threads,
2490 	 * and
2491 	 */
2492 	thread->uniqueid = next_uniqueid++;
2493 	THR_LIST_ADD(thread);
2494 	_thread_active_threads++;
2495 	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2496 	_kse_critical_leave(crit);
2497 }
2498 
2499 /*
2500  * Remove an active thread.
2501  */
2502 static void
thr_unlink(struct pthread * thread)2503 thr_unlink(struct pthread *thread)
2504 {
2505 	kse_critical_t crit;
2506 	struct kse *curkse;
2507 
2508 	crit = _kse_critical_enter();
2509 	curkse = _get_curkse();
2510 	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2511 	THR_LIST_REMOVE(thread);
2512 	_thread_active_threads--;
2513 	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2514 	_kse_critical_leave(crit);
2515 }
2516 
2517 void
_thr_hash_add(struct pthread * thread)2518 _thr_hash_add(struct pthread *thread)
2519 {
2520 	struct thread_hash_head *head;
2521 
2522 	head = &thr_hashtable[THREAD_HASH(thread)];
2523 	LIST_INSERT_HEAD(head, thread, hle);
2524 }
2525 
2526 void
_thr_hash_remove(struct pthread * thread)2527 _thr_hash_remove(struct pthread *thread)
2528 {
2529 	LIST_REMOVE(thread, hle);
2530 }
2531 
2532 struct pthread *
_thr_hash_find(struct pthread * thread)2533 _thr_hash_find(struct pthread *thread)
2534 {
2535 	struct pthread *td;
2536 	struct thread_hash_head *head;
2537 
2538 	head = &thr_hashtable[THREAD_HASH(thread)];
2539 	LIST_FOREACH(td, head, hle) {
2540 		if (td == thread)
2541 			return (thread);
2542 	}
2543 	return (NULL);
2544 }
2545 
2546 void
_thr_debug_check_yield(struct pthread * curthread)2547 _thr_debug_check_yield(struct pthread *curthread)
2548 {
2549 	/*
2550 	 * Note that TMDF_SUSPEND is set after process is suspended.
2551 	 * When we are being debugged, every suspension in process
2552 	 * will cause all KSEs to schedule an upcall in kernel, unless the
2553 	 * KSE is in critical region.
2554 	 * If the function is being called, it means the KSE is no longer
2555 	 * in critical region, if the TMDF_SUSPEND is set by debugger
2556 	 * before KSE leaves critical region, we will catch it here, else
2557 	 * if the flag is changed during testing, it also not a problem,
2558 	 * because the change only occurs after a process suspension event
2559 	 * occurs. A suspension event will always cause KSE to schedule an
2560 	 * upcall, in the case, because we are not in critical region,
2561 	 * upcall will be scheduled sucessfully, the flag will be checked
2562 	 * again in kse_sched_multi, we won't back until the flag
2563 	 * is cleared by debugger, the flag will be cleared in next
2564 	 * suspension event.
2565 	 */
2566 	if (!DBG_CAN_RUN(curthread)) {
2567 		if ((curthread->attr.flags & PTHREAD_SCOPE_SYSTEM) == 0)
2568 			_thr_sched_switch(curthread);
2569 		else
2570 			kse_thr_interrupt(&curthread->tcb->tcb_tmbx,
2571 				KSE_INTR_DBSUSPEND, 0);
2572 	}
2573 }
2574