1 /*	$OpenBSD: uthread_kern.c,v 1.30 2005/01/28 20:35:49 marc Exp $	*/
2 /*
3  * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by John Birrell.
17  * 4. Neither the name of the author nor the names of any co-contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * $FreeBSD: uthread_kern.c,v 1.23 1999/09/29 15:18:39 marcel Exp $
34  *
35  */
36 #include <errno.h>
37 #include <poll.h>
38 #include <stdlib.h>
39 #include <stdarg.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <sys/time.h>
45 #include <sys/socket.h>
46 #include <sys/uio.h>
47 #include <sys/syscall.h>
48 #include <fcntl.h>
49 #ifdef _THREAD_SAFE
50 #include <pthread.h>
51 #include "pthread_private.h"
52 
/*
 * Forward declarations of the functions local to this file.  They are
 * deliberately NOT static: we want to see their names in crash dumps.
 */
void		_thread_kern_poll(int wait_reqd);
void		_dequeue_signals(void);
inline void	_thread_run_switch_hook(pthread_t thread_out,
		    pthread_t thread_in);

/* Value of the scheduler tick counter recorded on the last scheduler pass: */
static unsigned int	last_tick;
63 
64 void
_thread_kern_sched(struct sigcontext * scp)65 _thread_kern_sched(struct sigcontext * scp)
66 {
67 	struct timespec	ts;
68 	struct timeval	tv;
69 	struct pthread	*curthread = _get_curthread();
70 	pthread_t       pthread, pthread_h;
71 	unsigned int	current_tick;
72 	int		add_to_prioq;
73 	pthread_t	old_thread_run;
74 
75 	/*
76 	 * Flag the pthread kernel as executing scheduler code
77 	 * to avoid a scheduler signal from interrupting this
78 	 * execution and calling the scheduler again.
79 	 */
80 	_thread_kern_in_sched = 1;
81 
82 	/* Check if this function was called from the signal handler: */
83 	if (scp != NULL) {
84 		/*
85 		 * The signal handler should have saved the state of
86 		 * the current thread. Restore the process signal
87 		 * mask.
88 		 */
89 		if (_thread_sys_sigprocmask(SIG_SETMASK,
90 		    &_process_sigmask, NULL) != 0)
91 			PANIC("Unable to restore process mask after signal");
92 
93 		/*
94 		 * Copy the signal context to the current thread's jump
95 		 * buffer:
96 		 */
97 		memcpy(&curthread->saved_sigcontext, scp,
98 		    sizeof(curthread->saved_sigcontext));
99 
100 		/* Flag the signal context as the last state saved: */
101 		curthread->sig_saved = 1;
102 	} else
103 		/* Flag the jump buffer was the last state saved: */
104 		curthread->sig_saved = 0;
105 
106 	/* If the currently running thread is a user thread, save it: */
107 	if ((curthread->flags & PTHREAD_FLAGS_PRIVATE) == 0)
108 		_last_user_thread = curthread;
109 
110 	/* Save floating point state. */
111 	_thread_machdep_save_float_state(&curthread->_machdep);
112 
113 	/* Save errno. */
114 	curthread->error = errno;
115 
116 	/* Save the current thread to switch from */
117 	old_thread_run = curthread;
118 
119 	/*
120 	 * Enter a scheduling loop that finds the next thread that is
121 	 * ready to run. This loop completes when there are no more threads
122 	 * in the global list or when a thread has its state restored by
123 	 * either a sigreturn (if the state was saved as a sigcontext) or a
124 	 * switch.
125 	 */
126 	while (!(TAILQ_EMPTY(&_thread_list))) {
127 		/* Get the current time of day: */
128 		GET_CURRENT_TOD(tv);
129 		TIMEVAL_TO_TIMESPEC(&tv, &ts);
130 		current_tick = _sched_ticks;
131 
132 		/*
133 		 * Protect the scheduling queues from access by the signal
134 		 * handler.
135 		 */
136 		_queue_signals = 1;
137 		add_to_prioq = 0;
138 
139 		if (curthread != &_thread_kern_thread) {
140 			/*
141 			 * This thread no longer needs to yield the CPU.
142 			 */
143 			curthread->yield_on_sig_undefer = 0;
144 
145 			if (curthread->state != PS_RUNNING) {
146 				/*
147 				 * Save the current time as the time that the
148 				 * thread became inactive:
149 				 */
150 				curthread->last_inactive = (long)current_tick;
151 				if (curthread->last_inactive <
152 				    curthread->last_active) {
153 					/* Account for a rollover: */
154 					curthread->last_inactive =+
155 					    UINT_MAX + 1;
156 				}
157 			}
158 
159 			/*
160 			 * Place the currently running thread into the
161 			 * appropriate queue(s).
162 			 */
163 			switch (curthread->state) {
164 			case PS_DEAD:
165 			case PS_STATE_MAX: /* to silence -Wall */
166 			case PS_SUSPENDED:
167 				/*
168 				 * Dead and suspended threads are not placed
169 				 * in any queue:
170 				 */
171 				break;
172 
173 			case PS_RUNNING:
174 				/*
175 				 * Runnable threads can't be placed in the
176 				 * priority queue until after waiting threads
177 				 * are polled (to preserve round-robin
178 				 * scheduling).
179 				 */
180 				add_to_prioq = 1;
181 				break;
182 
183 			/*
184 			 * States which do not depend on file descriptor I/O
185 			 * operations or timeouts:
186 			 */
187 			case PS_DEADLOCK:
188 			case PS_FDLR_WAIT:
189 			case PS_FDLW_WAIT:
190 			case PS_FILE_WAIT:
191 			case PS_JOIN:
192 			case PS_MUTEX_WAIT:
193 			case PS_SIGSUSPEND:
194 			case PS_SIGTHREAD:
195 			case PS_SIGWAIT:
196 			case PS_WAIT_WAIT:
197 				/* No timeouts for these states: */
198 				curthread->wakeup_time.tv_sec = -1;
199 				curthread->wakeup_time.tv_nsec = -1;
200 
201 				/* Restart the time slice: */
202 				curthread->slice_usec = -1;
203 
204 				/* Insert into the waiting queue: */
205 				PTHREAD_WAITQ_INSERT(curthread);
206 				break;
207 
208 			/* States which can timeout: */
209 			case PS_COND_WAIT:
210 			case PS_SLEEP_WAIT:
211 				/* Restart the time slice: */
212 				curthread->slice_usec = -1;
213 
214 				/* Insert into the waiting queue: */
215 				PTHREAD_WAITQ_INSERT(curthread);
216 				break;
217 
218 			/* States that require periodic work: */
219 			case PS_SPINBLOCK:
220 				/* No timeouts for this state: */
221 				curthread->wakeup_time.tv_sec = -1;
222 				curthread->wakeup_time.tv_nsec = -1;
223 
224 				/* Increment spinblock count: */
225 				_spinblock_count++;
226 
227 				/* FALLTHROUGH */
228 			case PS_FDR_WAIT:
229 			case PS_FDW_WAIT:
230 			case PS_POLL_WAIT:
231 			case PS_SELECT_WAIT:
232 				/* Restart the time slice: */
233 				curthread->slice_usec = -1;
234 
235 				/* Insert into the waiting queue: */
236 				PTHREAD_WAITQ_INSERT(curthread);
237 
238 				/* Insert into the work queue: */
239 				PTHREAD_WORKQ_INSERT(curthread);
240 				break;
241 			}
242 		}
243 
244 		/*
245 		 * Avoid polling file descriptors if there are none
246 		 * waiting:
247 		 */
248 		if (TAILQ_EMPTY(&_workq) != 0) {
249 		}
250 		/*
251 		 * Poll file descriptors only if a new scheduling signal
252 		 * has occurred or if we have no more runnable threads.
253 		 */
254 		else if (((current_tick = _sched_ticks) != last_tick) ||
255 		    ((curthread->state != PS_RUNNING) &&
256 		    (PTHREAD_PRIOQ_FIRST() == NULL))) {
257 			/* Unprotect the scheduling queues: */
258 			_queue_signals = 0;
259 
260 			/*
261 			 * Poll file descriptors to update the state of threads
262 			 * waiting on file I/O where data may be available:
263 			 */
264 			_thread_kern_poll(0);
265 
266 			/* Protect the scheduling queues: */
267 			_queue_signals = 1;
268 		}
269 		last_tick = current_tick;
270 
271 		/*
272 		 * Wake up threads that have timedout.  This has to be
273 		 * done after polling in case a thread does a poll or
274 		 * select with zero time.
275 		 */
276 		PTHREAD_WAITQ_SETACTIVE();
277 		while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
278 		       (pthread->wakeup_time.tv_sec != -1) &&
279 		       (((pthread->wakeup_time.tv_sec == 0) &&
280 			 (pthread->wakeup_time.tv_nsec == 0)) ||
281 			(pthread->wakeup_time.tv_sec < ts.tv_sec) ||
282 			((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
283 			 (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
284 			switch (pthread->state) {
285 			case PS_POLL_WAIT:
286 			case PS_SELECT_WAIT:
287 				/* Return zero file descriptors ready: */
288 				pthread->data.poll_data->nfds = 0;
289 				/* fall through */
290 			default:
291 				/*
292 				 * Remove this thread from the waiting queue
293 				 * (and work queue if necessary) and place it
294 				 * in the ready queue.
295 				 */
296 				PTHREAD_WAITQ_CLEARACTIVE();
297 				if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
298 					PTHREAD_WORKQ_REMOVE(pthread);
299 				PTHREAD_NEW_STATE(pthread, PS_RUNNING);
300 				PTHREAD_WAITQ_SETACTIVE();
301 				break;
302 			}
303 			/*
304 			 * Flag the timeout in the thread structure:
305 			 */
306 			pthread->timeout = 1;
307 		}
308 		PTHREAD_WAITQ_CLEARACTIVE();
309 
310 		/*
311 		 * Check to see if the current thread needs to be added
312 		 * to the priority queue:
313 		 */
314 		if (add_to_prioq != 0) {
315 			/*
316 			 * Save the current time as the time that the
317 			 * thread became inactive:
318 			 */
319 			current_tick = _sched_ticks;
320 			curthread->last_inactive = (long)current_tick;
321 			if (curthread->last_inactive <
322 			    curthread->last_active) {
323 				/* Account for a rollover: */
324 				curthread->last_inactive =+ UINT_MAX + 1;
325 			}
326 
327 			if ((curthread->slice_usec != -1) &&
328 			   (curthread->attr.sched_policy != SCHED_FIFO)) {
329 				/*
330 				 * Accumulate the number of microseconds for
331 				 * which the current thread has run:
332 				 */
333 				curthread->slice_usec +=
334 				    (curthread->last_inactive -
335 				    curthread->last_active) *
336 				    (long)_clock_res_usec;
337 				/* Check for time quantum exceeded: */
338 				if (curthread->slice_usec > TIMESLICE_USEC)
339 					curthread->slice_usec = -1;
340 			}
341 
342 			if (curthread->slice_usec == -1) {
343 				/*
344 				 * The thread exceeded its time
345 				 * quantum or it yielded the CPU;
346 				 * place it at the tail of the
347 				 * queue for its priority.
348 				 */
349 				PTHREAD_PRIOQ_INSERT_TAIL(curthread);
350 			} else {
351 				/*
352 				 * The thread hasn't exceeded its
353 				 * interval.  Place it at the head
354 				 * of the queue for its priority.
355 				 */
356 				PTHREAD_PRIOQ_INSERT_HEAD(curthread);
357 			}
358 		}
359 
360 		/*
361 		 * Get the highest priority thread in the ready queue.
362 		 */
363 		pthread_h = PTHREAD_PRIOQ_FIRST();
364 
365 		/* Check if there are no threads ready to run: */
366 		if (pthread_h == NULL) {
367 			/*
368 			 * Lock the pthread kernel by changing the pointer to
369 			 * the running thread to point to the global kernel
370 			 * thread structure:
371 			 */
372 			_set_curthread(&_thread_kern_thread);
373 			curthread = &_thread_kern_thread;
374 
375 			/* Unprotect the scheduling queues: */
376 			_queue_signals = 0;
377 
378 			/*
379 			 * There are no threads ready to run, so wait until
380 			 * something happens that changes this condition:
381 			 */
382 			_thread_kern_poll(1);
383 
384 			/*
385 			 * This process' usage will likely be very small
386 			 * while waiting in a poll.  Since the scheduling
387 			 * clock is based on the profiling timer, it is
388 			 * unlikely that the profiling timer will fire
389 			 * and update the time of day.  To account for this,
390 			 * get the time of day after polling with a timeout.
391 			 */
392 			gettimeofday((struct timeval *) &_sched_tod, NULL);
393 
394 			/* Check once more for a runnable thread: */
395 			_queue_signals = 1;
396 			pthread_h = PTHREAD_PRIOQ_FIRST();
397 			_queue_signals = 0;
398 		}
399 
400 		if (pthread_h != NULL) {
401 			/* Remove the thread from the ready queue: */
402 			PTHREAD_PRIOQ_REMOVE(pthread_h);
403 
404 			/* Unprotect the scheduling queues: */
405 			_queue_signals = 0;
406 
407 			/*
408 			 * Check for signals queued while the scheduling
409 			 * queues were protected:
410 			 */
411 			while (_sigq_check_reqd != 0) {
412 				/* Clear before handling queued signals: */
413 				_sigq_check_reqd = 0;
414 
415 				/* Protect the scheduling queues again: */
416 				_queue_signals = 1;
417 
418 				_dequeue_signals();
419 
420 				/*
421 				 * Check for a higher priority thread that
422 				 * became runnable due to signal handling.
423 				 */
424 				if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
425 				    (pthread->active_priority > pthread_h->active_priority)) {
426 					/* Remove the thread from the ready queue: */
427 					PTHREAD_PRIOQ_REMOVE(pthread);
428 
429 					/*
430 					 * Insert the lower priority thread
431 					 * at the head of its priority list:
432 					 */
433 					PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);
434 
435 					/* There's a new thread in town: */
436 					pthread_h = pthread;
437 				}
438 
439 				/* Unprotect the scheduling queues: */
440 				_queue_signals = 0;
441 			}
442 
443 			/* Make the selected thread the current thread: */
444 			_set_curthread(pthread_h);
445 			curthread = pthread_h;
446 
447 			/*
448 			 * Save the current time as the time that the thread
449 			 * became active:
450 			 */
451 			current_tick = _sched_ticks;
452 			curthread->last_active = (long) current_tick;
453 
454 			/*
455 			 * Check if this thread is running for the first time
456 			 * or running again after using its full time slice
457 			 * allocation:
458 			 */
459 			if (curthread->slice_usec == -1) {
460 				/* Reset the accumulated time slice period: */
461 				curthread->slice_usec = 0;
462 			}
463 
464 			/* Restore errno. */
465 			errno = curthread->error;
466 
467 			/* Restore floating point state. */
468 			_thread_machdep_restore_float_state(&curthread->_machdep);
469 
470 			/* Restore the new thread, saving current. */
471 			_thread_machdep_switch(&curthread->_machdep,
472 					       &old_thread_run->_machdep);
473 
474 			/*
475 			 * DANGER WILL ROBINSON
476 			 * All stack local variables now contain the values
477 			 * they had when this thread was last running.  In
478 			 * particular, curthread is NOT pointing to the
479 			 * current thread.   Make it point to the current
480 			 * before use.
481 			 */
482 			curthread = _get_curthread();
483 			_thread_kern_in_sched = 0;
484 
485 			/* run any installed switch-hooks */
486 			if ((_sched_switch_hook != NULL) &&
487 			    (_last_user_thread != curthread)) {
488 				_thread_run_switch_hook(_last_user_thread,
489 							curthread);
490 			}
491 
492 			/* check for thread cancellation */
493 			if (((curthread->cancelflags &
494 			      PTHREAD_AT_CANCEL_POINT) == 0) &&
495 			    ((curthread->cancelflags &
496 			      PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
497 				pthread_testcancel();
498 
499 			/* dispatch any pending signals if possible */
500 			if (curthread->sig_defer_count == 0)
501 				_dispatch_signals(scp);
502 
503 			/* Check if a signal context was saved: */
504 			if (curthread->sig_saved == 1) {
505 				/* return to signal handler.   This code
506 				   should be:
507 				   _thread_sys_sigreturn(&curthread->saved_sigcontext);
508 				   but that doesn't currently work on the
509 				   sparc */
510 				return;
511 			} else {
512 				/* This is the normal way out */
513 				return;
514 			}
515 
516 			/* This point should not be reached. */
517 			PANIC("Thread has returned from sigreturn or switch");
518 		}
519 	}
520 
521 	/* There are no more threads, so exit this process: */
522 	exit(0);
523 }
524 
525 void
_thread_kern_sched_state(enum pthread_state state,const char * fname,int lineno)526 _thread_kern_sched_state(enum pthread_state state, const char *fname,
527 			 int lineno)
528 {
529 	struct pthread	*curthread = _get_curthread();
530 
531 	/*
532 	 * Flag the pthread kernel as executing scheduler code
533 	 * to avoid a scheduler signal from interrupting this
534 	 * execution and calling the scheduler again.
535 	 */
536 	_thread_kern_in_sched = 1;
537 
538 	/*
539 	 * Prevent the signal handler from fiddling with this thread
540 	 * before its state is set and is placed into the proper queue.
541 	 */
542 	_queue_signals = 1;
543 
544 	/* Change the state of the current thread: */
545 	curthread->state = state;
546 	curthread->fname = fname;
547 	curthread->lineno = lineno;
548 
549 	/* Schedule the next thread that is ready: */
550 	_thread_kern_sched(NULL);
551 }
552 
553 void
_thread_kern_sched_state_unlock(enum pthread_state state,spinlock_t * lock,const char * fname,int lineno)554 _thread_kern_sched_state_unlock(enum pthread_state state, spinlock_t *lock,
555 				const char *fname, int lineno)
556 {
557 	struct pthread	*curthread = _get_curthread();
558 
559 	/*
560 	 * Flag the pthread kernel as executing scheduler code
561 	 * to avoid a scheduler signal from interrupting this
562 	 * execution and calling the scheduler again.
563 	 */
564 	_thread_kern_in_sched = 1;
565 
566 	/*
567 	 * Prevent the signal handler from fiddling with this thread
568 	 * before its state is set and it is placed into the proper
569 	 * queue(s).
570 	 */
571 	_queue_signals = 1;
572 
573 	/* Change the state of the current thread: */
574 	curthread->state = state;
575 	curthread->fname = fname;
576 	curthread->lineno = lineno;
577 
578 	_SPINUNLOCK(lock);
579 
580 	/* Schedule the next thread that is ready: */
581 	_thread_kern_sched(NULL);
582 }
583 
584 void
_thread_kern_poll(int wait_reqd)585 _thread_kern_poll(int wait_reqd)
586 {
587 	int             count = 0;
588 	int             i, found;
589 	int		kern_pipe_added = 0;
590 	int             nfds = 0;
591 	int		timeout_ms = 0;
592 	struct pthread	*pthread;
593 	struct timespec ts;
594 	struct timeval  tv;
595 
596 	/* Check if the caller wants to wait: */
597 	if (wait_reqd == 0) {
598 		timeout_ms = 0;
599 	}
600 	else {
601 		/* Get the current time of day: */
602 		GET_CURRENT_TOD(tv);
603 		TIMEVAL_TO_TIMESPEC(&tv, &ts);
604 
605 		_queue_signals = 1;
606 		pthread = TAILQ_FIRST(&_waitingq);
607 		_queue_signals = 0;
608 
609 		if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
610 			/*
611 			 * Either there are no threads in the waiting queue,
612 			 * or there are no threads that can timeout.
613 			 */
614 			timeout_ms = INFTIM;
615 		}
616 		else if (pthread->wakeup_time.tv_sec - ts.tv_sec > 60000)
617 			/* Limit maximum timeout to prevent rollover. */
618 			timeout_ms = 60000;
619 		else {
620 			/*
621 			 * Calculate the time left for the next thread to
622 			 * timeout:
623 			 */
624 			timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
625 			    1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
626 			    1000000);
627 			/*
628 			 * Don't allow negative timeouts:
629 			 */
630 			if (timeout_ms < 0)
631 				timeout_ms = 0;
632 		}
633 	}
634 
635 	/* Protect the scheduling queues: */
636 	_queue_signals = 1;
637 
638 	/*
639 	 * Check to see if the signal queue needs to be walked to look
640 	 * for threads awoken by a signal while in the scheduler.
641 	 */
642 	if (_sigq_check_reqd != 0) {
643 		/* Reset flag before handling queued signals: */
644 		_sigq_check_reqd = 0;
645 		_dequeue_signals();
646 	}
647 
648 	/*
649 	 * Check for a thread that became runnable due to a signal:
650 	 */
651 	if (PTHREAD_PRIOQ_FIRST() != NULL) {
652 		/*
653 		 * Since there is at least one runnable thread,
654 		 * disable the wait.
655 		 */
656 		timeout_ms = 0;
657 	}
658 
659 	/*
660 	 * Form the poll table:
661 	 */
662 	nfds = 0;
663 	if (timeout_ms != 0) {
664 		/* Add the kernel pipe to the poll table: */
665 		_thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
666 		_thread_pfd_table[nfds].events = POLLRDNORM;
667 		_thread_pfd_table[nfds].revents = 0;
668 		nfds++;
669 		kern_pipe_added = 1;
670 	}
671 
672 	PTHREAD_WAITQ_SETACTIVE();
673 	TAILQ_FOREACH(pthread, &_workq, qe) {
674 		switch (pthread->state) {
675 		case PS_SPINBLOCK:
676 			/*
677 			 * If the lock is available, let the thread run.
678 			 */
679 			if (pthread->data.spinlock->access_lock ==
680 			    _SPINLOCK_UNLOCKED) {
681 				PTHREAD_WAITQ_CLEARACTIVE();
682 				PTHREAD_WORKQ_REMOVE(pthread);
683 				PTHREAD_NEW_STATE(pthread,PS_RUNNING);
684 				PTHREAD_WAITQ_SETACTIVE();
685 				/* One less thread in a spinblock state: */
686 				_spinblock_count--;
687 				/*
688 				 * Since there is at least one runnable
689 				 * thread, disable the wait.
690 				 */
691 				timeout_ms = 0;
692 			}
693 			break;
694 
695 		/* File descriptor read wait: */
696 		case PS_FDR_WAIT:
697 			/* Limit number of polled files to table size: */
698 			if (nfds < _thread_dtablesize) {
699 				_thread_pfd_table[nfds].events = POLLRDNORM;
700 				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
701 				nfds++;
702 			}
703 			break;
704 
705 		/* File descriptor write wait: */
706 		case PS_FDW_WAIT:
707 			/* Limit number of polled files to table size: */
708 			if (nfds < _thread_dtablesize) {
709 				_thread_pfd_table[nfds].events = POLLWRNORM;
710 				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
711 				nfds++;
712 			}
713 			break;
714 
715 		/* File descriptor poll or select wait: */
716 		case PS_POLL_WAIT:
717 		case PS_SELECT_WAIT:
718 			/* Limit number of polled files to table size: */
719 			if (pthread->data.poll_data->nfds + nfds <
720 			    _thread_dtablesize) {
721 				for (i = 0; i < pthread->data.poll_data->nfds; i++) {
722 					_thread_pfd_table[nfds + i].fd =
723 					    pthread->data.poll_data->fds[i].fd;
724 					_thread_pfd_table[nfds + i].events =
725 					    pthread->data.poll_data->fds[i].events;
726 				}
727 				nfds += pthread->data.poll_data->nfds;
728 			}
729 			break;
730 
731 		/* Other states do not depend on file I/O. */
732 		default:
733 			break;
734 		}
735 	}
736 	PTHREAD_WAITQ_CLEARACTIVE();
737 
738 	/*
739 	 * Wait for a file descriptor to be ready for read, write, or
740 	 * an exception, or a timeout to occur:
741 	 */
742 	count = _thread_sys_poll(_thread_pfd_table, nfds, timeout_ms);
743 
744 	if (kern_pipe_added != 0)
745 		/*
746 		 * Remove the pthread kernel pipe file descriptor
747 		 * from the pollfd table:
748 		 */
749 		nfds = 1;
750 	else
751 		nfds = 0;
752 
753 	/*
754 	 * Check if it is possible that there are bytes in the kernel
755 	 * read pipe waiting to be read:
756 	 */
757 	if (count < 0 || ((kern_pipe_added != 0) &&
758 	    (_thread_pfd_table[0].revents & POLLRDNORM))) {
759 		/*
760 		 * If the kernel read pipe was included in the
761 		 * count:
762 		 */
763 		if (count > 0) {
764 			/* Decrement the count of file descriptors: */
765 			count--;
766 		}
767 
768 		if (_sigq_check_reqd != 0) {
769 			/* Reset flag before handling signals: */
770 			_sigq_check_reqd = 0;
771 			_dequeue_signals();
772 		}
773 	}
774 
775 	/*
776 	 * Check if any file descriptors are ready:
777 	 */
778 	if (count > 0) {
779 		/*
780 		 * Enter a loop to look for threads waiting on file
781 		 * descriptors that are flagged as available by the
782 		 * _poll syscall:
783 		 */
784 		PTHREAD_WAITQ_SETACTIVE();
785 		TAILQ_FOREACH(pthread, &_workq, qe) {
786 			switch (pthread->state) {
787 			case PS_SPINBLOCK:
788 				/*
789 				 * If the lock is available, let the thread run.
790 				 */
791 				if (pthread->data.spinlock->access_lock ==
792 				    _SPINLOCK_UNLOCKED) {
793 					PTHREAD_WAITQ_CLEARACTIVE();
794 					PTHREAD_WORKQ_REMOVE(pthread);
795 					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
796 					PTHREAD_WAITQ_SETACTIVE();
797 
798 					/*
799 					 * One less thread in a spinblock state:
800 					 */
801 					_spinblock_count--;
802 				}
803 				break;
804 
805 			/* File descriptor read wait: */
806 			case PS_FDR_WAIT:
807 				if ((nfds < _thread_dtablesize) &&
808 				    (_thread_pfd_table[nfds].revents
809 				       & (POLLRDNORM|POLLERR|POLLHUP|POLLNVAL))
810 				      != 0) {
811 					PTHREAD_WAITQ_CLEARACTIVE();
812 					PTHREAD_WORKQ_REMOVE(pthread);
813 					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
814 					PTHREAD_WAITQ_SETACTIVE();
815 				}
816 				nfds++;
817 				break;
818 
819 			/* File descriptor write wait: */
820 			case PS_FDW_WAIT:
821 				if ((nfds < _thread_dtablesize) &&
822 				    (_thread_pfd_table[nfds].revents
823 				       & (POLLWRNORM|POLLERR|POLLHUP|POLLNVAL))
824 				      != 0) {
825 					PTHREAD_WAITQ_CLEARACTIVE();
826 					PTHREAD_WORKQ_REMOVE(pthread);
827 					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
828 					PTHREAD_WAITQ_SETACTIVE();
829 				}
830 				nfds++;
831 				break;
832 
833 			/* File descriptor poll or select wait: */
834 			case PS_POLL_WAIT:
835 			case PS_SELECT_WAIT:
836 				if (pthread->data.poll_data->nfds + nfds <
837 				    _thread_dtablesize) {
838 					/*
839 					 * Enter a loop looking for I/O
840 					 * readiness:
841 					 */
842 					found = 0;
843 					for (i = 0; i < pthread->data.poll_data->nfds; i++) {
844 						if (_thread_pfd_table[nfds + i].revents != 0) {
845 							pthread->data.poll_data->fds[i].revents =
846 							    _thread_pfd_table[nfds + i].revents;
847 							found++;
848 						}
849 					}
850 
851 					/* Increment before destroying: */
852 					nfds += pthread->data.poll_data->nfds;
853 
854 					if (found != 0) {
855 						pthread->data.poll_data->nfds = found;
856 						PTHREAD_WAITQ_CLEARACTIVE();
857 						PTHREAD_WORKQ_REMOVE(pthread);
858 						PTHREAD_NEW_STATE(pthread,PS_RUNNING);
859 						PTHREAD_WAITQ_SETACTIVE();
860 					}
861 				}
862 				else
863 					nfds += pthread->data.poll_data->nfds;
864 				break;
865 
866 			/* Other states do not depend on file I/O. */
867 			default:
868 				break;
869 			}
870 		}
871 		PTHREAD_WAITQ_CLEARACTIVE();
872 	}
873 	else if (_spinblock_count != 0) {
874 		/*
875 		 * Enter a loop to look for threads waiting on a spinlock
876 		 * that is now available.
877 		 */
878 		PTHREAD_WAITQ_SETACTIVE();
879 		TAILQ_FOREACH(pthread, &_workq, qe) {
880 			if (pthread->state == PS_SPINBLOCK) {
881 				/*
882 				 * If the lock is available, let the thread run.
883 				 */
884 				if (pthread->data.spinlock->access_lock ==
885 				    _SPINLOCK_UNLOCKED) {
886 					PTHREAD_WAITQ_CLEARACTIVE();
887 					PTHREAD_WORKQ_REMOVE(pthread);
888 					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
889 					PTHREAD_WAITQ_SETACTIVE();
890 
891 					/*
892 					 * One less thread in a spinblock state:
893 					 */
894 					_spinblock_count--;
895 				}
896 			}
897 		}
898 		PTHREAD_WAITQ_CLEARACTIVE();
899 	}
900 
901 	/* Unprotect the scheduling queues: */
902 	_queue_signals = 0;
903 
904 	while (_sigq_check_reqd != 0) {
905 		/* Handle queued signals: */
906 		_sigq_check_reqd = 0;
907 
908 		/* Protect the scheduling queues: */
909 		_queue_signals = 1;
910 		_dequeue_signals();
911 		_queue_signals = 0;
912 	}
913 }
914 
915 void
_thread_kern_set_timeout(const struct timespec * timeout)916 _thread_kern_set_timeout(const struct timespec * timeout)
917 {
918 	struct pthread	*curthread = _get_curthread();
919 	struct timespec current_time;
920 	struct timeval  tv;
921 
922 	/* Reset the timeout flag for the running thread: */
923 	curthread->timeout = 0;
924 
925 	/* Check if the thread is to wait forever: */
926 	if (timeout == NULL) {
927 		/*
928 		 * Set the wakeup time to something that can be recognised as
929 		 * different to an actual time of day:
930 		 */
931 		curthread->wakeup_time.tv_sec = -1;
932 		curthread->wakeup_time.tv_nsec = -1;
933 	}
934 	/* Check if no waiting is required: */
935 	else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
936 		/* Set the wake up time to 'immediately': */
937 		curthread->wakeup_time.tv_sec = 0;
938 		curthread->wakeup_time.tv_nsec = 0;
939 	} else {
940 		gettimeofday((struct timeval *) &_sched_tod, NULL);
941 		GET_CURRENT_TOD(tv);
942 		TIMEVAL_TO_TIMESPEC(&tv, &current_time);
943 		timespecadd(&current_time, timeout, &curthread->wakeup_time);
944 	}
945 }
946 
/*
 * Function registered with dlctl to lock/unlock the kernel for
 * thread-safe dlopen calls.
 *	which == 0:	defer signals (stops scheduler)
 *	which != 0:	undefer signals and process any queued sigs
 */
void
_thread_kern_lock(int which)
{
	if (which != 0) {
		/* Unlock request: */
		_thread_kern_sig_undefer();
		return;
	}

	/* Lock request: */
	_thread_kern_sig_defer();
}
961 
962 
963 void
_thread_kern_sig_defer(void)964 _thread_kern_sig_defer(void)
965 {
966 	struct pthread	*curthread = _get_curthread();
967 
968 	/* Allow signal deferral to be recursive. */
969 	curthread->sig_defer_count++;
970 }
971 
972 void
_thread_kern_sig_undefer(void)973 _thread_kern_sig_undefer(void)
974 {
975 	struct pthread	*curthread = _get_curthread();
976 
977 	/*
978 	 * Perform checks to yield only if we are about to undefer
979 	 * signals.
980 	 */
981 	if (curthread->sig_defer_count > 1) {
982 		/* Decrement the signal deferral count. */
983 		curthread->sig_defer_count--;
984 	}
985 	else if (curthread->sig_defer_count == 1) {
986 		/* Reenable signals: */
987 		curthread->sig_defer_count = 0;
988 
989 		/*
990 		 * Check if there are queued signals:
991 		 */
992 		if (_sigq_check_reqd != 0)
993 			_thread_kern_sched(NULL);
994 
995 		/*
996 		 * Check for asynchronous cancellation before delivering any
997 		 * pending signals:
998 		 */
999 		if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
1000 		    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
1001 			pthread_testcancel();
1002 
1003 		/*
1004 		 * If there are pending signals or this thread has
1005 		 * to yield the CPU, call the kernel scheduler:
1006 		 *
1007 		 * XXX - Come back and revisit the pending signal problem
1008 		 */
1009 		if ((curthread->yield_on_sig_undefer != 0) ||
1010 		    curthread->sigpend != 0) {
1011 			curthread->yield_on_sig_undefer = 0;
1012 			_thread_kern_sched(NULL);
1013 		}
1014 	}
1015 }
1016 
1017 void
_dequeue_signals(void)1018 _dequeue_signals(void)
1019 {
1020 	char	bufr[128];
1021 	int	i, num;
1022 
1023 	/*
1024 	 * Enter a loop to read and handle queued signals from the
1025 	 * pthread kernel pipe:
1026 	 */
1027 	while (((num = _thread_sys_read(_thread_kern_pipe[0], bufr,
1028 	    sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
1029 		/*
1030 		 * The buffer read contains one byte per signal and
1031 		 * each byte is the signal number.
1032 		 */
1033 		for (i = 0; i < num; i++) {
1034 			if ((int) bufr[i] != _SCHED_SIGNAL)
1035 				_thread_sig_handle((int) bufr[i], NULL);
1036 		}
1037 	}
1038 	if ((num < 0) && (errno != EAGAIN)) {
1039 		/*
1040 		 * The only error we should expect is if there is
1041 		 * no data to read.
1042 		 */
1043 		PANIC("Unable to read from thread kernel pipe");
1044 	}
1045 }
1046 
1047 inline void
_thread_run_switch_hook(pthread_t thread_out,pthread_t thread_in)1048 _thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
1049 {
1050 	pthread_t tid_out = thread_out;
1051 	pthread_t tid_in = thread_in;
1052 
1053 	if ((tid_out != NULL) &&
1054 	    (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
1055 		tid_out = NULL;
1056 	if ((tid_in != NULL) &&
1057 	    (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
1058 		tid_in = NULL;
1059 
1060 	if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
1061 		/* Run the scheduler switch hook: */
1062 		_sched_switch_hook(tid_out, tid_in);
1063 	}
1064 }
1065 
1066 struct pthread *
_get_curthread(void)1067 _get_curthread(void)
1068 {
1069 	if (_thread_initial == NULL)
1070 		_thread_init();
1071 
1072 	return (_thread_run);
1073 }
1074 
1075 void
_set_curthread(struct pthread * newthread)1076 _set_curthread(struct pthread *newthread)
1077 {
1078 	_thread_run = newthread;
1079 }
1080 #endif
1081