xref: /NextBSD/sys/kern/kern_thr.c (revision fb5720f7be0d2eab3253696e244c058a229b5473)
1 /*-
2  * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include "opt_compat.h"
31 #include "opt_posix.h"
32 #include "opt_thrworkq.h"
33 
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/mman.h>
38 #include <sys/mutex.h>
39 #include <sys/priv.h>
40 #include <sys/proc.h>
41 #include <sys/posix4.h>
42 #include <sys/racct.h>
43 #include <sys/resourcevar.h>
44 #include <sys/rwlock.h>
45 #include <sys/sched.h>
46 #include <sys/sysctl.h>
47 #include <sys/smp.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/systm.h>
51 #include <sys/sysproto.h>
52 #include <sys/signalvar.h>
53 #include <sys/sysctl.h>
54 #include <sys/ucontext.h>
55 #include <sys/thr.h>
56 #include <sys/rtprio.h>
57 #include <sys/thrworkq.h>
58 #include <sys/umtx.h>
59 #include <sys/limits.h>
60 
61 #include <vm/vm_domain.h>
62 
63 #include <machine/frame.h>
64 
65 #include <vm/pmap.h>
66 #include <vm/vm.h>
67 #include <vm/vm_extern.h>
68 #include <vm/vm_map.h>
69 
70 #include <security/audit/audit.h>
71 
/*
 * Default stack guard size for thread.  If set to zero then no
 * guard page.
 */
#define        THR_GUARD_DEFAULT       PAGE_SIZE

/*
 * XXX - These should most likely be sysctl parameters.
 *
 * thr_stack_default is the stack size sys_thr_stack() substitutes when the
 * caller asks for a zero-sized stack.  thr_stack_initial is the size
 * kern_thr_stack() reserves for the main thread's stack when it computes
 * where the first thread stack starts.
 */
static vm_size_t thr_stack_default = THR_STACK_DEFAULT;
static vm_size_t thr_stack_initial = THR_STACK_INITIAL;

/* Parent node for the kern.threads.* sysctl knobs declared below. */
static SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0,
    "thread allocation");

/*
 * Writable limit compared against p_numthreads in kern_thr_alloc();
 * reaching it makes thread allocation fail with EPROCLIM.
 */
static int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
    &max_threads_per_proc, 0, "Limit on threads per proc");

/*
 * Read-only counter bumped by kern_thr_alloc() every time the limit above
 * rejects an allocation.
 */
static int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
    &max_threads_hits, 0, "kern.threads.max_threads_per_proc hit count");
94 
95 #ifdef COMPAT_FREEBSD32
96 
97 static inline int
suword_lwpid(void * addr,lwpid_t lwpid)98 suword_lwpid(void *addr, lwpid_t lwpid)
99 {
100 	int error;
101 
102 	if (SV_CURPROC_FLAG(SV_LP64))
103 		error = suword(addr, lwpid);
104 	else
105 		error = suword32(addr, lwpid);
106 	return (error);
107 }
108 
109 #else
110 #define suword_lwpid	suword
111 #endif
112 
113 /*
114  * System call interface.
115  */
116 
/*
 * Argument bundle handed from sys_thr_create() to thr_create_initthr()
 * through thread_create()'s opaque 'thunk' pointer.
 */
struct thr_create_initthr_args {
	ucontext_t ctx;		/* context copied in from uap->ctx */
	long *tid;		/* user address for the new tid, may be NULL */
};
121 
122 static int
thr_create_initthr(struct thread * td,void * thunk)123 thr_create_initthr(struct thread *td, void *thunk)
124 {
125 	struct thr_create_initthr_args *args;
126 
127 	/* Copy out the child tid. */
128 	args = thunk;
129 	if (args->tid != NULL && suword_lwpid(args->tid, td->td_tid))
130 		return (EFAULT);
131 
132 	return (set_mcontext(td, &args->ctx.uc_mcontext));
133 }
134 
135 int
sys_thr_create(struct thread * td,struct thr_create_args * uap)136 sys_thr_create(struct thread *td, struct thr_create_args *uap)
137     /* ucontext_t *ctx, long *id, int flags */
138 {
139 	struct thr_create_initthr_args args;
140 	int error;
141 
142 	if ((error = copyin(uap->ctx, &args.ctx, sizeof(args.ctx))))
143 		return (error);
144 	args.tid = uap->id;
145 	return (thread_create(td, NULL, thr_create_initthr, &args));
146 }
147 
148 int
sys_thr_new(struct thread * td,struct thr_new_args * uap)149 sys_thr_new(struct thread *td, struct thr_new_args *uap)
150     /* struct thr_param * */
151 {
152 	struct thr_param param;
153 	int error;
154 
155 	if (uap->param_size < 0 || uap->param_size > sizeof(param))
156 		return (EINVAL);
157 	bzero(&param, sizeof(param));
158 	if ((error = copyin(uap->param, &param, uap->param_size)))
159 		return (error);
160 	return (kern_thr_new(td, &param));
161 }
162 
163 static int
thr_new_initthr(struct thread * td,void * thunk)164 thr_new_initthr(struct thread *td, void *thunk)
165 {
166 	stack_t stack;
167 	struct thr_param *param;
168 
169 	/*
170 	 * Here we copy out tid to two places, one for child and one
171 	 * for parent, because pthread can create a detached thread,
172 	 * if parent wants to safely access child tid, it has to provide
173 	 * its storage, because child thread may exit quickly and
174 	 * memory is freed before parent thread can access it.
175 	 */
176 	param = thunk;
177 	if ((param->child_tid != NULL &&
178 	    suword_lwpid(param->child_tid, td->td_tid)) ||
179 	    (param->parent_tid != NULL &&
180 	    suword_lwpid(param->parent_tid, td->td_tid)))
181 		return (EFAULT);
182 
183 	/* Set up our machine context. */
184 	stack.ss_sp = param->stack_base;
185 	stack.ss_size = param->stack_size;
186 	/* Set upcall address to user thread entry function. */
187 	cpu_set_upcall_kse(td, param->start_func, param->arg, &stack);
188 	/* Setup user TLS address and TLS pointer register. */
189 	return (cpu_set_user_tls(td, param->tls_base));
190 }
191 
192 int
kern_thr_new(struct thread * td,struct thr_param * param)193 kern_thr_new(struct thread *td, struct thr_param *param)
194 {
195 	struct rtprio rtp, *rtpp;
196 	int error;
197 
198 	rtpp = NULL;
199 	if (param->rtp != 0) {
200 		error = copyin(param->rtp, &rtp, sizeof(struct rtprio));
201 		if (error)
202 			return (error);
203 		rtpp = &rtp;
204 	}
205 	return (thread_create(td, rtpp, thr_new_initthr, param));
206 }
207 
/*
 * Create a new thread in the process of 'td' and put it on a run queue.
 *
 *   td                 the creating thread; the new thread is added to
 *                      td->td_proc.
 *   rtp                optional scheduling class/priority for the new
 *                      thread, or NULL.  Note that rtp->prio is
 *                      overwritten with 0 for RTP_PRIO_NORMAL.
 *   initialize_thread  callback run on the not-yet-linked thread to
 *                      finish its setup; a non-zero return aborts the
 *                      creation and is returned to the caller.
 *   thunk              opaque argument forwarded to initialize_thread.
 *
 * Returns 0 on success.  Failures visible here: EPERM/EINVAL for a bad
 * rtprio, EPROCLIM when the racct thread limit refuses the new thread,
 * the error from kern_thr_alloc(), or the callback's error.
 */
int
thread_create(struct thread *td, struct rtprio *rtp,
    int (*initialize_thread)(struct thread *, void *), void *thunk)
{
	struct thread *newtd;
	struct proc *p;
	int error;

	p = td->td_proc;

	/* Validate the requested priority before any resource is taken. */
	if (rtp != NULL) {
		switch(rtp->type) {
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_FIFO:
			/* Only root can set scheduler policy */
			if (priv_check(td, PRIV_SCHED_SETPOLICY) != 0)
				return (EPERM);
			if (rtp->prio > RTP_PRIO_MAX)
				return (EINVAL);
			break;
		case RTP_PRIO_NORMAL:
			rtp->prio = 0;
			break;
		default:
			return (EINVAL);
		}
	}

#ifdef RACCT
	/*
	 * Charge the thread to the process up front; every later failure
	 * goes through the 'fail' label, which undoes this charge.
	 */
	if (racct_enable) {
		PROC_LOCK(p);
		error = racct_add(p, RACCT_NTHR, 1);
		PROC_UNLOCK(p);
		if (error != 0)
			return (EPROCLIM);
	}
#endif

	/* Initialize our td */
	error = kern_thr_alloc(p, 0, &newtd);
	if (error)
		goto fail;

	cpu_set_upcall(newtd, td);

	/*
	 * Clear the td_startzero..td_endzero range and inherit the
	 * td_startcopy..td_endcopy range from the creating thread.
	 */
	bzero(&newtd->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &newtd->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));
	newtd->td_proc = td->td_proc;
	thread_cow_get(newtd, td);

	/*
	 * Let the caller finish setting up the thread.  It is not linked
	 * into the process yet, so on failure it only has to be released.
	 */
	error = initialize_thread(newtd, thunk);
	if (error != 0) {
		thread_cow_free(newtd);
		thread_free(newtd);
		goto fail;
	}

	PROC_LOCK(p);
	p->p_flag |= P_HADTHREADS;
	thread_link(newtd, p);
	/* The new thread starts out named after the process. */
	bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
	thread_lock(td);
	/* let the scheduler know about these things. */
	sched_fork_thread(td, newtd);
	thread_unlock(td);
	/* A process that should stop makes the new thread check on its way out. */
	if (P_SHOULDSTOP(p))
		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
	/* Mark the thread as newly born when LWP events are enabled. */
	if (p->p_flag2 & P2_LWP_EVENTS)
		newtd->td_dbgflags |= TDB_BORN;

	/*
	 * Copy the existing thread VM policy into the new thread.
	 */
	vm_domain_policy_localcopy(&newtd->td_vm_dom_policy,
	    &td->td_vm_dom_policy);

	PROC_UNLOCK(p);

	tidhash_add(newtd);

	thread_lock(newtd);
	if (rtp != NULL) {
		/*
		 * Apply the requested priority, except when a timeshare
		 * creator asks for the normal class.
		 */
		if (!(td->td_pri_class == PRI_TIMESHARE &&
		      rtp->type == RTP_PRIO_NORMAL)) {
			rtp_to_pri(rtp, newtd);
			sched_prio(newtd, newtd->td_user_pri);
		} /* ignore timesharing class */
	}
	TD_SET_CAN_RUN(newtd);
	sched_add(newtd, SRQ_BORING);
	thread_unlock(newtd);

	return (0);

fail:
#ifdef RACCT
	/* Give back the thread charged to the process above. */
	if (racct_enable) {
		PROC_LOCK(p);
		racct_sub(p, RACCT_NTHR, 1);
		PROC_UNLOCK(p);
	}
#endif
	return (error);
}
314 
315 int
sys_thr_self(struct thread * td,struct thr_self_args * uap)316 sys_thr_self(struct thread *td, struct thr_self_args *uap)
317     /* long *id */
318 {
319 	int error;
320 
321 	error = suword_lwpid(uap->id, (unsigned)td->td_tid);
322 	if (error == -1)
323 		return (EFAULT);
324 	return (0);
325 }
326 
327 int
sys_thr_exit(struct thread * td,struct thr_exit_args * uap)328 sys_thr_exit(struct thread *td, struct thr_exit_args *uap)
329     /* long *state */
330 {
331 
332 #ifdef THRWORKQ
333 	if (td->td_reuse_stack != NULL) {
334 		thrworkq_reusestack(td->td_proc, td->td_reuse_stack);
335 		td->td_reuse_stack = NULL;
336 	}
337 #endif
338 
339 	if ((void *)uap->state != NULL) {
340 		/* Signal userland that it can free the stack. */
341 		suword_lwpid(uap->state, 1);
342 		kern_umtx_wake(td, uap->state, INT_MAX, 0);
343 	}
344 
345 	return (kern_thr_exit(td));
346 }
347 
/*
 * Terminate the calling thread 'td'.
 *
 * Returns 0, without exiting, when 'td' is the last thread of the process
 * that is not already committed to exiting.  In every other case the
 * function ends in thread_exit() and does not come back to the caller.
 */
int
kern_thr_exit(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;

	/*
	 * If all of the threads in a process call this routine to
	 * exit (e.g. all threads call pthread_exit()), exactly one
	 * thread should return to the caller to terminate the process
	 * instead of the thread.
	 *
	 * Checking p_numthreads alone is not sufficient since threads
	 * might be committed to terminating while the PROC_LOCK is
	 * dropped in either ptracestop() or while removing this thread
	 * from the tidhash.  Instead, the p_pendingexits field holds
	 * the count of threads in either of those states and a thread
	 * is considered the "last" thread if all of the other threads
	 * in a process are already terminating.
	 */
	PROC_LOCK(p);
	if (p->p_numthreads == p->p_pendingexits + 1) {
		/*
		 * Ignore attempts to shut down last thread in the
		 * proc.  This will actually call _exit(2) in the
		 * usermode trampoline when it returns.
		 */
		PROC_UNLOCK(p);
		return (0);
	}

	/*
	 * Count this thread as pending for as long as the proc lock may
	 * be dropped below, so that concurrent callers see it in the
	 * check above.
	 */
	p->p_pendingexits++;
	td->td_dbgflags |= TDB_EXIT;
	/* Stop for the tracer when the process is traced with LWP events on. */
	if (p->p_flag & P_TRACED && p->p_flag2 & P2_LWP_EVENTS)
		ptracestop(td, SIGTRAP);
	/* tidhash_remove() is called with the proc lock released. */
	PROC_UNLOCK(p);
	tidhash_remove(td);
	PROC_LOCK(p);
	p->p_pendingexits--;

	/*
	 * The check above should prevent all other threads from this
	 * process from exiting while the PROC_LOCK is dropped, so
	 * there must be at least one other thread other than the
	 * current thread.
	 */
	KASSERT(p->p_numthreads > 1, ("too few threads"));
	/* Return the thread charged to the process by thread_create(). */
	racct_sub(p, RACCT_NTHR, 1);
	tdsigcleanup(td);
	umtx_thread_exit(td);
	PROC_SLOCK(p);
	thread_stopped(p);
	thread_exit();
	/* NOTREACHED */
}
404 
405 int
sys_thr_kill(struct thread * td,struct thr_kill_args * uap)406 sys_thr_kill(struct thread *td, struct thr_kill_args *uap)
407     /* long id, int sig */
408 {
409 	ksiginfo_t ksi;
410 	struct thread *ttd;
411 	struct proc *p;
412 	int error;
413 
414 	p = td->td_proc;
415 	ksiginfo_init(&ksi);
416 	ksi.ksi_signo = uap->sig;
417 	ksi.ksi_code = SI_LWP;
418 	ksi.ksi_pid = p->p_pid;
419 	ksi.ksi_uid = td->td_ucred->cr_ruid;
420 	if (uap->id == -1) {
421 		if (uap->sig != 0 && !_SIG_VALID(uap->sig)) {
422 			error = EINVAL;
423 		} else {
424 			error = ESRCH;
425 			PROC_LOCK(p);
426 			FOREACH_THREAD_IN_PROC(p, ttd) {
427 				if (ttd != td) {
428 					error = 0;
429 					if (uap->sig == 0)
430 						break;
431 					tdksignal(ttd, uap->sig, &ksi);
432 				}
433 			}
434 			PROC_UNLOCK(p);
435 		}
436 	} else {
437 		error = 0;
438 		ttd = tdfind((lwpid_t)uap->id, p->p_pid);
439 		if (ttd == NULL)
440 			return (ESRCH);
441 		if (uap->sig == 0)
442 			;
443 		else if (!_SIG_VALID(uap->sig))
444 			error = EINVAL;
445 		else
446 			tdksignal(ttd, uap->sig, &ksi);
447 		PROC_UNLOCK(ttd->td_proc);
448 	}
449 	return (error);
450 }
451 
452 int
sys_thr_kill2(struct thread * td,struct thr_kill2_args * uap)453 sys_thr_kill2(struct thread *td, struct thr_kill2_args *uap)
454     /* pid_t pid, long id, int sig */
455 {
456 	ksiginfo_t ksi;
457 	struct thread *ttd;
458 	struct proc *p;
459 	int error;
460 
461 	AUDIT_ARG_SIGNUM(uap->sig);
462 
463 	ksiginfo_init(&ksi);
464 	ksi.ksi_signo = uap->sig;
465 	ksi.ksi_code = SI_LWP;
466 	ksi.ksi_pid = td->td_proc->p_pid;
467 	ksi.ksi_uid = td->td_ucred->cr_ruid;
468 	if (uap->id == -1) {
469 		if ((p = pfind(uap->pid)) == NULL)
470 			return (ESRCH);
471 		AUDIT_ARG_PROCESS(p);
472 		error = p_cansignal(td, p, uap->sig);
473 		if (error) {
474 			PROC_UNLOCK(p);
475 			return (error);
476 		}
477 		if (uap->sig != 0 && !_SIG_VALID(uap->sig)) {
478 			error = EINVAL;
479 		} else {
480 			error = ESRCH;
481 			FOREACH_THREAD_IN_PROC(p, ttd) {
482 				if (ttd != td) {
483 					error = 0;
484 					if (uap->sig == 0)
485 						break;
486 					tdksignal(ttd, uap->sig, &ksi);
487 				}
488 			}
489 		}
490 		PROC_UNLOCK(p);
491 	} else {
492 		ttd = tdfind((lwpid_t)uap->id, uap->pid);
493 		if (ttd == NULL)
494 			return (ESRCH);
495 		p = ttd->td_proc;
496 		AUDIT_ARG_PROCESS(p);
497 		error = p_cansignal(td, p, uap->sig);
498 		if (uap->sig == 0)
499 			;
500 		else if (!_SIG_VALID(uap->sig))
501 			error = EINVAL;
502 		else
503 			tdksignal(ttd, uap->sig, &ksi);
504 		PROC_UNLOCK(p);
505 	}
506 	return (error);
507 }
508 
509 int
sys_thr_suspend(struct thread * td,struct thr_suspend_args * uap)510 sys_thr_suspend(struct thread *td, struct thr_suspend_args *uap)
511 	/* const struct timespec *timeout */
512 {
513 	struct timespec ts, *tsp;
514 	int error;
515 
516 	tsp = NULL;
517 	if (uap->timeout != NULL) {
518 		error = umtx_copyin_timeout(uap->timeout, &ts);
519 		if (error != 0)
520 			return (error);
521 		tsp = &ts;
522 	}
523 
524 	return (kern_thr_suspend(td, tsp));
525 }
526 
/*
 * Suspend the calling thread 'td' until it is woken through
 * sys_thr_wake(), a signal interrupts the sleep, or the optional
 * timeout 'tsp' expires.
 *
 * Returns 0 when a wakeup was consumed, ETIMEDOUT when the timeout ran
 * out (immediately for an all-zero timeout), or the error from msleep();
 * ERESTART is turned into EINTR when a timeout was in effect.
 */
int
kern_thr_suspend(struct thread *td, struct timespec *tsp)
{
	struct proc *p = td->td_proc;
	struct timeval tv;
	int error = 0;
	int timo = 0;

	/* A wakeup the thread posted to itself is consumed without sleeping. */
	if (td->td_pflags & TDP_WAKEUP) {
		td->td_pflags &= ~TDP_WAKEUP;
		return (0);
	}

	if (tsp != NULL) {
		/* A zero timeout means "poll": do not sleep at all. */
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			error = EWOULDBLOCK;
		else {
			TIMESPEC_TO_TIMEVAL(&tv, tsp);
			timo = tvtohz(&tv);
		}
	}

	PROC_LOCK(p);
	/* Sleep only if no wakeup from another thread is already pending. */
	if (error == 0 && (td->td_flags & TDF_THRWAKEUP) == 0)
		error = msleep((void *)td, &p->p_mtx,
			 PCATCH, "lthr", timo);

	/* A pending wakeup wins over whatever msleep() reported. */
	if (td->td_flags & TDF_THRWAKEUP) {
		thread_lock(td);
		td->td_flags &= ~TDF_THRWAKEUP;
		thread_unlock(td);
		PROC_UNLOCK(p);
		return (0);
	}
	PROC_UNLOCK(p);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	else if (error == ERESTART) {
		/* ERESTART is reported as EINTR when a timeout was in effect. */
		if (timo != 0)
			error = EINTR;
	}
	return (error);
}
570 
571 int
sys_thr_wake(struct thread * td,struct thr_wake_args * uap)572 sys_thr_wake(struct thread *td, struct thr_wake_args *uap)
573 	/* long id */
574 {
575 	struct proc *p;
576 	struct thread *ttd;
577 
578 	if (uap->id == td->td_tid) {
579 		td->td_pflags |= TDP_WAKEUP;
580 		return (0);
581 	}
582 
583 	p = td->td_proc;
584 	ttd = tdfind((lwpid_t)uap->id, p->p_pid);
585 	if (ttd == NULL)
586 		return (ESRCH);
587 	thread_lock(ttd);
588 	ttd->td_flags |= TDF_THRWAKEUP;
589 	thread_unlock(ttd);
590 	wakeup((void *)ttd);
591 	PROC_UNLOCK(p);
592 	return (0);
593 }
594 
595 int
sys_thr_set_name(struct thread * td,struct thr_set_name_args * uap)596 sys_thr_set_name(struct thread *td, struct thr_set_name_args *uap)
597 {
598 	struct proc *p;
599 	char name[MAXCOMLEN + 1];
600 	struct thread *ttd;
601 	int error;
602 
603 	error = 0;
604 	name[0] = '\0';
605 	if (uap->name != NULL) {
606 		error = copyinstr(uap->name, name, sizeof(name),
607 			NULL);
608 		if (error)
609 			return (error);
610 	}
611 	p = td->td_proc;
612 	ttd = tdfind((lwpid_t)uap->id, p->p_pid);
613 	if (ttd == NULL)
614 		return (ESRCH);
615 	strcpy(ttd->td_name, name);
616 #ifdef KTR
617 	sched_clear_tdname(ttd);
618 #endif
619 	PROC_UNLOCK(p);
620 	return (error);
621 }
622 
623 int
sys_thr_stack(struct thread * td,struct thr_stack_args * uap)624 sys_thr_stack(struct thread *td, struct thr_stack_args *uap)
625 {
626 	vm_size_t stacksz, guardsz;
627 	void *addr;
628 	int error;
629 
630 	/* Round up to the nearest page size. */
631 	stacksz = (vm_size_t)round_page(uap->stacksize);
632 	guardsz = (vm_size_t)round_page(uap->guardsize);
633 
634 	if (stacksz == 0)
635 		stacksz = thr_stack_default;
636 
637 	error = kern_thr_stack(td->td_proc, &addr, stacksz, guardsz);
638 
639 	td->td_retval[0] = (register_t) addr;
640 
641 	return (error);
642 }
643 
644 /*
645  * kern_thr_stack() maps a new thread stack in the process.  It returns
646  * the stack address in the 'addr' arg.
647  *
648  * Base address of the last stack allocated (including its red zone, if
649  * there is one).  Stacks are allocated contiguously, starting beyond the
650  * top of the main stack.  When a new stack is created, a red zone is
651  * typically created (actually, the red zone is mapped with PROT_NONE) above
652  * the top of the stack, such that the stack will not be able to grow all
653  * the way to the bottom of the next stack.  This isn't fool-proof.  It is
654  * possible for a stack to grow by a large amount, such that it grows into
655  * the next stack, and as long as the memory within the red zone is never
656  * accessed, nothing will prevent one thread stack from trouncing all over
657  * the next.
658  *
659  * low memory
660  *     . . . . . . . . . . . . . . . . . .
661  *    |                                   |
662  *    |             stack 3               | start of 3rd thread stack
663  *    +-----------------------------------+
664  *    |                                   |
665  *    |       Red Zone (guard page)       | red zone for 2nd thread
666  *    |                                   |
667  *    +-----------------------------------+
668  *    |  stack 2 - thr_stack_default      | top of 2nd thread stack
669  *    |                                   |
670  *    |                                   |
671  *    |                                   |
672  *    |                                   |
673  *    |             stack 2               |
674  *    +-----------------------------------+ <-- start of 2nd thread stack
675  *    |                                   |
676  *    |       Red Zone (guard page)       | red zone for 1st thread
677  *    |                                   |
678  *    +-----------------------------------+
679  *    |  stack 1 - thr_stack_default      | top of 1st thread stack
680  *    |                                   |
681  *    |                                   |
682  *    |                                   |
683  *    |                                   |
684  *    |             stack 1               |
685  *    +-----------------------------------+ <-- start of 1st thread stack
686  *    |                                   |   (initial value of p->p_thrstack)
687  *    |       Red Zone (guard page)       |
688  *    |                                   | red zone for main thread
689  *    +-----------------------------------+
690  *    | ->sv_usrstack - thr_stack_initial | top of main thread stack
691  *    |                                   | ^
692  *    |                                   | |
693  *    |                                   | |
694  *    |                                   | | stack growth
695  *    |                                   |
696  *    +-----------------------------------+ <-- start of main thread stack
697  *                                              (p->p_sysent->sv_usrstack)
698  * high memory
699  *
700  * XXX - This code assumes that the stack always grows down in address space.
701  */
702 int
kern_thr_stack(struct proc * p,void ** addr,vm_size_t stacksz,vm_size_t guardsz)703 kern_thr_stack(struct proc *p, void **addr, vm_size_t stacksz,
704     vm_size_t guardsz)
705 {
706 	vm_offset_t stackaddr;
707 	vm_map_t map;
708 	int error;
709 
710 	KASSERT(stacksz != 0, ("[%s: %d] stacksz = 0", __FILE__, __LINE__));
711 
712 	*addr = NULL;
713 
714 	PROC_LOCK(p);
715 	if (p->p_thrstack == 0)  {
716 		/* Compute the start of the first thread stack. */
717 		p->p_thrstack = p->p_sysent->sv_usrstack -
718 		    (vm_offset_t)(thr_stack_initial + THR_GUARD_DEFAULT);
719 	}
720 
721 	stackaddr = p->p_thrstack - (vm_offset_t)(stacksz + guardsz);
722 
723 	/*
724 	 * Compute the next stack location unconditionally.  Under normal
725 	 * operating conditions, the most likely reason for no being able
726 	 * to map the thread stack is a stack overflow of the adjacent
727 	 * thread stack.
728 	 */
729 	p->p_thrstack -= (vm_offset_t)(stacksz + guardsz);
730 	PROC_UNLOCK(p);
731 
732 	map = &p->p_vmspace->vm_map;
733 	error = vm_mmap(map, &stackaddr, (stacksz + guardsz), VM_PROT_ALL,
734 	    PROT_READ | PROT_WRITE, MAP_STACK, OBJT_DEFAULT, NULL, 0);
735 	if (error)
736 		return (error);
737 
738 	if (guardsz != 0) {
739 		error = vm_map_protect(map, stackaddr, stackaddr + guardsz,
740 		    PROT_NONE, 0);
741 		if (error) {
742 			/* unmap memory */
743 			(void) vm_map_remove(map, stackaddr, stackaddr +
744 			    (stacksz + guardsz));
745 
746 			return (error);
747 		}
748 	}
749 
750 	*addr = (void *)(stackaddr + guardsz);
751 	return (0);
752 }
753 
754 int
kern_thr_alloc(struct proc * p,int pages,struct thread ** ntd)755 kern_thr_alloc(struct proc *p, int pages, struct thread **ntd)
756 {
757 
758 	/* Have race condition but it is cheap. */
759 	if (p->p_numthreads >= max_threads_per_proc) {
760 		++max_threads_hits;
761 		return (EPROCLIM);
762 	}
763 
764 	*ntd = thread_alloc(pages);
765 	if (*ntd == NULL)
766 		return (ENOMEM);
767 
768 	return (0);
769 }
770