xref: /dragonfly/sys/kern/kern_exit.c (revision de22b67f22f891aa5d46702543b6a996b1ebc66c)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *        The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *        @(#)kern_exit.c     8.7 (Berkeley) 2/12/94
35  * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $
36  */
37 
38 #include "opt_ktrace.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysmsg.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/proc.h>
46 #include <sys/ktrace.h>
47 #include <sys/pioctl.h>
48 #include <sys/tty.h>
49 #include <sys/wait.h>
50 #include <sys/vnode.h>
51 #include <sys/resourcevar.h>
52 #include <sys/signalvar.h>
53 #include <sys/taskqueue.h>
54 #include <sys/ptrace.h>
55 #include <sys/acct.h>                   /* for acct_process() function prototype */
56 #include <sys/filedesc.h>
57 #include <sys/shm.h>
58 #include <sys/sem.h>
59 #include <sys/jail.h>
60 #include <sys/kern_syscall.h>
61 #include <sys/unistd.h>
62 #include <sys/eventhandler.h>
63 #include <sys/dsched.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <sys/lock.h>
68 #include <vm/pmap.h>
69 #include <vm/vm_map.h>
70 #include <vm/vm_extern.h>
71 
72 #include <sys/refcount.h>
73 #include <sys/spinlock2.h>
74 
75 static void reaplwps(void *context, int dummy);
76 static void reaplwp(struct lwp *lp);
77 static void killlwps(struct lwp *lp);
78 
79 static MALLOC_DEFINE(M_ATEXIT, "atexit", "atexit callback");
80 
81 /*
82  * callout list for things to do at exit time
83  */
84 struct exitlist {
85           exitlist_fn function;
86           TAILQ_ENTRY(exitlist) next;
87 };
88 
89 TAILQ_HEAD(exit_list_head, exitlist);
90 static struct exit_list_head exit_list = TAILQ_HEAD_INITIALIZER(exit_list);
91 
92 /*
93  * LWP reaper data
94  */
95 static struct task *deadlwp_task[MAXCPU];
96 static struct lwplist deadlwp_list[MAXCPU];
97 static struct lwkt_token deadlwp_token[MAXCPU];
98 
99 void (*linux_task_drop_callback)(thread_t td);
100 void (*linux_proc_drop_callback)(struct proc *p);
101 
102 /*
103  * exit --
104  *        Death of process.
105  *
106  * SYS_EXIT_ARGS(int rval)
107  */
108 int
sys_exit(struct sysmsg * sysmsg,const struct exit_args * uap)109 sys_exit(struct sysmsg *sysmsg, const struct exit_args *uap)
110 {
111           exit1(W_EXITCODE(uap->rval, 0));
112           /* NOTREACHED */
113 }
114 
115 /*
116  * Extended exit --
117  *        Death of a lwp or process with optional bells and whistles.
118  */
119 int
sys_extexit(struct sysmsg * sysmsg,const struct extexit_args * uap)120 sys_extexit(struct sysmsg *sysmsg, const struct extexit_args *uap)
121 {
122           struct proc *p = curproc;
123           int action, who;
124           int error;
125 
126           action = EXTEXIT_ACTION(uap->how);
127           who = EXTEXIT_WHO(uap->how);
128 
129           /* Check parameters before we might perform some action */
130           switch (who) {
131           case EXTEXIT_PROC:
132           case EXTEXIT_LWP:
133                     break;
134           default:
135                     return (EINVAL);
136           }
137 
138           switch (action) {
139           case EXTEXIT_SIMPLE:
140                     break;
141           case EXTEXIT_SETINT:
142                     error = copyout(&uap->status, uap->addr, sizeof(uap->status));
143                     if (error)
144                               return (error);
145                     break;
146           default:
147                     return (EINVAL);
148           }
149 
150           lwkt_gettoken(&p->p_token);
151 
152           switch (who) {
153           case EXTEXIT_LWP:
154                     /*
155                      * Be sure only to perform a simple lwp exit if there is at
156                      * least one more lwp in the proc, which will call exit1()
157                      * later, otherwise the proc will be an UNDEAD and not even a
158                      * SZOMB!
159                      */
160                     if (p->p_nthreads > 1) {
161                               lwp_exit(0, NULL);  /* called w/ p_token held */
162                               /* NOT REACHED */
163                     }
164                     /* else last lwp in proc:  do the real thing */
165                     /* FALLTHROUGH */
166           default:  /* to help gcc */
167           case EXTEXIT_PROC:
168                     lwkt_reltoken(&p->p_token);
169                     exit1(W_EXITCODE(uap->status, 0));
170                     /* NOTREACHED */
171           }
172 
173           /* NOTREACHED */
174           lwkt_reltoken(&p->p_token);   /* safety */
175 }
176 
177 /*
178  * Kill all lwps associated with the current process except the
179  * current lwp.   Return an error if we race another thread trying to
180  * do the same thing and lose the race.
181  *
182  * If forexec is non-zero the current thread and process flags are
183  * cleaned up so they can be reused.
184  */
185 int
killalllwps(int forexec)186 killalllwps(int forexec)
187 {
188           struct lwp *lp = curthread->td_lwp;
189           struct proc *p = lp->lwp_proc;
190           int fakestop;
191 
192           /*
193            * Interlock against P_WEXIT.  Only one of the process's thread
194            * is allowed to do the master exit.
195            */
196           lwkt_gettoken(&p->p_token);
197           if (p->p_flags & P_WEXIT) {
198                     lwkt_reltoken(&p->p_token);
199                     return (EALREADY);
200           }
201           p->p_flags |= P_WEXIT;
202           lwkt_gettoken(&lp->lwp_token);
203 
204           /*
205            * Set temporary stopped state in case we are racing a coredump.
206            * Otherwise the coredump may hang forever.
207            */
208           if (lp->lwp_mpflags & LWP_MP_WSTOP) {
209                     fakestop = 0;
210           } else {
211                     atomic_set_int(&lp->lwp_mpflags, LWP_MP_WSTOP);
212                     ++p->p_nstopped;
213                     fakestop = 1;
214                     wakeup(&p->p_nstopped);
215           }
216 
217           /*
218            * Interlock with LWP_MP_WEXIT and kill any remaining LWPs
219            */
220           atomic_set_int(&lp->lwp_mpflags, LWP_MP_WEXIT);
221           if (p->p_nthreads > 1)
222                     killlwps(lp);
223 
224           /*
225            * Undo temporary stopped state
226            */
227           if (fakestop && (lp->lwp_mpflags & LWP_MP_WSTOP)) {
228                     atomic_clear_int(&lp->lwp_mpflags, LWP_MP_WSTOP);
229                     --p->p_nstopped;
230           }
231 
232           /*
233            * If doing this for an exec, clean up the remaining thread
234            * (us) for continuing operation after all the other threads
235            * have been killed.
236            */
237           if (forexec) {
238                     atomic_clear_int(&lp->lwp_mpflags, LWP_MP_WEXIT);
239                     p->p_flags &= ~P_WEXIT;
240           }
241           lwkt_reltoken(&lp->lwp_token);
242           lwkt_reltoken(&p->p_token);
243 
244           return(0);
245 }
246 
247 /*
248  * Kill all LWPs except the current one.  Do not try to signal
249  * LWPs which have exited on their own or have already been
250  * signaled.
251  */
252 static void
killlwps(struct lwp * lp)253 killlwps(struct lwp *lp)
254 {
255           struct proc *p = lp->lwp_proc;
256           struct lwp *tlp;
257 
258           /*
259            * Kill the remaining LWPs.  We must send the signal before setting
260            * LWP_MP_WEXIT.  The setting of WEXIT is optional but helps reduce
261            * races.  tlp must be held across the call as it might block and
262            * allow the target lwp to rip itself out from under our loop.
263            */
264           FOREACH_LWP_IN_PROC(tlp, p) {
265                     LWPHOLD(tlp);
266                     lwkt_gettoken(&tlp->lwp_token);
267                     if ((tlp->lwp_mpflags & LWP_MP_WEXIT) == 0) {
268                               atomic_set_int(&tlp->lwp_mpflags, LWP_MP_WEXIT);
269                               lwpsignal(p, tlp, SIGKILL);
270                     }
271                     lwkt_reltoken(&tlp->lwp_token);
272                     LWPRELE(tlp);
273           }
274 
275           /*
276            * Wait for everything to clear out.  Also make sure any tstop()s
277            * are signalled (we are holding p_token for the interlock).
278            */
279           wakeup(p);
280           while (p->p_nthreads > 1)
281                     tsleep(&p->p_nthreads, 0, "killlwps", 0);
282 }
283 
284 /*
285  * Exit: deallocate address space and other resources, change proc state
286  * to zombie, and unlink proc from allproc and parent's lists.  Save exit
287  * status and rusage for wait().  Check for child processes and orphan them.
288  */
289 void
exit1(int rv)290 exit1(int rv)
291 {
292           struct thread *td = curthread;
293           struct proc *p = td->td_proc;
294           struct lwp *lp = td->td_lwp;
295           struct proc *q;
296           struct proc *pp;
297           struct proc *reproc;
298           struct sysreaper *reap;
299           struct vmspace *vm;
300           struct vnode *vtmp;
301           struct exitlist *ep;
302           int error;
303 
304           lwkt_gettoken(&p->p_token);
305 
306           if (p->p_pid == 1) {
307                     kprintf("init died (signal %d, exit %d)\n",
308                         WTERMSIG(rv), WEXITSTATUS(rv));
309                     panic("Going nowhere without my init!");
310           }
311           varsymset_clean(&p->p_varsymset);
312           lockuninit(&p->p_varsymset.vx_lock);
313 
314           /*
315            * Kill all lwps associated with the current process, return an
316            * error if we race another thread trying to do the same thing
317            * and lose the race.
318            */
319           error = killalllwps(0);
320           if (error) {
321                     lwp_exit(0, NULL);
322                     /* NOT REACHED */
323           }
324 
325           /* are we a task leader? */
326           if (p == p->p_leader) {
327                     struct sysmsg sysmsg;
328 
329                     sysmsg.extargs.kill.signum = SIGKILL;
330                     q = p->p_peers;
331                     while(q) {
332                               sysmsg.extargs.kill.pid = q->p_pid;
333                               /*
334                              * The interface for kill is better
335                                * than the internal signal
336                                */
337                               sys_kill(&sysmsg, &sysmsg.extargs.kill);
338                               q = q->p_peers;
339                     }
340                     while (p->p_peers)
341                               tsleep((caddr_t)p, 0, "exit1", 0);
342           }
343 
344 #ifdef PGINPROF
345           vmsizmon();
346 #endif
347           STOPEVENT(p, S_EXIT, rv);
348           p->p_flags |= P_POSTEXIT;     /* stop procfs stepping */
349 
350           /*
351            * Check if any loadable modules need anything done at process exit.
352            * e.g. SYSV IPC stuff
353            * XXX what if one of these generates an error?
354            */
355           p->p_xstat = rv;
356 
357           /*
358            * XXX: imho, the eventhandler stuff is much cleaner than this.
359            *        Maybe we should move everything to use eventhandler.
360            */
361           TAILQ_FOREACH(ep, &exit_list, next)
362                     (*ep->function)(td);
363 
364           if (p->p_flags & P_PROFIL)
365                     stopprofclock(p);
366 
367           SIGEMPTYSET(p->p_siglist);
368           SIGEMPTYSET(lp->lwp_siglist);
369           if (timevalisset(&p->p_realtimer.it_value))
370                     callout_terminate(&p->p_ithandle);
371 
372           /*
373            * Reset any sigio structures pointing to us as a result of
374            * F_SETOWN with our pid.
375            */
376           funsetownlst(&p->p_sigiolst);
377 
378           /*
379            * Close open files and release open-file table.
380            * This may block!
381            */
382           fdfree(p, NULL);
383 
384           if (p->p_leader->p_peers) {
385                     q = p->p_leader;
386                     while(q->p_peers != p)
387                               q = q->p_peers;
388                     q->p_peers = p->p_peers;
389                     wakeup((caddr_t)p->p_leader);
390           }
391 
392           /*
393            * XXX Shutdown SYSV semaphores
394            */
395           semexit(p);
396 
397           /* The next two chunks should probably be moved to vmspace_exit. */
398           vm = p->p_vmspace;
399 
400           /*
401            * Clean up data related to virtual kernel operation.  Clean up
402            * any vkernel context related to the current lwp now so we can
403            * destroy p_vkernel.
404            */
405           if (p->p_vkernel) {
406                     vkernel_lwp_exit(lp);
407                     vkernel_exit(p);
408           }
409 
410           /*
411            * Release the user portion of address space.  The exitbump prevents
412            * the vmspace from being completely eradicated (using holdcnt).
413            * This releases references to vnodes, which could cause I/O if the
414            * file has been unlinked.  We need to do this early enough that
415            * we can still sleep.
416            *
417            * We can't free the entire vmspace as the kernel stack may be mapped
418            * within that space also.
419            *
420            * Processes sharing the same vmspace may exit in one order, and
421            * get cleaned up by vmspace_exit() in a different order.  The
422            * last exiting process to reach this point releases as much of
423            * the environment as it can, and the last process cleaned up
424            * by vmspace_exit() (which decrements exitingcnt) cleans up the
425            * remainder.
426            *
427            * NOTE: Releasing p_token around this call is helpful if the
428            *         vmspace had a huge RSS.  Otherwise some other process
429            *         trying to do an allproc or other scan (like 'ps') may
430            *         stall for a long time.
431            */
432           lwkt_reltoken(&p->p_token);
433           vmspace_relexit(vm);
434           lwkt_gettoken(&p->p_token);
435 
436           if (SESS_LEADER(p)) {
437                     struct session *sp = p->p_session;
438 
439                     if (sp->s_ttyvp) {
440                               /*
441                                * We are the controlling process.  Signal the
442                                * foreground process group, drain the controlling
443                                * terminal, and revoke access to the controlling
444                                * terminal.
445                                *
446                                * NOTE: While waiting for the process group to exit
447                                *         it is possible that one of the processes in
448                                *         the group will revoke the tty, so the
449                                *         ttyclosesession() function will re-check
450                                *         sp->s_ttyvp.
451                                *
452                                * NOTE: Force a timeout of one second when draining
453                                *         the controlling terminal.  PCATCH won't work
454                                *         in exit1().
455                                */
456                               if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) {
457                                         if (sp->s_ttyp->t_pgrp)
458                                                   pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
459                                         sp->s_ttyp->t_timeout = hz;
460                                         ttywait(sp->s_ttyp);
461                                         ttyclosesession(sp, 1); /* also revoke */
462                               }
463 
464                               /*
465                                * Release the tty.  If someone has it open via
466                                * /dev/tty then close it (since they no longer can
467                                * once we've NULL'd it out).
468                                */
469                               ttyclosesession(sp, 0);
470 
471                               /*
472                                * s_ttyp is not zero'd; we use this to indicate
473                                * that the session once had a controlling terminal.
474                                * (for logging and informational purposes)
475                                */
476                     }
477                     sp->s_leader = NULL;
478           }
479           fixjobc(p, p->p_pgrp, 0);
480           (void)acct_process(p);
481 #ifdef KTRACE
482           /*
483            * release trace file
484            */
485           if (p->p_tracenode)
486                     ktrdestroy(&p->p_tracenode);
487           p->p_traceflag = 0;
488 #endif
489           /*
490            * Release reference to text vnode
491            */
492           if ((vtmp = p->p_textvp) != NULL) {
493                     p->p_textvp = NULL;
494                     vrele(vtmp);
495           }
496 
497           /* Release namecache handle to text file */
498           if (p->p_textnch.ncp)
499                     cache_drop(&p->p_textnch);
500 
501           /*
502            * We have to handle PPWAIT here or proc_move_allproc_zombie()
503            * will block on the PHOLD() the parent is doing.
504            *
505            * We are using the flag as an interlock so an atomic op is
506            * necessary to synchronize with the parent's cpu.
507            */
508           if (p->p_flags & P_PPWAIT) {
509                     if (p->p_pptr && p->p_pptr->p_upmap)
510                               atomic_add_int(&p->p_pptr->p_upmap->invfork, -1);
511                     atomic_clear_int(&p->p_flags, P_PPWAIT);
512                     wakeup(p->p_pptr);
513           }
514 
515           /*
516            * Move the process to the zombie list.  This will block
517            * until the process p_lock count reaches 0.  The process will
518            * not be reaped until TDF_EXITING is set by cpu_thread_exit(),
519            * which is called from cpu_proc_exit().
520            *
521            * Interlock against waiters using p_waitgen.  We increment
522            * p_waitgen after completing the move of our process to the
523            * zombie list.
524            *
525            * WARNING: pp becomes stale when we block, clear it now as a
526            *            reminder.
527            */
528           proc_move_allproc_zombie(p);
529           pp = p->p_pptr;
530           atomic_add_long(&pp->p_waitgen, 1);
531           pp = NULL;
532 
533           /*
534            * release controlled reaper for exit if we own it and return the
535            * remaining reaper (the one for us), which we will drop after we
536            * are done.
537            */
538           reap = reaper_exit(p);
539 
540           /*
541            * Reparent all of this process's children to the init process or
542            * to the designated reaper.  We must hold the reaper's p_token in
543            * order to safely mess with p_children.
544            *
545            * Issue the p_deathsig signal to children that request it.
546            *
547            * We already hold p->p_token (to remove the children from our list).
548            */
549           reproc = NULL;
550           q = LIST_FIRST(&p->p_children);
551           if (q) {
552                     reproc = reaper_get(reap);
553                     lwkt_gettoken(&reproc->p_token);
554                     while ((q = LIST_FIRST(&p->p_children)) != NULL) {
555                               PHOLD(q);
556                               lwkt_gettoken(&q->p_token);
557                               if (q != LIST_FIRST(&p->p_children)) {
558                                         lwkt_reltoken(&q->p_token);
559                                         PRELE(q);
560                                         continue;
561                               }
562                               LIST_REMOVE(q, p_sibling);
563                               LIST_INSERT_HEAD(&reproc->p_children, q, p_sibling);
564                               q->p_pptr = reproc;
565                               q->p_ppid = reproc->p_pid;
566                               q->p_sigparent = SIGCHLD;
567 
568                               /*
569                                * Traced processes are killed
570                                * since their existence means someone is screwing up.
571                                */
572                               if (q->p_flags & P_TRACED) {
573                                         q->p_flags &= ~P_TRACED;
574                                         ksignal(q, SIGKILL);
575                               }
576 
577                               /*
578                                * Issue p_deathsig to children that request it
579                                */
580                               if (q->p_deathsig)
581                                         ksignal(q, q->p_deathsig);
582                               lwkt_reltoken(&q->p_token);
583                               PRELE(q);
584                     }
585                     lwkt_reltoken(&reproc->p_token);
586                     wakeup(reproc);
587           }
588 
589           /*
590            * Save exit status and final rusage info.  We no longer add
591            * child rusage info into self times, wait4() and kern_wait()
592            * handles it in order to properly support wait6().
593            */
594           calcru_proc(p, &p->p_ru);
595           /*ruadd(&p->p_ru, &p->p_cru); REMOVED */
596 
597           /*
598            * notify interested parties of our demise.
599            */
600           KNOTE(&p->p_klist, NOTE_EXIT);
601 
602           /*
603            * Notify parent that we're gone.  If parent has the PS_NOCLDWAIT
604            * flag set, or if the handler is set to SIG_IGN, notify the reaper
605            * instead (it will handle this situation).
606            *
607            * NOTE: The reaper can still be the parent process.
608            *
609            * (must reload pp)
610            */
611           if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
612                     if (reproc == NULL)
613                               reproc = reaper_get(reap);
614                     proc_reparent(p, reproc);
615           }
616           if (reproc)
617                     PRELE(reproc);
618           if (reap)
619                     reaper_drop(reap);
620 
621           /*
622            * Signal (possibly new) parent.
623            */
624           pp = p->p_pptr;
625           PHOLD(pp);
626           if (p->p_sigparent && pp != initproc) {
627                     int sig = p->p_sigparent;
628 
629                     if (sig != SIGUSR1 && sig != SIGCHLD)
630                               sig = SIGCHLD;
631                   ksignal(pp, sig);
632           } else {
633                   ksignal(pp, SIGCHLD);
634           }
635           p->p_flags &= ~P_TRACED;
636           PRELE(pp);
637 
638           /*
639            * cpu_exit is responsible for clearing curproc, since
640            * it is heavily integrated with the thread/switching sequence.
641            *
642            * Other substructures are freed from wait().
643            */
644           if (p->p_limit) {
645                     struct plimit *rlimit;
646 
647                     rlimit = p->p_limit;
648                     p->p_limit = NULL;
649                     plimit_free(rlimit);
650           }
651 
652           /*
653            * Finally, call machine-dependent code to release as many of the
654            * lwp's resources as we can and halt execution of this thread.
655            *
656            * pp is a wild pointer now but still the correct wakeup() target.
657            * lwp_exit() only uses it to send the wakeup() signal to the likely
658            * parent.  Any reparenting race that occurs will get a signal
659            * automatically and not be an issue.
660            */
661           lwp_exit(1, pp);
662 }
663 
664 /*
665  * Eventually called by every exiting LWP
666  *
667  * p->p_token must be held.  mplock may be held and will be released.
668  */
669 void
lwp_exit(int masterexit,void * waddr)670 lwp_exit(int masterexit, void *waddr)
671 {
672           struct thread *td = curthread;
673           struct lwp *lp = td->td_lwp;
674           struct proc *p = lp->lwp_proc;
675           int dowake = 0;
676 
677           /*
678            * Release the current user process designation on the process so
679            * the userland scheduler can work in someone else.
680            */
681           p->p_usched->release_curproc(lp);
682 
683           /*
684            * Destroy the per-thread shared page and remove from any pmaps
685            * it resides in.
686            */
687           lwp_userunmap(lp);
688 
689           /*
690            * lwp_exit() may be called without setting LWP_MP_WEXIT, so
691            * make sure it is set here.
692            */
693           ASSERT_LWKT_TOKEN_HELD(&p->p_token);
694           atomic_set_int(&lp->lwp_mpflags, LWP_MP_WEXIT);
695 
696           /*
697            * Clean up any virtualization
698            */
699           if (lp->lwp_vkernel)
700                     vkernel_lwp_exit(lp);
701 
702           /*
703            * Clean up select/poll support
704            */
705           kqueue_terminate(&lp->lwp_kqueue);
706 
707           if (td->td_linux_task)
708                     linux_task_drop_callback(td);
709           if (masterexit && p->p_linux_mm)
710                     linux_proc_drop_callback(p);
711 
712           /*
713            * Clean up any syscall-cached ucred or rlimit.
714            */
715           if (td->td_ucred) {
716                     crfree(td->td_ucred);
717                     td->td_ucred = NULL;
718           }
719           if (td->td_limit) {
720                     struct plimit *rlimit;
721 
722                     rlimit = td->td_limit;
723                     td->td_limit = NULL;
724                     plimit_free(rlimit);
725         }
726 
727           /*
728            * Cleanup any cached descriptors for this thread
729            */
730           if (p->p_fd)
731                     fexitcache(td);
732 
733           /*
734            * Nobody actually wakes us when the lock
735            * count reaches zero, so just wait one tick.
736            */
737           while (lp->lwp_lock > 0)
738                     tsleep(lp, 0, "lwpexit", 1);
739 
740           /* Hand down resource usage to our proc */
741           ruadd(&p->p_ru, &lp->lwp_ru);
742 
743           /*
744            * If we don't hold the process until the LWP is reaped wait*()
745            * may try to dispose of its vmspace before all the LWPs have
746            * actually terminated.
747            */
748           PHOLD(p);
749 
750           /*
751            * Do any remaining work that might block on us.  We should be
752            * coded such that further blocking is ok after decrementing
753            * p_nthreads but don't take the chance.
754            */
755           dsched_exit_thread(td);
756           biosched_done(curthread);
757 
758           /*
759            * We have to use the reaper for all the LWPs except the one doing
760            * the master exit.  The LWP doing the master exit can just be
761            * left on p_lwps and the process reaper will deal with it
762            * synchronously, which is much faster.
763            *
764            * Wakeup anyone waiting on p_nthreads to drop to 1 or 0.
765            *
766            * The process is left held until the reaper calls lwp_dispose() on
767            * the lp (after calling lwp_wait()).
768            */
769           if (masterexit == 0) {
770                     int cpu = mycpuid;
771 
772                     lwp_rb_tree_RB_REMOVE(&p->p_lwp_tree, lp);
773                     --p->p_nthreads;
774                     if ((p->p_flags & P_MAYBETHREADED) && p->p_nthreads <= 1)
775                               dowake = 1;
776                     lwkt_gettoken(&deadlwp_token[cpu]);
777                     LIST_INSERT_HEAD(&deadlwp_list[cpu], lp, u.lwp_reap_entry);
778                     taskqueue_enqueue(taskqueue_thread[cpu], deadlwp_task[cpu]);
779                     lwkt_reltoken(&deadlwp_token[cpu]);
780           } else {
781                     --p->p_nthreads;
782                     if ((p->p_flags & P_MAYBETHREADED) && p->p_nthreads <= 1)
783                               dowake = 1;
784           }
785 
786           /*
787            * We no longer need p_token.
788            *
789            * Tell the userland scheduler that we are going away
790            */
791           lwkt_reltoken(&p->p_token);
792           p->p_usched->heuristic_exiting(lp, p);
793 
794           /*
795            * Issue late wakeups after releasing our token to give us a chance
796            * to deschedule and switch away before another cpu in a wait*()
797            * reaps us.  This is done as late as possible to reduce contention.
798            */
799           if (dowake)
800                     wakeup(&p->p_nthreads);
801           if (waddr)
802                     wakeup(waddr);
803 
804           cpu_lwp_exit();
805 }
806 
807 /*
808  * Wait until a lwp is completely dead.  The final interlock in this drama
809  * is when TDF_EXITING is set in cpu_thread_exit() just before the final
810  * switchout.
811  *
812  * At the point TDF_EXITING is set a complete exit is accomplished when
813  * TDF_RUNNING and TDF_PREEMPT_LOCK are both clear.  td_mpflags has two
814  * post-switch interlock flags that can be used to wait for the TDF_
815  * flags to clear.
816  *
817  * Returns non-zero on success, and zero if the caller needs to retry
818  * the lwp_wait().
819  */
820 static int
lwp_wait(struct lwp * lp)821 lwp_wait(struct lwp *lp)
822 {
823           struct thread *td = lp->lwp_thread;
824           u_int mpflags;
825 
826           KKASSERT(lwkt_preempted_proc() != lp);
827 
828           /*
829            * This bit of code uses the thread destruction interlock
830            * managed by lwkt_switch_return() to wait for the lwp's
831            * thread to completely disengage.
832            *
833            * It is possible for us to race another cpu core so we
834            * have to do this correctly.
835            */
836           for (;;) {
837                     mpflags = td->td_mpflags;
838                     cpu_ccfence();
839                     if (mpflags & TDF_MP_EXITSIG)
840                               break;
841                     tsleep_interlock(td, 0);
842                     if (atomic_cmpset_int(&td->td_mpflags, mpflags,
843                                               mpflags | TDF_MP_EXITWAIT)) {
844                               tsleep(td, PINTERLOCKED, "lwpxt", 0);
845                     }
846           }
847 
848           /*
849            * We've already waited for the core exit but there can still
850            * be other refs from e.g. process scans and such.
851            */
852           if (lp->lwp_lock > 0) {
853                     tsleep(lp, 0, "lwpwait1", 1);
854                     return(0);
855           }
856           if (td->td_refs) {
857                     tsleep(td, 0, "lwpwait2", 1);
858                     return(0);
859           }
860 
861           /*
862            * Now that we have the thread destruction interlock these flags
863            * really should already be cleaned up, keep a check for safety.
864            *
865            * We can't rip its stack out from under it until TDF_EXITING is
866            * set and both TDF_RUNNING and TDF_PREEMPT_LOCK are clear.
867            * TDF_PREEMPT_LOCK must be checked because TDF_RUNNING
868            * will be cleared temporarily if a thread gets preempted.
869            */
870           while ((td->td_flags & (TDF_RUNNING |
871                                         TDF_RUNQ |
872                                       TDF_PREEMPT_LOCK |
873                                       TDF_EXITING)) != TDF_EXITING) {
874                     tsleep(lp, 0, "lwpwait3", 1);
875                     return (0);
876           }
877 
878           KASSERT((td->td_flags & (TDF_RUNQ|TDF_TSLEEPQ)) == 0,
879                     ("lwp_wait: td %p (%s) still on run or sleep queue",
880                     td, td->td_comm));
881           return (1);
882 }
883 
884 /*
885  * Release the resources associated with a lwp.
886  * The lwp must be completely dead.
887  */
888 void
lwp_dispose(struct lwp * lp)889 lwp_dispose(struct lwp *lp)
890 {
891           struct thread *td = lp->lwp_thread;
892 
893           KKASSERT(lwkt_preempted_proc() != lp);
894           KKASSERT(lp->lwp_lock == 0);
895           KKASSERT(td->td_refs == 0);
896           KKASSERT((td->td_flags & (TDF_RUNNING |
897                                           TDF_RUNQ |
898                                           TDF_PREEMPT_LOCK |
899                                           TDF_EXITING)) == TDF_EXITING);
900 
901           PRELE(lp->lwp_proc);
902           lp->lwp_proc = NULL;
903           if (td != NULL) {
904                     td->td_proc = NULL;
905                     td->td_lwp = NULL;
906                     lp->lwp_thread = NULL;
907                     lwkt_free_thread(td);
908           }
909           kfree(lp, M_LWP);
910 }
911 
912 int
sys_wait4(struct sysmsg * sysmsg,const struct wait_args * uap)913 sys_wait4(struct sysmsg *sysmsg, const struct wait_args *uap)
914 {
915           struct __wrusage wrusage;
916           int error;
917           int status;
918           int options;
919           id_t id;
920           idtype_t idtype;
921 
922           options = uap->options | WEXITED | WTRAPPED;
923           id = uap->pid;
924 
925           if (id == WAIT_ANY) {
926                     idtype = P_ALL;
927           } else if (id == WAIT_MYPGRP) {
928                     idtype = P_PGID;
929                     id = curproc->p_pgid;
930           } else if (id < 0) {
931                     idtype = P_PGID;
932                     id = -id;
933           } else {
934                     idtype = P_PID;
935           }
936 
937           error = kern_wait(idtype, id, &status, options, &wrusage,
938                                 NULL, &sysmsg->sysmsg_result);
939 
940           if (error == 0 && uap->status)
941                     error = copyout(&status, uap->status, sizeof(*uap->status));
942           if (error == 0 && uap->rusage) {
943                     ruadd(&wrusage.wru_self, &wrusage.wru_children);
944                     error = copyout(&wrusage.wru_self, uap->rusage, sizeof(*uap->rusage));
945           }
946           return (error);
947 }
948 
949 int
sys_wait6(struct sysmsg * sysmsg,const struct wait6_args * uap)950 sys_wait6(struct sysmsg *sysmsg, const struct wait6_args *uap)
951 {
952           struct __wrusage wrusage;
953           siginfo_t info;
954           siginfo_t *infop;
955           int error;
956           int status;
957           int options;
958           id_t id;
959           idtype_t idtype;
960 
961           /*
962            * NOTE: wait6() requires WEXITED and WTRAPPED to be specified if
963            *         desired.
964            */
965           options = uap->options;
966           idtype = uap->idtype;
967           id = uap->id;
968           infop = uap->info ? &info : NULL;
969 
970           switch(idtype) {
971           case P_PID:
972           case P_PGID:
973                     if (id == WAIT_MYPGRP) {
974                               idtype = P_PGID;
975                               id = curproc->p_pgid;
976                     }
977                     break;
978           default:
979                     /* let kern_wait deal with the remainder */
980                     break;
981           }
982 
983           error = kern_wait(idtype, id, &status, options,
984                                 &wrusage, infop, &sysmsg->sysmsg_result);
985 
986           if (error == 0 && uap->status)
987                     error = copyout(&status, uap->status, sizeof(*uap->status));
988           if (error == 0 && uap->wrusage)
989                     error = copyout(&wrusage, uap->wrusage, sizeof(*uap->wrusage));
990           if (error == 0 && uap->info)
991                     error = copyout(&info, uap->info, sizeof(*uap->info));
992           return (error);
993 }
994 
995 /*
996  * kernel wait*() system call support
997  */
998 int
kern_wait(idtype_t idtype,id_t id,int * status,int options,struct __wrusage * wrusage,siginfo_t * info,int * res)999 kern_wait(idtype_t idtype, id_t id, int *status, int options,
1000             struct __wrusage *wrusage, siginfo_t *info, int *res)
1001 {
1002           struct thread *td = curthread;
1003           struct lwp *lp;
1004           struct proc *q = td->td_proc;
1005           struct proc *p, *t;
1006           struct ucred *cr;
1007           struct pargs *pa;
1008           struct sigacts *ps;
1009           int nfound, error;
1010           long waitgen;
1011 
1012           /*
1013            * Must not have extraneous options.  Must have at least one
1014            * matchable option.
1015            */
1016           if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE|WSTOPPED|
1017                               WEXITED|WTRAPPED|WNOWAIT)) {
1018                     return (EINVAL);
1019           }
1020           if ((options & (WEXITED | WUNTRACED | WCONTINUED | WTRAPPED)) == 0) {
1021                     return (EINVAL);
1022           }
1023 
1024           /*
1025            * Protect the q->p_children list
1026            */
1027           lwkt_gettoken(&q->p_token);
1028 loop:
1029           /*
1030            * All sorts of things can change due to blocking so we have to loop
1031            * all the way back up here.
1032            *
1033            * The problem is that if a process group is stopped and the parent
1034            * is doing a wait*(..., WUNTRACED, ...), it will see the STOP
1035            * of the child and then stop itself when it tries to return from the
1036            * system call.  When the process group is resumed the parent will
1037            * then get the STOP status even though the child has now resumed
1038            * (a followup wait*() will get the CONT status).
1039            *
1040            * Previously the CONT would overwrite the STOP because the tstop
1041            * was handled within tsleep(), and the parent would only see
1042            * the CONT when both are stopped and continued together.  This little
1043            * two-line hack restores this effect.
1044            *
1045            * No locks are held so we can safely block the process here.
1046            */
1047           if (STOPLWP(q, td->td_lwp))
1048             tstop();
1049 
1050           nfound = 0;
1051 
1052           /*
1053            * Loop on children.
1054            *
1055            * NOTE: We don't want to break q's p_token in the loop for the
1056            *         case where no children are found or we risk breaking the
1057            *         interlock between child and parent.
1058            */
1059           waitgen = atomic_fetchadd_long(&q->p_waitgen, 0x80000000);
1060           LIST_FOREACH(p, &q->p_children, p_sibling) {
1061                     /*
1062                      * Skip children that another thread is already uninterruptably
1063                      * reaping.
1064                      */
1065                     if (PWAITRES_PENDING(p))
1066                               continue;
1067 
1068                     /*
1069                      * Filter, (p) will be held on fall-through.  Try to optimize
1070                      * this to avoid the atomic op until we are pretty sure we
1071                      * want this process.
1072                      */
1073                     switch(idtype) {
1074                     case P_ALL:
1075                               PHOLD(p);
1076                               break;
1077                     case P_PID:
1078                               if (p->p_pid != (pid_t)id)
1079                                         continue;
1080                               PHOLD(p);
1081                               break;
1082                     case P_PGID:
1083                               if (p->p_pgid != (pid_t)id)
1084                                         continue;
1085                               PHOLD(p);
1086                               break;
1087                     case P_SID:
1088                               PHOLD(p);
1089                               if (p->p_session && p->p_session->s_sid != (pid_t)id) {
1090                                         PRELE(p);
1091                                         continue;
1092                               }
1093                               break;
1094                     case P_UID:
1095                               PHOLD(p);
1096                               if (p->p_ucred->cr_uid != (uid_t)id) {
1097                                         PRELE(p);
1098                                         continue;
1099                               }
1100                               break;
1101                     case P_GID:
1102                               PHOLD(p);
1103                               if (p->p_ucred->cr_gid != (gid_t)id) {
1104                                         PRELE(p);
1105                                         continue;
1106                               }
1107                               break;
1108                     case P_JAILID:
1109                               PHOLD(p);
1110                               if (p->p_ucred->cr_prison &&
1111                                   p->p_ucred->cr_prison->pr_id != (int)id) {
1112                                         PRELE(p);
1113                                         continue;
1114                               }
1115                               break;
1116                     default:
1117                               /* unsupported filter */
1118                               continue;
1119                     }
1120                     /* (p) is held at this point */
1121 
1122                     /*
1123                      * This special case handles a kthread spawned by linux_clone
1124                      * (see linux_misc.c).  The linux_wait4 and linux_waitpid
1125                      * functions need to be able to distinguish between waiting
1126                      * on a process and waiting on a thread.  It is a thread if
1127                      * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
1128                      * signifies we want to wait for threads and not processes.
1129                      */
1130                     if ((p->p_sigparent != SIGCHLD) ^
1131                         ((options & WLINUXCLONE) != 0)) {
1132                               PRELE(p);
1133                               continue;
1134                     }
1135 
1136                     nfound++;
1137                     if (p->p_stat == SZOMB && (options & WEXITED)) {
1138                               /*
1139                                * We may go into SZOMB with threads still present.
1140                                * We must wait for them to exit before we can reap
1141                                * the master thread, otherwise we may race reaping
1142                                * non-master threads.
1143                                *
1144                                * Only this routine can remove a process from
1145                                * the zombie list and destroy it.
1146                                *
1147                                * This function will fail after sleeping if another
1148                                * thread owns the zombie lock.  This function will
1149                                * fail immediately or after sleeping if another
1150                                * thread owns or obtains ownership of the reap via
1151                                * WAITRES.
1152                                */
1153                               if (PHOLDZOMB(p)) {
1154                                         PRELE(p);
1155                                         goto loop;
1156                               }
1157                               lwkt_gettoken(&p->p_token);
1158                               if (p->p_pptr != q) {
1159                                         lwkt_reltoken(&p->p_token);
1160                                         PRELE(p);
1161                                         PRELEZOMB(p);
1162                                         goto loop;
1163                               }
1164 
1165                               /*
1166                                * We are the reaper, from this point on the reap
1167                                * cannot be aborted.
1168                                */
1169                               PWAITRES_SET(p);
1170                               while (p->p_nthreads > 0) {
1171                                         tsleep(&p->p_nthreads, 0, "lwpzomb", hz);
1172                               }
1173 
1174                               /*
1175                                * Reap any LWPs left in p->p_lwps.  This is usually
1176                                * just the last LWP.  This must be done before
1177                                * we loop on p_lock since the lwps hold a ref on
1178                                * it as a vmspace interlock.
1179                                *
1180                                * Once that is accomplished p_nthreads had better
1181                                * be zero.
1182                                */
1183                               while ((lp = RB_ROOT(&p->p_lwp_tree)) != NULL) {
1184                                         /*
1185                                          * Make sure no one is using this lwp, before
1186                                          * it is removed from the tree.  If we didn't
1187                                          * wait it here, lwp tree iteration with
1188                                          * blocking operation would be broken.
1189                                          */
1190                                         while (lp->lwp_lock > 0)
1191                                                   tsleep(lp, 0, "zomblwp", 1);
1192                                         lwp_rb_tree_RB_REMOVE(&p->p_lwp_tree, lp);
1193                                         reaplwp(lp);
1194                               }
1195                               KKASSERT(p->p_nthreads == 0);
1196 
1197                               /*
1198                                * Don't do anything really bad until all references
1199                                * to the process go away.  This may include other
1200                                * LWPs which are still in the process of being
1201                                * reaped.  We can't just pull the rug out from under
1202                                * them because they may still be using the VM space.
1203                                *
1204                                * Certain kernel facilities such as /proc will also
1205                                * put a hold on the process for short periods of
1206                                * time.
1207                                */
1208                               PRELE(p);           /* from top of loop */
1209                               PSTALL(p, "reap3", 1);        /* 1 ref (for PZOMBHOLD) */
1210 
1211                               /* Take care of our return values. */
1212                               *res = p->p_pid;
1213 
1214                               *status = p->p_xstat;
1215                               wrusage->wru_self = p->p_ru;
1216                               wrusage->wru_children = p->p_cru;
1217 
1218                               if (info) {
1219                                         bzero(info, sizeof(*info));
1220                                         info->si_errno = 0;
1221                                         info->si_signo = SIGCHLD;
1222                                         if (WIFEXITED(p->p_xstat)) {
1223                                                   info->si_code = CLD_EXITED;
1224                                                   info->si_status =
1225                                                             WEXITSTATUS(p->p_xstat);
1226                                         } else {
1227                                                   info->si_code = CLD_KILLED;
1228                                                   info->si_status = WTERMSIG(p->p_xstat);
1229                                         }
1230                                         info->si_pid = p->p_pid;
1231                                         info->si_uid = p->p_ucred->cr_uid;
1232                               }
1233 
1234                               /*
1235                                * WNOWAIT shortcuts to done here, leaving the
1236                                * child on the zombie list.
1237                                */
1238                               if (options & WNOWAIT) {
1239                                         lwkt_reltoken(&p->p_token);
1240                                         PRELEZOMB(p);
1241                                         error = 0;
1242                                         goto done;
1243                               }
1244 
1245                               /*
1246                                * If we got the child via a ptrace 'attach',
1247                                * we need to give it back to the old parent.
1248                                */
1249                               if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
1250                                         p->p_oppid = 0;
1251                                         proc_reparent(p, t);
1252                                         ksignal(t, SIGCHLD);
1253                                         wakeup((caddr_t)t);
1254                                         PRELE(t);
1255                                         lwkt_reltoken(&p->p_token);
1256                                         PRELEZOMB(p);
1257                                         error = 0;
1258                                         goto done;
1259                               }
1260 
1261                               /*
1262                                * Unlink the proc from its process group so that
1263                                * the following operations won't lead to an
1264                                * inconsistent state for processes running down
1265                                * the zombie list.
1266                                */
1267                               proc_remove_zombie(p);
1268                               proc_userunmap(p);
1269                               lwkt_reltoken(&p->p_token);
1270                               leavepgrp(p);
1271 
1272                               p->p_xstat = 0;
1273                               ruadd(&q->p_cru, &p->p_ru);
1274                               ruadd(&q->p_cru, &p->p_cru);
1275 
1276                               /*
1277                                * Decrement the count of procs running with this uid.
1278                                */
1279                               chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
1280 
1281                               /*
1282                                * Free up credentials.  p_spin is required to
1283                                * avoid races against allproc scans.
1284                                */
1285                               spin_lock(&p->p_spin);
1286                               cr = p->p_ucred;
1287                               p->p_ucred = NULL;
1288                               spin_unlock(&p->p_spin);
1289                               crfree(cr);
1290 
1291                               /*
1292                                * Remove unused arguments
1293                                */
1294                               pa = p->p_args;
1295                               p->p_args = NULL;
1296                               if (pa && refcount_release(&pa->ar_ref)) {
1297                                         kfree(pa, M_PARGS);
1298                                         pa = NULL;
1299                               }
1300 
1301                               ps = p->p_sigacts;
1302                               p->p_sigacts = NULL;
1303                               if (ps && refcount_release(&ps->ps_refcnt)) {
1304                                         kfree(ps, M_SUBPROC);
1305                                         ps = NULL;
1306                               }
1307 
1308                               /*
1309                                * Our exitingcount was incremented when the process
1310                                * became a zombie, now that the process has been
1311                                * removed from (almost) all lists we should be able
1312                                * to safely destroy its vmspace.  Wait for any current
1313                                * holders to go away (so the vmspace remains stable),
1314                                * then scrap it.
1315                                *
1316                                * NOTE: Releasing the parent process (q) p_token
1317                                *         across the vmspace_exitfree() call is
1318                                *         important here to reduce stalls on
1319                                *         interactions with (q) (such as
1320                                *         fork/exec/wait or 'ps').
1321                                */
1322                               PSTALL(p, "reap4", 1);
1323                               lwkt_reltoken(&q->p_token);
1324                               vmspace_exitfree(p);
1325                               lwkt_gettoken(&q->p_token);
1326                               PSTALL(p, "reap5", 1);
1327 
1328                               /*
1329                                * NOTE: We have to officially release ZOMB in order
1330                                *         to ensure that a racing thread in kern_wait()
1331                                *         which blocked on ZOMB is woken up.
1332                                */
1333                               PRELEZOMB(p);
1334                               kfree(p->p_uidpcpu, M_SUBPROC);
1335                               kfree(p, M_PROC);
1336                               atomic_add_int(&nprocs, -1);
1337                               error = 0;
1338                               goto done;
1339                     }
1340 
1341                     /*
1342                      * Process has not yet exited
1343                      */
1344                     if ((p->p_stat == SSTOP || p->p_stat == SCORE) &&
1345                         (p->p_flags & P_WAITED) == 0 &&
1346                         (((p->p_flags & P_TRACED) && (options & WTRAPPED)) ||
1347                          (options & WSTOPPED))) {
1348                               lwkt_gettoken(&p->p_token);
1349                               if (p->p_pptr != q) {
1350                                         lwkt_reltoken(&p->p_token);
1351                                         PRELE(p);
1352                                         goto loop;
1353                               }
1354                               if ((p->p_stat != SSTOP && p->p_stat != SCORE) ||
1355                                   (p->p_flags & P_WAITED) != 0 ||
1356                                   ((p->p_flags & P_TRACED) == 0 &&
1357                                    (options & WUNTRACED) == 0)) {
1358                                         lwkt_reltoken(&p->p_token);
1359                                         PRELE(p);
1360                                         goto loop;
1361                               }
1362 
1363                               /*
1364                                * Don't set P_WAITED if WNOWAIT specified, leaving
1365                                * the process in a waitable state.
1366                                */
1367                               if ((options & WNOWAIT) == 0)
1368                                         p->p_flags |= P_WAITED;
1369 
1370                               *res = p->p_pid;
1371                               *status = W_STOPCODE(p->p_xstat);
1372                               /* Zero rusage so we get something consistent. */
1373                               bzero(wrusage, sizeof(*wrusage));
1374                               error = 0;
1375                               if (info) {
1376                                         bzero(info, sizeof(*info));
1377                                         if (p->p_flags & P_TRACED)
1378                                                   info->si_code = CLD_TRAPPED;
1379                                         else
1380                                                   info->si_code = CLD_STOPPED;
1381                                         info->si_status = WSTOPSIG(p->p_xstat);
1382                               }
1383                               lwkt_reltoken(&p->p_token);
1384                               PRELE(p);
1385                               goto done;
1386                     }
1387                     if ((options & WCONTINUED) && (p->p_flags & P_CONTINUED)) {
1388                               lwkt_gettoken(&p->p_token);
1389                               if (p->p_pptr != q) {
1390                                         lwkt_reltoken(&p->p_token);
1391                                         PRELE(p);
1392                                         goto loop;
1393                               }
1394                               if ((p->p_flags & P_CONTINUED) == 0) {
1395                                         lwkt_reltoken(&p->p_token);
1396                                         PRELE(p);
1397                                         goto loop;
1398                               }
1399 
1400                               *res = p->p_pid;
1401 
1402                               /*
1403                                * Don't set P_WAITED if WNOWAIT specified, leaving
1404                                * the process in a waitable state.
1405                                */
1406                               if ((options & WNOWAIT) == 0)
1407                                         p->p_flags &= ~P_CONTINUED;
1408 
1409                               *status = SIGCONT;
1410                               error = 0;
1411                               if (info) {
1412                                         bzero(info, sizeof(*info));
1413                                         info->si_code = CLD_CONTINUED;
1414                                         info->si_status = WSTOPSIG(p->p_xstat);
1415                               }
1416                               lwkt_reltoken(&p->p_token);
1417                               PRELE(p);
1418                               goto done;
1419                     }
1420                     PRELE(p);
1421           }
1422           if (nfound == 0) {
1423                     error = ECHILD;
1424                     goto done;
1425           }
1426           if (options & WNOHANG) {
1427                     *res = 0;
1428                     error = 0;
1429                     goto done;
1430           }
1431 
1432           /*
1433            * Wait for signal - interlocked using q->p_waitgen.
1434            */
1435           error = 0;
1436           while ((waitgen & 0x7FFFFFFF) == (q->p_waitgen & 0x7FFFFFFF)) {
1437                     tsleep_interlock(q, PCATCH);
1438                     waitgen = atomic_fetchadd_long(&q->p_waitgen, 0x80000000);
1439                     if ((waitgen & 0x7FFFFFFF) == (q->p_waitgen & 0x7FFFFFFF)) {
1440                               error = tsleep(q, PCATCH | PINTERLOCKED, "wait", 0);
1441                               break;
1442                     }
1443           }
1444           if (error) {
1445 done:
1446                     lwkt_reltoken(&q->p_token);
1447                     return (error);
1448           }
1449           goto loop;
1450 }
1451 
1452 /*
1453  * Change child's parent process to parent.
1454  *
1455  * p_children/p_sibling requires the parent's token, and
1456  * changing pptr requires the child's token, so we have to
1457  * get three tokens to do this operation.  We also need to
1458  * hold pointers that might get ripped out from under us to
1459  * preserve structural integrity.
1460  *
1461  * It is possible to race another reparent or disconnect or other
1462  * similar operation.  We must retry when this situation occurs.
1463  * Once we successfully reparent the process we no longer care
1464  * about any races.
1465  */
1466 void
proc_reparent(struct proc * child,struct proc * parent)1467 proc_reparent(struct proc *child, struct proc *parent)
1468 {
1469           struct proc *opp;
1470 
1471           PHOLD(parent);
1472           while ((opp = child->p_pptr) != parent) {
1473                     PHOLD(opp);
1474                     lwkt_gettoken(&opp->p_token);
1475                     lwkt_gettoken(&child->p_token);
1476                     lwkt_gettoken(&parent->p_token);
1477                     if (child->p_pptr != opp) {
1478                               lwkt_reltoken(&parent->p_token);
1479                               lwkt_reltoken(&child->p_token);
1480                               lwkt_reltoken(&opp->p_token);
1481                               PRELE(opp);
1482                               continue;
1483                     }
1484                     LIST_REMOVE(child, p_sibling);
1485                     LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
1486                     child->p_pptr = parent;
1487                     child->p_ppid = parent->p_pid;
1488                     lwkt_reltoken(&parent->p_token);
1489                     lwkt_reltoken(&child->p_token);
1490                     lwkt_reltoken(&opp->p_token);
1491                     if (LIST_EMPTY(&opp->p_children))
1492                               wakeup(opp);
1493                     PRELE(opp);
1494                     break;
1495           }
1496           PRELE(parent);
1497 }
1498 
1499 /*
1500  * The next two functions are to handle adding/deleting items on the
1501  * exit callout list
1502  *
1503  * at_exit():
1504  * Take the arguments given and put them onto the exit callout list,
1505  * However first make sure that it's not already there.
1506  * returns 0 on success.
1507  */
1508 
1509 int
at_exit(exitlist_fn function)1510 at_exit(exitlist_fn function)
1511 {
1512           struct exitlist *ep;
1513 
1514 #ifdef INVARIANTS
1515           /* Be noisy if the programmer has lost track of things */
1516           if (rm_at_exit(function))
1517                     kprintf("WARNING: exit callout entry (%p) already present\n",
1518                         function);
1519 #endif
1520           ep = kmalloc(sizeof(*ep), M_ATEXIT, M_NOWAIT);
1521           if (ep == NULL)
1522                     return (ENOMEM);
1523           ep->function = function;
1524           TAILQ_INSERT_TAIL(&exit_list, ep, next);
1525           return (0);
1526 }
1527 
1528 /*
1529  * Scan the exit callout list for the given item and remove it.
1530  * Returns the number of items removed (0 or 1)
1531  */
1532 int
rm_at_exit(exitlist_fn function)1533 rm_at_exit(exitlist_fn function)
1534 {
1535           struct exitlist *ep;
1536 
1537           TAILQ_FOREACH(ep, &exit_list, next) {
1538                     if (ep->function == function) {
1539                               TAILQ_REMOVE(&exit_list, ep, next);
1540                               kfree(ep, M_ATEXIT);
1541                               return(1);
1542                     }
1543           }
1544           return (0);
1545 }
1546 
1547 /*
1548  * LWP reaper related code.
1549  */
1550 static void
reaplwps(void * context,int dummy)1551 reaplwps(void *context, int dummy)
1552 {
1553           struct lwplist *lwplist = context;
1554           struct lwp *lp;
1555           int cpu = mycpuid;
1556 
1557           lwkt_gettoken(&deadlwp_token[cpu]);
1558           while ((lp = LIST_FIRST(lwplist))) {
1559                     LIST_REMOVE(lp, u.lwp_reap_entry);
1560                     reaplwp(lp);
1561           }
1562           lwkt_reltoken(&deadlwp_token[cpu]);
1563 }
1564 
/*
 * Reap a single lwp: keep calling lwp_wait() until it returns
 * non-zero, then hand the lwp to lwp_dispose().
 */
static void
reaplwp(struct lwp *lp)
{
	for (;;) {
		if (lwp_wait(lp) != 0)
			break;
	}
	lwp_dispose(lp);
}
1572 
1573 static void
deadlwp_init(void)1574 deadlwp_init(void)
1575 {
1576           int cpu;
1577 
1578           for (cpu = 0; cpu < ncpus; cpu++) {
1579                     lwkt_token_init(&deadlwp_token[cpu], "deadlwpl");
1580                     LIST_INIT(&deadlwp_list[cpu]);
1581                     deadlwp_task[cpu] = kmalloc(sizeof(*deadlwp_task[cpu]),
1582                                                       M_DEVBUF, M_WAITOK);
1583                     TASK_INIT(deadlwp_task[cpu], 0, reaplwps, &deadlwp_list[cpu]);
1584           }
1585 }
1586 
1587 SYSINIT(deadlwpinit, SI_SUB_CONFIGURE, SI_ORDER_ANY, deadlwp_init, NULL);
1588