1 /*        $NetBSD: kern_proc.c,v 1.279 2025/03/17 19:02:49 riastradh Exp $      */
2 
3 /*-
4  * Copyright (c) 1999, 2006, 2007, 2008, 2020, 2023
5  *     The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center, and by Andrew Doran.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1982, 1986, 1989, 1991, 1993
36  *        The Regents of the University of California.  All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *        @(#)kern_proc.c     8.7 (Berkeley) 2/14/95
63  */
64 
65 #include <sys/cdefs.h>
66 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.279 2025/03/17 19:02:49 riastradh Exp $");
67 
68 #ifdef _KERNEL_OPT
69 #include "opt_kstack.h"
70 #include "opt_maxuprc.h"
71 #include "opt_dtrace.h"
72 #include "opt_compat_netbsd32.h"
73 #include "opt_kaslr.h"
74 #endif
75 
76 #if defined(__HAVE_COMPAT_NETBSD32) && !defined(COMPAT_NETBSD32) \
77     && !defined(_RUMPKERNEL)
78 #define COMPAT_NETBSD32
79 #endif
80 
81 #include <sys/param.h>
82 #include <sys/types.h>
83 
84 #include <sys/acct.h>
85 #include <sys/atomic.h>
86 #include <sys/buf.h>
87 #include <sys/compat_stub.h>
88 #include <sys/cpu.h>
89 #include <sys/dtrace_bsd.h>
90 #include <sys/exec.h>
91 #include <sys/file.h>
92 #include <sys/filedesc.h>
93 #include <sys/futex.h>
94 #include <sys/ioctl.h>
95 #include <sys/kauth.h>
96 #include <sys/kernel.h>
97 #include <sys/kmem.h>
98 #include <sys/namei.h>
99 #include <sys/pool.h>
100 #include <sys/proc.h>
101 #include <sys/pserialize.h>
102 #include <sys/pset.h>
103 #include <sys/ras.h>
104 #include <sys/resourcevar.h>
105 #include <sys/sdt.h>
106 #include <sys/signalvar.h>
107 #include <sys/sleepq.h>
108 #include <sys/syscall_stats.h>
109 #include <sys/sysctl.h>
110 #include <sys/systm.h>
111 #include <sys/tty.h>
112 #include <sys/uio.h>
113 #include <sys/wait.h>
114 #include <ufs/ufs/quota.h>
115 
116 #include <uvm/uvm_extern.h>
117 
118 /*
119  * Process lists.
120  */
121 
/* List emptied by procinit(); proc0 is linked onto it by proc0_init(). */
122 struct proclist               allproc             __cacheline_aligned;
/* Second list named in proclists[]; presumably holds zombie processes -- confirm against users. */
123 struct proclist               zombproc  __cacheline_aligned;
124 
/* Global process lock; set up in procinit() as MUTEX_DEFAULT at IPL_NONE. */
125 kmutex_t            proc_lock __cacheline_aligned;
/* pserialize(9) handle created in procinit(). NOTE(review): presumably used when pid_table is replaced -- confirm. */
126 static pserialize_t proc_psz;
127 
128 /*
129  * pid to lwp/proc lookup is done by indexing the pid_table array.
130  * Since pid numbers are only allocated when an empty slot
131  * has been found, there is no need to search any lists ever.
132  * (an orphaned pgrp will lock the slot, a session will lock
133  * the pgrp with the same number.)
134  * If the table is too small it is reallocated with twice the
135  * previous size and the entries 'unzipped' into the two halves.
136  * A linked list of free entries is passed through the pt_lwp
137  * field of 'free' items - set odd to be an invalid ptr.  Two
138  * additional bits are also used to indicate if the slot is
139  * currently occupied by a proc or lwp, and if the PID is
140  * hidden from certain kinds of lookups.  We thus require a
141  * minimum alignment for proc and lwp structures (LWPs are
142  * at least 32-byte aligned).
143  */
144 
/* One entry of the PID table; the table is indexed by (pid & pid_tbl_mask). */
145 struct pid_table {
146           uintptr_t pt_slot;          /* tagged word: lwp/proc pointer, free-list link, or 0 (reserved); see PT_* */
147           struct pgrp         *pt_pgrp;         /* process group recorded for this slot; cleared by procinit() */
148           pid_t               pt_pid;           /* ID that lookups compare against the requested pid */
149 };
150 
/*
 * Tag bits kept in the low bits of pid_table::pt_slot.
 * Bit 0 set: the slot is a free-list link.  Bit 1 set: the slot holds
 * a struct proc pointer.  No tag bits and non-zero: a struct lwp pointer.
 */
151 #define   PT_F_FREE           ((uintptr_t)__BIT(0))
152 #define   PT_F_LWP            0         /* pseudo-flag */
153 #define   PT_F_PROC           ((uintptr_t)__BIT(1))
154 
/* Masks: the bits that encode the slot type, and every tag bit. */
155 #define   PT_F_TYPEBITS                 (PT_F_FREE|PT_F_PROC)
156 #define   PT_F_ALLBITS                  (PT_F_FREE|PT_F_PROC)
157 
/*
 * Slot predicates, encoders and decoders.
 * PT_SET_FREE() stores its argument shifted left by one with the free
 * bit set; PT_NEXT() recovers that argument (truncated to u_int).
 * A slot value of exactly 0 means "reserved", which is why PT_IS_LWP()
 * additionally requires a non-zero slot.
 */
158 #define   PT_VALID(s)                   (((s) & PT_F_FREE) == 0)
159 #define   PT_RESERVED(s)                ((s) == 0)
160 #define   PT_NEXT(s)                    ((u_int)(s) >> 1)
161 #define   PT_SET_FREE(pid)    (((pid) << 1) | PT_F_FREE)
162 #define   PT_SET_LWP(l)                 ((uintptr_t)(l))
163 #define   PT_SET_PROC(p)                (((uintptr_t)(p)) | PT_F_PROC)
164 #define   PT_SET_RESERVED               0
165 #define   PT_GET_LWP(s)                 ((struct lwp *)((s) & ~PT_F_ALLBITS))
166 #define   PT_GET_PROC(s)                ((struct proc *)((s) & ~PT_F_ALLBITS))
167 #define   PT_GET_TYPE(s)                ((s) & PT_F_TYPEBITS)
168 #define   PT_IS_LWP(s)                  (PT_GET_TYPE(s) == PT_F_LWP && (s) != 0)
169 #define   PT_IS_PROC(s)                 (PT_GET_TYPE(s) == PT_F_PROC)
170 
/* Smallest alignment that keeps all tag bits of a stored pointer zero. */
171 #define   MIN_PROC_ALIGNMENT  (PT_F_ALLBITS + 1)
172 
173 /*
174  * Table of process IDs (PIDs).
175  */
/* Allocated in procinit() with INITIAL_PID_TABLE_SIZE entries. */
176 static struct pid_table *pid_table      __read_mostly;
177 
/* Initial number of entries; the index mask below is this value minus one. */
178 #define   INITIAL_PID_TABLE_SIZE                  (1 << 5)
179 
180 /* Table mask, threshold for growing and number of allocated PIDs. */
181 static u_int                  pid_tbl_mask        __read_mostly;
182 static u_int                  pid_alloc_lim       __read_mostly;
183 static u_int                  pid_alloc_cnt       __cacheline_aligned;
184 
185 /* Next free, last free and maximum PIDs. */
/* procinit() starts the free list at index 1 and ends it at pid_tbl_mask. */
186 static u_int                  next_free_pt        __cacheline_aligned;
187 static u_int                  last_free_pt        __cacheline_aligned;
/* Set to PID_MAX by procinit(). */
188 static pid_t                  pid_max             __read_mostly;
189 
190 /* Components of the first process -- never freed. */
191 
/* Session of process 0: one reference, session ID 0. */
192 struct session session0 = {
193           .s_count = 1,
194           .s_sid = 0,
195 };
/* Process group of process 0; proc0_init() links proc0 onto pg_members. */
196 struct pgrp pgrp0 = {
197           .pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
198           .pg_session = &session0,
199 };
/* File descriptor table of process 0; set up by fd_init() in proc0_init(). */
200 filedesc_t filedesc0;
/* Working-directory info of process 0; its lock is initialised in proc0_init(). */
201 struct cwdinfo cwdi0 = {
202           .cwdi_cmask = CMASK,
203           .cwdi_refcnt = 1,
204 };
/* Resource limits of process 0; filled in by proc0_init(). */
205 struct plimit limit0;
206 struct pstats pstat0;
/* Address space of process 0; initialised over the kernel pmap in proc0_init(). */
207 struct vmspace vmspace0;
208 struct sigacts sigacts0;
/*
 * Process 0 itself.  Only the statically known fields are set here;
 * locks, credentials, limits and signal state are completed at boot by
 * proc0_init().
 */
209 struct proc proc0 = {
210           .p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
211           .p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
212           .p_nlwps = 1,
213           .p_nrlwps = 1,
214           .p_pgrp = &pgrp0,
215           .p_comm = "system",
216           /*
217            * Set PK_NOCLDWAIT so that kernel threads are reparented to init(8)
218            * when they exit.  init(8) can easily wait them out for us.
219            */
220           .p_flag = PK_SYSTEM | PK_NOCLDWAIT,
221           .p_stat = SACTIVE,
222           .p_nice = NZERO,
223           .p_emul = &emul_netbsd,
224           .p_cwdi = &cwdi0,
225           .p_limit = &limit0,
226           .p_fd = &filedesc0,
227           .p_vmspace = &vmspace0,
228           .p_stats = &pstat0,
229           .p_sigacts = &sigacts0,
230 #ifdef PROC0_MD_INITIALIZERS
231           PROC0_MD_INITIALIZERS
232 #endif
233 };
/* Credentials of process 0; allocated in proc0_init() and stored in proc0.p_cred. */
234 kauth_cred_t cred0;
235 
/* Caps applied to proc0's soft RLIMIT_NOFILE and RLIMIT_NPROC in proc0_init(). */
236 static const int    nofile    = NOFILE;
237 static const int    maxuprc   = MAXUPRC;
238 
/* sysctl handlers registered by procinit_sysctl(). */
239 static int sysctl_doeproc(SYSCTLFN_PROTO);
240 static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
241 static int sysctl_security_expose_address(SYSCTLFN_PROTO);
242 
/*
 * Kernel-pointer exposure policy read by proc_listener_cb():
 * 0 never allows, 1 allows only when the target process has PK_KMEM
 * set, any other value allows.  Defaults to 0 on KASLR kernels and to
 * 1 otherwise.
 */
243 #ifdef KASLR
244 static int kern_expose_address = 0;
245 #else
246 static int kern_expose_address = 1;
247 #endif
248 /*
249  * The process list descriptors, used during pid allocation and
250  * by sysctl.  No locking on this data structure is needed since
251  * it is completely static.
252  */
/* The NULL entry terminates the array; procinit() walks it until it hits that entry. */
253 const struct proclist_desc proclists[] = {
254           { &allproc          },
255           { &zombproc         },
256           { NULL              },
257 };
258 
/* Process-group helpers defined later in this file. */
259 static struct pgrp *          pg_remove(pid_t);
260 static void                   pg_delete(pid_t);
261 static void                   orphanpg(struct pgrp *);
262 
/* Created in procinit() with specificdata_domain_create(). */
263 static specificdata_domain_t proc_specificdata_domain;
264 
/* Pool cache for struct proc; created in procinit() with proc_ctor() as constructor. */
265 static pool_cache_t proc_cache;
266 
/* Handle returned by kauth_listen_scope() for proc_listener_cb(). */
267 static kauth_listener_t proc_listener;
268 
/* Helpers defined later in this file. */
269 static void fill_proc(const struct proc *, struct proc *, bool);
270 static int fill_pathname(struct lwp *, pid_t, void *, size_t *);
271 static int fill_cwd(struct lwp *, pid_t, void *, size_t *);
272 
273 static int
proc_listener_cb(kauth_cred_t cred,kauth_action_t action,void * cookie,void * arg0,void * arg1,void * arg2,void * arg3)274 proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
275     void *arg0, void *arg1, void *arg2, void *arg3)
276 {
277           struct proc *p;
278           int result;
279 
280           result = KAUTH_RESULT_DEFER;
281           p = arg0;
282 
283           switch (action) {
284           case KAUTH_PROCESS_CANSEE: {
285                     enum kauth_process_req req;
286 
287                     req = (enum kauth_process_req)(uintptr_t)arg1;
288 
289                     switch (req) {
290                     case KAUTH_REQ_PROCESS_CANSEE_ARGS:
291                     case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
292                     case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
293                     case KAUTH_REQ_PROCESS_CANSEE_EPROC:
294                               result = KAUTH_RESULT_ALLOW;
295                               break;
296 
297                     case KAUTH_REQ_PROCESS_CANSEE_ENV:
298                               if (kauth_cred_getuid(cred) !=
299                                   kauth_cred_getuid(p->p_cred) ||
300                                   kauth_cred_getuid(cred) !=
301                                   kauth_cred_getsvuid(p->p_cred))
302                                         break;
303 
304                               result = KAUTH_RESULT_ALLOW;
305 
306                               break;
307 
308                     case KAUTH_REQ_PROCESS_CANSEE_KPTR:
309                               if (!kern_expose_address)
310                                         break;
311 
312                               if (kern_expose_address == 1 && !(p->p_flag & PK_KMEM))
313                                         break;
314 
315                               result = KAUTH_RESULT_ALLOW;
316 
317                               break;
318 
319                     default:
320                               break;
321                     }
322 
323                     break;
324                     }
325 
326           case KAUTH_PROCESS_FORK: {
327                     int lnprocs = (int)(unsigned long)arg2;
328 
329                     /*
330                      * Don't allow a nonprivileged user to use the last few
331                      * processes. The variable lnprocs is the current number of
332                      * processes, maxproc is the limit.
333                      */
334                     if (__predict_false((lnprocs >= maxproc - 5)))
335                               break;
336 
337                     result = KAUTH_RESULT_ALLOW;
338 
339                     break;
340                     }
341 
342           case KAUTH_PROCESS_CORENAME:
343           case KAUTH_PROCESS_STOPFLAG:
344                     if (proc_uidmatch(cred, p->p_cred) == 0)
345                               result = KAUTH_RESULT_ALLOW;
346 
347                     break;
348 
349           default:
350                     break;
351           }
352 
353           return result;
354 }
355 
356 static int
proc_ctor(void * arg __unused,void * obj,int flags __unused)357 proc_ctor(void *arg __unused, void *obj, int flags __unused)
358 {
359           struct proc *p = obj;
360 
361           memset(p, 0, sizeof(*p));
362           klist_init(&p->p_klist);
363 
364           /*
365            * There is no need for a proc_dtor() to do a klist_fini(),
366            * since knote_proc_exit() ensures that p->p_klist is empty
367            * when a process exits.
368            */
369 
370           return 0;
371 }
372 
/* Forward declaration: procinit() below uses this to reserve PID 1. */
373 static pid_t proc_alloc_pid_slot(struct proc *, uintptr_t);
374 
375 /*
376  * Initialize global process hashing structures.
377  */
378 void
procinit(void)379 procinit(void)
380 {
381           const struct proclist_desc *pd;
382           u_int i;
383 #define   LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
384 
385           for (pd = proclists; pd->pd_list != NULL; pd++)
386                     LIST_INIT(pd->pd_list);
387 
388           mutex_init(&proc_lock, MUTEX_DEFAULT, IPL_NONE);
389 
390           proc_psz = pserialize_create();
391 
392           pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
393               * sizeof(struct pid_table), KM_SLEEP);
394           pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
395           pid_max = PID_MAX;
396 
397           /* Set free list running through table...
398              Preset 'use count' above PID_MAX so we allocate pid 1 next. */
399           for (i = 0; i <= pid_tbl_mask; i++) {
400                     pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1);
401                     pid_table[i].pt_pgrp = 0;
402                     pid_table[i].pt_pid = 0;
403           }
404           /* slot 0 is just grabbed */
405           next_free_pt = 1;
406           /* Need to fix last entry. */
407           last_free_pt = pid_tbl_mask;
408           pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY);
409           /* point at which we grow table - to avoid reusing pids too often */
410           pid_alloc_lim = pid_tbl_mask - 1;
411 #undef LINK_EMPTY
412 
413           /* Reserve PID 1 for init(8). */        /* XXX slightly gross */
414           mutex_enter(&proc_lock);
415           if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1)
416                     panic("failed to reserve PID 1 for init(8)");
417           mutex_exit(&proc_lock);
418 
419           proc_specificdata_domain = specificdata_domain_create();
420           KASSERT(proc_specificdata_domain != NULL);
421 
422           size_t proc_alignment = coherency_unit;
423           if (proc_alignment < MIN_PROC_ALIGNMENT)
424                     proc_alignment = MIN_PROC_ALIGNMENT;
425 
426           proc_cache = pool_cache_init(sizeof(struct proc), proc_alignment, 0, 0,
427               "procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL);
428 
429           proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
430               proc_listener_cb, NULL);
431 }
432 
433 void
procinit_sysctl(void)434 procinit_sysctl(void)
435 {
436           static struct sysctllog *clog;
437 
438           sysctl_createv(&clog, 0, NULL, NULL,
439                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
440                            CTLTYPE_INT, "expose_address",
441                            SYSCTL_DESCR("Enable exposing kernel addresses"),
442                            sysctl_security_expose_address, 0,
443                            &kern_expose_address, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
444           sysctl_createv(&clog, 0, NULL, NULL,
445                            CTLFLAG_PERMANENT,
446                            CTLTYPE_NODE, "proc",
447                            SYSCTL_DESCR("System-wide process information"),
448                            sysctl_doeproc, 0, NULL, 0,
449                            CTL_KERN, KERN_PROC, CTL_EOL);
450           sysctl_createv(&clog, 0, NULL, NULL,
451                            CTLFLAG_PERMANENT,
452                            CTLTYPE_NODE, "proc2",
453                            SYSCTL_DESCR("Machine-independent process information"),
454                            sysctl_doeproc, 0, NULL, 0,
455                            CTL_KERN, KERN_PROC2, CTL_EOL);
456           sysctl_createv(&clog, 0, NULL, NULL,
457                            CTLFLAG_PERMANENT,
458                            CTLTYPE_NODE, "proc_args",
459                            SYSCTL_DESCR("Process argument information"),
460                            sysctl_kern_proc_args, 0, NULL, 0,
461                            CTL_KERN, KERN_PROC_ARGS, CTL_EOL);
462 
463           /*
464             "nodes" under these:
465 
466             KERN_PROC_ALL
467             KERN_PROC_PID pid
468             KERN_PROC_PGRP pgrp
469             KERN_PROC_SESSION sess
470             KERN_PROC_TTY tty
471             KERN_PROC_UID uid
472             KERN_PROC_RUID uid
473             KERN_PROC_GID gid
474             KERN_PROC_RGID gid
475 
476             all in all, probably not worth the effort...
477           */
478 }
479 
480 /*
481  * Initialize process 0.
482  */
483 void
proc0_init(void)484 proc0_init(void)
485 {
486           struct proc *p;
487           struct pgrp *pg;
488           struct rlimit *rlim;
489           rlim_t lim;
490           int i;
491 
492           p = &proc0;
493           pg = &pgrp0;
494 
495           mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
496           mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
497           p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
498 
499           rw_init(&p->p_reflock);
500           cv_init(&p->p_waitcv, "wait");
501           cv_init(&p->p_lwpcv, "lwpwait");
502 
503           LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);
504 
505           KASSERT(lwp0.l_lid == 0);
506           pid_table[lwp0.l_lid].pt_slot = PT_SET_LWP(&lwp0);
507           LIST_INSERT_HEAD(&allproc, p, p_list);
508 
509           pid_table[lwp0.l_lid].pt_pgrp = pg;
510           LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);
511 
512 #ifdef __HAVE_SYSCALL_INTERN
513           (*p->p_emul->e_syscall_intern)(p);
514 #endif
515 
516           /* Create credentials. */
517           cred0 = kauth_cred_alloc();
518           p->p_cred = cred0;
519 
520           /* Create the CWD info. */
521           rw_init(&cwdi0.cwdi_lock);
522 
523           /* Create the limits structures. */
524           mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);
525 
526           rlim = limit0.pl_rlimit;
527           for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) {
528                     rlim[i].rlim_cur = RLIM_INFINITY;
529                     rlim[i].rlim_max = RLIM_INFINITY;
530           }
531 
532           rlim[RLIMIT_NOFILE].rlim_max = maxfiles;
533           rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile;
534 
535           rlim[RLIMIT_NPROC].rlim_max = maxproc;
536           rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc;
537 
538           lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvm_availmem(false)));
539           rlim[RLIMIT_RSS].rlim_max = lim;
540           rlim[RLIMIT_MEMLOCK].rlim_max = lim;
541           rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
542 
543           rlim[RLIMIT_NTHR].rlim_max = maxlwp;
544           rlim[RLIMIT_NTHR].rlim_cur = maxlwp / 2;
545 
546           /* Note that default core name has zero length. */
547           limit0.pl_corename = defcorename;
548           limit0.pl_cnlen = 0;
549           limit0.pl_refcnt = 1;
550           limit0.pl_writeable = false;
551           limit0.pl_sv_limit = NULL;
552 
553           /* Configure virtual memory system, set vm rlimits. */
554           uvm_init_limits(p);
555 
556           /* Initialize file descriptor table for proc0. */
557           fd_init(&filedesc0);
558 
559           /*
560            * Initialize proc0's vmspace, which uses the kernel pmap.
561            * All kernel processes (which never have user space mappings)
562            * share proc0's vmspace, and thus, the kernel pmap.
563            */
564           uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
565               trunc_page(VM_MAXUSER_ADDRESS),
566 #ifdef __USE_TOPDOWN_VM
567               true
568 #else
569               false
570 #endif
571               );
572 
573           /* Initialize signal state for proc0. XXX IPL_SCHED */
574           mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
575           siginit(p);
576 
577           proc_initspecific(p);
578           kdtrace_proc_ctor(NULL, p);
579 }
580 
581 /*
582  * Session reference counting.
583  */
584 
585 void
proc_sesshold(struct session * ss)586 proc_sesshold(struct session *ss)
587 {
588 
589           KASSERT(mutex_owned(&proc_lock));
590           ss->s_count++;
591 }
592 
593 void
proc_sessrele(struct session * ss)594 proc_sessrele(struct session *ss)
595 {
596           struct pgrp *pg;
597 
598           KASSERT(mutex_owned(&proc_lock));
599           KASSERT(ss->s_count > 0);
600 
601           /*
602            * We keep the pgrp with the same id as the session in order to
603            * stop a process being given the same pid.  Since the pgrp holds
604            * a reference to the session, it must be a 'zombie' pgrp by now.
605            */
606           if (--ss->s_count == 0) {
607                     pg = pg_remove(ss->s_sid);
608           } else {
609                     pg = NULL;
610                     ss = NULL;
611           }
612 
613           mutex_exit(&proc_lock);
614 
615           if (pg)
616                     kmem_free(pg, sizeof(struct pgrp));
617           if (ss)
618                     kmem_free(ss, sizeof(struct session));
619 }
620 
621 /*
622  * Check that the specified process group is in the session of the
623  * specified process.
624  * Treats -ve ids as process ids.
625  * Used to validate TIOCSPGRP requests.
626  */
627 int
pgid_in_session(struct proc * p,pid_t pg_id)628 pgid_in_session(struct proc *p, pid_t pg_id)
629 {
630           struct pgrp *pgrp;
631           struct session *session;
632           int error;
633 
634           if (pg_id <= INT_MIN)
635                     return SET_ERROR(EINVAL);
636 
637           mutex_enter(&proc_lock);
638           if (pg_id < 0) {
639                     struct proc *p1 = proc_find(-pg_id);
640                     if (p1 == NULL) {
641                               error = SET_ERROR(EINVAL);
642                               goto fail;
643                     }
644                     pgrp = p1->p_pgrp;
645           } else {
646                     pgrp = pgrp_find(pg_id);
647                     if (pgrp == NULL) {
648                               error = SET_ERROR(EINVAL);
649                               goto fail;
650                     }
651           }
652           session = pgrp->pg_session;
653           error = (session != p->p_pgrp->pg_session) ? SET_ERROR(EPERM) : 0;
654 fail:
655           mutex_exit(&proc_lock);
656           return error;
657 }
658 
659 /*
660  * p_inferior: is p an inferior of q?
661  */
662 static inline bool
p_inferior(struct proc * p,struct proc * q)663 p_inferior(struct proc *p, struct proc *q)
664 {
665 
666           KASSERT(mutex_owned(&proc_lock));
667 
668           for (; p != q; p = p->p_pptr)
669                     if (p->p_pid == 0)
670                               return false;
671           return true;
672 }
673 
674 /*
675  * proc_find_lwp: locate an lwp in said proc by the ID.
676  *
677  * => Must be called with p::p_lock held.
678  * => LSIDL lwps are not returned because they are only partially
679  *    constructed while occupying the slot.
680  * => Callers need to be careful about lwp::l_stat of the returned
681  *    lwp.
682  */
683 struct lwp *
proc_find_lwp(proc_t * p,pid_t pid)684 proc_find_lwp(proc_t *p, pid_t pid)
685 {
686           struct pid_table *pt;
687           unsigned pt_mask;
688           struct lwp *l = NULL;
689           uintptr_t slot;
690           int s;
691 
692           KASSERT(mutex_owned(p->p_lock));
693 
694           /*
695            * Look in the pid_table.  This is done unlocked inside a
696            * pserialize read section covering pid_table's memory
697            * allocation only, so take care to read things in the correct
698            * order:
699            *
700            * 1. First read the table mask -- this only ever increases, in
701            *    expand_pid_table, so a stale value is safely
702            *    conservative.
703            *
704            * 2. Next read the pid table -- this is always set _before_
705            *    the mask increases, so if we see a new table and stale
706            *    mask, the mask is still valid for the table.
707            */
708           s = pserialize_read_enter();
709           pt_mask = atomic_load_acquire(&pid_tbl_mask);
710           pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
711           slot = atomic_load_consume(&pt->pt_slot);
712           if (__predict_false(!PT_IS_LWP(slot))) {
713                     pserialize_read_exit(s);
714                     return NULL;
715           }
716 
717           /*
718            * Check to see if the LWP is from the correct process.  We won't
719            * see entries in pid_table from a prior process that also used "p",
720            * by virtue of the fact that allocating "p" means all prior updates
721            * to dependant data structures are visible to this thread.
722            */
723           l = PT_GET_LWP(slot);
724           if (__predict_false(atomic_load_relaxed(&l->l_proc) != p)) {
725                     pserialize_read_exit(s);
726                     return NULL;
727           }
728 
729           /*
730            * We now know that p->p_lock holds this LWP stable.
731            *
732            * If the status is not LSIDL, it means the LWP is intended to be
733            * findable by LID and l_lid cannot change behind us.
734            *
735            * No need to acquire the LWP's lock to check for LSIDL, as
736            * p->p_lock must be held to transition in and out of LSIDL.
737            * Any other observed state of is no particular interest.
738            */
739           pserialize_read_exit(s);
740           return l->l_stat != LSIDL && l->l_lid == pid ? l : NULL;
741 }
742 
743 /*
744  * proc_find_lwp_unlocked: locate an lwp in said proc by the ID.
745  *
746  * => Called in a pserialize read section with no locks held.
747  * => LSIDL lwps are not returned because they are only partially
748  *    constructed while occupying the slot.
749  * => Callers need to be careful about lwp::l_stat of the returned
750  *    lwp.
751  * => If an LWP is found, it's returned locked.
752  */
753 struct lwp *
proc_find_lwp_unlocked(proc_t * p,pid_t pid)754 proc_find_lwp_unlocked(proc_t *p, pid_t pid)
755 {
756           struct pid_table *pt;
757           unsigned pt_mask;
758           struct lwp *l = NULL;
759           uintptr_t slot;
760 
761           KASSERT(pserialize_in_read_section());
762 
763           /*
764            * Look in the pid_table.  This is done unlocked inside a
765            * pserialize read section covering pid_table's memory
766            * allocation only, so take care to read things in the correct
767            * order:
768            *
769            * 1. First read the table mask -- this only ever increases, in
770            *    expand_pid_table, so a stale value is safely
771            *    conservative.
772            *
773            * 2. Next read the pid table -- this is always set _before_
774            *    the mask increases, so if we see a new table and stale
775            *    mask, the mask is still valid for the table.
776            */
777           pt_mask = atomic_load_acquire(&pid_tbl_mask);
778           pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
779           slot = atomic_load_consume(&pt->pt_slot);
780           if (__predict_false(!PT_IS_LWP(slot))) {
781                     return NULL;
782           }
783 
784           /*
785            * Lock the LWP we found to get it stable.  If it's embryonic or
786            * reaped (LSIDL) then none of the other fields can safely be
787            * checked.
788            */
789           l = PT_GET_LWP(slot);
790           lwp_lock(l);
791           if (__predict_false(l->l_stat == LSIDL)) {
792                     lwp_unlock(l);
793                     return NULL;
794           }
795 
796           /*
797            * l_proc and l_lid are now known stable because the LWP is not
798            * LSIDL, so check those fields too to make sure we found the
799            * right thing.
800            */
801           if (__predict_false(l->l_proc != p || l->l_lid != pid)) {
802                     lwp_unlock(l);
803                     return NULL;
804           }
805 
806           /* Everything checks out, return it locked. */
807           return l;
808 }
809 
810 /*
811  * proc_find_lwp_acquire_proc: locate an lwp and acquire a lock
812  * on its containing proc.
813  *
814  * => Similar to proc_find_lwp(), but does not require you to have
815  *    the proc a priori.
816  * => Also returns proc * to caller, with p::p_lock held.
817  * => Same caveats apply.
818  */
819 struct lwp *
proc_find_lwp_acquire_proc(pid_t pid,struct proc ** pp)820 proc_find_lwp_acquire_proc(pid_t pid, struct proc **pp)
821 {
822           struct pid_table *pt;
823           struct proc *p = NULL;
824           struct lwp *l = NULL;
825           uintptr_t slot;
826 
827           KASSERT(pp != NULL);
828           mutex_enter(&proc_lock);
829           pt = &pid_table[pid & pid_tbl_mask];
830 
831           slot = pt->pt_slot;
832           if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
833                     l = PT_GET_LWP(slot);
834                     p = l->l_proc;
835                     mutex_enter(p->p_lock);
836                     if (__predict_false(l->l_stat == LSIDL)) {
837                               mutex_exit(p->p_lock);
838                               l = NULL;
839                               p = NULL;
840                     }
841           }
842           mutex_exit(&proc_lock);
843 
844           KASSERT(p == NULL || mutex_owned(p->p_lock));
845           *pp = p;
846           return l;
847 }
848 
849 /*
850  * proc_find_raw_pid_table_locked: locate a process by the ID.
851  *
852  * => Must be called with proc_lock held.
853  */
854 static proc_t *
proc_find_raw_pid_table_locked(pid_t pid,bool any_lwpid)855 proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid)
856 {
857           struct pid_table *pt;
858           proc_t *p = NULL;
859           uintptr_t slot;
860 
861           /* No - used by DDB.  KASSERT(mutex_owned(&proc_lock)); */
862           pt = &pid_table[pid & pid_tbl_mask];
863 
864           slot = pt->pt_slot;
865           if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
866                     /*
867                      * When looking up processes, require a direct match
868                      * on the PID assigned to the proc, not just one of
869                      * its LWPs.
870                      *
871                      * N.B. We require lwp::l_proc of LSIDL LWPs to be
872                      * valid here.
873                      */
874                     p = PT_GET_LWP(slot)->l_proc;
875                     if (__predict_false(p->p_pid != pid && !any_lwpid))
876                               p = NULL;
877           } else if (PT_IS_PROC(slot) && pt->pt_pid == pid) {
878                     p = PT_GET_PROC(slot);
879           }
880           return p;
881 }
882 
883 proc_t *
proc_find_raw(pid_t pid)884 proc_find_raw(pid_t pid)
885 {
886 
887           return proc_find_raw_pid_table_locked(pid, false);
888 }
889 
890 static proc_t *
proc_find_internal(pid_t pid,bool any_lwpid)891 proc_find_internal(pid_t pid, bool any_lwpid)
892 {
893           proc_t *p;
894 
895           KASSERT(mutex_owned(&proc_lock));
896 
897           p = proc_find_raw_pid_table_locked(pid, any_lwpid);
898           if (__predict_false(p == NULL)) {
899                     return NULL;
900           }
901 
902           /*
903            * Only allow live processes to be found by PID.
904            * XXX: p_stat might change, since proc unlocked.
905            */
906           if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
907                     return p;
908           }
909           return NULL;
910 }
911 
912 proc_t *
proc_find(pid_t pid)913 proc_find(pid_t pid)
914 {
915           return proc_find_internal(pid, false);
916 }
917 
918 proc_t *
proc_find_lwpid(pid_t pid)919 proc_find_lwpid(pid_t pid)
920 {
921           return proc_find_internal(pid, true);
922 }
923 
924 /*
925  * pgrp_find: locate a process group by the ID.
926  *
927  * => Must be called with proc_lock held.
928  */
929 struct pgrp *
pgrp_find(pid_t pgid)930 pgrp_find(pid_t pgid)
931 {
932           struct pgrp *pg;
933 
934           KASSERT(mutex_owned(&proc_lock));
935 
936           pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
937 
938           /*
939            * Cannot look up a process group that only exists because the
940            * session has not died yet (traditional).
941            */
942           if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
943                     return NULL;
944           }
945           return pg;
946 }
947 
948 static void
expand_pid_table(void)949 expand_pid_table(void)
950 {
951           size_t pt_size, tsz;
952           struct pid_table *n_pt, *new_pt;
953           uintptr_t slot;
954           struct pgrp *pgrp;
955           pid_t pid, rpid;
956           u_int i;
957           uint new_pt_mask;
958 
959           KASSERT(mutex_owned(&proc_lock));
960 
961           /* Unlock the pid_table briefly to allocate memory. */
962           pt_size = pid_tbl_mask + 1;
963           mutex_exit(&proc_lock);
964 
965           tsz = pt_size * 2 * sizeof(struct pid_table);
966           new_pt = kmem_alloc(tsz, KM_SLEEP);
967           new_pt_mask = pt_size * 2 - 1;
968 
969           /* XXX For now.  The pratical limit is much lower anyway. */
970           KASSERT(new_pt_mask <= FUTEX_TID_MASK);
971 
972           mutex_enter(&proc_lock);
973           if (pt_size != pid_tbl_mask + 1) {
974                     /* Another process beat us to it... */
975                     mutex_exit(&proc_lock);
976                     kmem_free(new_pt, tsz);
977                     goto out;
978           }
979 
980           /*
981            * Copy entries from old table into new one.
982            * If 'pid' is 'odd' we need to place in the upper half,
983            * even pid's to the lower half.
984            * Free items stay in the low half so we don't have to
985            * fixup the reference to them.
986            * We stuff free items on the front of the freelist
987            * because we can't write to unmodified entries.
988            * Processing the table backwards maintains a semblance
989            * of issuing pid numbers that increase with time.
990            */
991           i = pt_size - 1;
992           n_pt = new_pt + i;
993           for (; ; i--, n_pt--) {
994                     slot = pid_table[i].pt_slot;
995                     pgrp = pid_table[i].pt_pgrp;
996                     if (!PT_VALID(slot)) {
997                               /* Up 'use count' so that link is valid */
998                               pid = (PT_NEXT(slot) + pt_size) & ~pt_size;
999                               rpid = 0;
1000                               slot = PT_SET_FREE(pid);
1001                               if (pgrp)
1002                                         pid = pgrp->pg_id;
1003                     } else {
1004                               pid = pid_table[i].pt_pid;
1005                               rpid = pid;
1006                     }
1007 
1008                     /* Save entry in appropriate half of table */
1009                     n_pt[pid & pt_size].pt_slot = slot;
1010                     n_pt[pid & pt_size].pt_pgrp = pgrp;
1011                     n_pt[pid & pt_size].pt_pid = rpid;
1012 
1013                     /* Put other piece on start of free list */
1014                     pid = (pid ^ pt_size) & ~pid_tbl_mask;
1015                     n_pt[pid & pt_size].pt_slot =
1016                               PT_SET_FREE((pid & ~pt_size) | next_free_pt);
1017                     n_pt[pid & pt_size].pt_pgrp = 0;
1018                     n_pt[pid & pt_size].pt_pid = 0;
1019 
1020                     next_free_pt = i | (pid & pt_size);
1021                     if (i == 0)
1022                               break;
1023           }
1024 
1025           /* Save old table size and switch tables */
1026           tsz = pt_size * sizeof(struct pid_table);
1027           n_pt = pid_table;
1028           atomic_store_release(&pid_table, new_pt);
1029           KASSERT(new_pt_mask >= pid_tbl_mask);
1030           atomic_store_release(&pid_tbl_mask, new_pt_mask);
1031 
1032           /*
1033            * pid_max starts as PID_MAX (= 30000), once we have 16384
1034            * allocated pids we need it to be larger!
1035            */
1036           if (pid_tbl_mask > PID_MAX) {
1037                     pid_max = pid_tbl_mask * 2 + 1;
1038                     pid_alloc_lim |= pid_alloc_lim << 1;
1039           } else
1040                     pid_alloc_lim <<= 1;          /* doubles number of free slots... */
1041 
1042           mutex_exit(&proc_lock);
1043 
1044           /*
1045            * Make sure that unlocked access to the old pid_table is complete
1046            * and then free it.
1047            */
1048           pserialize_perform(proc_psz);
1049           kmem_free(n_pt, tsz);
1050 
1051  out:     /* Return with proc_lock held again. */
1052           mutex_enter(&proc_lock);
1053 }
1054 
1055 struct proc *
proc_alloc(void)1056 proc_alloc(void)
1057 {
1058           struct proc *p;
1059 
1060           p = pool_cache_get(proc_cache, PR_WAITOK);
1061           p->p_stat = SIDL;                       /* protect against others */
1062           proc_initspecific(p);
1063           kdtrace_proc_ctor(NULL, p);
1064 
1065           /*
1066            * Allocate a placeholder in the pid_table.  When we create the
1067            * first LWP for this process, it will take ownership of the
1068            * slot.
1069            */
1070           if (__predict_false(proc_alloc_pid(p) == -1)) {
1071                     /* Allocating the PID failed; unwind. */
1072                     proc_finispecific(p);
1073                     proc_free_mem(p);
1074                     p = NULL;
1075           }
1076           return p;
1077 }
1078 
1079 /*
1080  * proc_alloc_pid_slot: allocate PID and record the occupant so that
1081  * proc_find_raw() can find it by the PID.
1082  */
1083 static pid_t __noinline
proc_alloc_pid_slot(struct proc * p,uintptr_t slot)1084 proc_alloc_pid_slot(struct proc *p, uintptr_t slot)
1085 {
1086           struct pid_table *pt;
1087           pid_t pid;
1088           int nxt;
1089 
1090           KASSERT(mutex_owned(&proc_lock));
1091 
1092           for (;;expand_pid_table()) {
1093                     if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) {
1094                               /* ensure pids cycle through 2000+ values */
1095                               continue;
1096                     }
1097                     /*
1098                      * The first user process *must* be given PID 1.
1099                      * it has already been reserved for us.  This
1100                      * will be coming in from the proc_alloc() call
1101                      * above, and the entry will be usurped later when
1102                      * the first user LWP is created.
1103                      * XXX this is slightly gross.
1104                      */
1105                     if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) &&
1106                                             p != &proc0)) {
1107                               KASSERT(PT_IS_PROC(slot));
1108                               pt = &pid_table[1];
1109                               pt->pt_slot = slot;
1110                               return 1;
1111                     }
1112                     pt = &pid_table[next_free_pt];
1113 #ifdef DIAGNOSTIC
1114                     if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp))
1115                               panic("proc_alloc: slot busy");
1116 #endif
1117                     nxt = PT_NEXT(pt->pt_slot);
1118                     if (nxt & pid_tbl_mask)
1119                               break;
1120                     /* Table full - expand (NB last entry not used....) */
1121           }
1122 
1123           /* pid is 'saved use count' + 'size' + entry */
1124           pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
1125           if ((uint)pid > (uint)pid_max)
1126                     pid &= pid_tbl_mask;
1127           next_free_pt = nxt & pid_tbl_mask;
1128 
1129           /* XXX For now.  The pratical limit is much lower anyway. */
1130           KASSERT(pid <= FUTEX_TID_MASK);
1131 
1132           /* Grab table slot */
1133           pt->pt_slot = slot;
1134 
1135           KASSERT(pt->pt_pid == 0);
1136           pt->pt_pid = pid;
1137           pid_alloc_cnt++;
1138 
1139           return pid;
1140 }
1141 
1142 pid_t
proc_alloc_pid(struct proc * p)1143 proc_alloc_pid(struct proc *p)
1144 {
1145           pid_t pid;
1146 
1147           KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0);
1148           KASSERT(p->p_stat == SIDL);
1149 
1150           mutex_enter(&proc_lock);
1151           pid = proc_alloc_pid_slot(p, PT_SET_PROC(p));
1152           if (pid != -1)
1153                     p->p_pid = pid;
1154           mutex_exit(&proc_lock);
1155 
1156           return pid;
1157 }
1158 
1159 pid_t
proc_alloc_lwpid(struct proc * p,struct lwp * l)1160 proc_alloc_lwpid(struct proc *p, struct lwp *l)
1161 {
1162           struct pid_table *pt;
1163           pid_t pid;
1164 
1165           KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0);
1166           KASSERT(l->l_proc == p);
1167           KASSERT(l->l_stat == LSIDL);
1168 
1169           /*
1170            * For unlocked lookup in proc_find_lwp(), make sure l->l_proc
1171            * is globally visible before the LWP becomes visible via the
1172            * pid_table.
1173            */
1174 #ifndef __HAVE_ATOMIC_AS_MEMBAR
1175           membar_producer();
1176 #endif
1177 
1178           /*
1179            * If the slot for p->p_pid currently points to the proc,
1180            * then we should usurp this ID for the LWP.  This happens
1181            * at least once per process (for the first LWP), and can
1182            * happen again if the first LWP for a process exits and
1183            * before the process creates another.
1184            */
1185           mutex_enter(&proc_lock);
1186           pid = p->p_pid;
1187           pt = &pid_table[pid & pid_tbl_mask];
1188           KASSERT(pt->pt_pid == pid);
1189           if (PT_IS_PROC(pt->pt_slot)) {
1190                     KASSERT(PT_GET_PROC(pt->pt_slot) == p);
1191                     l->l_lid = pid;
1192                     pt->pt_slot = PT_SET_LWP(l);
1193           } else {
1194                     /* Need to allocate a new slot. */
1195                     pid = proc_alloc_pid_slot(p, PT_SET_LWP(l));
1196                     if (pid != -1)
1197                               l->l_lid = pid;
1198           }
1199           mutex_exit(&proc_lock);
1200 
1201           return pid;
1202 }
1203 
1204 static void __noinline
proc_free_pid_internal(pid_t pid,uintptr_t type __diagused)1205 proc_free_pid_internal(pid_t pid, uintptr_t type __diagused)
1206 {
1207           struct pid_table *pt;
1208 
1209           KASSERT(mutex_owned(&proc_lock));
1210 
1211           pt = &pid_table[pid & pid_tbl_mask];
1212 
1213           KASSERT(PT_GET_TYPE(pt->pt_slot) == type);
1214           KASSERT(pt->pt_pid == pid);
1215 
1216           /* save pid use count in slot */
1217           pt->pt_slot = PT_SET_FREE(pid & ~pid_tbl_mask);
1218           pt->pt_pid = 0;
1219 
1220           if (pt->pt_pgrp == NULL) {
1221                     /* link last freed entry onto ours */
1222                     pid &= pid_tbl_mask;
1223                     pt = &pid_table[last_free_pt];
1224                     pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pid);
1225                     pt->pt_pid = 0;
1226                     last_free_pt = pid;
1227                     pid_alloc_cnt--;
1228           }
1229 }
1230 
1231 /*
1232  * Free a process id - called from proc_free (in kern_exit.c)
1233  *
1234  * Called with the proc_lock held.
1235  */
1236 void
proc_free_pid(pid_t pid)1237 proc_free_pid(pid_t pid)
1238 {
1239 
1240           KASSERT(mutex_owned(&proc_lock));
1241           proc_free_pid_internal(pid, PT_F_PROC);
1242 }
1243 
1244 /*
1245  * Free a process id used by an LWP.  If this was the process's
1246  * first LWP, we convert the slot to point to the process; the
1247  * entry will get cleaned up later when the process finishes exiting.
1248  *
1249  * If not, then it's the same as proc_free_pid().
1250  */
1251 void
proc_free_lwpid(struct proc * p,pid_t pid)1252 proc_free_lwpid(struct proc *p, pid_t pid)
1253 {
1254 
1255           KASSERT(mutex_owned(&proc_lock));
1256 
1257           if (__predict_true(p->p_pid == pid)) {
1258                     struct pid_table *pt;
1259 
1260                     pt = &pid_table[pid & pid_tbl_mask];
1261 
1262                     KASSERT(pt->pt_pid == pid);
1263                     KASSERT(PT_IS_LWP(pt->pt_slot));
1264                     KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p);
1265 
1266                     pt->pt_slot = PT_SET_PROC(p);
1267                     return;
1268           }
1269           proc_free_pid_internal(pid, PT_F_LWP);
1270 }
1271 
1272 void
proc_free_mem(struct proc * p)1273 proc_free_mem(struct proc *p)
1274 {
1275 
1276           kdtrace_proc_dtor(NULL, p);
1277           pool_cache_put(proc_cache, p);
1278 }
1279 
1280 /*
1281  * proc_enterpgrp: move p to a new or existing process group (and session).
1282  *
1283  * If we are creating a new pgrp, the pgid should equal
1284  * the calling process' pid.
1285  * If is only valid to enter a process group that is in the session
1286  * of the process.
1287  * Also mksess should only be set if we are creating a process group
1288  *
1289  * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
1290  */
1291 int
proc_enterpgrp(struct proc * curp,pid_t pid,pid_t pgid,bool mksess)1292 proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
1293 {
1294           struct pgrp *new_pgrp, *pgrp;
1295           struct session *sess;
1296           struct proc *p;
1297           int rval;
1298           pid_t pg_id = NO_PGID;
1299 
1300           /* Allocate data areas we might need before doing any validity checks */
1301           sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
1302           new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
1303 
1304           mutex_enter(&proc_lock);
1305 
1306           /* Check pgrp exists or can be created */
1307           pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
1308           if (pgrp != NULL && pgrp->pg_id != pgid)
1309                     goto eperm;
1310 
1311           /* Can only set another process under restricted circumstances. */
1312           if (pid != curp->p_pid) {
1313                     /* Must exist and be one of our children... */
1314                     p = proc_find_internal(pid, false);
1315                     if (p == NULL || !p_inferior(p, curp)) {
1316                               rval = SET_ERROR(ESRCH);
1317                               goto done;
1318                     }
1319                     /* ... in the same session... */
1320                     if (sess != NULL || p->p_session != curp->p_session)
1321                               goto eperm;
1322                     /* ... existing pgid must be in same session ... */
1323                     if (pgrp != NULL && pgrp->pg_session != p->p_session)
1324                               goto eperm;
1325                     /* ... and not done an exec. */
1326                     if (p->p_flag & PK_EXEC) {
1327                               rval = SET_ERROR(EACCES);
1328                               goto done;
1329                     }
1330           } else {
1331                     /* ... setsid() cannot re-enter a pgrp */
1332                     if (mksess && (curp->p_pgid == curp->p_pid ||
1333                         pgrp_find(curp->p_pid)))
1334                               goto eperm;
1335                     p = curp;
1336           }
1337 
1338           /* Changing the process group/session of a session
1339              leader is definitely off limits. */
1340           if (SESS_LEADER(p)) {
1341                     if (sess == NULL && p->p_pgrp == pgrp) {
1342                               /* unless it's a definite noop */
1343                               rval = 0;
1344                               goto done;
1345                     }
1346                     goto eperm;
1347           }
1348 
1349           /* Can only create a process group with id of process */
1350           if (pgrp == NULL && pgid != pid)
1351                     goto eperm;
1352 
1353           /* Can only create a session if creating pgrp */
1354           if (sess != NULL && pgrp != NULL)
1355                     goto eperm;
1356 
1357           /* Check we allocated memory for a pgrp... */
1358           if (pgrp == NULL && new_pgrp == NULL)
1359                     goto eperm;
1360 
1361           /* Don't attach to 'zombie' pgrp */
1362           if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
1363                     goto eperm;
1364 
1365           /* Expect to succeed now */
1366           rval = 0;
1367 
1368           if (pgrp == p->p_pgrp)
1369                     /* nothing to do */
1370                     goto done;
1371 
1372           /* Ok all setup, link up required structures */
1373 
1374           if (pgrp == NULL) {
1375                     pgrp = new_pgrp;
1376                     new_pgrp = NULL;
1377                     if (sess != NULL) {
1378                               sess->s_sid = p->p_pid;
1379                               sess->s_leader = p;
1380                               sess->s_count = 1;
1381                               sess->s_ttyvp = NULL;
1382                               sess->s_ttyp = NULL;
1383                               sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
1384                               memcpy(sess->s_login, p->p_session->s_login,
1385                                   sizeof(sess->s_login));
1386                               p->p_lflag &= ~PL_CONTROLT;
1387                     } else {
1388                               sess = p->p_pgrp->pg_session;
1389                               proc_sesshold(sess);
1390                     }
1391                     pgrp->pg_session = sess;
1392                     sess = NULL;
1393 
1394                     pgrp->pg_id = pgid;
1395                     LIST_INIT(&pgrp->pg_members);
1396 #ifdef DIAGNOSTIC
1397                     if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
1398                               panic("enterpgrp: pgrp table slot in use");
1399                     if (__predict_false(mksess && p != curp))
1400                               panic("enterpgrp: mksession and p != curproc");
1401 #endif
1402                     pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
1403                     pgrp->pg_jobc = 0;
1404           }
1405 
1406           /*
1407            * Adjust eligibility of affected pgrps to participate in job control.
1408            * Increment eligibility counts before decrementing, otherwise we
1409            * could reach 0 spuriously during the first call.
1410            */
1411           fixjobc(p, pgrp, 1);
1412           fixjobc(p, p->p_pgrp, 0);
1413 
1414           /* Interlock with ttread(). */
1415           mutex_spin_enter(&tty_lock);
1416 
1417           /* Move process to requested group. */
1418           LIST_REMOVE(p, p_pglist);
1419           if (LIST_EMPTY(&p->p_pgrp->pg_members))
1420                     /* defer delete until we've dumped the lock */
1421                     pg_id = p->p_pgrp->pg_id;
1422           p->p_pgrp = pgrp;
1423           LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
1424 
1425           /* Done with the swap; we can release the tty mutex. */
1426           mutex_spin_exit(&tty_lock);
1427           goto done;
1428 
1429 eperm:
1430           rval = SET_ERROR(EPERM);
1431 done:
1432           if (pg_id != NO_PGID) {
1433                     /* Releases proc_lock. */
1434                     pg_delete(pg_id);
1435           } else {
1436                     mutex_exit(&proc_lock);
1437           }
1438           if (sess != NULL)
1439                     kmem_free(sess, sizeof(*sess));
1440           if (new_pgrp != NULL)
1441                     kmem_free(new_pgrp, sizeof(*new_pgrp));
1442 #ifdef DEBUG_PGRP
1443           if (__predict_false(rval))
1444                     printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
1445                               pid, pgid, mksess, curp->p_pid, rval);
1446 #endif
1447           return rval;
1448 }
1449 
1450 /*
1451  * proc_leavepgrp: remove a process from its process group.
1452  *  => must be called with the proc_lock held, which will be released;
1453  */
1454 void
proc_leavepgrp(struct proc * p)1455 proc_leavepgrp(struct proc *p)
1456 {
1457           struct pgrp *pgrp;
1458 
1459           KASSERT(mutex_owned(&proc_lock));
1460 
1461           /* Interlock with ttread() */
1462           mutex_spin_enter(&tty_lock);
1463           pgrp = p->p_pgrp;
1464           LIST_REMOVE(p, p_pglist);
1465           p->p_pgrp = NULL;
1466           mutex_spin_exit(&tty_lock);
1467 
1468           if (LIST_EMPTY(&pgrp->pg_members)) {
1469                     /* Releases proc_lock. */
1470                     pg_delete(pgrp->pg_id);
1471           } else {
1472                     mutex_exit(&proc_lock);
1473           }
1474 }
1475 
1476 /*
1477  * pg_remove: remove a process group from the table.
1478  *  => must be called with the proc_lock held;
1479  *  => returns process group to free;
1480  */
1481 static struct pgrp *
pg_remove(pid_t pg_id)1482 pg_remove(pid_t pg_id)
1483 {
1484           struct pgrp *pgrp;
1485           struct pid_table *pt;
1486 
1487           KASSERT(mutex_owned(&proc_lock));
1488 
1489           pt = &pid_table[pg_id & pid_tbl_mask];
1490           pgrp = pt->pt_pgrp;
1491 
1492           KASSERT(pgrp != NULL);
1493           KASSERT(pgrp->pg_id == pg_id);
1494           KASSERT(LIST_EMPTY(&pgrp->pg_members));
1495 
1496           pt->pt_pgrp = NULL;
1497 
1498           if (!PT_VALID(pt->pt_slot)) {
1499                     /* Orphaned pgrp, put slot onto free list. */
1500                     KASSERT((PT_NEXT(pt->pt_slot) & pid_tbl_mask) == 0);
1501                     pg_id &= pid_tbl_mask;
1502                     pt = &pid_table[last_free_pt];
1503                     pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pg_id);
1504                     KASSERT(pt->pt_pid == 0);
1505                     last_free_pt = pg_id;
1506                     pid_alloc_cnt--;
1507           }
1508           return pgrp;
1509 }
1510 
1511 /*
1512  * pg_delete: delete and free a process group.
1513  *  => must be called with the proc_lock held, which will be released.
1514  */
1515 static void
pg_delete(pid_t pg_id)1516 pg_delete(pid_t pg_id)
1517 {
1518           struct pgrp *pg;
1519           struct tty *ttyp;
1520           struct session *ss;
1521 
1522           KASSERT(mutex_owned(&proc_lock));
1523 
1524           pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
1525           if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
1526                     mutex_exit(&proc_lock);
1527                     return;
1528           }
1529 
1530           ss = pg->pg_session;
1531 
1532           /* Remove reference (if any) from tty to this process group */
1533           mutex_spin_enter(&tty_lock);
1534           ttyp = ss->s_ttyp;
1535           if (ttyp != NULL && ttyp->t_pgrp == pg) {
1536                     ttyp->t_pgrp = NULL;
1537                     KASSERT(ttyp->t_session == ss);
1538           }
1539           mutex_spin_exit(&tty_lock);
1540 
1541           /*
1542            * The leading process group in a session is freed by proc_sessrele(),
1543            * if last reference.  It will also release the locks.
1544            */
1545           pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
1546           proc_sessrele(ss);
1547 
1548           if (pg != NULL) {
1549                     /* Free it, if was not done above. */
1550                     kmem_free(pg, sizeof(struct pgrp));
1551           }
1552 }
1553 
1554 /*
1555  * Adjust pgrp jobc counters when specified process changes process group.
1556  * We count the number of processes in each process group that "qualify"
1557  * the group for terminal job control (those with a parent in a different
1558  * process group of the same session).  If that count reaches zero, the
1559  * process group becomes orphaned.  Check both the specified process'
1560  * process group and that of its children.
1561  * entering == 0 => p is leaving specified group.
1562  * entering == 1 => p is entering specified group.
1563  *
1564  * Call with proc_lock held.
1565  */
1566 void
fixjobc(struct proc * p,struct pgrp * pgrp,int entering)1567 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
1568 {
1569           struct pgrp *hispgrp;
1570           struct session *mysession = pgrp->pg_session;
1571           struct proc *child;
1572 
1573           KASSERT(mutex_owned(&proc_lock));
1574 
1575           /*
1576            * Check p's parent to see whether p qualifies its own process
1577            * group; if so, adjust count for p's process group.
1578            */
1579           hispgrp = p->p_pptr->p_pgrp;
1580           if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
1581                     if (entering) {
1582                               pgrp->pg_jobc++;
1583                               p->p_lflag &= ~PL_ORPHANPG;
1584                     } else {
1585                               /* KASSERT(pgrp->pg_jobc > 0); */
1586                               if (--pgrp->pg_jobc == 0)
1587                                         orphanpg(pgrp);
1588                     }
1589           }
1590 
1591           /*
1592            * Check this process' children to see whether they qualify
1593            * their process groups; if so, adjust counts for children's
1594            * process groups.
1595            */
1596           LIST_FOREACH(child, &p->p_children, p_sibling) {
1597                     hispgrp = child->p_pgrp;
1598                     if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
1599                         !P_ZOMBIE(child)) {
1600                               if (entering) {
1601                                         child->p_lflag &= ~PL_ORPHANPG;
1602                                         hispgrp->pg_jobc++;
1603                               } else {
1604                                         KASSERT(hispgrp->pg_jobc > 0);
1605                                         if (--hispgrp->pg_jobc == 0)
1606                                                   orphanpg(hispgrp);
1607                               }
1608                     }
1609           }
1610 }
1611 
1612 /*
1613  * A process group has become orphaned;
1614  * if there are any stopped processes in the group,
1615  * hang-up all process in that group.
1616  *
1617  * Call with proc_lock held.
1618  */
1619 static void
orphanpg(struct pgrp * pg)1620 orphanpg(struct pgrp *pg)
1621 {
1622           struct proc *p;
1623 
1624           KASSERT(mutex_owned(&proc_lock));
1625 
1626           LIST_FOREACH(p, &pg->pg_members, p_pglist) {
1627                     if (p->p_stat == SSTOP) {
1628                               p->p_lflag |= PL_ORPHANPG;
1629                               psignal(p, SIGHUP);
1630                               psignal(p, SIGCONT);
1631                     }
1632           }
1633 }
1634 
1635 #ifdef DDB
1636 #include <ddb/db_output.h>
1637 void pidtbl_dump(void);
1638 void
pidtbl_dump(void)1639 pidtbl_dump(void)
1640 {
1641           struct pid_table *pt;
1642           struct proc *p;
1643           struct pgrp *pgrp;
1644           uintptr_t slot;
1645           int id;
1646 
1647           db_printf("pid table %p size %x, next %x, last %x\n",
1648                     pid_table, pid_tbl_mask+1,
1649                     next_free_pt, last_free_pt);
1650           for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
1651                     slot = pt->pt_slot;
1652                     if (!PT_VALID(slot) && !pt->pt_pgrp)
1653                               continue;
1654                     if (PT_IS_LWP(slot)) {
1655                               p = PT_GET_LWP(slot)->l_proc;
1656                     } else if (PT_IS_PROC(slot)) {
1657                               p = PT_GET_PROC(slot);
1658                     } else {
1659                               p = NULL;
1660                     }
1661                     db_printf("  id %x: ", id);
1662                     if (p != NULL)
1663                               db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
1664                                         pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
1665                     else
1666                               db_printf("next %x use %x\n",
1667                                         PT_NEXT(slot) & pid_tbl_mask,
1668                                         PT_NEXT(slot) & ~pid_tbl_mask);
1669                     if ((pgrp = pt->pt_pgrp)) {
1670                               db_printf("\tsession %p, sid %d, count %d, login %s\n",
1671                                   pgrp->pg_session, pgrp->pg_session->s_sid,
1672                                   pgrp->pg_session->s_count,
1673                                   pgrp->pg_session->s_login);
1674                               db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
1675                                   pgrp, pgrp->pg_id, pgrp->pg_jobc,
1676                                   LIST_FIRST(&pgrp->pg_members));
1677                               LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
1678                                         db_printf("\t\tpid %d addr %p pgrp %p %s\n",
1679                                             p->p_pid, p, p->p_pgrp, p->p_comm);
1680                               }
1681                     }
1682           }
1683 }
1684 #endif /* DDB */
1685 
1686 #ifdef KSTACK_CHECK_MAGIC
1687 
/*
 * Fill pattern: kstack_setup_magic() stores this value into every 32-bit
 * word of a kernel stack, and kstack_check_magic() looks for the first
 * word that no longer holds it.
 */
1688 #define   KSTACK_MAGIC        0xdeadbeaf
1689 
1690 /* XXX should be per process basis? */
/*
 * kstackleftmin:   smallest "stack left" value kstack_check_magic() has
 *                  computed so far; starts at KSTACK_SIZE.
 * kstackleftthres: a new minimum below this value makes
 *                  kstack_check_magic() print a warning.
 */
1691 static int          kstackleftmin = KSTACK_SIZE;
1692 static int          kstackleftthres = KSTACK_SIZE / 8;
1693 
1694 void
kstack_setup_magic(const struct lwp * l)1695 kstack_setup_magic(const struct lwp *l)
1696 {
1697           uint32_t *ip;
1698           uint32_t const *end;
1699 
1700           KASSERT(l != NULL);
1701           KASSERT(l != &lwp0);
1702 
1703           /*
1704            * fill all the stack with magic number
1705            * so that later modification on it can be detected.
1706            */
1707           ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1708           end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1709           for (; ip < end; ip++) {
1710                     *ip = KSTACK_MAGIC;
1711           }
1712 }
1713 
1714 void
kstack_check_magic(const struct lwp * l)1715 kstack_check_magic(const struct lwp *l)
1716 {
1717           uint32_t const *ip, *end;
1718           int stackleft;
1719 
1720           KASSERT(l != NULL);
1721 
1722           /* don't check proc0 */ /*XXX*/
1723           if (l == &lwp0)
1724                     return;
1725 
1726 #ifdef __MACHINE_STACK_GROWS_UP
1727           /* stack grows upwards (eg. hppa) */
1728           ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1729           end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1730           for (ip--; ip >= end; ip--)
1731                     if (*ip != KSTACK_MAGIC)
1732                               break;
1733 
1734           stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip;
1735 #else /* __MACHINE_STACK_GROWS_UP */
1736           /* stack grows downwards (eg. i386) */
1737           ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1738           end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1739           for (; ip < end; ip++)
1740                     if (*ip != KSTACK_MAGIC)
1741                               break;
1742 
1743           stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
1744 #endif /* __MACHINE_STACK_GROWS_UP */
1745 
1746           if (kstackleftmin > stackleft) {
1747                     kstackleftmin = stackleft;
1748                     if (stackleft < kstackleftthres)
1749                               printf("warning: kernel stack left %d bytes"
1750                                   "(pid %u:lid %u)\n", stackleft,
1751                                   (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1752           }
1753 
1754           if (stackleft <= 0) {
1755                     panic("magic on the top of kernel stack changed for "
1756                         "pid %u, lid %u: maybe kernel stack overflow",
1757                         (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1758           }
1759 }
1760 #endif /* KSTACK_CHECK_MAGIC */
1761 
1762 int
proclist_foreach_call(struct proclist * list,int (* callback)(struct proc *,void * arg),void * arg)1763 proclist_foreach_call(struct proclist *list,
1764     int (*callback)(struct proc *, void *arg), void *arg)
1765 {
1766           struct proc marker;
1767           struct proc *p;
1768           int ret = 0;
1769 
1770           marker.p_flag = PK_MARKER;
1771           mutex_enter(&proc_lock);
1772           for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
1773                     if (p->p_flag & PK_MARKER) {
1774                               p = LIST_NEXT(p, p_list);
1775                               continue;
1776                     }
1777                     LIST_INSERT_AFTER(p, &marker, p_list);
1778                     ret = (*callback)(p, arg);
1779                     KASSERT(mutex_owned(&proc_lock));
1780                     p = LIST_NEXT(&marker, p_list);
1781                     LIST_REMOVE(&marker, p_list);
1782           }
1783           mutex_exit(&proc_lock);
1784 
1785           return ret;
1786 }
1787 
1788 int
proc_vmspace_getref(struct proc * p,struct vmspace ** vm)1789 proc_vmspace_getref(struct proc *p, struct vmspace **vm)
1790 {
1791 
1792           /* XXXCDC: how should locking work here? */
1793 
1794           /* curproc exception is for coredump. */
1795 
1796           if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
1797               (p->p_vmspace->vm_refcnt < 1)) {
1798                     return SET_ERROR(EFAULT);
1799           }
1800 
1801           uvmspace_addref(p->p_vmspace);
1802           *vm = p->p_vmspace;
1803 
1804           return 0;
1805 }
1806 
1807 /*
1808  * Acquire a write lock on the process credential.
1809  */
1810 void
proc_crmod_enter(void)1811 proc_crmod_enter(void)
1812 {
1813           struct lwp *l = curlwp;
1814           struct proc *p = l->l_proc;
1815           kauth_cred_t oc;
1816 
1817           /* Reset what needs to be reset in plimit. */
1818           if (p->p_limit->pl_corename != defcorename) {
1819                     lim_setcorename(p, defcorename, 0);
1820           }
1821 
1822           mutex_enter(p->p_lock);
1823 
1824           /* Ensure the LWP cached credentials are up to date. */
1825           if ((oc = l->l_cred) != p->p_cred) {
1826                     l->l_cred = kauth_cred_hold(p->p_cred);
1827                     kauth_cred_free(oc);
1828           }
1829 }
1830 
1831 /*
1832  * Set in a new process credential, and drop the write lock.  The credential
1833  * must have a reference already.  Optionally, free a no-longer required
1834  * credential.
1835  */
1836 void
proc_crmod_leave(kauth_cred_t scred,kauth_cred_t fcred,bool sugid)1837 proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
1838 {
1839           struct lwp *l = curlwp, *l2;
1840           struct proc *p = l->l_proc;
1841           kauth_cred_t oc;
1842 
1843           KASSERT(mutex_owned(p->p_lock));
1844 
1845           /* Is there a new credential to set in? */
1846           if (scred != NULL) {
1847                     p->p_cred = scred;
1848                     LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
1849                               if (l2 != l) {
1850                                         lwp_lock(l2);
1851                                         l2->l_flag |= LW_CACHECRED;
1852                                         lwp_need_userret(l2);
1853                                         lwp_unlock(l2);
1854                               }
1855                     }
1856 
1857                     /* Ensure the LWP cached credentials are up to date. */
1858                     if ((oc = l->l_cred) != scred) {
1859                               l->l_cred = kauth_cred_hold(scred);
1860                     }
1861           } else
1862                     oc = NULL;          /* XXXgcc */
1863 
1864           if (sugid) {
1865                     /*
1866                      * Mark process as having changed credentials, stops
1867                      * tracing etc.
1868                      */
1869                     p->p_flag |= PK_SUGID;
1870           }
1871 
1872           mutex_exit(p->p_lock);
1873 
1874           /* If there is a credential to be released, free it now. */
1875           if (fcred != NULL) {
1876                     KASSERT(scred != NULL);
1877                     kauth_cred_free(fcred);
1878                     if (oc != scred)
1879                               kauth_cred_free(oc);
1880           }
1881 }
1882 
1883 /*
1884  * proc_specific_key_create --
1885  *        Create a key for subsystem proc-specific data.
1886  */
1887 int
proc_specific_key_create(specificdata_key_t * keyp,specificdata_dtor_t dtor)1888 proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1889 {
1890 
1891           return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
1892 }
1893 
1894 /*
1895  * proc_specific_key_delete --
1896  *        Delete a key for subsystem proc-specific data.
1897  */
1898 void
proc_specific_key_delete(specificdata_key_t key)1899 proc_specific_key_delete(specificdata_key_t key)
1900 {
1901 
1902           specificdata_key_delete(proc_specificdata_domain, key);
1903 }
1904 
1905 /*
1906  * proc_initspecific --
1907  *        Initialize a proc's specificdata container.
1908  */
1909 void
proc_initspecific(struct proc * p)1910 proc_initspecific(struct proc *p)
1911 {
1912           int error __diagused;
1913 
1914           error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
1915           KASSERT(error == 0);
1916 }
1917 
1918 /*
1919  * proc_finispecific --
1920  *        Finalize a proc's specificdata container.
1921  */
1922 void
proc_finispecific(struct proc * p)1923 proc_finispecific(struct proc *p)
1924 {
1925 
1926           specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
1927 }
1928 
1929 /*
1930  * proc_getspecific --
1931  *        Return proc-specific data corresponding to the specified key.
1932  */
1933 void *
proc_getspecific(struct proc * p,specificdata_key_t key)1934 proc_getspecific(struct proc *p, specificdata_key_t key)
1935 {
1936 
1937           return (specificdata_getspecific(proc_specificdata_domain,
1938                                                    &p->p_specdataref, key));
1939 }
1940 
1941 /*
1942  * proc_setspecific --
1943  *        Set proc-specific data corresponding to the specified key.
1944  */
1945 void
proc_setspecific(struct proc * p,specificdata_key_t key,void * data)1946 proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
1947 {
1948 
1949           specificdata_setspecific(proc_specificdata_domain,
1950                                          &p->p_specdataref, key, data);
1951 }
1952 
1953 int
proc_uidmatch(kauth_cred_t cred,kauth_cred_t target)1954 proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
1955 {
1956 
1957           if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
1958               kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
1959                     /*
1960                      * suid proc of ours or proc not ours
1961                      */
1962                     return SET_ERROR(EPERM);
1963           } else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
1964                     /*
1965                      * sgid proc has sgid back to us temporarily
1966                      */
1967                     return SET_ERROR(EPERM);
1968           } else {
1969                     /*
1970                      * our rgid must be in target's group list (ie,
1971                      * sub-processes started by a sgid process)
1972                      */
1973                     int ismember = 0;
1974 
1975                     if (kauth_cred_ismember_gid(cred,
1976                         kauth_cred_getgid(target), &ismember) != 0 ||
1977                         !ismember)
1978                               return SET_ERROR(EPERM);
1979           }
1980 
1981           return 0;
1982 }
1983 
1984 /*
1985  * sysctl stuff
1986  */
1987 
/*
 * Extra space (five struct kinfo_proc records) that sysctl_doeproc() adds
 * to the size estimate it reports when the caller passes no output buffer.
 */
1988 #define KERN_PROCSLOP         (5 * sizeof(struct kinfo_proc))
1989 
/*
 * Flag translation tables.  Each table is a flat list of u_int values
 * ending in a single 0.  Judging by the names, every entry is a pair:
 * a kernel-internal flag (PK_*, PS_*, PSL_*, PL_*, PST_*, LW_*) followed
 * by the P_* / L_* flag it is reported as.
 * NOTE(review): the code that consumes these tables is not in this part
 * of the file -- confirm the pair layout against it.
 */

/* Pairs for PK_* flags. */
1990 static const u_int sysctl_flagmap[] = {
1991           PK_ADVLOCK, P_ADVLOCK,
1992           PK_EXEC, P_EXEC,
1993           PK_NOCLDWAIT, P_NOCLDWAIT,
1994           PK_32, P_32,
1995           PK_CLDSIGIGN, P_CLDSIGIGN,
1996           PK_SUGID, P_SUGID,
1997           0
1998 };
1999 
/* Pairs for PS_* flags. */
2000 static const u_int sysctl_sflagmap[] = {
2001           PS_NOCLDSTOP, P_NOCLDSTOP,
2002           PS_WEXIT, P_WEXIT,
2003           PS_STOPFORK, P_STOPFORK,
2004           PS_STOPEXEC, P_STOPEXEC,
2005           PS_STOPEXIT, P_STOPEXIT,
2006           0
2007 };
2008 
/* Pairs for PSL_* flags. */
2009 static const u_int sysctl_slflagmap[] = {
2010           PSL_TRACED, P_TRACED,
2011           PSL_CHTRACED, P_CHTRACED,
2012           PSL_SYSCALL, P_SYSCALL,
2013           0
2014 };
2015 
/* Pairs for PL_* flags. */
2016 static const u_int sysctl_lflagmap[] = {
2017           PL_CONTROLT, P_CONTROLT,
2018           PL_PPWAIT, P_PPWAIT,
2019           0
2020 };
2021 
/* Pairs for PST_* flags. */
2022 static const u_int sysctl_stflagmap[] = {
2023           PST_PROFIL, P_PROFIL,
2024           0
2025 
2026 };
2027 
/* Pairs for LW_* flags; this table is not static. */
2028 /* used by kern_lwp also */
2029 const u_int sysctl_lwpflagmap[] = {
2030           LW_SINTR, L_SINTR,
2031           LW_SYSTEM, L_SYSTEM,
2032           0
2033 };
2034 
2035 /*
2036  * Find the most ``active'' lwp of a process and return it for ps display
2037  * purposes
2038  */
2039 static struct lwp *
proc_active_lwp(struct proc * p)2040 proc_active_lwp(struct proc *p)
2041 {
2042           static const int ostat[] = {
2043                     0,
2044                     2,        /* LSIDL */
2045                     6,        /* LSRUN */
2046                     5,        /* LSSLEEP */
2047                     4,        /* LSSTOP */
2048                     0,        /* LSZOMB */
2049                     1,        /* LSDEAD */
2050                     7,        /* LSONPROC */
2051                     3         /* LSSUSPENDED */
2052           };
2053 
2054           struct lwp *l, *lp = NULL;
2055           LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2056                     KASSERT(l->l_stat >= 0);
2057                     KASSERT(l->l_stat < __arraycount(ostat));
2058                     if (lp == NULL ||
2059                         ostat[l->l_stat] > ostat[lp->l_stat] ||
2060                         (ostat[l->l_stat] == ostat[lp->l_stat] &&
2061                         l->l_cpticks > lp->l_cpticks)) {
2062                               lp = l;
2063                               continue;
2064                     }
2065           }
2066           return lp;
2067 }
2068 
/*
 * sysctl_doeproc --
 *	sysctl handler that copies out one record per matching process.
 *
 *	The node number (rnode->sysctl_num) selects the record layout:
 *	  - KERN_PROC: records are struct kinfo_proc.  name[] is { op } for
 *	    KERN_PROC_ALL and { op, arg } for every other op.
 *	  - anything else: records are struct kinfo_proc2 and name[] is
 *	    { op, arg, elem_size, elem_count }; elem_size is the stride in
 *	    the caller's buffer and elem_count caps the number of records.
 *
 *	zombproc is walked first, then allproc.  For each process that is
 *	not a marker, is not in SIDL state, passes the kauth CANSEE check
 *	and matches op/arg, a record is copied out while room remains.
 *	Every match adds elem_size to the required size, copied or not.
 *
 *	With no output buffer, *oldlenp is set to the required size plus
 *	KERN_PROCSLOP and 0 is returned.  With a buffer, *oldlenp is set to
 *	the number of bytes advanced and ENOMEM is returned if more was
 *	needed.  Other failures: EINVAL for a malformed name or unknown op,
 *	or the error from sysctl_copyout().
 *
 *	name, namelen, oldp, oldlenp, rnode and l are not declared in this
 *	function; presumably they are supplied by SYSCTLFN_ARGS -- confirm
 *	against its definition.
 */
2069 static int
sysctl_doeproc(SYSCTLFN_ARGS)2070 sysctl_doeproc(SYSCTLFN_ARGS)
2071 {
               /* Scratch space for one record of either layout. */
2072           union {
2073                     struct kinfo_proc kproc;
2074                     struct kinfo_proc2 kproc2;
2075           } *kbuf;
2076           struct proc *p, *next, *marker;
2077           char *where, *dp;
2078           int type, op, arg, error;
2079           u_int elem_size, kelem_size, elem_count;
2080           size_t buflen, needed;
2081           bool match, zombie, mmmbrains;
2082           const bool allowaddr = get_expose_address(curproc);
2083 
2084           if (namelen == 1 && name[0] == CTL_QUERY)
2085                     return (sysctl_query(SYSCTLFN_CALL(rnode)));
2086 
               /* where: start of the output buffer; dp: current write position. */
2087           dp = where = oldp;
               /* Without an output buffer nothing is copied, only sized. */
2088           buflen = where != NULL ? *oldlenp : 0;
2089           error = 0;
2090           needed = 0;
2091           type = rnode->sysctl_num;
2092 
2093           if (type == KERN_PROC) {
2094                     if (namelen == 0)
2095                               return SET_ERROR(EINVAL);
2096                     switch (op = name[0]) {
2097                     case KERN_PROC_ALL:
2098                               if (namelen != 1)
2099                                         return SET_ERROR(EINVAL);
2100                               arg = 0;
2101                               break;
2102                     default:
2103                               if (namelen != 2)
2104                                         return SET_ERROR(EINVAL);
2105                               arg = name[1];
2106                               break;
2107                     }
2108                     elem_count = 0;     /* Hush little compiler, don't you cry */
2109                     kelem_size = elem_size = sizeof(kbuf->kproc);
2110           } else {
2111                     if (namelen != 4)
2112                               return SET_ERROR(EINVAL);
2113                     op = name[0];
2114                     arg = name[1];
                         /* Caller-chosen record stride and record limit. */
2115                     elem_size = name[2];
2116                     elem_count = name[3];
2117                     kelem_size = sizeof(kbuf->kproc2);
2118           }
2119 
               /*
                * The sysctl lock is released here and re-taken on every
                * return path below.
                */
2120           sysctl_unlock();
2121 
2122           kbuf = kmem_zalloc(sizeof(*kbuf), KM_SLEEP);
               /*
                * Place holder linked into the process list while proc_lock is
                * dropped.  Only p_flag is initialised; list walkers skip
                * entries that have PK_MARKER set.
                */
2123           marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
2124           marker->p_flag = PK_MARKER;
2125 
2126           mutex_enter(&proc_lock);
2127           /*
2128            * Start with zombies to prevent reporting processes twice, in case they
2129            * are dying and being moved from the list of alive processes to zombies.
2130            */
               /* mmmbrains is true while the zombproc pass is in progress. */
2131           mmmbrains = true;
2132           for (p = LIST_FIRST(&zombproc);; p = next) {
2133                     if (p == NULL) {
                                   /*
                                    * End of the current list: switch to allproc
                                    * once; the second time around, stop.
                                    */
2134                               if (mmmbrains) {
2135                                         p = LIST_FIRST(&allproc);
2136                                         mmmbrains = false;
2137                               }
2138                               if (p == NULL)
2139                                         break;
2140                     }
2141                     next = LIST_NEXT(p, p_list);
2142                     if ((p->p_flag & PK_MARKER) != 0)
2143                               continue;
2144 
2145                     /*
2146                      * Skip embryonic processes.
2147                      */
2148                     if (p->p_stat == SIDL)
2149                               continue;
2150 
2151                     mutex_enter(p->p_lock);
                         /* Processes the caller may not see are silently skipped. */
2152                     error = kauth_authorize_process(l->l_cred,
2153                         KAUTH_PROCESS_CANSEE, p,
2154                         KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_EPROC), NULL, NULL);
2155                     if (error != 0) {
2156                               mutex_exit(p->p_lock);
2157                               continue;
2158                     }
2159 
2160                     /*
2161                      * Handling all the operations in one switch, at the cost of
2162                      * algorithmic complexity, is on purpose.  Splitting this
2163                      * function into several similar copies would add maintenance
2164                      * burden and code growth, and the speed-up would be negligible
2165                      * in practical systems.
2166                      */
2167                     switch (op) {
2168                     case KERN_PROC_PID:
2169                               match = (p->p_pid == (pid_t)arg);
2170                               break;
2171 
2172                     case KERN_PROC_PGRP:
2173                               match = (p->p_pgrp->pg_id == (pid_t)arg);
2174                               break;
2175 
2176                     case KERN_PROC_SESSION:
2177                               match = (p->p_session->s_sid == (pid_t)arg);
2178                               break;
2179 
2180                     case KERN_PROC_TTY:
                                   /*
                                    * Three cases, checked in order:
                                    *  - arg is KERN_PROC_TTY_REVOKE: match only a
                                    *    process with PL_CONTROLT whose session has
                                    *    s_ttyp set and s_ttyvp NULL;
                                    *  - the process has no controlling tty: match
                                    *    only if arg is KERN_PROC_TTY_NODEV;
                                    *  - otherwise: match if the tty's device
                                    *    number equals arg.
                                    */
2181                               match = true;
2182                               if (arg == (int) KERN_PROC_TTY_REVOKE) {
2183                                         if ((p->p_lflag & PL_CONTROLT) == 0 ||
2184                                             p->p_session->s_ttyp == NULL ||
2185                                             p->p_session->s_ttyvp != NULL) {
2186                                                   match = false;
2187                                         }
2188                               } else if ((p->p_lflag & PL_CONTROLT) == 0 ||
2189                                   p->p_session->s_ttyp == NULL) {
2190                                         if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
2191                                                   match = false;
2192                                         }
2193                               } else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
2194                                         match = false;
2195                               }
2196                               break;
2197 
2198                     case KERN_PROC_UID:
2199                               match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
2200                               break;
2201 
2202                     case KERN_PROC_RUID:
2203                               match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
2204                               break;
2205 
2206                     case KERN_PROC_GID:
2207                               match = (kauth_cred_getegid(p->p_cred) == (uid_t)arg);
2208                               break;
2209 
2210                     case KERN_PROC_RGID:
2211                               match = (kauth_cred_getgid(p->p_cred) == (uid_t)arg);
2212                               break;
2213 
2214                     case KERN_PROC_ALL:
2215                               match = true;
2216                               /* allow everything */
2217                               break;
2218 
2219                     default:
                                   /* Unknown op: give up on the whole request. */
2220                               error = SET_ERROR(EINVAL);
2221                               mutex_exit(p->p_lock);
2222                               goto cleanup;
2223                     }
2224                     if (!match) {
2225                               mutex_exit(p->p_lock);
2226                               continue;
2227                     }
2228 
2229                     /*
2230                      * Grab a hold on the process.
2231                      */
                         /*
                          * During the zombproc pass, or when p_reflock cannot be
                          * read-locked, the process is treated as a zombie and the
                          * marker is linked in after it to remember the position.
                          * Otherwise the read hold on p_reflock is kept until the
                          * record has been dealt with.
                          */
2232                     if (mmmbrains) {
2233                               zombie = true;
2234                     } else {
2235                               zombie = !rw_tryenter(&p->p_reflock, RW_READER);
2236                     }
2237                     if (zombie) {
2238                               LIST_INSERT_AFTER(p, marker, p_list);
2239                     }
2240 
                         /*
                          * Copy a record only while a whole element still fits
                          * and, for the kinfo_proc2 layout, the caller's record
                          * limit is not used up.
                          */
2241                     if (buflen >= elem_size &&
2242                         (type == KERN_PROC || elem_count > 0)) {
2243                               ruspace(p);         /* Update process vm resource use */
2244 
2245                               if (type == KERN_PROC) {
2246                                         fill_proc(p, &kbuf->kproc.kp_proc, allowaddr);
2247                                         fill_eproc(p, &kbuf->kproc.kp_eproc, zombie,
2248                                             allowaddr);
2249                               } else {
2250                                         fill_kproc2(p, &kbuf->kproc2, zombie,
2251                                             allowaddr);
2252                                         elem_count--;
2253                               }
                                   /* Both locks are dropped around the copyout. */
2254                               mutex_exit(p->p_lock);
2255                               mutex_exit(&proc_lock);
2256                               /*
2257                                * Copy out elem_size, but not larger than kelem_size
2258                                */
2259                               error = sysctl_copyout(l, kbuf, dp,
2260                                   uimin(kelem_size, elem_size));
2261                               mutex_enter(&proc_lock);
2262                               if (error) {
2263                                         goto bah;
2264                               }
2265                               dp += elem_size;
2266                               buflen -= elem_size;
2267                     } else {
2268                               mutex_exit(p->p_lock);
2269                     }
                         /* Counted for every match, whether or not it was copied. */
2270                     needed += elem_size;
2271 
2272                     /*
2273                      * Release reference to process.
2274                      */
                         /*
                          * next is recomputed here because the value taken at the
                          * top of the loop predates the window in which proc_lock
                          * was dropped.
                          */
2275                     if (zombie) {
2276                               next = LIST_NEXT(marker, p_list);
2277                               LIST_REMOVE(marker, p_list);
2278                     } else {
2279                               rw_exit(&p->p_reflock);
2280                               next = LIST_NEXT(p, p_list);
2281                     }
2282 
2283                     /*
2284                      * Short-circuit break quickly!
2285                      */
2286                     if (op == KERN_PROC_PID)
2287                     break;
2288           }
2289           mutex_exit(&proc_lock);
2290 
2291           if (where != NULL) {
                         /* Report bytes advanced; fail if more was needed. */
2292                     *oldlenp = dp - where;
2293                     if (needed > *oldlenp) {
2294                               error = SET_ERROR(ENOMEM);
2295                               goto out;
2296                     }
2297           } else {
                         /* Size query: report the estimate plus some slop. */
2298                     needed += KERN_PROCSLOP;
2299                     *oldlenp = needed;
2300           }
2301           kmem_free(kbuf, sizeof(*kbuf));
2302           kmem_free(marker, sizeof(*marker));
2303           sysctl_relock();
2304           return 0;
               /*
                * bah: reached after a failed copyout, with proc_lock held again;
                * undo the hold taken on the current process.
                */
2305  bah:
2306           if (zombie)
2307                     LIST_REMOVE(marker, p_list);
2308           else
2309                     rw_exit(&p->p_reflock);
               /* cleanup: reached with proc_lock held and no per-process hold. */
2310  cleanup:
2311           mutex_exit(&proc_lock);
               /* out: reached with proc_lock already released. */
2312  out:
2313           kmem_free(kbuf, sizeof(*kbuf));
2314           kmem_free(marker, sizeof(*marker));
2315           sysctl_relock();
2316           return error;
2317 }
2318 
2319 int
copyin_psstrings(struct proc * p,struct ps_strings * arginfo)2320 copyin_psstrings(struct proc *p, struct ps_strings *arginfo)
2321 {
2322 #if !defined(_RUMPKERNEL)
2323           int retval;
2324 
2325           if (p->p_flag & PK_32) {
2326                     MODULE_HOOK_CALL(kern_proc32_copyin_hook, (p, arginfo),
2327                         enosys(), retval);
2328                     return retval;
2329           }
2330 #endif /* !defined(_RUMPKERNEL) */
2331 
2332           return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo));
2333 }
2334 
/*
 * copy_procargs_sysctl_cb --
 *	Output callback handed to copy_procargs() by
 *	sysctl_kern_proc_args().  cookie_ points to two pointers: the
 *	requesting LWP and the start of the destination buffer.  len
 *	bytes from src are copied to offset off in that buffer with
 *	sysctl_copyout(), whose result is returned.
 */
static int
copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len)
{
	void **slots = cookie_;
	struct lwp *requester = slots[0];
	char *base = slots[1];

	return sysctl_copyout(requester, src, &base[off], len);
}
2344 
2345 /*
2346  * sysctl helper routine for kern.proc_args pseudo-subtree.
2347  */
/*
 * The request is read-only (newp must be NULL) and name[] must be
 * { pid, type }; anything else yields EINVAL.
 *
 *   KERN_PROC_PATHNAME, KERN_PROC_CWD
 *	handed to fill_pathname() / fill_cwd() with the sysctl lock
 *	dropped around the call.
 *   KERN_PROC_ARGV, KERN_PROC_ENV
 *	the strings are copied to oldp through copy_procargs().
 *   KERN_PROC_NARGV, KERN_PROC_NENV
 *	the string count from the process's ps_strings is copied out as
 *	an int.
 *
 * For the last four types the process is looked up by pid (EINVAL if not
 * found) and the matching kauth CANSEE check is applied.  If oldp is
 * NULL, only a size is reported in *oldlenp.  Zombies and PK_SYSTEM
 * processes give EINVAL; failure to read-lock p_reflock gives EBUSY.
 *
 * name, namelen, oldp, oldlenp, newp, rnode and l are not declared in this
 * function; presumably they are supplied by SYSCTLFN_ARGS -- confirm
 * against its definition.
 */
2348 static int
sysctl_kern_proc_args(SYSCTLFN_ARGS)2349 sysctl_kern_proc_args(SYSCTLFN_ARGS)
2350 {
2351           struct ps_strings pss;
2352           struct proc *p;
2353           pid_t pid;
2354           int type, error;
               /* cookie[0] = requesting lwp, cookie[1] = output buffer. */
2355           void *cookie[2];
2356 
2357           if (namelen == 1 && name[0] == CTL_QUERY)
2358                     return (sysctl_query(SYSCTLFN_CALL(rnode)));
2359 
2360           if (newp != NULL || namelen != 2)
2361                     return SET_ERROR(EINVAL);
2362           pid = name[0];
2363           type = name[1];
2364 
2365           switch (type) {
2366           case KERN_PROC_PATHNAME:
2367                     sysctl_unlock();
2368                     error = fill_pathname(l, pid, oldp, oldlenp);
2369                     sysctl_relock();
2370                     return error;
2371 
2372           case KERN_PROC_CWD:
2373                     sysctl_unlock();
2374                     error = fill_cwd(l, pid, oldp, oldlenp);
2375                     sysctl_relock();
2376                     return error;
2377 
2378           case KERN_PROC_ARGV:
2379           case KERN_PROC_NARGV:
2380           case KERN_PROC_ENV:
2381           case KERN_PROC_NENV:
2382                     /* ok */
2383                     break;
2384           default:
2385                     return SET_ERROR(EINVAL);
2386           }
2387 
               /* The sysctl lock is re-taken on every return path below. */
2388           sysctl_unlock();
2389 
2390           /* check pid */
2391           mutex_enter(&proc_lock);
2392           if ((p = proc_find(pid)) == NULL) {
2393                     error = SET_ERROR(EINVAL);
2394                     goto out_locked;
2395           }
2396           mutex_enter(p->p_lock);
2397 
2398           /* Check permission. */
2399           if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV)
2400                     error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
2401                         p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
2402           else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV)
2403                     error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
2404                         p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
2405           else
2406                     error = SET_ERROR(EINVAL); /* XXXGCC */
2407           if (error) {
2408                     mutex_exit(p->p_lock);
2409                     goto out_locked;
2410           }
2411 
               /*
                * Size query only: an int for the count types, ARG_MAX for the
                * string types.
                */
2412           if (oldp == NULL) {
2413                     if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV)
2414                               *oldlenp = sizeof (int);
2415                     else
2416                               *oldlenp = ARG_MAX; /* XXX XXX XXX */
2417                     error = 0;
2418                     mutex_exit(p->p_lock);
2419                     goto out_locked;
2420           }
2421 
2422           /*
2423            * Zombies don't have a stack, so we can't read their psstrings.
2424            * System processes also don't have a user stack.
2425            */
2426           if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
2427                     error = SET_ERROR(EINVAL);
2428                     mutex_exit(p->p_lock);
2429                     goto out_locked;
2430           }
2431 
               /*
                * Take a read hold on p_reflock before both locks are dropped;
                * it is released after the copy below.
                */
2432           error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : SET_ERROR(EBUSY);
2433           mutex_exit(p->p_lock);
2434           if (error) {
2435                     goto out_locked;
2436           }
2437           mutex_exit(&proc_lock);
2438 
2439           if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) {
2440                     int value;
2441                     if ((error = copyin_psstrings(p, &pss)) == 0) {
2442                               if (type == KERN_PROC_NARGV)
2443                                         value = pss.ps_nargvstr;
2444                               else
2445                                         value = pss.ps_nenvstr;
2446                               error = sysctl_copyout(l, &value, oldp, sizeof(value));
                                   /* Set even if the copyout above failed. */
2447                               *oldlenp = sizeof(value);
2448                     }
2449           } else {
2450                     cookie[0] = l;
2451                     cookie[1] = oldp;
2452                     error = copy_procargs(p, type, oldlenp,
2453                         copy_procargs_sysctl_cb, cookie);
2454           }
2455           rw_exit(&p->p_reflock);
2456           sysctl_relock();
2457           return error;
2458 
     /* out_locked: reached with proc_lock held and p->p_lock released. */
2459 out_locked:
2460           mutex_exit(&proc_lock);
2461           sysctl_relock();
2462           return error;
2463 }
2464 
2465 int
copy_procargs(struct proc * p,int oid,size_t * limit,int (* cb)(void *,const void *,size_t,size_t),void * cookie)2466 copy_procargs(struct proc *p, int oid, size_t *limit,
2467     int (*cb)(void *, const void *, size_t, size_t), void *cookie)
2468 {
2469           struct ps_strings pss;
2470           size_t len, i, loaded, entry_len;
2471           struct uio auio;
2472           struct iovec aiov;
2473           int error, argvlen;
2474           char *arg;
2475           char **argv;
2476           vaddr_t user_argv;
2477           struct vmspace *vmspace;
2478 
2479           /*
2480            * Allocate a temporary buffer to hold the argument vector and
2481            * the arguments themselve.
2482            */
2483           arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2484           argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2485 
2486           /*
2487            * Lock the process down in memory.
2488            */
2489           vmspace = p->p_vmspace;
2490           uvmspace_addref(vmspace);
2491 
2492           /*
2493            * Read in the ps_strings structure.
2494            */
2495           if ((error = copyin_psstrings(p, &pss)) != 0)
2496                     goto done;
2497 
2498           /*
2499            * Now read the address of the argument vector.
2500            */
2501           switch (oid) {
2502           case KERN_PROC_ARGV:
2503                     user_argv = (uintptr_t)pss.ps_argvstr;
2504                     argvlen = pss.ps_nargvstr;
2505                     break;
2506           case KERN_PROC_ENV:
2507                     user_argv = (uintptr_t)pss.ps_envstr;
2508                     argvlen = pss.ps_nenvstr;
2509                     break;
2510           default:
2511                     error = SET_ERROR(EINVAL);
2512                     goto done;
2513           }
2514 
2515           if (argvlen < 0) {
2516                     error = SET_ERROR(EIO);
2517                     goto done;
2518           }
2519 
2520 
2521           /*
2522            * Now copy each string.
2523            */
2524           len = 0; /* bytes written to user buffer */
2525           loaded = 0; /* bytes from argv already processed */
2526           i = 0; /* To make compiler happy */
2527           entry_len = PROC_PTRSZ(p);
2528 
2529           for (; argvlen; --argvlen) {
2530                     int finished = 0;
2531                     vaddr_t base;
2532                     size_t xlen;
2533                     int j;
2534 
2535                     if (loaded == 0) {
2536                               size_t rem = entry_len * argvlen;
2537                               loaded = MIN(rem, PAGE_SIZE);
2538                               error = copyin_vmspace(vmspace,
2539                                   (const void *)user_argv, argv, loaded);
2540                               if (error)
2541                                         break;
2542                               user_argv += loaded;
2543                               i = 0;
2544                     }
2545 
2546 #if !defined(_RUMPKERNEL)
2547                     if (p->p_flag & PK_32)
2548                               MODULE_HOOK_CALL(kern_proc32_base_hook,
2549                                   (argv, i++), 0, base);
2550                     else
2551 #endif /* !defined(_RUMPKERNEL) */
2552                               base = (vaddr_t)argv[i++];
2553                     loaded -= entry_len;
2554 
2555                     /*
2556                      * The program has messed around with its arguments,
2557                      * possibly deleting some, and replacing them with
2558                      * NULL's. Treat this as the last argument and not
2559                      * a failure.
2560                      */
2561                     if (base == 0)
2562                               break;
2563 
2564                     while (!finished) {
2565                               xlen = PAGE_SIZE - (base & PAGE_MASK);
2566 
2567                               aiov.iov_base = arg;
2568                               aiov.iov_len = PAGE_SIZE;
2569                               auio.uio_iov = &aiov;
2570                               auio.uio_iovcnt = 1;
2571                               auio.uio_offset = base;
2572                               auio.uio_resid = xlen;
2573                               auio.uio_rw = UIO_READ;
2574                               UIO_SETUP_SYSSPACE(&auio);
2575                               error = uvm_io(&vmspace->vm_map, &auio, 0);
2576                               if (error)
2577                                         goto done;
2578 
2579                               /* Look for the end of the string */
2580                               for (j = 0; j < xlen; j++) {
2581                                         if (arg[j] == '\0') {
2582                                                   xlen = j + 1;
2583                                                   finished = 1;
2584                                                   break;
2585                                         }
2586                               }
2587 
2588                               /* Check for user buffer overflow */
2589                               if (len + xlen > *limit) {
2590                                         finished = 1;
2591                                         if (len > *limit)
2592                                                   xlen = 0;
2593                                         else
2594                                                   xlen = *limit - len;
2595                               }
2596 
2597                               /* Copyout the page */
2598                               error = (*cb)(cookie, arg, len, xlen);
2599                               if (error)
2600                                         goto done;
2601 
2602                               len += xlen;
2603                               base += xlen;
2604                     }
2605           }
2606           *limit = len;
2607 
2608 done:
2609           kmem_free(argv, PAGE_SIZE);
2610           kmem_free(arg, PAGE_SIZE);
2611           uvmspace_free(vmspace);
2612           return error;
2613 }
2614 
2615 /*
2616  * Fill in a proc structure for the specified process.
2617  */
2618 static void
fill_proc(const struct proc * psrc,struct proc * p,bool allowaddr)2619 fill_proc(const struct proc *psrc, struct proc *p, bool allowaddr)
2620 {
2621           COND_SET_STRUCT(p->p_list, psrc->p_list, allowaddr);
2622           memset(&p->p_auxlock, 0, sizeof(p->p_auxlock));
2623           COND_SET_STRUCT(p->p_lock, psrc->p_lock, allowaddr);
2624           memset(&p->p_stmutex, 0, sizeof(p->p_stmutex));
2625           memset(&p->p_reflock, 0, sizeof(p->p_reflock));
2626           COND_SET_STRUCT(p->p_waitcv, psrc->p_waitcv, allowaddr);
2627           COND_SET_STRUCT(p->p_lwpcv, psrc->p_lwpcv, allowaddr);
2628           COND_SET_PTR(p->p_cred, psrc->p_cred, allowaddr);
2629           COND_SET_PTR(p->p_fd, psrc->p_fd, allowaddr);
2630           COND_SET_PTR(p->p_cwdi, psrc->p_cwdi, allowaddr);
2631           COND_SET_PTR(p->p_stats, psrc->p_stats, allowaddr);
2632           COND_SET_PTR(p->p_limit, psrc->p_limit, allowaddr);
2633           COND_SET_PTR(p->p_vmspace, psrc->p_vmspace, allowaddr);
2634           COND_SET_PTR(p->p_sigacts, psrc->p_sigacts, allowaddr);
2635           COND_SET_PTR(p->p_aio, psrc->p_aio, allowaddr);
2636           p->p_mqueue_cnt = psrc->p_mqueue_cnt;
2637           memset(&p->p_specdataref, 0, sizeof(p->p_specdataref));
2638           p->p_exitsig = psrc->p_exitsig;
2639           p->p_flag = psrc->p_flag;
2640           p->p_sflag = psrc->p_sflag;
2641           p->p_slflag = psrc->p_slflag;
2642           p->p_lflag = psrc->p_lflag;
2643           p->p_stflag = psrc->p_stflag;
2644           p->p_stat = psrc->p_stat;
2645           p->p_trace_enabled = psrc->p_trace_enabled;
2646           p->p_pid = psrc->p_pid;
2647           COND_SET_STRUCT(p->p_pglist, psrc->p_pglist, allowaddr);
2648           COND_SET_PTR(p->p_pptr, psrc->p_pptr, allowaddr);
2649           COND_SET_STRUCT(p->p_sibling, psrc->p_sibling, allowaddr);
2650           COND_SET_STRUCT(p->p_children, psrc->p_children, allowaddr);
2651           COND_SET_STRUCT(p->p_lwps, psrc->p_lwps, allowaddr);
2652           COND_SET_PTR(p->p_raslist, psrc->p_raslist, allowaddr);
2653           p->p_nlwps = psrc->p_nlwps;
2654           p->p_nzlwps = psrc->p_nzlwps;
2655           p->p_nrlwps = psrc->p_nrlwps;
2656           p->p_nlwpwait = psrc->p_nlwpwait;
2657           p->p_ndlwps = psrc->p_ndlwps;
2658           p->p_nstopchild = psrc->p_nstopchild;
2659           p->p_waited = psrc->p_waited;
2660           COND_SET_PTR(p->p_zomblwp, psrc->p_zomblwp, allowaddr);
2661           COND_SET_PTR(p->p_vforklwp, psrc->p_vforklwp, allowaddr);
2662           COND_SET_PTR(p->p_sched_info, psrc->p_sched_info, allowaddr);
2663           p->p_estcpu = psrc->p_estcpu;
2664           p->p_estcpu_inherited = psrc->p_estcpu_inherited;
2665           p->p_forktime = psrc->p_forktime;
2666           p->p_pctcpu = psrc->p_pctcpu;
2667           COND_SET_PTR(p->p_opptr, psrc->p_opptr, allowaddr);
2668           COND_SET_PTR(p->p_timers, psrc->p_timers, allowaddr);
2669           p->p_rtime = psrc->p_rtime;
2670           p->p_uticks = psrc->p_uticks;
2671           p->p_sticks = psrc->p_sticks;
2672           p->p_iticks = psrc->p_iticks;
2673           p->p_xutime = psrc->p_xutime;
2674           p->p_xstime = psrc->p_xstime;
2675           p->p_traceflag = psrc->p_traceflag;
2676           COND_SET_PTR(p->p_tracep, psrc->p_tracep, allowaddr);
2677           COND_SET_PTR(p->p_textvp, psrc->p_textvp, allowaddr);
2678           COND_SET_PTR(p->p_emul, psrc->p_emul, allowaddr);
2679           COND_SET_PTR(p->p_emuldata, psrc->p_emuldata, allowaddr);
2680           COND_SET_CPTR(p->p_execsw, psrc->p_execsw, allowaddr);
2681           COND_SET_STRUCT(p->p_klist, psrc->p_klist, allowaddr);
2682           COND_SET_STRUCT(p->p_sigwaiters, psrc->p_sigwaiters, allowaddr);
2683           COND_SET_STRUCT(p->p_sigpend.sp_info, psrc->p_sigpend.sp_info,
2684               allowaddr);
2685           p->p_sigpend.sp_set = psrc->p_sigpend.sp_set;
2686           COND_SET_PTR(p->p_lwpctl, psrc->p_lwpctl, allowaddr);
2687           p->p_ppid = psrc->p_ppid;
2688           p->p_oppid = psrc->p_oppid;
2689           COND_SET_PTR(p->p_path, psrc->p_path, allowaddr);
2690           p->p_sigctx = psrc->p_sigctx;
2691           p->p_nice = psrc->p_nice;
2692           memcpy(p->p_comm, psrc->p_comm, sizeof(p->p_comm));
2693           COND_SET_PTR(p->p_pgrp, psrc->p_pgrp, allowaddr);
2694           COND_SET_VALUE(p->p_psstrp, psrc->p_psstrp, allowaddr);
2695           p->p_pax = psrc->p_pax;
2696           p->p_xexit = psrc->p_xexit;
2697           p->p_xsig = psrc->p_xsig;
2698           p->p_acflag = psrc->p_acflag;
2699           COND_SET_STRUCT(p->p_md, psrc->p_md, allowaddr);
2700           p->p_stackbase = psrc->p_stackbase;
2701           COND_SET_PTR(p->p_dtrace, psrc->p_dtrace, allowaddr);
2702 }
2703 
2704 /*
2705  * Fill in an eproc structure for the specified process.
2706  */
2707 void
fill_eproc(struct proc * p,struct eproc * ep,bool zombie,bool allowaddr)2708 fill_eproc(struct proc *p, struct eproc *ep, bool zombie, bool allowaddr)
2709 {
2710           struct tty *tp;
2711           struct lwp *l;
2712 
2713           KASSERT(mutex_owned(&proc_lock));
2714           KASSERT(mutex_owned(p->p_lock));
2715 
2716           COND_SET_PTR(ep->e_paddr, p, allowaddr);
2717           COND_SET_PTR(ep->e_sess, p->p_session, allowaddr);
2718           if (p->p_cred) {
2719                     kauth_cred_topcred(p->p_cred, &ep->e_pcred);
2720                     kauth_cred_toucred(p->p_cred, &ep->e_ucred);
2721           }
2722           if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2723                     struct vmspace *vm = p->p_vmspace;
2724 
2725                     ep->e_vm.vm_rssize = vm_resident_count(vm);
2726                     ep->e_vm.vm_tsize = vm->vm_tsize;
2727                     ep->e_vm.vm_dsize = vm->vm_dsize;
2728                     ep->e_vm.vm_ssize = vm->vm_ssize;
2729                     ep->e_vm.vm_map.size = vm->vm_map.size;
2730 
2731                     /* Pick the primary (first) LWP */
2732                     l = proc_active_lwp(p);
2733                     KASSERT(l != NULL);
2734                     lwp_lock(l);
2735                     if (l->l_wchan)
2736                               strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN);
2737                     lwp_unlock(l);
2738           }
2739           ep->e_ppid = p->p_ppid;
2740           if (p->p_pgrp && p->p_session) {
2741                     ep->e_pgid = p->p_pgrp->pg_id;
2742                     ep->e_jobc = p->p_pgrp->pg_jobc;
2743                     ep->e_sid = p->p_session->s_sid;
2744                     if ((p->p_lflag & PL_CONTROLT) &&
2745                         (tp = p->p_session->s_ttyp)) {
2746                               ep->e_tdev = tp->t_dev;
2747                               ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2748                               COND_SET_PTR(ep->e_tsess, tp->t_session, allowaddr);
2749                     } else
2750                               ep->e_tdev = (uint32_t)NODEV;
2751                     ep->e_flag = p->p_session->s_ttyvp ? EPROC_CTTY : 0;
2752                     if (SESS_LEADER(p))
2753                               ep->e_flag |= EPROC_SLEADER;
2754                     strncpy(ep->e_login, p->p_session->s_login, MAXLOGNAME);
2755           }
2756           ep->e_xsize = ep->e_xrssize = 0;
2757           ep->e_xccount = ep->e_xswrss = 0;
2758 }
2759 
2760 /*
2761  * Fill in a kinfo_proc2 structure for the specified process.
2762  */
2763 void
fill_kproc2(struct proc * p,struct kinfo_proc2 * ki,bool zombie,bool allowaddr)2764 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie, bool allowaddr)
2765 {
2766           struct tty *tp;
2767           struct lwp *l;
2768           struct timeval ut, st, rt;
2769           sigset_t ss1, ss2;
2770           struct rusage ru;
2771           struct vmspace *vm;
2772 
2773           KASSERT(mutex_owned(&proc_lock));
2774           KASSERT(mutex_owned(p->p_lock));
2775 
2776           sigemptyset(&ss1);
2777           sigemptyset(&ss2);
2778 
2779           COND_SET_VALUE(ki->p_paddr, PTRTOUINT64(p), allowaddr);
2780           COND_SET_VALUE(ki->p_fd, PTRTOUINT64(p->p_fd), allowaddr);
2781           COND_SET_VALUE(ki->p_cwdi, PTRTOUINT64(p->p_cwdi), allowaddr);
2782           COND_SET_VALUE(ki->p_stats, PTRTOUINT64(p->p_stats), allowaddr);
2783           COND_SET_VALUE(ki->p_limit, PTRTOUINT64(p->p_limit), allowaddr);
2784           COND_SET_VALUE(ki->p_vmspace, PTRTOUINT64(p->p_vmspace), allowaddr);
2785           COND_SET_VALUE(ki->p_sigacts, PTRTOUINT64(p->p_sigacts), allowaddr);
2786           COND_SET_VALUE(ki->p_sess, PTRTOUINT64(p->p_session), allowaddr);
2787           ki->p_tsess = 0;    /* may be changed if controlling tty below */
2788           COND_SET_VALUE(ki->p_ru, PTRTOUINT64(&p->p_stats->p_ru), allowaddr);
2789           ki->p_eflag = 0;
2790           ki->p_exitsig = p->p_exitsig;
2791           ki->p_flag = L_INMEM;   /* Process never swapped out */
2792           ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag);
2793           ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
2794           ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
2795           ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
2796           ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
2797           ki->p_pid = p->p_pid;
2798           ki->p_ppid = p->p_ppid;
2799           ki->p_uid = kauth_cred_geteuid(p->p_cred);
2800           ki->p_ruid = kauth_cred_getuid(p->p_cred);
2801           ki->p_gid = kauth_cred_getegid(p->p_cred);
2802           ki->p_rgid = kauth_cred_getgid(p->p_cred);
2803           ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
2804           ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
2805           ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
2806           kauth_cred_getgroups(p->p_cred, ki->p_groups,
2807               uimin(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])),
2808               UIO_SYSSPACE);
2809 
2810           ki->p_uticks = p->p_uticks;
2811           ki->p_sticks = p->p_sticks;
2812           ki->p_iticks = p->p_iticks;
2813           ki->p_tpgid = NO_PGID;        /* may be changed if controlling tty below */
2814           COND_SET_VALUE(ki->p_tracep, PTRTOUINT64(p->p_tracep), allowaddr);
2815           ki->p_traceflag = p->p_traceflag;
2816 
2817           memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore,sizeof(ki_sigset_t));
2818           memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));
2819 
2820           ki->p_cpticks = 0;
2821           ki->p_pctcpu = p->p_pctcpu;
2822           ki->p_estcpu = 0;
2823           ki->p_stat = p->p_stat; /* Will likely be overridden by LWP status */
2824           ki->p_realstat = p->p_stat;
2825           ki->p_nice = p->p_nice;
2826           ki->p_xstat = P_WAITSTATUS(p);
2827           ki->p_acflag = p->p_acflag;
2828 
2829           strncpy(ki->p_comm, p->p_comm,
2830               uimin(sizeof(ki->p_comm), sizeof(p->p_comm)));
2831           strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));
2832 
2833           ki->p_nlwps = p->p_nlwps;
2834           ki->p_realflag = ki->p_flag;
2835 
2836           if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2837                     vm = p->p_vmspace;
2838                     ki->p_vm_rssize = vm_resident_count(vm);
2839                     ki->p_vm_tsize = vm->vm_tsize;
2840                     ki->p_vm_dsize = vm->vm_dsize;
2841                     ki->p_vm_ssize = vm->vm_ssize;
2842                     ki->p_vm_vsize = atop(vm->vm_map.size);
2843                     /*
2844                      * Since the stack is initially mapped mostly with
2845                      * PROT_NONE and grown as needed, adjust the "mapped size"
2846                      * to skip the unused stack portion.
2847                      */
2848                     ki->p_vm_msize =
2849                         atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;
2850 
2851                     /* Pick the primary (first) LWP */
2852                     l = proc_active_lwp(p);
2853                     KASSERT(l != NULL);
2854                     lwp_lock(l);
2855                     ki->p_nrlwps = p->p_nrlwps;
2856                     ki->p_forw = 0;
2857                     ki->p_back = 0;
2858                     COND_SET_VALUE(ki->p_addr, PTRTOUINT64(l->l_addr), allowaddr);
2859                     ki->p_stat = l->l_stat;
2860                     ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
2861                     ki->p_swtime = l->l_swtime;
2862                     ki->p_slptime = l->l_slptime;
2863                     if (l->l_stat == LSONPROC)
2864                               ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
2865                     else
2866                               ki->p_schedflags = 0;
2867                     ki->p_priority = lwp_eprio(l);
2868                     ki->p_usrpri = l->l_priority;
2869                     if (l->l_wchan)
2870                               strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
2871                     COND_SET_VALUE(ki->p_wchan, PTRTOUINT64(l->l_wchan), allowaddr);
2872                     ki->p_cpuid = cpu_index(l->l_cpu);
2873                     lwp_unlock(l);
2874                     LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2875                               /* This is hardly correct, but... */
2876                               sigplusset(&l->l_sigpend.sp_set, &ss1);
2877                               sigplusset(&l->l_sigmask, &ss2);
2878                               ki->p_cpticks += l->l_cpticks;
2879                               ki->p_pctcpu += l->l_pctcpu;
2880                               ki->p_estcpu += l->l_estcpu;
2881                     }
2882           }
2883           sigplusset(&p->p_sigpend.sp_set, &ss1);
2884           memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
2885           memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));
2886 
2887           if (p->p_session != NULL) {
2888                     ki->p_sid = p->p_session->s_sid;
2889                     ki->p__pgid = p->p_pgrp->pg_id;
2890                     if (p->p_session->s_ttyvp)
2891                               ki->p_eflag |= EPROC_CTTY;
2892                     if (SESS_LEADER(p))
2893                               ki->p_eflag |= EPROC_SLEADER;
2894                     strncpy(ki->p_login, p->p_session->s_login,
2895                         uimin(sizeof ki->p_login - 1, sizeof p->p_session->s_login));
2896                     ki->p_jobc = p->p_pgrp->pg_jobc;
2897                     if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
2898                               ki->p_tdev = tp->t_dev;
2899                               ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2900                               COND_SET_VALUE(ki->p_tsess, PTRTOUINT64(tp->t_session),
2901                                   allowaddr);
2902                     } else {
2903                               ki->p_tdev = (int32_t)NODEV;
2904                     }
2905           }
2906 
2907           if (!P_ZOMBIE(p) && !zombie) {
2908                     ki->p_uvalid = 1;
2909                     ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
2910                     ki->p_ustart_usec = p->p_stats->p_start.tv_usec;
2911 
2912                     calcru(p, &ut, &st, NULL, &rt);
2913                     ki->p_rtime_sec = rt.tv_sec;
2914                     ki->p_rtime_usec = rt.tv_usec;
2915                     ki->p_uutime_sec = ut.tv_sec;
2916                     ki->p_uutime_usec = ut.tv_usec;
2917                     ki->p_ustime_sec = st.tv_sec;
2918                     ki->p_ustime_usec = st.tv_usec;
2919 
2920                     memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
2921                     rulwps(p, &ru);
2922                     ki->p_uru_nvcsw = ru.ru_nvcsw;
2923                     ki->p_uru_nivcsw = ru.ru_nivcsw;
2924                     ki->p_uru_maxrss = ru.ru_maxrss;
2925                     ki->p_uru_ixrss = ru.ru_ixrss;
2926                     ki->p_uru_idrss = ru.ru_idrss;
2927                     ki->p_uru_isrss = ru.ru_isrss;
2928                     ki->p_uru_minflt = ru.ru_minflt;
2929                     ki->p_uru_majflt = ru.ru_majflt;
2930                     ki->p_uru_nswap = ru.ru_nswap;
2931                     ki->p_uru_inblock = ru.ru_inblock;
2932                     ki->p_uru_oublock = ru.ru_oublock;
2933                     ki->p_uru_msgsnd = ru.ru_msgsnd;
2934                     ki->p_uru_msgrcv = ru.ru_msgrcv;
2935                     ki->p_uru_nsignals = ru.ru_nsignals;
2936 
2937                     timeradd(&p->p_stats->p_cru.ru_utime,
2938                                &p->p_stats->p_cru.ru_stime, &ut);
2939                     ki->p_uctime_sec = ut.tv_sec;
2940                     ki->p_uctime_usec = ut.tv_usec;
2941           }
2942 }
2943 
2944 
2945 int
proc_find_locked(struct lwp * l,struct proc ** p,pid_t pid)2946 proc_find_locked(struct lwp *l, struct proc **p, pid_t pid)
2947 {
2948           int error;
2949 
2950           mutex_enter(&proc_lock);
2951           if (pid == -1)
2952                     *p = l->l_proc;
2953           else
2954                     *p = proc_find(pid);
2955 
2956           if (*p == NULL) {
2957                     if (pid != -1)
2958                               mutex_exit(&proc_lock);
2959                     return SET_ERROR(ESRCH);
2960           }
2961           if (pid != -1)
2962                     mutex_enter((*p)->p_lock);
2963           mutex_exit(&proc_lock);
2964 
2965           error = kauth_authorize_process(l->l_cred,
2966               KAUTH_PROCESS_CANSEE, *p,
2967               KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
2968           if (error) {
2969                     if (pid != -1)
2970                               mutex_exit((*p)->p_lock);
2971           }
2972           return error;
2973 }
2974 
2975 static int
fill_pathname(struct lwp * l,pid_t pid,void * oldp,size_t * oldlenp)2976 fill_pathname(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp)
2977 {
2978           int error;
2979           struct proc *p;
2980 
2981           if ((error = proc_find_locked(l, &p, pid)) != 0)
2982                     return error;
2983 
2984           if (p->p_path == NULL) {
2985                     if (pid != -1)
2986                               mutex_exit(p->p_lock);
2987                     return SET_ERROR(ENOENT);
2988           }
2989 
2990           size_t len = strlen(p->p_path) + 1;
2991           if (oldp != NULL) {
2992                     size_t copylen = uimin(len, *oldlenp);
2993                     error = sysctl_copyout(l, p->p_path, oldp, copylen);
2994                     if (error == 0 && *oldlenp < len)
2995                               error = SET_ERROR(ENOSPC);
2996           }
2997           *oldlenp = len;
2998           if (pid != -1)
2999                     mutex_exit(p->p_lock);
3000           return error;
3001 }
3002 
3003 static int
fill_cwd(struct lwp * l,pid_t pid,void * oldp,size_t * oldlenp)3004 fill_cwd(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp)
3005 {
3006           int error;
3007           struct proc *p;
3008           char *path;
3009           char *bp, *bend;
3010           struct cwdinfo *cwdi;
3011           struct vnode *vp;
3012           size_t len, lenused;
3013 
3014           if ((error = proc_find_locked(l, &p, pid)) != 0)
3015                     return error;
3016 
3017           len = MAXPATHLEN * 4;
3018 
3019           path = kmem_alloc(len, KM_SLEEP);
3020 
3021           bp = &path[len];
3022           bend = bp;
3023           *(--bp) = '\0';
3024 
3025           cwdi = p->p_cwdi;
3026           rw_enter(&cwdi->cwdi_lock, RW_READER);
3027           vp = cwdi->cwdi_cdir;
3028           error = getcwd_common(vp, NULL, &bp, path, len/2, 0, l);
3029           rw_exit(&cwdi->cwdi_lock);
3030 
3031           if (error)
3032                     goto out;
3033 
3034           lenused = bend - bp;
3035 
3036           if (oldp != NULL) {
3037                     size_t copylen = uimin(lenused, *oldlenp);
3038                     error = sysctl_copyout(l, bp, oldp, copylen);
3039                     if (error == 0 && *oldlenp < lenused)
3040                               error = SET_ERROR(ENOSPC);
3041           }
3042           *oldlenp = lenused;
3043 out:
3044           if (pid != -1)
3045                     mutex_exit(p->p_lock);
3046           kmem_free(path, len);
3047           return error;
3048 }
3049 
3050 int
proc_getauxv(struct proc * p,void ** buf,size_t * len)3051 proc_getauxv(struct proc *p, void **buf, size_t *len)
3052 {
3053           struct ps_strings pss;
3054           int error;
3055           void *uauxv, *kauxv;
3056           size_t size;
3057 
3058           if ((error = copyin_psstrings(p, &pss)) != 0)
3059                     return error;
3060           if (pss.ps_envstr == NULL)
3061                     return SET_ERROR(EIO);
3062 
3063           size = p->p_execsw->es_arglen;
3064           if (size == 0)
3065                     return SET_ERROR(EIO);
3066 
3067           size_t ptrsz = PROC_PTRSZ(p);
3068           uauxv = (void *)((char *)pss.ps_envstr + (pss.ps_nenvstr + 1) * ptrsz);
3069 
3070           kauxv = kmem_alloc(size, KM_SLEEP);
3071 
3072           error = copyin_proc(p, uauxv, kauxv, size);
3073           if (error) {
3074                     kmem_free(kauxv, size);
3075                     return error;
3076           }
3077 
3078           *buf = kauxv;
3079           *len = size;
3080 
3081           return 0;
3082 }
3083 
3084 
3085 static int
sysctl_security_expose_address(SYSCTLFN_ARGS)3086 sysctl_security_expose_address(SYSCTLFN_ARGS)
3087 {
3088           int expose_address, error;
3089           struct sysctlnode node;
3090 
3091           node = *rnode;
3092           node.sysctl_data = &expose_address;
3093           expose_address = *(int *)rnode->sysctl_data;
3094           error = sysctl_lookup(SYSCTLFN_CALL(&node));
3095           if (error || newp == NULL)
3096                     return error;
3097 
3098           if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_KERNADDR,
3099               0, NULL, NULL, NULL))
3100                     return SET_ERROR(EPERM);
3101 
3102           switch (expose_address) {
3103           case 0:
3104           case 1:
3105           case 2:
3106                     break;
3107           default:
3108                     return SET_ERROR(EINVAL);
3109           }
3110 
3111           *(int *)rnode->sysctl_data = expose_address;
3112 
3113           return 0;
3114 }
3115 
3116 bool
get_expose_address(struct proc * p)3117 get_expose_address(struct proc *p)
3118 {
3119           /* allow only if sysctl variable is set or privileged */
3120           return kauth_authorize_process(kauth_cred_get(), KAUTH_PROCESS_CANSEE,
3121               p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_KPTR), NULL, NULL) == 0;
3122 }
3123