xref: /dragonfly/sys/kern/kern_caps.c (revision a44776b2e76cf2f7785c07410679a378246889bc)
1 /*
2  * Copyright (c) 2023 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/acct.h>
37 #include <sys/caps.h>
38 #include <sys/systm.h>
39 #include <sys/sysmsg.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/proc.h>
43 #include <sys/malloc.h>
44 #include <sys/pioctl.h>
45 #include <sys/resourcevar.h>
46 #include <sys/jail.h>
47 #include <sys/lockf.h>
48 #include <sys/spinlock.h>
49 #include <sys/sysctl.h>
50 
51 #include <sys/spinlock2.h>
52 
/*
 * Integer knob exported read-write as the sysctl kern.caps_available
 * (default 1, empty description string).
 *
 * NOTE(review): nothing in the visible part of this file reads
 * caps_available; presumably it is meant as a global enable for the
 * capability-restriction checks -- confirm against other users.
 */
__read_mostly static int caps_available = 1;
SYSCTL_INT(_kern, OID_AUTO, caps_available,
             CTLFLAG_RW, &caps_available, 0 , "");
56 
57 /*
58  * Quick check for cap restriction in cred (no bounds checks),
59  * return cap flags.
60  */
61 static __inline
62 int
caps_check_cred(struct ucred * cred,int cap)63 caps_check_cred(struct ucred *cred, int cap)
64 {
65           __syscapelm_t elm;
66 
67           cap &= ~__SYSCAP_XFLAGS;
68           elm = cred->cr_caps.caps[__SYSCAP_INDEX(cap)];
69 
70           return ((int)(elm >> __SYSCAP_SHIFT(cap)) & __SYSCAP_ALL);
71 }
72 
73 /*
74  * int syscap_get(int cap, void *data, size_t bytes);
75  */
76 int
sys_syscap_get(struct sysmsg * sysmsg,const struct syscap_get_args * uap)77 sys_syscap_get(struct sysmsg *sysmsg, const struct syscap_get_args *uap)
78 {
79           struct ucred *cred;
80           int cap = uap->cap & ~__SYSCAP_XFLAGS;
81           int res;
82           int error;
83 
84           if (cap < 0)
85                     return EINVAL;
86           if (cap >= __SYSCAP_COUNT)
87                     return EOPNOTSUPP;
88           if (uap->bytes && uap->bytes < sizeof(syscap_base_t))
89                     return EINVAL;
90           error = 0;
91 
92           /*
93            * Get capability restriction from parent pid
94            */
95           if (uap->cap & __SYSCAP_INPARENT) {
96                     struct proc *pp;
97 
98                     pp = pfind(curproc->p_ppid);
99                     if (pp == NULL)
100                               return EINVAL;
101                 lwkt_gettoken_shared(&pp->p_token);         /* protect cred */
102                     cred = pp->p_ucred;
103                     crhold(cred);
104                 lwkt_reltoken(&pp->p_token);
105                     PRELE(pp);                                        /* from pfind */
106           } else {
107                     cred = curthread->td_ucred;
108           }
109 
110           /*
111            * No resource data by default
112            */
113           if (uap->data && uap->bytes) {
114                     syscap_base_t base;
115 
116                     base.res = SYSCAP_RESOURCE_EOF;
117                     base.len = sizeof(base);
118                     error = copyout(&base, uap->data, sizeof(base));
119           }
120 
121           /*
122            * Get resource bits
123            */
124           if (error == 0) {
125                     res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
126                                   __SYSCAP_SHIFT(cap));
127                     res &= __SYSCAP_BITS_MASK;
128                     sysmsg->sysmsg_result = res;
129           }
130 
131           if (uap->cap & __SYSCAP_INPARENT)
132                     crfree(cred);
133 
134           return error;
135 }
136 
137 /*
138  * int syscap_set(int cap, int flags, const void *data, size_t bytes)
139  */
140 int
sys_syscap_set(struct sysmsg * sysmsg,const struct syscap_set_args * uap)141 sys_syscap_set(struct sysmsg *sysmsg, const struct syscap_set_args *uap)
142 {
143           struct ucred *cred;
144           struct proc *pp;
145           int cap = uap->cap & ~__SYSCAP_XFLAGS;
146           int res;
147           int error;
148           int flags = uap->flags;
149           __syscapelm_t anymask;
150 
151           if (cap < 0 || cap >= __SYSCAP_COUNT)
152                     return EINVAL;
153           if (flags & ~__SYSCAP_BITS_MASK)
154                     return EINVAL;
155           if (uap->data || uap->bytes)
156                     return EINVAL;
157           error = 0;
158 
159           /*
160            * Get capability restriction from parent pid.  We can only
161            * mess with the parent if it is running under the same userid
162            * and prison.
163            */
164           if (uap->cap & __SYSCAP_INPARENT) {
165                     pp = pfind(curproc->p_ppid);
166                     if (pp == NULL)
167                               return EINVAL;
168                     if (pp->p_ucred->cr_uid != curproc->p_ucred->cr_uid ||
169                         pp->p_ucred->cr_prison != curproc->p_ucred->cr_prison)
170                     {
171                               PRELE(pp);                    /* from pfind */
172                               return EINVAL;
173                     }
174           } else {
175                     pp = curproc;
176           }
177           lwkt_gettoken(&pp->p_token);            /* protect p_ucred */
178           cred = pp->p_ucred;
179 
180           /*
181            * Calculate normalized value for requested capability and check
182            * against the stored value.  If they do not match, wire-or to
183            * add the bits and set appropriate SYSCAP_ANY bits indicating
184            * deviation from the root syscaps.
185            */
186           res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
187                         __SYSCAP_SHIFT(cap));
188           res &= __SYSCAP_BITS_MASK;
189 
190           /*
191            * Handle resource data, if any
192            */
193 
194           /*
195            * Set resource bits
196            */
197           if (error == 0) {
198                     if (res != (res | flags)) {
199                               cred = cratom_proc(pp);
200                               anymask = (__syscapelm_t)flags <<
201                                           __SYSCAP_SHIFT(SYSCAP_ANY);
202                               atomic_set_64(&cred->cr_caps.caps[0], anymask);
203                               atomic_set_64(&cred->cr_caps.caps[ __SYSCAP_INDEX(cap)],
204                                               ((__syscapelm_t)uap->flags <<
205                                                __SYSCAP_SHIFT(cap)));
206                     }
207                     sysmsg->sysmsg_result = res | uap->flags;
208           }
209 
210           /*
211            * Cleanup
212            */
213           lwkt_reltoken(&pp->p_token);
214           if (uap->cap & __SYSCAP_INPARENT)
215                     PRELE(pp);                              /* from pfind */
216           return error;
217 }
218 
219 /*
220  * Adjust capabilities for exec after the point of no return.
221  *
222  * This function shifts the EXEC bits into the SELF bits and
223  * replicates the EXEC bits.
224  */
225 void
caps_exec(struct proc * p)226 caps_exec(struct proc *p)
227 {
228           struct ucred *cred;
229           __syscapelm_t elm;
230           int changed = 0;
231           int i;
232 
233           /*
234            * Dry-run caps inheritance, did anything change?
235            *
236            * caps inheritance basically shifts the EXEC bits into the SELF bits,
237            * and then replicates the EXEC bits.  We have to avoid shifting any
238            * 1's from the SELF bits into the adjacent EXEC bits that may have
239            * previously been 0.
240            */
241           cred = p->p_ucred;
242           for (i = 0; i < __SYSCAP_NUMELMS; ++i) {
243                     elm = cred->cr_caps.caps[i];
244                     elm = ((elm & __SYSCAP_EXECMASK) >> 1) |
245                           (elm & __SYSCAP_EXECMASK);
246                     if (elm != cred->cr_caps.caps[i])
247                               changed = 1;
248           }
249 
250           /*
251            * Yes, setup a new ucred for the process
252            */
253           if (changed) {
254                     cratom_proc(p);
255                     cred = p->p_ucred;
256                     for (i = 0; i < __SYSCAP_NUMELMS; ++i) {
257                               elm = cred->cr_caps.caps[i];
258                               elm = ((elm & __SYSCAP_EXECMASK) >> 1) |
259                                     (elm & __SYSCAP_EXECMASK);
260                               cred->cr_caps.caps[i] = elm;
261                     }
262           }
263 }
264 
265 /*
266  * Return the raw flags for the requested capability.
267  */
268 int
caps_get(struct ucred * cred,int cap)269 caps_get(struct ucred *cred, int cap)
270 {
271           int res;
272 
273           cap &= ~__SYSCAP_XFLAGS;
274           if (cap < 0 || cap >= __SYSCAP_COUNT)
275                     return 0;
276           res = (int)(cred->cr_caps.caps[__SYSCAP_INDEX(cap)] >>
277                         __SYSCAP_SHIFT(cap));
278           res &= __SYSCAP_BITS_MASK;
279 
280           return res;
281 }
282 
283 /*
284  * Set capability restriction bits
285  */
286 void
caps_set_locked(struct proc * p,int cap,int flags)287 caps_set_locked(struct proc *p, int cap, int flags)
288 {
289           struct ucred *cred;
290           __syscapelm_t elm;
291 
292           cap &= ~__SYSCAP_XFLAGS;
293           if (cap < 0 || cap >= __SYSCAP_COUNT)
294                     return;
295 
296           cred = cratom_proc(p);
297           elm = (__syscapelm_t)flags << __SYSCAP_SHIFT(SYSCAP_ANY);
298           atomic_set_64(&cred->cr_caps.caps[0], elm);
299           elm = (__syscapelm_t)flags << __SYSCAP_SHIFT(cap);
300           atomic_set_64(&cred->cr_caps.caps[ __SYSCAP_INDEX(cap)], elm);
301 }
302 
303 /*
304  * Returns error code if restricted, 0 on success.
305  *
306  * These are more sophisticated versions of the baseline caps checks.
307  * cr_prison capabilities are also checked, and some capabilities may
308  * imply several tests.
309  */
310 int
caps_priv_check(struct ucred * cred,int cap)311 caps_priv_check(struct ucred *cred, int cap)
312 {
313           int res;
314 
315           if (cred == NULL) {
316                     if (cap & __SYSCAP_NULLCRED)
317                               return 0;
318                     return EPERM;
319           }
320 
321           /*
322            * Uid must be 0 unless NOROOTTEST is requested.  If requested
323            * it means the caller is depending on e.g. /dev/blah perms.
324            */
325           if (cred->cr_uid != 0 && (cap & __SYSCAP_NOROOTTEST) == 0)
326                     return EPERM;
327 
328           res = caps_check_cred(cred, cap);
329           if (cap & __SYSCAP_GROUP_MASK) {
330                     cap = (cap & __SYSCAP_GROUP_MASK) >> __SYSCAP_GROUP_SHIFT;
331                     res |= caps_check_cred(cred, cap);
332           }
333           if (res & __SYSCAP_SELF)
334                     return EPERM;
335           return (prison_priv_check(cred, cap));
336 }
337 
338 int
caps_priv_check_td(thread_t td,int cap)339 caps_priv_check_td(thread_t td, int cap)
340 {
341           struct ucred *cred;
342 
343           if (td->td_lwp == NULL)                           /* not user thread */
344                     return 0;
345           cred = td->td_ucred;
346         if (cred == NULL)
347                     return (EPERM);
348                                                             /* must pass restrictions */
349           if (caps_check_cred(cred, cap) & __SYSCAP_SELF)
350                     return EPERM;
351           return (prison_priv_check(cred, cap));
352 }
353 
354 int
caps_priv_check_self(int cap)355 caps_priv_check_self(int cap)
356 {
357           return (caps_priv_check_td(curthread, cap));
358 }
359