xref: /dragonfly/sys/kern/kern_jail.c (revision 24687fffce3dc9ab40cb693ed1f00986a106b031)
1 /*
2  * ----------------------------------------------------------------------------
3  * "THE BEER-WARE LICENSE" (Revision 42):
4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5  * can do whatever you want with this stuff. If we meet some day, and you think
6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7  * ----------------------------------------------------------------------------
8  *
9  */
10 /*-
11  * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net>
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 
37 /*
38  * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
39  */
40 
41 #include "opt_inet6.h"
42 
43 #include <sys/param.h>
44 #include <sys/types.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
47 #include <sys/errno.h>
48 #include <sys/sysmsg.h>
49 #include <sys/malloc.h>
50 #include <sys/nlookup.h>
51 #include <sys/namecache.h>
52 #include <sys/proc.h>
53 #include <sys/caps.h>
54 #include <sys/jail.h>
55 #include <sys/socket.h>
56 #include <sys/sysctl.h>
57 #include <sys/kern_syscall.h>
58 #include <net/if.h>
59 #include <netinet/in.h>
60 #include <netinet6/in6_var.h>
61 
/* Forward declarations for static helpers defined later in this file. */
62 static struct prison          *prison_find(int);
63 static void                   prison_ipcache_init(struct prison *);
64 
/*
 * Capability template: sys_jail() copies this value into pr_caps of every
 * prison it creates.  The individual bits are exposed through the
 * jail.defaults sysctl entries declared below.
 */
65 __read_mostly static prison_cap_t       prison_default_caps;
66 
/* Malloc type used in this file for prisons, their IP entries and pr_linux. */
67 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
68 
/* Root of the "jail" sysctl tree. */
69 SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0,
70     "All jails settings");
71 
/* "jail.defaults" subtree holding the default capability knobs. */
72 SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0,
73     "Default options for jails");
74 
/*
 * Optional reference debugging: when PRISON_DEBUG is defined, prison_hold()
 * and prison_free() print a backtrace and decrement prison_debug for as long
 * as it is positive.
 */
75 /*#define PRISON_DEBUG*/
76 #ifdef PRISON_DEBUG
77 __read_mostly static int prison_debug;
78 SYSCTL_INT(_jail, OID_AUTO, debug, CTLFLAG_RW, &prison_debug, 0,
79     "Debug prison refs");
80 #endif
81 
/*
 * jail.defaults.* knobs.  Each entry maps one PRISON_CAP_* bit of
 * prison_default_caps to a read/write sysctl.
 *
 * NOTE(review): the numeric argument following &prison_default_caps is
 * presumably the initial state of the bit (only set_hostname_allowed passes
 * 1) -- confirm against the SYSCTL_BIT64 definition.
 */
82 SYSCTL_BIT64(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
83     &prison_default_caps, 1, PRISON_CAP_SYS_SET_HOSTNAME,
84     "Processes in jail can set their hostnames");
85 
86 SYSCTL_BIT64(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
87     &prison_default_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
88     "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
89 
90 SYSCTL_BIT64(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
91     &prison_default_caps, 0, PRISON_CAP_SYS_SYSVIPC,
92     "Processes in jail can use System V IPC primitives");
93 
94 SYSCTL_BIT64(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW,
95     &prison_default_caps, 0, PRISON_CAP_VFS_CHFLAGS,
96     "Processes in jail can alter system file flags");
97 
98 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
99     &prison_default_caps, 0, PRISON_CAP_NET_RAW_SOCKETS,
100     "Process in jail can create raw sockets");
101 
102 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_listen_override, CTLFLAG_RW,
103     &prison_default_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE,
104     "Process in jail can override host wildcard listen");
105 
/* Per-filesystem mount permissions follow. */
106 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_nullfs, CTLFLAG_RW,
107     &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS,
108     "Process in jail can mount nullfs(5) filesystems");
109 
110 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_tmpfs, CTLFLAG_RW,
111     &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS,
112     "Process in jail can mount tmpfs(5) filesystems");
113 
114 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_devfs, CTLFLAG_RW,
115     &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_DEVFS,
116     "Process in jail can mount devfs(5) filesystems");
117 
118 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_procfs, CTLFLAG_RW,
119     &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_PROCFS,
120     "Process in jail can mount procfs(5) filesystems");
121 
122 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_fusefs, CTLFLAG_RW,
123     &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_FUSEFS,
124     "Process in jail can mount fuse filesystems");
125 
/* Most recently assigned prison id; assign_prison_id() starts its search here. */
126 static int          lastprid = 0;
/* Number of prisons currently linked on allprison. */
127 static int          prisoncount = 0;
128 
/*
 * Lock taken around accesses to allprison, prisoncount and the per-prison
 * IP lists.  It is created with LK_CANRECURSE: sys_jail() and
 * sys_jail_attach() hold it while calling helpers that acquire it again.
 */
129 static struct lock jail_lock =
130        LOCK_INITIALIZER("jail", 0, LK_CANRECURSE);
131 
/* List of all prisons, linked through pr_list. */
132 LIST_HEAD(prisonlist, prison);
133 static struct prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison);
134 
135 static int
kern_jail_attach(int jid)136 kern_jail_attach(int jid)
137 {
138           struct proc *p = curthread->td_proc;
139           struct prison *pr;
140           struct ucred *cr;
141           int error;
142 
143           pr = prison_find(jid);
144           if (pr == NULL)
145                     return(EINVAL);
146 
147           error = kern_chroot(&pr->pr_root);
148           if (error)
149                     return(error);
150 
151           prison_hold(pr);
152           lwkt_gettoken(&p->p_token);
153           cr = cratom_proc(p);
154           cr->cr_prison = pr;
155           p->p_flags |= P_JAILED;
156           caps_set_locked(p, SYSCAP_RESTRICTEDROOT, __SYSCAP_ALL);
157           lwkt_reltoken(&p->p_token);
158 
159           return(0);
160 }
161 
162 static int
assign_prison_id(struct prison * pr)163 assign_prison_id(struct prison *pr)
164 {
165           int tryprid;
166           struct prison *tpr;
167 
168           tryprid = lastprid + 1;
169           if (tryprid == JAIL_MAX)
170                     tryprid = 1;
171 
172           lockmgr(&jail_lock, LK_EXCLUSIVE);
173 next:
174           LIST_FOREACH(tpr, &allprison, pr_list) {
175                     if (tpr->pr_id != tryprid)
176                               continue;
177                     tryprid++;
178                     if (tryprid == JAIL_MAX) {
179                               lockmgr(&jail_lock, LK_RELEASE);
180                               return (ERANGE);
181                     }
182                     goto next;
183           }
184           pr->pr_id = lastprid = tryprid;
185           lockmgr(&jail_lock, LK_RELEASE);
186 
187           return (0);
188 }
189 
190 static int
kern_jail(struct prison * pr,struct jail * j)191 kern_jail(struct prison *pr, struct jail *j)
192 {
193           int error;
194           struct nlookupdata nd;
195 
196           error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW);
197           if (error) {
198                     nlookup_done(&nd);
199                     return (error);
200           }
201           error = nlookup(&nd);
202           if (error) {
203                     nlookup_done(&nd);
204                     return (error);
205           }
206           cache_copy(&nd.nl_nch, &pr->pr_root);
207 
208           varsymset_init(&pr->pr_varsymset, NULL);
209           prison_ipcache_init(pr);
210 
211           error = assign_prison_id(pr);
212           if (error) {
213                     varsymset_clean(&pr->pr_varsymset);
214                     nlookup_done(&nd);
215                     return (error);
216           }
217 
218           lockmgr(&jail_lock, LK_EXCLUSIVE);
219           LIST_INSERT_HEAD(&allprison, pr, pr_list);
220           ++prisoncount;
221           lockmgr(&jail_lock, LK_RELEASE);
222 
223           error = prison_sysctl_create(pr);
224           if (error)
225                     goto out;
226 
227           error = kern_jail_attach(pr->pr_id);
228           if (error)
229                     goto out2;
230 
231           nlookup_done(&nd);
232           return 0;
233 
234 out2:
235           prison_sysctl_done(pr);
236 
237 out:
238           lockmgr(&jail_lock, LK_EXCLUSIVE);
239           LIST_REMOVE(pr, pr_list);
240           --prisoncount;
241           lockmgr(&jail_lock, LK_RELEASE);
242           varsymset_clean(&pr->pr_varsymset);
243           nlookup_done(&nd);
244           return (error);
245 }
246 
247 /*
248  * jail()
249  *
250  * jail_args(syscallarg(struct jail *) jail)
251  *
252  * MPALMOSTSAFE
253  */
254 int
sys_jail(struct sysmsg * sysmsg,const struct jail_args * uap)255 sys_jail(struct sysmsg *sysmsg, const struct jail_args *uap)
256 {
257           struct prison *pr;
258           struct jail_ip_storage *jip;
259           struct jail j;
260           int error;
261           uint32_t jversion;
262 
263           sysmsg->sysmsg_result = -1;
264 
265           error = caps_priv_check_self(SYSCAP_NOJAIL_CREATE);
266           if (error)
267                     return (error);
268 
269           error = copyin(uap->jail, &jversion, sizeof(jversion));
270           if (error)
271                     return (error);
272 
273           pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
274           SLIST_INIT(&pr->pr_ips);
275           lockmgr(&jail_lock, LK_EXCLUSIVE);
276 
277           switch (jversion) {
278           case 0:
279                     /* Single IPv4 jails. */
280                     {
281                     struct jail_v0 jv0;
282                     struct sockaddr_in ip4addr;
283 
284                     error = copyin(uap->jail, &jv0, sizeof(jv0));
285                     if (error)
286                               goto out;
287 
288                     j.path = jv0.path;
289                     j.hostname = jv0.hostname;
290 
291                     jip = kmalloc(sizeof(*jip),  M_PRISON, M_WAITOK | M_ZERO);
292                     ip4addr.sin_family = AF_INET;
293                     ip4addr.sin_addr.s_addr = htonl(jv0.ip_number);
294                     memcpy(&jip->ip, &ip4addr, sizeof(ip4addr));
295                     SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
296                     break;
297                     }
298 
299           case 1:
300                     /*
301                      * DragonFly multi noIP/IPv4/IPv6 jails
302                      *
303                      * NOTE: This version is unsupported by FreeBSD
304                      * (which uses version 2 instead).
305                      */
306 
307                     error = copyin(uap->jail, &j, sizeof(j));
308                     if (error)
309                               goto out;
310 
311                     for (int i = 0; i < j.n_ips; i++) {
312                               jip = kmalloc(sizeof(*jip), M_PRISON,
313                                               M_WAITOK | M_ZERO);
314                               SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
315                               error = copyin(&j.ips[i], &jip->ip,
316                                                   sizeof(struct sockaddr_storage));
317                               if (error)
318                                         goto out;
319                     }
320                     break;
321           default:
322                     error = EINVAL;
323                     goto out;
324           }
325 
326           error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
327           if (error)
328                     goto out;
329 
330           /* Use default capabilities as a template */
331           pr->pr_caps = prison_default_caps;
332 
333           error = kern_jail(pr, &j);
334           if (error)
335                     goto out;
336 
337           sysmsg->sysmsg_result = pr->pr_id;
338           lockmgr(&jail_lock, LK_RELEASE);
339 
340           return (0);
341 
342 out:
343           /* Delete all ips */
344           while (!SLIST_EMPTY(&pr->pr_ips)) {
345                     jip = SLIST_FIRST(&pr->pr_ips);
346                     SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
347                     kfree(jip, M_PRISON);
348           }
349           lockmgr(&jail_lock, LK_RELEASE);
350           kfree(pr, M_PRISON);
351 
352           return (error);
353 }
354 
355 /*
356  * int jail_attach(int jid);
357  *
358  * MPALMOSTSAFE
359  */
360 int
sys_jail_attach(struct sysmsg * sysmsg,const struct jail_attach_args * uap)361 sys_jail_attach(struct sysmsg *sysmsg, const struct jail_attach_args *uap)
362 {
363           int error;
364 
365           error = caps_priv_check_self(SYSCAP_NOJAIL_ATTACH);
366           if (error)
367                     return(error);
368           lockmgr(&jail_lock, LK_EXCLUSIVE);
369           error = kern_jail_attach(uap->jid);
370           lockmgr(&jail_lock, LK_RELEASE);
371           return (error);
372 }
373 
374 static void
prison_ipcache_init(struct prison * pr)375 prison_ipcache_init(struct prison *pr)
376 {
377           struct jail_ip_storage *jis;
378           struct sockaddr_in *ip4;
379           struct sockaddr_in6 *ip6;
380 
381           lockmgr(&jail_lock, LK_EXCLUSIVE);
382           SLIST_FOREACH(jis, &pr->pr_ips, entries) {
383                     switch (jis->ip.ss_family) {
384                     case AF_INET:
385                               ip4 = (struct sockaddr_in *)&jis->ip;
386                               if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) ==
387                                   IN_LOOPBACKNET) {
388                                         /* loopback address */
389                                         if (pr->local_ip4 == NULL)
390                                                   pr->local_ip4 = ip4;
391                               } else {
392                                         /* public address */
393                                         if (pr->nonlocal_ip4 == NULL)
394                                                   pr->nonlocal_ip4 = ip4;
395                               }
396                               break;
397 
398                     case AF_INET6:
399                               ip6 = (struct sockaddr_in6 *)&jis->ip;
400                               if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) {
401                                         /* loopback address */
402                                         if (pr->local_ip6 == NULL)
403                                                   pr->local_ip6 = ip6;
404                               } else {
405                                         /* public address */
406                                         if (pr->nonlocal_ip6 == NULL)
407                                                   pr->nonlocal_ip6 = ip6;
408                               }
409                               break;
410                     }
411           }
412           lockmgr(&jail_lock, LK_RELEASE);
413 }
414 
415 /*
416  * Changes INADDR_LOOPBACK for a valid jail address.
417  * ip is in network byte order.
418  * Returns 1 if the ip is among jail valid ips.
419  * Returns 0 if is not among jail valid ips or
420  * if couldn't replace INADDR_LOOPBACK for a valid
421  * IP.
422  */
423 int
prison_replace_wildcards(struct thread * td,struct sockaddr * ip)424 prison_replace_wildcards(struct thread *td, struct sockaddr *ip)
425 {
426           struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
427           struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
428           struct prison *pr;
429 
430           if (td->td_proc == NULL || td->td_ucred == NULL)
431                     return (1);
432           if ((pr = td->td_ucred->cr_prison) == NULL)
433                     return (1);
434 
435           if ((ip->sa_family == AF_INET &&
436               ip4->sin_addr.s_addr == htonl(INADDR_ANY)) ||
437               (ip->sa_family == AF_INET6 &&
438               IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr)))
439                     return (1);
440           if ((ip->sa_family == AF_INET &&
441               ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
442               (ip->sa_family == AF_INET6 &&
443               IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
444                     if (!prison_get_local(pr, ip->sa_family, ip) &&
445                         !prison_get_nonlocal(pr, ip->sa_family, ip))
446                               return(0);
447                     else
448                               return(1);
449           }
450           if (jailed_ip(pr, ip))
451                     return(1);
452           return(0);
453 }
454 
455 /*
456  * Convert the localhost IP to the actual jail IP
457  */
458 int
prison_remote_ip(struct thread * td,struct sockaddr * ip)459 prison_remote_ip(struct thread *td, struct sockaddr *ip)
460 {
461           struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
462           struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
463           struct prison *pr;
464 
465           if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
466                     return(1);
467           if ((pr = td->td_ucred->cr_prison) == NULL)
468                     return(1);
469           if ((ip->sa_family == AF_INET &&
470               ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
471               (ip->sa_family == AF_INET6 &&
472               IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
473                     if (!prison_get_local(pr, ip->sa_family, ip) &&
474                         !prison_get_nonlocal(pr, ip->sa_family, ip))
475                               return(0);
476                     else
477                               return(1);
478           }
479           return(1);
480 }
481 
482 /*
483  * Convert the jail IP back to localhost
484  *
485  * Used by getsockname() and getpeername() to convert the in-jail loopback
486  * address back to LOCALHOST.  For example, 127.0.0.2 -> 127.0.0.1.  The
487  * idea is that programs running inside the jail should be unaware that they
488  * are using a different loopback IP than the host.
489  */
490 __read_mostly static struct in6_addr sin6_localhost = IN6ADDR_LOOPBACK_INIT;
491 
492 int
prison_local_ip(struct thread * td,struct sockaddr * ip)493 prison_local_ip(struct thread *td, struct sockaddr *ip)
494 {
495           struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
496           struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
497           struct prison *pr;
498 
499           if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
500                     return(1);
501           if ((pr = td->td_ucred->cr_prison) == NULL)
502                     return(1);
503           if (ip->sa_family == AF_INET && pr->local_ip4 &&
504               pr->local_ip4->sin_addr.s_addr == ip4->sin_addr.s_addr &&
505               pr->local_ip4->sin_addr.s_addr != htonl(INADDR_LOOPBACK)) {
506                     ip4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
507                     return(0);
508           }
509           if (ip->sa_family == AF_INET6 && pr->local_ip6 &&
510               bcmp(&pr->local_ip6->sin6_addr, &ip6->sin6_addr,
511                      sizeof(ip6->sin6_addr)) == 0) {
512                     bcopy(&sin6_localhost, &ip6->sin6_addr, sizeof(ip6->sin6_addr));
513                     return(0);
514           }
515           return(1);
516 }
517 
518 /*
519  * Prison get non loopback ip:
520  * - af is the address family of the ip we want (AF_INET|AF_INET6).
521  * - If ip != NULL, put the first IP address that is not a loopback address
522  *   into *ip.
523  *
524  * ip is in network by order and we don't touch it unless we find a valid ip.
525  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
526  * or NULL.  This struct may not be modified.
527  */
528 struct sockaddr *
prison_get_nonlocal(struct prison * pr,sa_family_t af,struct sockaddr * ip)529 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip)
530 {
531           struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
532           struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
533 
534           /* Check if it is cached */
535           switch(af) {
536           case AF_INET:
537                     if (ip4 != NULL && pr->nonlocal_ip4 != NULL)
538                               ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr;
539                     return (struct sockaddr *)pr->nonlocal_ip4;
540 
541           case AF_INET6:
542                     if (ip6 != NULL && pr->nonlocal_ip6 != NULL)
543                               ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr;
544                     return (struct sockaddr *)pr->nonlocal_ip6;
545           }
546 
547           /* NOTREACHED */
548           return NULL;
549 }
550 
551 /*
552  * Prison get loopback ip.
553  * - af is the address family of the ip we want (AF_INET|AF_INET6).
554  * - If ip != NULL, put the first IP address that is not a loopback address
555  *   into *ip.
556  *
557  * ip is in network by order and we don't touch it unless we find a valid ip.
558  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
559  * or NULL.  This struct may not be modified.
560  */
561 struct sockaddr *
prison_get_local(struct prison * pr,sa_family_t af,struct sockaddr * ip)562 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip)
563 {
564           struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
565           struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
566 
567           /* Check if it is cached */
568           switch(af) {
569           case AF_INET:
570                     if (ip4 != NULL && pr->local_ip4 != NULL)
571                               ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr;
572                     return (struct sockaddr *)pr->local_ip4;
573 
574           case AF_INET6:
575                     if (ip6 != NULL && pr->local_ip6 != NULL)
576                               ip6->sin6_addr = pr->local_ip6->sin6_addr;
577                     return (struct sockaddr *)pr->local_ip6;
578           }
579 
580           /* NOTREACHED */
581           return NULL;
582 }
583 
584 /* Check if the IP is among ours, if it is return 1, else 0 */
585 int
jailed_ip(struct prison * pr,const struct sockaddr * ip)586 jailed_ip(struct prison *pr, const struct sockaddr *ip)
587 {
588           const struct jail_ip_storage *jis;
589           const struct sockaddr_in *jip4, *ip4;
590           const struct sockaddr_in6 *jip6, *ip6;
591 
592           if (pr == NULL)
593                     return(0);
594           ip4 = (const struct sockaddr_in *)ip;
595           ip6 = (const struct sockaddr_in6 *)ip;
596 
597           lockmgr(&jail_lock, LK_EXCLUSIVE);
598           SLIST_FOREACH(jis, &pr->pr_ips, entries) {
599                     switch (ip->sa_family) {
600                     case AF_INET:
601                               jip4 = (const struct sockaddr_in *) &jis->ip;
602                               if (jip4->sin_family == AF_INET &&
603                                   ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) {
604                                         lockmgr(&jail_lock, LK_RELEASE);
605                                         return(1);
606                               }
607                               break;
608                     case AF_INET6:
609                               jip6 = (const struct sockaddr_in6 *) &jis->ip;
610                               if (jip6->sin6_family == AF_INET6 &&
611                                   IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr,
612                                         &jip6->sin6_addr)) {
613                                         lockmgr(&jail_lock, LK_RELEASE);
614                                         return(1);
615                               }
616                               break;
617                     }
618           }
619           lockmgr(&jail_lock, LK_RELEASE);
620           /* Ip not in list */
621           return(0);
622 }
623 
624 int
prison_if(struct ucred * cred,struct sockaddr * sa)625 prison_if(struct ucred *cred, struct sockaddr *sa)
626 {
627           struct prison *pr;
628           struct sockaddr_in *sai = (struct sockaddr_in*) sa;
629 
630           pr = cred->cr_prison;
631 
632           if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
633               && PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_UNIXIPROUTE))
634                     return(1);
635           else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
636                     return(0);
637           else if (jailed_ip(pr, sa))
638                     return(0);
639           return(1);
640 }
641 
642 /*
643  * Returns a prison instance, or NULL on failure.
644  */
645 static struct prison *
prison_find(int prid)646 prison_find(int prid)
647 {
648           struct prison *pr;
649 
650           lockmgr(&jail_lock, LK_EXCLUSIVE);
651           LIST_FOREACH(pr, &allprison, pr_list) {
652                     if (pr->pr_id == prid)
653                               break;
654           }
655           lockmgr(&jail_lock, LK_RELEASE);
656 
657           return(pr);
658 }
659 
660 static int
sysctl_jail_list(SYSCTL_HANDLER_ARGS)661 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
662 {
663           struct thread *td = curthread;
664           struct jail_ip_storage *jip;
665 #ifdef INET6
666           struct sockaddr_in6 *jsin6;
667 #endif
668           struct sockaddr_in *jsin;
669           struct lwp *lp;
670           struct prison *pr;
671           unsigned int jlssize, jlsused;
672           int count, error;
673           char *jls; /* Jail list */
674           char *oip; /* Output ip */
675           char *fullpath, *freepath;
676 
677           jlsused = 0;
678 
679           if (jailed(td->td_ucred))
680                     return (0);
681           lp = td->td_lwp;
682 retry:
683           count = prisoncount;
684 
685           if (count == 0)
686                     return(0);
687 
688           jlssize = (count * 1024);
689           jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO);
690           if (count < prisoncount) {
691                     kfree(jls, M_TEMP);
692                     goto retry;
693           }
694           count = prisoncount;
695 
696           lockmgr(&jail_lock, LK_EXCLUSIVE);
697           LIST_FOREACH(pr, &allprison, pr_list) {
698                     error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL,
699                                                   &fullpath, &freepath, 0);
700                     if (error)
701                               continue;
702                     if (jlsused && jlsused < jlssize)
703                               jls[jlsused++] = '\n';
704                     count = ksnprintf(jls + jlsused, (jlssize - jlsused),
705                                          "%d %s %s",
706                                          pr->pr_id, pr->pr_host, fullpath);
707                     kfree(freepath, M_TEMP);
708                     if (count < 0)
709                               goto end;
710                     jlsused += count;
711 
712                     /* Copy the IPS */
713                     SLIST_FOREACH(jip, &pr->pr_ips, entries) {
714                               char buf[INET_ADDRSTRLEN];
715 
716                               jsin = (struct sockaddr_in *)&jip->ip;
717 
718                               switch(jsin->sin_family) {
719                               case AF_INET:
720                                         oip = kinet_ntoa(jsin->sin_addr, buf);
721                                         break;
722 #ifdef INET6
723                               case AF_INET6:
724                                         jsin6 = (struct sockaddr_in6 *)&jip->ip;
725                                         oip = ip6_sprintf(&jsin6->sin6_addr);
726                                         break;
727 #endif
728                               default:
729                                         oip = "?family?";
730                                         break;
731                               }
732 
733                               if ((jlssize - jlsused) < (strlen(oip) + 1)) {
734                                         error = ERANGE;
735                                         goto end;
736                               }
737                               count = ksnprintf(jls + jlsused, (jlssize - jlsused),
738                                                     " %s", oip);
739                               if (count < 0)
740                                         goto end;
741                               jlsused += count;
742                     }
743           }
744 
745           /*
746            * The format is:
747            * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id...
748            */
749           error = SYSCTL_OUT(req, jls, jlsused);
750 end:
751           lockmgr(&jail_lock, LK_RELEASE);
752           kfree(jls, M_TEMP);
753 
754           return(error);
755 }
756 
757 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
758              sysctl_jail_list, "A", "List of active jails");
759 
760 static int
sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)761 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
762 {
763           int error, injail;
764 
765           injail = jailed(req->td->td_ucred);
766           error = SYSCTL_OUT(req, &injail, sizeof(injail));
767 
768           return (error);
769 }
770 
771 SYSCTL_PROC(_jail, OID_AUTO, jailed,
772               CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0,
773               sysctl_jail_jailed, "I", "Process in jail?");
774 
775 /*
776  * MPSAFE
777  */
778 void
prison_hold(struct prison * pr)779 prison_hold(struct prison *pr)
780 {
781           atomic_add_int(&pr->pr_ref, 1);
782 #ifdef PRISON_DEBUG
783           if (prison_debug > 0) {
784                     --prison_debug;
785                     print_backtrace(-1);
786           }
787 #endif
788 }
789 
790 /*
791  * MPALMOSTSAFE
792  */
793 void
prison_free(struct prison * pr)794 prison_free(struct prison *pr)
795 {
796           struct jail_ip_storage *jls;
797 
798 #ifdef PRISON_DEBUG
799           if (prison_debug > 0) {
800                     --prison_debug;
801                     print_backtrace(-1);
802           }
803 #endif
804           KKASSERT(pr->pr_ref > 0);
805           if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1)
806                     return;
807 
808           /*
809            * The global jail lock is needed on the last ref to adjust
810            * the list.
811            */
812           lockmgr(&jail_lock, LK_EXCLUSIVE);
813           if (pr->pr_ref) {
814                     lockmgr(&jail_lock, LK_RELEASE);
815                     return;
816           }
817           LIST_REMOVE(pr, pr_list);
818           --prisoncount;
819 
820           /*
821            * Clean up
822            */
823           while (!SLIST_EMPTY(&pr->pr_ips)) {
824                     jls = SLIST_FIRST(&pr->pr_ips);
825                     SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
826                     kfree(jls, M_PRISON);
827           }
828           lockmgr(&jail_lock, LK_RELEASE);
829 
830           if (pr->pr_linux != NULL)
831                     kfree(pr->pr_linux, M_PRISON);
832           varsymset_clean(&pr->pr_varsymset);
833 
834           /* Release the sysctl tree */
835           prison_sysctl_done(pr);
836 
837           cache_drop(&pr->pr_root);
838           kfree(pr, M_PRISON);
839 }
840 
841 /*
842  * Check if permisson for a specific privilege is granted within jail.
843  *
844  * MPSAFE
845  */
846 int
prison_priv_check(struct ucred * cred,int cap)847 prison_priv_check(struct ucred *cred, int cap)
848 {
849           struct prison *pr = cred->cr_prison;
850 
851           if (!jailed(cred))
852                     return (0);
853 
854           switch (cap & ~__SYSCAP_XFLAGS) {
855           case SYSCAP_RESTRICTEDROOT:             /* meta group 1 */
856                     /* RESTRICTEDROOT fallbacks disallowed in jails */
857                     return EPERM;
858           case SYSCAP_SENSITIVEROOT:              /* meta group 2 */
859           case SYSCAP_NOEXEC:                     /* meta group 3 */
860           case SYSCAP_NOCRED:                     /* meta group 4 */
861                     return 0;
862           case SYSCAP_NOJAIL:                     /* meta group 5 */
863                     /* all jail ops disallowed in jails */
864                     return EPERM;
865           case SYSCAP_NONET:                      /* meta group 6 */
866                     return 0;
867           case SYSCAP_NONET_SENSITIVE:            /* meta group 7 */
868                     /* all sensitive network ops disallowed in jails */
869                     return EPERM;
870           case SYSCAP_NOVFS:                      /* meta group 8 */
871           case SYSCAP_NOVFS_SENSITIVE:            /* meta group 9 */
872           case SYSCAP_NOMOUNT:                              /* meta group 10 */
873           case SYSCAP_NO11:                       /* meta group 11 */
874           case SYSCAP_NO12:                       /* meta group 12 */
875           case SYSCAP_NO13:                       /* meta group 13 */
876           case SYSCAP_NO14:                       /* meta group 14 */
877           case SYSCAP_NO15:                       /* meta group 15 */
878                     return (0);
879 
880           /* ----- */                                       /* group 1 - disallowed */
881 
882           case SYSCAP_NOPROC_TRESPASS:            /* group 2 allowed */
883           case SYSCAP_NOPROC_SETLOGIN:
884           case SYSCAP_NOPROC_SETRLIMIT:
885           case SYSCAP_NOSYSCTL_WR:
886           case SYSCAP_NOVARSYM_SYS:
887           case SYSCAP_NOSETHOSTNAME:
888           case SYSCAP_NOQUOTA_WR:
889           case SYSCAP_NODEBUG_UNPRIV:
890           case SYSCAP_NOSCHED:
891           case SYSCAP_NOSCHED_CPUSET:
892           case SYSCAP_NOSETTIME:
893                     return (0);
894 
895           case SYSCAP_NOEXEC_SUID:                /* group 3 allowed */
896           case SYSCAP_NOEXEC_SGID:
897                     return (0);
898 
899           case SYSCAP_NOCRED_SETUID:              /* group 4 allowed */
900           case SYSCAP_NOCRED_SETGID:
901           case SYSCAP_NOCRED_SETEUID:
902           case SYSCAP_NOCRED_SETEGID:
903           case SYSCAP_NOCRED_SETREUID:
904           case SYSCAP_NOCRED_SETREGID:
905           case SYSCAP_NOCRED_SETRESUID:
906           case SYSCAP_NOCRED_SETRESGID:
907           case SYSCAP_NOCRED_SETGROUPS:
908                     return (0);
909 
910           case SYSCAP_NOJAIL_CREATE:              /* group 5 disallowed */
911           case SYSCAP_NOJAIL_ATTACH:
912                     return EPERM;
913 
914           case SYSCAP_NONET_RESPORT:              /* group 6 mostly allowed */
915                     /*
916                      * Allow reserved ports
917                      */
918                     return 0;
919           case SYSCAP_NONET_RAW:
920                     /*
921                      * Conditionally allow creating raw sockets in jail.
922                      */
923                     if (PRISON_CAP_ISSET(pr->pr_caps,
924                               PRISON_CAP_NET_RAW_SOCKETS))
925                               return (0);
926                     else
927                               return (EPERM);
928 
929           /* ----- */                                       /* group 7 - disallowed */
930 
931           case SYSCAP_NOVFS_SYSFLAGS:             /* group 8 - allowed */
932           case SYSCAP_NOVFS_CHOWN:
933           case SYSCAP_NOVFS_CHMOD:
934           case SYSCAP_NOVFS_LINK:
935           case SYSCAP_NOVFS_CHFLAGS_DEV:
936           case SYSCAP_NOVFS_SETATTR:
937           case SYSCAP_NOVFS_SETGID:
938           case SYSCAP_NOVFS_GENERATION:
939           case SYSCAP_NOVFS_RETAINSUGID:
940                     return (0);
941 
942           case SYSCAP_NOVFS_MKNOD_BAD:            /* group 9 - allowed */
943           case SYSCAP_NOVFS_MKNOD_WHT:
944           case SYSCAP_NOVFS_MKNOD_DIR:
945           case SYSCAP_NOVFS_MKNOD_DEV:
946           case SYSCAP_NOVFS_IOCTL:
947           case SYSCAP_NOVFS_CHROOT:
948           case SYSCAP_NOVFS_REVOKE:
949                     return (0);
950 
951           case SYSCAP_NOMOUNT_NULLFS:             /* group 10 - conditional */
952                     if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_NULLFS))
953                               return (0);
954                     else
955                               return (EPERM);
956           case SYSCAP_NOMOUNT_DEVFS:
957                     if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_DEVFS))
958                               return (0);
959                     else
960                               return (EPERM);
961           case SYSCAP_NOMOUNT_TMPFS:
962                     if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_TMPFS))
963                               return (0);
964                     else
965                               return (EPERM);
966           case SYSCAP_NOMOUNT_PROCFS:
967                     if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_PROCFS))
968                               return (0);
969                     else
970                               return (EPERM);
971           case SYSCAP_NOMOUNT_FUSE:
972                     if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_FUSEFS))
973                               return (0);
974                     else
975                               return (EPERM);
976           case SYSCAP_NOMOUNT_UMOUNT:
977                     return (0);
978 
979           default:
980                     /* otherwise disallow */
981                     return (EPERM);
982           }
983 }
984 
985 
986 /*
987  * Create a per-jail sysctl tree to control the prison
988  */
989 int
prison_sysctl_create(struct prison * pr)990 prison_sysctl_create(struct prison *pr)
991 {
992           char id_str[7];
993 
994           ksnprintf(id_str, 6, "%d", pr->pr_id);
995 
996           pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc(
997                     sizeof(struct sysctl_ctx_list), M_PRISON, M_WAITOK | M_ZERO);
998 
999           sysctl_ctx_init(pr->pr_sysctl_ctx);
1000 
1001           /* Main jail node */
1002           pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx,
1003               SYSCTL_STATIC_CHILDREN(_jail),
1004               OID_AUTO, id_str, CTLFLAG_RD, 0,
1005               "Jail specific settings");
1006 
1007           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1008               OID_AUTO, "sys_set_hostname", CTLFLAG_RW,
1009               &pr->pr_caps, 0, PRISON_CAP_SYS_SET_HOSTNAME,
1010               "Processes in jail can set their hostnames");
1011 
1012           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1013               OID_AUTO, "sys_sysvipc", CTLFLAG_RW,
1014               &pr->pr_caps, 0, PRISON_CAP_SYS_SYSVIPC,
1015               "Processes in jail can use System V IPC primitives");
1016 
1017           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1018               OID_AUTO, "net_unixiproute", CTLFLAG_RW,
1019               &pr->pr_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
1020               "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
1021 
1022           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1023               OID_AUTO, "net_raw_sockets", CTLFLAG_RW,
1024               &pr->pr_caps, 0, PRISON_CAP_NET_RAW_SOCKETS,
1025               "Process in jail can create raw sockets");
1026 
1027           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1028               OID_AUTO, "allow_listen_override", CTLFLAG_RW,
1029               &pr->pr_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE,
1030               "Process in jail can create raw sockets");
1031 
1032           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1033               OID_AUTO, "vfs_chflags", CTLFLAG_RW,
1034               &pr->pr_caps, 0, PRISON_CAP_VFS_CHFLAGS,
1035               "Process in jail can override host wildcard listen");
1036 
1037           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1038               OID_AUTO, "vfs_mount_nullfs", CTLFLAG_RW,
1039               &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS,
1040               "Processes in jail can mount nullfs(5) filesystems");
1041 
1042           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1043               OID_AUTO, "vfs_mount_tmpfs", CTLFLAG_RW,
1044               &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS,
1045               "Processes in jail can mount tmpfs(5) filesystems");
1046 
1047           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1048               OID_AUTO, "vfs_mount_devfs", CTLFLAG_RW,
1049               &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_DEVFS,
1050               "Processes in jail can mount devfs(5) filesystems");
1051 
1052           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1053               OID_AUTO, "vfs_mount_procfs", CTLFLAG_RW,
1054               &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_PROCFS,
1055               "Processes in jail can mount procfs(5) filesystems");
1056 
1057           SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1058               OID_AUTO, "vfs_mount_fusefs", CTLFLAG_RW,
1059               &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_FUSEFS,
1060               "Processes in jail can mount fuse filesystems");
1061 
1062           return 0;
1063 }
1064 
1065 int
prison_sysctl_done(struct prison * pr)1066 prison_sysctl_done(struct prison *pr)
1067 {
1068           if (pr->pr_sysctl_tree) {
1069                     sysctl_ctx_free(pr->pr_sysctl_ctx);
1070                     kfree(pr->pr_sysctl_ctx, M_PRISON);
1071                     pr->pr_sysctl_tree = NULL;
1072           }
1073 
1074           return 0;
1075 }
1076