1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 1999 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/lock.h>
32 #include <sys/malloc.h>
33 #include <sys/mount.h>
34 #include <sys/jail.h>
35 #include <sys/proc.h>
36 #include <sys/sx.h>
37
38 #include <compat/linux/linux_mib.h>
39 #include <compat/linux/linux_misc.h>
40
41 struct linux_prison {
42 char pr_osname[LINUX_MAX_UTSNAME];
43 char pr_osrelease[LINUX_MAX_UTSNAME];
44 int pr_oss_version;
45 int pr_osrel;
46 };
47
48 static struct linux_prison lprison0 = {
49 .pr_osname = "Linux",
50 .pr_osrelease = LINUX_VERSION_STR,
51 .pr_oss_version = 0x030600,
52 .pr_osrel = LINUX_VERSION_CODE
53 };
54
/*
 * Jail OSD slot under which a prison's struct linux_prison is stored;
 * assigned by linux_osd_jail_register().
 */
static unsigned int linux_osd_jail_slot;
56
57 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
58 "Linux mode");
59
60 int linux_debug = 3;
61 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
62 &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");
63
64 int linux_default_openfiles = 1024;
65 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
66 &linux_default_openfiles, 0,
67 "Default soft openfiles resource limit, or -1 for unlimited");
68
69 int linux_default_stacksize = 8 * 1024 * 1024;
70 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
71 &linux_default_stacksize, 0,
72 "Default soft stack size resource limit, or -1 for unlimited");
73
74 int linux_dummy_rlimits = 0;
75 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
76 &linux_dummy_rlimits, 0,
77 "Return dummy values for unsupported Linux-specific rlimits");
78
79 int linux_ignore_ip_recverr = 1;
80 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
81 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");
82
83 int linux_preserve_vstatus = 1;
84 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
85 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");
86
87 bool linux_map_sched_prio = true;
88 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
89 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
90 "(not POSIX compliant)");
91
92 int linux_use_emul_path = 1;
93 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
94 &linux_use_emul_path, 0, "Use linux.compat.emul_path");
95
96 static bool linux_setid_allowed = true;
97 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
98 &linux_setid_allowed, 0,
99 "Allow setuid/setgid on execve of Linux binary");
100
101 int
linux_setid_allowed_query(struct thread * td __unused,struct image_params * imgp __unused)102 linux_setid_allowed_query(struct thread *td __unused,
103 struct image_params *imgp __unused)
104 {
105 return (linux_setid_allowed);
106 }
107
/*
 * Forward declarations of the setters used by the sysctl handlers below;
 * their definitions appear later in this file.
 */
static int	linux_set_osname(struct thread *td, char *osname);
static int	linux_set_osrelease(struct thread *td, char *osrelease);
static int	linux_set_oss_version(struct thread *td, int oss_version);
111
112 static int
linux_sysctl_osname(SYSCTL_HANDLER_ARGS)113 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
114 {
115 char osname[LINUX_MAX_UTSNAME];
116 int error;
117
118 linux_get_osname(req->td, osname);
119 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
120 if (error != 0 || req->newptr == NULL)
121 return (error);
122 error = linux_set_osname(req->td, osname);
123
124 return (error);
125 }
126
127 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
128 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
129 0, 0, linux_sysctl_osname, "A",
130 "Linux kernel OS name");
131
132 static int
linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)133 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
134 {
135 char osrelease[LINUX_MAX_UTSNAME];
136 int error;
137
138 linux_get_osrelease(req->td, osrelease);
139 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
140 if (error != 0 || req->newptr == NULL)
141 return (error);
142 error = linux_set_osrelease(req->td, osrelease);
143
144 return (error);
145 }
146
147 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
148 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
149 0, 0, linux_sysctl_osrelease, "A",
150 "Linux kernel OS release");
151
152 static int
linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)153 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
154 {
155 int oss_version;
156 int error;
157
158 oss_version = linux_get_oss_version(req->td);
159 error = sysctl_handle_int(oidp, &oss_version, 0, req);
160 if (error != 0 || req->newptr == NULL)
161 return (error);
162 error = linux_set_oss_version(req->td, oss_version);
163
164 return (error);
165 }
166
167 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
168 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
169 0, 0, linux_sysctl_oss_version, "I",
170 "Linux OSS version");
171
/*
 * Map the osrelease into integer
 *
 * The accepted form is "v0.v1.v2", optionally followed by '-' and at least
 * one more character.  Each component is parsed as a decimal number and the
 * three are combined with LINUX_KERNVER().
 *
 * osrelease: NUL-terminated release string to parse (not modified).
 * osrel:     if not NULL, receives the combined value on success; it is
 *            left untouched on failure.
 *
 * Returns 0 on success, or EINVAL if the string is malformed, has a
 * negative component, or maps below LINUX_KERNVER(1, 0, 0).
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	int len, v0, v1, v2, v;

	len = strlen(osrelease);
	eosrelease = osrelease + len;
	/* First component: must be followed by '.' and more text. */
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	/* Second component: same rule. */
	osrelease = sep + 1;
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	/* Third component: may end the string or be followed by "-...". */
	osrelease = sep + 1;
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	/*
	 * strtol() accepts a leading sign, so something like "2.-6.0" would
	 * otherwise get this far.  A negative component is never a valid
	 * version number and must not be folded into the combined value.
	 */
	if (v0 < 0 || v1 < 0 || v2 < 0)
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
205
206 /*
207 * Find a prison with Linux info.
208 * Return the Linux info and the (locked) prison.
209 */
210 static struct linux_prison *
linux_find_prison(struct prison * spr,struct prison ** prp)211 linux_find_prison(struct prison *spr, struct prison **prp)
212 {
213 struct prison *pr;
214 struct linux_prison *lpr;
215
216 for (pr = spr;; pr = pr->pr_parent) {
217 mtx_lock(&pr->pr_mtx);
218 lpr = (pr == &prison0)
219 ? &lprison0
220 : osd_jail_get(pr, linux_osd_jail_slot);
221 if (lpr != NULL)
222 break;
223 mtx_unlock(&pr->pr_mtx);
224 }
225 *prp = pr;
226
227 return (lpr);
228 }
229
230 /*
231 * Ensure a prison has its own Linux info. If lprp is non-null, point it to
232 * the Linux info and lock the prison.
233 */
234 static void
linux_alloc_prison(struct prison * pr,struct linux_prison ** lprp)235 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
236 {
237 struct prison *ppr;
238 struct linux_prison *lpr, *nlpr;
239 void **rsv;
240
241 /* If this prison already has Linux info, return that. */
242 lpr = linux_find_prison(pr, &ppr);
243 if (ppr == pr)
244 goto done;
245 /*
246 * Allocate a new info record. Then check again, in case something
247 * changed during the allocation.
248 */
249 mtx_unlock(&ppr->pr_mtx);
250 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
251 rsv = osd_reserve(linux_osd_jail_slot);
252 lpr = linux_find_prison(pr, &ppr);
253 if (ppr == pr) {
254 free(nlpr, M_PRISON);
255 osd_free_reserved(rsv);
256 goto done;
257 }
258 /* Inherit the initial values from the ancestor. */
259 mtx_lock(&pr->pr_mtx);
260 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
261 bcopy(lpr, nlpr, sizeof(*lpr));
262 lpr = nlpr;
263 mtx_unlock(&ppr->pr_mtx);
264 done:
265 if (lprp != NULL)
266 *lprp = lpr;
267 else
268 mtx_unlock(&pr->pr_mtx);
269 }
270
271 /*
272 * Jail OSD methods for Linux prison data.
273 */
274 static int
linux_prison_create(void * obj,void * data)275 linux_prison_create(void *obj, void *data)
276 {
277 struct prison *pr = obj;
278 struct vfsoptlist *opts = data;
279 int jsys;
280
281 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
282 jsys == JAIL_SYS_INHERIT)
283 return (0);
284 /*
285 * Inherit a prison's initial values from its parent
286 * (different from JAIL_SYS_INHERIT which also inherits changes).
287 */
288 linux_alloc_prison(pr, NULL);
289 return (0);
290 }
291
292 static int
linux_prison_check(void * obj __unused,void * data)293 linux_prison_check(void *obj __unused, void *data)
294 {
295 struct vfsoptlist *opts = data;
296 char *osname, *osrelease;
297 int error, jsys, len, oss_version;
298
299 /* Check that the parameters are correct. */
300 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
301 if (error != ENOENT) {
302 if (error != 0)
303 return (error);
304 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
305 return (EINVAL);
306 }
307 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
308 if (error != ENOENT) {
309 if (error != 0)
310 return (error);
311 if (len == 0 || osname[len - 1] != '\0')
312 return (EINVAL);
313 if (len > LINUX_MAX_UTSNAME) {
314 vfs_opterror(opts, "linux.osname too long");
315 return (ENAMETOOLONG);
316 }
317 }
318 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
319 if (error != ENOENT) {
320 if (error != 0)
321 return (error);
322 if (len == 0 || osrelease[len - 1] != '\0')
323 return (EINVAL);
324 if (len > LINUX_MAX_UTSNAME) {
325 vfs_opterror(opts, "linux.osrelease too long");
326 return (ENAMETOOLONG);
327 }
328 error = linux_map_osrel(osrelease, NULL);
329 if (error != 0) {
330 vfs_opterror(opts, "linux.osrelease format error");
331 return (error);
332 }
333 }
334 error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
335 sizeof(oss_version));
336
337 if (error == ENOENT)
338 error = 0;
339 return (error);
340 }
341
342 static int
linux_prison_set(void * obj,void * data)343 linux_prison_set(void *obj, void *data)
344 {
345 struct linux_prison *lpr;
346 struct prison *pr = obj;
347 struct vfsoptlist *opts = data;
348 char *osname, *osrelease;
349 int error, gotversion, jsys, len, oss_version;
350
351 /* Set the parameters, which should be correct. */
352 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
353 if (error == ENOENT)
354 jsys = -1;
355 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
356 if (error == ENOENT)
357 osname = NULL;
358 else
359 jsys = JAIL_SYS_NEW;
360 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
361 if (error == ENOENT)
362 osrelease = NULL;
363 else
364 jsys = JAIL_SYS_NEW;
365 error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
366 sizeof(oss_version));
367 if (error == ENOENT)
368 gotversion = 0;
369 else {
370 gotversion = 1;
371 jsys = JAIL_SYS_NEW;
372 }
373 switch (jsys) {
374 case JAIL_SYS_INHERIT:
375 /* "linux=inherit": inherit the parent's Linux info. */
376 mtx_lock(&pr->pr_mtx);
377 osd_jail_del(pr, linux_osd_jail_slot);
378 mtx_unlock(&pr->pr_mtx);
379 break;
380 case JAIL_SYS_NEW:
381 /*
382 * "linux=new" or "linux.*":
383 * the prison gets its own Linux info.
384 */
385 linux_alloc_prison(pr, &lpr);
386 if (osrelease) {
387 (void)linux_map_osrel(osrelease, &lpr->pr_osrel);
388 strlcpy(lpr->pr_osrelease, osrelease,
389 LINUX_MAX_UTSNAME);
390 }
391 if (osname)
392 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
393 if (gotversion)
394 lpr->pr_oss_version = oss_version;
395 mtx_unlock(&pr->pr_mtx);
396 }
397
398 return (0);
399 }
400
401 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
402 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
403 "Jail Linux kernel OS name");
404 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
405 "Jail Linux kernel OS release");
406 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
407 "I", "Jail Linux OSS version");
408
409 static int
linux_prison_get(void * obj,void * data)410 linux_prison_get(void *obj, void *data)
411 {
412 struct linux_prison *lpr;
413 struct prison *ppr;
414 struct prison *pr = obj;
415 struct vfsoptlist *opts = data;
416 int error, i;
417
418 static int version0;
419
420 /* See if this prison is the one with the Linux info. */
421 lpr = linux_find_prison(pr, &ppr);
422 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
423 error = vfs_setopt(opts, "linux", &i, sizeof(i));
424 if (error != 0 && error != ENOENT)
425 goto done;
426 if (i) {
427 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
428 if (error != 0 && error != ENOENT)
429 goto done;
430 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
431 if (error != 0 && error != ENOENT)
432 goto done;
433 error = vfs_setopt(opts, "linux.oss_version",
434 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
435 if (error != 0 && error != ENOENT)
436 goto done;
437 } else {
438 /*
439 * If this prison is inheriting its Linux info, report
440 * empty/zero parameters.
441 */
442 error = vfs_setopts(opts, "linux.osname", "");
443 if (error != 0 && error != ENOENT)
444 goto done;
445 error = vfs_setopts(opts, "linux.osrelease", "");
446 if (error != 0 && error != ENOENT)
447 goto done;
448 error = vfs_setopt(opts, "linux.oss_version", &version0,
449 sizeof(lpr->pr_oss_version));
450 if (error != 0 && error != ENOENT)
451 goto done;
452 }
453 error = 0;
454
455 done:
456 mtx_unlock(&ppr->pr_mtx);
457
458 return (error);
459 }
460
461 static void
linux_prison_destructor(void * data)462 linux_prison_destructor(void *data)
463 {
464
465 free(data, M_PRISON);
466 }
467
468 void
linux_osd_jail_register(void)469 linux_osd_jail_register(void)
470 {
471 struct prison *pr;
472 osd_method_t methods[PR_MAXMETHOD] = {
473 [PR_METHOD_CREATE] = linux_prison_create,
474 [PR_METHOD_GET] = linux_prison_get,
475 [PR_METHOD_SET] = linux_prison_set,
476 [PR_METHOD_CHECK] = linux_prison_check
477 };
478
479 linux_osd_jail_slot =
480 osd_jail_register(linux_prison_destructor, methods);
481 /* Copy the system Linux info to any current prisons. */
482 sx_slock(&allprison_lock);
483 TAILQ_FOREACH(pr, &allprison, pr_list)
484 linux_alloc_prison(pr, NULL);
485 sx_sunlock(&allprison_lock);
486 }
487
488 void
linux_osd_jail_deregister(void)489 linux_osd_jail_deregister(void)
490 {
491
492 osd_jail_deregister(linux_osd_jail_slot);
493 }
494
495 void
linux_get_osname(struct thread * td,char * dst)496 linux_get_osname(struct thread *td, char *dst)
497 {
498 struct prison *pr;
499 struct linux_prison *lpr;
500
501 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
502 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
503 mtx_unlock(&pr->pr_mtx);
504 }
505
506 static int
linux_set_osname(struct thread * td,char * osname)507 linux_set_osname(struct thread *td, char *osname)
508 {
509 struct prison *pr;
510 struct linux_prison *lpr;
511
512 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
513 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
514 mtx_unlock(&pr->pr_mtx);
515
516 return (0);
517 }
518
519 void
linux_get_osrelease(struct thread * td,char * dst)520 linux_get_osrelease(struct thread *td, char *dst)
521 {
522 struct prison *pr;
523 struct linux_prison *lpr;
524
525 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
526 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
527 mtx_unlock(&pr->pr_mtx);
528 }
529
530 int
linux_kernver(struct thread * td)531 linux_kernver(struct thread *td)
532 {
533 struct prison *pr;
534 struct linux_prison *lpr;
535 int osrel;
536
537 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
538 osrel = lpr->pr_osrel;
539 mtx_unlock(&pr->pr_mtx);
540
541 return (osrel);
542 }
543
544 static int
linux_set_osrelease(struct thread * td,char * osrelease)545 linux_set_osrelease(struct thread *td, char *osrelease)
546 {
547 struct prison *pr;
548 struct linux_prison *lpr;
549 int error;
550
551 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
552 error = linux_map_osrel(osrelease, &lpr->pr_osrel);
553 if (error == 0)
554 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
555 mtx_unlock(&pr->pr_mtx);
556
557 return (error);
558 }
559
560 int
linux_get_oss_version(struct thread * td)561 linux_get_oss_version(struct thread *td)
562 {
563 struct prison *pr;
564 struct linux_prison *lpr;
565 int version;
566
567 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
568 version = lpr->pr_oss_version;
569 mtx_unlock(&pr->pr_mtx);
570
571 return (version);
572 }
573
574 static int
linux_set_oss_version(struct thread * td,int oss_version)575 linux_set_oss_version(struct thread *td, int oss_version)
576 {
577 struct prison *pr;
578 struct linux_prison *lpr;
579
580 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
581 lpr->pr_oss_version = oss_version;
582 mtx_unlock(&pr->pr_mtx);
583
584 return (0);
585 }
586