xref: /freebsd-13-stable/sys/compat/linux/linux_mib.c (revision 3bc80996974a61a4223eae4c1ccd47b6ee32a48a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1999 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/lock.h>
32 #include <sys/malloc.h>
33 #include <sys/mount.h>
34 #include <sys/jail.h>
35 #include <sys/proc.h>
36 #include <sys/sx.h>
37 
38 #include <compat/linux/linux_mib.h>
39 #include <compat/linux/linux_misc.h>
40 
41 struct linux_prison {
42 	char	pr_osname[LINUX_MAX_UTSNAME];
43 	char	pr_osrelease[LINUX_MAX_UTSNAME];
44 	int	pr_oss_version;
45 	int	pr_osrel;
46 };
47 
48 static struct linux_prison lprison0 = {
49 	.pr_osname =		"Linux",
50 	.pr_osrelease =		LINUX_VERSION_STR,
51 	.pr_oss_version =	0x030600,
52 	.pr_osrel =		LINUX_VERSION_CODE
53 };
54 
55 static unsigned linux_osd_jail_slot;
56 
57 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
58     "Linux mode");
59 
60 int linux_debug = 3;
61 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
62     &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");
63 
64 int linux_default_openfiles = 1024;
65 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
66     &linux_default_openfiles, 0,
67     "Default soft openfiles resource limit, or -1 for unlimited");
68 
69 int linux_default_stacksize = 8 * 1024 * 1024;
70 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
71     &linux_default_stacksize, 0,
72     "Default soft stack size resource limit, or -1 for unlimited");
73 
74 int linux_dummy_rlimits = 0;
75 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
76     &linux_dummy_rlimits, 0,
77     "Return dummy values for unsupported Linux-specific rlimits");
78 
79 int linux_ignore_ip_recverr = 1;
80 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
81     &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");
82 
83 int linux_preserve_vstatus = 1;
84 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
85     &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");
86 
87 bool linux_map_sched_prio = true;
88 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
89     &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
90     "(not POSIX compliant)");
91 
92 int linux_use_emul_path = 1;
93 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
94     &linux_use_emul_path, 0, "Use linux.compat.emul_path");
95 
96 static bool linux_setid_allowed = true;
97 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
98     &linux_setid_allowed, 0,
99     "Allow setuid/setgid on execve of Linux binary");
100 
101 int
linux_setid_allowed_query(struct thread * td __unused,struct image_params * imgp __unused)102 linux_setid_allowed_query(struct thread *td __unused,
103     struct image_params *imgp __unused)
104 {
105 	return (linux_setid_allowed);
106 }
107 
/*
 * Setters used by the sysctl handlers below; their definitions appear
 * later in this file.
 */
static int linux_set_osname(struct thread *td, char *osname);
static int linux_set_osrelease(struct thread *td, char *osrelease);
static int linux_set_oss_version(struct thread *td, int oss_version);
111 
112 static int
linux_sysctl_osname(SYSCTL_HANDLER_ARGS)113 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
114 {
115 	char osname[LINUX_MAX_UTSNAME];
116 	int error;
117 
118 	linux_get_osname(req->td, osname);
119 	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
120 	if (error != 0 || req->newptr == NULL)
121 		return (error);
122 	error = linux_set_osname(req->td, osname);
123 
124 	return (error);
125 }
126 
127 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
128 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
129 	    0, 0, linux_sysctl_osname, "A",
130 	    "Linux kernel OS name");
131 
132 static int
linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)133 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
134 {
135 	char osrelease[LINUX_MAX_UTSNAME];
136 	int error;
137 
138 	linux_get_osrelease(req->td, osrelease);
139 	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
140 	if (error != 0 || req->newptr == NULL)
141 		return (error);
142 	error = linux_set_osrelease(req->td, osrelease);
143 
144 	return (error);
145 }
146 
147 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
148 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
149 	    0, 0, linux_sysctl_osrelease, "A",
150 	    "Linux kernel OS release");
151 
152 static int
linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)153 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
154 {
155 	int oss_version;
156 	int error;
157 
158 	oss_version = linux_get_oss_version(req->td);
159 	error = sysctl_handle_int(oidp, &oss_version, 0, req);
160 	if (error != 0 || req->newptr == NULL)
161 		return (error);
162 	error = linux_set_oss_version(req->td, oss_version);
163 
164 	return (error);
165 }
166 
167 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
168 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
169 	    0, 0, linux_sysctl_oss_version, "I",
170 	    "Linux OSS version");
171 
/*
 * Map the osrelease into integer
 *
 * The accepted syntax is "<major>.<minor>.<patch>", optionally followed by
 * a '-' and at least one more character.  Each component is parsed with
 * strtol() in base 10.
 *
 * osrelease: NUL-terminated release string; it is not modified.
 * osrel:     when non-NULL, receives LINUX_KERNVER(major, minor, patch)
 *            on success and is left untouched on failure.
 *
 * Returns 0 on success.  Returns EINVAL when the string is malformed, when
 * a component is negative or does not fit in an int, or when the resulting
 * value is below LINUX_KERNVER(1, 0, 0).
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	long l0, l1, l2;
	int v0, v1, v2, v;

	eosrelease = osrelease + strlen(osrelease);

	/* Major: must be followed by '.' and at least one more character. */
	l0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;

	/* Minor: same trailing requirement as the major component. */
	l1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;

	/* Patch: either ends the string or is followed by "-<something>". */
	l2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	/*
	 * strtol() accepts a leading sign and yields a long.  Refuse negative
	 * components and ones that would change value when narrowed to int,
	 * instead of silently packing a bogus version.
	 * NOTE(review): very large in-range components may still overflow
	 * inside LINUX_KERNVER(); its definition is not visible here.
	 */
	v0 = (int)l0;
	v1 = (int)l1;
	v2 = (int)l2;
	if (l0 < 0 || l1 < 0 || l2 < 0 || v0 != l0 || v1 != l1 || v2 != l2)
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
205 
206 /*
207  * Find a prison with Linux info.
208  * Return the Linux info and the (locked) prison.
209  */
210 static struct linux_prison *
linux_find_prison(struct prison * spr,struct prison ** prp)211 linux_find_prison(struct prison *spr, struct prison **prp)
212 {
213 	struct prison *pr;
214 	struct linux_prison *lpr;
215 
216 	for (pr = spr;; pr = pr->pr_parent) {
217 		mtx_lock(&pr->pr_mtx);
218 		lpr = (pr == &prison0)
219 		    ? &lprison0
220 		    : osd_jail_get(pr, linux_osd_jail_slot);
221 		if (lpr != NULL)
222 			break;
223 		mtx_unlock(&pr->pr_mtx);
224 	}
225 	*prp = pr;
226 
227 	return (lpr);
228 }
229 
230 /*
231  * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
232  * the Linux info and lock the prison.
233  */
234 static void
linux_alloc_prison(struct prison * pr,struct linux_prison ** lprp)235 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
236 {
237 	struct prison *ppr;
238 	struct linux_prison *lpr, *nlpr;
239 	void **rsv;
240 
241 	/* If this prison already has Linux info, return that. */
242 	lpr = linux_find_prison(pr, &ppr);
243 	if (ppr == pr)
244 		goto done;
245 	/*
246 	 * Allocate a new info record.  Then check again, in case something
247 	 * changed during the allocation.
248 	 */
249 	mtx_unlock(&ppr->pr_mtx);
250 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
251 	rsv = osd_reserve(linux_osd_jail_slot);
252 	lpr = linux_find_prison(pr, &ppr);
253 	if (ppr == pr) {
254 		free(nlpr, M_PRISON);
255 		osd_free_reserved(rsv);
256 		goto done;
257 	}
258 	/* Inherit the initial values from the ancestor. */
259 	mtx_lock(&pr->pr_mtx);
260 	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
261 	bcopy(lpr, nlpr, sizeof(*lpr));
262 	lpr = nlpr;
263 	mtx_unlock(&ppr->pr_mtx);
264  done:
265 	if (lprp != NULL)
266 		*lprp = lpr;
267 	else
268 		mtx_unlock(&pr->pr_mtx);
269 }
270 
271 /*
272  * Jail OSD methods for Linux prison data.
273  */
274 static int
linux_prison_create(void * obj,void * data)275 linux_prison_create(void *obj, void *data)
276 {
277 	struct prison *pr = obj;
278 	struct vfsoptlist *opts = data;
279 	int jsys;
280 
281 	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
282 	    jsys == JAIL_SYS_INHERIT)
283 		return (0);
284 	/*
285 	 * Inherit a prison's initial values from its parent
286 	 * (different from JAIL_SYS_INHERIT which also inherits changes).
287 	 */
288 	linux_alloc_prison(pr, NULL);
289 	return (0);
290 }
291 
292 static int
linux_prison_check(void * obj __unused,void * data)293 linux_prison_check(void *obj __unused, void *data)
294 {
295 	struct vfsoptlist *opts = data;
296 	char *osname, *osrelease;
297 	int error, jsys, len, oss_version;
298 
299 	/* Check that the parameters are correct. */
300 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
301 	if (error != ENOENT) {
302 		if (error != 0)
303 			return (error);
304 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
305 			return (EINVAL);
306 	}
307 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
308 	if (error != ENOENT) {
309 		if (error != 0)
310 			return (error);
311 		if (len == 0 || osname[len - 1] != '\0')
312 			return (EINVAL);
313 		if (len > LINUX_MAX_UTSNAME) {
314 			vfs_opterror(opts, "linux.osname too long");
315 			return (ENAMETOOLONG);
316 		}
317 	}
318 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
319 	if (error != ENOENT) {
320 		if (error != 0)
321 			return (error);
322 		if (len == 0 || osrelease[len - 1] != '\0')
323 			return (EINVAL);
324 		if (len > LINUX_MAX_UTSNAME) {
325 			vfs_opterror(opts, "linux.osrelease too long");
326 			return (ENAMETOOLONG);
327 		}
328 		error = linux_map_osrel(osrelease, NULL);
329 		if (error != 0) {
330 			vfs_opterror(opts, "linux.osrelease format error");
331 			return (error);
332 		}
333 	}
334 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
335 	    sizeof(oss_version));
336 
337 	if (error == ENOENT)
338 		error = 0;
339 	return (error);
340 }
341 
342 static int
linux_prison_set(void * obj,void * data)343 linux_prison_set(void *obj, void *data)
344 {
345 	struct linux_prison *lpr;
346 	struct prison *pr = obj;
347 	struct vfsoptlist *opts = data;
348 	char *osname, *osrelease;
349 	int error, gotversion, jsys, len, oss_version;
350 
351 	/* Set the parameters, which should be correct. */
352 	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
353 	if (error == ENOENT)
354 		jsys = -1;
355 	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
356 	if (error == ENOENT)
357 		osname = NULL;
358 	else
359 		jsys = JAIL_SYS_NEW;
360 	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
361 	if (error == ENOENT)
362 		osrelease = NULL;
363 	else
364 		jsys = JAIL_SYS_NEW;
365 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
366 	    sizeof(oss_version));
367 	if (error == ENOENT)
368 		gotversion = 0;
369 	else {
370 		gotversion = 1;
371 		jsys = JAIL_SYS_NEW;
372 	}
373 	switch (jsys) {
374 	case JAIL_SYS_INHERIT:
375 		/* "linux=inherit": inherit the parent's Linux info. */
376 		mtx_lock(&pr->pr_mtx);
377 		osd_jail_del(pr, linux_osd_jail_slot);
378 		mtx_unlock(&pr->pr_mtx);
379 		break;
380 	case JAIL_SYS_NEW:
381 		/*
382 		 * "linux=new" or "linux.*":
383 		 * the prison gets its own Linux info.
384 		 */
385 		linux_alloc_prison(pr, &lpr);
386 		if (osrelease) {
387 			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
388 			strlcpy(lpr->pr_osrelease, osrelease,
389 			    LINUX_MAX_UTSNAME);
390 		}
391 		if (osname)
392 			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
393 		if (gotversion)
394 			lpr->pr_oss_version = oss_version;
395 		mtx_unlock(&pr->pr_mtx);
396 	}
397 
398 	return (0);
399 }
400 
401 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
402 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
403     "Jail Linux kernel OS name");
404 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
405     "Jail Linux kernel OS release");
406 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
407     "I", "Jail Linux OSS version");
408 
409 static int
linux_prison_get(void * obj,void * data)410 linux_prison_get(void *obj, void *data)
411 {
412 	struct linux_prison *lpr;
413 	struct prison *ppr;
414 	struct prison *pr = obj;
415 	struct vfsoptlist *opts = data;
416 	int error, i;
417 
418 	static int version0;
419 
420 	/* See if this prison is the one with the Linux info. */
421 	lpr = linux_find_prison(pr, &ppr);
422 	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
423 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
424 	if (error != 0 && error != ENOENT)
425 		goto done;
426 	if (i) {
427 		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
428 		if (error != 0 && error != ENOENT)
429 			goto done;
430 		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
431 		if (error != 0 && error != ENOENT)
432 			goto done;
433 		error = vfs_setopt(opts, "linux.oss_version",
434 		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
435 		if (error != 0 && error != ENOENT)
436 			goto done;
437 	} else {
438 		/*
439 		 * If this prison is inheriting its Linux info, report
440 		 * empty/zero parameters.
441 		 */
442 		error = vfs_setopts(opts, "linux.osname", "");
443 		if (error != 0 && error != ENOENT)
444 			goto done;
445 		error = vfs_setopts(opts, "linux.osrelease", "");
446 		if (error != 0 && error != ENOENT)
447 			goto done;
448 		error = vfs_setopt(opts, "linux.oss_version", &version0,
449 		    sizeof(lpr->pr_oss_version));
450 		if (error != 0 && error != ENOENT)
451 			goto done;
452 	}
453 	error = 0;
454 
455  done:
456 	mtx_unlock(&ppr->pr_mtx);
457 
458 	return (error);
459 }
460 
461 static void
linux_prison_destructor(void * data)462 linux_prison_destructor(void *data)
463 {
464 
465 	free(data, M_PRISON);
466 }
467 
468 void
linux_osd_jail_register(void)469 linux_osd_jail_register(void)
470 {
471 	struct prison *pr;
472 	osd_method_t methods[PR_MAXMETHOD] = {
473 	    [PR_METHOD_CREATE] =	linux_prison_create,
474 	    [PR_METHOD_GET] =		linux_prison_get,
475 	    [PR_METHOD_SET] =		linux_prison_set,
476 	    [PR_METHOD_CHECK] =		linux_prison_check
477 	};
478 
479 	linux_osd_jail_slot =
480 	    osd_jail_register(linux_prison_destructor, methods);
481 	/* Copy the system Linux info to any current prisons. */
482 	sx_slock(&allprison_lock);
483 	TAILQ_FOREACH(pr, &allprison, pr_list)
484 		linux_alloc_prison(pr, NULL);
485 	sx_sunlock(&allprison_lock);
486 }
487 
488 void
linux_osd_jail_deregister(void)489 linux_osd_jail_deregister(void)
490 {
491 
492 	osd_jail_deregister(linux_osd_jail_slot);
493 }
494 
495 void
linux_get_osname(struct thread * td,char * dst)496 linux_get_osname(struct thread *td, char *dst)
497 {
498 	struct prison *pr;
499 	struct linux_prison *lpr;
500 
501 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
502 	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
503 	mtx_unlock(&pr->pr_mtx);
504 }
505 
506 static int
linux_set_osname(struct thread * td,char * osname)507 linux_set_osname(struct thread *td, char *osname)
508 {
509 	struct prison *pr;
510 	struct linux_prison *lpr;
511 
512 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
513 	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
514 	mtx_unlock(&pr->pr_mtx);
515 
516 	return (0);
517 }
518 
519 void
linux_get_osrelease(struct thread * td,char * dst)520 linux_get_osrelease(struct thread *td, char *dst)
521 {
522 	struct prison *pr;
523 	struct linux_prison *lpr;
524 
525 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
526 	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
527 	mtx_unlock(&pr->pr_mtx);
528 }
529 
530 int
linux_kernver(struct thread * td)531 linux_kernver(struct thread *td)
532 {
533 	struct prison *pr;
534 	struct linux_prison *lpr;
535 	int osrel;
536 
537 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
538 	osrel = lpr->pr_osrel;
539 	mtx_unlock(&pr->pr_mtx);
540 
541 	return (osrel);
542 }
543 
544 static int
linux_set_osrelease(struct thread * td,char * osrelease)545 linux_set_osrelease(struct thread *td, char *osrelease)
546 {
547 	struct prison *pr;
548 	struct linux_prison *lpr;
549 	int error;
550 
551 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
552 	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
553 	if (error == 0)
554 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
555 	mtx_unlock(&pr->pr_mtx);
556 
557 	return (error);
558 }
559 
560 int
linux_get_oss_version(struct thread * td)561 linux_get_oss_version(struct thread *td)
562 {
563 	struct prison *pr;
564 	struct linux_prison *lpr;
565 	int version;
566 
567 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
568 	version = lpr->pr_oss_version;
569 	mtx_unlock(&pr->pr_mtx);
570 
571 	return (version);
572 }
573 
574 static int
linux_set_oss_version(struct thread * td,int oss_version)575 linux_set_oss_version(struct thread *td, int oss_version)
576 {
577 	struct prison *pr;
578 	struct linux_prison *lpr;
579 
580 	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
581 	lpr->pr_oss_version = oss_version;
582 	mtx_unlock(&pr->pr_mtx);
583 
584 	return (0);
585 }
586