1 /*        $NetBSD: linux_sched.c,v 1.83 2024/10/03 12:56:49 hannken Exp $       */
2 
3 /*-
4  * Copyright (c) 1999, 2019 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center; by Matthias Scheler.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Linux compatibility module. Try to deal with scheduler related syscalls.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.83 2024/10/03 12:56:49 hannken Exp $");
39 
40 #include <sys/param.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/syscallargs.h>
46 #include <sys/wait.h>
47 #include <sys/kauth.h>
48 #include <sys/ptrace.h>
49 #include <sys/atomic.h>
50 
51 #include <sys/cpu.h>
52 
53 #include <compat/linux/common/linux_types.h>
54 #include <compat/linux/common/linux_signal.h>
55 #include <compat/linux/common/linux_emuldata.h>
56 #include <compat/linux/common/linux_ipc.h>
57 #include <compat/linux/common/linux_sem.h>
58 #include <compat/linux/common/linux_exec.h>
59 #include <compat/linux/common/linux_machdep.h>
60 
61 #include <compat/linux/linux_syscallargs.h>
62 
63 #include <compat/linux/common/linux_sched.h>
64 
65 static int linux_clone_nptl(struct lwp *, const struct linux_sys_clone_args *,
66     register_t *);
67 
/*
 * Size in bytes of a CPU mask large enough to hold one bit per CPU (ncpu),
 * rounded up to a whole number of longs.  Unlike Linux, the size is
 * calculated dynamically each time the macro is evaluated.
 */
#define	LINUX_CPU_MASK_SIZE \
	(((ncpu + LONG_BIT - 1) / LONG_BIT) * sizeof(long))
70 
/*
 * Debug tracing.  When DEBUG_LINUX is non-zero DPRINTF() forwards its
 * arguments to uprintf(); otherwise it expands to nothing.  At least one
 * argument must follow the format string.
 */
#if !DEBUG_LINUX
#define DPRINTF(x, ...)
#else
#define DPRINTF(x, ...) uprintf(x, __VA_ARGS__)
#endif
76 
77 static void
linux_child_return(void * arg)78 linux_child_return(void *arg)
79 {
80           struct lwp *l = arg;
81           struct proc *p = l->l_proc;
82           struct linux_emuldata *led = l->l_emuldata;
83           void *ctp = led->led_child_tidptr;
84           int error;
85 
86           if (ctp) {
87                     if ((error = copyout(&p->p_pid, ctp, sizeof(p->p_pid))) != 0)
88                               printf("%s: LINUX_CLONE_CHILD_SETTID "
89                                   "failed (child_tidptr = %p, tid = %d error =%d)\n",
90                                   __func__, ctp, p->p_pid, error);
91           }
92           child_return(arg);
93 }
94 
95 int
linux_sys_clone(struct lwp * l,const struct linux_sys_clone_args * uap,register_t * retval)96 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap,
97     register_t *retval)
98 {
99           /* {
100                     syscallarg(int) flags;
101                     syscallarg(void *) stack;
102                     syscallarg(void *) parent_tidptr;
103                     syscallarg(void *) tls;
104                     syscallarg(void *) child_tidptr;
105           } */
106           struct linux_emuldata *led;
107           int flags, sig, error;
108 
109           /*
110            * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
111            */
112           if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
113                     return EINVAL;
114 
115           /*
116            * Thread group implies shared signals. Shared signals
117            * imply shared VM. This matches what Linux kernel does.
118            */
119           if (SCARG(uap, flags) & LINUX_CLONE_THREAD
120               && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
121                     return EINVAL;
122           if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
123               && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
124                     return EINVAL;
125 
126           /*
127            * The thread group flavor is implemented totally differently.
128            */
129           if (SCARG(uap, flags) & LINUX_CLONE_THREAD)
130                     return linux_clone_nptl(l, uap, retval);
131 
132           flags = 0;
133           if (SCARG(uap, flags) & LINUX_CLONE_VM)
134                     flags |= FORK_SHAREVM;
135           if (SCARG(uap, flags) & LINUX_CLONE_FS)
136                     flags |= FORK_SHARECWD;
137           if (SCARG(uap, flags) & LINUX_CLONE_FILES)
138                     flags |= FORK_SHAREFILES;
139           if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
140                     flags |= FORK_SHARESIGS;
141           if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
142                     flags |= FORK_PPWAIT;
143 
144           sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
145           if (sig < 0 || sig >= LINUX__NSIG)
146                     return EINVAL;
147           sig = linux_to_native_signo[sig];
148 
149           if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID) {
150                     led = l->l_emuldata;
151                     led->led_child_tidptr = SCARG(uap, child_tidptr);
152           }
153 
154           /*
155            * Note that Linux does not provide a portable way of specifying
156            * the stack area; the caller must know if the stack grows up
157            * or down.  So, we pass a stack size of 0, so that the code
158            * that makes this adjustment is a noop.
159            */
160           if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
161               linux_child_return, NULL, retval)) != 0) {
162                     DPRINTF("%s: fork1: error %d\n", __func__, error);
163                     return error;
164           }
165 
166           return 0;
167 }
168 
169 
170 int
linux_sys_clone3(struct lwp * l,const struct linux_sys_clone3_args * uap,register_t * retval)171 linux_sys_clone3(struct lwp *l, const struct linux_sys_clone3_args *uap, register_t *retval)
172 {
173           struct linux_user_clone3_args cl_args;
174           struct linux_sys_clone_args clone_args;
175           int error;
176 
177           if (SCARG(uap, size) != sizeof(cl_args)) {
178               DPRINTF("%s: Invalid size less or more\n", __func__);
179               return EINVAL;
180           }
181 
182           error = copyin(SCARG(uap, cl_args), &cl_args, SCARG(uap, size));
183           if (error) {
184                     DPRINTF("%s: Copyin failed: %d\n", __func__, error);
185                     return error;
186           }
187 
188           DPRINTF("%s: Flags: %#jx\n", __func__, (intmax_t)cl_args.flags);
189 
190           /* Define allowed flags */
191           if (cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS) {
192                     DPRINTF("%s: Unsupported flags for clone3: %#" PRIx64 "\n",
193                         __func__, cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS);
194                     return EOPNOTSUPP;
195           }
196           if (cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS) {
197                     DPRINTF("%s: Disallowed flags for clone3: %#" PRIx64 "\n",
198                         __func__, cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS);
199                     return EINVAL;
200           }
201 
202 #if 0
203           // XXX: this is wrong, exit_signal is the signal to deliver to the
204           // process upon exit.
205           if ((cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL) != 0){
206                     DPRINTF("%s: Disallowed flags for clone3: %#x\n", __func__,
207                         cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL);
208                     return EINVAL;
209           }
210 #endif
211 
212           if (cl_args.stack == 0 && cl_args.stack_size != 0) {
213                     DPRINTF("%s: Stack is NULL but stack size is not 0\n",
214                         __func__);
215                     return EINVAL;
216           }
217           if (cl_args.stack != 0 && cl_args.stack_size == 0) {
218                     DPRINTF("%s: Stack is not NULL but stack size is 0\n",
219                         __func__);
220                     return EINVAL;
221           }
222 
223           int flags = cl_args.flags & LINUX_CLONE_ALLOWED_FLAGS;
224 #if 0
225           int sig = cl_args.exit_signal & LINUX_CLONE_CSIGNAL;
226 #endif
227           // XXX: Pidfd member handling
228           // XXX: we don't have cgroups
229           // XXX: what to do with tid_set and tid_set_size
230           // XXX: clone3 has stacksize, instead implement clone as a clone3
231           // wrapper.
232           SCARG(&clone_args, flags) = flags;
233           SCARG(&clone_args, stack) = (void *)(uintptr_t)cl_args.stack;
234           SCARG(&clone_args, parent_tidptr) =
235               (void *)(intptr_t)cl_args.parent_tid;
236           SCARG(&clone_args, tls) =
237               (void *)(intptr_t)cl_args.tls;
238           SCARG(&clone_args, child_tidptr) =
239               (void *)(intptr_t)cl_args.child_tid;
240 
241           return linux_sys_clone(l, &clone_args, retval);
242 }
243 
244 static int
linux_clone_nptl(struct lwp * l,const struct linux_sys_clone_args * uap,register_t * retval)245 linux_clone_nptl(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
246 {
247           /* {
248                     syscallarg(int) flags;
249                     syscallarg(void *) stack;
250                     syscallarg(void *) parent_tidptr;
251                     syscallarg(void *) tls;
252                     syscallarg(void *) child_tidptr;
253           } */
254           struct proc *p;
255           struct lwp *l2;
256           struct linux_emuldata *led;
257           void *parent_tidptr, *tls, *child_tidptr;
258           vaddr_t uaddr;
259           lwpid_t lid;
260           int flags, error;
261 
262           p = l->l_proc;
263           flags = SCARG(uap, flags);
264           parent_tidptr = SCARG(uap, parent_tidptr);
265           tls = SCARG(uap, tls);
266           child_tidptr = SCARG(uap, child_tidptr);
267 
268           uaddr = uvm_uarea_alloc();
269           if (__predict_false(uaddr == 0)) {
270                     return ENOMEM;
271           }
272 
273           error = lwp_create(l, p, uaddr, LWP_DETACHED,
274               SCARG(uap, stack), 0, child_return, NULL, &l2, l->l_class,
275               &l->l_sigmask, &l->l_sigstk);
276           if (__predict_false(error)) {
277                     DPRINTF("%s: lwp_create error=%d\n", __func__, error);
278                     uvm_uarea_free(uaddr);
279                     return error;
280           }
281           lid = l2->l_lid;
282 
283           /* LINUX_CLONE_CHILD_CLEARTID: clear TID in child's memory on exit() */
284           if (flags & LINUX_CLONE_CHILD_CLEARTID) {
285                     led = l2->l_emuldata;
286                     led->led_clear_tid = child_tidptr;
287           }
288 
289           /* LINUX_CLONE_PARENT_SETTID: store child's TID in parent's memory */
290           if (flags & LINUX_CLONE_PARENT_SETTID) {
291                     if ((error = copyout(&lid, parent_tidptr, sizeof(lid))) != 0)
292                               printf("%s: LINUX_CLONE_PARENT_SETTID "
293                                   "failed (parent_tidptr = %p tid = %d error=%d)\n",
294                                   __func__, parent_tidptr, lid, error);
295           }
296 
297           /* LINUX_CLONE_CHILD_SETTID: store child's TID in child's memory  */
298           if (flags & LINUX_CLONE_CHILD_SETTID) {
299                     if ((error = copyout(&lid, child_tidptr, sizeof(lid))) != 0)
300                               printf("%s: LINUX_CLONE_CHILD_SETTID "
301                                   "failed (child_tidptr = %p, tid = %d error=%d)\n",
302                                   __func__, child_tidptr, lid, error);
303           }
304 
305           if (flags & LINUX_CLONE_SETTLS) {
306                     error = LINUX_LWP_SETPRIVATE(l2, tls);
307                     if (error) {
308                               DPRINTF("%s: LINUX_LWP_SETPRIVATE %d\n", __func__,
309                                   error);
310                               lwp_exit(l2);
311                               return error;
312                     }
313           }
314 
315           /* Set the new LWP running. */
316           lwp_start(l2, 0);
317 
318           retval[0] = lid;
319           retval[1] = 0;
320           return 0;
321 }
322 
323 /*
324  * linux realtime priority
325  *
326  * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
327  *
328  * - SCHED_OTHER tasks don't have realtime priorities.
329  *   in particular, sched_param::sched_priority is always 0.
330  */
331 
332 #define   LINUX_SCHED_RTPRIO_MIN        1
333 #define   LINUX_SCHED_RTPRIO_MAX        99
334 
335 static int
sched_linux2native(int linux_policy,struct linux_sched_param * linux_params,int * native_policy,struct sched_param * native_params)336 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
337     int *native_policy, struct sched_param *native_params)
338 {
339 
340           switch (linux_policy) {
341           case LINUX_SCHED_OTHER:
342                     if (native_policy != NULL) {
343                               *native_policy = SCHED_OTHER;
344                     }
345                     break;
346 
347           case LINUX_SCHED_FIFO:
348                     if (native_policy != NULL) {
349                               *native_policy = SCHED_FIFO;
350                     }
351                     break;
352 
353           case LINUX_SCHED_RR:
354                     if (native_policy != NULL) {
355                               *native_policy = SCHED_RR;
356                     }
357                     break;
358 
359           default:
360                     return EINVAL;
361           }
362 
363           if (linux_params != NULL) {
364                     int prio = linux_params->sched_priority;
365 
366                     KASSERT(native_params != NULL);
367 
368                     if (linux_policy == LINUX_SCHED_OTHER) {
369                               if (prio != 0) {
370                                         return EINVAL;
371                               }
372                               native_params->sched_priority = PRI_NONE; /* XXX */
373                     } else {
374                               if (prio < LINUX_SCHED_RTPRIO_MIN ||
375                                   prio > LINUX_SCHED_RTPRIO_MAX) {
376                                         return EINVAL;
377                               }
378                               native_params->sched_priority =
379                                   (prio - LINUX_SCHED_RTPRIO_MIN)
380                                   * (SCHED_PRI_MAX - SCHED_PRI_MIN)
381                                   / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
382                                   + SCHED_PRI_MIN;
383                     }
384           }
385 
386           return 0;
387 }
388 
389 static int
sched_native2linux(int native_policy,struct sched_param * native_params,int * linux_policy,struct linux_sched_param * linux_params)390 sched_native2linux(int native_policy, struct sched_param *native_params,
391     int *linux_policy, struct linux_sched_param *linux_params)
392 {
393 
394           switch (native_policy) {
395           case SCHED_OTHER:
396                     if (linux_policy != NULL) {
397                               *linux_policy = LINUX_SCHED_OTHER;
398                     }
399                     break;
400 
401           case SCHED_FIFO:
402                     if (linux_policy != NULL) {
403                               *linux_policy = LINUX_SCHED_FIFO;
404                     }
405                     break;
406 
407           case SCHED_RR:
408                     if (linux_policy != NULL) {
409                               *linux_policy = LINUX_SCHED_RR;
410                     }
411                     break;
412 
413           default:
414                     panic("%s: unknown policy %d\n", __func__, native_policy);
415           }
416 
417           if (native_params != NULL) {
418                     int prio = native_params->sched_priority;
419 
420                     KASSERT(prio >= SCHED_PRI_MIN);
421                     KASSERT(prio <= SCHED_PRI_MAX);
422                     KASSERT(linux_params != NULL);
423 
424                     memset(linux_params, 0, sizeof(*linux_params));
425 
426                     DPRINTF("%s: native: policy %d, priority %d\n",
427                         __func__, native_policy, prio);
428 
429                     if (native_policy == SCHED_OTHER) {
430                               linux_params->sched_priority = 0;
431                     } else {
432                               linux_params->sched_priority =
433                                   (prio - SCHED_PRI_MIN)
434                                   * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
435                                   / (SCHED_PRI_MAX - SCHED_PRI_MIN)
436                                   + LINUX_SCHED_RTPRIO_MIN;
437                     }
438                     DPRINTF("%s: linux: policy %d, priority %d\n",
439                         __func__, -1, linux_params->sched_priority);
440           }
441 
442           return 0;
443 }
444 
445 int
linux_sys_sched_setparam(struct lwp * l,const struct linux_sys_sched_setparam_args * uap,register_t * retval)446 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
447 {
448           /* {
449                     syscallarg(linux_pid_t) pid;
450                     syscallarg(const struct linux_sched_param *) sp;
451           } */
452           int error, policy;
453           struct linux_sched_param lp;
454           struct sched_param sp;
455 
456           if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
457                     error = EINVAL;
458                     goto out;
459           }
460 
461           error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
462           if (error)
463                     goto out;
464 
465           /* We need the current policy in Linux terms. */
466           error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
467           if (error)
468                     goto out;
469           error = sched_native2linux(policy, NULL, &policy, NULL);
470           if (error)
471                     goto out;
472 
473           error = sched_linux2native(policy, &lp, &policy, &sp);
474           if (error)
475                     goto out;
476 
477           error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
478           if (error)
479                     goto out;
480 
481  out:
482           return error;
483 }
484 
485 int
linux_sys_sched_getparam(struct lwp * l,const struct linux_sys_sched_getparam_args * uap,register_t * retval)486 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
487 {
488           /* {
489                     syscallarg(linux_pid_t) pid;
490                     syscallarg(struct linux_sched_param *) sp;
491           } */
492           struct linux_sched_param lp;
493           struct sched_param sp;
494           int error, policy;
495 
496           if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
497                     error = EINVAL;
498                     goto out;
499           }
500 
501           error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
502           if (error)
503                     goto out;
504           DPRINTF("%s: native: policy %d, priority %d\n",
505               __func__, policy, sp.sched_priority);
506 
507           error = sched_native2linux(policy, &sp, NULL, &lp);
508           if (error)
509                     goto out;
510           DPRINTF("%s: linux: policy %d, priority %d\n",
511               __func__, policy, lp.sched_priority);
512 
513           error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
514           if (error)
515                     goto out;
516 
517  out:
518           return error;
519 }
520 
521 int
linux_sys_sched_setscheduler(struct lwp * l,const struct linux_sys_sched_setscheduler_args * uap,register_t * retval)522 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
523 {
524           /* {
525                     syscallarg(linux_pid_t) pid;
526                     syscallarg(int) policy;
527                     syscallarg(cont struct linux_sched_param *) sp;
528           } */
529           int error, policy;
530           struct linux_sched_param lp;
531           struct sched_param sp;
532 
533           if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
534                     error = EINVAL;
535                     goto out;
536           }
537 
538           error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
539           if (error)
540                     goto out;
541           DPRINTF("%s: linux: policy %d, priority %d\n",
542               __func__, SCARG(uap, policy), lp.sched_priority);
543 
544           error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
545           if (error)
546                     goto out;
547           DPRINTF("%s: native: policy %d, priority %d\n",
548               __func__, policy, sp.sched_priority);
549 
550           error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
551           if (error)
552                     goto out;
553 
554  out:
555           return error;
556 }
557 
558 int
linux_sys_sched_getscheduler(struct lwp * l,const struct linux_sys_sched_getscheduler_args * uap,register_t * retval)559 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
560 {
561           /* {
562                     syscallarg(linux_pid_t) pid;
563           } */
564           int error, policy;
565 
566           *retval = -1;
567 
568           error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
569           if (error)
570                     goto out;
571 
572           error = sched_native2linux(policy, NULL, &policy, NULL);
573           if (error)
574                     goto out;
575 
576           *retval = policy;
577 
578  out:
579           return error;
580 }
581 
582 int
linux_sys_sched_yield(struct lwp * l,const void * v,register_t * retval)583 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
584 {
585 
586           yield();
587           return 0;
588 }
589 
590 int
linux_sys_sched_get_priority_max(struct lwp * l,const struct linux_sys_sched_get_priority_max_args * uap,register_t * retval)591 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
592 {
593           /* {
594                     syscallarg(int) policy;
595           } */
596 
597           switch (SCARG(uap, policy)) {
598           case LINUX_SCHED_OTHER:
599                     *retval = 0;
600                     break;
601           case LINUX_SCHED_FIFO:
602           case LINUX_SCHED_RR:
603                     *retval = LINUX_SCHED_RTPRIO_MAX;
604                     break;
605           default:
606                     return EINVAL;
607           }
608 
609           return 0;
610 }
611 
612 int
linux_sys_sched_get_priority_min(struct lwp * l,const struct linux_sys_sched_get_priority_min_args * uap,register_t * retval)613 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
614 {
615           /* {
616                     syscallarg(int) policy;
617           } */
618 
619           switch (SCARG(uap, policy)) {
620           case LINUX_SCHED_OTHER:
621                     *retval = 0;
622                     break;
623           case LINUX_SCHED_FIFO:
624           case LINUX_SCHED_RR:
625                     *retval = LINUX_SCHED_RTPRIO_MIN;
626                     break;
627           default:
628                     return EINVAL;
629           }
630 
631           return 0;
632 }
633 
634 int
linux_sys_exit(struct lwp * l,const struct linux_sys_exit_args * uap,register_t * retval)635 linux_sys_exit(struct lwp *l, const struct linux_sys_exit_args *uap, register_t *retval)
636 {
637 
638           lwp_exit(l);
639           return 0;
640 }
641 
642 #ifndef __m68k__
643 /* Present on everything but m68k */
644 int
linux_sys_exit_group(struct lwp * l,const struct linux_sys_exit_group_args * uap,register_t * retval)645 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
646 {
647 
648           return sys_exit(l, (const void *)uap, retval);
649 }
650 #endif /* !__m68k__ */
651 
652 int
linux_sys_set_tid_address(struct lwp * l,const struct linux_sys_set_tid_address_args * uap,register_t * retval)653 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
654 {
655           /* {
656                     syscallarg(int *) tidptr;
657           } */
658           struct linux_emuldata *led;
659 
660           led = (struct linux_emuldata *)l->l_emuldata;
661           led->led_clear_tid = SCARG(uap, tid);
662           *retval = l->l_lid;
663 
664           return 0;
665 }
666 
667 /* ARGUSED1 */
668 int
linux_sys_gettid(struct lwp * l,const void * v,register_t * retval)669 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
670 {
671 
672           *retval = l->l_lid;
673           return 0;
674 }
675 
676 /*
677  * The affinity syscalls assume that the layout of our cpu kcpuset is
678  * the same as linux's: a linear bitmask.
679  */
680 int
linux_sys_sched_getaffinity(struct lwp * l,const struct linux_sys_sched_getaffinity_args * uap,register_t * retval)681 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
682 {
683           /* {
684                     syscallarg(linux_pid_t) pid;
685                     syscallarg(unsigned int) len;
686                     syscallarg(unsigned long *) mask;
687           } */
688           struct proc *p;
689           struct lwp *t;
690           kcpuset_t *kcset;
691           size_t size;
692           cpuid_t i;
693           int error;
694 
695           size = LINUX_CPU_MASK_SIZE;
696           if (SCARG(uap, len) < size)
697                     return EINVAL;
698 
699           if (SCARG(uap, pid) == 0) {
700                     p = curproc;
701                     mutex_enter(p->p_lock);
702                     t = curlwp;
703           } else {
704                     t = lwp_find2(-1, SCARG(uap, pid));
705                     if (__predict_false(t == NULL)) {
706                               return ESRCH;
707                     }
708                     p = t->l_proc;
709                     KASSERT(mutex_owned(p->p_lock));
710           }
711 
712           /* Check the permission */
713           if (kauth_authorize_process(l->l_cred,
714               KAUTH_PROCESS_SCHEDULER_GETAFFINITY, p, NULL, NULL, NULL)) {
715                     mutex_exit(p->p_lock);
716                     return EPERM;
717           }
718 
719           kcpuset_create(&kcset, true);
720           lwp_lock(t);
721           if (t->l_affinity != NULL)
722                     kcpuset_copy(kcset, t->l_affinity);
723           else {
724                     /*
725                      * All available CPUs should be masked when affinity has not
726                      * been set.
727                      */
728                     kcpuset_zero(kcset);
729                     for (i = 0; i < ncpu; i++)
730                               kcpuset_set(kcset, i);
731           }
732           lwp_unlock(t);
733           mutex_exit(p->p_lock);
734           error = kcpuset_copyout(kcset, (cpuset_t *)SCARG(uap, mask), size);
735           kcpuset_unuse(kcset, NULL);
736           *retval = size;
737           return error;
738 }
739 
740 int
linux_sys_sched_setaffinity(struct lwp * l,const struct linux_sys_sched_setaffinity_args * uap,register_t * retval)741 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
742 {
743           /* {
744                     syscallarg(linux_pid_t) pid;
745                     syscallarg(unsigned int) len;
746                     syscallarg(unsigned long *) mask;
747           } */
748           struct sys__sched_setaffinity_args ssa;
749           size_t size;
750           pid_t pid;
751           lwpid_t lid;
752 
753           size = LINUX_CPU_MASK_SIZE;
754           if (SCARG(uap, len) < size)
755                     return EINVAL;
756 
757           lid = SCARG(uap, pid);
758           if (lid != 0) {
759                     /* Get the canonical PID for the process. */
760                     mutex_enter(&proc_lock);
761                     struct proc *p = proc_find_lwpid(SCARG(uap, pid));
762                     if (p == NULL) {
763                               mutex_exit(&proc_lock);
764                               return ESRCH;
765                     }
766                     pid = p->p_pid;
767                     mutex_exit(&proc_lock);
768           } else {
769                     pid = curproc->p_pid;
770                     lid = curlwp->l_lid;
771           }
772 
773           SCARG(&ssa, pid) = pid;
774           SCARG(&ssa, lid) = lid;
775           SCARG(&ssa, size) = size;
776           SCARG(&ssa, cpuset) = (cpuset_t *)SCARG(uap, mask);
777 
778           return sys__sched_setaffinity(l, &ssa, retval);
779 }
780