xref: /dragonfly/sys/kern/kern_usched.c (revision e4b9e6f6c4819e081b27592550b55458d4c3d0a7)
1 /*
2  * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sergey Glushchenko <deen@smz.com.ua>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 
36 #include <sys/cpumask.h>
37 #include <sys/errno.h>
38 #include <sys/globaldata.h>             /* curthread */
39 #include <sys/proc.h>
40 #include <sys/caps.h>
41 #include <sys/sysmsg.h>                           /* struct usched_set_args */
42 #include <sys/systm.h>                            /* strcmp() */
43 #include <sys/usched.h>
44 
45 #include <machine/smp.h>
46 
/*
 * List of registered userland schedulers.  Entries are linked and
 * unlinked by usched_ctl() and walked by usched_schedulerclock() and
 * sys_usched_set().
 */
static TAILQ_HEAD(, usched) usched_list = TAILQ_HEAD_INITIALIZER(usched_list);

/*
 * Global cpu mask, statically initialized from CPUMASK_INITIALIZER_ALLONES.
 * NOTE(review): not referenced by the code visible in this part of the
 * file; presumably consumed by the individual schedulers -- confirm.
 */
cpumask_t usched_mastermask = CPUMASK_INITIALIZER_ALLONES;

/* Per-lwp worker used by sys_lwp_setaffinity(). */
static int setaffinity_lp(struct lwp *lp, cpumask_t *mask);
52 
53 /*
54  * Called from very low level boot code, sys/kern/init_main.c:mi_proc0init().
55  * We cannot do anything fancy.  no malloc's, no nothing other then
56  * static initialization.
57  */
58 struct usched *
usched_init(void)59 usched_init(void)
60 {
61           const char *defsched;
62 
63           defsched = kgetenv("kern.user_scheduler");
64 
65           /*
66            * Add various userland schedulers to the system.
67            */
68           usched_ctl(&usched_bsd4, USCH_ADD);
69           usched_ctl(&usched_dfly, USCH_ADD);
70           usched_ctl(&usched_dummy, USCH_ADD);
71           if (defsched == NULL )
72                     return(&usched_dfly);
73           if (strcmp(defsched, "bsd4") == 0)
74                     return(&usched_bsd4);
75           if (strcmp(defsched, "dfly") == 0)
76                     return(&usched_dfly);
77           kprintf("WARNING: Running dummy userland scheduler\n");
78           return(&usched_dummy);
79 }
80 
81 /*
82  * USCHED_CTL
83  *
84  * SYNOPSIS:
85  *        Add/remove usched to/from list.
86  *
87  * ARGUMENTS:
88  *        usched - pointer to target scheduler
89  *        action - addition or removal ?
90  *
91  * RETURN VALUES:
92  *        0 - success
93  *        EINVAL - error
94  */
95 int
usched_ctl(struct usched * usched,int action)96 usched_ctl(struct usched *usched, int action)
97 {
98           struct usched *item;          /* temporaly for TAILQ processing */
99           int error = 0;
100 
101           switch(action) {
102           case USCH_ADD:
103                     /*
104                      * Make sure it isn't already on the list
105                      */
106 #ifdef INVARIANTS
107                     TAILQ_FOREACH(item, &usched_list, entry) {
108                               KKASSERT(item != usched);
109                     }
110 #endif
111                     /*
112                      * Optional callback to the scheduler before we officially
113                      * add it to the list.
114                      */
115                     if (usched->usched_register)
116                               usched->usched_register();
117                     TAILQ_INSERT_TAIL(&usched_list, usched, entry);
118                     break;
119           case USCH_REM:
120                     /*
121                      * Do not allow the default scheduler to be removed
122                      */
123                     if (strcmp(usched->name, "bsd4") == 0) {
124                               error = EINVAL;
125                               break;
126                     }
127                     TAILQ_FOREACH(item, &usched_list, entry) {
128                               if (item == usched)
129                                         break;
130                     }
131                     if (item) {
132                               if (item->usched_unregister)
133                                         item->usched_unregister();
134                               TAILQ_REMOVE(&usched_list, item, entry);
135                     } else {
136                               error = EINVAL;
137                     }
138                     break;
139           default:
140                     error = EINVAL;
141                     break;
142           }
143           return (error);
144 }
145 
146 /*
147  * Called from the scheduler clock on each cpu independently at the
148  * common scheduling rate.  If the scheduler clock interrupted a running
149  * lwp the lp will be non-NULL.
150  */
151 void
usched_schedulerclock(struct lwp * lp,sysclock_t periodic,sysclock_t time)152 usched_schedulerclock(struct lwp *lp, sysclock_t periodic, sysclock_t time)
153 {
154           struct usched *item;
155 
156           TAILQ_FOREACH(item, &usched_list, entry) {
157                     if (lp && lp->lwp_proc->p_usched == item)
158                               item->schedulerclock(lp, periodic, time);
159                     else
160                               item->schedulerclock(NULL, periodic, time);
161           }
162 }
163 
164 /*
165  * USCHED_SET(syscall)
166  *
167  * SYNOPSIS:
168  *        Setting up a proc's usched.
169  *
170  * ARGUMENTS:
171  *        pid       -
172  *        cmd       -
173  *        data      -
174  *        bytes     -
175  * RETURN VALUES:
176  *        0 - success
177  *        EFBIG  - error (invalid cpu#)
178  *        EPERM  - error (failed to delete cpu#)
179  *        EINVAL - error (other reasons)
180  *
181  * MPALMOSTSAFE
182  */
183 int
sys_usched_set(struct sysmsg * sysmsg,const struct usched_set_args * uap)184 sys_usched_set(struct sysmsg *sysmsg, const struct usched_set_args *uap)
185 {
186           struct proc *p = curthread->td_proc;
187           struct usched *item;          /* temporaly for TAILQ processing */
188           int error;
189           char buffer[NAME_LENGTH];
190           cpumask_t mask;
191           struct lwp *lp;
192           int cpuid;
193 
194           if (uap->pid != 0 && uap->pid != curthread->td_proc->p_pid)
195                     return (EINVAL);
196 
197           lp = curthread->td_lwp;
198           lwkt_gettoken(&lp->lwp_token);
199 
200           switch (uap->cmd) {
201           case USCHED_SET_SCHEDULER:
202                     if ((error = caps_priv_check_self(SYSCAP_NOSCHED)) != 0)
203                               break;
204                     error = copyinstr(uap->data, buffer, sizeof(buffer), NULL);
205                     if (error)
206                               break;
207                     TAILQ_FOREACH(item, &usched_list, entry) {
208                               if ((strcmp(item->name, buffer) == 0))
209                                         break;
210                     }
211 
212                     /*
213                      * If the scheduler for a process is being changed, disassociate
214                      * the old scheduler before switching to the new one.
215                      *
216                      * XXX we might have to add an additional ABI call to do a 'full
217                      * disassociation' and another ABI call to do a 'full
218                      * reassociation'
219                      */
220                     /* XXX lwp have to deal with multiple lwps here */
221                     if (p->p_nthreads != 1) {
222                               error = EINVAL;
223                               break;
224                     }
225                     if (item && item != p->p_usched) {
226                               /* XXX lwp */
227                               p->p_usched->release_curproc(ONLY_LWP_IN_PROC(p));
228                               p->p_usched->heuristic_exiting(ONLY_LWP_IN_PROC(p), p);
229                               p->p_usched = item;
230                     } else if (item == NULL) {
231                               error = EINVAL;
232                     }
233                     break;
234           case USCHED_SET_CPU:
235                     if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
236                               break;
237                     if (uap->bytes != sizeof(int)) {
238                               error = EINVAL;
239                               break;
240                     }
241                     error = copyin(uap->data, &cpuid, sizeof(int));
242                     if (error)
243                               break;
244                     if (cpuid < 0 || cpuid >= ncpus) {
245                               error = EFBIG;
246                               break;
247                     }
248                     if (CPUMASK_TESTBIT(smp_active_mask, cpuid) == 0) {
249                               error = EINVAL;
250                               break;
251                     }
252                     CPUMASK_ASSBIT(lp->lwp_cpumask, cpuid);
253                     if (cpuid != mycpu->gd_cpuid) {
254                               lwkt_migratecpu(cpuid);
255                               p->p_usched->changedcpu(lp);
256                     }
257                     break;
258           case USCHED_GET_CPU:
259                     /* USCHED_GET_CPU doesn't require special privileges. */
260                     if (uap->bytes != sizeof(int)) {
261                               error = EINVAL;
262                               break;
263                     }
264                     error = copyout(&(mycpu->gd_cpuid), uap->data, sizeof(int));
265                     break;
266           case USCHED_GET_CPUMASK:
267                     /* USCHED_GET_CPUMASK doesn't require special privileges. */
268                     if (uap->bytes != sizeof(cpumask_t)) {
269                               error = EINVAL;
270                               break;
271                     }
272                     mask = lp->lwp_cpumask;
273                     CPUMASK_ANDMASK(mask, smp_active_mask);
274                     error = copyout(&mask, uap->data, sizeof(cpumask_t));
275                     break;
276           case USCHED_ADD_CPU:
277                     if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
278                               break;
279                     if (uap->bytes != sizeof(int)) {
280                               error = EINVAL;
281                               break;
282                     }
283                     error = copyin(uap->data, &cpuid, sizeof(int));
284                     if (error)
285                               break;
286                     if (cpuid < 0 || cpuid >= ncpus) {
287                               error = EFBIG;
288                               break;
289                     }
290                     if (CPUMASK_TESTBIT(smp_active_mask, cpuid) == 0) {
291                               error = EINVAL;
292                               break;
293                     }
294                     CPUMASK_ORBIT(lp->lwp_cpumask, cpuid);
295                     break;
296           case USCHED_DEL_CPU:
297                     /* USCHED_DEL_CPU doesn't require special privileges. */
298                     if (uap->bytes != sizeof(int)) {
299                               error = EINVAL;
300                               break;
301                     }
302                     error = copyin(uap->data, &cpuid, sizeof(int));
303                     if (error)
304                               break;
305                     if (cpuid < 0 || cpuid >= ncpus) {
306                               error = EFBIG;
307                               break;
308                     }
309                     lp = curthread->td_lwp;
310                     mask = lp->lwp_cpumask;
311                     CPUMASK_ANDMASK(mask, smp_active_mask);
312                     CPUMASK_NANDBIT(mask, cpuid);
313                     if (CPUMASK_TESTZERO(mask)) {
314                               error = EPERM;
315                     } else {
316                               CPUMASK_NANDBIT(lp->lwp_cpumask, cpuid);
317                               if (CPUMASK_TESTMASK(lp->lwp_cpumask,
318                                                       mycpu->gd_cpumask) == 0) {
319                                         mask = lp->lwp_cpumask;
320                                         CPUMASK_ANDMASK(mask, smp_active_mask);
321                                         cpuid = BSFCPUMASK(mask);
322                                         lwkt_migratecpu(cpuid);
323                                         p->p_usched->changedcpu(lp);
324                               }
325                     }
326                     break;
327           case USCHED_SET_CPUMASK:
328                     if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
329                               break;
330                     if (uap->bytes != sizeof(mask)) {
331                               error = EINVAL;
332                               break;
333                     }
334                     error = copyin(uap->data, &mask, sizeof(mask));
335                     if (error)
336                               break;
337 
338                     CPUMASK_ANDMASK(mask, smp_active_mask);
339                     if (CPUMASK_TESTZERO(mask)) {
340                               error = EPERM;
341                               break;
342                     }
343                     /* Commit the new cpumask. */
344                     lp->lwp_cpumask = mask;
345 
346                     /* Migrate if necessary. */
347                     if (CPUMASK_TESTMASK(lp->lwp_cpumask, mycpu->gd_cpumask) == 0) {
348                               cpuid = BSFCPUMASK(lp->lwp_cpumask);
349                               lwkt_migratecpu(cpuid);
350                               p->p_usched->changedcpu(lp);
351                     }
352                     break;
353           default:
354                     error = EINVAL;
355                     break;
356           }
357           lwkt_reltoken(&lp->lwp_token);
358 
359           return (error);
360 }
361 
362 int
sys_lwp_getaffinity(struct sysmsg * sysmsg,const struct lwp_getaffinity_args * uap)363 sys_lwp_getaffinity(struct sysmsg *sysmsg,
364                         const struct lwp_getaffinity_args *uap)
365 {
366           struct proc *p;
367           cpumask_t mask;
368           struct lwp *lp;
369           int error = 0;
370 
371           if (uap->pid < 0)
372                     return (EINVAL);
373 
374           if (uap->pid == 0) {
375                     p = curproc;
376                     PHOLD(p);
377           } else {
378                     p = pfind(uap->pid);          /* pfind() holds (p) */
379                     if (p == NULL)
380                               return (ESRCH);
381           }
382           lwkt_gettoken(&p->p_token);
383 
384           if (uap->tid < 0) {
385                     lp = RB_FIRST(lwp_rb_tree, &p->p_lwp_tree);
386           } else {
387                     lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
388           }
389           if (lp == NULL) {
390                     error = ESRCH;
391           } else {
392                     /* Take a snapshot for copyout, which may block. */
393                     LWPHOLD(lp);
394                     lwkt_gettoken(&lp->lwp_token);
395                     mask = lp->lwp_cpumask;
396                     CPUMASK_ANDMASK(mask, smp_active_mask);
397                     lwkt_reltoken(&lp->lwp_token);
398                     LWPRELE(lp);
399           }
400 
401           lwkt_reltoken(&p->p_token);
402           PRELE(p);
403 
404           if (error == 0)
405                     error = copyout(&mask, uap->mask, sizeof(cpumask_t));
406 
407           return (error);
408 }
409 
410 int
sys_lwp_setaffinity(struct sysmsg * sysmsg,const struct lwp_setaffinity_args * uap)411 sys_lwp_setaffinity(struct sysmsg *sysmsg,
412                         const struct lwp_setaffinity_args *uap)
413 {
414           struct proc *p;
415           cpumask_t mask;
416           struct lwp *lp;
417           int error;
418 
419           /*
420            * NOTE:
421            * Always allow change self CPU affinity.
422            */
423           if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0 &&
424               uap->pid != 0)
425           {
426                     return (error);
427           }
428 
429           error = copyin(uap->mask, &mask, sizeof(mask));
430           if (error)
431                     return (error);
432 
433           CPUMASK_ANDMASK(mask, smp_active_mask);
434           if (CPUMASK_TESTZERO(mask))
435                     return (EPERM);
436           if (uap->pid < 0)
437                     return (EINVAL);
438 
439           /*
440            * Locate the process
441            */
442           if (uap->pid == 0) {
443                     p = curproc;
444                     PHOLD(p);
445           } else {
446                     p = pfind(uap->pid);          /* pfind() holds (p) */
447                     if (p == NULL)
448                               return (ESRCH);
449           }
450           lwkt_gettoken(&p->p_token);
451 
452           if (uap->tid < 0) {
453                     FOREACH_LWP_IN_PROC(lp, p) {
454                               error = setaffinity_lp(lp, &mask);
455                     }
456                     /* not an error if no LPs left in process */
457           } else {
458                     lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
459                     error = setaffinity_lp(lp, &mask);
460           }
461           lwkt_reltoken(&p->p_token);
462           PRELE(p);
463 
464           return (error);
465 }
466 
467 static int
setaffinity_lp(struct lwp * lp,cpumask_t * mask)468 setaffinity_lp(struct lwp *lp, cpumask_t *mask)
469 {
470           if (lp == NULL)
471                     return ESRCH;
472 
473           LWPHOLD(lp);
474           lwkt_gettoken(&lp->lwp_token);
475           lp->lwp_cpumask = *mask;
476 
477           /*
478            * NOTE: When adjusting a thread that is not our own the migration
479            *         will occur at the next reschedule.
480            */
481           if (lp == curthread->td_lwp) {
482                     /*
483                      * Self migration can be done immediately,
484                      * if necessary.
485                      */
486                     if (CPUMASK_TESTBIT(lp->lwp_cpumask,
487                         mycpu->gd_cpuid) == 0) {
488                               lwkt_migratecpu(BSFCPUMASK(lp->lwp_cpumask));
489                               lp->lwp_proc->p_usched->changedcpu(lp);
490                     }
491           }
492           lwkt_reltoken(&lp->lwp_token);
493           LWPRELE(lp);
494 
495           return 0;
496 }
497