1 /** $MirOS: src/sys/kern/kern_synch.c,v 1.3 2006/10/17 20:48:48 tg Exp $ */
2 /* $OpenBSD: kern_synch.c,v 1.54 2004/01/26 01:27:02 deraadt Exp $ */
3 /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
4
5 /*-
6 * Copyright (c) 1982, 1986, 1990, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
8 * (c) UNIX System Laboratories, Inc.
9 * All or some portions of this file are derived from material licensed
10 * to the University of California by American Telephone and Telegraph
11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12 * the permission of UNIX System Laboratories, Inc.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/buf.h>
46 #include <sys/signalvar.h>
47 #include <sys/resourcevar.h>
48 #include <uvm/uvm_extern.h>
49 #include <sys/sched.h>
50 #include <sys/timeout.h>
51
52 #ifdef KTRACE
53 #include <sys/ktrace.h>
54 #endif
55
56 #include <machine/cpu.h>
57
58 u_char curpriority; /* usrpri of curproc */
59 int lbolt; /* once a second sleep address */
60
61 int whichqs; /* Bit mask summary of non-empty Q's. */
62 struct prochd qs[NQS];
63
64 void scheduler_start(void);
65
66 void roundrobin(void *);
67 void schedcpu(void *);
68 void updatepri(struct proc *);
69 void endtsleep(void *);
70
71 void
scheduler_start()72 scheduler_start()
73 {
74 static struct timeout roundrobin_to;
75 static struct timeout schedcpu_to;
76
77 /*
78 * We avoid polluting the global namespace by keeping the scheduler
79 * timeouts static in this function.
80 * We setup the timeouts here and kick roundrobin and schedcpu once to
81 * make them do their job.
82 */
83
84 timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);
85 timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
86
87 roundrobin(&roundrobin_to);
88 schedcpu(&schedcpu_to);
89 }
90
91 /*
92 * Force switch among equal priority processes every 100ms.
93 */
94 /* ARGSUSED */
95 void
roundrobin(arg)96 roundrobin(arg)
97 void *arg;
98 {
99 struct timeout *to = (struct timeout *)arg;
100 struct proc *p = curproc;
101 int s;
102
103 if (p != NULL) {
104 s = splstatclock();
105 if (p->p_schedflags & PSCHED_SEENRR) {
106 /*
107 * The process has already been through a roundrobin
108 * without switching and may be hogging the CPU.
109 * Indicate that the process should yield.
110 */
111 p->p_schedflags |= PSCHED_SHOULDYIELD;
112 } else {
113 p->p_schedflags |= PSCHED_SEENRR;
114 }
115 splx(s);
116 }
117 need_resched();
118 timeout_add(to, hz / 10);
119 }
120
121 /*
122 * Constants for digital decay and forget:
123 * 90% of (p_estcpu) usage in 5 * loadav time
124 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
125 * Note that, as ps(1) mentions, this can let percentages
126 * total over 100% (I've seen 137.9% for 3 processes).
127 *
128 * Note that hardclock updates p_estcpu and p_cpticks independently.
129 *
130 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
131 * That is, the system wants to compute a value of decay such
132 * that the following for loop:
133 * for (i = 0; i < (5 * loadavg); i++)
134 * p_estcpu *= decay;
135 * will compute
136 * p_estcpu *= 0.1;
137 * for all values of loadavg:
138 *
139 * Mathematically this loop can be expressed by saying:
140 * decay ** (5 * loadavg) ~= .1
141 *
142 * The system computes decay as:
143 * decay = (2 * loadavg) / (2 * loadavg + 1)
144 *
145 * We wish to prove that the system's computation of decay
146 * will always fulfill the equation:
147 * decay ** (5 * loadavg) ~= .1
148 *
149 * If we compute b as:
150 * b = 2 * loadavg
151 * then
152 * decay = b / (b + 1)
153 *
154 * We now need to prove two things:
155 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
156 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
157 *
158 * Facts:
159 * For x close to zero, exp(x) =~ 1 + x, since
160 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
161 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
162 * For x close to zero, ln(1+x) =~ x, since
163 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
164 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
165 * ln(.1) =~ -2.30
166 *
167 * Proof of (1):
168 * Solve (factor)**(power) =~ .1 given power (5*loadav):
169 * solving for factor,
170 * ln(factor) =~ (-2.30/5*loadav), or
171 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
172 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
173 *
174 * Proof of (2):
175 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
176 * solving for power,
177 * power*ln(b/(b+1)) =~ -2.30, or
178 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
179 *
180 * Actual power values for the implemented algorithm are as follows:
181 * loadav: 1 2 3 4
182 * power: 5.68 10.32 14.94 19.55
183 */
184
185 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
186 #define loadfactor(loadav) (2 * (loadav))
187 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
188
189 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
190 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
191
192 /*
193 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
194 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
195 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
196 *
197 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
198 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
199 *
200 * If you dont want to bother with the faster/more-accurate formula, you
201 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
202 * (more general) method of calculating the %age of CPU used by a process.
203 */
204 #define CCPU_SHIFT 11
205
206 /*
207 * Recompute process priorities, every hz ticks.
208 */
209 /* ARGSUSED */
210 void
schedcpu(arg)211 schedcpu(arg)
212 void *arg;
213 {
214 struct timeout *to = (struct timeout *)arg;
215 fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
216 struct proc *p;
217 int s;
218 unsigned int newcpu;
219 int phz;
220
221 /*
222 * If we have a statistics clock, use that to calculate CPU
223 * time, otherwise revert to using the profiling clock (which,
224 * in turn, defaults to hz if there is no separate profiling
225 * clock available)
226 */
227 phz = stathz ? stathz : profhz;
228 KASSERT(phz);
229
230 for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {
231 /*
232 * Increment time in/out of memory and sleep time
233 * (if sleeping). We ignore overflow; with 16-bit int's
234 * (remember them?) overflow takes 45 days.
235 */
236 p->p_swtime++;
237 if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
238 p->p_slptime++;
239 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
240 /*
241 * If the process has slept the entire second,
242 * stop recalculating its priority until it wakes up.
243 */
244 if (p->p_slptime > 1)
245 continue;
246 s = splstatclock(); /* prevent state changes */
247 /*
248 * p_pctcpu is only for ps.
249 */
250 #if (FSHIFT >= CCPU_SHIFT)
251 p->p_pctcpu += (phz == 100)?
252 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
253 100 * (((fixpt_t) p->p_cpticks)
254 << (FSHIFT - CCPU_SHIFT)) / phz;
255 #else
256 p->p_pctcpu += ((FSCALE - ccpu) *
257 (p->p_cpticks * FSCALE / phz)) >> FSHIFT;
258 #endif
259 p->p_cpticks = 0;
260 newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
261 p->p_estcpu = newcpu;
262 resetpriority(p);
263 if (p->p_priority >= PUSER) {
264 if ((p != curproc) &&
265 p->p_stat == SRUN &&
266 (p->p_flag & P_INMEM) &&
267 (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
268 remrunqueue(p);
269 p->p_priority = p->p_usrpri;
270 setrunqueue(p);
271 } else
272 p->p_priority = p->p_usrpri;
273 }
274 splx(s);
275 }
276 uvm_meter();
277 wakeup((caddr_t)&lbolt);
278 timeout_add(to, hz);
279 }
280
281 /*
282 * Recalculate the priority of a process after it has slept for a while.
283 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
284 * least six times the loadfactor will decay p_estcpu to zero.
285 */
286 void
updatepri(p)287 updatepri(p)
288 register struct proc *p;
289 {
290 register unsigned int newcpu = p->p_estcpu;
291 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
292
293 if (p->p_slptime > 5 * loadfac)
294 p->p_estcpu = 0;
295 else {
296 p->p_slptime--; /* the first time was done in schedcpu */
297 while (newcpu && --p->p_slptime)
298 newcpu = (int) decay_cpu(loadfac, newcpu);
299 p->p_estcpu = newcpu;
300 }
301 resetpriority(p);
302 }
303
304 /*
305 * We're only looking at 7 bits of the address; everything is
306 * aligned to 4, lots of things are aligned to greater powers
307 * of 2. Shift right by 8, i.e. drop the bottom 256 worth.
308 */
309 #define TABLESIZE 128
310 #define LOOKUP(x) (((long)(x) >> 8) & (TABLESIZE - 1))
311 struct slpque {
312 struct proc *sq_head;
313 struct proc **sq_tailp;
314 } slpque[TABLESIZE];
315
316 /*
317 * During autoconfiguration or after a panic, a sleep will simply
318 * lower the priority briefly to allow interrupts, then return.
319 * The priority to be used (safepri) is machine-dependent, thus this
320 * value is initialized and maintained in the machine-dependent layers.
321 * This priority will typically be 0, or the lowest priority
322 * that is safe for use on the interrupt stack; it can be made
323 * higher to block network software interrupts after panics.
324 */
325 int safepri;
326
327 /*
328 * General sleep call. Suspends the current process until a wakeup is
329 * performed on the specified identifier. The process will then be made
330 * runnable with the specified priority. Sleeps at most timo/hz seconds
331 * (0 means no timeout). If pri includes PCATCH flag, signals are checked
332 * before and after sleeping, else signals are not checked. Returns 0 if
333 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
334 * signal needs to be delivered, ERESTART is returned if the current system
335 * call should be restarted if possible, and EINTR is returned if the system
336 * call should be interrupted by the signal (return EINTR).
337 *
338 * The interlock is held until the scheduler_slock (XXX) is held. The
339 * interlock will be locked before returning back to the caller
340 * unless the PNORELOCK flag is specified, in which case the
341 * interlock will always be unlocked upon return.
342 */
343 int
ltsleep(ident,priority,wmesg,timo,interlock)344 ltsleep(ident, priority, wmesg, timo, interlock)
345 void *ident;
346 int priority, timo;
347 const char *wmesg;
348 volatile struct simplelock *interlock;
349 {
350 struct proc *p = curproc;
351 struct slpque *qp;
352 int s, sig;
353 int catch = priority & PCATCH;
354 int relock = (priority & PNORELOCK) == 0;
355
356 #ifdef KTRACE
357 if (KTRPOINT(p, KTR_CSW))
358 ktrcsw(p, 1, 0);
359 #endif
360 s = splhigh();
361 if (cold || panicstr) {
362 /*
363 * After a panic, or during autoconfiguration,
364 * just give interrupts a chance, then just return;
365 * don't run any other procs or panic below,
366 * in case this is the idle process and already asleep.
367 */
368 splx(safepri);
369 splx(s);
370 if (interlock != NULL && relock == 0)
371 simple_unlock(interlock);
372 return (0);
373 }
374 #ifdef DIAGNOSTIC
375 if (ident == NULL || p->p_stat != SRUN || p->p_back)
376 panic("tsleep");
377 #endif
378 p->p_wchan = ident;
379 p->p_wmesg = wmesg;
380 p->p_slptime = 0;
381 p->p_priority = priority & PRIMASK;
382 qp = &slpque[LOOKUP(ident)];
383 if (qp->sq_head == 0)
384 qp->sq_head = p;
385 else
386 *qp->sq_tailp = p;
387 *(qp->sq_tailp = &p->p_forw) = 0;
388 if (timo)
389 timeout_add(&p->p_sleep_to, timo);
390 /*
391 * We can now release the interlock; the scheduler_slock
392 * is held, so a thread can't get in to do wakeup() before
393 * we do the switch.
394 *
395 * XXX We leave the code block here, after inserting ourselves
396 * on the sleep queue, because we might want a more clever
397 * data structure for the sleep queues at some point.
398 */
399 if (interlock != NULL)
400 simple_unlock(interlock);
401
402 /*
403 * We put ourselves on the sleep queue and start our timeout
404 * before calling CURSIG, as we could stop there, and a wakeup
405 * or a SIGCONT (or both) could occur while we were stopped.
406 * A SIGCONT would cause us to be marked as SSLEEP
407 * without resuming us, thus we must be ready for sleep
408 * when CURSIG is called. If the wakeup happens while we're
409 * stopped, p->p_wchan will be 0 upon return from CURSIG.
410 */
411 if (catch) {
412 p->p_flag |= P_SINTR;
413 if ((sig = CURSIG(p)) != 0) {
414 if (p->p_wchan)
415 unsleep(p);
416 p->p_stat = SRUN;
417 goto resume;
418 }
419 if (p->p_wchan == 0) {
420 catch = 0;
421 goto resume;
422 }
423 } else
424 sig = 0;
425 p->p_stat = SSLEEP;
426 p->p_stats->p_ru.ru_nvcsw++;
427 mi_switch();
428 #ifdef DDB
429 /* handy breakpoint location after process "wakes" */
430 __asm(".globl bpendtsleep\nbpendtsleep:");
431 #endif
432 resume:
433 curpriority = p->p_usrpri;
434 splx(s);
435 p->p_flag &= ~P_SINTR;
436 if (p->p_flag & P_TIMEOUT) {
437 p->p_flag &= ~P_TIMEOUT;
438 if (sig == 0) {
439 #ifdef KTRACE
440 if (KTRPOINT(p, KTR_CSW))
441 ktrcsw(p, 0, 0);
442 #endif
443 if (interlock != NULL && relock)
444 simple_lock(interlock);
445 return (EWOULDBLOCK);
446 }
447 } else if (timo)
448 timeout_del(&p->p_sleep_to);
449 if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
450 #ifdef KTRACE
451 if (KTRPOINT(p, KTR_CSW))
452 ktrcsw(p, 0, 0);
453 #endif
454 if (interlock != NULL && relock)
455 simple_lock(interlock);
456 if (p->p_sigacts->ps_sigintr & sigmask(sig))
457 return (EINTR);
458 return (ERESTART);
459 }
460 #ifdef KTRACE
461 if (KTRPOINT(p, KTR_CSW))
462 ktrcsw(p, 0, 0);
463 #endif
464 if (interlock != NULL && relock)
465 simple_lock(interlock);
466 return (0);
467 }
468
469 /*
470 * Implement timeout for tsleep.
471 * If process hasn't been awakened (wchan non-zero),
472 * set timeout flag and undo the sleep. If proc
473 * is stopped, just unsleep so it will remain stopped.
474 */
475 void
endtsleep(arg)476 endtsleep(arg)
477 void *arg;
478 {
479 struct proc *p;
480 int s;
481
482 p = (struct proc *)arg;
483 s = splhigh();
484 if (p->p_wchan) {
485 if (p->p_stat == SSLEEP)
486 setrunnable(p);
487 else
488 unsleep(p);
489 p->p_flag |= P_TIMEOUT;
490 }
491 splx(s);
492 }
493
494 /*
495 * Short-term, non-interruptable sleep.
496 */
497 void
sleep(ident,priority)498 sleep(ident, priority)
499 void *ident;
500 int priority;
501 {
502 register struct proc *p = curproc;
503 register struct slpque *qp;
504 register int s;
505
506 #ifdef DIAGNOSTIC
507 if (priority > PZERO) {
508 printf("sleep called with priority %d > PZERO, wchan: %p\n",
509 priority, ident);
510 panic("old sleep");
511 }
512 #endif
513 s = splhigh();
514 if (cold || panicstr) {
515 /*
516 * After a panic, or during autoconfiguration,
517 * just give interrupts a chance, then just return;
518 * don't run any other procs or panic below,
519 * in case this is the idle process and already asleep.
520 */
521 splx(safepri);
522 splx(s);
523 return;
524 }
525 #ifdef DIAGNOSTIC
526 if (ident == NULL || p->p_stat != SRUN || p->p_back)
527 panic("sleep");
528 #endif
529 p->p_wchan = ident;
530 p->p_wmesg = NULL;
531 p->p_slptime = 0;
532 p->p_priority = priority;
533 qp = &slpque[LOOKUP(ident)];
534 if (qp->sq_head == 0)
535 qp->sq_head = p;
536 else
537 *qp->sq_tailp = p;
538 *(qp->sq_tailp = &p->p_forw) = 0;
539 p->p_stat = SSLEEP;
540 p->p_stats->p_ru.ru_nvcsw++;
541 #ifdef KTRACE
542 if (KTRPOINT(p, KTR_CSW))
543 ktrcsw(p, 1, 0);
544 #endif
545 mi_switch();
546 #ifdef DDB
547 /* handy breakpoint location after process "wakes" */
548 __asm(".globl bpendsleep\nbpendsleep:");
549 #endif
550 #ifdef KTRACE
551 if (KTRPOINT(p, KTR_CSW))
552 ktrcsw(p, 0, 0);
553 #endif
554 curpriority = p->p_usrpri;
555 splx(s);
556 }
557
558 /*
559 * Remove a process from its wait queue
560 */
561 void
unsleep(p)562 unsleep(p)
563 register struct proc *p;
564 {
565 register struct slpque *qp;
566 register struct proc **hp;
567 int s;
568
569 s = splhigh();
570 if (p->p_wchan) {
571 hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
572 while (*hp != p)
573 hp = &(*hp)->p_forw;
574 *hp = p->p_forw;
575 if (qp->sq_tailp == &p->p_forw)
576 qp->sq_tailp = hp;
577 p->p_wchan = 0;
578 }
579 splx(s);
580 }
581
582 /*
583 * Make all processes sleeping on the specified identifier runnable.
584 */
585 void
wakeup_n(ident,n)586 wakeup_n(ident, n)
587 void *ident;
588 int n;
589 {
590 struct slpque *qp;
591 struct proc *p, **q;
592 int s;
593
594 s = splhigh();
595 qp = &slpque[LOOKUP(ident)];
596 restart:
597 for (q = &qp->sq_head; (p = *q) != NULL; ) {
598 #ifdef DIAGNOSTIC
599 if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
600 panic("wakeup");
601 #endif
602 if (p->p_wchan == ident) {
603 --n;
604 p->p_wchan = 0;
605 *q = p->p_forw;
606 if (qp->sq_tailp == &p->p_forw)
607 qp->sq_tailp = q;
608 if (p->p_stat == SSLEEP) {
609 /* OPTIMIZED EXPANSION OF setrunnable(p); */
610 if (p->p_slptime > 1)
611 updatepri(p);
612 p->p_slptime = 0;
613 p->p_stat = SRUN;
614
615 /*
616 * Since curpriority is a user priority,
617 * p->p_priority is always better than
618 * curpriority.
619 */
620
621 if ((p->p_flag & P_INMEM) != 0) {
622 setrunqueue(p);
623 need_resched();
624 } else {
625 wakeup((caddr_t)&proc0);
626 }
627 /* END INLINE EXPANSION */
628
629 if (n != 0)
630 goto restart;
631 else
632 break;
633 }
634 } else
635 q = &p->p_forw;
636 }
637 splx(s);
638 }
639
640 void
wakeup(chan)641 wakeup(chan)
642 void *chan;
643 {
644 wakeup_n(chan, -1);
645 }
646
647 /*
648 * General yield call. Puts the current process back on its run queue and
649 * performs a voluntary context switch.
650 */
651 void
yield()652 yield()
653 {
654 struct proc *p = curproc;
655 int s;
656
657 s = splstatclock();
658 p->p_priority = p->p_usrpri;
659 setrunqueue(p);
660 p->p_stats->p_ru.ru_nvcsw++;
661 mi_switch();
662 splx(s);
663 }
664
665 /*
666 * General preemption call. Puts the current process back on its run queue
667 * and performs an involuntary context switch. If a process is supplied,
668 * we switch to that process. Otherwise, we use the normal process selection
669 * criteria.
670 */
671 void
preempt(newp)672 preempt(newp)
673 struct proc *newp;
674 {
675 struct proc *p = curproc;
676 int s;
677
678 /*
679 * XXX Switching to a specific process is not supported yet.
680 */
681 if (newp != NULL)
682 panic("preempt: cpu_preempt not yet implemented");
683
684 s = splstatclock();
685 p->p_priority = p->p_usrpri;
686 setrunqueue(p);
687 p->p_stats->p_ru.ru_nivcsw++;
688 mi_switch();
689 splx(s);
690 }
691
692
693 /*
694 * Must be called at splstatclock() or higher.
695 */
696 void
mi_switch()697 mi_switch()
698 {
699 struct proc *p = curproc; /* XXX */
700 struct rlimit *rlim;
701 struct timeval tv;
702
703 splassert(IPL_STATCLOCK);
704
705 /*
706 * Compute the amount of time during which the current
707 * process was running, and add that to its total so far.
708 */
709 microtime(&tv);
710 if (timercmp(&tv, &runtime, <)) {
711 #ifdef DEBUG
712 printf("time is not monotonic! "
713 "tv=%lld.%06ld, runtime=%lld.%06ld\n",
714 (int64_t)tv.tv_sec, tv.tv_usec,
715 (int64_t)runtime.tv_sec, runtime.tv_usec);
716 #endif
717 } else {
718 timersub(&tv, &runtime, &tv);
719 timeradd(&p->p_rtime, &tv, &p->p_rtime);
720 }
721
722 /*
723 * Check if the process exceeds its cpu resource allocation.
724 * If over max, kill it.
725 */
726 rlim = &p->p_rlimit[RLIMIT_CPU];
727 if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_cur) {
728 if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_max) {
729 psignal(p, SIGKILL);
730 } else {
731 psignal(p, SIGXCPU);
732 if (rlim->rlim_cur < rlim->rlim_max)
733 rlim->rlim_cur += 5;
734 }
735 } else {
736 rlim = &p->p_rlimit[RLIMIT_TIME];
737 if (rlim->rlim_cur != RLIM_INFINITY) {
738 if ((time.tv_sec - p->p_stats->p_start.tv_sec)
739 >= rlim->rlim_cur) {
740 if ((time.tv_sec - p->p_stats->p_start.tv_sec)
741 >= rlim->rlim_max)
742 psignal(p, SIGKILL);
743 else {
744 psignal(p, SIGXCPU);
745 if (rlim->rlim_cur < rlim->rlim_max)
746 rlim->rlim_cur += 5;
747 }
748 }
749 }
750 }
751
752 /*
753 * Process is about to yield the CPU; clear the appropriate
754 * scheduling flags.
755 */
756 p->p_schedflags &= ~PSCHED_SWITCHCLEAR;
757
758 /*
759 * Pick a new current process and record its start time.
760 */
761 uvmexp.swtch++;
762 cpu_switch(p);
763 microtime(&runtime);
764 }
765
766 /*
767 * Initialize the (doubly-linked) run queues
768 * to be empty.
769 */
770 void
rqinit()771 rqinit()
772 {
773 register int i;
774
775 for (i = 0; i < NQS; i++)
776 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
777 }
778
779 /*
780 * Change process state to be runnable,
781 * placing it on the run queue if it is in memory,
782 * and awakening the swapper if it isn't in memory.
783 */
784 void
setrunnable(p)785 setrunnable(p)
786 register struct proc *p;
787 {
788 register int s;
789
790 s = splhigh();
791 switch (p->p_stat) {
792 case 0:
793 case SRUN:
794 case SZOMB:
795 case SDEAD:
796 default:
797 panic("setrunnable");
798 case SSTOP:
799 /*
800 * If we're being traced (possibly because someone attached us
801 * while we were stopped), check for a signal from the debugger.
802 */
803 if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0)
804 p->p_siglist |= sigmask(p->p_xstat);
805 case SSLEEP:
806 unsleep(p); /* e.g. when sending signals */
807 break;
808 case SIDL:
809 break;
810 }
811 p->p_stat = SRUN;
812 if (p->p_flag & P_INMEM)
813 setrunqueue(p);
814 splx(s);
815 if (p->p_slptime > 1)
816 updatepri(p);
817 p->p_slptime = 0;
818 if ((p->p_flag & P_INMEM) == 0)
819 wakeup((caddr_t)&proc0);
820 else if (p->p_priority < curpriority)
821 need_resched();
822 }
823
824 /*
825 * Compute the priority of a process when running in user mode.
826 * Arrange to reschedule if the resulting priority is better
827 * than that of the current process.
828 */
829 void
resetpriority(p)830 resetpriority(p)
831 register struct proc *p;
832 {
833 register unsigned int newpriority;
834
835 newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
836 newpriority = min(newpriority, MAXPRI);
837 p->p_usrpri = newpriority;
838 if (newpriority < curpriority)
839 need_resched();
840 }
841
842 /*
843 * We adjust the priority of the current process. The priority of a process
844 * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu)
845 * is increased here. The formula for computing priorities (in kern_synch.c)
846 * will compute a different value each time p_estcpu increases. This can
847 * cause a switch, but unless the priority crosses a PPQ boundary the actual
848 * queue will not change. The cpu usage estimator ramps up quite quickly
849 * when the process is running (linearly), and decays away exponentially, at
850 * a rate which is proportionally slower when the system is busy. The basic
851 * principle is that the system will 90% forget that the process used a lot
852 * of CPU time in 5 * loadav seconds. This causes the system to favor
853 * processes which haven't run much recently, and to round-robin among other
854 * processes.
855 */
856
857 void
schedclock(p)858 schedclock(p)
859 struct proc *p;
860 {
861 p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
862 resetpriority(p);
863 if (p->p_priority >= PUSER)
864 p->p_priority = p->p_usrpri;
865 }
866
867 #ifdef DDB
868 #include <machine/db_machdep.h>
869
870 #include <ddb/db_interface.h>
871 #include <ddb/db_output.h>
872
873 void
db_show_all_procs(addr,haddr,count,modif)874 db_show_all_procs(addr, haddr, count, modif)
875 db_expr_t addr;
876 int haddr;
877 db_expr_t count;
878 char *modif;
879 {
880 char *mode;
881 int doingzomb = 0;
882 struct proc *p, *pp;
883
884 if (modif[0] == 0)
885 modif[0] = 'n'; /* default == normal mode */
886
887 mode = "mawn";
888 while (*mode && *mode != modif[0])
889 mode++;
890 if (*mode == 0 || *mode == 'm') {
891 db_printf("usage: show all procs [/a] [/n] [/w]\n");
892 db_printf("\t/a == show process address info\n");
893 db_printf("\t/n == show normal process info [default]\n");
894 db_printf("\t/w == show process wait/emul info\n");
895 return;
896 }
897
898 p = LIST_FIRST(&allproc);
899
900 switch (*mode) {
901
902 case 'a':
903 db_printf(" PID %-10s %18s %18s %18s\n",
904 "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP");
905 break;
906 case 'n':
907 db_printf(" PID %5s %5s %5s S %10s %-9s %-16s\n",
908 "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND");
909 break;
910 case 'w':
911 db_printf(" PID %-16s %-8s %18s %s\n",
912 "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG");
913 break;
914 }
915
916 while (p != 0) {
917 pp = p->p_pptr;
918 if (p->p_stat) {
919
920 db_printf("%c%5d ", p == curproc ? '*' : ' ',
921 p->p_pid);
922
923 switch (*mode) {
924
925 case 'a':
926 db_printf("%-10.10s %18p %18p %18p\n",
927 p->p_comm, p, p->p_addr, p->p_vmspace);
928 break;
929
930 case 'n':
931 db_printf("%5d %5d %5d %d %#10x "
932 "%-9.9s %-16s\n",
933 pp ? pp->p_pid : -1, p->p_pgrp->pg_id,
934 p->p_cred->p_ruid, p->p_stat, p->p_flag,
935 (p->p_wchan && p->p_wmesg) ?
936 p->p_wmesg : "", p->p_comm);
937 break;
938
939 case 'w':
940 db_printf("%-16s %-8s %18p %s\n", p->p_comm,
941 p->p_emul->e_name, p->p_wchan,
942 (p->p_wchan && p->p_wmesg) ?
943 p->p_wmesg : "");
944 break;
945
946 }
947 }
948 p = LIST_NEXT(p, p_list);
949 if (p == 0 && doingzomb == 0) {
950 doingzomb = 1;
951 p = LIST_FIRST(&zombproc);
952 }
953 }
954 }
955 #endif
956