1 /*        $NetBSD: kern_time.c,v 1.228 2025/03/19 14:27:05 pho Exp $  */
2 
3 /*-
4  * Copyright (c) 2000, 2004, 2005, 2007, 2008, 2009, 2020
5  *     The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Christopher G. Demetriou, by Andrew Doran, and by Jason R. Thorpe.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *        The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *        @(#)kern_time.c     8.4 (Berkeley) 5/26/95
62  */
63 
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.228 2025/03/19 14:27:05 pho Exp $");
66 
67 #include <sys/param.h>
68 #include <sys/types.h>
69 
70 #include <sys/callout.h>
71 #include <sys/cpu.h>
72 #include <sys/errno.h>
73 #include <sys/intr.h>
74 #include <sys/kauth.h>
75 #include <sys/kernel.h>
76 #include <sys/kmem.h>
77 #include <sys/lwp.h>
78 #include <sys/mount.h>
79 #include <sys/mutex.h>
80 #include <sys/proc.h>
81 #include <sys/queue.h>
82 #include <sys/resourcevar.h>
83 #include <sys/signal.h>
84 #include <sys/signalvar.h>
85 #include <sys/syscallargs.h>
86 #include <sys/syslog.h>
87 #include <sys/systm.h>
88 #include <sys/timetc.h>
89 #include <sys/timevar.h>
90 #include <sys/timex.h>
91 #include <sys/vnode.h>
92 
93 #include <machine/limits.h>
94 
95 kmutex_t  itimer_mutex __cacheline_aligned;       /* XXX static */
96 static struct itlist itimer_realtime_changed_notify;
97 
98 static void         itimer_callout(void *);
99 static void         ptimer_intr(void *);
100 static void         *ptimer_sih __read_mostly;
101 static TAILQ_HEAD(, ptimer) ptimer_queue;
102 
103 #define   CLOCK_VIRTUAL_P(clockid)      \
104           ((clockid) == CLOCK_VIRTUAL || (clockid) == CLOCK_PROF)
105 
106 CTASSERT(ITIMER_REAL == CLOCK_REALTIME);
107 CTASSERT(ITIMER_VIRTUAL == CLOCK_VIRTUAL);
108 CTASSERT(ITIMER_PROF == CLOCK_PROF);
109 CTASSERT(ITIMER_MONOTONIC == CLOCK_MONOTONIC);
110 
111 /*
112  * Initialize timekeeping.
113  */
114 void
time_init(void)115 time_init(void)
116 {
117 
118           mutex_init(&itimer_mutex, MUTEX_DEFAULT, IPL_SCHED);
119           LIST_INIT(&itimer_realtime_changed_notify);
120 
121           TAILQ_INIT(&ptimer_queue);
122           ptimer_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
123               ptimer_intr, NULL);
124 }
125 
/*
 * time_wraps:
 *
 *	Check if the time will wrap if set to ts.
 *
 *	ts	- timespec describing the new time
 *	delta	- the delta between the current time and ts
 *
 *	Returns true if ts lies within one year of the LLONG_MAX
 *	second count, or if either field of delta is negative;
 *	false otherwise.
 */
bool
time_wraps(struct timespec *ts, struct timespec *delta)
{
	/*
	 * Refuse to let the time be set so far forward that it would
	 * wrap and become negative, which would let an attacker get
	 * around the subsequent check.  The cutoff sits one year
	 * before rollover, so that even if adjtime(2) is used to push
	 * the time past the cutoff, reaching the wrap point still
	 * takes a very long time.
	 */
	const bool near_rollover = ts->tv_sec > LLONG_MAX - 365*24*60*60;
	const bool negative_delta = delta->tv_sec < 0 || delta->tv_nsec < 0;

	return near_rollover || negative_delta;
}
151 
152 /*
153  * itimer_lock:
154  *
155  *        Acquire the interval timer data lock.
156  */
157 void
itimer_lock(void)158 itimer_lock(void)
159 {
160           mutex_spin_enter(&itimer_mutex);
161 }
162 
163 /*
164  * itimer_unlock:
165  *
166  *        Release the interval timer data lock.
167  */
168 void
itimer_unlock(void)169 itimer_unlock(void)
170 {
171           mutex_spin_exit(&itimer_mutex);
172 }
173 
174 /*
175  * itimer_lock_held:
176  *
177  *        Check that the interval timer lock is held for diagnostic
178  *        assertions.
179  */
180 inline bool __diagused
itimer_lock_held(void)181 itimer_lock_held(void)
182 {
183           return mutex_owned(&itimer_mutex);
184 }
185 
186 /*
187  * Time of day and interval timer support.
188  *
189  * These routines provide the kernel entry points to get and set
190  * the time-of-day and per-process interval timers.  Subroutines
191  * here provide support for adding and subtracting timeval structures
192  * and decrementing interval timers, optionally reloading the interval
193  * timers when they expire.
194  */
195 
196 /* This function is used by clock_settime and settimeofday */
197 static int
settime1(struct proc * p,const struct timespec * ts,bool check_kauth)198 settime1(struct proc *p, const struct timespec *ts, bool check_kauth)
199 {
200           struct timespec delta, now;
201 
202           /*
203            * The time being set to an unreasonable value will cause
204            * unreasonable system behaviour.
205            */
206           if (ts->tv_sec < 0 || ts->tv_sec > (1LL << 36))
207                     return EINVAL;
208 
209           nanotime(&now);
210           timespecsub(ts, &now, &delta);
211 
212           if (check_kauth && kauth_authorize_system(kauth_cred_get(),
213               KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_SYSTEM, __UNCONST(ts),
214               &delta, KAUTH_ARG(check_kauth ? false : true)) != 0) {
215                     return EPERM;
216           }
217 
218 #ifdef notyet
219           if ((delta.tv_sec < 86400) && securelevel > 0) { /* XXX elad - notyet */
220                     return EPERM;
221           }
222 #endif
223 
224           tc_setclock(ts);
225 
226           resettodr();
227 
228           /*
229            * Notify pending CLOCK_REALTIME timers about the real time change.
230            * There may be inactive timers on this list, but this happens
231            * comparatively less often than timers firing, and so it's better
232            * to put the extra checks here than to complicate the other code
233            * path.
234            */
235           struct itimer *it;
236           itimer_lock();
237           LIST_FOREACH(it, &itimer_realtime_changed_notify, it_rtchgq) {
238                     KASSERT(it->it_ops->ito_realtime_changed != NULL);
239                     if (timespecisset(&it->it_time.it_value)) {
240                               (*it->it_ops->ito_realtime_changed)(it);
241                     }
242           }
243           itimer_unlock();
244 
245           return 0;
246 }
247 
/*
 * settime:
 *
 *	Set the time of day to *ts through settime1(), with the
 *	authorization check enabled.  Returns settime1()'s result.
 */
int
settime(struct proc *p, struct timespec *ts)
{
	const bool check_kauth = true;

	return settime1(p, ts, check_kauth);
}
253 
254 /* ARGSUSED */
255 int
sys___clock_gettime50(struct lwp * l,const struct sys___clock_gettime50_args * uap,register_t * retval)256 sys___clock_gettime50(struct lwp *l,
257     const struct sys___clock_gettime50_args *uap, register_t *retval)
258 {
259           /* {
260                     syscallarg(clockid_t) clock_id;
261                     syscallarg(struct timespec *) tp;
262           } */
263           int error;
264           struct timespec ats;
265 
266           error = clock_gettime1(SCARG(uap, clock_id), &ats);
267           if (error != 0)
268                     return error;
269 
270           return copyout(&ats, SCARG(uap, tp), sizeof(ats));
271 }
272 
273 /* ARGSUSED */
274 int
sys___clock_settime50(struct lwp * l,const struct sys___clock_settime50_args * uap,register_t * retval)275 sys___clock_settime50(struct lwp *l,
276     const struct sys___clock_settime50_args *uap, register_t *retval)
277 {
278           /* {
279                     syscallarg(clockid_t) clock_id;
280                     syscallarg(const struct timespec *) tp;
281           } */
282           int error;
283           struct timespec ats;
284 
285           if ((error = copyin(SCARG(uap, tp), &ats, sizeof(ats))) != 0)
286                     return error;
287 
288           return clock_settime1(l->l_proc, SCARG(uap, clock_id), &ats, true);
289 }
290 
291 
292 int
clock_settime1(struct proc * p,clockid_t clock_id,const struct timespec * tp,bool check_kauth)293 clock_settime1(struct proc *p, clockid_t clock_id, const struct timespec *tp,
294     bool check_kauth)
295 {
296           int error;
297 
298           if (tp->tv_nsec < 0 || tp->tv_nsec >= 1000000000L)
299                     return EINVAL;
300 
301           switch (clock_id) {
302           case CLOCK_REALTIME:
303                     if ((error = settime1(p, tp, check_kauth)) != 0)
304                               return error;
305                     break;
306           case CLOCK_MONOTONIC:
307                     return EINVAL;      /* read-only clock */
308           default:
309                     return EINVAL;
310           }
311 
312           return 0;
313 }
314 
315 int
sys___clock_getres50(struct lwp * l,const struct sys___clock_getres50_args * uap,register_t * retval)316 sys___clock_getres50(struct lwp *l, const struct sys___clock_getres50_args *uap,
317     register_t *retval)
318 {
319           /* {
320                     syscallarg(clockid_t) clock_id;
321                     syscallarg(struct timespec *) tp;
322           } */
323           struct timespec ts;
324           int error;
325 
326           if ((error = clock_getres1(SCARG(uap, clock_id), &ts)) != 0)
327                     return error;
328 
329           if (SCARG(uap, tp))
330                     error = copyout(&ts, SCARG(uap, tp), sizeof(ts));
331 
332           return error;
333 }
334 
335 int
clock_getres1(clockid_t clock_id,struct timespec * ts)336 clock_getres1(clockid_t clock_id, struct timespec *ts)
337 {
338 
339           switch (clock_id) {
340           case CLOCK_REALTIME:
341           case CLOCK_MONOTONIC:
342           case CLOCK_PROCESS_CPUTIME_ID:
343           case CLOCK_THREAD_CPUTIME_ID:
344                     ts->tv_sec = 0;
345                     if (tc_getfrequency() > 1000000000)
346                               ts->tv_nsec = 1;
347                     else
348                               ts->tv_nsec = 1000000000 / tc_getfrequency();
349                     break;
350           default:
351                     return EINVAL;
352           }
353 
354           return 0;
355 }
356 
357 /* ARGSUSED */
358 int
sys___nanosleep50(struct lwp * l,const struct sys___nanosleep50_args * uap,register_t * retval)359 sys___nanosleep50(struct lwp *l, const struct sys___nanosleep50_args *uap,
360     register_t *retval)
361 {
362           /* {
363                     syscallarg(struct timespec *) rqtp;
364                     syscallarg(struct timespec *) rmtp;
365           } */
366           struct timespec rmt, rqt;
367           int error, error1;
368 
369           error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
370           if (error)
371                     return error;
372 
373           error = nanosleep1(l, CLOCK_MONOTONIC, 0, &rqt,
374               SCARG(uap, rmtp) ? &rmt : NULL);
375           if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
376                     return error;
377 
378           error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt));
379           return error1 ? error1 : error;
380 }
381 
382 /* ARGSUSED */
383 int
sys_clock_nanosleep(struct lwp * l,const struct sys_clock_nanosleep_args * uap,register_t * retval)384 sys_clock_nanosleep(struct lwp *l, const struct sys_clock_nanosleep_args *uap,
385     register_t *retval)
386 {
387           /* {
388                     syscallarg(clockid_t) clock_id;
389                     syscallarg(int) flags;
390                     syscallarg(struct timespec *) rqtp;
391                     syscallarg(struct timespec *) rmtp;
392           } */
393           struct timespec rmt, rqt;
394           int error, error1;
395 
396           error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
397           if (error)
398                     goto out;
399 
400           error = nanosleep1(l, SCARG(uap, clock_id), SCARG(uap, flags), &rqt,
401               SCARG(uap, rmtp) ? &rmt : NULL);
402           if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
403                     goto out;
404 
405           if ((SCARG(uap, flags) & TIMER_ABSTIME) == 0 &&
406               (error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt))) != 0)
407                     error = error1;
408 out:
409           *retval = error;
410           return 0;
411 }
412 
413 int
nanosleep1(struct lwp * l,clockid_t clock_id,int flags,struct timespec * rqt,struct timespec * rmt)414 nanosleep1(struct lwp *l, clockid_t clock_id, int flags, struct timespec *rqt,
415     struct timespec *rmt)
416 {
417           struct timespec rmtstart;
418           int error, timo;
419 
420           if ((error = ts2timo(clock_id, flags, rqt, &timo, &rmtstart)) != 0) {
421                     if (error == ETIMEDOUT) {
422                               error = 0;
423                               if (rmt != NULL)
424                                         rmt->tv_sec = rmt->tv_nsec = 0;
425                     }
426                     return error;
427           }
428 
429           /*
430            * Avoid inadvertently sleeping forever
431            */
432           if (timo == 0)
433                     timo = 1;
434 again:
435           error = kpause("nanoslp", true, timo, NULL);
436           if (error == EWOULDBLOCK)
437                     error = 0;
438           if (rmt != NULL || error == 0) {
439                     struct timespec rmtend;
440                     struct timespec t0;
441                     struct timespec *t;
442                     int err;
443 
444                     err = clock_gettime1(clock_id, &rmtend);
445                     if (err != 0)
446                               return err;
447 
448                     t = (rmt != NULL) ? rmt : &t0;
449                     if (flags & TIMER_ABSTIME) {
450                               timespecsub(rqt, &rmtend, t);
451                     } else {
452                               if (timespeccmp(&rmtend, &rmtstart, <))
453                                         timespecclear(t); /* clock wound back */
454                               else
455                                         timespecsub(&rmtend, &rmtstart, t);
456                               if (timespeccmp(rqt, t, <))
457                                         timespecclear(t);
458                               else
459                                         timespecsub(rqt, t, t);
460                     }
461                     if (t->tv_sec < 0)
462                               timespecclear(t);
463                     if (error == 0) {
464                               timo = tstohz(t);
465                               if (timo > 0)
466                                         goto again;
467                     }
468           }
469 
470           if (error == ERESTART)
471                     error = EINTR;
472 
473           return error;
474 }
475 
476 int
sys_clock_getcpuclockid2(struct lwp * l,const struct sys_clock_getcpuclockid2_args * uap,register_t * retval)477 sys_clock_getcpuclockid2(struct lwp *l,
478     const struct sys_clock_getcpuclockid2_args *uap,
479     register_t *retval)
480 {
481           /* {
482                     syscallarg(idtype_t idtype;
483                     syscallarg(id_t id);
484                     syscallarg(clockid_t *)clock_id;
485           } */
486           pid_t pid;
487           lwpid_t lid;
488           clockid_t clock_id;
489           id_t id = SCARG(uap, id);
490 
491           switch (SCARG(uap, idtype)) {
492           case P_PID:
493                     pid = id == 0 ? l->l_proc->p_pid : id;
494                     clock_id = CLOCK_PROCESS_CPUTIME_ID | pid;
495                     break;
496           case P_LWPID:
497                     lid = id == 0 ? l->l_lid : id;
498                     clock_id = CLOCK_THREAD_CPUTIME_ID | lid;
499                     break;
500           default:
501                     return EINVAL;
502           }
503           return copyout(&clock_id, SCARG(uap, clock_id), sizeof(clock_id));
504 }
505 
506 /* ARGSUSED */
507 int
sys___gettimeofday50(struct lwp * l,const struct sys___gettimeofday50_args * uap,register_t * retval)508 sys___gettimeofday50(struct lwp *l, const struct sys___gettimeofday50_args *uap,
509     register_t *retval)
510 {
511           /* {
512                     syscallarg(struct timeval *) tp;
513                     syscallarg(void *) tzp;                 really "struct timezone *";
514           } */
515           struct timeval atv;
516           int error = 0;
517           struct timezone tzfake;
518 
519           if (SCARG(uap, tp)) {
520                     memset(&atv, 0, sizeof(atv));
521                     microtime(&atv);
522                     error = copyout(&atv, SCARG(uap, tp), sizeof(atv));
523                     if (error)
524                               return error;
525           }
526           if (SCARG(uap, tzp)) {
527                     /*
528                      * NetBSD has no kernel notion of time zone, so we just
529                      * fake up a timezone struct and return it if demanded.
530                      */
531                     tzfake.tz_minuteswest = 0;
532                     tzfake.tz_dsttime = 0;
533                     error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake));
534           }
535           return error;
536 }
537 
538 /* ARGSUSED */
539 int
sys___settimeofday50(struct lwp * l,const struct sys___settimeofday50_args * uap,register_t * retval)540 sys___settimeofday50(struct lwp *l, const struct sys___settimeofday50_args *uap,
541     register_t *retval)
542 {
543           /* {
544                     syscallarg(const struct timeval *) tv;
545                     syscallarg(const void *) tzp; really "const struct timezone *";
546           } */
547 
548           return settimeofday1(SCARG(uap, tv), true, SCARG(uap, tzp), l, true);
549 }
550 
551 int
settimeofday1(const struct timeval * utv,bool userspace,const void * utzp,struct lwp * l,bool check_kauth)552 settimeofday1(const struct timeval *utv, bool userspace,
553     const void *utzp, struct lwp *l, bool check_kauth)
554 {
555           struct timeval atv;
556           struct timespec ts;
557           int error;
558 
559           /* Verify all parameters before changing time. */
560 
561           /*
562            * NetBSD has no kernel notion of time zone, and only an
563            * obsolete program would try to set it, so we log a warning.
564            */
565           if (utzp)
566                     log(LOG_WARNING, "pid %d attempted to set the "
567                         "(obsolete) kernel time zone\n", l->l_proc->p_pid);
568 
569           if (utv == NULL)
570                     return 0;
571 
572           if (userspace) {
573                     if ((error = copyin(utv, &atv, sizeof(atv))) != 0)
574                               return error;
575                     utv = &atv;
576           }
577 
578           if (utv->tv_usec < 0 || utv->tv_usec >= 1000000)
579                     return EINVAL;
580 
581           TIMEVAL_TO_TIMESPEC(utv, &ts);
582           return settime1(l->l_proc, &ts, check_kauth);
583 }
584 
/* Set by adjtime1() once a nonzero adjustment has been installed. */
int	time_adjusted;
586 
587 /* ARGSUSED */
588 int
sys___adjtime50(struct lwp * l,const struct sys___adjtime50_args * uap,register_t * retval)589 sys___adjtime50(struct lwp *l, const struct sys___adjtime50_args *uap,
590     register_t *retval)
591 {
592           /* {
593                     syscallarg(const struct timeval *) delta;
594                     syscallarg(struct timeval *) olddelta;
595           } */
596           int error;
597           struct timeval atv, oldatv;
598 
599           if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
600               KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0)
601                     return error;
602 
603           if (SCARG(uap, delta)) {
604                     error = copyin(SCARG(uap, delta), &atv,
605                         sizeof(*SCARG(uap, delta)));
606                     if (error)
607                               return error;
608           }
609           adjtime1(SCARG(uap, delta) ? &atv : NULL,
610               SCARG(uap, olddelta) ? &oldatv : NULL, l->l_proc);
611           if (SCARG(uap, olddelta))
612                     error = copyout(&oldatv, SCARG(uap, olddelta),
613                         sizeof(*SCARG(uap, olddelta)));
614           return error;
615 }
616 
617 void
adjtime1(const struct timeval * delta,struct timeval * olddelta,struct proc * p)618 adjtime1(const struct timeval *delta, struct timeval *olddelta, struct proc *p)
619 {
620 
621           if (olddelta) {
622                     memset(olddelta, 0, sizeof(*olddelta));
623                     mutex_spin_enter(&timecounter_lock);
624                     olddelta->tv_sec = time_adjtime / 1000000;
625                     olddelta->tv_usec = time_adjtime % 1000000;
626                     if (olddelta->tv_usec < 0) {
627                               olddelta->tv_usec += 1000000;
628                               olddelta->tv_sec--;
629                     }
630                     mutex_spin_exit(&timecounter_lock);
631           }
632 
633           if (delta) {
634                     mutex_spin_enter(&timecounter_lock);
635                     /*
636                      * XXX This should maybe just report failure to
637                      * userland for nonsense deltas.
638                      */
639                     if (delta->tv_sec > INT64_MAX/1000000 - 1) {
640                               time_adjtime = INT64_MAX;
641                     } else if (delta->tv_sec < INT64_MIN/1000000 + 1) {
642                               time_adjtime = INT64_MIN;
643                     } else {
644                               time_adjtime = delta->tv_sec * 1000000
645                                   + MAX(-999999, MIN(999999, delta->tv_usec));
646                     }
647 
648                     if (time_adjtime) {
649                               /* We need to save the system time during shutdown */
650                               time_adjusted |= 1;
651                     }
652                     mutex_spin_exit(&timecounter_lock);
653           }
654 }
655 
656 /*
657  * Interval timer support.
658  *
 * The itimer_*() routines provide generic support for interval timers,
 * both real (CLOCK_REALTIME, CLOCK_MONOTONIC), and virtual (CLOCK_VIRTUAL,
 * CLOCK_PROF).
662  *
663  * Real timers keep their deadline as an absolute time, and are fired
664  * by a callout.  Virtual timers are kept as a linked-list of deltas,
665  * and are processed by hardclock().
666  *
 * Because the real time timer callout may be delayed in real time due
 * to interrupt processing on the system, it is possible for the real
 * time timeout routine (itimer_callout()) to run after its deadline.
670  * It does not suffice, therefore, to reload the real timer .it_value
671  * from the timer's .it_interval.  Rather, we compute the next deadline
672  * in absolute time based on the current time and the .it_interval value,
673  * and report any overruns.
674  *
675  * Note that while the virtual timers are supported in a generic fashion
676  * here, they only (currently) make sense as per-process timers, and thus
677  * only really work for that case.
678  */
679 
680 /*
681  * itimer_init:
682  *
683  *        Initialize the common data for an interval timer.
684  */
685 void
itimer_init(struct itimer * const it,const struct itimer_ops * const ops,clockid_t const id,struct itlist * const itl)686 itimer_init(struct itimer * const it, const struct itimer_ops * const ops,
687     clockid_t const id, struct itlist * const itl)
688 {
689 
690           KASSERT(itimer_lock_held());
691           KASSERT(ops != NULL);
692 
693           timespecclear(&it->it_time.it_value);
694           it->it_ops = ops;
695           it->it_clockid = id;
696           it->it_overruns = 0;
697           it->it_dying = false;
698           if (!CLOCK_VIRTUAL_P(id)) {
699                     KASSERT(itl == NULL);
700                     callout_init(&it->it_ch, CALLOUT_MPSAFE);
701                     callout_setfunc(&it->it_ch, itimer_callout, it);
702                     if (id == CLOCK_REALTIME && ops->ito_realtime_changed != NULL) {
703                               LIST_INSERT_HEAD(&itimer_realtime_changed_notify,
704                                   it, it_rtchgq);
705                     }
706           } else {
707                     KASSERT(itl != NULL);
708                     it->it_vlist = itl;
709                     it->it_active = false;
710           }
711 }
712 
713 /*
714  * itimer_poison:
715  *
716  *        Poison an interval timer, preventing it from being scheduled
717  *        or processed, in preparation for freeing the timer.
718  */
719 void
itimer_poison(struct itimer * const it)720 itimer_poison(struct itimer * const it)
721 {
722 
723           KASSERT(itimer_lock_held());
724 
725           it->it_dying = true;
726 
727           /*
728            * For non-virtual timers, stop the callout, or wait for it to
729            * run if it has already fired.  It cannot restart again after
730            * this point: the callout won't restart itself when dying, no
731            * other users holding the lock can restart it, and any other
732            * users waiting for callout_halt concurrently (itimer_settime)
733            * will restart from the top.
734            */
735           if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
736                     callout_halt(&it->it_ch, &itimer_mutex);
737                     if (it->it_clockid == CLOCK_REALTIME &&
738                         it->it_ops->ito_realtime_changed != NULL) {
739                               LIST_REMOVE(it, it_rtchgq);
740                     }
741           }
742 }
743 
744 /*
745  * itimer_fini:
746  *
747  *        Release resources used by an interval timer.
748  *
749  *        N.B. itimer_lock must be held on entry, and is released on exit.
750  */
751 void
itimer_fini(struct itimer * const it)752 itimer_fini(struct itimer * const it)
753 {
754 
755           KASSERT(itimer_lock_held());
756 
757           /* All done with the global state. */
758           itimer_unlock();
759 
760           /* Destroy the callout, if needed. */
761           if (!CLOCK_VIRTUAL_P(it->it_clockid))
762                     callout_destroy(&it->it_ch);
763 }
764 
765 /*
766  * itimer_decr:
767  *
768  *        Decrement an interval timer by a specified number of nanoseconds,
769  *        which must be less than a second, i.e. < 1000000000.  If the timer
770  *        expires, then reload it.  In this case, carry over (nsec - old value)
771  *        to reduce the value reloaded into the timer so that the timer does
772  *        not drift.  This routine assumes that it is called in a context where
773  *        the timers on which it is operating cannot change in value.
774  *
775  *        Returns true if the timer has expired.
776  */
777 static bool
itimer_decr(struct itimer * it,int nsec)778 itimer_decr(struct itimer *it, int nsec)
779 {
780           struct itimerspec *itp;
781           int error __diagused;
782 
783           KASSERT(itimer_lock_held());
784           KASSERT(CLOCK_VIRTUAL_P(it->it_clockid));
785 
786           itp = &it->it_time;
787           if (itp->it_value.tv_nsec < nsec) {
788                     if (itp->it_value.tv_sec == 0) {
789                               /* expired, and already in next interval */
790                               nsec -= itp->it_value.tv_nsec;
791                               goto expire;
792                     }
793                     itp->it_value.tv_nsec += 1000000000;
794                     itp->it_value.tv_sec--;
795           }
796           itp->it_value.tv_nsec -= nsec;
797           nsec = 0;
798           if (timespecisset(&itp->it_value))
799                     return false;
800           /* expired, exactly at end of interval */
801  expire:
802           if (timespecisset(&itp->it_interval)) {
803                     itp->it_value = itp->it_interval;
804                     itp->it_value.tv_nsec -= nsec;
805                     if (itp->it_value.tv_nsec < 0) {
806                               itp->it_value.tv_nsec += 1000000000;
807                               itp->it_value.tv_sec--;
808                     }
809                     error = itimer_settime(it);
810                     KASSERT(error == 0); /* virtual, never fails */
811           } else
812                     itp->it_value.tv_nsec = 0;              /* sec is already 0 */
813           return true;
814 }
815 
816 /*
817  * itimer_arm_real:
818  *
819  *        Arm a non-virtual timer.
820  */
821 static void
itimer_arm_real(struct itimer * const it)822 itimer_arm_real(struct itimer * const it)
823 {
824 
825           KASSERT(!it->it_dying);
826           KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid));
827           KASSERT(!callout_pending(&it->it_ch));
828 
829           /*
830            * Don't need to check tshzto() return value, here.
831            * callout_schedule() does it for us.
832            */
833           callout_schedule(&it->it_ch,
834               (it->it_clockid == CLOCK_MONOTONIC
835                     ? tshztoup(&it->it_time.it_value)
836                     : tshzto(&it->it_time.it_value)));
837 }
838 
839 /*
840  * itimer_callout:
841  *
842  *        Callout to expire a non-virtual timer.  Queue it up for processing,
843  *        and then reload, if it is configured to do so.
844  *
845  *        N.B. A delay in processing this callout causes multiple
846  *        SIGALRM calls to be compressed into one.
847  */
848 static void
itimer_callout(void * arg)849 itimer_callout(void *arg)
850 {
851           struct timespec now, next;
852           struct itimer * const it = arg;
853           int overruns;
854 
855           itimer_lock();
856           (*it->it_ops->ito_fire)(it);
857 
858           if (!timespecisset(&it->it_time.it_interval)) {
859                     timespecclear(&it->it_time.it_value);
860                     itimer_unlock();
861                     return;
862           }
863 
864           if (it->it_clockid == CLOCK_MONOTONIC) {
865                     getnanouptime(&now);
866           } else {
867                     getnanotime(&now);
868           }
869 
870           /*
871            * Given the current itimer value and interval and the time
872            * now, compute the next itimer value and count overruns.
873            */
874           itimer_transition(&it->it_time, &now, &next, &overruns);
875           it->it_time.it_value = next;
876           it->it_overruns += overruns;
877 
878           /*
879            * Reset the callout, if it's not going away.
880            */
881           if (!it->it_dying)
882                     itimer_arm_real(it);
883           itimer_unlock();
884 }
885 
886 /*
887  * itimer_settime:
888  *
889  *        Set up the given interval timer. The value in it->it_time.it_value
890  *        is taken to be an absolute time for CLOCK_REALTIME/CLOCK_MONOTONIC
891  *        timers and a relative time for CLOCK_VIRTUAL/CLOCK_PROF timers.
892  *
893  *        If the callout had already fired but not yet run, fails with
894  *        ERESTART -- caller must restart from the top to look up a timer.
895  *
 *	Caller is responsible for validating it->it_time.it_value and
 *	it->it_time.it_interval, e.g. with itimerfix or itimespecfix.
898  */
899 int
itimer_settime(struct itimer * it)900 itimer_settime(struct itimer *it)
901 {
902           struct itimer *itn, *pitn;
903           struct itlist *itl;
904 
905           KASSERT(itimer_lock_held());
906           KASSERT(!it->it_dying);
907           KASSERT(it->it_time.it_value.tv_sec >= 0);
908           KASSERT(it->it_time.it_value.tv_nsec >= 0);
909           KASSERT(it->it_time.it_value.tv_nsec < 1000000000);
910           KASSERT(it->it_time.it_interval.tv_sec >= 0);
911           KASSERT(it->it_time.it_interval.tv_nsec >= 0);
912           KASSERT(it->it_time.it_interval.tv_nsec < 1000000000);
913 
914           if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
915                     /*
916                      * Try to stop the callout.  However, if it had already
917                      * fired, we have to drop the lock to wait for it, so
918                      * the world may have changed and pt may not be there
919                      * any more.  In that case, tell the caller to start
920                      * over from the top.
921                      */
922                     if (callout_halt(&it->it_ch, &itimer_mutex))
923                               return ERESTART;
924                     KASSERT(!it->it_dying);
925 
926                     /* Now we can touch it and start it up again. */
927                     if (timespecisset(&it->it_time.it_value))
928                               itimer_arm_real(it);
929           } else {
930                     if (it->it_active) {
931                               itn = LIST_NEXT(it, it_list);
932                               LIST_REMOVE(it, it_list);
933                               for ( ; itn; itn = LIST_NEXT(itn, it_list))
934                                         timespecadd(&it->it_time.it_value,
935                                             &itn->it_time.it_value,
936                                             &itn->it_time.it_value);
937                     }
938                     if (timespecisset(&it->it_time.it_value)) {
939                               itl = it->it_vlist;
940                               for (itn = LIST_FIRST(itl), pitn = NULL;
941                                    itn && timespeccmp(&it->it_time.it_value,
942                                          &itn->it_time.it_value, >);
943                                    pitn = itn, itn = LIST_NEXT(itn, it_list))
944                                         timespecsub(&it->it_time.it_value,
945                                             &itn->it_time.it_value,
946                                             &it->it_time.it_value);
947 
948                               if (pitn)
949                                         LIST_INSERT_AFTER(pitn, it, it_list);
950                               else
951                                         LIST_INSERT_HEAD(itl, it, it_list);
952 
953                               for ( ; itn ; itn = LIST_NEXT(itn, it_list))
954                                         timespecsub(&itn->it_time.it_value,
955                                             &it->it_time.it_value,
956                                             &itn->it_time.it_value);
957 
958                               it->it_active = true;
959                     } else {
960                               it->it_active = false;
961                     }
962           }
963 
964           /* Success!  */
965           return 0;
966 }
967 
968 /*
969  * itimer_gettime:
970  *
971  *        Return the remaining time of an interval timer.
972  */
973 void
itimer_gettime(const struct itimer * it,struct itimerspec * aits)974 itimer_gettime(const struct itimer *it, struct itimerspec *aits)
975 {
976           struct timespec now;
977           struct itimer *itn;
978 
979           KASSERT(itimer_lock_held());
980           KASSERT(!it->it_dying);
981 
982           *aits = it->it_time;
983           if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
984                     /*
985                      * Convert from absolute to relative time in .it_value
986                      * part of real time timer.  If time for real time
987                      * timer has passed return 0, else return difference
988                      * between current time and time for the timer to go
989                      * off.
990                      */
991                     if (timespecisset(&aits->it_value)) {
992                               if (it->it_clockid == CLOCK_REALTIME) {
993                                         getnanotime(&now);
994                               } else { /* CLOCK_MONOTONIC */
995                                         getnanouptime(&now);
996                               }
997                               if (timespeccmp(&aits->it_value, &now, <))
998                                         timespecclear(&aits->it_value);
999                               else
1000                                         timespecsub(&aits->it_value, &now,
1001                                             &aits->it_value);
1002                     }
1003           } else if (it->it_active) {
1004                     for (itn = LIST_FIRST(it->it_vlist); itn && itn != it;
1005                          itn = LIST_NEXT(itn, it_list))
1006                               timespecadd(&aits->it_value,
1007                                   &itn->it_time.it_value, &aits->it_value);
1008                     KASSERT(itn != NULL); /* it should be findable on the list */
1009           } else
1010                     timespecclear(&aits->it_value);
1011 }
1012 
1013 /*
1014  * Per-process timer support.
1015  *
1016  * Both the BSD getitimer() family and the POSIX timer_*() family of
1017  * routines are supported.
1018  *
1019  * All timers are kept in an array pointed to by p_timers, which is
1020  * allocated on demand - many processes don't use timers at all. The
1021  * first four elements in this array are reserved for the BSD timers:
1022  * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, element
1023  * 2 is ITIMER_PROF, and element 3 is ITIMER_MONOTONIC. The rest may be
1024  * allocated by the timer_create() syscall.
1025  *
1026  * These timers are a "sub-class" of interval timer.
1027  */
1028 
1029 /*
1030  * ptimer_free:
1031  *
1032  *        Free the per-process timer at the specified index.
1033  */
1034 static void
ptimer_free(struct ptimers * pts,int index)1035 ptimer_free(struct ptimers *pts, int index)
1036 {
1037           struct itimer *it;
1038           struct ptimer *pt;
1039 
1040           KASSERT(itimer_lock_held());
1041 
1042           it = pts->pts_timers[index];
1043           pt = container_of(it, struct ptimer, pt_itimer);
1044           pts->pts_timers[index] = NULL;
1045           itimer_poison(it);
1046 
1047           /*
1048            * Remove it from the queue to be signalled.  Must be done
1049            * after itimer is poisoned, because we may have had to wait
1050            * for the callout to complete.
1051            */
1052           if (pt->pt_queued) {
1053                     TAILQ_REMOVE(&ptimer_queue, pt, pt_chain);
1054                     pt->pt_queued = false;
1055           }
1056 
1057           itimer_fini(it);    /* releases itimer_lock */
1058           kmem_free(pt, sizeof(*pt));
1059 }
1060 
1061 /*
1062  * ptimers_alloc:
1063  *
1064  *        Allocate a ptimers for the specified process.
1065  */
1066 static struct ptimers *
ptimers_alloc(struct proc * p)1067 ptimers_alloc(struct proc *p)
1068 {
1069           struct ptimers *pts;
1070           int i;
1071 
1072           pts = kmem_alloc(sizeof(*pts), KM_SLEEP);
1073           LIST_INIT(&pts->pts_virtual);
1074           LIST_INIT(&pts->pts_prof);
1075           for (i = 0; i < TIMER_MAX; i++)
1076                     pts->pts_timers[i] = NULL;
1077           itimer_lock();
1078           if (p->p_timers == NULL) {
1079                     p->p_timers = pts;
1080                     itimer_unlock();
1081                     return pts;
1082           }
1083           itimer_unlock();
1084           kmem_free(pts, sizeof(*pts));
1085           return p->p_timers;
1086 }
1087 
1088 /*
1089  * ptimers_free:
1090  *
1091  *        Clean up the per-process timers. If "which" is set to TIMERS_ALL,
1092  *        then clean up all timers and free all the data structures. If
1093  *        "which" is set to TIMERS_POSIX, only clean up the timers allocated
1094  *        by timer_create(), not the BSD setitimer() timers, and only free the
1095  *        structure if none of those remain.
1096  *
1097  *        This function is exported because it is needed in the exec and
1098  *        exit code paths.
1099  */
1100 void
ptimers_free(struct proc * p,int which)1101 ptimers_free(struct proc *p, int which)
1102 {
1103           struct ptimers *pts;
1104           struct itimer *itn;
1105           struct timespec ts;
1106           int i;
1107 
1108           if (p->p_timers == NULL)
1109                     return;
1110 
1111           pts = p->p_timers;
1112           itimer_lock();
1113           if (which == TIMERS_ALL) {
1114                     p->p_timers = NULL;
1115                     i = 0;
1116           } else {
1117                     timespecclear(&ts);
1118                     for (itn = LIST_FIRST(&pts->pts_virtual);
1119                          itn && itn != pts->pts_timers[ITIMER_VIRTUAL];
1120                          itn = LIST_NEXT(itn, it_list)) {
1121                               KASSERT(itn->it_clockid == CLOCK_VIRTUAL);
1122                               timespecadd(&ts, &itn->it_time.it_value, &ts);
1123                     }
1124                     LIST_FIRST(&pts->pts_virtual) = NULL;
1125                     if (itn) {
1126                               KASSERT(itn->it_clockid == CLOCK_VIRTUAL);
1127                               timespecadd(&ts, &itn->it_time.it_value,
1128                                   &itn->it_time.it_value);
1129                               LIST_INSERT_HEAD(&pts->pts_virtual, itn, it_list);
1130                     }
1131                     timespecclear(&ts);
1132                     for (itn = LIST_FIRST(&pts->pts_prof);
1133                          itn && itn != pts->pts_timers[ITIMER_PROF];
1134                          itn = LIST_NEXT(itn, it_list)) {
1135                               KASSERT(itn->it_clockid == CLOCK_PROF);
1136                               timespecadd(&ts, &itn->it_time.it_value, &ts);
1137                     }
1138                     LIST_FIRST(&pts->pts_prof) = NULL;
1139                     if (itn) {
1140                               KASSERT(itn->it_clockid == CLOCK_PROF);
1141                               timespecadd(&ts, &itn->it_time.it_value,
1142                                   &itn->it_time.it_value);
1143                               LIST_INSERT_HEAD(&pts->pts_prof, itn, it_list);
1144                     }
1145                     i = TIMER_MIN;
1146           }
1147           for ( ; i < TIMER_MAX; i++) {
1148                     if (pts->pts_timers[i] != NULL) {
1149                               /* Free the timer and release the lock.  */
1150                               ptimer_free(pts, i);
1151                               /* Reacquire the lock for the next one.  */
1152                               itimer_lock();
1153                     }
1154           }
1155           if (pts->pts_timers[0] == NULL && pts->pts_timers[1] == NULL &&
1156               pts->pts_timers[2] == NULL && pts->pts_timers[3] == NULL) {
1157                     p->p_timers = NULL;
1158                     itimer_unlock();
1159                     kmem_free(pts, sizeof(*pts));
1160           } else
1161                     itimer_unlock();
1162 }
1163 
1164 /*
1165  * ptimer_fire:
1166  *
1167  *        Fire a per-process timer.
1168  */
1169 static void
ptimer_fire(struct itimer * it)1170 ptimer_fire(struct itimer *it)
1171 {
1172           struct ptimer *pt = container_of(it, struct ptimer, pt_itimer);
1173 
1174           KASSERT(itimer_lock_held());
1175 
1176           /*
1177            * XXX Can overrun, but we don't do signal queueing yet, anyway.
1178            * XXX Relying on the clock interrupt is stupid.
1179            */
1180           if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL) {
1181                     return;
1182           }
1183 
1184           if (!pt->pt_queued) {
1185                     TAILQ_INSERT_TAIL(&ptimer_queue, pt, pt_chain);
1186                     pt->pt_queued = true;
1187                     softint_schedule(ptimer_sih);
1188           }
1189 }
1190 
1191 /*
1192  * Operations vector for per-process timers (BSD and POSIX).
1193  */
1194 static const struct itimer_ops ptimer_itimer_ops = {
1195           .ito_fire = ptimer_fire,
1196 };
1197 
1198 /*
1199  * sys_timer_create:
1200  *
1201  *        System call to create a POSIX timer.
1202  */
1203 int
sys_timer_create(struct lwp * l,const struct sys_timer_create_args * uap,register_t * retval)1204 sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap,
1205     register_t *retval)
1206 {
1207           /* {
1208                     syscallarg(clockid_t) clock_id;
1209                     syscallarg(struct sigevent *) evp;
1210                     syscallarg(timer_t *) timerid;
1211           } */
1212 
1213           return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id),
1214               SCARG(uap, evp), copyin, l);
1215 }
1216 
1217 int
timer_create1(timer_t * tid,clockid_t id,struct sigevent * evp,copyin_t fetch_event,struct lwp * l)1218 timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp,
1219     copyin_t fetch_event, struct lwp *l)
1220 {
1221           int error;
1222           timer_t timerid;
1223           struct itlist *itl;
1224           struct ptimers *pts;
1225           struct ptimer *pt;
1226           struct proc *p;
1227 
1228           p = l->l_proc;
1229 
1230           if ((u_int)id > CLOCK_MONOTONIC)
1231                     return EINVAL;
1232 
1233           if ((pts = p->p_timers) == NULL)
1234                     pts = ptimers_alloc(p);
1235 
1236           pt = kmem_zalloc(sizeof(*pt), KM_SLEEP);
1237           if (evp != NULL) {
1238                     if (((error =
1239                         (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) ||
1240                         ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
1241                               (pt->pt_ev.sigev_notify > SIGEV_SA)) ||
1242                               (pt->pt_ev.sigev_notify == SIGEV_SIGNAL &&
1243                                (pt->pt_ev.sigev_signo <= 0 ||
1244                                 pt->pt_ev.sigev_signo >= NSIG))) {
1245                               kmem_free(pt, sizeof(*pt));
1246                               return (error ? error : EINVAL);
1247                     }
1248           }
1249 
1250           /* Find a free timer slot, skipping those reserved for setitimer(). */
1251           itimer_lock();
1252           for (timerid = TIMER_MIN; timerid < TIMER_MAX; timerid++)
1253                     if (pts->pts_timers[timerid] == NULL)
1254                               break;
1255           if (timerid == TIMER_MAX) {
1256                     itimer_unlock();
1257                     kmem_free(pt, sizeof(*pt));
1258                     return EAGAIN;
1259           }
1260           if (evp == NULL) {
1261                     pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
1262                     switch (id) {
1263                     case CLOCK_REALTIME:
1264                     case CLOCK_MONOTONIC:
1265                               pt->pt_ev.sigev_signo = SIGALRM;
1266                               break;
1267                     case CLOCK_VIRTUAL:
1268                               pt->pt_ev.sigev_signo = SIGVTALRM;
1269                               break;
1270                     case CLOCK_PROF:
1271                               pt->pt_ev.sigev_signo = SIGPROF;
1272                               break;
1273                     }
1274                     pt->pt_ev.sigev_value.sival_int = timerid;
1275           }
1276 
1277           switch (id) {
1278           case CLOCK_VIRTUAL:
1279                     itl = &pts->pts_virtual;
1280                     break;
1281           case CLOCK_PROF:
1282                     itl = &pts->pts_prof;
1283                     break;
1284           default:
1285                     itl = NULL;
1286           }
1287 
1288           itimer_init(&pt->pt_itimer, &ptimer_itimer_ops, id, itl);
1289           pt->pt_proc = p;
1290           pt->pt_poverruns = 0;
1291           pt->pt_entry = timerid;
1292           pt->pt_queued = false;
1293 
1294           pts->pts_timers[timerid] = &pt->pt_itimer;
1295           itimer_unlock();
1296 
1297           return copyout(&timerid, tid, sizeof(timerid));
1298 }
1299 
1300 /*
1301  * sys_timer_delete:
1302  *
1303  *        System call to delete a POSIX timer.
1304  */
1305 int
sys_timer_delete(struct lwp * l,const struct sys_timer_delete_args * uap,register_t * retval)1306 sys_timer_delete(struct lwp *l, const struct sys_timer_delete_args *uap,
1307     register_t *retval)
1308 {
1309           /* {
1310                     syscallarg(timer_t) timerid;
1311           } */
1312           struct proc *p = l->l_proc;
1313           timer_t timerid;
1314           struct ptimers *pts;
1315           struct itimer *it, *itn;
1316 
1317           timerid = SCARG(uap, timerid);
1318           pts = p->p_timers;
1319 
1320           if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
1321                     return EINVAL;
1322 
1323           itimer_lock();
1324           if ((it = pts->pts_timers[timerid]) == NULL) {
1325                     itimer_unlock();
1326                     return EINVAL;
1327           }
1328 
1329           if (CLOCK_VIRTUAL_P(it->it_clockid)) {
1330                     if (it->it_active) {
1331                               itn = LIST_NEXT(it, it_list);
1332                               LIST_REMOVE(it, it_list);
1333                               for ( ; itn; itn = LIST_NEXT(itn, it_list))
1334                                         timespecadd(&it->it_time.it_value,
1335                                             &itn->it_time.it_value,
1336                                             &itn->it_time.it_value);
1337                               it->it_active = false;
1338                     }
1339           }
1340 
1341           /* Free the timer and release the lock.  */
1342           ptimer_free(pts, timerid);
1343 
1344           return 0;
1345 }
1346 
1347 /*
1348  * sys___timer_settime50:
1349  *
1350  *        System call to set/arm a POSIX timer.
1351  */
1352 int
sys___timer_settime50(struct lwp * l,const struct sys___timer_settime50_args * uap,register_t * retval)1353 sys___timer_settime50(struct lwp *l,
1354     const struct sys___timer_settime50_args *uap,
1355     register_t *retval)
1356 {
1357           /* {
1358                     syscallarg(timer_t) timerid;
1359                     syscallarg(int) flags;
1360                     syscallarg(const struct itimerspec *) value;
1361                     syscallarg(struct itimerspec *) ovalue;
1362           } */
1363           int error;
1364           struct itimerspec value, ovalue, *ovp = NULL;
1365 
1366           if ((error = copyin(SCARG(uap, value), &value,
1367               sizeof(struct itimerspec))) != 0)
1368                     return error;
1369 
1370           if (SCARG(uap, ovalue))
1371                     ovp = &ovalue;
1372 
1373           if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp,
1374               SCARG(uap, flags), l->l_proc)) != 0)
1375                     return error;
1376 
1377           if (ovp)
1378                     return copyout(&ovalue, SCARG(uap, ovalue),
1379                         sizeof(struct itimerspec));
1380           return 0;
1381 }
1382 
1383 int
dotimer_settime(int timerid,struct itimerspec * value,struct itimerspec * ovalue,int flags,struct proc * p)1384 dotimer_settime(int timerid, struct itimerspec *value,
1385     struct itimerspec *ovalue, int flags, struct proc *p)
1386 {
1387           struct timespec now;
1388           struct itimerspec val;
1389           struct ptimers *pts;
1390           struct itimer *it;
1391           int error;
1392 
1393           pts = p->p_timers;
1394 
1395           if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
1396                     return EINVAL;
1397           val = *value;
1398           if (itimespecfix(&val.it_value) != 0 ||
1399               itimespecfix(&val.it_interval) != 0)
1400                     return EINVAL;
1401 
1402           itimer_lock();
1403  restart:
1404           if ((it = pts->pts_timers[timerid]) == NULL) {
1405                     itimer_unlock();
1406                     return EINVAL;
1407           }
1408 
1409           if (ovalue)
1410                     itimer_gettime(it, ovalue);
1411           it->it_time = val;
1412 
1413           /*
1414            * If we've been passed a relative time for a realtime timer,
1415            * convert it to absolute; if an absolute time for a virtual
1416            * timer, convert it to relative and make sure we don't set it
1417            * to zero, which would cancel the timer, or let it go
1418            * negative, which would confuse the comparison tests.
1419            */
1420           if (timespecisset(&it->it_time.it_value)) {
1421                     if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
1422                               if ((flags & TIMER_ABSTIME) == 0) {
1423                                         if (it->it_clockid == CLOCK_REALTIME) {
1424                                                   getnanotime(&now);
1425                                         } else { /* CLOCK_MONOTONIC */
1426                                                   getnanouptime(&now);
1427                                         }
1428                                         timespecadd(&it->it_time.it_value, &now,
1429                                             &it->it_time.it_value);
1430                               }
1431                     } else {
1432                               if ((flags & TIMER_ABSTIME) != 0) {
1433                                         getnanotime(&now);
1434                                         timespecsub(&it->it_time.it_value, &now,
1435                                             &it->it_time.it_value);
1436                                         if (!timespecisset(&it->it_time.it_value) ||
1437                                             it->it_time.it_value.tv_sec < 0) {
1438                                                   it->it_time.it_value.tv_sec = 0;
1439                                                   it->it_time.it_value.tv_nsec = 1;
1440                                         }
1441                               }
1442                     }
1443           }
1444 
1445           error = itimer_settime(it);
1446           if (error == ERESTART) {
1447                     KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid));
1448                     goto restart;
1449           }
1450           KASSERT(error == 0);
1451           itimer_unlock();
1452 
1453           return 0;
1454 }
1455 
1456 /*
1457  * sys___timer_gettime50:
1458  *
1459  *        System call to return the time remaining until a POSIX timer fires.
1460  */
1461 int
sys___timer_gettime50(struct lwp * l,const struct sys___timer_gettime50_args * uap,register_t * retval)1462 sys___timer_gettime50(struct lwp *l,
1463     const struct sys___timer_gettime50_args *uap, register_t *retval)
1464 {
1465           /* {
1466                     syscallarg(timer_t) timerid;
1467                     syscallarg(struct itimerspec *) value;
1468           } */
1469           struct itimerspec its;
1470           int error;
1471 
1472           if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc,
1473               &its)) != 0)
1474                     return error;
1475 
1476           return copyout(&its, SCARG(uap, value), sizeof(its));
1477 }
1478 
1479 int
dotimer_gettime(int timerid,struct proc * p,struct itimerspec * its)1480 dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its)
1481 {
1482           struct itimer *it;
1483           struct ptimers *pts;
1484 
1485           pts = p->p_timers;
1486           if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
1487                     return EINVAL;
1488           itimer_lock();
1489           if ((it = pts->pts_timers[timerid]) == NULL) {
1490                     itimer_unlock();
1491                     return EINVAL;
1492           }
1493           itimer_gettime(it, its);
1494           itimer_unlock();
1495 
1496           return 0;
1497 }
1498 
1499 /*
1500  * sys_timer_getoverrun:
1501  *
1502  *        System call to return the number of times a POSIX timer has
1503  *        expired while a notification was already pending.  The counter
1504  *        is reset when a timer expires and a notification can be posted.
1505  */
1506 int
sys_timer_getoverrun(struct lwp * l,const struct sys_timer_getoverrun_args * uap,register_t * retval)1507 sys_timer_getoverrun(struct lwp *l, const struct sys_timer_getoverrun_args *uap,
1508     register_t *retval)
1509 {
1510           /* {
1511                     syscallarg(timer_t) timerid;
1512           } */
1513           struct proc *p = l->l_proc;
1514           struct ptimers *pts;
1515           int timerid;
1516           struct itimer *it;
1517           struct ptimer *pt;
1518 
1519           timerid = SCARG(uap, timerid);
1520 
1521           pts = p->p_timers;
1522           if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
1523                     return EINVAL;
1524           itimer_lock();
1525           if ((it = pts->pts_timers[timerid]) == NULL) {
1526                     itimer_unlock();
1527                     return EINVAL;
1528           }
1529           pt = container_of(it, struct ptimer, pt_itimer);
1530           *retval = pt->pt_poverruns;
1531           if (*retval >= DELAYTIMER_MAX)
1532                     *retval = DELAYTIMER_MAX;
1533           itimer_unlock();
1534 
1535           return 0;
1536 }
1537 
1538 /*
1539  * sys___getitimer50:
1540  *
1541  *        System call to get the time remaining before a BSD timer fires.
1542  */
1543 int
sys___getitimer50(struct lwp * l,const struct sys___getitimer50_args * uap,register_t * retval)1544 sys___getitimer50(struct lwp *l, const struct sys___getitimer50_args *uap,
1545     register_t *retval)
1546 {
1547           /* {
1548                     syscallarg(int) which;
1549                     syscallarg(struct itimerval *) itv;
1550           } */
1551           struct proc *p = l->l_proc;
1552           struct itimerval aitv;
1553           int error;
1554 
1555           memset(&aitv, 0, sizeof(aitv));
1556           error = dogetitimer(p, SCARG(uap, which), &aitv);
1557           if (error)
1558                     return error;
1559           return copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval));
1560 }
1561 
1562 int
dogetitimer(struct proc * p,int which,struct itimerval * itvp)1563 dogetitimer(struct proc *p, int which, struct itimerval *itvp)
1564 {
1565           struct ptimers *pts;
1566           struct itimer *it;
1567           struct itimerspec its;
1568 
1569           if ((u_int)which > ITIMER_MONOTONIC)
1570                     return EINVAL;
1571 
1572           itimer_lock();
1573           pts = p->p_timers;
1574           if (pts == NULL || (it = pts->pts_timers[which]) == NULL) {
1575                     timerclear(&itvp->it_value);
1576                     timerclear(&itvp->it_interval);
1577           } else {
1578                     itimer_gettime(it, &its);
1579                     TIMESPEC_TO_TIMEVAL(&itvp->it_value, &its.it_value);
1580                     TIMESPEC_TO_TIMEVAL(&itvp->it_interval, &its.it_interval);
1581           }
1582           itimer_unlock();
1583 
1584           return 0;
1585 }
1586 
1587 /*
1588  * sys___setitimer50:
1589  *
1590  *        System call to set/arm a BSD timer.
1591  */
1592 int
sys___setitimer50(struct lwp * l,const struct sys___setitimer50_args * uap,register_t * retval)1593 sys___setitimer50(struct lwp *l, const struct sys___setitimer50_args *uap,
1594     register_t *retval)
1595 {
1596           /* {
1597                     syscallarg(int) which;
1598                     syscallarg(const struct itimerval *) itv;
1599                     syscallarg(struct itimerval *) oitv;
1600           } */
1601           struct proc *p = l->l_proc;
1602           int which = SCARG(uap, which);
1603           struct sys___getitimer50_args getargs;
1604           const struct itimerval *itvp;
1605           struct itimerval aitv;
1606           int error;
1607 
1608           itvp = SCARG(uap, itv);
1609           if (itvp &&
1610               (error = copyin(itvp, &aitv, sizeof(struct itimerval))) != 0)
1611                     return error;
1612           if (SCARG(uap, oitv) != NULL) {
1613                     SCARG(&getargs, which) = which;
1614                     SCARG(&getargs, itv) = SCARG(uap, oitv);
1615                     if ((error = sys___getitimer50(l, &getargs, retval)) != 0)
1616                               return error;
1617           }
1618           if (itvp == 0)
1619                     return 0;
1620 
1621           return dosetitimer(p, which, &aitv);
1622 }
1623 
1624 int
dosetitimer(struct proc * p,int which,struct itimerval * itvp)1625 dosetitimer(struct proc *p, int which, struct itimerval *itvp)
1626 {
1627           struct timespec now;
1628           struct ptimers *pts;
1629           struct ptimer *spare;
1630           struct itimer *it;
1631           struct itlist *itl;
1632           int error;
1633 
1634           if ((u_int)which > ITIMER_MONOTONIC)
1635                     return EINVAL;
1636           if (itimerfix(&itvp->it_value) || itimerfix(&itvp->it_interval))
1637                     return EINVAL;
1638 
1639           /*
1640            * Don't bother allocating data structures if the process just
1641            * wants to clear the timer.
1642            */
1643           spare = NULL;
1644           pts = p->p_timers;
1645  retry:
1646           if (!timerisset(&itvp->it_value) && (pts == NULL ||
1647               pts->pts_timers[which] == NULL))
1648                     return 0;
1649           if (pts == NULL)
1650                     pts = ptimers_alloc(p);
1651           itimer_lock();
1652  restart:
1653           it = pts->pts_timers[which];
1654           if (it == NULL) {
1655                     struct ptimer *pt;
1656 
1657                     if (spare == NULL) {
1658                               itimer_unlock();
1659                               spare = kmem_zalloc(sizeof(*spare), KM_SLEEP);
1660                               goto retry;
1661                     }
1662                     pt = spare;
1663                     spare = NULL;
1664 
1665                     it = &pt->pt_itimer;
1666                     pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
1667                     pt->pt_ev.sigev_value.sival_int = which;
1668 
1669                     switch (which) {
1670                     case ITIMER_REAL:
1671                     case ITIMER_MONOTONIC:
1672                               itl = NULL;
1673                               pt->pt_ev.sigev_signo = SIGALRM;
1674                               break;
1675                     case ITIMER_VIRTUAL:
1676                               itl = &pts->pts_virtual;
1677                               pt->pt_ev.sigev_signo = SIGVTALRM;
1678                               break;
1679                     case ITIMER_PROF:
1680                               itl = &pts->pts_prof;
1681                               pt->pt_ev.sigev_signo = SIGPROF;
1682                               break;
1683                     default:
1684                               panic("%s: can't happen %d", __func__, which);
1685                     }
1686                     itimer_init(it, &ptimer_itimer_ops, which, itl);
1687                     pt->pt_proc = p;
1688                     pt->pt_entry = which;
1689 
1690                     pts->pts_timers[which] = it;
1691           }
1692 
1693           TIMEVAL_TO_TIMESPEC(&itvp->it_value, &it->it_time.it_value);
1694           TIMEVAL_TO_TIMESPEC(&itvp->it_interval, &it->it_time.it_interval);
1695 
1696           error = 0;
1697           if (timespecisset(&it->it_time.it_value)) {
1698                     /* Convert to absolute time */
1699                     /* XXX need to wrap in splclock for timecounters case? */
1700                     switch (which) {
1701                     case ITIMER_REAL:
1702                               getnanotime(&now);
1703                               if (!timespecaddok(&it->it_time.it_value, &now)) {
1704                                         error = EINVAL;
1705                                         goto out;
1706                               }
1707                               timespecadd(&it->it_time.it_value, &now,
1708                                   &it->it_time.it_value);
1709                               break;
1710                     case ITIMER_MONOTONIC:
1711                               getnanouptime(&now);
1712                               if (!timespecaddok(&it->it_time.it_value, &now)) {
1713                                         error = EINVAL;
1714                                         goto out;
1715                               }
1716                               timespecadd(&it->it_time.it_value, &now,
1717                                   &it->it_time.it_value);
1718                               break;
1719                     default:
1720                               break;
1721                     }
1722           }
1723 
1724           error = itimer_settime(it);
1725           if (error == ERESTART) {
1726                     KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid));
1727                     goto restart;
1728           }
1729           KASSERT(error == 0);
1730 out:
1731           itimer_unlock();
1732           if (spare != NULL)
1733                     kmem_free(spare, sizeof(*spare));
1734 
1735           return error;
1736 }
1737 
1738 /*
1739  * ptimer_tick:
1740  *
1741  *        Called from hardclock() to decrement per-process virtual timers.
1742  */
1743 void
ptimer_tick(lwp_t * l,bool user)1744 ptimer_tick(lwp_t *l, bool user)
1745 {
1746           struct ptimers *pts;
1747           struct itimer *it;
1748           proc_t *p;
1749 
1750           p = l->l_proc;
1751           if (p->p_timers == NULL)
1752                     return;
1753 
1754           itimer_lock();
1755           if ((pts = l->l_proc->p_timers) != NULL) {
1756                     /*
1757                      * Run current process's virtual and profile time, as needed.
1758                      */
1759                     if (user && (it = LIST_FIRST(&pts->pts_virtual)) != NULL)
1760                               if (itimer_decr(it, tick * 1000))
1761                                         (*it->it_ops->ito_fire)(it);
1762                     if ((it = LIST_FIRST(&pts->pts_prof)) != NULL)
1763                               if (itimer_decr(it, tick * 1000))
1764                                         (*it->it_ops->ito_fire)(it);
1765           }
1766           itimer_unlock();
1767 }
1768 
1769 /*
1770  * ptimer_intr:
1771  *
1772  *        Software interrupt handler for processing per-process
1773  *        timer expiration.
1774  */
1775 static void
ptimer_intr(void * cookie)1776 ptimer_intr(void *cookie)
1777 {
1778           ksiginfo_t ksi;
1779           struct itimer *it;
1780           struct ptimer *pt;
1781           proc_t *p;
1782 
1783           mutex_enter(&proc_lock);
1784           itimer_lock();
1785           while ((pt = TAILQ_FIRST(&ptimer_queue)) != NULL) {
1786                     it = &pt->pt_itimer;
1787 
1788                     TAILQ_REMOVE(&ptimer_queue, pt, pt_chain);
1789                     KASSERT(pt->pt_queued);
1790                     pt->pt_queued = false;
1791 
1792                     p = pt->pt_proc;
1793                     if (p->p_timers == NULL) {
1794                               /* Process is dying. */
1795                               continue;
1796                     }
1797                     if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL) {
1798                               continue;
1799                     }
1800                     if (sigismember(&p->p_sigpend.sp_set, pt->pt_ev.sigev_signo)) {
1801                               it->it_overruns++;
1802                               continue;
1803                     }
1804 
1805                     KSI_INIT(&ksi);
1806                     ksi.ksi_signo = pt->pt_ev.sigev_signo;
1807                     ksi.ksi_code = SI_TIMER;
1808                     ksi.ksi_value = pt->pt_ev.sigev_value;
1809                     pt->pt_poverruns = it->it_overruns;
1810                     it->it_overruns = 0;
1811                     itimer_unlock();
1812                     kpsignal(p, &ksi, NULL);
1813                     itimer_lock();
1814           }
1815           itimer_unlock();
1816           mutex_exit(&proc_lock);
1817 }
1818