xref: /dragonfly/sys/kern/kern_timeout.c (revision 1eeaf6b2bb3621f01159723feaba3aa2c5d933fd)
1 /*
2  * Copyright (c) 2004,2014,2019-2020 The DragonFly Project.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Matthew Dillon <dillon@backplane.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 /*
36  * Copyright (c) 1982, 1986, 1991, 1993
37  *        The Regents of the University of California.  All rights reserved.
38  * (c) UNIX System Laboratories, Inc.
39  * All or some portions of this file are derived from material licensed
40  * to the University of California by American Telephone and Telegraph
41  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42  * the permission of UNIX System Laboratories, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  * 3. Neither the name of the University nor the names of its contributors
53  *    may be used to endorse or promote products derived from this software
54  *    without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  */
68 /*
69  * The original callout mechanism was based on the work of Adam M. Costello
70  * and George Varghese, published in a technical report entitled "Redesigning
71  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
72  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
73  * used in this implementation was published by G. Varghese and T. Lauck in
74  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
75  * the Efficient Implementation of a Timer Facility" in the Proceedings of
76  * the 11th ACM Annual Symposium on Operating Systems Principles,
77  * Austin, Texas Nov 1987.
78  */
79 
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/spinlock.h>
83 #include <sys/callout.h>
84 #include <sys/kernel.h>
85 #include <sys/malloc.h>
86 #include <sys/interrupt.h>
87 #include <sys/thread.h>
88 #include <sys/sysctl.h>
89 #include <sys/exislock.h>
90 #include <vm/vm_extern.h>
91 #include <machine/atomic.h>
92 
93 #include <sys/spinlock2.h>
94 #include <sys/thread2.h>
95 #include <sys/mplock2.h>
96 #include <sys/exislock2.h>
97 
/*
 * colist is the tail-queue head type used for lists of _callout structures
 * in this file (callwheel slots and the per-cpu freelist).
 */
TAILQ_HEAD(colist, _callout);
struct softclock_pcpu;
100 
/*
 * Callout state flags.
 *
 * DID_INIT	- Sanity check
 * PREVENTED	- A callback was prevented
 * FREELIST	- Structure was placed on a per-cpu freelist by
 *		  _callout_free() and is waiting to be freed
 * RESET	- callout_reset requested
 * STOP		- callout_stop requested
 * INPROG	- softclock_handler thread processing in-progress on callout,
 *		  queue linkage is indeterminate.  Third parties must queue
 *		  a STOP or CANCEL and await completion.
 * SET		- Callout is linked to queue (if INPROG not set)
 * AUTOLOCK	- Lockmgr cancelable interlock (copied from frontend)
 * MPSAFE	- Callout is MPSAFE (copied from frontend)
 * CANCEL	- callout_cancel requested
 * ACTIVE	- active/inactive (frontend only, see documentation).
 *		  This is *NOT* the same as whether a callout is queued or
 *		  not.
 */
#define CALLOUT_DID_INIT	0x00000001	/* frontend */
#define CALLOUT_PREVENTED	0x00000002	/* backend */
#define CALLOUT_FREELIST	0x00000004	/* backend */
#define CALLOUT_UNUSED0008	0x00000008
#define CALLOUT_UNUSED0010	0x00000010
#define CALLOUT_RESET		0x00000020	/* backend */
#define CALLOUT_STOP		0x00000040	/* backend */
#define CALLOUT_INPROG		0x00000080	/* backend */
#define CALLOUT_SET		0x00000100	/* backend */
#define CALLOUT_AUTOLOCK	0x00000200	/* both */
#define CALLOUT_MPSAFE		0x00000400	/* both */
#define CALLOUT_CANCEL		0x00000800	/* backend */
#define CALLOUT_ACTIVE		0x00001000	/* frontend */
130 
131 struct wheel {
132           struct spinlock spin;
133           struct colist       list;
134 };
135 
136 struct softclock_pcpu {
137           struct wheel        *callwheel;
138           struct _callout *running;
139           struct _callout * volatile next;
140           struct colist       freelist;
141           int                 softticks;          /* softticks index */
142           int                 curticks; /* per-cpu ticks counter */
143           int                 isrunning;
144           struct thread       thread;
145 };
146 
147 typedef struct softclock_pcpu *softclock_pcpu_t;
148 
149 static int callout_debug = 0;
150 SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
151              &callout_debug, 0, "");
152 
153 static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");
154 
155 static int cwheelsize;
156 static int cwheelmask;
157 static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];
158 
159 static void softclock_handler(void *arg);
160 static void slotimer_callback(void *arg);
161 
162 /*
163  * Handle pending requests.  No action can be taken if the callout is still
164  * flagged INPROG.  Called from softclock for post-processing and from
165  * various API functions.
166  *
167  * This routine does not block in any way.
168  * Caller must hold c->spin.
169  *
170  * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
171  *         must be used at all times.
172  *
173  * NOTE: The related (sc) might refer to another cpu.
174  *
175  * NOTE: The cc-vs-c frontend-vs-backend might be disconnected during the
176  *         operation, but the EXIS lock prevents (c) from being destroyed.
177  */
178 static __inline
179 void
_callout_update_spinlocked(struct _callout * c)180 _callout_update_spinlocked(struct _callout *c)
181 {
182           struct wheel *wheel;
183 
184           if ((c->flags & CALLOUT_INPROG) && curthread != &c->qsc->thread) {
185                     /*
186                      * If the callout is in-progress the SET queuing state is
187                      * indeterminant and no action can be taken at this time.
188                      *
189                      * (however, recursive calls from the call-back are not
190                      * indeterminant and must be processed at this time).
191                      */
192                     /* nop */
193           } else if (c->flags & CALLOUT_SET) {
194                     /*
195                      * If the callout is SET it is queued on a callwheel, process
196                      * various requests relative to it being in this queued state.
197                      *
198                      * c->q* fields are stable while we hold c->spin and
199                      * wheel->spin.
200                      */
201                     softclock_pcpu_t sc;
202 
203                     sc = c->qsc;
204                     wheel = &sc->callwheel[c->qtick & cwheelmask];
205                     spin_lock(&wheel->spin);
206 
207                     if ((c->flags & CALLOUT_INPROG) &&
208                         curthread != &c->qsc->thread) {
209                               /*
210                                * Raced against INPROG getting set by the softclock
211                                * handler while we were acquiring wheel->spin.  We
212                                * can do nothing at this time.
213                                *
214                                * (however, recursive calls from the call-back are not
215                                * indeterminant and must be processed at this time).
216                                */
217                               /* nop */
218                     } else if (c->flags & CALLOUT_CANCEL) {
219                               /*
220                                * CANCEL requests override everything else.
221                                */
222                               if (sc->next == c)
223                                         sc->next = TAILQ_NEXT(c, entry);
224                               TAILQ_REMOVE(&wheel->list, c, entry);
225                               atomic_clear_int(&c->flags, CALLOUT_SET |
226                                                                 CALLOUT_STOP |
227                                                                 CALLOUT_CANCEL |
228                                                                 CALLOUT_RESET);
229                               atomic_set_int(&c->flags, CALLOUT_PREVENTED);
230                               if (c->waiters)
231                                         wakeup(c);
232                     } else if (c->flags & CALLOUT_RESET) {
233                               /*
234                                * RESET requests reload the callout, potentially
235                                * to a different cpu.  Once removed from the wheel,
236                                * the retention of c->spin prevents further races.
237                                *
238                                * Leave SET intact.
239                                */
240                               if (sc->next == c)
241                                         sc->next = TAILQ_NEXT(c, entry);
242                               TAILQ_REMOVE(&wheel->list, c, entry);
243                               spin_unlock(&wheel->spin);
244 
245                               atomic_clear_int(&c->flags, CALLOUT_RESET);
246                               sc = c->rsc;
247                               c->qsc = sc;
248                               c->qarg = c->rarg;
249                               c->qfunc = c->rfunc;
250                               c->qtick = c->rtick;
251 
252                               /*
253                                * Do not queue to a current or past wheel slot or
254                                * the callout will be lost for ages.  Handle
255                                * potential races against soft ticks.
256                                */
257                               wheel = &sc->callwheel[c->qtick & cwheelmask];
258                               spin_lock(&wheel->spin);
259                               while (c->qtick - sc->softticks <= 0) {
260                                         c->qtick = sc->softticks + 1;
261                                         spin_unlock(&wheel->spin);
262                                         wheel = &sc->callwheel[c->qtick & cwheelmask];
263                                         spin_lock(&wheel->spin);
264                               }
265                               TAILQ_INSERT_TAIL(&wheel->list, c, entry);
266                     } else if (c->flags & CALLOUT_STOP) {
267                               /*
268                                * STOP request simply unloads the callout.
269                                */
270                               if (sc->next == c)
271                                         sc->next = TAILQ_NEXT(c, entry);
272                               TAILQ_REMOVE(&wheel->list, c, entry);
273                               atomic_clear_int(&c->flags, CALLOUT_STOP |
274                                                                 CALLOUT_SET);
275 
276                               atomic_set_int(&c->flags, CALLOUT_PREVENTED);
277                               if (c->waiters)
278                                         wakeup(c);
279                     } else {
280                               /*
281                                * Do nothing if no request is pending.
282                                */
283                               /* nop */
284                     }
285                     spin_unlock(&wheel->spin);
286           } else {
287                     /*
288                      * If the callout is not SET it is not queued to any callwheel,
289                      * process various requests relative to it not being queued.
290                      *
291                      * c->q* fields are stable while we hold c->spin.
292                      */
293                     if (c->flags & CALLOUT_CANCEL) {
294                               /*
295                                * CANCEL requests override everything else.
296                                *
297                                * There is no state being canceled in this case,
298                                * so do not set the PREVENTED flag.
299                                */
300                               atomic_clear_int(&c->flags, CALLOUT_STOP |
301                                                                 CALLOUT_CANCEL |
302                                                                 CALLOUT_RESET);
303                               if (c->waiters)
304                                         wakeup(c);
305                     } else if (c->flags & CALLOUT_RESET) {
306                               /*
307                                * RESET requests get queued.  Do not queue to the
308                                * currently-processing tick.
309                                */
310                               softclock_pcpu_t sc;
311 
312                               sc = c->rsc;
313                               c->qsc = sc;
314                               c->qarg = c->rarg;
315                               c->qfunc = c->rfunc;
316                               c->qtick = c->rtick;
317 
318                               /*
319                                * Do not queue to current or past wheel or the
320                                * callout will be lost for ages.
321                                */
322                               wheel = &sc->callwheel[c->qtick & cwheelmask];
323                               spin_lock(&wheel->spin);
324                               while (c->qtick - sc->softticks <= 0) {
325                                         c->qtick = sc->softticks + 1;
326                                         spin_unlock(&wheel->spin);
327                                         wheel = &sc->callwheel[c->qtick & cwheelmask];
328                                         spin_lock(&wheel->spin);
329                               }
330                               TAILQ_INSERT_TAIL(&wheel->list, c, entry);
331                               atomic_clear_int(&c->flags, CALLOUT_RESET);
332                               atomic_set_int(&c->flags, CALLOUT_SET);
333                               spin_unlock(&wheel->spin);
334                     } else if (c->flags & CALLOUT_STOP) {
335                               /*
336                                * STOP requests.
337                                *
338                                * There is no state being stopped in this case,
339                                * so do not set the PREVENTED flag.
340                                */
341                               atomic_clear_int(&c->flags, CALLOUT_STOP);
342                               if (c->waiters)
343                                         wakeup(c);
344                     } else {
345                               /*
346                                * No request pending (someone else processed the
347                                * request before we could)
348                                */
349                               /* nop */
350                     }
351           }
352 }
353 
354 static __inline
355 void
_callout_free(struct _callout * c)356 _callout_free(struct _callout *c)
357 {
358           softclock_pcpu_t sc;
359 
360           sc = softclock_pcpu_ary[mycpu->gd_cpuid];
361 
362           crit_enter();
363           exis_terminate(&c->exis);
364           atomic_set_int(&c->flags, CALLOUT_FREELIST);
365           atomic_clear_int(&c->flags, CALLOUT_DID_INIT);
366           TAILQ_INSERT_TAIL(&sc->freelist, c, entry);
367           crit_exit();
368 }
369 
370 /*
371  * System init
372  */
373 static void
swi_softclock_setup(void * arg)374 swi_softclock_setup(void *arg)
375 {
376           int cpu;
377           int i;
378           int target;
379 
380           /*
381            * Figure out how large a callwheel we need.  It must be a power of 2.
382            *
383            * ncallout is primarily based on available memory, don't explode
384            * the allocations if the system has a lot of cpus.
385            */
386           target = ncallout / ncpus + 16;
387 
388           cwheelsize = 1;
389           while (cwheelsize < target)
390                     cwheelsize <<= 1;
391           cwheelmask = cwheelsize - 1;
392 
393           /*
394            * Initialize per-cpu data structures.
395            */
396           for (cpu = 0; cpu < ncpus; ++cpu) {
397                     softclock_pcpu_t sc;
398                     int wheel_sz;
399 
400                     sc = (void *)kmem_alloc3(kernel_map, sizeof(*sc),
401                                                    VM_SUBSYS_GD, KM_CPU(cpu));
402                     memset(sc, 0, sizeof(*sc));
403                     TAILQ_INIT(&sc->freelist);
404                     softclock_pcpu_ary[cpu] = sc;
405 
406                     wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
407                     sc->callwheel = (void *)kmem_alloc3(kernel_map, wheel_sz,
408                                                                 VM_SUBSYS_GD, KM_CPU(cpu));
409                     memset(sc->callwheel, 0, wheel_sz);
410                     for (i = 0; i < cwheelsize; ++i) {
411                               spin_init(&sc->callwheel[i].spin, "wheel");
412                               TAILQ_INIT(&sc->callwheel[i].list);
413                     }
414 
415                     /*
416                      * Mark the softclock handler as being an interrupt thread
417                      * even though it really isn't, but do not allow it to
418                      * preempt other threads (do not assign td_preemptable).
419                      *
420                      * Kernel code now assumes that callouts do not preempt
421                      * the cpu they were scheduled on.
422                      */
423                     lwkt_create(softclock_handler, sc, NULL, &sc->thread,
424                                   TDF_NOSTART | TDF_INTTHREAD,
425                                   cpu, "softclock %d", cpu);
426           }
427 }
428 
/*
 * Register swi_softclock_setup() as a boot-time initializer.
 *
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
	swi_softclock_setup, NULL);
434 
435 /*
436  * This routine is called from the hardclock() (basically a FASTint/IPI) on
437  * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
438  * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
439  * the callwheel is currently indexed.
440  *
441  * sc->softticks is adjusted by either this routine or our helper thread
442  * depending on whether the helper thread is running or not.
443  *
444  * sc->curticks and sc->softticks are adjusted using atomic ops in order
445  * to ensure that remote cpu callout installation does not race the thread.
446  */
447 void
hardclock_softtick(globaldata_t gd)448 hardclock_softtick(globaldata_t gd)
449 {
450           softclock_pcpu_t sc;
451           struct wheel *wheel;
452 
453           sc = softclock_pcpu_ary[gd->gd_cpuid];
454           atomic_add_int(&sc->curticks, 1);
455           if (sc->isrunning)
456                     return;
457           if (sc->softticks == sc->curticks) {
458                     /*
459                      * In sync, only wakeup the thread if there is something to
460                      * do.
461                      */
462                     wheel = &sc->callwheel[sc->softticks & cwheelmask];
463                     spin_lock(&wheel->spin);
464                     if (TAILQ_FIRST(&wheel->list)) {
465                               sc->isrunning = 1;
466                               spin_unlock(&wheel->spin);
467                               lwkt_schedule(&sc->thread);
468                     } else {
469                               atomic_add_int(&sc->softticks, 1);
470                               spin_unlock(&wheel->spin);
471                     }
472           } else {
473                     /*
474                      * out of sync, wakeup the thread unconditionally so it can
475                      * catch up.
476                      */
477                     sc->isrunning = 1;
478                     lwkt_schedule(&sc->thread);
479           }
480 }
481 
482 /*
483  * This procedure is the main loop of our per-cpu helper thread.  The
484  * sc->isrunning flag prevents us from racing hardclock_softtick().
485  *
486  * The thread starts with the MP lock released and not in a critical
487  * section.  The loop itself is MP safe while individual callbacks
488  * may or may not be, so we obtain or release the MP lock as appropriate.
489  */
490 static void
softclock_handler(void * arg)491 softclock_handler(void *arg)
492 {
493           softclock_pcpu_t sc;
494           struct _callout *c;
495           struct wheel *wheel;
496           struct callout slotimer1;
497           struct _callout slotimer2;
498           int mpsafe = 1;
499 
500           /*
501            * Setup pcpu slow clocks which we want to run from the callout
502            * thread.  This thread starts very early and cannot kmalloc(),
503            * so use internal functions to supply the _callout.
504            */
505           _callout_setup_quick(&slotimer1, &slotimer2, hz * 10,
506                                    slotimer_callback, &slotimer1);
507 
508           /*
509            * Run the callout thread at the same priority as other kernel
510            * threads so it can be round-robined.
511            */
512           /*lwkt_setpri_self(TDPRI_SOFT_NORM);*/
513 
514           sc = arg;
515 loop:
516           while (sc->softticks != (int)(sc->curticks + 1)) {
517                     wheel = &sc->callwheel[sc->softticks & cwheelmask];
518 
519                     spin_lock(&wheel->spin);
520                     sc->next = TAILQ_FIRST(&wheel->list);
521                     while ((c = sc->next) != NULL) {
522                               int error;
523 
524                               /*
525                                * Match callouts for this tick.
526                                */
527                               sc->next = TAILQ_NEXT(c, entry);
528                               if (c->qtick != sc->softticks)
529                                         continue;
530 
531                               /*
532                                * Double check the validity of the callout, detect
533                                * if the originator's structure has been ripped out.
534                                */
535                               if ((uintptr_t)c->verifier < VM_MAX_USER_ADDRESS) {
536                                         spin_unlock(&wheel->spin);
537                                         panic("_callout %p verifier %p failed "
538                                               "func %p/%p\n",
539                                               c, c->verifier, c->rfunc, c->qfunc);
540                               }
541 
542                               if (c->verifier->toc != c) {
543                                         spin_unlock(&wheel->spin);
544                                         panic("_callout %p verifier %p failed "
545                                               "func %p/%p\n",
546                                               c, c->verifier, c->rfunc, c->qfunc);
547                               }
548 
549                               /*
550                                * The wheel spinlock is sufficient to set INPROG and
551                                * remove (c) from the list.  Once INPROG is set,
552                                * other threads can only make limited changes to (c).
553                                *
554                                * Setting INPROG masks SET tests in all other
555                                * conditionals except the 'quick' code (which is
556                                * always same-cpu and doesn't race).  This means
557                                * that we can clear SET here without obtaining
558                                * c->spin.
559                                */
560                               TAILQ_REMOVE(&wheel->list, c, entry);
561                               atomic_set_int(&c->flags, CALLOUT_INPROG);
562                               atomic_clear_int(&c->flags, CALLOUT_SET);
563                               sc->running = c;
564                               spin_unlock(&wheel->spin);
565 
566                               /*
567                                * Legacy mplock support
568                                */
569                               if (c->flags & CALLOUT_MPSAFE) {
570                                         if (mpsafe == 0) {
571                                                   mpsafe = 1;
572                                                   rel_mplock();
573                                         }
574                               } else {
575                                         if (mpsafe) {
576                                                   mpsafe = 0;
577                                                   get_mplock();
578                                         }
579                               }
580 
581                               /*
582                                * Execute the 'q' function (protected by INPROG)
583                                */
584                               if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
585                                         /*
586                                          * Raced a stop or cancel request, do
587                                          * not execute.  The processing code
588                                          * thinks its a normal completion so
589                                          * flag the fact that cancel/stop actually
590                                          * prevented a callout here.
591                                          */
592                                         if (c->flags &
593                                             (CALLOUT_CANCEL | CALLOUT_STOP)) {
594                                                   atomic_set_int(&c->verifier->flags,
595                                                                    CALLOUT_PREVENTED);
596                                         }
597                               } else if (c->flags & CALLOUT_RESET) {
598                                         /*
599                                          * A RESET raced, make it seem like it
600                                          * didn't.  Do nothing here and let the
601                                          * update procedure requeue us.
602                                          */
603                               } else if (c->flags & CALLOUT_AUTOLOCK) {
604                                         /*
605                                          * Interlocked cancelable call.  If the
606                                          * lock gets canceled we have to flag the
607                                          * fact that the cancel/stop actually
608                                          * prevented the callout here.
609                                          */
610                                         error = lockmgr(c->lk, LK_EXCLUSIVE |
611                                                                    LK_CANCELABLE);
612                                         if (error == 0) {
613                                                   c->qfunc(c->qarg);
614                                                   lockmgr(c->lk, LK_RELEASE);
615                                         } else if (c->flags &
616                                                      (CALLOUT_CANCEL | CALLOUT_STOP)) {
617                                                   atomic_set_int(&c->verifier->flags,
618                                                                    CALLOUT_PREVENTED);
619                                         }
620                               } else {
621                                         /*
622                                          * Normal call
623                                          */
624                                         c->qfunc(c->qarg);
625                               }
626 
627                               /*
628                                * INPROG will prevent SET from being set again.
629                                * Once we clear INPROG, update the callout to
630                                * handle any pending operations that have built-up.
631                                */
632 
633                               /*
634                                * Interlocked clearing of INPROG, then handle any
635                                * queued request (such as a callout_reset() request).
636                                */
637                               spin_lock(&c->spin);
638                               atomic_clear_int(&c->flags, CALLOUT_INPROG);
639                               sc->running = NULL;
640                               _callout_update_spinlocked(c);
641                               spin_unlock(&c->spin);
642 
643                               spin_lock(&wheel->spin);
644                     }
645                     spin_unlock(&wheel->spin);
646                     atomic_add_int(&sc->softticks, 1);
647 
648                     /*
649                      * Clean up any _callout structures which are now allowed
650                      * to be freed.
651                      */
652                     crit_enter();
653                     while ((c = TAILQ_FIRST(&sc->freelist)) != NULL) {
654                               if (!exis_freeable(&c->exis))
655                                         break;
656                               TAILQ_REMOVE(&sc->freelist, c, entry);
657                               c->flags = 0;
658                               kfree(c, M_CALLOUT);
659                               if (callout_debug)
660                                         kprintf("KFREEB %p\n", c);
661                     }
662                     crit_exit();
663           }
664 
665           /*
666            * Don't leave us holding the MP lock when we deschedule ourselves.
667            */
668           if (mpsafe == 0) {
669                     mpsafe = 1;
670                     rel_mplock();
671           }
672 
673           /*
674            * Recheck in critical section to interlock against hardlock
675            */
676           crit_enter();
677           if (sc->softticks == (int)(sc->curticks + 1)) {
678                     sc->isrunning = 0;
679                     lwkt_deschedule_self(&sc->thread);      /* == curthread */
680                     lwkt_switch();
681           }
682           crit_exit();
683           goto loop;
684           /* NOT REACHED */
685 }
686 
687 /*
688  * A very slow system cleanup timer (10 second interval),
689  * per-cpu.
690  */
691 void
slotimer_callback(void * arg)692 slotimer_callback(void *arg)
693 {
694           struct callout *c = arg;
695 
696           slab_cleanup();
697           callout_reset(c, hz * 10, slotimer_callback, c);
698 }
699 
700 /*
701  * API FUNCTIONS
702  */
703 
704 static __inline
705 struct _callout *
_callout_gettoc(struct callout * cc)706 _callout_gettoc(struct callout *cc)
707 {
708           globaldata_t gd = mycpu;
709           struct _callout *c;
710           softclock_pcpu_t sc;
711 
712           KKASSERT(cc->flags & CALLOUT_DID_INIT);
713           exis_hold_gd(gd);
714           for (;;) {
715                     c = cc->toc;
716                     cpu_ccfence();
717                     if (c) {
718                               KKASSERT(c->verifier == cc);
719                               spin_lock(&c->spin);
720                               break;
721                     }
722                     sc = softclock_pcpu_ary[gd->gd_cpuid];
723                     c = kmalloc(sizeof(*c), M_CALLOUT, M_INTWAIT | M_ZERO);
724                     if (callout_debug)
725                               kprintf("ALLOC %p\n", c);
726                     c->flags = cc->flags;
727                     c->lk = cc->lk;
728                     c->verifier = cc;
729                     exis_init(&c->exis);
730                     spin_init(&c->spin, "calou");
731                     spin_lock(&c->spin);
732                     if (atomic_cmpset_ptr(&cc->toc, NULL, c))
733                               break;
734                     spin_unlock(&c->spin);
735                     c->verifier = NULL;
736                     kfree(c, M_CALLOUT);
737                     if (callout_debug)
738                               kprintf("KFREEA %p\n", c);
739           }
740           exis_drop_gd(gd);
741 
742           /*
743            * Return internal __callout with spin-lock held
744            */
745           return c;
746 }
747 
748 /*
749  * Macrod in sys/callout.h for debugging
750  *
751  * WARNING! tsleep() assumes this will not block
752  */
753 void
_callout_init(struct callout * cc CALLOUT_DEBUG_ARGS)754 _callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
755 {
756           bzero(cc, sizeof(*cc));
757           cc->flags = CALLOUT_DID_INIT;
758 }
759 
760 void
_callout_init_mp(struct callout * cc CALLOUT_DEBUG_ARGS)761 _callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
762 {
763           bzero(cc, sizeof(*cc));
764           cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
765 }
766 
767 void
_callout_init_lk(struct callout * cc,struct lock * lk CALLOUT_DEBUG_ARGS)768 _callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
769 {
770           bzero(cc, sizeof(*cc));
771           cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK;
772           cc->lk = lk;
773 }
774 
775 /*
776  * Start or restart a timeout.  New timeouts can be installed while the
777  * current one is running.
778  *
779  * Start or restart a timeout.  Installs the callout structure on the
780  * callwheel of the current cpu.  Callers may legally pass any value, even
781  * if 0 or negative, but since the sc->curticks index may have already
782  * been processed a minimum timeout of 1 tick will be enforced.
783  *
784  * This function will not deadlock against a running call.
785  *
786  * WARNING! tsleep() assumes this will not block
787  */
788 void
callout_reset(struct callout * cc,int to_ticks,void (* ftn)(void *),void * arg)789 callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
790 {
791           softclock_pcpu_t sc;
792           struct _callout *c;
793 
794           /*
795            * We need to acquire/associate a _callout.
796            * gettoc spin-locks (c).
797            */
798           KKASSERT(cc->flags & CALLOUT_DID_INIT);
799           atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
800           c = _callout_gettoc(cc);
801 
802           /*
803            * Request a RESET.  This automatically overrides a STOP in
804            * _callout_update_spinlocked().
805            */
806           atomic_set_int(&c->flags, CALLOUT_RESET);
807           sc = softclock_pcpu_ary[mycpu->gd_cpuid];
808           c->rsc = sc;
809           c->rtick = sc->curticks + to_ticks;
810           c->rfunc = ftn;
811           c->rarg = arg;
812           _callout_update_spinlocked(c);
813           spin_unlock(&c->spin);
814 }
815 
816 /*
817  * Same as callout_reset() but the timeout will run on a particular cpu.
818  */
819 void
callout_reset_bycpu(struct callout * cc,int to_ticks,void (* ftn)(void *),void * arg,int cpuid)820 callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
821                         void *arg, int cpuid)
822 {
823           softclock_pcpu_t sc;
824           struct _callout *c;
825 
826           /*
827            * We need to acquire/associate a _callout.
828            * gettoc spin-locks (c).
829            */
830           KKASSERT(cc->flags & CALLOUT_DID_INIT);
831           atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
832           c = _callout_gettoc(cc);
833 
834           /*
835            * Set RESET.  Do not clear STOP here (let the process code do it).
836            */
837           atomic_set_int(&c->flags, CALLOUT_RESET);
838 
839           sc = softclock_pcpu_ary[cpuid];
840           c->rsc = sc;
841           c->rtick = sc->curticks + to_ticks;
842           c->rfunc = ftn;
843           c->rarg = arg;
844           _callout_update_spinlocked(c);
845           spin_unlock(&c->spin);
846 }
847 
848 /*
849  * Issue synchronous or asynchronous cancel or stop
850  */
851 static __inline
852 int
_callout_cancel_or_stop(struct callout * cc,uint32_t flags,int sync)853 _callout_cancel_or_stop(struct callout *cc, uint32_t flags, int sync)
854 {
855           globaldata_t gd = mycpu;
856           struct _callout *c;
857           int res;
858 
859           /*
860            * Callout is inactive after cancel or stop.  Degenerate case if
861            * no _callout is currently associated.
862            */
863           atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
864           if (cc->toc == NULL)
865                     return 0;
866 
867           /*
868            * Ensure that the related (c) is not destroyed.  Set the CANCEL
869            * or STOP request flag, clear the PREVENTED status flag, and update.
870            */
871           exis_hold_gd(gd);
872           c = _callout_gettoc(cc);
873           atomic_clear_int(&c->flags, CALLOUT_PREVENTED);
874           atomic_set_int(&c->flags, flags);
875           _callout_update_spinlocked(c);
876           spin_unlock(&c->spin);
877 
878           /*
879            * If the operation is still in-progress then re-acquire the spin-lock
880            * and block if necessary.  Also initiate the lock cancel.
881            */
882           if (sync == 0 || (c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
883                     exis_drop_gd(gd);
884                     return 0;
885           }
886           if (c->flags & CALLOUT_AUTOLOCK)
887                     lockmgr(c->lk, LK_CANCEL_BEG);
888           spin_lock(&c->spin);
889           if ((c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
890                     spin_unlock(&c->spin);
891                     if (c->flags & CALLOUT_AUTOLOCK)
892                               lockmgr(c->lk, LK_CANCEL_END);
893                     exis_drop_gd(gd);
894                     return ((c->flags & CALLOUT_PREVENTED) != 0);
895           }
896 
897           /*
898            * With c->spin held we can synchronously wait completion of our
899            * request.
900            *
901            * If INPROG is set and we are recursing from the callback the
902            * function completes immediately.
903            */
904           ++c->waiters;
905           for (;;) {
906                     cpu_ccfence();
907                     if ((c->flags & flags) == 0)
908                               break;
909                     if ((c->flags & CALLOUT_INPROG) &&
910                         curthread == &c->qsc->thread) {
911                               _callout_update_spinlocked(c);
912                               break;
913                     }
914                     ssleep(c, &c->spin, 0, "costp", 0);
915           }
916           --c->waiters;
917           spin_unlock(&c->spin);
918           if (c->flags & CALLOUT_AUTOLOCK)
919                     lockmgr(c->lk, LK_CANCEL_END);
920           res = ((c->flags & CALLOUT_PREVENTED) != 0);
921           exis_drop_gd(gd);
922 
923           return res;
924 }
925 
926 /*
927  * Internalized special low-overhead version without normal safety
928  * checks or allocations.  Used by tsleep().
929  *
930  * Must be called from critical section, specify both the external
931  * and internal callout structure and set timeout on the current cpu.
932  */
933 void
_callout_setup_quick(struct callout * cc,struct _callout * c,int ticks,void (* ftn)(void *),void * arg)934 _callout_setup_quick(struct callout *cc, struct _callout *c, int ticks,
935                          void (*ftn)(void *), void *arg)
936 {
937           softclock_pcpu_t sc;
938           struct wheel *wheel;
939 
940           /*
941            * Request a RESET.  This automatically overrides a STOP in
942            * _callout_update_spinlocked().
943            */
944           sc = softclock_pcpu_ary[mycpu->gd_cpuid];
945 
946           cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
947           cc->toc = c;
948           cc->lk = NULL;
949           c->flags = cc->flags | CALLOUT_SET;
950           c->lk = NULL;
951           c->verifier = cc;
952           c->qsc = sc;
953           c->qtick = sc->curticks + ticks;
954           c->qfunc = ftn;
955           c->qarg = arg;
956           spin_init(&c->spin, "calou");
957 
958           /*
959            * Since we are on the same cpu with a critical section, we can
960            * do this with only the wheel spinlock.
961            */
962           if (c->qtick - sc->softticks <= 0)
963                     c->qtick = sc->softticks + 1;
964           wheel = &sc->callwheel[c->qtick & cwheelmask];
965 
966           spin_lock(&wheel->spin);
967           TAILQ_INSERT_TAIL(&wheel->list, c, entry);
968           spin_unlock(&wheel->spin);
969 }
970 
971 /*
972  * Internalized special low-overhead version without normal safety
973  * checks or allocations.  Used by tsleep().
974  *
975  * Must be called on the same cpu that queued the timeout.
976  * Must be called with a critical section already held.
977  */
978 void
_callout_cancel_quick(struct _callout * c)979 _callout_cancel_quick(struct _callout *c)
980 {
981           softclock_pcpu_t sc;
982           struct wheel *wheel;
983 
984           /*
985            * Wakeup callouts for tsleep() should never block, so this flag
986            * had better never be found set.
987            */
988           KKASSERT((c->flags & CALLOUT_INPROG) == 0);
989 
990           /*
991            * Remove from queue if necessary.  Since we are in a critical
992            * section on the same cpu, the queueing status should not change.
993            */
994           if (c->flags & CALLOUT_SET) {
995                     sc = c->qsc;
996                     KKASSERT(sc == softclock_pcpu_ary[mycpu->gd_cpuid]);
997                     wheel = &sc->callwheel[c->qtick & cwheelmask];
998 
999                     /*
1000                      * NOTE: We must still spin-lock the wheel because other
1001                      *         cpus can manipulate the list, and adjust sc->next
1002                      *         if necessary.
1003                      */
1004                     spin_lock(&wheel->spin);
1005                     if (sc->next == c)
1006                               sc->next = TAILQ_NEXT(c, entry);
1007                     TAILQ_REMOVE(&wheel->list, c, entry);
1008                     c->flags &= ~(CALLOUT_SET | CALLOUT_STOP |
1009                                     CALLOUT_CANCEL | CALLOUT_RESET);
1010                     spin_unlock(&wheel->spin);
1011           }
1012           c->verifier = NULL;
1013 }
1014 
1015 /*
1016  * This is a synchronous STOP which cancels the callout.  If AUTOLOCK
1017  * then a CANCEL will be issued to the lock holder.  Unlike STOP, the
1018  * cancel function prevents any new callout_reset()s from being issued
1019  * in addition to canceling the lock.  The lock will also be deactivated.
1020  *
1021  * Returns 0 if the callout was not active (or was active and completed,
1022  *             but didn't try to start a new timeout).
1023  * Returns 1 if the cancel is responsible for stopping the callout.
1024  */
1025 int
callout_cancel(struct callout * cc)1026 callout_cancel(struct callout *cc)
1027 {
1028           return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
1029 }
1030 
1031 /*
1032  * Currently the same as callout_cancel.  Ultimately we may wish the
1033  * drain function to allow a pending callout to proceed, but for now
1034  * we will attempt to cancel it.
1035  *
1036  * Returns 0 if the callout was not active (or was active and completed,
1037  *             but didn't try to start a new timeout).
1038  * Returns 1 if the drain is responsible for stopping the callout.
1039  */
1040 int
callout_drain(struct callout * cc)1041 callout_drain(struct callout *cc)
1042 {
1043           return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
1044 }
1045 
1046 /*
1047  * Stops a callout if it is pending or queued, does not block.
1048  * This function does not interlock against a callout that is in-progress.
1049  *
1050  * Returns whether the STOP operation was responsible for removing a
1051  * queued or pending callout.
1052  */
1053 int
callout_stop_async(struct callout * cc)1054 callout_stop_async(struct callout *cc)
1055 {
1056           return _callout_cancel_or_stop(cc, CALLOUT_STOP, 0);
1057 }
1058 
1059 /*
1060  * Callout deactivate merely clears the CALLOUT_ACTIVE bit and stop a
1061  * callout if it is pending or queued.  However this cannot stop a callout
1062  * whose callback is in-progress.
1063  *
1064  *
1065  * This function does not interlock against a callout that is in-progress.
1066  */
1067 void
callout_deactivate(struct callout * cc)1068 callout_deactivate(struct callout *cc)
1069 {
1070           atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
1071           callout_stop_async(cc);
1072 }
1073 
1074 /*
1075  * lock-aided callouts are STOPped synchronously using STOP semantics
1076  * (meaning that another thread can start the callout again before we
1077  * return).
1078  *
1079  * non-lock-aided callouts
1080  *
1081  * Stops a callout if it is pending or queued, does not block.
1082  * This function does not interlock against a callout that is in-progress.
1083  */
1084 int
callout_stop(struct callout * cc)1085 callout_stop(struct callout *cc)
1086 {
1087           return _callout_cancel_or_stop(cc, CALLOUT_STOP, 1);
1088 }
1089 
1090 /*
1091  * Destroy the callout.  Synchronously cancel any operation in progress,
1092  * clear the INIT flag, and disconnect the internal _callout.  The internal
1093  * callout will be safely freed via EXIS.
1094  *
1095  * Upon return, the callout structure may only be reused if re-initialized.
1096  */
1097 void
callout_terminate(struct callout * cc)1098 callout_terminate(struct callout *cc)
1099 {
1100           struct _callout *c;
1101 
1102           exis_hold();
1103 
1104           _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
1105           KKASSERT(cc->flags & CALLOUT_DID_INIT);
1106           atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
1107           c = atomic_swap_ptr((void *)&cc->toc, NULL);
1108           if (c) {
1109                     KKASSERT(c->verifier == cc);
1110                     c->verifier = NULL;
1111                     _callout_free(c);
1112           }
1113 
1114           exis_drop();
1115 }
1116 
1117 /*
1118  * Returns whether a callout is queued and the time has not yet
1119  * arrived (the callout is not yet in-progress).
1120  */
1121 int
callout_pending(struct callout * cc)1122 callout_pending(struct callout *cc)
1123 {
1124           struct _callout *c;
1125 
1126           /*
1127            * Don't instantiate toc to test pending
1128            */
1129           if (cc->toc == NULL)
1130                     return 0;
1131           c = _callout_gettoc(cc);
1132           if ((c->flags & (CALLOUT_SET | CALLOUT_INPROG)) == CALLOUT_SET) {
1133                     spin_unlock(&c->spin);
1134                     return 1;
1135           }
1136           spin_unlock(&c->spin);
1137 
1138           return 0;
1139 }
1140 
1141 /*
1142  * Returns whether a callout is active or not.  A callout is active when
1143  * a timeout is set and remains active upon normal termination, even if
1144  * it does not issue a new timeout.  A callout is inactive if a timeout has
1145  * never been set or if the callout has been stopped or canceled.  The next
1146  * timeout that is set will re-set the active state.
1147  */
1148 int
callout_active(struct callout * cc)1149 callout_active(struct callout *cc)
1150 {
1151           return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
1152 }
1153