xref: /dragonfly/sys/kern/lwkt_token.c (revision 6d0742ae7aea551e633fc7147abd5de001c40346)
1 /*
2  * Copyright (c) 2003-2006,2009-2019 The DragonFly Project.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Matthew Dillon <dillon@backplane.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * lwkt_token - Implement soft token locks.
38  *
39  * Tokens are locks which serialize a thread only while the thread is
40  * running.  If the thread blocks all tokens are released, then reacquired
41  * when the thread resumes.
42  *
 * This implementation requires no critical sections or spin locks, but
 * does use atomic operations on the token count (atomic_fcmpset_long(),
 * atomic_fetchadd_long() and friends).
45  *
46  * Tokens may be recursively acquired by the same thread.  However the
47  * caller must be sure to release such tokens in reverse order.
48  */
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/kernel.h>
52 #include <sys/proc.h>
53 #include <sys/rtprio.h>
54 #include <sys/queue.h>
55 #include <sys/sysctl.h>
56 #include <sys/ktr.h>
57 #include <sys/kthread.h>
58 #include <machine/cpu.h>
59 #include <sys/lock.h>
60 #include <sys/spinlock.h>
61 
62 #include <sys/thread2.h>
63 #include <sys/spinlock2.h>
64 #include <sys/mplock2.h>
65 
66 #include <vm/vm.h>
67 #include <vm/vm_param.h>
68 #include <vm/vm_kern.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_page.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_pager.h>
73 #include <vm/vm_extern.h>
74 #include <vm/vm_zone.h>
75 
76 #include <machine/stdarg.h>
77 #include <machine/smp.h>
78 
79 #include "opt_ddb.h"
80 #ifdef DDB
81 #include <ddb/ddb.h>
82 #endif
83 
84 extern int lwkt_sched_debug;
85 
86 #define LWKT_POOL_TOKENS      16384               /* must be power of 2 */
87 #define LWKT_POOL_MASK                  (LWKT_POOL_TOKENS - 1)
88 
89 struct lwkt_pool_token {
90           struct lwkt_token   token;
91 } __cachealign;
92 
93 static struct lwkt_pool_token pool_tokens[LWKT_POOL_TOKENS];
94 static struct spinlock                  tok_debug_spin =
95     SPINLOCK_INITIALIZER(&tok_debug_spin, "tok_debug_spin");
96 
97 #define TOKEN_STRING          "REF=%p TOK=%p TD=%p"
98 #define TOKEN_ARGS  lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
99 #define CONTENDED_STRING      TOKEN_STRING " (contention started)"
100 #define UNCONTENDED_STRING    TOKEN_STRING " (contention stopped)"
101 #if !defined(KTR_TOKENS)
102 #define   KTR_TOKENS          KTR_ALL
103 #endif
104 
105 KTR_INFO_MASTER(tokens);
106 KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
107 KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
108 #if 0
109 KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
110 KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
111 KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
112 KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
113 KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
114 KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
115 KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
116 #endif
117 
118 #define logtoken(name, ref)                                                     \
119           KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
120 
121 /*
122  * Global tokens.  These replace the MP lock for major subsystem locking.
123  * These tokens are initially used to lockup both global and individual
124  * operations.
125  *
126  * Once individual structures get their own locks these tokens are used
127  * only to protect global lists & other variables and to interlock
128  * allocations and teardowns and such.
129  *
130  * The UP initializer causes token acquisition to also acquire the MP lock
131  * for maximum compatibility.  The feature may be enabled and disabled at
132  * any time, the MP state is copied to the tokref when the token is acquired
133  * and will not race against sysctl changes.
134  */
135 struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
136 struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
137 struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
138 struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
139 struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
140 struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
141 struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
142 struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
143 struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
144 struct lwkt_token vga_token = LWKT_TOKEN_INITIALIZER(vga_token);
145 struct lwkt_token kbd_token = LWKT_TOKEN_INITIALIZER(kbd_token);
146 
147 /*
148  * Exponential backoff (exclusive tokens) and TSC windowing (shared tokens)
149  * parameters.  Remember that tokens backoff to the scheduler.  This is a bit
150  * of trade-off.  Smaller values like 128 work better in some situations,
151  * but under extreme loads larger values like 4096 seem to provide the most
152  * determinism.
153  */
154 static int token_backoff_max __cachealign = 4096;
155 SYSCTL_INT(_lwkt, OID_AUTO, token_backoff_max, CTLFLAG_RW,
156     &token_backoff_max, 0, "Tokens exponential backoff");
157 static int token_window_shift __cachealign = 8;
158 SYSCTL_INT(_lwkt, OID_AUTO, token_window_shift, CTLFLAG_RW,
159     &token_window_shift, 0, "Tokens TSC windowing shift");
160 
161 /*
162  * The collision count is bumped every time the LWKT scheduler fails
163  * to acquire needed tokens in addition to a normal lwkt_gettoken()
164  * stall.
165  */
166 SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
167     &mp_token.t_collisions, 0, "Collision counter of mp_token");
168 SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
169     &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
170 SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
171     &dev_token.t_collisions, 0, "Collision counter of dev_token");
172 SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
173     &vm_token.t_collisions, 0, "Collision counter of vm_token");
174 SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
175     &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
176 SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
177     &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
178 SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
179     &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
180 SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
181     &tty_token.t_collisions, 0, "Collision counter of tty_token");
182 SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
183     &vnode_token.t_collisions, 0, "Collision counter of vnode_token");
184 
185 static int tokens_debug_output;
186 SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
187     &tokens_debug_output, 0, "Generate stack trace N times");
188 
189 static int _lwkt_getalltokens_sorted(thread_t td);
190 
191 /*
192  * Acquire the initial mplock
193  *
194  * (low level boot only)
195  */
196 void
cpu_get_initial_mplock(void)197 cpu_get_initial_mplock(void)
198 {
199           KKASSERT(mp_token.t_ref == NULL);
200           if (lwkt_trytoken(&mp_token) == FALSE)
201                     panic("cpu_get_initial_mplock");
202 }
203 
204 /*
205  * Return a pool token given an address.  Use a prime number to reduce
206  * overlaps.
207  */
208 #define POOL_HASH_PRIME1      66555444443333333ULL
209 #define POOL_HASH_PRIME2      989042931893ULL
210 
211 static __inline
212 lwkt_token_t
_lwkt_token_pool_lookup(void * ptr)213 _lwkt_token_pool_lookup(void *ptr)
214 {
215           uintptr_t hash1;
216           uintptr_t hash2;
217 
218           hash1 = (uintptr_t)ptr + ((uintptr_t)ptr >> 18);
219           hash1 %= POOL_HASH_PRIME1;
220           hash2 = ((uintptr_t)ptr >> 8) + ((uintptr_t)ptr >> 24);
221           hash2 %= POOL_HASH_PRIME2;
222           return (&pool_tokens[(hash1 ^ hash2) & LWKT_POOL_MASK].token);
223 }
224 
225 /*
226  * Initialize a tokref_t prior to making it visible in the thread's
227  * token array.
228  */
229 static __inline
230 void
_lwkt_tokref_init(lwkt_tokref_t ref,lwkt_token_t tok,thread_t td,long excl)231 _lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
232 {
233           ref->tr_tok = tok;
234           ref->tr_count = excl;
235           ref->tr_owner = td;
236 }
237 
238 /*
239  * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
240  * FALSE on failure.
241  *
242  * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
243  * token, otherwise are attempting to get a shared token.
244  *
245  * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
246  * it is a non-blocking operation (for both exclusive or shared acquisions).
247  */
248 static __inline
249 int
_lwkt_trytokref(lwkt_tokref_t ref,thread_t td,long mode)250 _lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
251 {
252           lwkt_token_t tok;
253           lwkt_tokref_t oref;
254           long count;
255 
256           tok = ref->tr_tok;
257           KASSERT(((mode & TOK_EXCLREQ) == 0 ||   /* non blocking */
258                     td->td_gd->gd_intr_nesting_level == 0 ||
259                     panic_cpu_gd == mycpu),
260                     ("Attempt to acquire token %p not already "
261                     "held in hard code section", tok));
262 
263           if (mode & TOK_EXCLUSIVE) {
264                     /*
265                      * Attempt to get an exclusive token
266                      */
267                     count = tok->t_count;
268 
269                     for (;;) {
270                               oref = tok->t_ref;  /* can be NULL */
271                               cpu_ccfence();
272                               if ((count & ~TOK_EXCLREQ) == 0) {
273                                         /*
274                                          * It is possible to get the exclusive bit.
275                                          * We must clear TOK_EXCLREQ on successful
276                                          * acquisition.
277                                          */
278                                         if (atomic_fcmpset_long(&tok->t_count, &count,
279                                                                       (count & ~TOK_EXCLREQ) |
280                                                                       TOK_EXCLUSIVE)) {
281                                                   KKASSERT(tok->t_ref == NULL);
282                                                   tok->t_ref = ref;
283                                                   return TRUE;
284                                         }
285                                         /* retry */
286                               } else if ((count & TOK_EXCLUSIVE) &&
287                                            oref >= &td->td_toks_base &&
288                                            oref < td->td_toks_stop) {
289                                         /*
290                                          * Our thread already holds the exclusive
291                                          * bit, we treat this tokref as a shared
292                                          * token (sorta) to make the token release
293                                          * code easier.  Treating this as a shared
294                                          * token allows us to simply increment the
295                                          * count field.
296                                          *
297                                          * NOTE: oref cannot race above if it
298                                          *         happens to be ours, so we're good.
299                                          *         But we must still have a stable
300                                          *         variable for both parts of the
301                                          *         comparison.
302                                          *
303                                          * NOTE: Since we already have an exclusive
304                                          *         lock and don't need to check EXCLREQ
305                                          *         we can just use an atomic_add here
306                                          */
307                                         atomic_add_long(&tok->t_count, TOK_INCR);
308                                         ref->tr_count &= ~TOK_EXCLUSIVE;
309                                         return TRUE;
310                               } else if ((mode & TOK_EXCLREQ) &&
311                                            (count & TOK_EXCLREQ) == 0) {
312                                         /*
313                                          * Unable to get the exclusive bit but being
314                                          * asked to set the exclusive-request bit.
315                                          * Since we are going to retry anyway just
316                                          * set the bit unconditionally.
317                                          */
318                                         atomic_set_long(&tok->t_count, TOK_EXCLREQ);
319                                         return FALSE;
320                               } else {
321                                         /*
322                                          * Unable to get the exclusive bit and not
323                                          * being asked to set the exclusive-request
324                                          * (aka lwkt_trytoken()), or EXCLREQ was
325                                          * already set.
326                                          */
327                                         cpu_pause();
328                                         return FALSE;
329                               }
330                               /* retry */
331                     }
332           } else {
333                     /*
334                      * Attempt to get a shared token.  Note that TOK_EXCLREQ
335                      * for shared tokens simply means the caller intends to
336                      * block.  We never actually set the bit in tok->t_count.
337                      *
338                      * Due to the token's no-deadlock guarantee, and complications
339                      * created by the sorted reacquisition code, we can only
340                      * give exclusive requests priority over shared requests
341                      * in situations where the thread holds only one token.
342                      */
343                     count = tok->t_count;
344 
345                     for (;;) {
346                               oref = tok->t_ref;  /* can be NULL */
347                               cpu_ccfence();
348                               if ((count & (TOK_EXCLUSIVE|mode)) == 0 ||
349                                   ((count & TOK_EXCLUSIVE) == 0 &&
350                                   td->td_toks_stop != &td->td_toks_base + 1)
351                               ) {
352                                         /*
353                                          * It may be possible to get the token shared.
354                                          */
355                                         if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) {
356                                                   return TRUE;
357                                         }
358                                         count = atomic_fetchadd_long(&tok->t_count,
359                                                                            -TOK_INCR);
360                                         count -= TOK_INCR;
361                                         /* retry */
362                               } else if ((count & TOK_EXCLUSIVE) &&
363                                            oref >= &td->td_toks_base &&
364                                            oref < td->td_toks_stop) {
365                                         /*
366                                          * We own the exclusive bit on the token so
367                                          * we can in fact also get it shared.
368                                          */
369                                         atomic_add_long(&tok->t_count, TOK_INCR);
370                                         return TRUE;
371                               } else {
372                                         /*
373                                          * We failed to get the token shared
374                                          */
375                                         return FALSE;
376                               }
377                               /* retry */
378                     }
379           }
380 }
381 
382 static __inline
383 int
_lwkt_trytokref_spin(lwkt_tokref_t ref,thread_t td,long mode)384 _lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
385 {
386           if (_lwkt_trytokref(ref, td, mode))
387                     return TRUE;
388 
389           if (mode & TOK_EXCLUSIVE) {
390                     /*
391                      * Contested exclusive token, use exponential backoff
392                      * algorithm.
393                      */
394                     long expbackoff;
395                     long loop;
396 
397                     expbackoff = 0;
398                     while (expbackoff < 6 + token_backoff_max) {
399                               expbackoff = (expbackoff + 1) * 3 / 2;
400                               if ((rdtsc() >> token_window_shift) % ncpus != mycpuid)  {
401                                         for (loop = expbackoff; loop; --loop)
402                                                   cpu_pause();
403                               }
404                               if (_lwkt_trytokref(ref, td, mode))
405                                         return TRUE;
406                     }
407           } else {
408                     /*
409                      * Contested shared token, use TSC windowing.  Note that
410                      * exclusive tokens have priority over shared tokens only
411                      * for the first token.
412                      */
413                     if ((rdtsc() >> token_window_shift) % ncpus == mycpuid) {
414                               if (_lwkt_trytokref(ref, td, mode & ~TOK_EXCLREQ))
415                                         return TRUE;
416                     } else {
417                               if (_lwkt_trytokref(ref, td, mode))
418                                         return TRUE;
419                     }
420 
421           }
422           ++mycpu->gd_cnt.v_lock_colls;
423 
424           return FALSE;
425 }
426 
427 /*
428  * Release a token that we hold.
429  *
430  * Since tokens are polled, we don't have to deal with wakeups and releasing
431  * is really easy.
432  */
433 static __inline
434 void
_lwkt_reltokref(lwkt_tokref_t ref,thread_t td)435 _lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
436 {
437           lwkt_token_t tok;
438           long count;
439 
440           tok = ref->tr_tok;
441           if (tok->t_ref == ref) {
442                     /*
443                      * We are an exclusive holder.  We must clear tr_ref
444                      * before we clear the TOK_EXCLUSIVE bit.  If we are
445                      * unable to clear the bit we must restore
446                      * tok->t_ref.
447                      */
448 #if 0
449                     KKASSERT(count & TOK_EXCLUSIVE);
450 #endif
451                     tok->t_ref = NULL;
452                     atomic_clear_long(&tok->t_count, TOK_EXCLUSIVE);
453           } else {
454                     /*
455                      * We are a shared holder
456                      */
457                     count = atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
458                     KKASSERT(count & TOK_COUNTMASK);        /* count prior */
459           }
460 }
461 
462 /*
463  * Obtain all the tokens required by the specified thread on the current
464  * cpu, return 0 on failure and non-zero on success.  If a failure occurs
465  * any partially acquired tokens will be released prior to return.
466  *
467  * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
468  * tokens that the thread had to release when it switched away.
469  *
470  * If spinning is non-zero this function acquires the tokens in a particular
471  * order to deal with potential deadlocks.  We simply use address order for
472  * the case.
473  *
474  * Called from a critical section.
475  */
476 int
lwkt_getalltokens(thread_t td,int spinning)477 lwkt_getalltokens(thread_t td, int spinning)
478 {
479           lwkt_tokref_t scan;
480           lwkt_token_t tok;
481 
482           if (spinning)
483                     return(_lwkt_getalltokens_sorted(td));
484 
485           /*
486            * Acquire tokens in forward order, assign or validate tok->t_ref.
487            */
488           for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
489                     tok = scan->tr_tok;
490                     for (;;) {
491                               /*
492                                * Only try really hard on the last token
493                                */
494                               if (scan == td->td_toks_stop - 1) {
495                                   if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
496                                             break;
497                               } else {
498                                   if (_lwkt_trytokref(scan, td, scan->tr_count))
499                                             break;
500                               }
501 
502                               /*
503                                * Otherwise we failed to acquire all the tokens.
504                                * Release whatever we did get.
505                                */
506                               KASSERT(tok->t_desc,
507                                         ("token %p is not initialized", tok));
508                               td->td_gd->gd_cnt.v_lock_addr = tok;
509                               td->td_gd->gd_cnt.v_lock_name[0] = 't';
510                               strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
511                                         tok->t_desc,
512                                         sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
513                               if (lwkt_sched_debug > 0) {
514                                         --lwkt_sched_debug;
515                                         kprintf("toka %p %s %s\n",
516                                                   tok, tok->t_desc, td->td_comm);
517                               }
518                               td->td_wmesg = tok->t_desc;
519                               ++tok->t_collisions;
520                               while (--scan >= &td->td_toks_base)
521                                         _lwkt_reltokref(scan, td);
522                               return(FALSE);
523                     }
524           }
525           return (TRUE);
526 }
527 
528 /*
529  * Release all tokens owned by the specified thread on the current cpu.
530  *
531  * This code is really simple.  Even in cases where we own all the tokens
532  * note that t_ref may not match the scan for recursively held tokens which
533  * are held deeper in the stack, or for the case where a lwkt_getalltokens()
534  * failed.
535  *
536  * Tokens are released in reverse order to reduce chasing race failures.
537  *
538  * Called from a critical section.
539  */
540 void
lwkt_relalltokens(thread_t td)541 lwkt_relalltokens(thread_t td)
542 {
543           lwkt_tokref_t scan;
544 
545           /*
546            * Weird order is to try to avoid a panic loop
547            */
548           if (td->td_toks_have) {
549                     scan = td->td_toks_have;
550                     td->td_toks_have = NULL;
551           } else {
552                     scan = td->td_toks_stop;
553           }
554           while (--scan >= &td->td_toks_base)
555                     _lwkt_reltokref(scan, td);
556 }
557 
558 /*
559  * This is the decontention version of lwkt_getalltokens().  The tokens are
560  * acquired in address-sorted order to deal with any deadlocks.  Ultimately
561  * token failures will spin into the scheduler and get here.
562  *
563  * Called from critical section
564  */
565 static
566 int
_lwkt_getalltokens_sorted(thread_t td)567 _lwkt_getalltokens_sorted(thread_t td)
568 {
569           lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
570           lwkt_tokref_t scan;
571           lwkt_token_t tok;
572           int i;
573           int j;
574           int n;
575 
576           /*
577            * Sort the token array.  Yah yah, I know this isn't fun.
578            *
579            * NOTE: Recursively acquired tokens are ordered the same as in the
580            *         td_toks_array so we can always get the earliest one first.
581            *         This is particularly important when a token is acquired
582            *         exclusively multiple times, as only the first acquisition
583            *         is treated as an exclusive token.
584            */
585           i = 0;
586           scan = &td->td_toks_base;
587           while (scan < td->td_toks_stop) {
588                     for (j = 0; j < i; ++j) {
589                               if (scan->tr_tok < sort_array[j]->tr_tok)
590                                         break;
591                     }
592                     if (j != i) {
593                               bcopy(sort_array + j, sort_array + j + 1,
594                                     (i - j) * sizeof(lwkt_tokref_t));
595                     }
596                     sort_array[j] = scan;
597                     ++scan;
598                     ++i;
599           }
600           n = i;
601 
602           /*
603            * Acquire tokens in forward order, assign or validate tok->t_ref.
604            */
605           for (i = 0; i < n; ++i) {
606                     scan = sort_array[i];
607                     tok = scan->tr_tok;
608                     for (;;) {
609                               /*
610                                * Only try really hard on the last token
611                                */
612                               if (scan == td->td_toks_stop - 1) {
613                                   if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
614                                             break;
615                               } else {
616                                   if (_lwkt_trytokref(scan, td, scan->tr_count))
617                                             break;
618                               }
619 
620                               /*
621                                * Otherwise we failed to acquire all the tokens.
622                                * Release whatever we did get.
623                                */
624                               td->td_gd->gd_cnt.v_lock_addr = tok;
625                               td->td_gd->gd_cnt.v_lock_name[0] = 't';
626                               strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
627                                         tok->t_desc,
628                                         sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
629                               if (lwkt_sched_debug > 0) {
630                                         --lwkt_sched_debug;
631                                         kprintf("tokb %p %s %s\n",
632                                                   tok, tok->t_desc, td->td_comm);
633                               }
634                               td->td_wmesg = tok->t_desc;
635                               ++tok->t_collisions;
636                               while (--i >= 0) {
637                                         scan = sort_array[i];
638                                         _lwkt_reltokref(scan, td);
639                               }
640                               return(FALSE);
641                     }
642           }
643 
644           /*
645            * We were successful, there is no need for another core to signal
646            * us.
647            */
648           return (TRUE);
649 }
650 
651 /*
652  * Get a serializing token.  This routine can block.
653  */
654 void
lwkt_gettoken(lwkt_token_t tok)655 lwkt_gettoken(lwkt_token_t tok)
656 {
657           thread_t td = curthread;
658           lwkt_tokref_t ref;
659 
660           ref = td->td_toks_stop;
661           KKASSERT(ref < &td->td_toks_end);
662           ++td->td_toks_stop;
663           cpu_ccfence();
664           _lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
665 
666 #ifdef DEBUG_LOCKS
667           /*
668            * Taking an exclusive token after holding it shared will
669            * livelock. Scan for that case and assert.
670            */
671           lwkt_tokref_t tk;
672           int found = 0;
673           for (tk = &td->td_toks_base; tk < ref; tk++) {
674                     if (tk->tr_tok != tok)
675                               continue;
676 
677                     found++;
678                     if (tk->tr_count & TOK_EXCLUSIVE)
679                               goto good;
680           }
681           /* We found only shared instances of this token if found >0 here */
682           KASSERT((found == 0), ("Token %p s/x livelock", tok));
683 good:
684 #endif
685 
686           if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
687                     return;
688 
689           /*
690            * Give up running if we can't acquire the token right now.
691            *
692            * Since the tokref is already active the scheduler now
693            * takes care of acquisition, so we need only call
694            * lwkt_switch().
695            *
696            * Since we failed this was not a recursive token so upon
697            * return tr_tok->t_ref should be assigned to this specific
698            * ref.
699            */
700           td->td_wmesg = tok->t_desc;
701           ++tok->t_collisions;
702           logtoken(fail, ref);
703           td->td_toks_have = td->td_toks_stop - 1;
704 
705           if (tokens_debug_output > 0) {
706                     --tokens_debug_output;
707                     spin_lock(&tok_debug_spin);
708                     kprintf("Excl Token %p thread %p %s %s\n",
709                               tok, td, tok->t_desc, td->td_comm);
710                     print_backtrace(6);
711                     kprintf("\n");
712                     spin_unlock(&tok_debug_spin);
713           }
714 
715           atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
716           lwkt_switch();
717           logtoken(succ, ref);
718           KKASSERT(tok->t_ref == ref);
719 }
720 
721 /*
722  * Similar to gettoken but we acquire a shared token instead of an exclusive
723  * token.
724  */
725 void
lwkt_gettoken_shared(lwkt_token_t tok)726 lwkt_gettoken_shared(lwkt_token_t tok)
727 {
728           thread_t td = curthread;
729           lwkt_tokref_t ref;
730 
731           ref = td->td_toks_stop;
732           KKASSERT(ref < &td->td_toks_end);
733           ++td->td_toks_stop;
734           cpu_ccfence();
735           _lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);
736 
737 #ifdef DEBUG_LOCKS
738           /*
739            * Taking a pool token in shared mode is a bad idea; other
740            * addresses deeper in the call stack may hash to the same pool
741            * token and you may end up with an exclusive-shared livelock.
742            * Warn in this condition.
743            */
744           if ((tok >= &pool_tokens[0].token) &&
745               (tok < &pool_tokens[LWKT_POOL_TOKENS].token))
746                     kprintf("Warning! Taking pool token %p in shared mode\n", tok);
747 #endif
748 
749 
750           if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
751                     return;
752 
753           /*
754            * Give up running if we can't acquire the token right now.
755            *
756            * Since the tokref is already active the scheduler now
757            * takes care of acquisition, so we need only call
758            * lwkt_switch().
759            *
760            * Since we failed this was not a recursive token so upon
761            * return tr_tok->t_ref should be assigned to this specific
762            * ref.
763            */
764           td->td_wmesg = tok->t_desc;
765           ++tok->t_collisions;
766           logtoken(fail, ref);
767           td->td_toks_have = td->td_toks_stop - 1;
768 
769           if (tokens_debug_output > 0) {
770                     --tokens_debug_output;
771                     spin_lock(&tok_debug_spin);
772                     kprintf("Shar Token %p thread %p %s %s\n",
773                               tok, td, tok->t_desc, td->td_comm);
774                     print_backtrace(6);
775                     kprintf("\n");
776                     spin_unlock(&tok_debug_spin);
777           }
778 
779           atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
780           lwkt_switch();
781           logtoken(succ, ref);
782 }
783 
784 /*
785  * Attempt to acquire a token, return TRUE on success, FALSE on failure.
786  *
787  * We setup the tokref in case we actually get the token (if we switch later
788  * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
789  * TOK_EXCLREQ in case we fail.
790  */
791 int
lwkt_trytoken(lwkt_token_t tok)792 lwkt_trytoken(lwkt_token_t tok)
793 {
794           thread_t td = curthread;
795           lwkt_tokref_t ref;
796 
797           ref = td->td_toks_stop;
798           KKASSERT(ref < &td->td_toks_end);
799           ++td->td_toks_stop;
800           cpu_ccfence();
801           _lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
802 
803           if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
804                     return TRUE;
805 
806           /*
807            * Failed, unpend the request
808            */
809           cpu_ccfence();
810           --td->td_toks_stop;
811           ++tok->t_collisions;
812           return FALSE;
813 }
814 
815 lwkt_token_t
lwkt_getpooltoken(void * ptr)816 lwkt_getpooltoken(void *ptr)
817 {
818           lwkt_token_t tok;
819 
820           tok = _lwkt_token_pool_lookup(ptr);
821           lwkt_gettoken(tok);
822           return (tok);
823 }
824 
825 /*
826  * Release a serializing token.
827  *
828  * WARNING!  All tokens must be released in reverse order.  This will be
829  *             asserted.
830  */
831 void
lwkt_reltoken(lwkt_token_t tok)832 lwkt_reltoken(lwkt_token_t tok)
833 {
834           thread_t td = curthread;
835           lwkt_tokref_t ref;
836 
837           /*
838            * Remove ref from thread token list and assert that it matches
839            * the token passed in.  Tokens must be released in reverse order.
840            */
841           ref = td->td_toks_stop - 1;
842           if (__predict_false(ref < &td->td_toks_base || ref->tr_tok != tok)) {
843                     kprintf("LWKT_RELTOKEN ASSERTION td %p tok %p ref %p/%p\n",
844                               td, tok, &td->td_toks_base, ref);
845                     kprintf("REF CONTENT: tok=%p count=%016lx owner=%p\n",
846                               ref->tr_tok, ref->tr_count, ref->tr_owner);
847                     if (ref < &td->td_toks_base) {
848                               kprintf("lwkt_reltoken: no tokens to release\n");
849                     } else {
850                               kprintf("lwkt_reltoken: release wants %s and got %s\n",
851                                         tok->t_desc, ref->tr_tok->t_desc);
852                     }
853                     panic("lwkt_reltoken: illegal release");
854           }
855           _lwkt_reltokref(ref, td);
856           cpu_sfence();
857           td->td_toks_stop = ref;
858 }
859 
860 /*
861  * It is faster for users of lwkt_getpooltoken() to use the returned
862  * token and just call lwkt_reltoken(), but for convenience we provide
863  * this function which looks the token up based on the ident.
864  */
865 void
lwkt_relpooltoken(void * ptr)866 lwkt_relpooltoken(void *ptr)
867 {
868           lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
869           lwkt_reltoken(tok);
870 }
871 
872 /*
873  * Return a count of the number of token refs the thread has to the
874  * specified token, whether it currently owns the token or not.
875  */
876 int
lwkt_cnttoken(lwkt_token_t tok,thread_t td)877 lwkt_cnttoken(lwkt_token_t tok, thread_t td)
878 {
879           lwkt_tokref_t scan;
880           int count = 0;
881 
882           for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
883                     if (scan->tr_tok == tok)
884                               ++count;
885           }
886           return(count);
887 }
888 
889 /*
890  * Pool tokens are used to provide a type-stable serializing token
891  * pointer that does not race against disappearing data structures.
892  *
893  * This routine is called in early boot just after we setup the BSP's
894  * globaldata structure.
895  */
896 void
lwkt_token_pool_init(void)897 lwkt_token_pool_init(void)
898 {
899           int i;
900 
901           for (i = 0; i < LWKT_POOL_TOKENS; ++i)
902                     lwkt_token_init(&pool_tokens[i].token, "pool");
903 }
904 
905 lwkt_token_t
lwkt_token_pool_lookup(void * ptr)906 lwkt_token_pool_lookup(void *ptr)
907 {
908           return (_lwkt_token_pool_lookup(ptr));
909 }
910 
911 /*
912  * Initialize a token.
913  */
914 void
lwkt_token_init(lwkt_token_t tok,const char * desc)915 lwkt_token_init(lwkt_token_t tok, const char *desc)
916 {
917           tok->t_count = 0;
918           tok->t_ref = NULL;
919           tok->t_collisions = 0;
920           tok->t_desc = desc;
921 }
922 
923 void
lwkt_token_uninit(lwkt_token_t tok)924 lwkt_token_uninit(lwkt_token_t tok)
925 {
926           /* empty */
927 }
928 
929 /*
930  * Exchange the two most recent tokens on the tokref stack.  This allows
931  * you to release a token out of order.
932  *
933  * We have to be careful about the case where the top two tokens are
934  * the same token.  In this case tok->t_ref will point to the deeper
935  * ref and must remain pointing to the deeper ref.  If we were to swap
936  * it the first release would clear the token even though a second
937  * ref is still present.
938  *
939  * Only exclusively held tokens contain a reference to the tokref which
940  * has to be flipped along with the swap.
941  */
942 void
lwkt_token_swap(void)943 lwkt_token_swap(void)
944 {
945           lwkt_tokref_t ref1, ref2;
946           lwkt_token_t tok1, tok2;
947           long count1, count2;
948           thread_t td = curthread;
949 
950           crit_enter();
951 
952           ref1 = td->td_toks_stop - 1;
953           ref2 = td->td_toks_stop - 2;
954           KKASSERT(ref1 >= &td->td_toks_base);
955           KKASSERT(ref2 >= &td->td_toks_base);
956 
957           tok1 = ref1->tr_tok;
958           tok2 = ref2->tr_tok;
959           count1 = ref1->tr_count;
960           count2 = ref2->tr_count;
961 
962           if (tok1 != tok2) {
963                     ref1->tr_tok = tok2;
964                     ref1->tr_count = count2;
965                     ref2->tr_tok = tok1;
966                     ref2->tr_count = count1;
967                     if (tok1->t_ref == ref1)
968                               tok1->t_ref = ref2;
969                     if (tok2->t_ref == ref2)
970                               tok2->t_ref = ref1;
971           }
972 
973           crit_exit();
974 }
975 
#ifdef DDB
/*
 * Debugger command (registered through DB_SHOW_COMMAND under the name
 * "tokens") which prints, for each of a fixed list of global tokens,
 * the token address, the owner recorded in its t_ref tokref (NULL when
 * t_ref is not set), its collision counter and its description.
 */
DB_SHOW_COMMAND(tokens, db_tok_all)
{
	struct lwkt_token *tok, **ptr;
	/* NULL-terminated list of the tokens to report. */
	struct lwkt_token *toklist[] = {
		&mp_token,
		&pmap_token,
		&dev_token,
		&vm_token,
		&vmspace_token,
		&kvm_token,
		&sigio_token,
		&tty_token,
		&vnode_token,
		NULL
	};

	for (ptr = toklist; (tok = *ptr) != NULL; ++ptr) {
		db_printf("tok=%p tr_owner=%p t_collisions=%ld t_desc=%s\n", tok,
		    (tok->t_ref ? tok->t_ref->tr_owner : NULL),
		    tok->t_collisions, tok->t_desc);
	}
}
#endif /* DDB */
1001