1 /*        $NetBSD: subr_kcpuset.c,v 1.20 2023/09/23 18:21:11 ad Exp $ */
2 
3 /*-
4  * Copyright (c) 2011, 2023 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Mindaugas Rasiukevicius.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Kernel CPU set implementation.
34  *
35  * Interface can be used by kernel subsystems as a unified dynamic CPU
36  * bitset implementation handling many CPUs.  Facility also supports early
37  * use by MD code on boot, as it fixups bitsets on further boot.
38  *
39  * TODO:
40  * - Handle "reverse" bitset on fixup/grow.
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: subr_kcpuset.c,v 1.20 2023/09/23 18:21:11 ad Exp $");
45 
46 #include <sys/param.h>
47 #include <sys/types.h>
48 
49 #include <sys/atomic.h>
50 #include <sys/intr.h>
51 #include <sys/sched.h>
52 #include <sys/kcpuset.h>
53 #include <sys/kmem.h>
54 
55 /* Number of CPUs to support. */
56 #define   KC_MAXCPUS                    roundup2(MAXCPUS, 32)
57 
58 /*
59  * Structure of dynamic CPU set in the kernel.
60  */
61 struct kcpuset {
62           uint32_t            bits[0];
63 };
64 
65 typedef struct kcpuset_impl {
66           /* Reference count. */
67           u_int                         kc_refcnt;
68           /* Next to free, if non-NULL (used when multiple references). */
69           struct kcpuset *    kc_next;
70           /* Actual variable-sized field of bits. */
71           struct kcpuset                kc_field;
72 } kcpuset_impl_t;
73 
74 #define   KC_BITS_OFF                   (offsetof(struct kcpuset_impl, kc_field))
75 #define   KC_GETSTRUCT(b)               ((kcpuset_impl_t *)((char *)(b) - KC_BITS_OFF))
76 #define   KC_GETCSTRUCT(b)    ((const kcpuset_impl_t *)((const char *)(b) - KC_BITS_OFF))
77 
78 /* Sizes of a single bitset. */
79 #define   KC_SHIFT            5
80 #define   KC_MASK                       31
81 
82 /* An array of noted early kcpuset creations and data. */
83 #define   KC_SAVE_NITEMS                8
84 
85 /* Structures for early boot mechanism (must be statically initialised). */
86 static kcpuset_t **           kc_noted_early[KC_SAVE_NITEMS];
87 static uint32_t                         kc_bits_early[KC_SAVE_NITEMS];
88 static int                              kc_last_idx = 0;
89 static bool                             kc_initialised = false;
90 
91 #define   KC_BITSIZE_EARLY    sizeof(kc_bits_early[0])
92 #define   KC_NFIELDS_EARLY    1
93 
94 /*
95  * The size of whole bitset fields and amount of fields.
96  * The whole size must statically initialise for early case.
97  */
98 static size_t                           kc_bitsize __read_mostly = KC_BITSIZE_EARLY;
99 static size_t                           kc_nfields __read_mostly = KC_NFIELDS_EARLY;
100 static size_t                           kc_memsize __read_mostly;
101 
102 static kcpuset_t *            kcpuset_create_raw(bool);
103 
104 /*
105  * kcpuset_sysinit: initialize the subsystem, transfer early boot cases
106  * to dynamically allocated sets.
107  */
108 void
kcpuset_sysinit(void)109 kcpuset_sysinit(void)
110 {
111           kcpuset_t *kc_dynamic[KC_SAVE_NITEMS], *kcp;
112           int i, s;
113 
114           /* Set a kcpuset_t sizes. */
115           kc_nfields = (KC_MAXCPUS >> KC_SHIFT);
116           kc_bitsize = sizeof(uint32_t) * kc_nfields;
117           kc_memsize = sizeof(kcpuset_impl_t) + kc_bitsize;
118           KASSERT(kc_nfields != 0);
119           KASSERT(kc_bitsize != 0);
120 
121           /* First, pre-allocate kcpuset entries. */
122           for (i = 0; i < kc_last_idx; i++) {
123                     kcp = kcpuset_create_raw(true);
124                     kc_dynamic[i] = kcp;
125           }
126 
127           /*
128            * Prepare to convert all early noted kcpuset uses to dynamic sets.
129            * All processors, except the one we are currently running (primary),
130            * must not be spinned yet.  Since MD facilities can use kcpuset,
131            * raise the IPL to high.
132            */
133           KASSERT(mp_online == false);
134 
135           s = splhigh();
136           for (i = 0; i < kc_last_idx; i++) {
137                     /*
138                      * Transfer the bits from early static storage to the kcpuset.
139                      */
140                     KASSERT(kc_bitsize >= KC_BITSIZE_EARLY);
141                     memcpy(kc_dynamic[i], &kc_bits_early[i], KC_BITSIZE_EARLY);
142 
143                     /*
144                      * Store the new pointer, pointing to the allocated kcpuset.
145                      * Note: we are not in an interrupt context and it is the only
146                      * CPU running - thus store is safe (e.g. no need for pointer
147                      * variable to be volatile).
148                      */
149                     *kc_noted_early[i] = kc_dynamic[i];
150           }
151           kc_initialised = true;
152           kc_last_idx = 0;
153           splx(s);
154 }
155 
156 /*
157  * kcpuset_early_ptr: note an early boot use by saving the pointer and
158  * returning a pointer to a static, temporary bit field.
159  */
160 static kcpuset_t *
kcpuset_early_ptr(kcpuset_t ** kcptr)161 kcpuset_early_ptr(kcpuset_t **kcptr)
162 {
163           kcpuset_t *kcp;
164           int s;
165 
166           s = splhigh();
167           if (kc_last_idx < KC_SAVE_NITEMS) {
168                     /*
169                      * Save the pointer, return pointer to static early field.
170                      * Need to zero it out.
171                      */
172                     kc_noted_early[kc_last_idx] = kcptr;
173                     kcp = (kcpuset_t *)&kc_bits_early[kc_last_idx];
174                     kc_last_idx++;
175                     memset(kcp, 0, KC_BITSIZE_EARLY);
176                     KASSERT(kc_bitsize == KC_BITSIZE_EARLY);
177           } else {
178                     panic("kcpuset(9): all early-use entries exhausted; "
179                         "increase KC_SAVE_NITEMS\n");
180           }
181           splx(s);
182 
183           return kcp;
184 }
185 
186 /*
187  * Routines to create or destroy the CPU set.
188  * Early boot case is handled.
189  */
190 
191 static kcpuset_t *
kcpuset_create_raw(bool zero)192 kcpuset_create_raw(bool zero)
193 {
194           kcpuset_impl_t *kc;
195 
196           kc = kmem_alloc(kc_memsize, KM_SLEEP);
197           kc->kc_refcnt = 1;
198           kc->kc_next = NULL;
199 
200           if (zero) {
201                     memset(&kc->kc_field, 0, kc_bitsize);
202           }
203 
204           /* Note: return pointer to the actual field of bits. */
205           KASSERT((uint8_t *)kc + KC_BITS_OFF == (uint8_t *)&kc->kc_field);
206           return &kc->kc_field;
207 }
208 
209 void
kcpuset_create(kcpuset_t ** retkcp,bool zero)210 kcpuset_create(kcpuset_t **retkcp, bool zero)
211 {
212           if (__predict_false(!kc_initialised)) {
213                     /* Early boot use - special case. */
214                     *retkcp = kcpuset_early_ptr(retkcp);
215                     return;
216           }
217           *retkcp = kcpuset_create_raw(zero);
218 }
219 
220 void
kcpuset_clone(kcpuset_t ** retkcp,const kcpuset_t * kcp)221 kcpuset_clone(kcpuset_t **retkcp, const kcpuset_t *kcp)
222 {
223           kcpuset_create(retkcp, false);
224           memcpy(*retkcp, kcp, kc_bitsize);
225 }
226 
227 void
kcpuset_destroy(kcpuset_t * kcp)228 kcpuset_destroy(kcpuset_t *kcp)
229 {
230           const size_t size = kc_memsize;
231           kcpuset_impl_t *kc;
232 
233           KASSERT(kc_initialised);
234           KASSERT(kcp != NULL);
235 
236           do {
237                     kc = KC_GETSTRUCT(kcp);
238                     kcp = kc->kc_next;
239                     kmem_free(kc, size);
240           } while (kcp);
241 }
242 
243 /*
244  * Routines to reference/unreference the CPU set.
245  * Note: early boot case is not supported by these routines.
246  */
247 
248 void
kcpuset_use(kcpuset_t * kcp)249 kcpuset_use(kcpuset_t *kcp)
250 {
251           kcpuset_impl_t *kc = KC_GETSTRUCT(kcp);
252 
253           KASSERT(kc_initialised);
254           atomic_inc_uint(&kc->kc_refcnt);
255 }
256 
257 void
kcpuset_unuse(kcpuset_t * kcp,kcpuset_t ** lst)258 kcpuset_unuse(kcpuset_t *kcp, kcpuset_t **lst)
259 {
260           kcpuset_impl_t *kc = KC_GETSTRUCT(kcp);
261 
262           KASSERT(kc_initialised);
263           KASSERT(kc->kc_refcnt > 0);
264 
265           membar_release();
266           if (atomic_dec_uint_nv(&kc->kc_refcnt) != 0) {
267                     return;
268           }
269           membar_acquire();
270           KASSERT(kc->kc_next == NULL);
271           if (lst == NULL) {
272                     kcpuset_destroy(kcp);
273                     return;
274           }
275           kc->kc_next = *lst;
276           *lst = kcp;
277 }
278 
279 /*
280  * Routines to transfer the CPU set from / to userspace.
281  * Note: early boot case is not supported by these routines.
282  */
283 
284 int
kcpuset_copyin(const cpuset_t * ucp,kcpuset_t * kcp,size_t len)285 kcpuset_copyin(const cpuset_t *ucp, kcpuset_t *kcp, size_t len)
286 {
287           kcpuset_impl_t *kc __diagused = KC_GETSTRUCT(kcp);
288 
289           KASSERT(kc_initialised);
290           KASSERT(kc->kc_refcnt > 0);
291           KASSERT(kc->kc_next == NULL);
292 
293           if (len > kc_bitsize) { /* XXX */
294                     return EINVAL;
295           }
296           return copyin(ucp, kcp, len);
297 }
298 
299 int
kcpuset_copyout(kcpuset_t * kcp,cpuset_t * ucp,size_t len)300 kcpuset_copyout(kcpuset_t *kcp, cpuset_t *ucp, size_t len)
301 {
302           kcpuset_impl_t *kc __diagused = KC_GETSTRUCT(kcp);
303 
304           KASSERT(kc_initialised);
305           KASSERT(kc->kc_refcnt > 0);
306           KASSERT(kc->kc_next == NULL);
307 
308           if (len > kc_bitsize) { /* XXX */
309                     return EINVAL;
310           }
311           return copyout(kcp, ucp, len);
312 }
313 
314 void
kcpuset_export_u32(const kcpuset_t * kcp,uint32_t * bitfield,size_t len)315 kcpuset_export_u32(const kcpuset_t *kcp, uint32_t *bitfield, size_t len)
316 {
317           size_t rlen = MIN(kc_bitsize, len);
318 
319           KASSERT(kcp != NULL);
320           memcpy(bitfield, kcp->bits, rlen);
321 }
322 
323 /*
324  * Routines to change bit field - zero, fill, copy, set, unset, etc.
325  */
326 
327 void
kcpuset_zero(kcpuset_t * kcp)328 kcpuset_zero(kcpuset_t *kcp)
329 {
330 
331           KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_refcnt > 0);
332           KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_next == NULL);
333           memset(kcp, 0, kc_bitsize);
334 }
335 
336 void
kcpuset_fill(kcpuset_t * kcp)337 kcpuset_fill(kcpuset_t *kcp)
338 {
339 
340           KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_refcnt > 0);
341           KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_next == NULL);
342           memset(kcp, ~0, kc_bitsize);
343 }
344 
345 void
kcpuset_copy(kcpuset_t * dkcp,const kcpuset_t * skcp)346 kcpuset_copy(kcpuset_t *dkcp, const kcpuset_t *skcp)
347 {
348 
349           KASSERT(!kc_initialised || KC_GETSTRUCT(dkcp)->kc_refcnt > 0);
350           KASSERT(!kc_initialised || KC_GETSTRUCT(dkcp)->kc_next == NULL);
351           memcpy(dkcp, skcp, kc_bitsize);
352 }
353 
354 void
kcpuset_set(kcpuset_t * kcp,cpuid_t i)355 kcpuset_set(kcpuset_t *kcp, cpuid_t i)
356 {
357           const size_t j = i >> KC_SHIFT;
358 
359           KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_next == NULL);
360           KASSERT(j < kc_nfields);
361 
362           kcp->bits[j] |= __BIT(i & KC_MASK);
363 }
364 
365 void
kcpuset_clear(kcpuset_t * kcp,cpuid_t i)366 kcpuset_clear(kcpuset_t *kcp, cpuid_t i)
367 {
368           const size_t j = i >> KC_SHIFT;
369 
370           KASSERT(!kc_initialised || KC_GETCSTRUCT(kcp)->kc_next == NULL);
371           KASSERT(j < kc_nfields);
372 
373           kcp->bits[j] &= ~(__BIT(i & KC_MASK));
374 }
375 
376 bool
kcpuset_isset(const kcpuset_t * kcp,cpuid_t i)377 kcpuset_isset(const kcpuset_t *kcp, cpuid_t i)
378 {
379           const size_t j = i >> KC_SHIFT;
380 
381           KASSERT(kcp != NULL);
382           KASSERT(!kc_initialised || KC_GETCSTRUCT(kcp)->kc_refcnt > 0);
383           KASSERT(!kc_initialised || KC_GETCSTRUCT(kcp)->kc_next == NULL);
384           KASSERT(j < kc_nfields);
385 
386           return ((__BIT(i & KC_MASK)) & kcp->bits[j]) != 0;
387 }
388 
389 bool
kcpuset_isotherset(const kcpuset_t * kcp,cpuid_t i)390 kcpuset_isotherset(const kcpuset_t *kcp, cpuid_t i)
391 {
392           const size_t j2 = i >> KC_SHIFT;
393           const uint32_t mask = ~(__BIT(i & KC_MASK));
394 
395           for (size_t j = 0; j < kc_nfields; j++) {
396                     const uint32_t bits = kcp->bits[j];
397                     if (bits && (j != j2 || (bits & mask) != 0)) {
398                               return true;
399                     }
400           }
401           return false;
402 }
403 
404 bool
kcpuset_iszero(const kcpuset_t * kcp)405 kcpuset_iszero(const kcpuset_t *kcp)
406 {
407 
408           for (size_t j = 0; j < kc_nfields; j++) {
409                     if (kcp->bits[j] != 0) {
410                               return false;
411                     }
412           }
413           return true;
414 }
415 
416 bool
kcpuset_match(const kcpuset_t * kcp1,const kcpuset_t * kcp2)417 kcpuset_match(const kcpuset_t *kcp1, const kcpuset_t *kcp2)
418 {
419 
420           return memcmp(kcp1, kcp2, kc_bitsize) == 0;
421 }
422 
423 bool
kcpuset_intersecting_p(const kcpuset_t * kcp1,const kcpuset_t * kcp2)424 kcpuset_intersecting_p(const kcpuset_t *kcp1, const kcpuset_t *kcp2)
425 {
426 
427           for (size_t j = 0; j < kc_nfields; j++) {
428                     if (kcp1->bits[j] & kcp2->bits[j])
429                               return true;
430           }
431           return false;
432 }
433 
434 cpuid_t
kcpuset_ffs(const kcpuset_t * kcp)435 kcpuset_ffs(const kcpuset_t *kcp)
436 {
437 
438           for (size_t j = 0; j < kc_nfields; j++) {
439                     if (kcp->bits[j])
440                               return 32 * j + ffs(kcp->bits[j]);
441           }
442           return 0;
443 }
444 
445 cpuid_t
kcpuset_ffs_intersecting(const kcpuset_t * kcp1,const kcpuset_t * kcp2)446 kcpuset_ffs_intersecting(const kcpuset_t *kcp1, const kcpuset_t *kcp2)
447 {
448 
449           for (size_t j = 0; j < kc_nfields; j++) {
450                     uint32_t bits = kcp1->bits[j] & kcp2->bits[j];
451                     if (bits)
452                               return 32 * j + ffs(bits);
453           }
454           return 0;
455 }
456 
457 void
kcpuset_merge(kcpuset_t * kcp1,const kcpuset_t * kcp2)458 kcpuset_merge(kcpuset_t *kcp1, const kcpuset_t *kcp2)
459 {
460 
461           for (size_t j = 0; j < kc_nfields; j++) {
462                     kcp1->bits[j] |= kcp2->bits[j];
463           }
464 }
465 
466 void
kcpuset_intersect(kcpuset_t * kcp1,const kcpuset_t * kcp2)467 kcpuset_intersect(kcpuset_t *kcp1, const kcpuset_t *kcp2)
468 {
469 
470           for (size_t j = 0; j < kc_nfields; j++) {
471                     kcp1->bits[j] &= kcp2->bits[j];
472           }
473 }
474 
475 void
kcpuset_remove(kcpuset_t * kcp1,const kcpuset_t * kcp2)476 kcpuset_remove(kcpuset_t *kcp1, const kcpuset_t *kcp2)
477 {
478 
479           for (size_t j = 0; j < kc_nfields; j++) {
480                     kcp1->bits[j] &= ~kcp2->bits[j];
481           }
482 }
483 
484 int
kcpuset_countset(const kcpuset_t * kcp)485 kcpuset_countset(const kcpuset_t *kcp)
486 {
487           int count = 0;
488 
489           for (size_t j = 0; j < kc_nfields; j++) {
490                     count += popcount32(kcp->bits[j]);
491           }
492           return count;
493 }
494 
495 /*
496  * Routines to set/clear the flags atomically.
497  */
498 
499 void
kcpuset_atomic_set(kcpuset_t * kcp,cpuid_t i)500 kcpuset_atomic_set(kcpuset_t *kcp, cpuid_t i)
501 {
502           const size_t j = i >> KC_SHIFT;
503 
504           KASSERT(j < kc_nfields);
505           atomic_or_32(&kcp->bits[j], __BIT(i & KC_MASK));
506 }
507 
508 void
kcpuset_atomic_clear(kcpuset_t * kcp,cpuid_t i)509 kcpuset_atomic_clear(kcpuset_t *kcp, cpuid_t i)
510 {
511           const size_t j = i >> KC_SHIFT;
512 
513           KASSERT(j < kc_nfields);
514           atomic_and_32(&kcp->bits[j], ~(__BIT(i & KC_MASK)));
515 }
516 
517 void
kcpuset_atomicly_intersect(kcpuset_t * kcp1,const kcpuset_t * kcp2)518 kcpuset_atomicly_intersect(kcpuset_t *kcp1, const kcpuset_t *kcp2)
519 {
520 
521           for (size_t j = 0; j < kc_nfields; j++) {
522                     if (kcp2->bits[j])
523                               atomic_and_32(&kcp1->bits[j], kcp2->bits[j]);
524           }
525 }
526 
527 void
kcpuset_atomicly_merge(kcpuset_t * kcp1,const kcpuset_t * kcp2)528 kcpuset_atomicly_merge(kcpuset_t *kcp1, const kcpuset_t *kcp2)
529 {
530 
531           for (size_t j = 0; j < kc_nfields; j++) {
532                     if (kcp2->bits[j])
533                               atomic_or_32(&kcp1->bits[j], kcp2->bits[j]);
534           }
535 }
536 
537 void
kcpuset_atomicly_remove(kcpuset_t * kcp1,const kcpuset_t * kcp2)538 kcpuset_atomicly_remove(kcpuset_t *kcp1, const kcpuset_t *kcp2)
539 {
540 
541           for (size_t j = 0; j < kc_nfields; j++) {
542                     if (kcp2->bits[j])
543                               atomic_and_32(&kcp1->bits[j], ~kcp2->bits[j]);
544           }
545 }
546