1 /*        $NetBSD: subr_cpu.c,v 1.22 2024/03/05 20:59:41 thorpej Exp $          */
2 
3 /*-
4  * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019, 2020
5  *     The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*-
34  * Copyright (c)2007 YAMAMOTO Takashi,
35  * All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  *
46  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56  * SUCH DAMAGE.
57  */
58 
59 /*
60  * CPU related routines shared with rump.
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.22 2024/03/05 20:59:41 thorpej Exp $");
65 
66 #include <sys/param.h>
67 #include <sys/atomic.h>
68 #include <sys/systm.h>
69 #include <sys/sched.h>
70 #include <sys/conf.h>
71 #include <sys/cpu.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/kmem.h>
75 
76 static void         cpu_topology_fake1(struct cpu_info *);
77 
78 kmutex_t  cpu_lock            __cacheline_aligned;
79 int                 ncpu                          __read_mostly;
80 int                 ncpuonline                    __read_mostly;
81 bool                mp_online           __read_mostly;
82 static bool         cpu_topology_present          __read_mostly;
83 static bool         cpu_topology_haveslow         __read_mostly;
84 int64_t             cpu_counts[CPU_COUNT_MAX];
85 
86 /* An array of CPUs.  There are ncpu entries. */
87 struct cpu_info **cpu_infos             __read_mostly;
88 
89 /* Note: set on mi_cpu_attach() and idle_loop(). */
90 kcpuset_t *         kcpuset_attached    __read_mostly       = NULL;
91 kcpuset_t *         kcpuset_running               __read_mostly       = NULL;
92 
93 static char cpu_model[128];
94 
95 /*
96  * mi_cpu_init: early initialisation of MI CPU related structures.
97  *
98  * Note: may not block and memory allocator is not yet available.
99  */
100 void
mi_cpu_init(void)101 mi_cpu_init(void)
102 {
103           struct cpu_info *ci;
104 
105           mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);
106 
107           kcpuset_create(&kcpuset_attached, true);
108           kcpuset_create(&kcpuset_running, true);
109           kcpuset_set(kcpuset_running, 0);
110 
111           ci = curcpu();
112           cpu_topology_fake1(ci);
113 }
114 
115 int
cpu_setmodel(const char * fmt,...)116 cpu_setmodel(const char *fmt, ...)
117 {
118           int len;
119           va_list ap;
120 
121           va_start(ap, fmt);
122           len = vsnprintf(cpu_model, sizeof(cpu_model), fmt, ap);
123           va_end(ap);
124           return len;
125 }
126 
127 const char *
cpu_getmodel(void)128 cpu_getmodel(void)
129 {
130           return cpu_model;
131 }
132 
133 bool
cpu_softintr_p(void)134 cpu_softintr_p(void)
135 {
136 
137           return (curlwp->l_pflag & LP_INTR) != 0;
138 }
139 
140 bool
curcpu_stable(void)141 curcpu_stable(void)
142 {
143           struct lwp *const l = curlwp;
144           const int pflag = l->l_pflag;
145           const int nopreempt = l->l_nopreempt;
146 
147           /*
148            * - Softints (LP_INTR) never migrate between CPUs.
149            * - Bound lwps (LP_BOUND), either kthreads created bound to
150            *   a CPU or any lwps bound with curlwp_bind, never migrate.
151            * - If kpreemption is disabled, the lwp can't migrate.
152            * - If we're in interrupt context, preemption is blocked.
153            *
154            * We combine the LP_INTR, LP_BOUND, and l_nopreempt test into
155            * a single predicted-true branch so this is cheap to assert in
156            * most contexts where it will be used, then fall back to
157            * calling the full kpreempt_disabled() and cpu_intr_p() as
158            * subroutines.
159            *
160            * XXX Is cpu_intr_p redundant with kpreempt_disabled?
161            */
162           return __predict_true(((pflag & (LP_INTR|LP_BOUND)) | nopreempt)
163                     != 0) ||
164               kpreempt_disabled() ||
165               cpu_intr_p();
166 }
167 
168 /*
169  * Collect CPU topology information as each CPU is attached.  This can be
170  * called early during boot, so we need to be careful what we do.
171  */
172 void
cpu_topology_set(struct cpu_info * ci,u_int package_id,u_int core_id,u_int smt_id,u_int numa_id)173 cpu_topology_set(struct cpu_info *ci, u_int package_id, u_int core_id,
174     u_int smt_id, u_int numa_id)
175 {
176           enum cpu_rel rel;
177 
178           cpu_topology_present = true;
179           ci->ci_package_id = package_id;
180           ci->ci_core_id = core_id;
181           ci->ci_smt_id = smt_id;
182           ci->ci_numa_id = numa_id;
183           for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
184                     ci->ci_sibling[rel] = ci;
185                     ci->ci_nsibling[rel] = 1;
186           }
187 }
188 
189 /*
190  * Collect CPU relative speed
191  */
192 void
cpu_topology_setspeed(struct cpu_info * ci,bool slow)193 cpu_topology_setspeed(struct cpu_info *ci, bool slow)
194 {
195 
196           cpu_topology_haveslow |= slow;
197           ci->ci_is_slow = slow;
198 }
199 
200 /*
201  * Link a CPU into the given circular list.
202  */
203 static void
cpu_topology_link(struct cpu_info * ci,struct cpu_info * ci2,enum cpu_rel rel)204 cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
205 {
206           struct cpu_info *ci3;
207 
208           /* Walk to the end of the existing circular list and append. */
209           for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
210                     ci3->ci_nsibling[rel]++;
211                     if (ci3->ci_sibling[rel] == ci2) {
212                               break;
213                     }
214           }
215           ci->ci_sibling[rel] = ci2;
216           ci3->ci_sibling[rel] = ci;
217           ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
218 }
219 
220 /*
221  * Print out the topology lists.
222  */
223 static void
cpu_topology_dump(void)224 cpu_topology_dump(void)
225 {
226           CPU_INFO_ITERATOR cii;
227           struct cpu_info *ci, *ci2;
228           const char *names[] = { "core", "pkg", "1st" };
229           enum cpu_rel rel;
230           int i;
231 
232           CTASSERT(__arraycount(names) >= __arraycount(ci->ci_sibling));
233           if (ncpu == 1) {
234                     return;
235           }
236 
237           for (CPU_INFO_FOREACH(cii, ci)) {
238                     if (cpu_topology_haveslow)
239                               aprint_debug("%s ", ci->ci_is_slow ? "slow" : "fast");
240                     for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
241                               aprint_debug("%s has %d %s siblings:", cpu_name(ci),
242                                   ci->ci_nsibling[rel], names[rel]);
243                               ci2 = ci->ci_sibling[rel];
244                               i = 0;
245                               do {
246                                         aprint_debug(" %s", cpu_name(ci2));
247                                         ci2 = ci2->ci_sibling[rel];
248                               } while (++i < 64 && ci2 != ci->ci_sibling[rel]);
249                               if (i == 64) {
250                                         aprint_debug(" GAVE UP");
251                               }
252                               aprint_debug("\n");
253                     }
254                     aprint_debug("%s first in package: %s\n", cpu_name(ci),
255                         cpu_name(ci->ci_package1st));
256           }
257 }
258 
259 /*
260  * Fake up topology info if we have none, or if what we got was bogus.
261  * Used early in boot, and by cpu_topology_fake().
262  */
263 static void
cpu_topology_fake1(struct cpu_info * ci)264 cpu_topology_fake1(struct cpu_info *ci)
265 {
266           enum cpu_rel rel;
267 
268           for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
269                     ci->ci_sibling[rel] = ci;
270                     ci->ci_nsibling[rel] = 1;
271           }
272           if (!cpu_topology_present) {
273                     ci->ci_package_id = cpu_index(ci);
274           }
275           ci->ci_schedstate.spc_flags |=
276               (SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
277           ci->ci_package1st = ci;
278           if (!cpu_topology_haveslow) {
279                     ci->ci_is_slow = false;
280           }
281 }
282 
283 /*
284  * Fake up topology info if we have none, or if what we got was bogus.
285  * Don't override ci_package_id, etc, if cpu_topology_present is set.
286  * MD code also uses these.
287  */
288 static void
cpu_topology_fake(void)289 cpu_topology_fake(void)
290 {
291           CPU_INFO_ITERATOR cii;
292           struct cpu_info *ci;
293 
294           for (CPU_INFO_FOREACH(cii, ci)) {
295                     cpu_topology_fake1(ci);
296                     /* Undo (early boot) flag set so everything links OK. */
297                     ci->ci_schedstate.spc_flags &=
298                         ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
299           }
300 }
301 
302 /*
303  * Fix up basic CPU topology info.  Right now that means attach each CPU to
304  * circular lists of its siblings in the same core, and in the same package.
305  */
306 void
cpu_topology_init(void)307 cpu_topology_init(void)
308 {
309           CPU_INFO_ITERATOR cii, cii2;
310           struct cpu_info *ci, *ci2, *ci3;
311           u_int minsmt, mincore;
312 
313           if (!cpu_topology_present) {
314                     cpu_topology_fake();
315                     goto linkit;
316           }
317 
318           /* Find siblings in same core and package. */
319           for (CPU_INFO_FOREACH(cii, ci)) {
320                     ci->ci_schedstate.spc_flags &=
321                         ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
322                     for (CPU_INFO_FOREACH(cii2, ci2)) {
323                               /* Avoid bad things happening. */
324                               if (ci2->ci_package_id == ci->ci_package_id &&
325                                   ci2->ci_core_id == ci->ci_core_id &&
326                                   ci2->ci_smt_id == ci->ci_smt_id &&
327                                   ci2 != ci) {
328 #ifdef DEBUG
329                                         printf("cpu%u %p pkg %u core %u smt %u same as "
330                                                "cpu%u %p pkg %u core %u smt %u\n",
331                                                cpu_index(ci), ci, ci->ci_package_id,
332                                                ci->ci_core_id, ci->ci_smt_id,
333                                                cpu_index(ci2), ci2, ci2->ci_package_id,
334                                                ci2->ci_core_id, ci2->ci_smt_id);
335 #endif
336                                         printf("cpu_topology_init: info bogus, "
337                                             "faking it\n");
338                                         cpu_topology_fake();
339                                         goto linkit;
340                               }
341                               if (ci2 == ci ||
342                                   ci2->ci_package_id != ci->ci_package_id) {
343                                         continue;
344                               }
345                               /* Find CPUs in the same core. */
346                               if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
347                                   ci->ci_core_id == ci2->ci_core_id) {
348                                         cpu_topology_link(ci, ci2, CPUREL_CORE);
349                               }
350                               /* Find CPUs in the same package. */
351                               if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
352                                         cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
353                               }
354                               if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
355                                   ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
356                                         break;
357                               }
358                     }
359           }
360 
361  linkit:
362           /* Identify lowest numbered SMT in each core. */
363           for (CPU_INFO_FOREACH(cii, ci)) {
364                     ci2 = ci3 = ci;
365                     minsmt = ci->ci_smt_id;
366                     do {
367                               if (ci2->ci_smt_id < minsmt) {
368                                         ci3 = ci2;
369                                         minsmt = ci2->ci_smt_id;
370                               }
371                               ci2 = ci2->ci_sibling[CPUREL_CORE];
372                     } while (ci2 != ci);
373                     ci3->ci_schedstate.spc_flags |= SPCF_CORE1ST;
374           }
375 
376           /* Identify lowest numbered SMT in each package. */
377           ci3 = NULL;
378           for (CPU_INFO_FOREACH(cii, ci)) {
379                     if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) == 0) {
380                               continue;
381                     }
382                     ci2 = ci3 = ci;
383                     mincore = ci->ci_core_id;
384                     do {
385                               if ((ci2->ci_schedstate.spc_flags &
386                                   SPCF_CORE1ST) != 0 &&
387                                   ci2->ci_core_id < mincore) {
388                                         ci3 = ci2;
389                                         mincore = ci2->ci_core_id;
390                               }
391                               ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
392                     } while (ci2 != ci);
393 
394                     if ((ci3->ci_schedstate.spc_flags & SPCF_PACKAGE1ST) != 0) {
395                               /* Already identified - nothing more to do. */
396                               continue;
397                     }
398                     ci3->ci_schedstate.spc_flags |= SPCF_PACKAGE1ST;
399 
400                     /* Walk through all CPUs in package and point to first. */
401                     ci2 = ci3;
402                     do {
403                               ci2->ci_package1st = ci3;
404                               ci2->ci_sibling[CPUREL_PACKAGE1ST] = ci3;
405                               ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
406                     } while (ci2 != ci3);
407 
408                     /* Now look for somebody else to link to. */
409                     for (CPU_INFO_FOREACH(cii2, ci2)) {
410                               if ((ci2->ci_schedstate.spc_flags & SPCF_PACKAGE1ST)
411                                   != 0 && ci2 != ci3) {
412                                         cpu_topology_link(ci3, ci2, CPUREL_PACKAGE1ST);
413                                         break;
414                               }
415                     }
416           }
417 
418           /* Walk through all packages, starting with value of ci3 from above. */
419           KASSERT(ci3 != NULL);
420           ci = ci3;
421           do {
422                     /* Walk through CPUs in the package and copy in PACKAGE1ST. */
423                     ci2 = ci;
424                     do {
425                               ci2->ci_sibling[CPUREL_PACKAGE1ST] =
426                                   ci->ci_sibling[CPUREL_PACKAGE1ST];
427                               ci2->ci_nsibling[CPUREL_PACKAGE1ST] =
428                                   ci->ci_nsibling[CPUREL_PACKAGE1ST];
429                               ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
430                     } while (ci2 != ci);
431                     ci = ci->ci_sibling[CPUREL_PACKAGE1ST];
432           } while (ci != ci3);
433 
434           if (cpu_topology_haveslow) {
435                     /*
436                      * For asymmetric systems where some CPUs are slower than
437                      * others, mark first class CPUs for the scheduler.  This
438                      * conflicts with SMT right now so whinge if observed.
439                      */
440                     if (curcpu()->ci_nsibling[CPUREL_CORE] > 1) {
441                               printf("cpu_topology_init: asymmetric & SMT??\n");
442                     }
443                     for (CPU_INFO_FOREACH(cii, ci)) {
444                               if (!ci->ci_is_slow) {
445                                         ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
446                               }
447                     }
448           } else {
449                     /*
450                      * For any other configuration mark the 1st CPU in each
451                      * core as a first class CPU.
452                      */
453                     for (CPU_INFO_FOREACH(cii, ci)) {
454                               if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) != 0) {
455                                         ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
456                               }
457                     }
458           }
459 
460           cpu_topology_dump();
461 }
462 
463 /*
464  * Adjust one count, for a counter that's NOT updated from interrupt
465  * context.  Hardly worth making an inline due to preemption stuff.
466  */
467 void
cpu_count(enum cpu_count idx,int64_t delta)468 cpu_count(enum cpu_count idx, int64_t delta)
469 {
470           lwp_t *l = curlwp;
471           KPREEMPT_DISABLE(l);
472           l->l_cpu->ci_counts[idx] += delta;
473           KPREEMPT_ENABLE(l);
474 }
475 
476 /*
477  * Fetch fresh sum total for all counts.  Expensive - don't call often.
478  *
479  * If poll is true, the caller is okay with less recent values (but
480  * no more than 1/hz seconds old).  Where this is called very often that
481  * should be the case.
482  *
483  * This should be reasonably quick so that any value collected get isn't
484  * totally out of whack, and it can also be called from interrupt context,
485  * so go to splvm() while summing the counters.  It's tempting to use a spin
486  * mutex here but this routine is called from DDB.
487  */
488 void
cpu_count_sync(bool poll)489 cpu_count_sync(bool poll)
490 {
491           CPU_INFO_ITERATOR cii;
492           struct cpu_info *ci;
493           int64_t sum[CPU_COUNT_MAX], *ptr;
494           static int lasttick;
495           int curtick, s;
496           enum cpu_count i;
497 
498           KASSERT(sizeof(ci->ci_counts) == sizeof(cpu_counts));
499 
500           if (__predict_false(!mp_online)) {
501                     memcpy(cpu_counts, curcpu()->ci_counts, sizeof(cpu_counts));
502                     return;
503           }
504 
505           s = splvm();
506           curtick = getticks();
507           if (poll && atomic_load_acquire(&lasttick) == curtick) {
508                     splx(s);
509                     return;
510           }
511           memset(sum, 0, sizeof(sum));
512           curcpu()->ci_counts[CPU_COUNT_SYNC]++;
513           for (CPU_INFO_FOREACH(cii, ci)) {
514                     ptr = ci->ci_counts;
515                     for (i = 0; i < CPU_COUNT_MAX; i += 8) {
516                               sum[i+0] += ptr[i+0];
517                               sum[i+1] += ptr[i+1];
518                               sum[i+2] += ptr[i+2];
519                               sum[i+3] += ptr[i+3];
520                               sum[i+4] += ptr[i+4];
521                               sum[i+5] += ptr[i+5];
522                               sum[i+6] += ptr[i+6];
523                               sum[i+7] += ptr[i+7];
524                     }
525                     KASSERT(i == CPU_COUNT_MAX);
526           }
527           memcpy(cpu_counts, sum, sizeof(cpu_counts));
528           atomic_store_release(&lasttick, curtick);
529           splx(s);
530 }
531