1 /* $OpenBSD: identcpu.c,v 1.148 2024/10/07 20:30:17 dv Exp $ */
2 /* $NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */
3
4 /*
5 * Copyright (c) 2003 Wasabi Systems, Inc.
6 * All rights reserved.
7 *
8 * Written by Frank van der Linden for Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project by
21 * Wasabi Systems, Inc.
22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
23 * or promote products derived from this software without specific prior
24 * written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/atomic.h>
42 #include <sys/proc.h>
43 #include <sys/sysctl.h>
44
45 #include "vmm.h"
46 #include "pvbus.h"
47
48 #include <machine/cpu.h>
49 #include <machine/cpufunc.h>
50
51 #if NPVBUS > 0
52 #include <dev/pv/pvvar.h>
53 #endif
54
55 void replacesmap(void);
56 void replacemeltdown(void);
57 uint64_t cpu_freq(struct cpu_info *);
58 void tsc_identify(struct cpu_info *);
59 void tsc_timecounter_init(struct cpu_info *, uint64_t);
60 #if NVMM > 0
61 void cpu_check_vmm_cap(struct cpu_info *);
62 #endif /* NVMM > 0 */
63
64 /* sysctl wants this. */
65 char cpu_model[48];
66 int cpuspeed;
67
68 int amd64_has_xcrypt;
69 int amd64_pos_cbit; /* C bit position for SEV */
70 int has_rdrand;
71 int has_rdseed;
72
73 int
cpu_amd64speed(int * freq)74 cpu_amd64speed(int *freq)
75 {
76 *freq = cpuspeed;
77 return (0);
78 }
79
80 #ifndef SMALL_KERNEL
81 void intelcore_update_sensor(void *);
82 void cpu_hz_update_sensor(void *);
83
84 /*
85 * Temperature read on the CPU is relative to the maximum
86 * temperature supported by the CPU, Tj(Max).
87 * Refer to:
88 * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
89 * Section 35 and
90 * http://www.intel.com/content/dam/www/public/us/en/documents/
91 * white-papers/cpu-monitoring-dts-peci-paper.pdf
92 *
93 * The temperature on Intel CPUs can be between 70 and 105 degC, since
94 * Westmere we can read the TJmax from the die. For older CPUs we have
95 * to guess or use undocumented MSRs. Then we subtract the temperature
96 * portion of thermal status from max to get current temperature.
97 */
98 void
intelcore_update_sensor(void * args)99 intelcore_update_sensor(void *args)
100 {
101 struct cpu_info *ci = (struct cpu_info *) args;
102 u_int64_t msr;
103 int max = 100;
104
105 /* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
106 if (ci->ci_model == 0x0e &&
107 (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) &
108 MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
109 max = 85;
110
111 /*
112 * Newer CPUs can tell you what their max temperature is.
113 * See: '64-ia-32-architectures-software-developer-
114 * vol-3c-part-3-manual.pdf'
115 */
116 if (ci->ci_model > 0x17 && ci->ci_model != 0x1c &&
117 ci->ci_model != 0x26 && ci->ci_model != 0x27 &&
118 ci->ci_model != 0x35 && ci->ci_model != 0x36)
119 max = MSR_TEMPERATURE_TARGET_TJMAX(
120 rdmsr(MSR_TEMPERATURE_TARGET));
121
122 msr = rdmsr(MSR_THERM_STATUS);
123 if (msr & MSR_THERM_STATUS_VALID_BIT) {
124 ci->ci_sensor.value = max - MSR_THERM_STATUS_TEMP(msr);
125 /* micro degrees */
126 ci->ci_sensor.value *= 1000000;
127 /* kelvin */
128 ci->ci_sensor.value += 273150000;
129 ci->ci_sensor.flags &= ~SENSOR_FINVALID;
130 } else {
131 ci->ci_sensor.value = 0;
132 ci->ci_sensor.flags |= SENSOR_FINVALID;
133 }
134 }
135
136 /*
137 * Effective CPU frequency measurement
138 *
139 * Refer to:
140 * 64-ia-32-architectures-software-developer-vol-3b-part-2-manual.pdf
141 * Section 14.2 and
142 * OSRR for AMD Family 17h processors Section 2.1.2
143 * Round to 50Mhz which is the accuracy of this measurement.
144 */
145 #define FREQ_50MHZ (50ULL * 1000000ULL * 1000000ULL)
146 void
cpu_hz_update_sensor(void * args)147 cpu_hz_update_sensor(void *args)
148 {
149 extern uint64_t tsc_frequency;
150 struct cpu_info *ci = args;
151 uint64_t mperf, aperf, mdelta, adelta, val;
152 unsigned long s;
153
154 sched_peg_curproc(ci);
155
156 s = intr_disable();
157 mperf = rdmsr(MSR_MPERF);
158 aperf = rdmsr(MSR_APERF);
159 intr_restore(s);
160
161 mdelta = mperf - ci->ci_hz_mperf;
162 adelta = aperf - ci->ci_hz_aperf;
163 ci->ci_hz_mperf = mperf;
164 ci->ci_hz_aperf = aperf;
165
166 if (mdelta > 0) {
167 val = (adelta * 1000000) / mdelta * tsc_frequency;
168 val = ((val + FREQ_50MHZ / 2) / FREQ_50MHZ) * FREQ_50MHZ;
169 ci->ci_hz_sensor.value = val;
170 }
171
172 sched_unpeg_curproc();
173 }
174 #endif
175
176 void (*setperf_setup)(struct cpu_info *);
177
178 void via_nano_setup(struct cpu_info *ci);
179
180 void cpu_topology(struct cpu_info *ci);
181
182 void
via_nano_setup(struct cpu_info * ci)183 via_nano_setup(struct cpu_info *ci)
184 {
185 u_int32_t regs[4], val;
186 u_int64_t msreg;
187 int model = (ci->ci_signature >> 4) & 15;
188
189 if (model >= 9) {
190 CPUID(0xC0000000, regs[0], regs[1], regs[2], regs[3]);
191 val = regs[0];
192 if (val >= 0xC0000001) {
193 CPUID(0xC0000001, regs[0], regs[1], regs[2], regs[3]);
194 val = regs[3];
195 } else
196 val = 0;
197
198 if (val & (C3_CPUID_HAS_RNG | C3_CPUID_HAS_ACE))
199 printf("%s:", ci->ci_dev->dv_xname);
200
201 /* Enable RNG if present and disabled */
202 if (val & C3_CPUID_HAS_RNG) {
203 extern int viac3_rnd_present;
204
205 if (!(val & C3_CPUID_DO_RNG)) {
206 msreg = rdmsr(0x110B);
207 msreg |= 0x40;
208 wrmsr(0x110B, msreg);
209 }
210 viac3_rnd_present = 1;
211 printf(" RNG");
212 }
213
214 /* Enable AES engine if present and disabled */
215 if (val & C3_CPUID_HAS_ACE) {
216 #ifdef CRYPTO
217 if (!(val & C3_CPUID_DO_ACE)) {
218 msreg = rdmsr(0x1107);
219 msreg |= (0x01 << 28);
220 wrmsr(0x1107, msreg);
221 }
222 amd64_has_xcrypt |= C3_HAS_AES;
223 #endif /* CRYPTO */
224 printf(" AES");
225 }
226
227 /* Enable ACE2 engine if present and disabled */
228 if (val & C3_CPUID_HAS_ACE2) {
229 #ifdef CRYPTO
230 if (!(val & C3_CPUID_DO_ACE2)) {
231 msreg = rdmsr(0x1107);
232 msreg |= (0x01 << 28);
233 wrmsr(0x1107, msreg);
234 }
235 amd64_has_xcrypt |= C3_HAS_AESCTR;
236 #endif /* CRYPTO */
237 printf(" AES-CTR");
238 }
239
240 /* Enable SHA engine if present and disabled */
241 if (val & C3_CPUID_HAS_PHE) {
242 #ifdef CRYPTO
243 if (!(val & C3_CPUID_DO_PHE)) {
244 msreg = rdmsr(0x1107);
245 msreg |= (0x01 << 28/**/);
246 wrmsr(0x1107, msreg);
247 }
248 amd64_has_xcrypt |= C3_HAS_SHA;
249 #endif /* CRYPTO */
250 printf(" SHA1 SHA256");
251 }
252
253 /* Enable MM engine if present and disabled */
254 if (val & C3_CPUID_HAS_PMM) {
255 #ifdef CRYPTO
256 if (!(val & C3_CPUID_DO_PMM)) {
257 msreg = rdmsr(0x1107);
258 msreg |= (0x01 << 28/**/);
259 wrmsr(0x1107, msreg);
260 }
261 amd64_has_xcrypt |= C3_HAS_MM;
262 #endif /* CRYPTO */
263 printf(" RSA");
264 }
265
266 printf("\n");
267 }
268 }
269
270 #ifndef SMALL_KERNEL
271 void via_update_sensor(void *args);
272 void
via_update_sensor(void * args)273 via_update_sensor(void *args)
274 {
275 struct cpu_info *ci = (struct cpu_info *) args;
276 u_int64_t msr;
277
278 msr = rdmsr(MSR_CENT_TMTEMPERATURE);
279 ci->ci_sensor.value = (msr & 0xffffff);
280 /* micro degrees */
281 ci->ci_sensor.value *= 1000000;
282 ci->ci_sensor.value += 273150000;
283 ci->ci_sensor.flags &= ~SENSOR_FINVALID;
284 }
285 #endif
286
287 uint64_t
cpu_freq_ctr(struct cpu_info * ci,uint32_t cpu_perf_eax,uint32_t cpu_perf_edx)288 cpu_freq_ctr(struct cpu_info *ci, uint32_t cpu_perf_eax,
289 uint32_t cpu_perf_edx)
290 {
291 uint64_t count, last_count, msr;
292
293 if ((ci->ci_flags & CPUF_CONST_TSC) == 0 ||
294 (cpu_perf_eax & CPUIDEAX_VERID) <= 1 ||
295 CPUIDEDX_NUM_FC(cpu_perf_edx) <= 1)
296 return (0);
297
298 msr = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
299 if (msr & MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_MASK)) {
300 /* some hypervisor is dicking us around */
301 return (0);
302 }
303
304 msr |= MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_1);
305 wrmsr(MSR_PERF_FIXED_CTR_CTRL, msr);
306
307 msr = rdmsr(MSR_PERF_GLOBAL_CTRL) | MSR_PERF_GLOBAL_CTR1_EN;
308 wrmsr(MSR_PERF_GLOBAL_CTRL, msr);
309
310 last_count = rdmsr(MSR_PERF_FIXED_CTR1);
311 delay(100000);
312 count = rdmsr(MSR_PERF_FIXED_CTR1);
313
314 msr = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
315 msr &= MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_MASK);
316 wrmsr(MSR_PERF_FIXED_CTR_CTRL, msr);
317
318 msr = rdmsr(MSR_PERF_GLOBAL_CTRL);
319 msr &= ~MSR_PERF_GLOBAL_CTR1_EN;
320 wrmsr(MSR_PERF_GLOBAL_CTRL, msr);
321
322 return ((count - last_count) * 10);
323 }
324
325 uint64_t
cpu_freq(struct cpu_info * ci)326 cpu_freq(struct cpu_info *ci)
327 {
328 uint64_t last_count, count;
329
330 last_count = rdtsc();
331 delay(100000);
332 count = rdtsc();
333
334 return ((count - last_count) * 10);
335 }
336
/*
 * Print the flags from one cpuid leaf for cpu0.  Up to three registers
 * are shown as " e<reg>x=<flags>", each decoded by %b with its own bits
 * description.  Registers whose value is zero are left out, and nothing
 * at all is printed when all three values are zero.
 */
static inline void
pcpu0id3(const char *id, char reg1, uint32_t val1, const char *bits1,
    char reg2, uint32_t val2, const char *bits2,
    char reg3, uint32_t val3, const char *bits3)
{
	if ((val1 | val2 | val3) == 0)
		return;

	printf("\ncpu0: cpuid %s", id);
	if (val1 != 0)
		printf(" e%cx=%b", reg1, val1, bits1);
	if (val2 != 0)
		printf(" e%cx=%b", reg2, val2, bits2);
	if (val3 != 0)
		printf(" e%cx=%b", reg3, val3, bits3);
}
353
/*
 * Print the flags from one 32-bit MSR value for cpu0, decoded by %b
 * with the given bits description.  A zero value prints nothing.
 */
static inline void
pmsr032(uint32_t msr, uint32_t value, const char *bits)
{
	if (value == 0)
		return;

	printf("\ncpu0: msr %x=%b", msr, value, bits);
}
361
/*
 * Print how value differs from base_value: the bits that are set only
 * in base_value as "-<flags>", followed by the bits that are set only
 * in value as "+<flags>".  Prints nothing when the two are equal.
 */
static void
pbitdiff(uint32_t value, uint32_t base_value, const char *bits)
{
	uint32_t lost = base_value & ~value;
	uint32_t gained = value & ~base_value;

	if (lost != 0)
		printf("-%b", lost, bits);
	if (gained != 0)
		printf("+%b", gained, bits);
}
375
376 static inline void
pcpuid(struct cpu_info * ci,const char * id,char reg,uint32_t val,uint32_t prev_val,const char * bits)377 pcpuid(struct cpu_info *ci, const char *id, char reg, uint32_t val,
378 uint32_t prev_val, const char *bits)
379 {
380 if (CPU_IS_PRIMARY(ci))
381 pcpu0id3(id, reg, val, bits, 0, 0, NULL, 0, 0, NULL);
382 else if (val != prev_val) {
383 printf("\n%s: cpuid %s e%cx=", ci->ci_dev->dv_xname, id, reg);
384 pbitdiff(val, prev_val, bits);
385 }
386 }
387
388 static inline void
pcpuid2(struct cpu_info * ci,const char * id,char reg1,uint32_t val1,uint32_t prev_val1,const char * bits1,char reg2,uint32_t val2,uint32_t prev_val2,const char * bits2)389 pcpuid2(struct cpu_info *ci, const char *id,
390 char reg1, uint32_t val1, uint32_t prev_val1, const char *bits1,
391 char reg2, uint32_t val2, uint32_t prev_val2, const char *bits2)
392 {
393 if (CPU_IS_PRIMARY(ci))
394 pcpu0id3(id, reg1, val1, bits1, reg2, val2, bits2, 0, 0,
395 NULL);
396 else if (val1 != prev_val1 || val2 != prev_val2) {
397 printf("\n%s: cpuid %s", ci->ci_dev->dv_xname, id);
398 if (val1 != prev_val1) {
399 printf(" e%cx=", reg1);
400 pbitdiff(val1, prev_val1, bits1);
401 }
402 if (val2 != prev_val2) {
403 printf(" e%cx=", reg2);
404 pbitdiff(val2, prev_val2, bits2);
405 }
406 }
407 }
408
409 static inline void
pcpuid3(struct cpu_info * ci,const char * id,char reg1,uint32_t val1,uint32_t prev_val1,const char * bits1,char reg2,uint32_t val2,uint32_t prev_val2,const char * bits2,char reg3,uint32_t val3,uint32_t prev_val3,const char * bits3)410 pcpuid3(struct cpu_info *ci, const char *id,
411 char reg1, uint32_t val1, uint32_t prev_val1, const char *bits1,
412 char reg2, uint32_t val2, uint32_t prev_val2, const char *bits2,
413 char reg3, uint32_t val3, uint32_t prev_val3, const char *bits3)
414 {
415 if (CPU_IS_PRIMARY(ci))
416 pcpu0id3(id, reg1, val1, bits1, reg2, val2, bits2, reg3, val3,
417 bits3);
418 else if (val1 != prev_val1 || val2 != prev_val2 || val3 != prev_val3) {
419 printf("\n%s: cpuid %s", ci->ci_dev->dv_xname, id);
420 if (val1 != prev_val1) {
421 printf(" e%cx=", reg1);
422 pbitdiff(val1, prev_val1, bits1);
423 }
424 if (val2 != prev_val2) {
425 printf(" e%cx=", reg2);
426 pbitdiff(val2, prev_val2, bits2);
427 }
428 if (val3 != prev_val3) {
429 printf(" e%cx=", reg3);
430 pbitdiff(val3, prev_val3, bits3);
431 }
432 }
433 }
434
435 static inline void
pmsr32(struct cpu_info * ci,uint32_t msr,uint32_t value,uint32_t prev_value,const char * bits)436 pmsr32(struct cpu_info *ci, uint32_t msr, uint32_t value, uint32_t prev_value,
437 const char *bits)
438 {
439 if (CPU_IS_PRIMARY(ci))
440 pmsr032(msr, value, bits);
441 else if (value != prev_value) {
442 printf("\n%s: msr %x=", ci->ci_dev->dv_xname, msr);
443 pbitdiff(value, prev_value, bits);
444 }
445 }
446
447 #ifdef MULTIPROCESSOR
448 static uint32_t prevcpu_perf_eax;
449 static uint32_t prevcpu_perf_edx;
450 #endif
451
452 static inline void
print_perf_cpuid(struct cpu_info * ci,uint32_t cpu_perf_eax,uint32_t cpu_perf_edx)453 print_perf_cpuid(struct cpu_info *ci, uint32_t cpu_perf_eax,
454 uint32_t cpu_perf_edx)
455 {
456 uint32_t version;
457
458 if (CPU_IS_PRIMARY(ci)) {
459 version = cpu_perf_eax & CPUIDEAX_VERID;
460 if (version == 0)
461 return;
462 }
463 #ifdef MULTIPROCESSOR
464 else {
465 /* if no difference on the bits we care about, say nothing */
466 if (((cpu_perf_eax ^ prevcpu_perf_eax) & 0x00ffffff) == 0 &&
467 ((cpu_perf_edx ^ prevcpu_perf_edx) & 0x00001fff) == 0)
468 return;
469 version = cpu_perf_eax & CPUIDEAX_VERID;
470 }
471 prevcpu_perf_eax = cpu_perf_eax;
472 prevcpu_perf_edx = cpu_perf_edx;
473 #endif
474
475 printf("\n%s: cpuid a vers=%d", ci->ci_dev->dv_xname, version);
476 if (version) {
477 printf(", gp=%d, gpwidth=%d", CPUIDEAX_NUM_GC(cpu_perf_eax),
478 CPUIDEAX_BIT_GC(cpu_perf_eax));
479 if (version > 1) {
480 printf(", ff=%d, ffwidth=%d",
481 CPUIDEDX_NUM_FC(cpu_perf_edx),
482 CPUIDEDX_BIT_FC(cpu_perf_edx));
483 }
484 }
485 }
486
487 void
identifycpu(struct cpu_info * ci)488 identifycpu(struct cpu_info *ci)
489 {
490 static uint32_t prevcpu_1_ecx, prevcpu_tpm_ecxflags, prevcpu_d_1_eax;
491 static uint32_t prevcpu_apmi_edx, prevcpu_arch_capa;
492 static struct cpu_info *prevci = &cpu_info_primary;
493 #define CPUID_MEMBER(member) ci->member, prevci->member
494 uint32_t cflushsz, curcpu_1_ecx, curcpu_apmi_edx = 0;
495 uint32_t curcpu_perf_eax = 0, curcpu_perf_edx = 0;
496 uint32_t curcpu_tpm_ecxflags = 0, curcpu_d_1_eax = 0;
497 uint64_t freq = 0;
498 u_int32_t dummy;
499 char mycpu_model[48];
500 char *brandstr_from, *brandstr_to;
501 int skipspace;
502
503 CPUID(0x80000000, ci->ci_pnfeatset, dummy, dummy, dummy);
504 CPUID(0x80000001, ci->ci_efeature_eax, dummy, ci->ci_efeature_ecx,
505 ci->ci_feature_eflags);
506
507 if (CPU_IS_PRIMARY(ci)) {
508 ci->ci_signature = cpu_id;
509 ci->ci_feature_flags = cpu_feature & ~CPUID_NXE;
510 cflushsz = cpu_ebxfeature;
511 curcpu_1_ecx = cpu_ecxfeature;
512 ecpu_ecxfeature = ci->ci_efeature_ecx;
513 } else {
514 CPUID(1, ci->ci_signature, cflushsz, curcpu_1_ecx,
515 ci->ci_feature_flags);
516 /* Let cpu_feature be the common bits */
517 cpu_feature &= ci->ci_feature_flags |
518 (ci->ci_feature_eflags & CPUID_NXE);
519 cpu_ecxfeature &= curcpu_1_ecx;
520 }
521 /* cflush cacheline size is equal to bits 15-8 of ebx * 8 */
522 ci->ci_cflushsz = ((cflushsz >> 8) & 0xff) * 8;
523
524 CPUID(0x80000002, ci->ci_brand[0],
525 ci->ci_brand[1], ci->ci_brand[2], ci->ci_brand[3]);
526 CPUID(0x80000003, ci->ci_brand[4],
527 ci->ci_brand[5], ci->ci_brand[6], ci->ci_brand[7]);
528 CPUID(0x80000004, ci->ci_brand[8],
529 ci->ci_brand[9], ci->ci_brand[10], ci->ci_brand[11]);
530 strlcpy(mycpu_model, (char *)ci->ci_brand, sizeof(mycpu_model));
531
532 /* Remove leading, trailing and duplicated spaces from mycpu_model */
533 brandstr_from = brandstr_to = mycpu_model;
534 skipspace = 1;
535 while (*brandstr_from != '\0') {
536 if (!skipspace || *brandstr_from != ' ') {
537 skipspace = 0;
538 *(brandstr_to++) = *brandstr_from;
539 }
540 if (*brandstr_from == ' ')
541 skipspace = 1;
542 brandstr_from++;
543 }
544 if (skipspace && brandstr_to > mycpu_model)
545 brandstr_to--;
546 *brandstr_to = '\0';
547
548 if (mycpu_model[0] == 0)
549 strlcpy(mycpu_model, "Opteron or Athlon 64",
550 sizeof(mycpu_model));
551
552 /* If primary cpu, fill in the global cpu_model used by sysctl */
553 if (CPU_IS_PRIMARY(ci))
554 strlcpy(cpu_model, mycpu_model, sizeof(cpu_model));
555
556 ci->ci_family = (ci->ci_signature >> 8) & 0x0f;
557 ci->ci_model = (ci->ci_signature >> 4) & 0x0f;
558 if (ci->ci_family == 0x6 || ci->ci_family == 0xf) {
559 ci->ci_family += (ci->ci_signature >> 20) & 0xff;
560 ci->ci_model += ((ci->ci_signature >> 16) & 0x0f) << 4;
561 }
562
563 #if NPVBUS > 0
564 /* Detect hypervisors early, attach the paravirtual bus later */
565 if (CPU_IS_PRIMARY(ci) && cpu_ecxfeature & CPUIDECX_HV)
566 pvbus_identify();
567 #endif
568
569 if (ci->ci_pnfeatset >= 0x80000007)
570 CPUID(0x80000007, dummy, dummy, dummy, curcpu_apmi_edx);
571
572 if (ci->ci_feature_flags && ci->ci_feature_flags & CPUID_TSC) {
573 /* Has TSC, check if it's constant */
574 if (ci->ci_vendor == CPUV_INTEL) {
575 if ((ci->ci_family == 0x0f && ci->ci_model >= 0x03) ||
576 (ci->ci_family == 0x06 && ci->ci_model >= 0x0e)) {
577 atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
578 }
579 } else if (ci->ci_vendor == CPUV_VIA) {
580 /* VIA */
581 if (ci->ci_model >= 0x0f) {
582 atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
583 }
584 } else if (ci->ci_vendor == CPUV_AMD) {
585 if (curcpu_apmi_edx & CPUIDEDX_ITSC) {
586 /* Invariant TSC indicates constant TSC on AMD */
587 atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
588 }
589 }
590
591 /* Check if it's an invariant TSC */
592 if (curcpu_apmi_edx & CPUIDEDX_ITSC)
593 atomic_setbits_int(&ci->ci_flags, CPUF_INVAR_TSC);
594
595 tsc_identify(ci);
596 }
597
598 if (ci->ci_cpuid_level >= 0xa) {
599 CPUID(0xa, curcpu_perf_eax, dummy, dummy, curcpu_perf_edx);
600
601 freq = cpu_freq_ctr(ci, curcpu_perf_eax, curcpu_perf_edx);
602 }
603 if (freq == 0)
604 freq = cpu_freq(ci);
605
606 if (ci->ci_cpuid_level >= 0x07) {
607 /* "Structured Extended Feature Flags" */
608 CPUID_LEAF(0x7, 0, dummy, ci->ci_feature_sefflags_ebx,
609 ci->ci_feature_sefflags_ecx, ci->ci_feature_sefflags_edx);
610 /* SEFF0ECX_OSPKE is set late on AP */
611 ci->ci_feature_sefflags_ecx &= ~SEFF0ECX_OSPKE;
612 }
613
614 printf("%s: %s", ci->ci_dev->dv_xname, mycpu_model);
615
616 if (freq != 0)
617 printf(", %llu.%02llu MHz", (freq + 4999) / 1000000,
618 ((freq + 4999) / 10000) % 100);
619
620 if (CPU_IS_PRIMARY(ci)) {
621 cpuspeed = (freq + 4999) / 1000000;
622 cpu_cpuspeed = cpu_amd64speed;
623 }
624
625 printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
626 ci->ci_signature & 0x0f);
627
628 if ((cpu_ecxfeature & CPUIDECX_HV) == 0) {
629 uint64_t level = 0;
630 uint32_t dummy;
631
632 if (ci->ci_vendor == CPUV_AMD) {
633 level = rdmsr(MSR_PATCH_LEVEL);
634 } else if (ci->ci_vendor == CPUV_INTEL) {
635 wrmsr(MSR_BIOS_SIGN, 0);
636 CPUID(1, dummy, dummy, dummy, dummy);
637 level = rdmsr(MSR_BIOS_SIGN) >> 32;
638 }
639 if (level != 0)
640 printf(", patch %08llx", level);
641 }
642
643 if (ci->ci_cpuid_level >= 0x06)
644 CPUID(0x06, ci->ci_feature_tpmflags, dummy,
645 curcpu_tpm_ecxflags, dummy);
646 if (ci->ci_vendor == CPUV_AMD && ci->ci_family >= 0x12)
647 ci->ci_feature_tpmflags |= TPM_ARAT;
648
649 /* xsave subfeatures */
650 if (ci->ci_cpuid_level >= 0xd)
651 CPUID_LEAF(0xd, 1, curcpu_d_1_eax, dummy, dummy, dummy);
652
653 pcpuid2(ci, "1", 'd', CPUID_MEMBER(ci_feature_flags), CPUID_EDX_BITS,
654 'c', curcpu_1_ecx, prevcpu_1_ecx, CPUID_ECX_BITS);
655 pcpuid2(ci, "6", 'a', CPUID_MEMBER(ci_feature_tpmflags), TPM_EAX_BITS,
656 'c', curcpu_tpm_ecxflags, prevcpu_tpm_ecxflags, TPM_ECX_BITS);
657 pcpuid3(ci, "7.0",
658 'b', CPUID_MEMBER(ci_feature_sefflags_ebx), SEFF0_EBX_BITS,
659 'c', CPUID_MEMBER(ci_feature_sefflags_ecx), SEFF0_ECX_BITS,
660 'd', CPUID_MEMBER(ci_feature_sefflags_edx), SEFF0_EDX_BITS);
661 print_perf_cpuid(ci, curcpu_perf_eax, curcpu_perf_edx);
662 pcpuid(ci, "d.1", 'a', curcpu_d_1_eax, prevcpu_d_1_eax, XSAVE_BITS);
663 pcpuid2(ci, "80000001",
664 'd', CPUID_MEMBER(ci_feature_eflags), CPUIDE_EDX_BITS,
665 'c', CPUID_MEMBER(ci_efeature_ecx), CPUIDE_ECX_BITS);
666 pcpuid(ci, "80000007", 'd', curcpu_apmi_edx, prevcpu_apmi_edx,
667 CPUID_APMI_EDX_BITS);
668 #ifdef MULTIPROCESSOR
669 prevcpu_1_ecx = curcpu_1_ecx;
670 prevcpu_tpm_ecxflags = curcpu_tpm_ecxflags;
671 prevcpu_d_1_eax = curcpu_d_1_eax;
672 prevcpu_apmi_edx = curcpu_apmi_edx;
673 #endif
674
675 /* speculation control features */
676 if (ci->ci_vendor == CPUV_AMD) {
677 if (ci->ci_pnfeatset >= 0x80000008) {
678 CPUID(0x80000008, dummy, ci->ci_feature_amdspec_ebx,
679 dummy, dummy);
680 pcpuid(ci, "80000008", 'b',
681 CPUID_MEMBER(ci_feature_amdspec_ebx),
682 CPUID_AMDSPEC_EBX_BITS);
683 }
684 } else if (ci->ci_vendor == CPUV_INTEL) {
685 if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
686 uint32_t msr = rdmsr(MSR_ARCH_CAPABILITIES);
687
688 pmsr32(ci, MSR_ARCH_CAPABILITIES, msr,
689 prevcpu_arch_capa, ARCH_CAP_MSR_BITS);
690 prevcpu_arch_capa = msr;
691 if (!CPU_IS_PRIMARY(ci) && cpu_meltdown &&
692 (msr & ARCH_CAP_RDCL_NO))
693 printf("\n%s: -MELTDOWN", ci->ci_dev->dv_xname);
694 }
695 if (cpu_meltdown && CPU_IS_PRIMARY(ci))
696 printf("\n%s: MELTDOWN", ci->ci_dev->dv_xname);
697 }
698
699 /* AMD secure memory encryption and encrypted virtualization features */
700 if (ci->ci_vendor == CPUV_AMD &&
701 ci->ci_pnfeatset >= CPUID_AMD_SEV_CAP) {
702 CPUID(CPUID_AMD_SEV_CAP, ci->ci_feature_amdsev_eax,
703 ci->ci_feature_amdsev_ebx, ci->ci_feature_amdsev_ecx,
704 ci->ci_feature_amdsev_edx);
705 pcpuid3(ci, "8000001F",
706 'a', CPUID_MEMBER(ci_feature_amdsev_eax),
707 CPUID_AMDSEV_EAX_BITS,
708 'c', CPUID_MEMBER(ci_feature_amdsev_ecx),
709 CPUID_AMDSEV_ECX_BITS,
710 'd', CPUID_MEMBER(ci_feature_amdsev_edx),
711 CPUID_AMDSEV_EDX_BITS);
712 amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f);
713 }
714
715 printf("\n");
716
717 replacemeltdown();
718 x86_print_cacheinfo(ci);
719
720 if (CPU_IS_PRIMARY(ci)) {
721 #ifndef SMALL_KERNEL
722 if (ci->ci_vendor == CPUV_AMD &&
723 ci->ci_pnfeatset >= 0x80000007) {
724 if (curcpu_apmi_edx & 0x06) {
725 if ((ci->ci_signature & 0xF00) == 0xF00)
726 setperf_setup = k8_powernow_init;
727 }
728 if (ci->ci_family >= 0x10)
729 setperf_setup = k1x_init;
730 }
731
732 if (cpu_ecxfeature & CPUIDECX_EST)
733 setperf_setup = est_init;
734 #endif
735
736 if (cpu_ecxfeature & CPUIDECX_RDRAND)
737 has_rdrand = 1;
738
739 if (ci->ci_feature_sefflags_ebx & SEFF0EBX_RDSEED)
740 has_rdseed = 1;
741
742 if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
743 replacesmap();
744 }
745
746 #ifndef SMALL_KERNEL
747 if (CPU_IS_PRIMARY(ci) && (ci->ci_feature_tpmflags & TPM_SENSOR) &&
748 ci->ci_vendor == CPUV_INTEL) {
749 ci->ci_sensor.type = SENSOR_TEMP;
750 sensor_task_register(ci, intelcore_update_sensor, 5);
751 sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
752 }
753 #endif
754
755 if (CPU_IS_PRIMARY(ci) && ci->ci_vendor == CPUV_VIA) {
756 ci->cpu_setup = via_nano_setup;
757 #ifndef SMALL_KERNEL
758 ci->ci_sensor.type = SENSOR_TEMP;
759 sensor_task_register(ci, via_update_sensor, 5);
760 sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
761 #endif
762 }
763
764 tsc_timecounter_init(ci, freq);
765
766 cpu_topology(ci);
767 #if NVMM > 0
768 cpu_check_vmm_cap(ci);
769 #endif /* NVMM > 0 */
770
771 /* Check for effective frequency via MPERF, APERF */
772 if ((curcpu_tpm_ecxflags & TPM_EFFFREQ) && ci->ci_smt_id == 0) {
773 #ifndef SMALL_KERNEL
774 ci->ci_hz_sensor.type = SENSOR_FREQ;
775 sensor_task_register(ci, cpu_hz_update_sensor, 1);
776 sensor_attach(&ci->ci_sensordev, &ci->ci_hz_sensor);
777 #endif
778 }
779 prevci = ci;
780 }
781
782 #ifndef SMALL_KERNEL
/*
 * Floor of the base 2 logarithm of i, i.e. the index of its highest
 * set bit.  Returns 0 for i == 0 as well as for i == 1.
 */
static int
log2(unsigned int i)
{
	int shifts;

	for (shifts = 0; (i >> 1) != 0; i >>= 1)
		shifts++;

	return (shifts);
}
796
/*
 * Width in bits of a field able to hold x distinct ids (0 .. x - 1),
 * i.e. ceil(log2(x)).  Returns 0 for x <= 1: a single id needs no bits,
 * and a count of 0 is treated the same way so that callers never end up
 * shifting by the full width of an int.
 *
 * The previous version computed fls((x << 1) - 1) for values that are
 * not a power of two, which is one bit too wide (3 ids -> 3 bits).
 */
static int
mask_width(u_int x)
{
	int bit = 0;

	if (x <= 1)
		return (0);

	/* fls(x - 1): position of the highest bit set in the largest id */
	for (x--; x != 0; x >>= 1)
		bit++;

	return (bit);
}
815 #endif
816
817 /*
818 * Build up cpu topology for given cpu, must run on the core itself.
819 */
820 void
cpu_topology(struct cpu_info * ci)821 cpu_topology(struct cpu_info *ci)
822 {
823 #ifndef SMALL_KERNEL
824 u_int32_t eax, ebx, ecx, edx;
825 u_int32_t apicid, max_apicid = 0, max_coreid = 0;
826 u_int32_t smt_bits = 0, core_bits, pkg_bits = 0;
827 u_int32_t smt_mask = 0, core_mask, pkg_mask = 0;
828
829 /* We need at least apicid at CPUID 1 */
830 if (ci->ci_cpuid_level < 1)
831 goto no_topology;
832
833 /* Initial apicid */
834 CPUID(1, eax, ebx, ecx, edx);
835 apicid = (ebx >> 24) & 0xff;
836
837 if (ci->ci_vendor == CPUV_AMD) {
838 uint32_t nthreads = 1; /* per core */
839 uint32_t thread_id; /* within a package */
840
841 /* We need at least apicid at CPUID 0x80000008 */
842 if (ci->ci_pnfeatset < 0x80000008)
843 goto no_topology;
844
845 CPUID(0x80000008, eax, ebx, ecx, edx);
846 core_bits = (ecx >> 12) & 0xf;
847
848 if (ci->ci_pnfeatset >= 0x8000001e) {
849 CPUID(0x8000001e, eax, ebx, ecx, edx);
850 nthreads = ((ebx >> 8) & 0xf) + 1;
851 }
852
853 /* Shift the core_bits off to get at the pkg bits */
854 ci->ci_pkg_id = apicid >> core_bits;
855
856 /* Get rid of the package bits */
857 core_mask = (1U << core_bits) - 1;
858 thread_id = apicid & core_mask;
859
860 /* Cut logical thread_id into core id, and smt id in a core */
861 ci->ci_core_id = thread_id / nthreads;
862 ci->ci_smt_id = thread_id % nthreads;
863 } else if (ci->ci_vendor == CPUV_INTEL) {
864 /* We only support leaf 1/4 detection */
865 if (ci->ci_cpuid_level < 4)
866 goto no_topology;
867 /* Get max_apicid */
868 CPUID(1, eax, ebx, ecx, edx);
869 max_apicid = (ebx >> 16) & 0xff;
870 /* Get max_coreid */
871 CPUID_LEAF(4, 0, eax, ebx, ecx, edx);
872 max_coreid = ((eax >> 26) & 0x3f) + 1;
873 /* SMT */
874 smt_bits = mask_width(max_apicid / max_coreid);
875 smt_mask = (1U << smt_bits) - 1;
876 /* Core */
877 core_bits = log2(max_coreid);
878 core_mask = (1U << (core_bits + smt_bits)) - 1;
879 core_mask ^= smt_mask;
880 /* Pkg */
881 pkg_bits = core_bits + smt_bits;
882 pkg_mask = ~0U << core_bits;
883
884 ci->ci_smt_id = apicid & smt_mask;
885 ci->ci_core_id = (apicid & core_mask) >> smt_bits;
886 ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
887 } else
888 goto no_topology;
889 #ifdef DEBUG
890 printf("cpu%d: smt %u, core %u, pkg %u "
891 "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
892 "core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
893 ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
894 apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
895 core_mask, pkg_bits, pkg_mask);
896 #else
897 printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
898 ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
899
900 #endif
901 return;
902 /* We can't map, so consider ci_core_id as ci_cpuid */
903 no_topology:
904 #endif
905 ci->ci_smt_id = 0;
906 ci->ci_core_id = ci->ci_cpuid;
907 ci->ci_pkg_id = 0;
908 }
909
910 #if NVMM > 0
911 /*
912 * cpu_check_vmm_cap
913 *
914 * Checks for VMM capabilities for 'ci'. Initializes certain per-cpu VMM
915 * state in 'ci' if virtualization extensions are found.
916 *
917 * Parameters:
918 * ci: the cpu being checked
919 */
920 void
cpu_check_vmm_cap(struct cpu_info * ci)921 cpu_check_vmm_cap(struct cpu_info *ci)
922 {
923 uint64_t msr;
924 uint32_t cap, dummy, edx;
925
926 /*
927 * Check for workable VMX
928 */
929 if (cpu_ecxfeature & CPUIDECX_VMX) {
930 msr = rdmsr(MSR_IA32_FEATURE_CONTROL);
931
932 if (!(msr & IA32_FEATURE_CONTROL_LOCK))
933 ci->ci_vmm_flags |= CI_VMM_VMX;
934 else {
935 if (msr & IA32_FEATURE_CONTROL_VMX_EN)
936 ci->ci_vmm_flags |= CI_VMM_VMX;
937 else
938 ci->ci_vmm_flags |= CI_VMM_DIS;
939 }
940 }
941
942 /*
943 * Check for EPT (Intel Nested Paging) and other secondary
944 * controls
945 */
946 if (ci->ci_vmm_flags & CI_VMM_VMX) {
947 /* Secondary controls available? */
948 /* XXX should we check true procbased ctls here if avail? */
949 msr = rdmsr(IA32_VMX_PROCBASED_CTLS);
950 if (msr & (IA32_VMX_ACTIVATE_SECONDARY_CONTROLS) << 32) {
951 msr = rdmsr(IA32_VMX_PROCBASED2_CTLS);
952 /* EPT available? */
953 if (msr & (IA32_VMX_ENABLE_EPT) << 32)
954 ci->ci_vmm_flags |= CI_VMM_EPT;
955 }
956 }
957
958 /*
959 * Check startup config (VMX)
960 */
961 if (ci->ci_vmm_flags & CI_VMM_VMX) {
962 /* CR0 fixed and flexible bits */
963 msr = rdmsr(IA32_VMX_CR0_FIXED0);
964 ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0 = msr;
965 msr = rdmsr(IA32_VMX_CR0_FIXED1);
966 ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1 = msr;
967
968 /* CR4 fixed and flexible bits */
969 msr = rdmsr(IA32_VMX_CR4_FIXED0);
970 ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0 = msr;
971 msr = rdmsr(IA32_VMX_CR4_FIXED1);
972 ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1 = msr;
973
974 /* VMXON region revision ID (bits 30:0 of IA32_VMX_BASIC) */
975 msr = rdmsr(IA32_VMX_BASIC);
976 ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision =
977 (uint32_t)(msr & 0x7FFFFFFF);
978
979 /* MSR save / load table size */
980 msr = rdmsr(IA32_VMX_MISC);
981 ci->ci_vmm_cap.vcc_vmx.vmx_msr_table_size =
982 (uint32_t)(msr & IA32_VMX_MSR_LIST_SIZE_MASK) >> 25;
983
984 /* CR3 target count size */
985 ci->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count =
986 (uint32_t)(msr & IA32_VMX_CR3_TGT_SIZE_MASK) >> 16;
987 }
988
989 /*
990 * Check for workable SVM
991 */
992 if (ecpu_ecxfeature & CPUIDECX_SVM) {
993 msr = rdmsr(MSR_AMD_VM_CR);
994
995 if (!(msr & AMD_SVMDIS))
996 ci->ci_vmm_flags |= CI_VMM_SVM;
997
998 CPUID(CPUID_AMD_SVM_CAP, dummy,
999 ci->ci_vmm_cap.vcc_svm.svm_max_asid, dummy, edx);
1000
1001 if (ci->ci_vmm_cap.vcc_svm.svm_max_asid > 0xFFF)
1002 ci->ci_vmm_cap.vcc_svm.svm_max_asid = 0xFFF;
1003
1004 if (edx & AMD_SVM_FLUSH_BY_ASID_CAP)
1005 ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid = 1;
1006
1007 if (edx & AMD_SVM_VMCB_CLEAN_CAP)
1008 ci->ci_vmm_cap.vcc_svm.svm_vmcb_clean = 1;
1009
1010 if (edx & AMD_SVM_DECODE_ASSIST_CAP)
1011 ci->ci_vmm_cap.vcc_svm.svm_decode_assist = 1;
1012 }
1013
1014 /*
1015 * Check for SVM Nested Paging
1016 */
1017 if ((ci->ci_vmm_flags & CI_VMM_SVM) &&
1018 ci->ci_pnfeatset >= CPUID_AMD_SVM_CAP) {
1019 CPUID(CPUID_AMD_SVM_CAP, dummy, dummy, dummy, cap);
1020 if (cap & AMD_SVM_NESTED_PAGING_CAP)
1021 ci->ci_vmm_flags |= CI_VMM_RVI;
1022 }
1023
1024 /*
1025 * Check "L1 flush on VM entry" (Intel L1TF vuln) semantics
1026 * Full details can be found here:
1027 * https://software.intel.com/security-software-guidance/insights/deep-dive-intel-analysis-l1-terminal-fault
1028 */
1029 if (ci->ci_vendor == CPUV_INTEL) {
1030 if (ci->ci_feature_sefflags_edx & SEFF0EDX_L1DF)
1031 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 1;
1032 else
1033 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 0;
1034
1035 /*
1036 * Certain CPUs may have the vulnerability remedied in
1037 * hardware (RDCL_NO), or we may be nested in an VMM that
1038 * is doing flushes (SKIP_L1DFL_VMENTRY) using the MSR.
1039 * In either case no mitigation at all is necessary.
1040 */
1041 if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
1042 msr = rdmsr(MSR_ARCH_CAPABILITIES);
1043 if ((msr & ARCH_CAP_RDCL_NO) ||
1044 ((msr & ARCH_CAP_SKIP_L1DFL_VMENTRY) &&
1045 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr))
1046 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr =
1047 VMX_SKIP_L1D_FLUSH;
1048 }
1049 }
1050 }
1051 #endif /* NVMM > 0 */
1052