1 /*        $NetBSD: identcpu.c,v 1.138 2025/05/01 06:01:47 imil Exp $  */
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden,  and by Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: identcpu.c,v 1.138 2025/05/01 06:01:47 imil Exp $");
34 
35 #include "opt_xen.h"
36 
37 #include <sys/param.h>
38 
39 #include <sys/cpu.h>
40 #include <sys/device.h>
41 #include <sys/systm.h>
42 
43 #include <crypto/aes/aes_impl.h>
44 #include <crypto/aes/arch/x86/aes_ni.h>
45 #include <crypto/aes/arch/x86/aes_sse2.h>
46 #include <crypto/aes/arch/x86/aes_ssse3.h>
47 #include <crypto/aes/arch/x86/aes_via.h>
48 #include <crypto/chacha/arch/x86/chacha_sse2.h>
49 #include <crypto/chacha/chacha_impl.h>
50 
51 #include <uvm/uvm_extern.h>
52 
53 #include <machine/cpu.h>
54 #include <machine/pio.h>
55 #include <machine/specialreg.h>
56 
57 #include <x86/cacheinfo.h>
58 #include <x86/cputypes.h>
59 #include <x86/cpuvar.h>
60 #include <x86/fpu.h>
61 
62 #include <dev/vmt/vmtreg.h>   /* for vmt_hvcall() */
63 #include <dev/vmt/vmtvar.h>   /* for vmt_hvcall() */
64 
65 #ifndef XENPV
66 #include "hyperv.h"
67 #if NHYPERV > 0
68 #include <x86/x86/hypervvar.h>
69 #endif
70 #endif
71 
/*
 * Descriptor tables searched with cpu_cacheinfo_lookup() by the cache
 * probes in this file; their contents come from the INTEL_CACHE_INFO and
 * AMD_L2L3CACHE_INFO initializer macros.
 */
static const struct x86_cache_info intel_cpuid_cache_info[] = INTEL_CACHE_INFO;

static const struct x86_cache_info amd_cpuid_l2l3cache_assoc_info[] =
	AMD_L2L3CACHE_INFO;

/* CPU vendor code; the probes below compare it against CPUVENDOR_*. */
int cpu_vendor;
/* CPU brand string; cpu_probe_vortex86() formats into it with snprintf(). */
char cpu_brand_string[49];

/*
 * FPU state save method and save area size.  The size starts out as that
 * of the legacy save87 area; cpu_probe_fpu() updates both.
 */
int x86_fpu_save __read_mostly;
unsigned int x86_fpu_save_size __read_mostly = sizeof(struct save87);
/* XSAVE state; the two tables have XSAVE_MAX_COMPONENT+1 slots each. */
uint64_t x86_xsave_features __read_mostly = 0;
size_t x86_xsave_offsets[XSAVE_MAX_COMPONENT+1] __read_mostly;
size_t x86_xsave_sizes[XSAVE_MAX_COMPONENT+1] __read_mostly;
/* NOTE(review): presumably the highest hypervisor CPUID leaf -- confirm. */
u_int cpu_max_hypervisor_cpuid = 0;

/*
 * Note: these are just the ones that may not have a cpuid instruction.
 * We deal with the rest in a different way.
 *
 * The table is a flat list of (vendor, class) pairs; the trailing comment
 * on each pair names the CPU type it describes.
 */
const int i386_nocpuid_cpus[] = {
	CPUVENDOR_INTEL, CPUCLASS_386,	/* CPU_386SX */
	CPUVENDOR_INTEL, CPUCLASS_386,	/* CPU_386   */
	CPUVENDOR_INTEL, CPUCLASS_486,	/* CPU_486SX */
	CPUVENDOR_INTEL, CPUCLASS_486,	/* CPU_486   */
	CPUVENDOR_CYRIX, CPUCLASS_486,	/* CPU_486DLC */
	CPUVENDOR_CYRIX, CPUCLASS_486,	/* CPU_6x86 */
	CPUVENDOR_NEXGEN, CPUCLASS_386,	/* CPU_NX586 */
};

/*
 * Printable vendor names.  Each slot holds at most 9 characters plus the
 * terminating NUL; "Transmeta" fills a slot exactly, so a longer name
 * needs a wider second dimension.
 * NOTE(review): presumably indexed by the CPUVENDOR_* value -- confirm.
 */
static const char cpu_vendor_names[][10] = {
	"Unknown", "Intel", "NS/Cyrix", "NexGen", "AMD", "IDT/VIA", "Transmeta",
	"Vortex86"
};
105 
106 static void
cpu_probe_intel_cache(struct cpu_info * ci)107 cpu_probe_intel_cache(struct cpu_info *ci)
108 {
109           const struct x86_cache_info *cai;
110           u_int descs[4];
111           int iterations, i, j;
112           uint8_t desc;
113 
114           if (cpuid_level >= 2) {
115                     /* Parse the cache info from `cpuid leaf 2', if we have it. */
116                     x86_cpuid(2, descs);
117                     iterations = descs[0] & 0xff;
118                     while (iterations-- > 0) {
119                               for (i = 0; i < 4; i++) {
120                                         if (descs[i] & 0x80000000)
121                                                   continue;
122                                         for (j = 0; j < 4; j++) {
123                                                   if (i == 0 && j == 0)
124                                                             continue;
125                                                   desc = (descs[i] >> (j * 8)) & 0xff;
126                                                   if (desc == 0)
127                                                             continue;
128                                                   cai = cpu_cacheinfo_lookup(
129                                                       intel_cpuid_cache_info, desc);
130                                                   if (cai != NULL) {
131                                                             ci->ci_cinfo[cai->cai_index] =
132                                                                 *cai;
133                                                   }
134                                         }
135                               }
136                     }
137           }
138 
139           if (cpuid_level < 4)
140                     return;
141 
142           /* Parse the cache info from `cpuid leaf 4', if we have it. */
143           cpu_dcp_cacheinfo(ci, 4);
144 }
145 
146 static void
cpu_probe_intel_errata(struct cpu_info * ci)147 cpu_probe_intel_errata(struct cpu_info *ci)
148 {
149           u_int family, model;
150 
151           family = CPUID_TO_FAMILY(ci->ci_signature);
152           model = CPUID_TO_MODEL(ci->ci_signature);
153 
154           /*
155            * For details, refer to the Intel Pentium and Celeron Processor
156            * N- and J- Series Specification Update (Document number: 334820-010),
157            * August 2022, Revision 010. See page 28, Section 5.30: "APL30 A Store
158            * Instruction May Not Wake Up MWAIT."
159            * https://cdrdv2-public.intel.com/334820/334820-APL_Spec_Update_rev010.pdf
160            * https://web.archive.org/web/20250114072355/https://cdrdv2-public.intel.com/334820/334820-APL_Spec_Update_rev010.pdf
161            *
162            * Disable MWAIT/MONITOR on Apollo Lake CPUs to address the
163            * APL30 erratum.  When using the MONITOR/MWAIT instruction
164            * pair, stores to the armed address range may fail to trigger
165            * MWAIT to resume execution.  When these instructions are used
166            * to hatch secondary CPUs, this erratum causes SMP boot
167            * failures.
168            */
169           if (family == 0x6 && model == 0x5C) {
170                     wrmsr(MSR_MISC_ENABLE,
171                         rdmsr(MSR_MISC_ENABLE) & ~IA32_MISC_MWAIT_EN);
172 
173                     cpu_feature[1] &= ~CPUID2_MONITOR;
174                     ci->ci_feat_val[1] &= ~CPUID2_MONITOR;
175           }
176 }
177 
178 static void
cpu_probe_intel(struct cpu_info * ci)179 cpu_probe_intel(struct cpu_info *ci)
180 {
181 
182           if (cpu_vendor != CPUVENDOR_INTEL)
183                     return;
184 
185           cpu_probe_intel_cache(ci);
186           cpu_probe_intel_errata(ci);
187 }
188 
189 static void
cpu_probe_amd_cache(struct cpu_info * ci)190 cpu_probe_amd_cache(struct cpu_info *ci)
191 {
192           const struct x86_cache_info *cp;
193           struct x86_cache_info *cai;
194           int family, model;
195           u_int descs[4];
196           u_int lfunc;
197 
198           family = CPUID_TO_FAMILY(ci->ci_signature);
199           model = CPUID_TO_MODEL(ci->ci_signature);
200 
201           /* K5 model 0 has none of this info. */
202           if (family == 5 && model == 0)
203                     return;
204 
205           /* Determine the largest extended function value. */
206           x86_cpuid(0x80000000, descs);
207           lfunc = descs[0];
208 
209           if (lfunc < 0x80000005)
210                     return;
211 
212           /* Determine L1 cache/TLB info. */
213           x86_cpuid(0x80000005, descs);
214 
215           /* K6-III and higher have large page TLBs. */
216           if ((family == 5 && model >= 9) || family >= 6) {
217                     cai = &ci->ci_cinfo[CAI_ITLB2];
218                     cai->cai_totalsize = AMD_L1_EAX_ITLB_ENTRIES(descs[0]);
219                     cai->cai_associativity = AMD_L1_EAX_ITLB_ASSOC(descs[0]);
220                     cai->cai_linesize = (4 * 1024 * 1024);
221 
222                     cai = &ci->ci_cinfo[CAI_DTLB2];
223                     cai->cai_totalsize = AMD_L1_EAX_DTLB_ENTRIES(descs[0]);
224                     cai->cai_associativity = AMD_L1_EAX_DTLB_ASSOC(descs[0]);
225                     cai->cai_linesize = (4 * 1024 * 1024);
226           }
227 
228           cai = &ci->ci_cinfo[CAI_ITLB];
229           cai->cai_totalsize = AMD_L1_EBX_ITLB_ENTRIES(descs[1]);
230           cai->cai_associativity = AMD_L1_EBX_ITLB_ASSOC(descs[1]);
231           cai->cai_linesize = (4 * 1024);
232 
233           cai = &ci->ci_cinfo[CAI_DTLB];
234           cai->cai_totalsize = AMD_L1_EBX_DTLB_ENTRIES(descs[1]);
235           cai->cai_associativity = AMD_L1_EBX_DTLB_ASSOC(descs[1]);
236           cai->cai_linesize = (4 * 1024);
237 
238           cai = &ci->ci_cinfo[CAI_DCACHE];
239           cai->cai_totalsize = AMD_L1_ECX_DC_SIZE(descs[2]);
240           cai->cai_associativity = AMD_L1_ECX_DC_ASSOC(descs[2]);
241           cai->cai_linesize = AMD_L1_ECX_DC_LS(descs[2]);
242 
243           cai = &ci->ci_cinfo[CAI_ICACHE];
244           cai->cai_totalsize = AMD_L1_EDX_IC_SIZE(descs[3]);
245           cai->cai_associativity = AMD_L1_EDX_IC_ASSOC(descs[3]);
246           cai->cai_linesize = AMD_L1_EDX_IC_LS(descs[3]);
247 
248           if (lfunc < 0x80000006)
249                     return;
250 
251           /* Determine L2 cache/TLB info. */
252           x86_cpuid(0x80000006, descs);
253 
254           cai = &ci->ci_cinfo[CAI_L2CACHE];
255           cai->cai_totalsize = AMD_L2_ECX_C_SIZE(descs[2]);
256           cai->cai_associativity = AMD_L2_ECX_C_ASSOC(descs[2]);
257           cai->cai_linesize = AMD_L2_ECX_C_LS(descs[2]);
258 
259           cp = cpu_cacheinfo_lookup(amd_cpuid_l2l3cache_assoc_info,
260               cai->cai_associativity);
261           if (cp != NULL)
262                     cai->cai_associativity = cp->cai_associativity;
263           else
264                     cai->cai_associativity = 0;   /* XXX Unknown/reserved */
265 
266           if (family < 0xf)
267                     return;
268 
269           /* Determine L3 cache info on AMD Family 10h and newer processors */
270           cai = &ci->ci_cinfo[CAI_L3CACHE];
271           cai->cai_totalsize = AMD_L3_EDX_C_SIZE(descs[3]);
272           cai->cai_associativity = AMD_L3_EDX_C_ASSOC(descs[3]);
273           cai->cai_linesize = AMD_L3_EDX_C_LS(descs[3]);
274 
275           cp = cpu_cacheinfo_lookup(amd_cpuid_l2l3cache_assoc_info,
276               cai->cai_associativity);
277           if (cp != NULL)
278                     cai->cai_associativity = cp->cai_associativity;
279           else
280                     cai->cai_associativity = 0;   /* XXX Unknown reserved */
281 
282           if (lfunc < 0x80000019)
283                     return;
284 
285           /* Determine 1GB TLB info. */
286           x86_cpuid(0x80000019, descs);
287 
288           cai = &ci->ci_cinfo[CAI_L1_1GBDTLB];
289           cai->cai_totalsize = AMD_L1_1GB_EAX_DTLB_ENTRIES(descs[1]);
290           cai->cai_associativity = AMD_L1_1GB_EAX_DTLB_ASSOC(descs[1]);
291           cai->cai_linesize = (1 * 1024);
292 
293           cai = &ci->ci_cinfo[CAI_L1_1GBITLB];
294           cai->cai_totalsize = AMD_L1_1GB_EAX_IUTLB_ENTRIES(descs[0]);
295           cai->cai_associativity = AMD_L1_1GB_EAX_IUTLB_ASSOC(descs[0]);
296           cai->cai_linesize = (1 * 1024);
297 
298           cai = &ci->ci_cinfo[CAI_L2_1GBDTLB];
299           cai->cai_totalsize = AMD_L2_1GB_EBX_DUTLB_ENTRIES(descs[1]);
300           cai->cai_associativity = AMD_L2_1GB_EBX_DUTLB_ASSOC(descs[1]);
301           cai->cai_linesize = (1 * 1024);
302 
303           cai = &ci->ci_cinfo[CAI_L2_1GBITLB];
304           cai->cai_totalsize = AMD_L2_1GB_EBX_IUTLB_ENTRIES(descs[0]);
305           cai->cai_associativity = AMD_L2_1GB_EBX_IUTLB_ASSOC(descs[0]);
306           cai->cai_linesize = (1 * 1024);
307 
308           if (lfunc < 0x8000001d)
309                     return;
310 
311           if (ci->ci_feat_val[3] & CPUID_TOPOEXT)
312                     cpu_dcp_cacheinfo(ci, 0x8000001d);
313 }
314 
315 static void
cpu_probe_amd_errata(struct cpu_info * ci)316 cpu_probe_amd_errata(struct cpu_info *ci)
317 {
318           u_int model;
319           uint64_t val;
320           int flag;
321 
322           model = CPUID_TO_MODEL(ci->ci_signature);
323 
324           switch (CPUID_TO_FAMILY(ci->ci_signature)) {
325           case 0x05: /* K5 */
326                     if (model == 0) {
327                               /*
328                                * According to the AMD Processor Recognition App Note,
329                                * the AMD-K5 Model 0 uses the wrong bit to indicate
330                                * support for global PTEs, instead using bit 9 (APIC)
331                                * rather than bit 13 (i.e. "0x200" vs. 0x2000").
332                                */
333                               flag = ci->ci_feat_val[0];
334                               if ((flag & CPUID_APIC) != 0)
335                                         flag = (flag & ~CPUID_APIC) | CPUID_PGE;
336                               ci->ci_feat_val[0] = flag;
337                     }
338                     break;
339 
340           case 0x10: /* Family 10h */
341                     /*
342                      * On Family 10h, certain BIOSes do not enable WC+ support.
343                      * This causes WC+ to become CD, and degrades guest
344                      * performance at the NPT level.
345                      *
346                      * Explicitly enable WC+ if we're not a guest.
347                      */
348                     if (!ISSET(ci->ci_feat_val[1], CPUID2_RAZ)) {
349                               val = rdmsr(MSR_BU_CFG2);
350                               val &= ~BU_CFG2_CWPLUS_DIS;
351                               wrmsr(MSR_BU_CFG2, val);
352                     }
353                     break;
354 
355           case 0x17:
356                     /*
357                      * "Revision Guide for AMD Family 17h Models 00h-0Fh
358                      * Processors" revision 1.12:
359                      *
360                      * 1057 MWAIT or MWAITX Instructions May Fail to Correctly
361                      * Exit From the Monitor Event Pending State
362                      *
363                      * 1109 MWAIT Instruction May Hang a Thread
364                      */
365                     if (model == 0x01) {
366                               cpu_feature[1] &= ~CPUID2_MONITOR;
367                               ci->ci_feat_val[1] &= ~CPUID2_MONITOR;
368                     }
369                     break;
370           }
371 }
372 
373 static void
cpu_probe_amd(struct cpu_info * ci)374 cpu_probe_amd(struct cpu_info *ci)
375 {
376 
377           if (cpu_vendor != CPUVENDOR_AMD)
378                     return;
379 
380           cpu_probe_amd_cache(ci);
381           cpu_probe_amd_errata(ci);
382 }
383 
/*
 * Read a Cyrix configuration register: the register number is written
 * to I/O port 0x22 and its value is then read back from port 0x23.
 */
static inline uint8_t
cyrix_read_reg(uint8_t reg)
{
	uint8_t value;

	outb(0x22, reg);
	value = inb(0x23);

	return value;
}
391 
/*
 * Write a Cyrix configuration register: the register number goes to
 * I/O port 0x22, immediately followed by the new value on port 0x23.
 */
static inline void
cyrix_write_reg(uint8_t reg, uint8_t data)
{
	outb(0x22, reg);	/* select the register */
	outb(0x23, data);	/* store its new contents */
}
399 
400 static void
cpu_probe_cyrix_cmn(struct cpu_info * ci)401 cpu_probe_cyrix_cmn(struct cpu_info *ci)
402 {
403           /*
404            * i8254 latch check routine:
405            *     National Geode (formerly Cyrix MediaGX) has a serious bug in
406            *     its built-in i8254-compatible clock module (cs5510 cs5520).
407            *     Set the variable 'clock_broken_latch' to indicate it.
408            *
409            * This bug is not present in the cs5530, and the flag
410            * is disabled again in sys/arch/i386/pci/pcib.c if this later
411            * model device is detected. Ideally, this work-around should not
412            * even be in here, it should be in there. XXX
413            */
414           uint8_t c3;
415 #ifndef XENPV
416           extern int clock_broken_latch;
417 
418           switch (ci->ci_signature) {
419           case 0x440:     /* Cyrix MediaGX */
420           case 0x540:     /* GXm */
421                     clock_broken_latch = 1;
422                     break;
423           }
424 #endif
425 
426           /* set up various cyrix registers */
427           /*
428            * Enable suspend on halt (powersave mode).
429            * When powersave mode is enabled, the TSC stops counting
430            * while the CPU is halted in idle() waiting for an interrupt.
431            * This means we can't use the TSC for interval time in
432            * microtime(9), and thus it is disabled here.
433            *
434            * It still makes a perfectly good cycle counter
435            * for program profiling, so long as you remember you're
436            * counting cycles, and not time. Further, if you don't
437            * mind not using powersave mode, the TSC works just fine,
438            * so this should really be optional. XXX
439            */
440           cyrix_write_reg(0xc2, cyrix_read_reg(0xc2) | 0x08);
441 
442           /*
443            * Do not disable the TSC on the Geode GX, it's reported to
444            * work fine.
445            */
446           if (ci->ci_signature != 0x552)
447                     ci->ci_feat_val[0] &= ~CPUID_TSC;
448 
449           /* enable access to ccr4/ccr5 */
450           c3 = cyrix_read_reg(0xC3);
451           cyrix_write_reg(0xC3, c3 | 0x10);
452           /* cyrix's workaround  for the "coma bug" */
453           cyrix_write_reg(0x31, cyrix_read_reg(0x31) | 0xf8);
454           cyrix_write_reg(0x32, cyrix_read_reg(0x32) | 0x7f);
455           cyrix_write_reg(0x33, cyrix_read_reg(0x33) & ~0xffu);
456           cyrix_write_reg(0x3c, cyrix_read_reg(0x3c) | 0x87);
457           /* disable access to ccr4/ccr5 */
458           cyrix_write_reg(0xC3, c3);
459 }
460 
461 static void
cpu_probe_cyrix(struct cpu_info * ci)462 cpu_probe_cyrix(struct cpu_info *ci)
463 {
464 
465           if (cpu_vendor != CPUVENDOR_CYRIX ||
466               CPUID_TO_FAMILY(ci->ci_signature) < 4 ||
467               CPUID_TO_FAMILY(ci->ci_signature) > 6)
468                     return;
469 
470           cpu_probe_cyrix_cmn(ci);
471 }
472 
473 static void
cpu_probe_winchip(struct cpu_info * ci)474 cpu_probe_winchip(struct cpu_info *ci)
475 {
476 
477           if (cpu_vendor != CPUVENDOR_IDT ||
478               CPUID_TO_FAMILY(ci->ci_signature) != 5)
479                     return;
480 
481           /* WinChip C6 */
482           if (CPUID_TO_MODEL(ci->ci_signature) == 4)
483                     ci->ci_feat_val[0] &= ~CPUID_TSC;
484 }
485 
486 static void
cpu_probe_c3(struct cpu_info * ci)487 cpu_probe_c3(struct cpu_info *ci)
488 {
489           u_int family, model, stepping, descs[4], lfunc, msr;
490           struct x86_cache_info *cai;
491 
492           if (cpu_vendor != CPUVENDOR_IDT ||
493               CPUID_TO_FAMILY(ci->ci_signature) < 6)
494                     return;
495 
496           family = CPUID_TO_FAMILY(ci->ci_signature);
497           model = CPUID_TO_MODEL(ci->ci_signature);
498           stepping = CPUID_TO_STEPPING(ci->ci_signature);
499 
500           if (family == 6) {
501                     /*
502                      * VIA Eden ESP.
503                      *
504                      * Quoting from page 3-4 of: "VIA Eden ESP Processor Datasheet"
505                      * http://www.via.com.tw/download/mainboards/6/14/Eden20v115.pdf
506                      *
507                      * 1. The CMPXCHG8B instruction is provided and always enabled,
508                      *    however, it appears disabled in the corresponding CPUID
509                      *    function bit 0 to avoid a bug in an early version of
510                      *    Windows NT. However, this default can be changed via a
511                      *    bit in the FCR MSR.
512                      */
513                     ci->ci_feat_val[0] |= CPUID_CX8;
514                     wrmsr(MSR_VIA_FCR, rdmsr(MSR_VIA_FCR) | VIA_FCR_CX8_REPORT);
515 
516                     /*
517                      * For reference on VIA Alternate Instructions, see the VIA C3
518                      * Processor Alternate Instruction Set Application Note, 2002.
519                      * http://www.bitsavers.org/components/viaTechnologies/C3-ais-appnote.pdf
520                      *
521                      * Disable unsafe ALTINST mode for VIA C3 processors, if necessary.
522                      *
523                      * This is done for the security reasons, as some CPUs were
524                      * found with ALTINST enabled by default.  This functionality
525                      * has ability to bypass many x86 architecture memory
526                      * protections and privilege checks, exposing a possibility
527                      * for backdoors and should not be enabled unintentionally.
528                      */
529                     if (model > 0x5 && model < 0xA) {
530                               int disable_ais = 0;
531                               x86_cpuid(0xc0000000, descs);
532                               lfunc = descs[0];
533                               /* Check AIS flags first if supported ("Nehemiah"). */
534                               if (lfunc >= 0xc0000001) {
535                                         x86_cpuid(0xc0000001, descs);
536                                         lfunc = descs[3];
537                                         if ((lfunc & CPUID_VIA_HAS_AIS)
538                                             && (lfunc & CPUID_VIA_DO_AIS)) {
539                                                   disable_ais = 1;
540                                         }
541                               } else    /* Explicitly disable AIS for pre-CX5L CPUs. */
542                                         disable_ais = 1;
543 
544                               if (disable_ais) {
545                                         msr = rdmsr(MSR_VIA_FCR);
546                                         wrmsr(MSR_VIA_FCR, msr & ~VIA_FCR_ALTINST_ENABLE);
547                               }
548                     }
549           }
550 
551           if (family > 6 || model > 0x9 || (model == 0x9 && stepping >= 3)) {
552                     /* VIA Nehemiah or later. */
553                     x86_cpuid(0xc0000000, descs);
554                     lfunc = descs[0];
555                     if (lfunc >= 0xc0000001) {    /* has ACE, RNG */
556                         int rng_enable = 0, ace_enable = 0;
557                         x86_cpuid(0xc0000001, descs);
558                         lfunc = descs[3];
559                         ci->ci_feat_val[4] = lfunc;
560                         /* Check for and enable RNG */
561                         if (lfunc & CPUID_VIA_HAS_RNG) {
562                               if (!(lfunc & CPUID_VIA_DO_RNG)) {
563                                   rng_enable++;
564                                   ci->ci_feat_val[4] |= CPUID_VIA_DO_RNG;
565                               }
566                         }
567                         /* Check for and enable ACE (AES-CBC) */
568                         if (lfunc & CPUID_VIA_HAS_ACE) {
569                               if (!(lfunc & CPUID_VIA_DO_ACE)) {
570                                   ace_enable++;
571                                   ci->ci_feat_val[4] |= CPUID_VIA_DO_ACE;
572                               }
573                         }
574                         /* Check for and enable SHA */
575                         if (lfunc & CPUID_VIA_HAS_PHE) {
576                               if (!(lfunc & CPUID_VIA_DO_PHE)) {
577                                   ace_enable++;
578                                   ci->ci_feat_val[4] |= CPUID_VIA_DO_PHE;
579                               }
580                         }
581                         /* Check for and enable ACE2 (AES-CTR) */
582                         if (lfunc & CPUID_VIA_HAS_ACE2) {
583                               if (!(lfunc & CPUID_VIA_DO_ACE2)) {
584                                   ace_enable++;
585                                   ci->ci_feat_val[4] |= CPUID_VIA_DO_ACE2;
586                               }
587                         }
588                         /* Check for and enable PMM (modmult engine) */
589                         if (lfunc & CPUID_VIA_HAS_PMM) {
590                               if (!(lfunc & CPUID_VIA_DO_PMM)) {
591                                   ace_enable++;
592                                   ci->ci_feat_val[4] |= CPUID_VIA_DO_PMM;
593                               }
594                         }
595 
596                         /*
597                          * Actually do the enables.  It's a little gross,
598                          * but per the PadLock programming guide, "Enabling
599                          * PadLock", condition 3, we must enable SSE too or
600                          * else the first use of RNG or ACE instructions
601                          * will generate a trap.
602                          *
603                          * We must do this early because of kernel RNG
604                          * initialization but it is safe without the full
605                          * FPU-detect as all these CPUs have SSE.
606                          */
607                         lcr4(rcr4() | CR4_OSFXSR);
608 
609                         if (rng_enable) {
610                               msr = rdmsr(MSR_VIA_RNG);
611                               msr |= MSR_VIA_RNG_ENABLE;
612                               /* C7 stepping 8 and subsequent CPUs have dual RNG */
613                               if (model > 0xA || (model == 0xA && stepping > 0x7)) {
614                                         msr |= MSR_VIA_RNG_2NOISE;
615                               }
616                               wrmsr(MSR_VIA_RNG, msr);
617                         }
618 
619                         if (ace_enable) {
620                               msr = rdmsr(MSR_VIA_FCR);
621                               wrmsr(MSR_VIA_FCR, msr | VIA_FCR_ACE_ENABLE);
622                         }
623                     }
624           }
625 
626           /* Determine the largest extended function value. */
627           x86_cpuid(0x80000000, descs);
628           lfunc = descs[0];
629 
630           /*
631            * Determine L1 cache/TLB info.
632            */
633           if (lfunc < 0x80000005) {
634                     /* No L1 cache info available. */
635                     return;
636           }
637 
638           x86_cpuid(0x80000005, descs);
639 
640           cai = &ci->ci_cinfo[CAI_ITLB];
641           cai->cai_totalsize = VIA_L1_EBX_ITLB_ENTRIES(descs[1]);
642           cai->cai_associativity = VIA_L1_EBX_ITLB_ASSOC(descs[1]);
643           cai->cai_linesize = (4 * 1024);
644 
645           cai = &ci->ci_cinfo[CAI_DTLB];
646           cai->cai_totalsize = VIA_L1_EBX_DTLB_ENTRIES(descs[1]);
647           cai->cai_associativity = VIA_L1_EBX_DTLB_ASSOC(descs[1]);
648           cai->cai_linesize = (4 * 1024);
649 
650           cai = &ci->ci_cinfo[CAI_DCACHE];
651           cai->cai_totalsize = VIA_L1_ECX_DC_SIZE(descs[2]);
652           cai->cai_associativity = VIA_L1_ECX_DC_ASSOC(descs[2]);
653           cai->cai_linesize = VIA_L1_EDX_IC_LS(descs[2]);
654           if (family == 6 && model == 9 && stepping == 8) {
655                     /* Erratum: stepping 8 reports 4 when it should be 2 */
656                     cai->cai_associativity = 2;
657           }
658 
659           cai = &ci->ci_cinfo[CAI_ICACHE];
660           cai->cai_totalsize = VIA_L1_EDX_IC_SIZE(descs[3]);
661           cai->cai_associativity = VIA_L1_EDX_IC_ASSOC(descs[3]);
662           cai->cai_linesize = VIA_L1_EDX_IC_LS(descs[3]);
663           if (family == 6 && model == 9 && stepping == 8) {
664                     /* Erratum: stepping 8 reports 4 when it should be 2 */
665                     cai->cai_associativity = 2;
666           }
667 
668           /*
669            * Determine L2 cache/TLB info.
670            */
671           if (lfunc < 0x80000006) {
672                     /* No L2 cache info available. */
673                     return;
674           }
675 
676           x86_cpuid(0x80000006, descs);
677 
678           cai = &ci->ci_cinfo[CAI_L2CACHE];
679           if (family > 6 || model >= 9) {
680                     cai->cai_totalsize = VIA_L2N_ECX_C_SIZE(descs[2]);
681                     cai->cai_associativity = VIA_L2N_ECX_C_ASSOC(descs[2]);
682                     cai->cai_linesize = VIA_L2N_ECX_C_LS(descs[2]);
683           } else {
684                     cai->cai_totalsize = VIA_L2_ECX_C_SIZE(descs[2]);
685                     cai->cai_associativity = VIA_L2_ECX_C_ASSOC(descs[2]);
686                     cai->cai_linesize = VIA_L2_ECX_C_LS(descs[2]);
687           }
688 }
689 
690 static void
cpu_probe_geode(struct cpu_info * ci)691 cpu_probe_geode(struct cpu_info *ci)
692 {
693 
694           if (memcmp("Geode by NSC", ci->ci_vendor, 12) != 0 ||
695               CPUID_TO_FAMILY(ci->ci_signature) != 5)
696                     return;
697 
698           cpu_probe_cyrix_cmn(ci);
699           cpu_probe_amd_cache(ci);
700 }
701 
702 static void
cpu_probe_vortex86(struct cpu_info * ci)703 cpu_probe_vortex86(struct cpu_info *ci)
704 {
705 #define PCI_MODE1_ADDRESS_REG 0x0cf8
706 #define PCI_MODE1_DATA_REG    0x0cfc
707 #define PCI_MODE1_ENABLE      0x80000000UL
708 
709           uint32_t reg, idx;
710 
711           if (cpu_vendor != CPUVENDOR_VORTEX86)
712                     return;
713           /*
714            * CPU model available from "Customer ID register" in
715            * North Bridge Function 0 PCI space
716            * we can't use pci_conf_read() because the PCI subsystem is not
717            * not initialised early enough
718            */
719 
720           outl(PCI_MODE1_ADDRESS_REG, PCI_MODE1_ENABLE | 0x90);
721           reg = inl(PCI_MODE1_DATA_REG);
722 
723           if ((reg & 0xf0ffffff) != 0x30504d44) {
724                     idx = 0;
725           } else {
726                     idx = (reg >> 24) & 0xf;
727           }
728 
729           static const char *cpu_vortex86_flavor[] = {
730               "??", "SX", "DX", "MX", "DX2", "MX+", "DX3", "EX", "EX2",
731           };
732           idx = idx < __arraycount(cpu_vortex86_flavor) ? idx : 0;
733           snprintf(cpu_brand_string, sizeof(cpu_brand_string), "Vortex86%s",
734               cpu_vortex86_flavor[idx]);
735 
736 #undef PCI_MODE1_ENABLE
737 #undef PCI_MODE1_ADDRESS_REG
738 #undef PCI_MODE1_DATA_REG
739 }
740 
/*
 * cpu_probe_fpu_old --
 *	FPU probing for CPUs handled without FXSAVE.  On i386 (and not
 *	under XENPV) this runs the FDIV check and records a positive
 *	result in i386_fpu_fdivbug; elsewhere it compiles to nothing.
 */
static void
cpu_probe_fpu_old(struct cpu_info *ci)
{
#if defined(__i386__) && !defined(XENPV)
	int fdiv_broken;

	/* The check runs between clts()/fninit() and the closing stts(). */
	clts();
	fninit();

	/*
	 * Check for the 'FDIV' bug on the original Pentium.
	 * NB: 120+MHz cpus are not affected.
	 */
	fdiv_broken = (npx586bug1(4195835, 3145727) != 0);
	if (fdiv_broken) {
		i386_fpu_fdivbug = 1;
	}

	stts();
#endif
}
757 
758 static void
cpu_probe_fpu(struct cpu_info * ci)759 cpu_probe_fpu(struct cpu_info *ci)
760 {
761           u_int descs[4];
762           int i;
763 
764           x86_fpu_save = FPU_SAVE_FSAVE;
765 
766 #ifdef i386
767           /* If we have FXSAVE/FXRESTOR, use them. */
768           if ((ci->ci_feat_val[0] & CPUID_FXSR) == 0) {
769                     i386_use_fxsave = 0;
770                     cpu_probe_fpu_old(ci);
771                     return;
772           }
773 
774           i386_use_fxsave = 1;
775           /*
776            * If we have SSE/SSE2, enable XMM exceptions, and
777            * notify userland.
778            */
779           if (ci->ci_feat_val[0] & CPUID_SSE)
780                     i386_has_sse = 1;
781           if (ci->ci_feat_val[0] & CPUID_SSE2)
782                     i386_has_sse2 = 1;
783 #else
784           /*
785            * For amd64 i386_use_fxsave, i386_has_sse and i386_has_sse2 are
786            * #defined to 1, because fxsave/sse/sse2 are always present.
787            */
788 #endif
789 
790           x86_fpu_save = FPU_SAVE_FXSAVE;
791           x86_fpu_save_size = sizeof(struct fxsave);
792 
793           /* See if XSAVE is supported */
794           if ((ci->ci_feat_val[1] & CPUID2_XSAVE) == 0)
795                     return;
796 
797 #ifdef XENPV
798           /*
799            * Xen kernel can disable XSAVE via "no-xsave" option, in that case
800            * the XSAVE/XRSTOR instructions become privileged and trigger
801            * supervisor trap. OSXSAVE flag seems to be reliably set according
802            * to whether XSAVE is actually available.
803            */
804           if ((ci->ci_feat_val[1] & CPUID2_OSXSAVE) == 0)
805                     return;
806 #endif
807 
808           x86_fpu_save = FPU_SAVE_XSAVE;
809 
810           x86_cpuid2(0x0d, 1, descs);
811           if (descs[0] & CPUID_PES1_XSAVEOPT)
812                     x86_fpu_save = FPU_SAVE_XSAVEOPT;
813 
814           /*
815            * Get the hardware-supported features with CPUID.
816            */
817           x86_cpuid2(0x0d, 0, descs);
818           x86_xsave_features = (uint64_t)descs[3] << 32 | descs[0];
819 
820           /*
821            * Turn on XSAVE in CR4 so we can write to XCR0, and write to
822            * XCR0 enable only those features that NetBSD software
823            * supports.
824            *
825            * CR4_OSXSAVE support and and XCR0 access are both allowed
826            * because we tested ci->ci_feat_val[1] & CPUID2_XSAVE above.
827            *
828            * (This is redundant with cpu_init when it runs on the primary
829            * CPU, but it's harmless.)
830            */
831           lcr4(rcr4() | CR4_OSXSAVE);
832           wrxcr(0, x86_xsave_features & XCR0_FPU);
833 
834           /*
835            * Get the size of the save area with those features enabled
836            * with the second CPUID.
837            *
838            * (Let's hope the features don't change!)
839            */
840           x86_cpuid2(0x0d, 0, descs);
841           if (descs[1] > x86_fpu_save_size)
842                     x86_fpu_save_size = descs[1];
843 
844           /* Get component offsets and sizes for the save area */
845           for (i = XSAVE_YMM_Hi128; i < __arraycount(x86_xsave_offsets); i++) {
846                     if (x86_xsave_features & __BIT(i)) {
847                               x86_cpuid2(0x0d, i, descs);
848                               x86_xsave_offsets[i] = descs[1];
849                               x86_xsave_sizes[i] = descs[0];
850                     }
851           }
852 }
853 
854 void
cpu_probe(struct cpu_info * ci)855 cpu_probe(struct cpu_info *ci)
856 {
857           u_int descs[4];
858           int i;
859           uint32_t miscbytes;
860           uint32_t brand[12];
861 
862           if (ci == &cpu_info_primary) {
863                     cpu_vendor = i386_nocpuid_cpus[cputype << 1];
864                     cpu_class = i386_nocpuid_cpus[(cputype << 1) + 1];
865           }
866 
867           if (cpuid_level < 0) {
868                     /* cpuid instruction not supported */
869                     cpu_probe_fpu_old(ci);
870                     return;
871           }
872 
873           for (i = 0; i < __arraycount(ci->ci_feat_val); i++) {
874                     ci->ci_feat_val[i] = 0;
875           }
876 
877           x86_cpuid(0, descs);
878           cpuid_level = descs[0];
879           ci->ci_max_cpuid = descs[0];
880 
881           ci->ci_vendor[0] = descs[1];
882           ci->ci_vendor[2] = descs[2];
883           ci->ci_vendor[1] = descs[3];
884           ci->ci_vendor[3] = 0;
885 
886           if (ci == &cpu_info_primary) {
887                     if (memcmp(ci->ci_vendor, "GenuineIntel", 12) == 0)
888                               cpu_vendor = CPUVENDOR_INTEL;
889                     else if (memcmp(ci->ci_vendor, "AuthenticAMD", 12) == 0)
890                               cpu_vendor = CPUVENDOR_AMD;
891                     else if (memcmp(ci->ci_vendor, "CyrixInstead", 12) == 0)
892                               cpu_vendor = CPUVENDOR_CYRIX;
893                     else if (memcmp(ci->ci_vendor, "Geode by NSC", 12) == 0)
894                               cpu_vendor = CPUVENDOR_CYRIX;
895                     else if (memcmp(ci->ci_vendor, "CentaurHauls", 12) == 0)
896                               cpu_vendor = CPUVENDOR_IDT;
897                     else if (memcmp(ci->ci_vendor, "GenuineTMx86", 12) == 0)
898                               cpu_vendor = CPUVENDOR_TRANSMETA;
899                     else if (memcmp(ci->ci_vendor, "Vortex86 SoC", 12) == 0)
900                               cpu_vendor = CPUVENDOR_VORTEX86;
901                     else
902                               cpu_vendor = CPUVENDOR_UNKNOWN;
903           }
904 
905           if (cpuid_level >= 1) {
906                     x86_cpuid(1, descs);
907                     ci->ci_signature = descs[0];
908                     miscbytes = descs[1];
909                     ci->ci_feat_val[1] = descs[2];
910                     ci->ci_feat_val[0] = descs[3];
911 
912                     if (ci == &cpu_info_primary) {
913                               /* Determine family + class. */
914                               cpu_class = CPUID_TO_FAMILY(ci->ci_signature)
915                                   + (CPUCLASS_386 - 3);
916                               if (cpu_class > CPUCLASS_686)
917                                         cpu_class = CPUCLASS_686;
918                     }
919 
920                     /* CLFLUSH line size is next 8 bits */
921                     if (ci->ci_feat_val[0] & CPUID_CLFSH)
922                               ci->ci_cflush_lsize
923                                   = __SHIFTOUT(miscbytes, CPUID_CLFLUSH_SIZE) << 3;
924                     ci->ci_initapicid = __SHIFTOUT(miscbytes, CPUID_LOCAL_APIC_ID);
925           }
926 
927           /*
928            * Get the basic information from the extended cpuid leafs.
929            * These were first implemented by amd, but most of the values
930            * match with those generated by modern intel cpus.
931            */
932           x86_cpuid(0x80000000, descs);
933           if (descs[0] >= 0x80000000)
934                     ci->ci_max_ext_cpuid = descs[0];
935           else
936                     ci->ci_max_ext_cpuid = 0;
937 
938           if (ci->ci_max_ext_cpuid >= 0x80000001) {
939                     /* Determine the extended feature flags. */
940                     x86_cpuid(0x80000001, descs);
941                     ci->ci_feat_val[3] = descs[2]; /* %ecx */
942                     ci->ci_feat_val[2] = descs[3]; /* %edx */
943           }
944 
945           if (ci->ci_max_ext_cpuid >= 0x80000004) {
946                     x86_cpuid(0x80000002, brand);
947                     x86_cpuid(0x80000003, brand + 4);
948                     x86_cpuid(0x80000004, brand + 8);
949                     /* Skip leading spaces on brand */
950                     for (i = 0; i < 48; i++) {
951                               if (((char *) brand)[i] != ' ')
952                                         break;
953                     }
954                     memcpy(cpu_brand_string, ((char *) brand) + i, 48 - i);
955           }
956 
957           /*
958            * Get the structured extended features.
959            */
960           if (cpuid_level >= 7) {
961                     x86_cpuid(7, descs);
962                     ci->ci_feat_val[5] = descs[1]; /* %ebx */
963                     ci->ci_feat_val[6] = descs[2]; /* %ecx */
964                     ci->ci_feat_val[7] = descs[3]; /* %edx */
965           }
966 
967           cpu_probe_intel(ci);
968           cpu_probe_amd(ci);
969           cpu_probe_cyrix(ci);
970           cpu_probe_winchip(ci);
971           cpu_probe_c3(ci);
972           cpu_probe_geode(ci);
973           cpu_probe_vortex86(ci);
974 
975           if (ci == &cpu_info_primary) {
976                     cpu_probe_fpu(ci);
977           }
978 
979 #ifndef XENPV
980           x86_cpu_topology(ci);
981 #endif
982 
983           if (cpu_vendor != CPUVENDOR_AMD && (ci->ci_feat_val[0] & CPUID_TM) &&
984               (rdmsr(MSR_MISC_ENABLE) & (1 << 3)) == 0) {
985                     /* Enable thermal monitor 1. */
986                     wrmsr(MSR_MISC_ENABLE, rdmsr(MSR_MISC_ENABLE) | (1<<3));
987           }
988 
989           ci->ci_feat_val[0] &= ~CPUID_FEAT_BLACKLIST;
990           if (ci == &cpu_info_primary) {
991                     /* If first. Boot Processor is the cpu_feature reference. */
992                     for (i = 0; i < __arraycount(cpu_feature); i++) {
993                               cpu_feature[i] = ci->ci_feat_val[i];
994                     }
995                     identify_hypervisor();
996 #ifndef XENPV
997                     /* Early patch of text segment. */
998                     x86_patch(true);
999 #endif
1000 
1001                     /* AES */
1002 #ifdef __x86_64__   /* not yet implemented on i386 */
1003                     if (cpu_feature[1] & CPUID2_AESNI)
1004                               aes_md_init(&aes_ni_impl);
1005                     else
1006 #endif
1007                     if (cpu_feature[4] & CPUID_VIA_HAS_ACE)
1008                               aes_md_init(&aes_via_impl);
1009                     else if (i386_has_sse && i386_has_sse2 &&
1010                         (cpu_feature[1] & CPUID2_SSE3) &&
1011                         (cpu_feature[1] & CPUID2_SSSE3))
1012                               aes_md_init(&aes_ssse3_impl);
1013                     else if (i386_has_sse && i386_has_sse2)
1014                               aes_md_init(&aes_sse2_impl);
1015 
1016                     /* ChaCha */
1017                     if (i386_has_sse && i386_has_sse2)
1018                               chacha_md_init(&chacha_sse2_impl);
1019           } else {
1020                     /*
1021                      * If not first. Warn about cpu_feature mismatch for
1022                      * secondary CPUs.
1023                      */
1024                     for (i = 0; i < __arraycount(cpu_feature); i++) {
1025                               if (cpu_feature[i] != ci->ci_feat_val[i])
1026                                         aprint_error_dev(ci->ci_dev,
1027                                             "feature mismatch: cpu_feature[%d] is "
1028                                             "%#x, but CPU reported %#x\n",
1029                                             i, cpu_feature[i], ci->ci_feat_val[i]);
1030                     }
1031           }
1032 }
1033 
1034 /* Write what we know about the cpu to the console... */
1035 void
cpu_identify(struct cpu_info * ci)1036 cpu_identify(struct cpu_info *ci)
1037 {
1038 
1039           cpu_setmodel("%s %d86-class",
1040               cpu_vendor_names[cpu_vendor], cpu_class + 3);
1041           if (cpu_brand_string[0] != '\0') {
1042                     aprint_normal_dev(ci->ci_dev, "%s", cpu_brand_string);
1043           } else {
1044                     aprint_normal_dev(ci->ci_dev, "%s", cpu_getmodel());
1045                     if (ci->ci_data.cpu_cc_freq != 0)
1046                               aprint_normal(", %dMHz",
1047                                   (int)(ci->ci_data.cpu_cc_freq / 1000000));
1048           }
1049           if (ci->ci_signature != 0)
1050                     aprint_normal(", id 0x%x", ci->ci_signature);
1051           aprint_normal("\n");
1052           aprint_normal_dev(ci->ci_dev, "node %u, package %u, core %u, smt %u\n",
1053               ci->ci_numa_id, ci->ci_package_id, ci->ci_core_id, ci->ci_smt_id);
1054           if (cpu_brand_string[0] == '\0') {
1055                     strlcpy(cpu_brand_string, cpu_getmodel(),
1056                         sizeof(cpu_brand_string));
1057           }
1058           if (cpu_class == CPUCLASS_386) {
1059                     panic("NetBSD requires an 80486DX or later processor");
1060           }
1061           if (cputype == CPU_486DLC) {
1062                     aprint_error("WARNING: BUGGY CYRIX CACHE\n");
1063           }
1064 
1065 #if !defined(XENPV) || defined(DOM0OPS)       /* on Xen PV rdmsr is for Dom0 only */
1066           if (cpu_vendor == CPUVENDOR_AMD     /* check enablement of an */
1067               && device_unit(ci->ci_dev) == 0 /* AMD feature only once */
1068               && ((cpu_feature[3] & CPUID_SVM) == CPUID_SVM)) {
1069                     uint64_t val;
1070 
1071                     val = rdmsr(MSR_VMCR);
1072                     if (((val & VMCR_SVMED) == VMCR_SVMED)
1073                         && ((val & VMCR_LOCK) == VMCR_LOCK)) {
1074                               aprint_normal_dev(ci->ci_dev,
1075                                         "SVM disabled by the BIOS\n");
1076                     }
1077           }
1078 #endif
1079 
1080 #ifdef i386
1081           if (i386_fpu_fdivbug == 1)
1082                     aprint_normal_dev(ci->ci_dev,
1083                         "WARNING: Pentium FDIV bug detected!\n");
1084 
1085           if (cpu_vendor == CPUVENDOR_TRANSMETA) {
1086                     u_int descs[4];
1087                     x86_cpuid(0x80860000, descs);
1088                     if (descs[0] >= 0x80860007)
1089                               /* Create longrun sysctls */
1090                               tmx86_init_longrun();
1091           }
1092 #endif    /* i386 */
1093 
1094 }
1095 
1096 /*
1097  * Hypervisor
1098  */
1099 vm_guest_t vm_guest = VM_GUEST_NO;
1100 
1101 struct vm_name_guest {
1102           const char *name;
1103           vm_guest_t guest;
1104 };
1105 
1106 static const struct vm_name_guest vm_bios_vendors[] = {
1107           { "QEMU", VM_GUEST_VM },                          /* QEMU */
1108           { "Plex86", VM_GUEST_VM },                        /* Plex86 */
1109           { "Bochs", VM_GUEST_VM },                         /* Bochs */
1110           { "Xen", VM_GUEST_VM },                                     /* Xen */
1111           { "BHYVE", VM_GUEST_VM },                         /* bhyve */
1112           { "Seabios", VM_GUEST_VM },                       /* KVM */
1113           { "innotek GmbH", VM_GUEST_VIRTUALBOX },          /* Oracle VirtualBox */
1114           { "Generic PVH", VM_GUEST_GENPVH},                /* Generic PVH */
1115 };
1116 
1117 static const struct vm_name_guest vm_system_products[] = {
1118           { "VMware Virtual Platform", VM_GUEST_VM },       /* VMWare VM */
1119           { "Virtual Machine", VM_GUEST_VM },               /* Microsoft VirtualPC */
1120           { "VirtualBox", VM_GUEST_VIRTUALBOX },            /* Sun xVM VirtualBox */
1121           { "Parallels Virtual Platform", VM_GUEST_VM },    /* Parallels VM */
1122           { "KVM", VM_GUEST_KVM },                          /* KVM */
1123           { "NVMM", VM_GUEST_NVMM },                        /* NVMM */
1124 };
1125 
1126 void
identify_hypervisor(void)1127 identify_hypervisor(void)
1128 {
1129           u_int regs[6];
1130           char hv_vendor[12];
1131           const char *p;
1132           int i;
1133 
1134           switch (vm_guest) {
1135           /* guest type already known, no bios info */
1136           case VM_GUEST_XENPV:
1137           case VM_GUEST_XENPVH:
1138           /* The following are known from first pass */
1139           case VM_GUEST_VMWARE:
1140           case VM_GUEST_HV:
1141           case VM_GUEST_XENHVM:
1142           case VM_GUEST_KVM:
1143           case VM_GUEST_NVMM:
1144                     return;
1145           default:
1146                     break;
1147           }
1148 
1149           /*
1150            * [RFC] CPUID usage for interaction between Hypervisors and Linux.
1151            * http://lkml.org/lkml/2008/10/1/246
1152            *
1153            * KB1009458: Mechanisms to determine if software is running in
1154            * a VMware virtual machine
1155            * http://kb.vmware.com/kb/1009458
1156            */
1157           if (ISSET(cpu_feature[1], CPUID2_RAZ)) {
1158                     /*
1159                      * don't override if vm_guest is unknown but has booted in PVH
1160                      * mode, so it can attach to pv(4) in (amd64|i386)_mainbus.c
1161                      */
1162                     if (vm_guest != VM_GUEST_GENPVH)
1163                               vm_guest = VM_GUEST_VM;
1164                     x86_cpuid(0x40000000, regs);
1165                     if (regs[0] >= 0x40000000) {
1166                               cpu_max_hypervisor_cpuid = regs[0];
1167                               memcpy(&hv_vendor[0], &regs[1], sizeof(*regs));
1168                               memcpy(&hv_vendor[4], &regs[2], sizeof(*regs));
1169                               memcpy(&hv_vendor[8], &regs[3], sizeof(*regs));
1170                               if (memcmp(hv_vendor, "VMwareVMware", 12) == 0)
1171                                         vm_guest = VM_GUEST_VMWARE;
1172                               else if (memcmp(hv_vendor, "Microsoft Hv", 12) == 0) {
1173                                         vm_guest = VM_GUEST_HV;
1174 #if NHYPERV > 0
1175                                         hyperv_early_init();
1176 #endif
1177                               } else if (memcmp(hv_vendor, "KVMKVMKVM\0\0\0", 12) == 0)
1178                                         vm_guest = VM_GUEST_KVM;
1179                               else if (memcmp(hv_vendor, "XenVMMXenVMM", 12) == 0)
1180                                         vm_guest = VM_GUEST_XENHVM;
1181                               else if (memcmp(hv_vendor, "___ NVMM ___", 12) == 0)
1182                                         vm_guest = VM_GUEST_NVMM;
1183                               /* FreeBSD bhyve: "bhyve bhyve " */
1184                               /* OpenBSD vmm:   "OpenBSDVMM58" */
1185                     }
1186                     // VirtualBox returns KVM, so keep going.
1187                     if (vm_guest != VM_GUEST_KVM)
1188                               return;
1189           }
1190 
1191           /*
1192            * Examine SMBIOS strings for older hypervisors.
1193            */
1194           p = pmf_get_platform("system-serial");
1195           if (p != NULL) {
1196                     if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) {
1197                               vmt_hvcall(VM_CMD_GET_VERSION, regs);
1198                               if (regs[1] == VM_MAGIC) {
1199                                         vm_guest = VM_GUEST_VMWARE;
1200                                         return;
1201                               }
1202                     }
1203           }
1204           p = pmf_get_platform("bios-vendor");
1205           if (p != NULL) {
1206                     for (i = 0; i < __arraycount(vm_bios_vendors); i++) {
1207                               if (strcmp(p, vm_bios_vendors[i].name) == 0) {
1208                                         vm_guest = vm_bios_vendors[i].guest;
1209                                         return;
1210                               }
1211                     }
1212           }
1213           p = pmf_get_platform("system-product");
1214           if (p != NULL) {
1215                     for (i = 0; i < __arraycount(vm_system_products); i++) {
1216                               if (strcmp(p, vm_system_products[i].name) == 0) {
1217                                         vm_guest = vm_system_products[i].guest;
1218                                         return;
1219                               }
1220                     }
1221           }
1222 }
1223