xref: /dragonfly/sys/platform/pc64/x86_64/initcpu.c (revision 45dd33f25a21807f2e759f0267b1faf6c6581700)
1 /*-
2  * Copyright (c) KATO Takenori, 1997, 1998.
3  * Copyright (c) 2008 The DragonFly Project.
4  *
5  * All rights reserved.  Unpublished rights reserved under the copyright
6  * laws of Japan.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer as
14  *    the first lines of this file unmodified.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "opt_cpu.h"
32 
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37 
38 #include <machine/clock.h>
39 #include <machine/cputypes.h>
40 #include <machine/md_var.h>
41 #include <machine/specialreg.h>
42 #include <machine/smp.h>
43 
44 #include <vm/vm.h>
45 #include <vm/pmap.h>
46 
/*
 * Backed by the hw.tsc_ignore_cpuid tunable.  When non-zero,
 * detect_tsc_frequency() still prints the CPUID-derived crystal clock and
 * ratio, but does not use them to set tsc_frequency.
 */
static int tsc_ignore_cpuid = 0;
TUNABLE_INT("hw.tsc_ignore_cpuid", &tsc_ignore_cpuid);

/*
 * Set to 1 by initializecpu() when the CPU advertises both SSE and FXSR.
 * Published through the read-only (CTLFLAG_RD) instruction_sse sysctl
 * under _hw.
 */
static int	hw_instruction_sse;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
    &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
53 
/*
 * CPU identification and feature state.
 *
 * Most of these words are only read by the code visible in this file.
 * The writes performed here are:
 *   - initializecpu(): cpu_fxsr, cpu_xsave, cpu_clflush_line_size, and
 *     clearing CPUID_CLFSH in cpu_feature when CLFLUSH use is disabled;
 *   - init_via(): via_feature_rng and via_feature_xcrypt.
 * NOTE(review): the remaining variables are presumably filled in by the
 * CPU identification code elsewhere -- confirm against the caller.
 */
int	cpu_type;		/* XXX CPU_CLAWHAMMER */
u_int	cpu_feature;		/* Feature flags */
u_int	cpu_feature2;		/* Feature flags */
u_int	amd_feature;		/* AMD feature flags */
u_int	amd_feature2;		/* AMD feature flags */
u_int	via_feature_rng;	/* VIA RNG features */
u_int	via_feature_xcrypt;	/* VIA ACE features */
u_int	cpu_high;		/* Highest arg to CPUID */
u_int	cpu_exthigh;		/* Highest arg to extended CPUID */
u_int	cpu_id;			/* Stepping ID */
u_int	cpu_procinfo;		/* HyperThreading Info / Brand Index / CLFLUSH */
u_int	cpu_procinfo2;		/* Multicore info */
char	cpu_vendor[20];		/* CPU Origin code */
u_int	cpu_vendor_id;		/* CPU vendor ID */
u_int	cpu_fxsr;		/* SSE enabled */
u_int	cpu_xsave;		/* Using XSAVE */
u_int	cpu_clflush_line_size = 32;	/* Default CLFLUSH line size */
u_int	cpu_stdext_feature;
u_int	cpu_stdext_feature2;
u_int	cpu_stdext_feature3;
u_long	cpu_ia32_arch_caps;
u_int	cpu_thermal_feature;
u_int	cpu_mwait_feature;
u_int	cpu_mwait_extemu;
78 
/*
 * CLFLUSH usage policy.  initializecpu() fetches the hw.clflush_enable
 * tunable into this variable and, depending on its value, may clear
 * CPUID_CLFSH from cpu_feature.
 *
 * -1: automatic (enable on h/w, disable on VMs)
 * 0: disable
 * 1: enable (where available)
 */
static int hw_clflush_enable = -1;

/* Read-only view of the policy above; the description string is empty. */
SYSCTL_INT(_hw, OID_AUTO, clflush_enable, CTLFLAG_RD, &hw_clflush_enable, 0,
	   "");

/* Read-only views of the VIA feature words filled in by init_via(). */
SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
	&via_feature_rng, 0, "VIA C3/C7 RNG feature available in CPU");
SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD,
	&via_feature_xcrypt, 0, "VIA C3/C7 xcrypt feature available in CPU");
93 
94 /*
95  * Initialize special VIA C3/C7 features
96  */
97 static void
init_via(void)98 init_via(void)
99 {
100           u_int regs[4], val;
101           u_int64_t msreg;
102 
103           do_cpuid(0xc0000000, regs);
104           val = regs[0];
105           if (val >= 0xc0000001) {
106                     do_cpuid(0xc0000001, regs);
107                     val = regs[3];
108           } else
109                     val = 0;
110 
111           /* Enable RNG if present and disabled */
112           if (val & VIA_CPUID_HAS_RNG) {
113                     if (!(val & VIA_CPUID_DO_RNG)) {
114                               msreg = rdmsr(0x110B);
115                               msreg |= 0x40;
116                               wrmsr(0x110B, msreg);
117                     }
118                     via_feature_rng = VIA_HAS_RNG;
119           }
120           /* Enable AES engine if present and disabled */
121           if (val & VIA_CPUID_HAS_ACE) {
122                     if (!(val & VIA_CPUID_DO_ACE)) {
123                               msreg = rdmsr(0x1107);
124                               msreg |= (0x01 << 28);
125                               wrmsr(0x1107, msreg);
126                     }
127                     via_feature_xcrypt |= VIA_HAS_AES;
128           }
129           /* Enable ACE2 engine if present and disabled */
130           if (val & VIA_CPUID_HAS_ACE2) {
131                     if (!(val & VIA_CPUID_DO_ACE2)) {
132                               msreg = rdmsr(0x1107);
133                               msreg |= (0x01 << 28);
134                               wrmsr(0x1107, msreg);
135                     }
136                     via_feature_xcrypt |= VIA_HAS_AESCTR;
137           }
138           /* Enable SHA engine if present and disabled */
139           if (val & VIA_CPUID_HAS_PHE) {
140                     if (!(val & VIA_CPUID_DO_PHE)) {
141                               msreg = rdmsr(0x1107);
142                               msreg |= (0x01 << 28/**/);
143                               wrmsr(0x1107, msreg);
144                     }
145                     via_feature_xcrypt |= VIA_HAS_SHA;
146           }
147           /* Enable MM engine if present and disabled */
148           if (val & VIA_CPUID_HAS_PMM) {
149                     if (!(val & VIA_CPUID_DO_PMM)) {
150                               msreg = rdmsr(0x1107);
151                               msreg |= (0x01 << 28/**/);
152                               wrmsr(0x1107, msreg);
153                     }
154                     via_feature_xcrypt |= VIA_HAS_MM;
155           }
156 }
157 
158 static enum vmm_guest_type
detect_vmm(void)159 detect_vmm(void)
160 {
161           enum vmm_guest_type guest;
162           char vendor[16];
163 
164           /*
165            * [RFC] CPUID usage for interaction between Hypervisors and Linux.
166            * http://lkml.org/lkml/2008/10/1/246
167            *
168            * KB1009458: Mechanisms to determine if software is running in
169            * a VMware virtual machine
170            * http://kb.vmware.com/kb/1009458
171            */
172           if (cpu_feature2 & CPUID2_VMM) {
173                     u_int regs[4];
174 
175                     do_cpuid(0x40000000, regs);
176                     ((u_int *)&vendor)[0] = regs[1];
177                     ((u_int *)&vendor)[1] = regs[2];
178                     ((u_int *)&vendor)[2] = regs[3];
179                     vendor[12] = '\0';
180                     if (regs[0] >= 0x40000000) {
181                               memcpy(vmm_vendor, vendor, 13);
182                               if (strcmp(vmm_vendor, "VMwareVMware") == 0)
183                                         return VMM_GUEST_VMWARE;
184                               else if (strcmp(vmm_vendor, "Microsoft Hv") == 0)
185                                         return VMM_GUEST_HYPERV;
186                               else if (strcmp(vmm_vendor, "KVMKVMKVM") == 0)
187                                         return VMM_GUEST_KVM;
188                               else if (strcmp(vmm_vendor, "___ NVMM ___") == 0)
189                                         return VMM_GUEST_NVMM;
190                     } else if (regs[0] == 0) {
191                               /* Also detect old KVM versions with regs[0] == 0 */
192                               if (strcmp(vendor, "KVMKVMKVM") == 0) {
193                                         memcpy(vmm_vendor, vendor, 13);
194                                         return VMM_GUEST_KVM;
195                               }
196                     }
197           }
198 
199           guest = detect_virtual();
200           if (guest == VMM_GUEST_NONE && (cpu_feature2 & CPUID2_VMM))
201                     guest = VMM_GUEST_UNKNOWN;
202           return guest;
203 }
204 
205 /*
206  * Initialize CPU control registers
207  */
208 void
initializecpu(int cpu)209 initializecpu(int cpu)
210 {
211           uint64_t msr;
212 
213           /*
214            * Check for FXSR and SSE support and enable if available
215            */
216           if ((cpu_feature & CPUID_SSE) && (cpu_feature & CPUID_FXSR)) {
217                     load_cr4(rcr4() | CR4_OSFXSR | CR4_OSXMMEXCPT);
218                     cpu_fxsr = hw_instruction_sse = 1;
219           }
220 
221           if (cpu == 0) {
222                     /* Check if we are running in a hypervisor. */
223                     vmm_guest = detect_vmm();
224           }
225 
226 #if !defined(CPU_DISABLE_AVX)
227           /* Use XSAVE if supported */
228           if (cpu_feature2 & CPUID2_XSAVE) {
229                     load_cr4(rcr4() | CR4_OSXSAVE);
230 
231                     /* Adjust size of savefpu in npx.h before adding to mask.*/
232                     npx_xcr0_mask = CPU_XFEATURE_X87 | CPU_XFEATURE_SSE;
233                     if (cpu_feature2 & CPUID2_AVX)
234                               npx_xcr0_mask |= CPU_XFEATURE_YMM;
235 
236                     load_xcr(0, npx_xcr0_mask);
237                     cpu_xsave = 1;
238           }
239 #endif
240 
241           if (cpu_vendor_id == CPU_VENDOR_AMD) {
242                     switch((cpu_id & 0xFF0000)) {
243                     case 0x100000:
244                     case 0x120000:
245                               /*
246                                * Errata 721 is the cpu bug found by your's truly
247                                * (Matthew Dillon).  It is a bug where a sequence
248                                * of 5 or more popq's + a retq, under involved
249                                * deep recursion circumstances, can cause the %rsp
250                                * to not be properly updated, almost always
251                                * resulting in a seg-fault soon after.
252                                *
253                                * Do not install the workaround when we are running
254                                * in a virtual machine.
255                                */
256                               if (vmm_guest)
257                                         break;
258 
259                               msr = rdmsr(MSR_AMD_DE_CFG);
260                               if ((msr & 1) == 0) {
261                                         if (cpu == 0)
262                                                   kprintf("Errata 721 workaround "
263                                                             "installed\n");
264                                         msr |= 1;
265                                         wrmsr(MSR_AMD_DE_CFG, msr);
266                               }
267                               break;
268                     }
269 
270                     /*
271                      * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should
272                      * per BKDG.  So, do it here or otherwise some tools could
273                      * be confused by Initial Local APIC ID reported with
274                      * CPUID Function 1 in EBX.
275                      */
276                     if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
277                               if ((cpu_feature2 & CPUID2_VMM) == 0) {
278                                         msr = rdmsr(0xc001001f);
279                                         msr |= (uint64_t)1 << 54;
280                                         wrmsr(0xc001001f, msr);
281                               }
282                     }
283 
284                     /*
285                      * BIOS may configure Family 10h processors to convert
286                      * WC+ cache type to CD.  That can hurt performance of
287                      * guest VMs using nested paging.
288                      *
289                      * The relevant MSR bit is not documented in the BKDG,
290                      * the fix is borrowed from Linux.
291                      */
292                     if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
293                               if ((cpu_feature2 & CPUID2_VMM) == 0) {
294                                         msr = rdmsr(0xc001102a);
295                                         msr &= ~((uint64_t)1 << 24);
296                                         wrmsr(0xc001102a, msr);
297                               }
298                     }
299 
300                     /*
301                      * Work around Erratum 793: Specific Combination of Writes
302                      * to Write Combined Memory Types and Locked Instructions
303                      * May Cause Core Hang.  See Revision Guide for AMD Family
304                      * 16h Models 00h-0Fh Processors, revision 3.04 or later,
305                      * publication 51810.
306                      */
307                     if (CPUID_TO_FAMILY(cpu_id) == 0x16 &&
308                         CPUID_TO_MODEL(cpu_id) <= 0xf) {
309                               if ((cpu_feature2 & CPUID2_VMM) == 0) {
310                                         msr = rdmsr(0xc0011020);
311                                         msr |= (uint64_t)1 << 15;
312                                         wrmsr(0xc0011020, msr);
313                               }
314                     }
315           }
316 
317           if ((amd_feature & AMDID_NX) != 0) {
318                     msr = rdmsr(MSR_EFER) | EFER_NXE;
319                     wrmsr(MSR_EFER, msr);
320 #if 0 /* JG */
321                     pg_nx = PG_NX;
322 #endif
323           }
324           if (cpu_vendor_id == CPU_VENDOR_CENTAUR &&
325               CPUID_TO_FAMILY(cpu_id) == 0x6 &&
326               CPUID_TO_MODEL(cpu_id) >= 0xf)
327                     init_via();
328 
329           TUNABLE_INT_FETCH("hw.clflush_enable", &hw_clflush_enable);
330           if (cpu_feature & CPUID_CLFSH) {
331                     cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
332 
333                     if (hw_clflush_enable == 0 ||
334                         ((hw_clflush_enable == -1) && vmm_guest))
335                               cpu_feature &= ~CPUID_CLFSH;
336           }
337 
338           /* Set TSC_AUX register to the cpuid, for using rdtscp in userland. */
339           if ((amd_feature & AMDID_RDTSCP) != 0)
340                     wrmsr(MSR_TSC_AUX, cpu);
341 }
342 
343 /*
344  * This method should be at least as good as calibrating the TSC based on the
345  * HPET timer, since the HPET runs with the core crystal clock apparently.
346  */
347 static void
detect_tsc_frequency(void)348 detect_tsc_frequency(void)
349 {
350           int cpu_family, cpu_model;
351           u_int regs[4];
352           uint64_t crystal = 0;
353 
354           cpu_model = CPUID_TO_MODEL(cpu_id);
355           cpu_family = CPUID_TO_FAMILY(cpu_id);
356 
357           if (cpu_vendor_id != CPU_VENDOR_INTEL)
358                     return;
359 
360           if (cpu_high < 0x15)
361                     return;
362 
363           do_cpuid(0x15, regs);
364           if (regs[0] == 0 || regs[1] == 0)
365                     return;
366 
367           if (regs[2] == 0) {
368                     /* For some families the SDM contains the core crystal clock. */
369                     if (cpu_family == 0x6) {
370                               switch (cpu_model) {
371                               case 0x55:          /* Xeon Scalable */
372                                         crystal = 25000000; /* 25 MHz */
373                                         break;
374                               /* Skylake */
375                               case 0x4e:
376                               case 0x5e:
377                               /* Kabylake/Coffeelake */
378                               case 0x8e:
379                               case 0x9e:
380                                         crystal = 24000000; /* 24 MHz */
381                                         break;
382                               case 0x5c:          /* Goldmont Atom */
383                                         crystal = 19200000; /* 19.2 MHz */
384                                         break;
385                               default:
386                                         break;
387                               }
388                     }
389           } else {
390                     crystal = regs[2];
391           }
392 
393           if (crystal == 0)
394                     return;
395 
396           kprintf("TSC crystal clock: %ju Hz, TSC/crystal ratio: %u/%u\n",
397               crystal, regs[1], regs[0]);
398 
399           if (tsc_ignore_cpuid == 0) {
400                     tsc_frequency = (crystal * regs[1]) / regs[0];
401                     i8254_cputimer_disable = 1;
402           }
403 }
404 
405 TIMECOUNTER_INIT(cpuid_tsc_frequency, detect_tsc_frequency);
406