1 /* $NetBSD: acpi_cpu_md.c,v 1.84 2020/10/25 16:39:00 nia Exp $ */
2 
3 /*-
4  * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen@iki.fi>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.84 2020/10/25 16:39:00 nia Exp $");
31 
32 #include <sys/param.h>
33 #include <sys/bus.h>
34 #include <sys/cpufreq.h>
35 #include <sys/device.h>
36 #include <sys/kcore.h>
37 #include <sys/sysctl.h>
38 #include <sys/xcall.h>
39 
40 #include <x86/cpu.h>
41 #include <x86/cpufunc.h>
42 #include <x86/cputypes.h>
43 #include <x86/cpuvar.h>
44 #include <x86/machdep.h>
45 #include <x86/x86/tsc.h>
46 
47 #include <dev/acpi/acpica.h>
48 #include <dev/acpi/acpi_cpu.h>
49 
50 #include <dev/pci/pcivar.h>
51 #include <dev/pci/pcidevs.h>
52 
53 #include <machine/acpi_machdep.h>
54 
/*
 * Intel IA32_MISC_ENABLE.
 */
#define MSR_MISC_ENABLE_EST	__BIT(16)
#define MSR_MISC_ENABLE_TURBO	__BIT(38)

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)

/*
 * AMD families 10h, 11h, 12h, 14h, and 15h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

/*
 * Fields of the family 0Fh status MSR.
 */
#define MSR_0FH_STATUS_CFID	__BITS( 0,  5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

/*
 * Fields of the family 0Fh control MSR.
 */
#define MSR_0FH_CONTROL_FID	__BITS( 0,  5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

/*
 * Fields of the per-P-state status value (ps_status)
 * as decoded by the family 0Fh FID/VID code below.
 */
#define ACPI_0FH_STATUS_FID	__BITS( 0,  5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

/*
 * Fields of the per-P-state control value (ps_control)
 * as decoded by the family 0Fh FID/VID code below.
 */
#define ACPI_0FH_CONTROL_FID	__BITS( 0,  5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

/*
 * Convert a FID to the corresponding "VCO FID": values below
 * eight map to 8 + 2 * fid, all other values map to themselves.
 *
 * The parameter used to be spelled differently from the name used
 * in the expansion, which made the macro silently operate on the
 * caller's local variable "fid" instead of on its argument.
 *
 * Note that the argument is evaluated more than once.
 */
#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))
105 
/*
 * The idle routine (and its name) that was in use before the
 * ACPI idle loop was installed; filled in by
 * acpicpu_md_cstate_start() and restored by acpicpu_md_cstate_stop().
 */
static char native_idle_text[16];
void (*native_idle)(void) = NULL;

/*
 * PCI quirk detection.
 */
static int	acpicpu_md_quirk_piix4(const struct pci_attach_args *);

/*
 * P-state helpers: APERF/MPERF reset and the family 0Fh FID/VID code.
 */
static void	acpicpu_md_pstate_hwf_reset(void *, void *);
static int	acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
		    uint32_t *);
static int	acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int	acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void	acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
		    uint32_t, uint32_t);

/*
 * P-state sysctl(9) glue.
 */
static int	acpicpu_md_pstate_sysctl_init(void);
static int	acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

/*
 * Per-CPU softc table (defined elsewhere) and the sysctl log
 * that acpicpu_md_pstate_stop() tears down.
 */
extern struct acpicpu_softc **acpicpu_sc;
static struct sysctllog *acpicpu_log = NULL;
124 
125 struct cpu_info *
acpicpu_md_match(device_t parent,cfdata_t match,void * aux)126 acpicpu_md_match(device_t parent, cfdata_t match, void *aux)
127 {
128           struct cpufeature_attach_args *cfaa = aux;
129 
130           if (strcmp(cfaa->name, "frequency") != 0)
131                     return NULL;
132 
133           return cfaa->ci;
134 }
135 
136 struct cpu_info *
acpicpu_md_attach(device_t parent,device_t self,void * aux)137 acpicpu_md_attach(device_t parent, device_t self, void *aux)
138 {
139           struct cpufeature_attach_args *cfaa = aux;
140 
141           return cfaa->ci;
142 }
143 
144 uint32_t
acpicpu_md_flags(void)145 acpicpu_md_flags(void)
146 {
147           struct cpu_info *ci = curcpu();
148           struct pci_attach_args pa;
149           uint32_t family, val = 0;
150           uint32_t regs[4];
151           uint64_t msr;
152 
153           if (acpi_md_ncpus() == 1)
154                     val |= ACPICPU_FLAG_C_BM;
155 
156           if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
157                     val |= ACPICPU_FLAG_C_FFH;
158 
159           /*
160            * By default, assume that the local APIC timer
161            * as well as TSC are stalled during C3 sleep.
162            */
163           val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;
164 
165           /*
166            * Detect whether TSC is invariant. If it is not, we keep the flag to
167            * note that TSC will not run at constant rate. Depending on the CPU,
168            * this may affect P- and T-state changes, but especially relevant
169            * are C-states; with variant TSC, states larger than C1 may
170            * completely stop the counter.
171            */
172           if (tsc_is_invariant())
173                     val &= ~ACPICPU_FLAG_C_TSC;
174 
175           switch (cpu_vendor) {
176 
177           case CPUVENDOR_IDT:
178 
179                     if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
180                               val |= ACPICPU_FLAG_P_FFH;
181 
182                     if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
183                               val |= ACPICPU_FLAG_T_FFH;
184 
185                     break;
186 
187           case CPUVENDOR_INTEL:
188 
189                     /*
190                      * Bus master control and arbitration should be
191                      * available on all supported Intel CPUs (to be
192                      * sure, this is double-checked later from the
193                      * firmware data). These flags imply that it is
194                      * not necessary to flush caches before C3 state.
195                      */
196                     val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;
197 
198                     /*
199                      * Check if we can use "native", MSR-based,
200                      * access. If not, we have to resort to I/O.
201                      */
202                     if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
203                               val |= ACPICPU_FLAG_P_FFH;
204 
205                     if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
206                               val |= ACPICPU_FLAG_T_FFH;
207 
208                     /*
209                      * Check whether MSR_APERF, MSR_MPERF, and Turbo
210                      * Boost are available. Also see if we might have
211                      * an invariant local APIC timer ("ARAT").
212                      */
213                     if (cpuid_level >= 0x06) {
214 
215                               x86_cpuid(0x00000006, regs);
216 
217                               if ((regs[2] & CPUID_DSPM_HWF) != 0)
218                                         val |= ACPICPU_FLAG_P_HWF;
219 
220                               if ((regs[0] & CPUID_DSPM_IDA) != 0)
221                                         val |= ACPICPU_FLAG_P_TURBO;
222 
223                               if ((regs[0] & CPUID_DSPM_ARAT) != 0)
224                                         val &= ~ACPICPU_FLAG_C_APIC;
225 
226                     }
227 
228                     break;
229 
230           case CPUVENDOR_AMD:
231 
232                     x86_cpuid(0x80000000, regs);
233 
234                     if (regs[0] < 0x80000007)
235                               break;
236 
237                     x86_cpuid(0x80000007, regs);
238 
239                     family = CPUID_TO_FAMILY(ci->ci_signature);
240 
241                     switch (family) {
242 
243                     case 0x0f:
244 
245                               /*
246                                * Disable C1E if present.
247                                */
248                               if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
249                                         val |= ACPICPU_FLAG_C_C1E;
250 
251                               /*
252                                * Evaluate support for the "FID/VID
253                                * algorithm" also used by powernow(4).
254                                */
255                               if ((regs[3] & CPUID_APM_FID) == 0)
256                                         break;
257 
258                               if ((regs[3] & CPUID_APM_VID) == 0)
259                                         break;
260 
261                               val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
262                               break;
263 
264                     case 0x10:
265                     case 0x11:
266 
267                               /*
268                                * Disable C1E if present.
269                                */
270                               if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
271                                         val |= ACPICPU_FLAG_C_C1E;
272 
273                               /* FALLTHROUGH */
274 
275                     case 0x12:
276                     case 0x14: /* AMD Fusion */
277                     case 0x15: /* AMD Bulldozer */
278 
279                               /*
280                                * Like with Intel, detect MSR-based P-states,
281                                * and AMD's "turbo" (Core Performance Boost),
282                                * respectively.
283                                */
284                               if ((regs[3] & CPUID_APM_HWP) != 0)
285                                         val |= ACPICPU_FLAG_P_FFH;
286 
287                               if ((regs[3] & CPUID_APM_CPB) != 0)
288                                         val |= ACPICPU_FLAG_P_TURBO;
289 
290                               /*
291                                * Also check for APERF and MPERF,
292                                * first available in the family 10h.
293                                */
294                               if (cpuid_level >= 0x06) {
295 
296                                         x86_cpuid(0x00000006, regs);
297 
298                                         if ((regs[2] & CPUID_DSPM_HWF) != 0)
299                                                   val |= ACPICPU_FLAG_P_HWF;
300                               }
301 
302                               break;
303                     }
304 
305                     break;
306           }
307 
308           /*
309            * There are several erratums for PIIX4.
310            */
311           if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0)
312                     val |= ACPICPU_FLAG_PIIX4;
313 
314           return val;
315 }
316 
317 static int
acpicpu_md_quirk_piix4(const struct pci_attach_args * pa)318 acpicpu_md_quirk_piix4(const struct pci_attach_args *pa)
319 {
320 
321           /*
322            * XXX: The pci_find_device(9) function only
323            *        deals with attached devices. Change this
324            *        to use something like pci_device_foreach().
325            */
326           if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
327                     return 0;
328 
329           if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
330               PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
331                     return 1;
332 
333           return 0;
334 }
335 
336 void
acpicpu_md_quirk_c1e(void)337 acpicpu_md_quirk_c1e(void)
338 {
339           const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
340           uint64_t val;
341 
342           val = rdmsr(MSR_CMPHALT);
343 
344           if ((val & c1e) != 0)
345                     wrmsr(MSR_CMPHALT, val & ~c1e);
346 }
347 
348 int
acpicpu_md_cstate_start(struct acpicpu_softc * sc)349 acpicpu_md_cstate_start(struct acpicpu_softc *sc)
350 {
351           const size_t size = sizeof(native_idle_text);
352           struct acpicpu_cstate *cs;
353           bool ipi = false;
354           int i;
355 
356           /*
357            * Save the cpu_idle(9) loop used by default.
358            */
359           x86_cpu_idle_get(&native_idle, native_idle_text, size);
360 
361           for (i = 0; i < ACPI_C_STATE_COUNT; i++) {
362 
363                     cs = &sc->sc_cstate[i];
364 
365                     if (cs->cs_method == ACPICPU_C_STATE_HALT) {
366                               ipi = true;
367                               break;
368                     }
369           }
370 
371           x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);
372 
373           return 0;
374 }
375 
376 int
acpicpu_md_cstate_stop(void)377 acpicpu_md_cstate_stop(void)
378 {
379           static char text[16];
380           void (*func)(void);
381           bool ipi;
382 
383           x86_cpu_idle_get(&func, text, sizeof(text));
384 
385           if (func == native_idle)
386                     return EALREADY;
387 
388           ipi = (native_idle != x86_cpu_idle_halt) ? false : true;
389           x86_cpu_idle_set(native_idle, native_idle_text, ipi);
390 
391           /*
392            * Run a cross-call to ensure that all CPUs are
393            * out from the ACPI idle-loop before detachment.
394            */
395           xc_barrier(0);
396 
397           return 0;
398 }
399 
400 /*
401  * Called with interrupts enabled.
402  */
403 void __nocsan
acpicpu_md_cstate_enter(int method,int state)404 acpicpu_md_cstate_enter(int method, int state)
405 {
406           struct cpu_info *ci = curcpu();
407 
408           KASSERT(ci->ci_ilevel == IPL_NONE);
409 
410           switch (method) {
411 
412           case ACPICPU_C_STATE_FFH:
413 
414                     x86_monitor(&ci->ci_want_resched, 0, 0);
415 
416                     if (__predict_false(ci->ci_want_resched != 0))
417                               return;
418 
419                     x86_mwait((state - 1) << 4, 0);
420                     break;
421 
422           case ACPICPU_C_STATE_HALT:
423 
424                     x86_disable_intr();
425 
426                     if (__predict_false(ci->ci_want_resched != 0)) {
427                               x86_enable_intr();
428                               return;
429                     }
430 
431                     x86_stihlt();
432                     break;
433           }
434 }
435 
436 int
acpicpu_md_pstate_start(struct acpicpu_softc * sc)437 acpicpu_md_pstate_start(struct acpicpu_softc *sc)
438 {
439           uint64_t xc, val;
440 
441           switch (cpu_vendor) {
442 
443           case CPUVENDOR_IDT:
444           case CPUVENDOR_INTEL:
445 
446                     /*
447                      * Make sure EST is enabled.
448                      */
449                     if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) {
450 
451                               val = rdmsr(MSR_MISC_ENABLE);
452 
453                               if ((val & MSR_MISC_ENABLE_EST) == 0) {
454 
455                                         val |= MSR_MISC_ENABLE_EST;
456                                         wrmsr(MSR_MISC_ENABLE, val);
457                                         val = rdmsr(MSR_MISC_ENABLE);
458 
459                                         if ((val & MSR_MISC_ENABLE_EST) == 0)
460                                                   return ENOTTY;
461                               }
462                     }
463           }
464 
465           /*
466            * Reset the APERF and MPERF counters.
467            */
468           if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {
469                     xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL);
470                     xc_wait(xc);
471           }
472 
473           return acpicpu_md_pstate_sysctl_init();
474 }
475 
476 int
acpicpu_md_pstate_stop(void)477 acpicpu_md_pstate_stop(void)
478 {
479 
480           if (acpicpu_log == NULL)
481                     return EALREADY;
482 
483           sysctl_teardown(&acpicpu_log);
484           acpicpu_log = NULL;
485 
486           return 0;
487 }
488 
489 int
acpicpu_md_pstate_init(struct acpicpu_softc * sc)490 acpicpu_md_pstate_init(struct acpicpu_softc *sc)
491 {
492           struct cpu_info *ci = sc->sc_ci;
493           struct acpicpu_pstate *ps, msr;
494           uint32_t family, i = 0;
495 
496           (void)memset(&msr, 0, sizeof(struct acpicpu_pstate));
497 
498           switch (cpu_vendor) {
499 
500           case CPUVENDOR_IDT:
501           case CPUVENDOR_INTEL:
502 
503                     /*
504                      * If the so-called Turbo Boost is present,
505                      * the P0-state is always the "turbo state".
506                      * It is shown as the P1 frequency + 1 MHz.
507                      *
508                      * For discussion, see:
509                      *
510                      *        Intel Corporation: Intel Turbo Boost Technology
511                      *        in Intel Core(tm) Microarchitectures (Nehalem)
512                      *        Based Processors. White Paper, November 2008.
513                      */
514                     if (sc->sc_pstate_count >= 2 &&
515                        (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) {
516 
517                               ps = &sc->sc_pstate[0];
518 
519                               if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1)
520                                         ps->ps_flags |= ACPICPU_FLAG_P_TURBO;
521                     }
522 
523                     msr.ps_control_addr = MSR_PERF_CTL;
524                     msr.ps_control_mask = __BITS(0, 15);
525 
526                     msr.ps_status_addr  = MSR_PERF_STATUS;
527                     msr.ps_status_mask  = __BITS(0, 15);
528                     break;
529 
530           case CPUVENDOR_AMD:
531 
532                     if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
533                               msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;
534 
535                     family = CPUID_TO_FAMILY(ci->ci_signature);
536 
537                     switch (family) {
538 
539                     case 0x0f:
540                               msr.ps_control_addr = MSR_0FH_CONTROL;
541                               msr.ps_status_addr  = MSR_0FH_STATUS;
542                               break;
543 
544                     case 0x10:
545                     case 0x11:
546                     case 0x12:
547                     case 0x14:
548                     case 0x15:
549                               msr.ps_control_addr = MSR_10H_CONTROL;
550                               msr.ps_control_mask = __BITS(0, 2);
551 
552                               msr.ps_status_addr  = MSR_10H_STATUS;
553                               msr.ps_status_mask  = __BITS(0, 2);
554                               break;
555 
556                     default:
557                               /*
558                                * If we have an unknown AMD CPU, rely on XPSS.
559                                */
560                               if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
561                                         return EOPNOTSUPP;
562                     }
563 
564                     break;
565 
566           default:
567                     return ENODEV;
568           }
569 
570           /*
571            * Fill the P-state structures with MSR addresses that are
572            * known to be correct. If we do not know the addresses,
573            * leave the values intact. If a vendor uses XPSS, we do
574            * not necessarily need to do anything to support new CPUs.
575            */
576           while (i < sc->sc_pstate_count) {
577 
578                     ps = &sc->sc_pstate[i];
579 
580                     if (msr.ps_flags != 0)
581                               ps->ps_flags |= msr.ps_flags;
582 
583                     if (msr.ps_status_addr != 0)
584                               ps->ps_status_addr = msr.ps_status_addr;
585 
586                     if (msr.ps_status_mask != 0)
587                               ps->ps_status_mask = msr.ps_status_mask;
588 
589                     if (msr.ps_control_addr != 0)
590                               ps->ps_control_addr = msr.ps_control_addr;
591 
592                     if (msr.ps_control_mask != 0)
593                               ps->ps_control_mask = msr.ps_control_mask;
594 
595                     i++;
596           }
597 
598           return 0;
599 }
600 
601 /*
602  * Read the IA32_APERF and IA32_MPERF counters. The first
603  * increments at the rate of the fixed maximum frequency
604  * configured during the boot, whereas APERF counts at the
605  * rate of the actual frequency. Note that the MSRs must be
606  * read without delay, and that only the ratio between
607  * IA32_APERF and IA32_MPERF is architecturally defined.
608  *
609  * The function thus returns the percentage of the actual
610  * frequency in terms of the maximum frequency of the calling
611  * CPU since the last call. A value zero implies an error.
612  *
613  * For further details, refer to:
614  *
615  *        Intel Corporation: Intel 64 and IA-32 Architectures
616  *        Software Developer's Manual. Section 13.2, Volume 3A:
617  *        System Programming Guide, Part 1. July, 2008.
618  *
619  *        Advanced Micro Devices: BIOS and Kernel Developer's
620  *        Guide (BKDG) for AMD Family 10h Processors. Section
621  *        2.4.5, Revision 3.48, April 2010.
622  */
623 uint8_t
acpicpu_md_pstate_hwf(struct cpu_info * ci)624 acpicpu_md_pstate_hwf(struct cpu_info *ci)
625 {
626           struct acpicpu_softc *sc;
627           uint64_t aperf, mperf;
628           uint8_t rv = 0;
629 
630           sc = acpicpu_sc[ci->ci_acpiid];
631 
632           if (__predict_false(sc == NULL))
633                     return 0;
634 
635           if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0))
636                     return 0;
637 
638           aperf = sc->sc_pstate_aperf;
639           mperf = sc->sc_pstate_mperf;
640 
641           x86_disable_intr();
642 
643           sc->sc_pstate_aperf = rdmsr(MSR_APERF);
644           sc->sc_pstate_mperf = rdmsr(MSR_MPERF);
645 
646           x86_enable_intr();
647 
648           aperf = sc->sc_pstate_aperf - aperf;
649           mperf = sc->sc_pstate_mperf - mperf;
650 
651           if (__predict_true(mperf != 0))
652                     rv = (aperf * 100) / mperf;
653 
654           return rv;
655 }
656 
657 static void
acpicpu_md_pstate_hwf_reset(void * arg1,void * arg2)658 acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2)
659 {
660           struct cpu_info *ci = curcpu();
661           struct acpicpu_softc *sc;
662 
663           sc = acpicpu_sc[ci->ci_acpiid];
664 
665           if (__predict_false(sc == NULL))
666                     return;
667 
668           x86_disable_intr();
669 
670           wrmsr(MSR_APERF, 0);
671           wrmsr(MSR_MPERF, 0);
672 
673           x86_enable_intr();
674 
675           sc->sc_pstate_aperf = 0;
676           sc->sc_pstate_mperf = 0;
677 }
678 
679 int
acpicpu_md_pstate_get(struct acpicpu_softc * sc,uint32_t * freq)680 acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
681 {
682           struct acpicpu_pstate *ps = NULL;
683           uint64_t val;
684           uint32_t i;
685 
686           if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
687                     return acpicpu_md_pstate_fidvid_get(sc, freq);
688 
689           /*
690            * Pick any P-state for the status address.
691            */
692           for (i = 0; i < sc->sc_pstate_count; i++) {
693 
694                     ps = &sc->sc_pstate[i];
695 
696                     if (__predict_true(ps->ps_freq != 0))
697                               break;
698           }
699 
700           if (__predict_false(ps == NULL))
701                     return ENODEV;
702 
703           if (__predict_false(ps->ps_status_addr == 0))
704                     return EINVAL;
705 
706           val = rdmsr(ps->ps_status_addr);
707 
708           if (__predict_true(ps->ps_status_mask != 0))
709                     val = val & ps->ps_status_mask;
710 
711           /*
712            * Search for the value from known P-states.
713            */
714           for (i = 0; i < sc->sc_pstate_count; i++) {
715 
716                     ps = &sc->sc_pstate[i];
717 
718                     if (__predict_false(ps->ps_freq == 0))
719                               continue;
720 
721                     if (val == ps->ps_status) {
722                               *freq = ps->ps_freq;
723                               return 0;
724                     }
725           }
726 
727           /*
728            * If the value was not found, try APERF/MPERF.
729            * The state is P0 if the return value is 100 %.
730            */
731           if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {
732 
733                     KASSERT(sc->sc_pstate_count > 0);
734                     KASSERT(sc->sc_pstate[0].ps_freq != 0);
735 
736                     if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) {
737                               *freq = sc->sc_pstate[0].ps_freq;
738                               return 0;
739                     }
740           }
741 
742           return EIO;
743 }
744 
745 int
acpicpu_md_pstate_set(struct acpicpu_pstate * ps)746 acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
747 {
748           uint64_t val = 0;
749 
750           if (__predict_false(ps->ps_control_addr == 0))
751                     return EINVAL;
752 
753           if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0)
754                     return acpicpu_md_pstate_fidvid_set(ps);
755 
756           /*
757            * If the mask is set, do a read-modify-write.
758            */
759           if (__predict_true(ps->ps_control_mask != 0)) {
760                     val = rdmsr(ps->ps_control_addr);
761                     val &= ~ps->ps_control_mask;
762           }
763 
764           val |= ps->ps_control;
765 
766           wrmsr(ps->ps_control_addr, val);
767           DELAY(ps->ps_latency);
768 
769           return 0;
770 }
771 
772 static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc * sc,uint32_t * freq)773 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
774 {
775           struct acpicpu_pstate *ps;
776           uint32_t fid, i, vid;
777           uint32_t cfid, cvid;
778           int rv;
779 
780           /*
781            * AMD family 0Fh needs special treatment.
782            * While it wants to use ACPI, it does not
783            * comply with the ACPI specifications.
784            */
785           rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);
786 
787           if (rv != 0)
788                     return rv;
789 
790           for (i = 0; i < sc->sc_pstate_count; i++) {
791 
792                     ps = &sc->sc_pstate[i];
793 
794                     if (__predict_false(ps->ps_freq == 0))
795                               continue;
796 
797                     fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
798                     vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);
799 
800                     if (cfid == fid && cvid == vid) {
801                               *freq = ps->ps_freq;
802                               return 0;
803                     }
804           }
805 
806           return EIO;
807 }
808 
809 static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate * ps)810 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
811 {
812           const uint64_t ctrl = ps->ps_control;
813           uint32_t cfid, cvid, fid, i, irt;
814           uint32_t pll, vco_cfid, vco_fid;
815           uint32_t val, vid, vst;
816           int rv;
817 
818           rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);
819 
820           if (rv != 0)
821                     return rv;
822 
823           fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
824           vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
825           irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
826           vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
827           pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);
828 
829           vst = vst * 20;
830           pll = pll * 1000 / 5;
831           irt = 10 * __BIT(irt);
832 
833           /*
834            * Phase 1.
835            */
836           while (cvid > vid) {
837 
838                     val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
839                     val = (val > cvid) ? 0 : cvid - val;
840 
841                     acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
842                     rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);
843 
844                     if (rv != 0)
845                               return rv;
846           }
847 
848           i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);
849 
850           for (; i > 0 && cvid > 0; --i) {
851 
852                     acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
853                     rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);
854 
855                     if (rv != 0)
856                               return rv;
857           }
858 
859           /*
860            * Phase 2.
861            */
862           if (cfid != fid) {
863 
864                     vco_fid  = FID_TO_VCO_FID(fid);
865                     vco_cfid = FID_TO_VCO_FID(cfid);
866 
867                     while (abs(vco_fid - vco_cfid) > 2) {
868 
869                               if (fid <= cfid)
870                                         val = cfid - 2;
871                               else {
872                                         val = (cfid > 6) ? cfid + 2 :
873                                             FID_TO_VCO_FID(cfid) + 2;
874                               }
875 
876                               acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
877                               rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);
878 
879                               if (rv != 0)
880                                         return rv;
881 
882                               vco_cfid = FID_TO_VCO_FID(cfid);
883                     }
884 
885                     acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
886                     rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);
887 
888                     if (rv != 0)
889                               return rv;
890           }
891 
892           /*
893            * Phase 3.
894            */
895           if (cvid != vid) {
896 
897                     acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
898                     rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);
899 
900                     if (rv != 0)
901                               return rv;
902           }
903 
904           return 0;
905 }
906 
907 static int
acpicpu_md_pstate_fidvid_read(uint32_t * cfid,uint32_t * cvid)908 acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
909 {
910           int i = ACPICPU_P_STATE_RETRY * 100;
911           uint64_t val;
912 
913           do {
914                     val = rdmsr(MSR_0FH_STATUS);
915 
916           } while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0);
917 
918           if (i == 0)
919                     return EAGAIN;
920 
921           if (cfid != NULL)
922                     *cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);
923 
924           if (cvid != NULL)
925                     *cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);
926 
927           return 0;
928 }
929 
930 static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,uint32_t vid,uint32_t cnt,uint32_t tmo)931 acpicpu_md_pstate_fidvid_write(uint32_t fid,
932     uint32_t vid, uint32_t cnt, uint32_t tmo)
933 {
934           uint64_t val = 0;
935 
936           val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
937           val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
938           val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
939           val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);
940 
941           wrmsr(MSR_0FH_CONTROL, val);
942           DELAY(tmo);
943 }
944 
945 int
acpicpu_md_tstate_get(struct acpicpu_softc * sc,uint32_t * percent)946 acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
947 {
948           struct acpicpu_tstate *ts;
949           uint64_t val;
950           uint32_t i;
951 
952           val = rdmsr(MSR_THERM_CONTROL);
953 
954           for (i = 0; i < sc->sc_tstate_count; i++) {
955 
956                     ts = &sc->sc_tstate[i];
957 
958                     if (ts->ts_percent == 0)
959                               continue;
960 
961                     if (val == ts->ts_status) {
962                               *percent = ts->ts_percent;
963                               return 0;
964                     }
965           }
966 
967           return EIO;
968 }
969 
970 int
acpicpu_md_tstate_set(struct acpicpu_tstate * ts)971 acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
972 {
973           uint64_t val;
974           uint8_t i;
975 
976           val = ts->ts_control;
977           val = val & __BITS(0, 4);
978 
979           wrmsr(MSR_THERM_CONTROL, val);
980 
981           if (ts->ts_status == 0) {
982                     DELAY(ts->ts_latency);
983                     return 0;
984           }
985 
986           for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {
987 
988                     val = rdmsr(MSR_THERM_CONTROL);
989 
990                     if (val == ts->ts_status)
991                               return 0;
992 
993                     DELAY(ts->ts_latency);
994           }
995 
996           return EAGAIN;
997 }
998 
999 /*
1000  * A kludge for backwards compatibility.
1001  */
1002 static int
acpicpu_md_pstate_sysctl_init(void)1003 acpicpu_md_pstate_sysctl_init(void)
1004 {
1005           const struct sysctlnode       *fnode, *mnode, *rnode;
1006           int rv;
1007 
1008           rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
1009               CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
1010               NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);
1011 
1012           if (rv != 0)
1013                     goto fail;
1014 
1015           rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
1016               0, CTLTYPE_NODE, "cpu", NULL,
1017               NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1018 
1019           if (rv != 0)
1020                     goto fail;
1021 
1022           rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
1023               0, CTLTYPE_NODE, "frequency", NULL,
1024               NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1025 
1026           if (rv != 0)
1027                     goto fail;
1028 
1029           rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
1030               CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
1031               acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1032 
1033           if (rv != 0)
1034                     goto fail;
1035 
1036           rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
1037               CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
1038               acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1039 
1040           if (rv != 0)
1041                     goto fail;
1042 
1043           rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
1044               CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
1045               acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1046 
1047           if (rv != 0)
1048                     goto fail;
1049 
1050           return 0;
1051 
1052 fail:
1053           if (acpicpu_log != NULL) {
1054                     sysctl_teardown(&acpicpu_log);
1055                     acpicpu_log = NULL;
1056           }
1057 
1058           return rv;
1059 }
1060 
1061 static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)1062 acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
1063 {
1064           struct sysctlnode node;
1065           uint32_t freq;
1066           int err;
1067 
1068           freq = cpufreq_get(curcpu());
1069 
1070           if (freq == 0)
1071                     return ENXIO;
1072 
1073           node = *rnode;
1074           node.sysctl_data = &freq;
1075 
1076           err = sysctl_lookup(SYSCTLFN_CALL(&node));
1077 
1078           if (err != 0 || newp == NULL)
1079                     return err;
1080 
1081           return 0;
1082 }
1083 
1084 static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)1085 acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
1086 {
1087           struct sysctlnode node;
1088           uint32_t freq;
1089           int err;
1090 
1091           freq = cpufreq_get(curcpu());
1092 
1093           if (freq == 0)
1094                     return ENXIO;
1095 
1096           node = *rnode;
1097           node.sysctl_data = &freq;
1098 
1099           err = sysctl_lookup(SYSCTLFN_CALL(&node));
1100 
1101           if (err != 0 || newp == NULL)
1102                     return err;
1103 
1104           cpufreq_set_all(freq);
1105 
1106           return 0;
1107 }
1108 
1109 static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)1110 acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
1111 {
1112           struct cpu_info *ci = curcpu();
1113           struct acpicpu_softc *sc;
1114           struct sysctlnode node;
1115           char buf[1024];
1116           size_t len;
1117           uint32_t i;
1118           int err;
1119 
1120           sc = acpicpu_sc[ci->ci_acpiid];
1121 
1122           if (sc == NULL)
1123                     return ENXIO;
1124 
1125           (void)memset(&buf, 0, sizeof(buf));
1126 
1127           mutex_enter(&sc->sc_mtx);
1128 
1129           for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {
1130 
1131                     if (sc->sc_pstate[i].ps_freq == 0)
1132                               continue;
1133 
1134                     if (len >= sizeof(buf))
1135                               break;
1136                     len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
1137                         sc->sc_pstate[i].ps_freq,
1138                         i < (sc->sc_pstate_count - 1) ? " " : "");
1139           }
1140 
1141           mutex_exit(&sc->sc_mtx);
1142 
1143           node = *rnode;
1144           node.sysctl_data = buf;
1145 
1146           err = sysctl_lookup(SYSCTLFN_CALL(&node));
1147 
1148           if (err != 0 || newp == NULL)
1149                     return err;
1150 
1151           return 0;
1152 }
1153 
1154