1 /*        $NetBSD: hyperv.c,v 1.17 2025/04/12 19:31:44 nonaka Exp $   */
2 
3 /*-
4  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5  * Copyright (c) 2012 NetApp Inc.
6  * Copyright (c) 2012 Citrix Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /**
32  * Implements low-level interactions with Hyper-V/Azure
33  */
34 #include <sys/cdefs.h>
35 #ifdef __KERNEL_RCSID
36 __KERNEL_RCSID(0, "$NetBSD: hyperv.c,v 1.17 2025/04/12 19:31:44 nonaka Exp $");
37 #endif
38 #ifdef __FBSDID
39 __FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hyperv.c 331757 2018-03-30 02:25:12Z emaste $");
40 #endif
41 
42 #ifdef _KERNEL_OPT
43 #include "lapic.h"
44 #include "genfb.h"
45 #include "opt_ddb.h"
46 #include "vmbus.h"
47 #include "wsdisplay.h"
48 #endif
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/kernel.h>
53 #include <sys/device.h>
54 #include <sys/bus.h>
55 #include <sys/cpu.h>
56 #include <sys/kmem.h>
57 #include <sys/module.h>
58 #include <sys/pmf.h>
59 #include <sys/sysctl.h>
60 #include <sys/timetc.h>
61 
62 #include <uvm/uvm_extern.h>
63 
64 #include <machine/autoconf.h>
65 #include <machine/bootinfo.h>
66 #include <machine/cpufunc.h>
67 #include <machine/cputypes.h>
68 #include <machine/cpuvar.h>
69 #include <machine/cpu_counter.h>
70 #include <x86/apicvar.h>
71 #include <x86/efi.h>
72 
73 #include <dev/wsfb/genfbvar.h>
74 #include <x86/genfb_machdep.h>
75 
76 #include <x86/x86/hypervreg.h>
77 #include <x86/x86/hypervvar.h>
78 #include <dev/hyperv/vmbusvar.h>
79 #include <dev/hyperv/genfb_vmbusvar.h>
80 
81 #ifdef DDB
82 #include <machine/db_machdep.h>
83 #include <ddb/db_sym.h>
84 #include <ddb/db_extern.h>
85 #endif
86 
87 struct hyperv_softc {
88           device_t            sc_dev;
89 
90           struct sysctllog    *sc_log;
91 };
92 
93 struct hyperv_hypercall_ctx {
94           void                *hc_addr;
95           paddr_t             hc_paddr;
96 };
97 
/*
 * Machine-dependent per-CPU cookie hung off vmbus_percpu_data::md_cookie.
 */
struct hyperv_percpu_data {
	int	pd_idtvec;	/* IDT vector used for Hyper-V interrupts */
};
101 
/*
 * Hypercall page bookkeeping.  hc_addr is set by hyperv_init_hypercall()
 * and reset to NULL by hyperv_hypercall_memfree().
 */
static struct hyperv_hypercall_ctx hyperv_hypercall_ctx;

/*
 * Page-aligned, PAGE_SIZE-byte region filled with 0xcc, emitted as a naked
 * function so that it is placed with the kernel code.
 * hyperv_init_hypercall() passes its physical page number to the
 * hypervisor through MSR_HV_HYPERCALL.
 */
static void __attribute__((naked)) __aligned(PAGE_SIZE)
hyperv_hypercall_page(void)
{
	__asm__ __volatile__ (".fill %c0, 1, 0xcc" :: "i" (PAGE_SIZE));
}
109 
110 static u_int        hyperv_get_timecount(struct timecounter *);
111 
112 static u_int hyperv_features;           /* CPUID_HV_MSR_ */
113 static u_int hyperv_recommends;
114 
115 static u_int hyperv_pm_features;
116 static u_int hyperv_features3;
117 
118 static char hyperv_version_str[64];
119 static char hyperv_features_str[256];
120 static char hyperv_pm_features_str[256];
121 static char hyperv_features3_str[256];
122 
123 uint32_t hyperv_vcpuid[MAXCPUS];
124 
125 static struct timecounter hyperv_timecounter = {
126           .tc_get_timecount = hyperv_get_timecount,
127           .tc_counter_mask = 0xffffffff,
128           .tc_frequency = HYPERV_TIMER_FREQ,
129           .tc_name = "Hyper-V",
130           .tc_quality = 2000,
131 };
132 
133 static void         hyperv_proc_dummy(void *, struct cpu_info *);
134 
135 struct hyperv_proc {
136           hyperv_proc_t       func;
137           void                *arg;
138 };
139 
140 static struct hyperv_proc hyperv_event_proc = {
141           .func = hyperv_proc_dummy,
142 };
143 
144 static struct hyperv_proc hyperv_message_proc = {
145           .func = hyperv_proc_dummy,
146 };
147 
148 static int          hyperv_match(device_t, cfdata_t, void *);
149 static void         hyperv_attach(device_t, device_t, void *);
150 static int          hyperv_detach(device_t, int);
151 
152 CFATTACH_DECL_NEW(hyperv, sizeof(struct hyperv_softc),
153     hyperv_match, hyperv_attach, hyperv_detach, NULL);
154 
155 static void         hyperv_hypercall_memfree(void);
156 static bool         hyperv_init_hypercall(void);
157 static int          hyperv_sysctl_setup_root(struct hyperv_softc *);
158 
159 static u_int
hyperv_get_timecount(struct timecounter * tc)160 hyperv_get_timecount(struct timecounter *tc)
161 {
162 
163           return (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
164 }
165 
166 static uint64_t
hyperv_tc64_rdmsr(void)167 hyperv_tc64_rdmsr(void)
168 {
169 
170           return rdmsr(MSR_HV_TIME_REF_COUNT);
171 }
172 
173 #ifdef __amd64__
174 /*
175  * Reference TSC
176  */
177 struct hyperv_ref_tsc {
178           struct hyperv_reftsc          *tsc_ref;
179           paddr_t                       tsc_paddr;
180 };
181 
182 static struct hyperv_ref_tsc hyperv_ref_tsc;
183 
184 static u_int        hyperv_tsc_timecount(struct timecounter *);
185 
186 static struct timecounter hyperv_tsc_timecounter = {
187           .tc_get_timecount = hyperv_tsc_timecount,
188           .tc_counter_mask = 0xffffffff,
189           .tc_frequency = HYPERV_TIMER_FREQ,
190           .tc_name = "Hyper-V-TSC",
191           .tc_quality = 3000,
192 };
193 
194 static __inline u_int
atomic_load_acq_int(volatile u_int * p)195 atomic_load_acq_int(volatile u_int *p)
196 {
197           u_int r = *p;
198           __insn_barrier();
199           return r;
200 }
201 
202 static uint64_t
hyperv_tc64_tsc(void)203 hyperv_tc64_tsc(void)
204 {
205           struct hyperv_reftsc *tsc_ref = hyperv_ref_tsc.tsc_ref;
206           uint32_t seq;
207 
208           while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
209                     uint64_t disc, ret, tsc;
210                     uint64_t scale = tsc_ref->tsc_scale;
211                     int64_t ofs = tsc_ref->tsc_ofs;
212 
213                     tsc = cpu_counter();
214 
215                     /* ret = ((tsc * scale) >> 64) + ofs */
216                     __asm__ __volatile__ ("mulq %3" :
217                         "=d" (ret), "=a" (disc) :
218                         "a" (tsc), "r" (scale));
219                     ret += ofs;
220 
221                     __insn_barrier();
222                     if (tsc_ref->tsc_seq == seq)
223                               return ret;
224 
225                     /* Sequence changed; re-sync. */
226           }
227           /* Fallback to the generic timecounter, i.e. rdmsr. */
228           return rdmsr(MSR_HV_TIME_REF_COUNT);
229 }
230 
231 static u_int
hyperv_tsc_timecount(struct timecounter * tc __unused)232 hyperv_tsc_timecount(struct timecounter *tc __unused)
233 {
234 
235           return hyperv_tc64_tsc();
236 }
237 
238 static bool
hyperv_tsc_tcinit(void)239 hyperv_tsc_tcinit(void)
240 {
241           uint64_t orig_msr, msr;
242 
243           if ((hyperv_features &
244                (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC)) !=
245               (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC) ||
246               (cpu_feature[0] & CPUID_SSE2) == 0) /* SSE2 for mfence/lfence */
247                     return false;
248 
249           hyperv_ref_tsc.tsc_ref = (void *)uvm_km_alloc(kernel_map,
250               PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
251           if (hyperv_ref_tsc.tsc_ref == NULL) {
252                     aprint_error("Hyper-V: reference TSC page allocation failed\n");
253                     return false;
254           }
255 
256           if (!pmap_extract(pmap_kernel(), (vaddr_t)hyperv_ref_tsc.tsc_ref,
257               &hyperv_ref_tsc.tsc_paddr)) {
258                     aprint_error("Hyper-V: reference TSC page setup failed\n");
259                     uvm_km_free(kernel_map, (vaddr_t)hyperv_ref_tsc.tsc_ref,
260                         PAGE_SIZE, UVM_KMF_WIRED);
261                     hyperv_ref_tsc.tsc_ref = NULL;
262                     return false;
263           }
264 
265           orig_msr = rdmsr(MSR_HV_REFERENCE_TSC);
266           msr = MSR_HV_REFTSC_ENABLE | (orig_msr & MSR_HV_REFTSC_RSVD_MASK) |
267               (atop(hyperv_ref_tsc.tsc_paddr) << MSR_HV_REFTSC_PGSHIFT);
268           wrmsr(MSR_HV_REFERENCE_TSC, msr);
269 
270           /* Install 64 bits timecounter method for other modules to use. */
271           hyperv_tc64 = hyperv_tc64_tsc;
272 
273           /* Register "enlightened" timecounter. */
274           tc_init(&hyperv_tsc_timecounter);
275 
276           return true;
277 }
278 #endif /* __amd64__ */
279 
280 static void
delay_tc(unsigned int n)281 delay_tc(unsigned int n)
282 {
283           struct timecounter *tc;
284           uint64_t end, now;
285           u_int last, u;
286 
287           tc = timecounter;
288           if (tc->tc_quality <= 0) {
289                     x86_delay(n);
290                     return;
291           }
292 
293           now = 0;
294           end = tc->tc_frequency * n / 1000000;
295           last = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
296           do {
297                     x86_pause();
298                     u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
299                     if (u < last)
300                               now += tc->tc_counter_mask - last + u + 1;
301                     else
302                               now += u - last;
303                     last = u;
304           } while (now < end);
305 }
306 
307 static void
delay_msr(unsigned int n)308 delay_msr(unsigned int n)
309 {
310           uint64_t end, now;
311           u_int last, u;
312 
313           now = 0;
314           end = HYPERV_TIMER_FREQ * n / 1000000ULL;
315           last = (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
316           do {
317                     x86_pause();
318                     u = (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
319                     if (u < last)
320                               now += 0xffffffff - last + u + 1;
321                     else
322                               now += u - last;
323                     last = u;
324           } while (now < end);
325 }
326 
327 static __inline uint64_t
hyperv_hypercall_md(volatile void * hc_addr,uint64_t in_val,uint64_t in_paddr,uint64_t out_paddr)328 hyperv_hypercall_md(volatile void *hc_addr, uint64_t in_val, uint64_t in_paddr,
329     uint64_t out_paddr)
330 {
331           uint64_t status;
332 
333 #ifdef __amd64__
334           __asm__ __volatile__ ("mov %0, %%r8" : : "r" (out_paddr): "r8");
335           __asm__ __volatile__ ("call *%3" : "=a" (status) : "c" (in_val),
336               "d" (in_paddr), "m" (hc_addr));
337 #else
338           uint32_t in_val_hi = in_val >> 32;
339           uint32_t in_val_lo = in_val & 0xFFFFFFFF;
340           uint32_t status_hi, status_lo;
341           uint32_t in_paddr_hi = in_paddr >> 32;
342           uint32_t in_paddr_lo = in_paddr & 0xFFFFFFFF;
343           uint32_t out_paddr_hi = out_paddr >> 32;
344           uint32_t out_paddr_lo = out_paddr & 0xFFFFFFFF;
345 
346           __asm__ __volatile__ ("call *%8" : "=d" (status_hi), "=a" (status_lo) :
347               "d" (in_val_hi), "a" (in_val_lo),
348               "b" (in_paddr_hi), "c" (in_paddr_lo),
349               "D" (out_paddr_hi), "S" (out_paddr_lo),
350               "m" (hc_addr));
351           status = status_lo | ((uint64_t)status_hi << 32);
352 #endif
353 
354           return status;
355 }
356 
357 uint64_t
hyperv_hypercall(uint64_t control,paddr_t in_paddr,paddr_t out_paddr)358 hyperv_hypercall(uint64_t control, paddr_t in_paddr, paddr_t out_paddr)
359 {
360 
361           if (hyperv_hypercall_ctx.hc_addr == NULL)
362                     return ~HYPERCALL_STATUS_SUCCESS;
363 
364           return hyperv_hypercall_md(hyperv_hypercall_ctx.hc_addr, control,
365               in_paddr, out_paddr);
366 }
367 
368 static bool
hyperv_probe(u_int * maxleaf,u_int * features,u_int * pm_features,u_int * features3)369 hyperv_probe(u_int *maxleaf, u_int *features, u_int *pm_features,
370     u_int *features3)
371 {
372           u_int regs[4];
373 
374           if (vm_guest != VM_GUEST_HV)
375                     return false;
376 
377           x86_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
378           *maxleaf = regs[0];
379           if (*maxleaf < CPUID_LEAF_HV_LIMITS)
380                     return false;
381 
382           x86_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
383           if (regs[0] != CPUID_HV_IFACE_HYPERV)
384                     return false;
385 
386           x86_cpuid(CPUID_LEAF_HV_FEATURES, regs);
387           if (!(regs[0] & CPUID_HV_MSR_HYPERCALL)) {
388                     /*
389                      * Hyper-V w/o Hypercall is impossible; someone
390                      * is faking Hyper-V.
391                      */
392                     return false;
393           }
394 
395           *features = regs[0];
396           *pm_features = regs[2];
397           *features3 = regs[3];
398 
399           return true;
400 }
401 
402 static bool
hyperv_identify(void)403 hyperv_identify(void)
404 {
405           char buf[256];
406           u_int regs[4];
407           u_int maxleaf;
408 
409           if (!hyperv_probe(&maxleaf, &hyperv_features, &hyperv_pm_features,
410               &hyperv_features3))
411                     return false;
412 
413           x86_cpuid(CPUID_LEAF_HV_IDENTITY, regs);
414           hyperv_ver_major = regs[1] >> 16;
415           snprintf(hyperv_version_str, sizeof(hyperv_version_str),
416               "%d.%d.%d [SP%d]",
417               hyperv_ver_major, regs[1] & 0xffff, regs[0], regs[2]);
418           aprint_verbose("Hyper-V Version: %s\n", hyperv_version_str);
419 
420           snprintb(hyperv_features_str, sizeof(hyperv_features_str),
421               "\020"
422               "\001VPRUNTIME" /* MSR_HV_VP_RUNTIME */
423               "\002TMREFCNT"  /* MSR_HV_TIME_REF_COUNT */
424               "\003SYNIC"               /* MSRs for SynIC */
425               "\004SYNTM"               /* MSRs for SynTimer */
426               "\005APIC"                /* MSR_HV_{EOI,ICR,TPR} */
427               "\006HYPERCALL" /* MSR_HV_{GUEST_OS_ID,HYPERCALL} */
428               "\007VPINDEX"   /* MSR_HV_VP_INDEX */
429               "\010RESET"               /* MSR_HV_RESET */
430               "\011STATS"               /* MSR_HV_STATS_ */
431               "\012REFTSC"    /* MSR_HV_REFERENCE_TSC */
432               "\013IDLE"                /* MSR_HV_GUEST_IDLE */
433               "\014TMFREQ"    /* MSR_HV_{TSC,APIC}_FREQUENCY */
434               "\015DEBUG",    /* MSR_HV_SYNTH_DEBUG_ */
435               hyperv_features);
436           aprint_verbose("  Features=%s\n", hyperv_features_str);
437           snprintb(buf, sizeof(buf),
438               "\020"
439               "\005C3HPET",   /* HPET is required for C3 state */
440               (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK));
441           snprintf(hyperv_pm_features_str, sizeof(hyperv_pm_features_str),
442               "%s [C%u]", buf, CPUPM_HV_CSTATE(hyperv_pm_features));
443           aprint_verbose("  PM Features=%s\n", hyperv_pm_features_str);
444           snprintb(hyperv_features3_str, sizeof(hyperv_features3_str),
445               "\020"
446               "\001MWAIT"               /* MWAIT */
447               "\002DEBUG"               /* guest debug support */
448               "\003PERFMON"   /* performance monitor */
449               "\004PCPUDPE"   /* physical CPU dynamic partition event */
450               "\005XMMHC"               /* hypercall input through XMM regs */
451               "\006IDLE"                /* guest idle support */
452               "\007SLEEP"               /* hypervisor sleep support */
453               "\010NUMA"                /* NUMA distance query support */
454               "\011TMFREQ"    /* timer frequency query (TSC, LAPIC) */
455               "\012SYNCMC"    /* inject synthetic machine checks */
456               "\013CRASH"               /* MSRs for guest crash */
457               "\014DEBUGMSR"  /* MSRs for guest debug */
458               "\015NPIEP"               /* NPIEP */
459               "\016HVDIS",    /* disabling hypervisor */
460               hyperv_features3);
461           aprint_verbose("  Features3=%s\n", hyperv_features3_str);
462 
463           x86_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs);
464           hyperv_recommends = regs[0];
465           aprint_verbose("  Recommends: %08x %08x\n", regs[0], regs[1]);
466 
467           x86_cpuid(CPUID_LEAF_HV_LIMITS, regs);
468           aprint_verbose("  Limits: Vcpu:%d Lcpu:%d Int:%d\n",
469               regs[0], regs[1], regs[2]);
470 
471           if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) {
472                     x86_cpuid(CPUID_LEAF_HV_HWFEATURES, regs);
473                     aprint_verbose("  HW Features: %08x, AMD: %08x\n",
474                         regs[0], regs[3]);
475           }
476 
477           return true;
478 }
479 
480 void
hyperv_early_init(void)481 hyperv_early_init(void)
482 {
483           u_int features, pm_features, features3;
484           u_int maxleaf;
485           int i;
486 
487           if (!hyperv_probe(&maxleaf, &features, &pm_features, &features3))
488                     return;
489 
490           if (features & CPUID_HV_MSR_TIME_REFCNT)
491                     x86_delay = delay_func = delay_msr;
492 
493           if (features & CPUID_HV_MSR_VP_INDEX) {
494                     /* Save virtual processor id. */
495                     hyperv_vcpuid[0] = rdmsr(MSR_HV_VP_INDEX);
496           } else {
497                     /* Set virtual processor id to 0 for compatibility. */
498                     hyperv_vcpuid[0] = 0;
499           }
500           for (i = 1; i < MAXCPUS; i++)
501                     hyperv_vcpuid[i] = hyperv_vcpuid[0];
502 }
503 
504 void
hyperv_init_cpu(struct cpu_info * ci)505 hyperv_init_cpu(struct cpu_info *ci)
506 {
507           u_int features, pm_features, features3;
508           u_int maxleaf;
509 
510           if (!hyperv_probe(&maxleaf, &features, &pm_features, &features3))
511                     return;
512 
513           if (features & CPUID_HV_MSR_VP_INDEX)
514                     hyperv_vcpuid[ci->ci_index] = rdmsr(MSR_HV_VP_INDEX);
515 }
516 
517 uint32_t
hyperv_get_vcpuid(cpuid_t cpu)518 hyperv_get_vcpuid(cpuid_t cpu)
519 {
520 
521           if (cpu < MAXCPUS)
522                     return hyperv_vcpuid[cpu];
523           return 0;
524 }
525 
526 static bool
hyperv_init(void)527 hyperv_init(void)
528 {
529 
530           if (!hyperv_identify()) {
531                     /* Not Hyper-V; reset guest id to the generic one. */
532                     if (vm_guest == VM_GUEST_HV)
533                               vm_guest = VM_GUEST_VM;
534                     return false;
535           }
536 
537           /* Set guest id */
538           wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_OSTYPE_NETBSD |
539               (uint64_t)__NetBSD_Version__ << MSR_HV_GUESTID_VERSION_SHIFT);
540 
541           if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) {
542                     /* Register Hyper-V timecounter */
543                     tc_init(&hyperv_timecounter);
544 
545                     /*
546                      * Install 64 bits timecounter method for other modules to use.
547                      */
548                     hyperv_tc64 = hyperv_tc64_rdmsr;
549 #ifdef __amd64__
550                     hyperv_tsc_tcinit();
551 #endif
552 
553                     /* delay with timecounter */
554                     x86_delay = delay_func = delay_tc;
555           }
556 
557 #if NLAPIC > 0
558           if ((hyperv_features & CPUID_HV_MSR_TIME_FREQ) &&
559               (hyperv_features3 & CPUID3_HV_TIME_FREQ))
560                     lapic_per_second = rdmsr(MSR_HV_APIC_FREQUENCY);
561 #endif
562 
563           return hyperv_init_hypercall();
564 }
565 
566 static bool
hyperv_is_initialized(void)567 hyperv_is_initialized(void)
568 {
569           uint64_t msr;
570 
571           if (vm_guest != VM_GUEST_HV)
572                     return false;
573           if (rdmsr_safe(MSR_HV_HYPERCALL, &msr) == EFAULT)
574                     return false;
575           return (msr & MSR_HV_HYPERCALL_ENABLE) ? true : false;
576 }
577 
578 static int
hyperv_match(device_t parent,cfdata_t cf,void * aux)579 hyperv_match(device_t parent, cfdata_t cf, void *aux)
580 {
581           struct cpufeature_attach_args *cfaa = aux;
582           struct cpu_info *ci = cfaa->ci;
583 
584           if (strcmp(cfaa->name, "vm") != 0)
585                     return 0;
586           if ((ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) == 0)
587                     return 0;
588           if (vm_guest != VM_GUEST_HV)
589                     return 0;
590 
591           return 1;
592 }
593 
594 static void
hyperv_attach(device_t parent,device_t self,void * aux)595 hyperv_attach(device_t parent, device_t self, void *aux)
596 {
597           struct hyperv_softc *sc = device_private(self);
598 
599           sc->sc_dev = self;
600 
601           aprint_naive("\n");
602           aprint_normal(": Hyper-V\n");
603 
604           if (!hyperv_is_initialized()) {
605                     if (rdmsr(MSR_HV_GUEST_OS_ID) == 0) {
606                               if (!hyperv_init()) {
607                                         aprint_error_dev(self, "initialize failed\n");
608                                         return;
609                               }
610                     }
611                     hyperv_init_hypercall();
612           }
613 
614           (void) pmf_device_register(self, NULL, NULL);
615 
616           (void) hyperv_sysctl_setup_root(sc);
617 }
618 
619 static int
hyperv_detach(device_t self,int flags)620 hyperv_detach(device_t self, int flags)
621 {
622           struct hyperv_softc *sc = device_private(self);
623           uint64_t hc;
624 
625           /* Disable Hypercall */
626           hc = rdmsr(MSR_HV_HYPERCALL);
627           wrmsr(MSR_HV_HYPERCALL, hc & MSR_HV_HYPERCALL_RSVD_MASK);
628           hyperv_hypercall_memfree();
629 
630           if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT)
631                     tc_detach(&hyperv_timecounter);
632 
633           wrmsr(MSR_HV_GUEST_OS_ID, 0);
634 
635           pmf_device_deregister(self);
636 
637           if (sc->sc_log != NULL) {
638                     sysctl_teardown(&sc->sc_log);
639                     sc->sc_log = NULL;
640           }
641 
642           return 0;
643 }
644 
645 void
hyperv_intr(void)646 hyperv_intr(void)
647 {
648           struct cpu_info *ci = curcpu();
649 
650           (*hyperv_event_proc.func)(hyperv_event_proc.arg, ci);
651           (*hyperv_message_proc.func)(hyperv_message_proc.arg, ci);
652 }
653 
654 void hyperv_hypercall_intr(struct trapframe *);
655 void
hyperv_hypercall_intr(struct trapframe * frame __unused)656 hyperv_hypercall_intr(struct trapframe *frame __unused)
657 {
658           struct cpu_info *ci = curcpu();
659 
660           ci->ci_isources[LIR_HV]->is_evcnt.ev_count++;
661 
662           hyperv_intr();
663 }
664 
665 static void
hyperv_proc_dummy(void * arg __unused,struct cpu_info * ci __unused)666 hyperv_proc_dummy(void *arg __unused, struct cpu_info *ci __unused)
667 {
668 }
669 
670 void
hyperv_set_event_proc(void (* func)(void *,struct cpu_info *),void * arg)671 hyperv_set_event_proc(void (*func)(void *, struct cpu_info *), void *arg)
672 {
673 
674           hyperv_event_proc.func = func;
675           hyperv_event_proc.arg = arg;
676 }
677 
678 void
hyperv_set_message_proc(void (* func)(void *,struct cpu_info *),void * arg)679 hyperv_set_message_proc(void (*func)(void *, struct cpu_info *), void *arg)
680 {
681 
682           hyperv_message_proc.func = func;
683           hyperv_message_proc.arg = arg;
684 }
685 
686 static void
hyperv_hypercall_memfree(void)687 hyperv_hypercall_memfree(void)
688 {
689 
690           hyperv_hypercall_ctx.hc_addr = NULL;
691 }
692 
693 static bool
hyperv_init_hypercall(void)694 hyperv_init_hypercall(void)
695 {
696           uint64_t hc, hc_orig;
697 
698           hyperv_hypercall_ctx.hc_addr = hyperv_hypercall_page;
699           hyperv_hypercall_ctx.hc_paddr = vtophys((vaddr_t)hyperv_hypercall_page);
700           KASSERT(hyperv_hypercall_ctx.hc_paddr != 0);
701 
702           /* Get the 'reserved' bits, which requires preservation. */
703           hc_orig = rdmsr(MSR_HV_HYPERCALL);
704 
705           /*
706            * Setup the Hypercall page.
707            *
708            * NOTE: 'reserved' bits MUST be preserved.
709            */
710           hc = (atop(hyperv_hypercall_ctx.hc_paddr) << MSR_HV_HYPERCALL_PGSHIFT) |
711               (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) |
712               MSR_HV_HYPERCALL_ENABLE;
713           wrmsr(MSR_HV_HYPERCALL, hc);
714 
715           /*
716            * Confirm that Hypercall page did get setup.
717            */
718           hc = rdmsr(MSR_HV_HYPERCALL);
719           if (!(hc & MSR_HV_HYPERCALL_ENABLE)) {
720                     aprint_error("Hyper-V: Hypercall setup failed\n");
721                     hyperv_hypercall_memfree();
722                     /* Can't perform any Hyper-V specific actions */
723                     vm_guest = VM_GUEST_VM;
724                     return false;
725           }
726 
727           return true;
728 }
729 
730 int
hyperv_hypercall_enabled(void)731 hyperv_hypercall_enabled(void)
732 {
733 
734           return hyperv_is_initialized();
735 }
736 
737 int
hyperv_synic_supported(void)738 hyperv_synic_supported(void)
739 {
740 
741           return (hyperv_features & CPUID_HV_MSR_SYNIC) ? 1 : 0;
742 }
743 
/*
 * Return 1 when efi_probe() reports no EFI, 0 otherwise.
 */
int
hyperv_is_gen1(void)
{

	return efi_probe() ? 0 : 1;
}
750 
751 void
hyperv_send_eom(void)752 hyperv_send_eom(void)
753 {
754 
755           wrmsr(MSR_HV_EOM, 0);
756 }
757 
758 void
vmbus_init_interrupts_md(struct vmbus_softc * sc,cpuid_t cpu)759 vmbus_init_interrupts_md(struct vmbus_softc *sc, cpuid_t cpu)
760 {
761           extern void Xintr_hyperv_hypercall(void);
762           struct vmbus_percpu_data *pd;
763           struct hyperv_percpu_data *hv_pd;
764           struct cpu_info *ci;
765           struct idt_vec *iv;
766           int hyperv_idtvec;
767           cpuid_t cpu0;
768 
769           cpu0 = cpu_index(&cpu_info_primary);
770 
771           if (cpu == cpu0 || idt_vec_is_pcpu()) {
772                     /*
773                      * All Hyper-V ISR required resources are setup, now let's find a
774                      * free IDT vector for Hyper-V ISR and set it up.
775                      */
776                     ci = cpu_lookup(cpu);
777                     iv = &ci->ci_idtvec;
778                     mutex_enter(&cpu_lock);
779                     hyperv_idtvec = idt_vec_alloc(iv,
780                         APIC_LEVEL(NIPL), IDT_INTR_HIGH);
781                     mutex_exit(&cpu_lock);
782                     KASSERT(hyperv_idtvec > 0);
783                     idt_vec_set(iv, hyperv_idtvec, Xintr_hyperv_hypercall);
784           } else {
785                     pd = &sc->sc_percpu[cpu0];
786                     hv_pd = pd->md_cookie;
787                     KASSERT(hv_pd != NULL && hv_pd->pd_idtvec > 0);
788                     hyperv_idtvec = hv_pd->pd_idtvec;
789           }
790 
791           hv_pd = kmem_zalloc(sizeof(*hv_pd), KM_SLEEP);
792           hv_pd->pd_idtvec = hyperv_idtvec;
793           pd = &sc->sc_percpu[cpu];
794           pd->md_cookie = (void *)hv_pd;
795 }
796 
797 void
vmbus_deinit_interrupts_md(struct vmbus_softc * sc,cpuid_t cpu)798 vmbus_deinit_interrupts_md(struct vmbus_softc *sc, cpuid_t cpu)
799 {
800           struct vmbus_percpu_data *pd;
801           struct hyperv_percpu_data *hv_pd;
802           struct cpu_info *ci;
803           struct idt_vec *iv;
804 
805           pd = &sc->sc_percpu[cpu];
806           hv_pd = pd->md_cookie;
807           KASSERT(hv_pd != NULL);
808 
809           if (cpu == cpu_index(&cpu_info_primary) ||
810               idt_vec_is_pcpu()) {
811                     ci = cpu_lookup(cpu);
812                     iv = &ci->ci_idtvec;
813 
814                     if (hv_pd->pd_idtvec > 0) {
815                               idt_vec_free(iv, hv_pd->pd_idtvec);
816                     }
817           }
818 
819           pd->md_cookie = NULL;
820           kmem_free(hv_pd, sizeof(*hv_pd));
821 }
822 
823 void
vmbus_init_synic_md(struct vmbus_softc * sc,cpuid_t cpu)824 vmbus_init_synic_md(struct vmbus_softc *sc, cpuid_t cpu)
825 {
826           extern void Xintr_hyperv_hypercall(void);
827           struct vmbus_percpu_data *pd;
828           struct hyperv_percpu_data *hv_pd;
829           uint64_t val, orig;
830           uint32_t sint;
831           int hyperv_idtvec;
832 
833           pd = &sc->sc_percpu[cpu];
834           hv_pd = pd->md_cookie;
835           hyperv_idtvec = hv_pd->pd_idtvec;
836 
837           /*
838            * Setup the SynIC message.
839            */
840           orig = rdmsr(MSR_HV_SIMP);
841           val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
842               (atop(hyperv_dma_get_paddr(&pd->simp_dma)) << MSR_HV_SIMP_PGSHIFT);
843           wrmsr(MSR_HV_SIMP, val);
844 
845           /*
846            * Setup the SynIC event flags.
847            */
848           orig = rdmsr(MSR_HV_SIEFP);
849           val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
850               (atop(hyperv_dma_get_paddr(&pd->siep_dma)) << MSR_HV_SIEFP_PGSHIFT);
851           wrmsr(MSR_HV_SIEFP, val);
852 
853           /*
854            * Configure and unmask SINT for message and event flags.
855            */
856           sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
857           orig = rdmsr(sint);
858           val = hyperv_idtvec | MSR_HV_SINT_AUTOEOI |
859               (orig & MSR_HV_SINT_RSVD_MASK);
860           wrmsr(sint, val);
861 
862           /*
863            * Configure and unmask SINT for timer.
864            */
865           sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
866           orig = rdmsr(sint);
867           val = hyperv_idtvec | MSR_HV_SINT_AUTOEOI |
868               (orig & MSR_HV_SINT_RSVD_MASK);
869           wrmsr(sint, val);
870 
871           /*
872            * All done; enable SynIC.
873            */
874           orig = rdmsr(MSR_HV_SCONTROL);
875           val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
876           wrmsr(MSR_HV_SCONTROL, val);
877 }
878 
879 void
vmbus_deinit_synic_md(struct vmbus_softc * sc,cpuid_t cpu)880 vmbus_deinit_synic_md(struct vmbus_softc *sc, cpuid_t cpu)
881 {
882           uint64_t orig;
883           uint32_t sint;
884 
885           /*
886            * Disable SynIC.
887            */
888           orig = rdmsr(MSR_HV_SCONTROL);
889           wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
890 
891           /*
892            * Mask message and event flags SINT.
893            */
894           sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
895           orig = rdmsr(sint);
896           wrmsr(sint, orig | MSR_HV_SINT_MASKED);
897 
898           /*
899            * Mask timer SINT.
900            */
901           sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
902           orig = rdmsr(sint);
903           wrmsr(sint, orig | MSR_HV_SINT_MASKED);
904 
905           /*
906            * Teardown SynIC message.
907            */
908           orig = rdmsr(MSR_HV_SIMP);
909           wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
910 
911           /*
912            * Teardown SynIC event flags.
913            */
914           orig = rdmsr(MSR_HV_SIEFP);
915           wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
916 }
917 
918 static int
hyperv_sysctl_setup(struct hyperv_softc * sc,const struct sysctlnode * hyperv_node)919 hyperv_sysctl_setup(struct hyperv_softc *sc,
920     const struct sysctlnode *hyperv_node)
921 {
922           int error;
923 
924           error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
925               CTLFLAG_READONLY, CTLTYPE_STRING, "version", NULL,
926               NULL, 0, hyperv_version_str,
927               0, CTL_CREATE, CTL_EOL);
928           if (error)
929                     return error;
930 
931           error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
932               CTLFLAG_READONLY, CTLTYPE_STRING, "features", NULL,
933               NULL, 0, hyperv_features_str,
934               0, CTL_CREATE, CTL_EOL);
935           if (error)
936                     return error;
937 
938           error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
939               CTLFLAG_READONLY, CTLTYPE_STRING, "pm_features", NULL,
940               NULL, 0, hyperv_pm_features_str,
941               0, CTL_CREATE, CTL_EOL);
942           if (error)
943                     return error;
944 
945           error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
946               CTLFLAG_READONLY, CTLTYPE_STRING, "features3", NULL,
947               NULL, 0, hyperv_features3_str,
948               0, CTL_CREATE, CTL_EOL);
949           if (error)
950                     return error;
951 
952           return 0;
953 }
954 
955 static int
hyperv_sysctl_setup_root(struct hyperv_softc * sc)956 hyperv_sysctl_setup_root(struct hyperv_softc *sc)
957 {
958           const struct sysctlnode *machdep_node, *hyperv_node;
959           int error;
960 
961           error = sysctl_createv(&sc->sc_log, 0, NULL, &machdep_node,
962               CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
963               NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);
964           if (error)
965                     goto fail;
966 
967           error = sysctl_createv(&sc->sc_log, 0, &machdep_node, &hyperv_node,
968               CTLFLAG_PERMANENT, CTLTYPE_NODE, "hyperv", NULL,
969               NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
970           if (error)
971                     goto fail;
972 
973           error = hyperv_sysctl_setup(sc, hyperv_node);
974           if (error)
975                     goto fail;
976 
977           return 0;
978 
979 fail:
980           sysctl_teardown(&sc->sc_log);
981           sc->sc_log = NULL;
982           return error;
983 }
984 
985 MODULE(MODULE_CLASS_DRIVER, hyperv, NULL);
986 
987 #ifdef _MODULE
988 #include "ioconf.c"
989 #endif
990 
991 static int
hyperv_modcmd(modcmd_t cmd,void * aux)992 hyperv_modcmd(modcmd_t cmd, void *aux)
993 {
994           int rv = 0;
995 
996           switch (cmd) {
997           case MODULE_CMD_INIT:
998 #ifdef _MODULE
999                     rv = config_init_component(cfdriver_ioconf_hyperv,
1000                         cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
1001 #endif
1002                     hyperv_init();
1003                     break;
1004 
1005           case MODULE_CMD_FINI:
1006 #ifdef _MODULE
1007                     rv = config_fini_component(cfdriver_ioconf_hyperv,
1008                         cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
1009 #endif
1010                     break;
1011 
1012           default:
1013                     rv = ENOTTY;
1014                     break;
1015           }
1016 
1017           return rv;
1018 }
1019 
1020 #if NVMBUS > 0
1021 /*
1022  * genfb at vmbus
1023  */
1024 static struct genfb_pmf_callback pmf_cb;
1025 static struct genfb_mode_callback mode_cb;
1026 
/*
 * x86_genfb_setmode:
 *
 *	Mode-switch callback installed as mode_cb.gmc_setmode.  It does
 *	nothing and reports success for every `newmode'.
 */
static bool
x86_genfb_setmode(struct genfb_softc *sc, int newmode)
{

	return true;
}
1032 
1033 static bool
x86_genfb_suspend(device_t dev,const pmf_qual_t * qual)1034 x86_genfb_suspend(device_t dev, const pmf_qual_t *qual)
1035 {
1036           return true;
1037 }
1038 
1039 static bool
x86_genfb_resume(device_t dev,const pmf_qual_t * qual)1040 x86_genfb_resume(device_t dev, const pmf_qual_t *qual)
1041 {
1042 #if NGENFB > 0
1043           struct genfb_vmbus_softc *sc = device_private(dev);
1044 
1045           genfb_restore_palette(&sc->sc_gen);
1046 #endif
1047           return true;
1048 }
1049 
1050 static void
populate_fbinfo(device_t dev,prop_dictionary_t dict)1051 populate_fbinfo(device_t dev, prop_dictionary_t dict)
1052 {
1053 #if NWSDISPLAY > 0 && NGENFB > 0
1054           struct rasops_info *ri = &x86_genfb_console_screen.scr_ri;
1055 #endif
1056           const void *fbptr = lookup_bootinfo(BTINFO_FRAMEBUFFER);
1057           struct btinfo_framebuffer fbinfo;
1058 
1059           if (fbptr == NULL)
1060                     return;
1061 
1062           memcpy(&fbinfo, fbptr, sizeof(fbinfo));
1063 
1064           if (fbinfo.physaddr != 0) {
1065                     prop_dictionary_set_uint32(dict, "width", fbinfo.width);
1066                     prop_dictionary_set_uint32(dict, "height", fbinfo.height);
1067                     prop_dictionary_set_uint8(dict, "depth", fbinfo.depth);
1068                     prop_dictionary_set_uint16(dict, "linebytes", fbinfo.stride);
1069 
1070                     prop_dictionary_set_uint64(dict, "address", fbinfo.physaddr);
1071 #if NWSDISPLAY > 0 && NGENFB > 0
1072                     if (ri->ri_bits != NULL) {
1073                               prop_dictionary_set_uint64(dict, "virtual_address",
1074                                   ri->ri_hwbits != NULL ?
1075                                   (vaddr_t)ri->ri_hworigbits :
1076                                   (vaddr_t)ri->ri_origbits);
1077                     }
1078 #endif
1079           }
1080 #if notyet
1081           prop_dictionary_set_bool(dict, "splash",
1082               (fbinfo.flags & BI_FB_SPLASH) != 0);
1083 #endif
1084 #if 0
1085           if (fbinfo.depth == 8) {
1086                     gfb_cb.gcc_cookie = NULL;
1087                     gfb_cb.gcc_set_mapreg = x86_genfb_set_mapreg;
1088                     prop_dictionary_set_uint64(dict, "cmap_callback",
1089                         (uint64_t)(uintptr_t)&gfb_cb);
1090           }
1091 #endif
1092           if (fbinfo.physaddr != 0) {
1093                     mode_cb.gmc_setmode = x86_genfb_setmode;
1094                     prop_dictionary_set_uint64(dict, "mode_callback",
1095                         (uint64_t)(uintptr_t)&mode_cb);
1096           }
1097 
1098 #if NWSDISPLAY > 0 && NGENFB > 0
1099           if (device_is_a(dev, "genfb")) {
1100                     prop_dictionary_set_bool(dict, "enable_shadowfb",
1101                         ri->ri_hwbits != NULL);
1102 
1103                     x86_genfb_set_console_dev(dev);
1104 #ifdef DDB
1105                     db_trap_callback = x86_genfb_ddb_trap_callback;
1106 #endif
1107           }
1108 #endif
1109 }
1110 #endif
1111 
1112 device_t
device_hyperv_register(device_t dev,void * aux)1113 device_hyperv_register(device_t dev, void *aux)
1114 {
1115 #if NVMBUS > 0
1116           device_t parent = device_parent(dev);
1117 
1118           if (parent && device_is_a(parent, "vmbus") && !x86_found_console) {
1119                     struct vmbus_attach_args *aa = aux;
1120 
1121                     if (memcmp(aa->aa_type, &hyperv_guid_video,
1122                         sizeof(*aa->aa_type)) == 0) {
1123                               prop_dictionary_t dict = device_properties(dev);
1124 
1125                               /* Initialize genfb for serial console */
1126                               x86_genfb_init();
1127 
1128                               /*
1129                                * framebuffer drivers other than genfb can work
1130                                * without the address property
1131                                */
1132                               populate_fbinfo(dev, dict);
1133 
1134 #if 1 && NWSDISPLAY > 0 && NGENFB > 0
1135                               /* XXX */
1136                               if (device_is_a(dev, "genfb")) {
1137                                         prop_dictionary_set_bool(dict, "is_console",
1138                                             genfb_is_console());
1139                               } else
1140 #endif
1141                               prop_dictionary_set_bool(dict, "is_console", true);
1142 
1143                               prop_dictionary_set_bool(dict, "clear-screen", false);
1144 #if NWSDISPLAY > 0 && NGENFB > 0
1145                               prop_dictionary_set_uint16(dict, "cursor-row",
1146                                   x86_genfb_console_screen.scr_ri.ri_crow);
1147 #endif
1148                               pmf_cb.gpc_suspend = x86_genfb_suspend;
1149                               pmf_cb.gpc_resume = x86_genfb_resume;
1150                               prop_dictionary_set_uint64(dict, "pmf_callback",
1151                                   (uint64_t)(uintptr_t)&pmf_cb);
1152                               x86_found_console = true;
1153                               return NULL;
1154                     }
1155           }
1156 #endif
1157           return NULL;
1158 }
1159