1 /* $NetBSD: hyperv.c,v 1.17 2025/04/12 19:31:44 nonaka Exp $ */
2
3 /*-
4 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5 * Copyright (c) 2012 NetApp Inc.
6 * Copyright (c) 2012 Citrix Inc.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice unmodified, this list of conditions, and the following
14 * disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /**
32 * Implements low-level interactions with Hyper-V/Azure
33 */
34 #include <sys/cdefs.h>
35 #ifdef __KERNEL_RCSID
36 __KERNEL_RCSID(0, "$NetBSD: hyperv.c,v 1.17 2025/04/12 19:31:44 nonaka Exp $");
37 #endif
38 #ifdef __FBSDID
39 __FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hyperv.c 331757 2018-03-30 02:25:12Z emaste $");
40 #endif
41
42 #ifdef _KERNEL_OPT
43 #include "lapic.h"
44 #include "genfb.h"
45 #include "opt_ddb.h"
46 #include "vmbus.h"
47 #include "wsdisplay.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/kernel.h>
53 #include <sys/device.h>
54 #include <sys/bus.h>
55 #include <sys/cpu.h>
56 #include <sys/kmem.h>
57 #include <sys/module.h>
58 #include <sys/pmf.h>
59 #include <sys/sysctl.h>
60 #include <sys/timetc.h>
61
62 #include <uvm/uvm_extern.h>
63
64 #include <machine/autoconf.h>
65 #include <machine/bootinfo.h>
66 #include <machine/cpufunc.h>
67 #include <machine/cputypes.h>
68 #include <machine/cpuvar.h>
69 #include <machine/cpu_counter.h>
70 #include <x86/apicvar.h>
71 #include <x86/efi.h>
72
73 #include <dev/wsfb/genfbvar.h>
74 #include <x86/genfb_machdep.h>
75
76 #include <x86/x86/hypervreg.h>
77 #include <x86/x86/hypervvar.h>
78 #include <dev/hyperv/vmbusvar.h>
79 #include <dev/hyperv/genfb_vmbusvar.h>
80
81 #ifdef DDB
82 #include <machine/db_machdep.h>
83 #include <ddb/db_sym.h>
84 #include <ddb/db_extern.h>
85 #endif
86
/*
 * Per-instance driver state for the hyperv device.
 */
struct hyperv_softc {
	device_t sc_dev;		/* our own device_t; set in hyperv_attach() */

	struct sysctllog *sc_log;	/* log of sysctl nodes we created */
};
92
/*
 * Location of the hypercall page, filled in by hyperv_init_hypercall().
 */
struct hyperv_hypercall_ctx {
	void *hc_addr;		/* virtual address; NULL while unavailable */
	paddr_t hc_paddr;	/* physical address programmed into the MSR */
};
97
/*
 * Machine-dependent per-CPU data hung off vmbus_percpu_data::md_cookie.
 */
struct hyperv_percpu_data {
	int pd_idtvec;		/* IDT vector used for Hyper-V interrupts */
};
101
/* The single hypercall context; consulted by hyperv_hypercall(). */
static struct hyperv_hypercall_ctx hyperv_hypercall_ctx;
103
/*
 * Page-aligned placeholder whose body is PAGE_SIZE bytes of 0xcc filler.
 * It is never called as a C function: hyperv_init_hypercall() passes the
 * physical address of this page to the hypervisor through MSR_HV_HYPERCALL
 * and uses its virtual address as the hypercall entry point.
 */
static void __attribute__((naked)) __aligned(PAGE_SIZE)
hyperv_hypercall_page(void)
{
	__asm__ __volatile__ (".fill %c0, 1, 0xcc" :: "i" (PAGE_SIZE));
}
109
static u_int hyperv_get_timecount(struct timecounter *);

/* CPUID-derived feature words, filled in by hyperv_identify(). */
static u_int hyperv_features; /* CPUID_HV_MSR_ */
static u_int hyperv_recommends;	/* CPUID_LEAF_HV_RECOMMENDS, regs[0] */

static u_int hyperv_pm_features;	/* CPUID_LEAF_HV_FEATURES, regs[2] */
static u_int hyperv_features3;	/* CPUID_LEAF_HV_FEATURES, regs[3] */

/* Printable forms of the above; exported through the machdep.hyperv sysctls. */
static char hyperv_version_str[64];
static char hyperv_features_str[256];
static char hyperv_pm_features_str[256];
static char hyperv_features3_str[256];

/*
 * Virtual processor index for each CPU, indexed by cpu index.  Seeded by
 * hyperv_early_init() and refined per CPU by hyperv_init_cpu().
 */
uint32_t hyperv_vcpuid[MAXCPUS];
124
/*
 * Timecounter that reads MSR_HV_TIME_REF_COUNT (see hyperv_get_timecount()).
 * hyperv_init() registers it when CPUID_HV_MSR_TIME_REFCNT is advertised.
 */
static struct timecounter hyperv_timecounter = {
	.tc_get_timecount = hyperv_get_timecount,
	.tc_counter_mask = 0xffffffff,
	.tc_frequency = HYPERV_TIMER_FREQ,
	.tc_name = "Hyper-V",
	.tc_quality = 2000,
};
132
static void hyperv_proc_dummy(void *, struct cpu_info *);

/* A handler plus the opaque argument it is invoked with. */
struct hyperv_proc {
	hyperv_proc_t func;
	void *arg;
};

/*
 * Handlers run by hyperv_intr().  Both start out as a no-op so that
 * hyperv_intr() never has to test for NULL; they are replaced through
 * hyperv_set_event_proc() and hyperv_set_message_proc().
 */
static struct hyperv_proc hyperv_event_proc = {
	.func = hyperv_proc_dummy,
};

static struct hyperv_proc hyperv_message_proc = {
	.func = hyperv_proc_dummy,
};
147
/* autoconf(9) entry points. */
static int hyperv_match(device_t, cfdata_t, void *);
static void hyperv_attach(device_t, device_t, void *);
static int hyperv_detach(device_t, int);

CFATTACH_DECL_NEW(hyperv, sizeof(struct hyperv_softc),
    hyperv_match, hyperv_attach, hyperv_detach, NULL);

/* Helpers defined further down in this file. */
static void hyperv_hypercall_memfree(void);
static bool hyperv_init_hypercall(void);
static int hyperv_sysctl_setup_root(struct hyperv_softc *);
158
159 static u_int
hyperv_get_timecount(struct timecounter * tc)160 hyperv_get_timecount(struct timecounter *tc)
161 {
162
163 return (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
164 }
165
166 static uint64_t
hyperv_tc64_rdmsr(void)167 hyperv_tc64_rdmsr(void)
168 {
169
170 return rdmsr(MSR_HV_TIME_REF_COUNT);
171 }
172
173 #ifdef __amd64__
174 /*
175 * Reference TSC
176 */
/* Reference TSC page as set up by hyperv_tsc_tcinit(). */
struct hyperv_ref_tsc {
	struct hyperv_reftsc *tsc_ref;	/* kernel mapping; NULL if not set up */
	paddr_t tsc_paddr;		/* physical address written to the MSR */
};
181
static struct hyperv_ref_tsc hyperv_ref_tsc;

static u_int hyperv_tsc_timecount(struct timecounter *);

/*
 * Timecounter that derives the reference time from the reference TSC page
 * (see hyperv_tc64_tsc()).  Its quality is higher than that of the
 * MSR-based "Hyper-V" timecounter.
 */
static struct timecounter hyperv_tsc_timecounter = {
	.tc_get_timecount = hyperv_tsc_timecount,
	.tc_counter_mask = 0xffffffff,
	.tc_frequency = HYPERV_TIMER_FREQ,
	.tc_name = "Hyper-V-TSC",
	.tc_quality = 3000,
};
193
194 static __inline u_int
atomic_load_acq_int(volatile u_int * p)195 atomic_load_acq_int(volatile u_int *p)
196 {
197 u_int r = *p;
198 __insn_barrier();
199 return r;
200 }
201
202 static uint64_t
hyperv_tc64_tsc(void)203 hyperv_tc64_tsc(void)
204 {
205 struct hyperv_reftsc *tsc_ref = hyperv_ref_tsc.tsc_ref;
206 uint32_t seq;
207
208 while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
209 uint64_t disc, ret, tsc;
210 uint64_t scale = tsc_ref->tsc_scale;
211 int64_t ofs = tsc_ref->tsc_ofs;
212
213 tsc = cpu_counter();
214
215 /* ret = ((tsc * scale) >> 64) + ofs */
216 __asm__ __volatile__ ("mulq %3" :
217 "=d" (ret), "=a" (disc) :
218 "a" (tsc), "r" (scale));
219 ret += ofs;
220
221 __insn_barrier();
222 if (tsc_ref->tsc_seq == seq)
223 return ret;
224
225 /* Sequence changed; re-sync. */
226 }
227 /* Fallback to the generic timecounter, i.e. rdmsr. */
228 return rdmsr(MSR_HV_TIME_REF_COUNT);
229 }
230
231 static u_int
hyperv_tsc_timecount(struct timecounter * tc __unused)232 hyperv_tsc_timecount(struct timecounter *tc __unused)
233 {
234
235 return hyperv_tc64_tsc();
236 }
237
238 static bool
hyperv_tsc_tcinit(void)239 hyperv_tsc_tcinit(void)
240 {
241 uint64_t orig_msr, msr;
242
243 if ((hyperv_features &
244 (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC)) !=
245 (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC) ||
246 (cpu_feature[0] & CPUID_SSE2) == 0) /* SSE2 for mfence/lfence */
247 return false;
248
249 hyperv_ref_tsc.tsc_ref = (void *)uvm_km_alloc(kernel_map,
250 PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
251 if (hyperv_ref_tsc.tsc_ref == NULL) {
252 aprint_error("Hyper-V: reference TSC page allocation failed\n");
253 return false;
254 }
255
256 if (!pmap_extract(pmap_kernel(), (vaddr_t)hyperv_ref_tsc.tsc_ref,
257 &hyperv_ref_tsc.tsc_paddr)) {
258 aprint_error("Hyper-V: reference TSC page setup failed\n");
259 uvm_km_free(kernel_map, (vaddr_t)hyperv_ref_tsc.tsc_ref,
260 PAGE_SIZE, UVM_KMF_WIRED);
261 hyperv_ref_tsc.tsc_ref = NULL;
262 return false;
263 }
264
265 orig_msr = rdmsr(MSR_HV_REFERENCE_TSC);
266 msr = MSR_HV_REFTSC_ENABLE | (orig_msr & MSR_HV_REFTSC_RSVD_MASK) |
267 (atop(hyperv_ref_tsc.tsc_paddr) << MSR_HV_REFTSC_PGSHIFT);
268 wrmsr(MSR_HV_REFERENCE_TSC, msr);
269
270 /* Install 64 bits timecounter method for other modules to use. */
271 hyperv_tc64 = hyperv_tc64_tsc;
272
273 /* Register "enlightened" timecounter. */
274 tc_init(&hyperv_tsc_timecounter);
275
276 return true;
277 }
278 #endif /* __amd64__ */
279
280 static void
delay_tc(unsigned int n)281 delay_tc(unsigned int n)
282 {
283 struct timecounter *tc;
284 uint64_t end, now;
285 u_int last, u;
286
287 tc = timecounter;
288 if (tc->tc_quality <= 0) {
289 x86_delay(n);
290 return;
291 }
292
293 now = 0;
294 end = tc->tc_frequency * n / 1000000;
295 last = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
296 do {
297 x86_pause();
298 u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
299 if (u < last)
300 now += tc->tc_counter_mask - last + u + 1;
301 else
302 now += u - last;
303 last = u;
304 } while (now < end);
305 }
306
307 static void
delay_msr(unsigned int n)308 delay_msr(unsigned int n)
309 {
310 uint64_t end, now;
311 u_int last, u;
312
313 now = 0;
314 end = HYPERV_TIMER_FREQ * n / 1000000ULL;
315 last = (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
316 do {
317 x86_pause();
318 u = (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
319 if (u < last)
320 now += 0xffffffff - last + u + 1;
321 else
322 now += u - last;
323 last = u;
324 } while (now < end);
325 }
326
/*
 * Machine-dependent hypercall trampoline.
 *
 *	hc_addr		virtual address of the hypercall page entry point
 *	in_val		hypercall input (control) value
 *	in_paddr	physical address of the input parameter block
 *	out_paddr	physical address of the output parameter block
 *
 * amd64 passes in_val/in_paddr/out_paddr in %rcx/%rdx/%r8 and takes the
 * status from %rax; i386 uses %edx:%eax, %ebx:%ecx and %edi:%esi and takes
 * the status from %edx:%eax.
 *
 * Returns the 64-bit hypercall status.
 */
static __inline uint64_t
hyperv_hypercall_md(volatile void *hc_addr, uint64_t in_val, uint64_t in_paddr,
    uint64_t out_paddr)
{
	uint64_t status;

#ifdef __amd64__
	/*
	 * out_paddr must be in %r8 at the moment of the call.  Pin it there
	 * with a local register variable and hand it to the same asm
	 * statement as the call, so the compiler cannot reuse %r8 in between.
	 */
	register uint64_t out_r8 __asm__("r8") = out_paddr;

	/*
	 * The argument registers are read-write and the remaining
	 * caller-saved integer registers are listed as clobbered, since the
	 * hypercall page is opaque code.  "memory" is needed because the
	 * hypervisor reads the input block and writes the output block
	 * behind the compiler's back.
	 */
	__asm__ __volatile__ ("call *%[hc]"
	    : "=a" (status), "+c" (in_val), "+d" (in_paddr), "+r" (out_r8)
	    : [hc] "m" (hc_addr)
	    : "cc", "memory", "r9", "r10", "r11");
#else
	uint32_t in_val_hi = in_val >> 32;
	uint32_t in_val_lo = in_val & 0xFFFFFFFF;
	uint32_t status_hi, status_lo;
	uint32_t in_paddr_hi = in_paddr >> 32;
	uint32_t in_paddr_lo = in_paddr & 0xFFFFFFFF;
	uint32_t out_paddr_hi = out_paddr >> 32;
	uint32_t out_paddr_lo = out_paddr & 0xFFFFFFFF;

	/* See above for why "cc" and "memory" are declared clobbered. */
	__asm__ __volatile__ ("call *%8" : "=d" (status_hi), "=a" (status_lo) :
	    "d" (in_val_hi), "a" (in_val_lo),
	    "b" (in_paddr_hi), "c" (in_paddr_lo),
	    "D" (out_paddr_hi), "S" (out_paddr_lo),
	    "m" (hc_addr) :
	    "cc", "memory");
	status = status_lo | ((uint64_t)status_hi << 32);
#endif

	return status;
}
356
357 uint64_t
hyperv_hypercall(uint64_t control,paddr_t in_paddr,paddr_t out_paddr)358 hyperv_hypercall(uint64_t control, paddr_t in_paddr, paddr_t out_paddr)
359 {
360
361 if (hyperv_hypercall_ctx.hc_addr == NULL)
362 return ~HYPERCALL_STATUS_SUCCESS;
363
364 return hyperv_hypercall_md(hyperv_hypercall_ctx.hc_addr, control,
365 in_paddr, out_paddr);
366 }
367
368 static bool
hyperv_probe(u_int * maxleaf,u_int * features,u_int * pm_features,u_int * features3)369 hyperv_probe(u_int *maxleaf, u_int *features, u_int *pm_features,
370 u_int *features3)
371 {
372 u_int regs[4];
373
374 if (vm_guest != VM_GUEST_HV)
375 return false;
376
377 x86_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
378 *maxleaf = regs[0];
379 if (*maxleaf < CPUID_LEAF_HV_LIMITS)
380 return false;
381
382 x86_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
383 if (regs[0] != CPUID_HV_IFACE_HYPERV)
384 return false;
385
386 x86_cpuid(CPUID_LEAF_HV_FEATURES, regs);
387 if (!(regs[0] & CPUID_HV_MSR_HYPERCALL)) {
388 /*
389 * Hyper-V w/o Hypercall is impossible; someone
390 * is faking Hyper-V.
391 */
392 return false;
393 }
394
395 *features = regs[0];
396 *pm_features = regs[2];
397 *features3 = regs[3];
398
399 return true;
400 }
401
402 static bool
hyperv_identify(void)403 hyperv_identify(void)
404 {
405 char buf[256];
406 u_int regs[4];
407 u_int maxleaf;
408
409 if (!hyperv_probe(&maxleaf, &hyperv_features, &hyperv_pm_features,
410 &hyperv_features3))
411 return false;
412
413 x86_cpuid(CPUID_LEAF_HV_IDENTITY, regs);
414 hyperv_ver_major = regs[1] >> 16;
415 snprintf(hyperv_version_str, sizeof(hyperv_version_str),
416 "%d.%d.%d [SP%d]",
417 hyperv_ver_major, regs[1] & 0xffff, regs[0], regs[2]);
418 aprint_verbose("Hyper-V Version: %s\n", hyperv_version_str);
419
420 snprintb(hyperv_features_str, sizeof(hyperv_features_str),
421 "\020"
422 "\001VPRUNTIME" /* MSR_HV_VP_RUNTIME */
423 "\002TMREFCNT" /* MSR_HV_TIME_REF_COUNT */
424 "\003SYNIC" /* MSRs for SynIC */
425 "\004SYNTM" /* MSRs for SynTimer */
426 "\005APIC" /* MSR_HV_{EOI,ICR,TPR} */
427 "\006HYPERCALL" /* MSR_HV_{GUEST_OS_ID,HYPERCALL} */
428 "\007VPINDEX" /* MSR_HV_VP_INDEX */
429 "\010RESET" /* MSR_HV_RESET */
430 "\011STATS" /* MSR_HV_STATS_ */
431 "\012REFTSC" /* MSR_HV_REFERENCE_TSC */
432 "\013IDLE" /* MSR_HV_GUEST_IDLE */
433 "\014TMFREQ" /* MSR_HV_{TSC,APIC}_FREQUENCY */
434 "\015DEBUG", /* MSR_HV_SYNTH_DEBUG_ */
435 hyperv_features);
436 aprint_verbose(" Features=%s\n", hyperv_features_str);
437 snprintb(buf, sizeof(buf),
438 "\020"
439 "\005C3HPET", /* HPET is required for C3 state */
440 (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK));
441 snprintf(hyperv_pm_features_str, sizeof(hyperv_pm_features_str),
442 "%s [C%u]", buf, CPUPM_HV_CSTATE(hyperv_pm_features));
443 aprint_verbose(" PM Features=%s\n", hyperv_pm_features_str);
444 snprintb(hyperv_features3_str, sizeof(hyperv_features3_str),
445 "\020"
446 "\001MWAIT" /* MWAIT */
447 "\002DEBUG" /* guest debug support */
448 "\003PERFMON" /* performance monitor */
449 "\004PCPUDPE" /* physical CPU dynamic partition event */
450 "\005XMMHC" /* hypercall input through XMM regs */
451 "\006IDLE" /* guest idle support */
452 "\007SLEEP" /* hypervisor sleep support */
453 "\010NUMA" /* NUMA distance query support */
454 "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */
455 "\012SYNCMC" /* inject synthetic machine checks */
456 "\013CRASH" /* MSRs for guest crash */
457 "\014DEBUGMSR" /* MSRs for guest debug */
458 "\015NPIEP" /* NPIEP */
459 "\016HVDIS", /* disabling hypervisor */
460 hyperv_features3);
461 aprint_verbose(" Features3=%s\n", hyperv_features3_str);
462
463 x86_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs);
464 hyperv_recommends = regs[0];
465 aprint_verbose(" Recommends: %08x %08x\n", regs[0], regs[1]);
466
467 x86_cpuid(CPUID_LEAF_HV_LIMITS, regs);
468 aprint_verbose(" Limits: Vcpu:%d Lcpu:%d Int:%d\n",
469 regs[0], regs[1], regs[2]);
470
471 if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) {
472 x86_cpuid(CPUID_LEAF_HV_HWFEATURES, regs);
473 aprint_verbose(" HW Features: %08x, AMD: %08x\n",
474 regs[0], regs[3]);
475 }
476
477 return true;
478 }
479
480 void
hyperv_early_init(void)481 hyperv_early_init(void)
482 {
483 u_int features, pm_features, features3;
484 u_int maxleaf;
485 int i;
486
487 if (!hyperv_probe(&maxleaf, &features, &pm_features, &features3))
488 return;
489
490 if (features & CPUID_HV_MSR_TIME_REFCNT)
491 x86_delay = delay_func = delay_msr;
492
493 if (features & CPUID_HV_MSR_VP_INDEX) {
494 /* Save virtual processor id. */
495 hyperv_vcpuid[0] = rdmsr(MSR_HV_VP_INDEX);
496 } else {
497 /* Set virtual processor id to 0 for compatibility. */
498 hyperv_vcpuid[0] = 0;
499 }
500 for (i = 1; i < MAXCPUS; i++)
501 hyperv_vcpuid[i] = hyperv_vcpuid[0];
502 }
503
504 void
hyperv_init_cpu(struct cpu_info * ci)505 hyperv_init_cpu(struct cpu_info *ci)
506 {
507 u_int features, pm_features, features3;
508 u_int maxleaf;
509
510 if (!hyperv_probe(&maxleaf, &features, &pm_features, &features3))
511 return;
512
513 if (features & CPUID_HV_MSR_VP_INDEX)
514 hyperv_vcpuid[ci->ci_index] = rdmsr(MSR_HV_VP_INDEX);
515 }
516
517 uint32_t
hyperv_get_vcpuid(cpuid_t cpu)518 hyperv_get_vcpuid(cpuid_t cpu)
519 {
520
521 if (cpu < MAXCPUS)
522 return hyperv_vcpuid[cpu];
523 return 0;
524 }
525
526 static bool
hyperv_init(void)527 hyperv_init(void)
528 {
529
530 if (!hyperv_identify()) {
531 /* Not Hyper-V; reset guest id to the generic one. */
532 if (vm_guest == VM_GUEST_HV)
533 vm_guest = VM_GUEST_VM;
534 return false;
535 }
536
537 /* Set guest id */
538 wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_OSTYPE_NETBSD |
539 (uint64_t)__NetBSD_Version__ << MSR_HV_GUESTID_VERSION_SHIFT);
540
541 if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) {
542 /* Register Hyper-V timecounter */
543 tc_init(&hyperv_timecounter);
544
545 /*
546 * Install 64 bits timecounter method for other modules to use.
547 */
548 hyperv_tc64 = hyperv_tc64_rdmsr;
549 #ifdef __amd64__
550 hyperv_tsc_tcinit();
551 #endif
552
553 /* delay with timecounter */
554 x86_delay = delay_func = delay_tc;
555 }
556
557 #if NLAPIC > 0
558 if ((hyperv_features & CPUID_HV_MSR_TIME_FREQ) &&
559 (hyperv_features3 & CPUID3_HV_TIME_FREQ))
560 lapic_per_second = rdmsr(MSR_HV_APIC_FREQUENCY);
561 #endif
562
563 return hyperv_init_hypercall();
564 }
565
566 static bool
hyperv_is_initialized(void)567 hyperv_is_initialized(void)
568 {
569 uint64_t msr;
570
571 if (vm_guest != VM_GUEST_HV)
572 return false;
573 if (rdmsr_safe(MSR_HV_HYPERCALL, &msr) == EFAULT)
574 return false;
575 return (msr & MSR_HV_HYPERCALL_ENABLE) ? true : false;
576 }
577
578 static int
hyperv_match(device_t parent,cfdata_t cf,void * aux)579 hyperv_match(device_t parent, cfdata_t cf, void *aux)
580 {
581 struct cpufeature_attach_args *cfaa = aux;
582 struct cpu_info *ci = cfaa->ci;
583
584 if (strcmp(cfaa->name, "vm") != 0)
585 return 0;
586 if ((ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) == 0)
587 return 0;
588 if (vm_guest != VM_GUEST_HV)
589 return 0;
590
591 return 1;
592 }
593
594 static void
hyperv_attach(device_t parent,device_t self,void * aux)595 hyperv_attach(device_t parent, device_t self, void *aux)
596 {
597 struct hyperv_softc *sc = device_private(self);
598
599 sc->sc_dev = self;
600
601 aprint_naive("\n");
602 aprint_normal(": Hyper-V\n");
603
604 if (!hyperv_is_initialized()) {
605 if (rdmsr(MSR_HV_GUEST_OS_ID) == 0) {
606 if (!hyperv_init()) {
607 aprint_error_dev(self, "initialize failed\n");
608 return;
609 }
610 }
611 hyperv_init_hypercall();
612 }
613
614 (void) pmf_device_register(self, NULL, NULL);
615
616 (void) hyperv_sysctl_setup_root(sc);
617 }
618
619 static int
hyperv_detach(device_t self,int flags)620 hyperv_detach(device_t self, int flags)
621 {
622 struct hyperv_softc *sc = device_private(self);
623 uint64_t hc;
624
625 /* Disable Hypercall */
626 hc = rdmsr(MSR_HV_HYPERCALL);
627 wrmsr(MSR_HV_HYPERCALL, hc & MSR_HV_HYPERCALL_RSVD_MASK);
628 hyperv_hypercall_memfree();
629
630 if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT)
631 tc_detach(&hyperv_timecounter);
632
633 wrmsr(MSR_HV_GUEST_OS_ID, 0);
634
635 pmf_device_deregister(self);
636
637 if (sc->sc_log != NULL) {
638 sysctl_teardown(&sc->sc_log);
639 sc->sc_log = NULL;
640 }
641
642 return 0;
643 }
644
645 void
hyperv_intr(void)646 hyperv_intr(void)
647 {
648 struct cpu_info *ci = curcpu();
649
650 (*hyperv_event_proc.func)(hyperv_event_proc.arg, ci);
651 (*hyperv_message_proc.func)(hyperv_message_proc.arg, ci);
652 }
653
654 void hyperv_hypercall_intr(struct trapframe *);
655 void
hyperv_hypercall_intr(struct trapframe * frame __unused)656 hyperv_hypercall_intr(struct trapframe *frame __unused)
657 {
658 struct cpu_info *ci = curcpu();
659
660 ci->ci_isources[LIR_HV]->is_evcnt.ev_count++;
661
662 hyperv_intr();
663 }
664
/*
 * Default event/message handler: intentionally does nothing.  It lets
 * hyperv_intr() call the handlers unconditionally.
 */
static void
hyperv_proc_dummy(void *arg __unused, struct cpu_info *ci __unused)
{
}
669
670 void
hyperv_set_event_proc(void (* func)(void *,struct cpu_info *),void * arg)671 hyperv_set_event_proc(void (*func)(void *, struct cpu_info *), void *arg)
672 {
673
674 hyperv_event_proc.func = func;
675 hyperv_event_proc.arg = arg;
676 }
677
678 void
hyperv_set_message_proc(void (* func)(void *,struct cpu_info *),void * arg)679 hyperv_set_message_proc(void (*func)(void *, struct cpu_info *), void *arg)
680 {
681
682 hyperv_message_proc.func = func;
683 hyperv_message_proc.arg = arg;
684 }
685
/*
 * Mark the hypercall page as unavailable.  The page itself is static
 * (hyperv_hypercall_page), so nothing is actually freed; clearing hc_addr
 * makes hyperv_hypercall() fail instead of calling into the page.
 */
static void
hyperv_hypercall_memfree(void)
{

	hyperv_hypercall_ctx.hc_addr = NULL;
}
692
693 static bool
hyperv_init_hypercall(void)694 hyperv_init_hypercall(void)
695 {
696 uint64_t hc, hc_orig;
697
698 hyperv_hypercall_ctx.hc_addr = hyperv_hypercall_page;
699 hyperv_hypercall_ctx.hc_paddr = vtophys((vaddr_t)hyperv_hypercall_page);
700 KASSERT(hyperv_hypercall_ctx.hc_paddr != 0);
701
702 /* Get the 'reserved' bits, which requires preservation. */
703 hc_orig = rdmsr(MSR_HV_HYPERCALL);
704
705 /*
706 * Setup the Hypercall page.
707 *
708 * NOTE: 'reserved' bits MUST be preserved.
709 */
710 hc = (atop(hyperv_hypercall_ctx.hc_paddr) << MSR_HV_HYPERCALL_PGSHIFT) |
711 (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) |
712 MSR_HV_HYPERCALL_ENABLE;
713 wrmsr(MSR_HV_HYPERCALL, hc);
714
715 /*
716 * Confirm that Hypercall page did get setup.
717 */
718 hc = rdmsr(MSR_HV_HYPERCALL);
719 if (!(hc & MSR_HV_HYPERCALL_ENABLE)) {
720 aprint_error("Hyper-V: Hypercall setup failed\n");
721 hyperv_hypercall_memfree();
722 /* Can't perform any Hyper-V specific actions */
723 vm_guest = VM_GUEST_VM;
724 return false;
725 }
726
727 return true;
728 }
729
/*
 * Return 1 when hypercalls are currently enabled (see
 * hyperv_is_initialized()), 0 otherwise.
 */
int
hyperv_hypercall_enabled(void)
{

	return hyperv_is_initialized() ? 1 : 0;
}
736
737 int
hyperv_synic_supported(void)738 hyperv_synic_supported(void)
739 {
740
741 return (hyperv_features & CPUID_HV_MSR_SYNIC) ? 1 : 0;
742 }
743
/*
 * Return 1 when efi_probe() reports no EFI (treated as a generation 1 VM),
 * 0 otherwise.
 */
int
hyperv_is_gen1(void)
{

	return efi_probe() ? 0 : 1;
}
750
/*
 * Signal end-of-message to the hypervisor by writing 0 to MSR_HV_EOM.
 */
void
hyperv_send_eom(void)
{

	wrmsr(MSR_HV_EOM, 0);
}
757
758 void
vmbus_init_interrupts_md(struct vmbus_softc * sc,cpuid_t cpu)759 vmbus_init_interrupts_md(struct vmbus_softc *sc, cpuid_t cpu)
760 {
761 extern void Xintr_hyperv_hypercall(void);
762 struct vmbus_percpu_data *pd;
763 struct hyperv_percpu_data *hv_pd;
764 struct cpu_info *ci;
765 struct idt_vec *iv;
766 int hyperv_idtvec;
767 cpuid_t cpu0;
768
769 cpu0 = cpu_index(&cpu_info_primary);
770
771 if (cpu == cpu0 || idt_vec_is_pcpu()) {
772 /*
773 * All Hyper-V ISR required resources are setup, now let's find a
774 * free IDT vector for Hyper-V ISR and set it up.
775 */
776 ci = cpu_lookup(cpu);
777 iv = &ci->ci_idtvec;
778 mutex_enter(&cpu_lock);
779 hyperv_idtvec = idt_vec_alloc(iv,
780 APIC_LEVEL(NIPL), IDT_INTR_HIGH);
781 mutex_exit(&cpu_lock);
782 KASSERT(hyperv_idtvec > 0);
783 idt_vec_set(iv, hyperv_idtvec, Xintr_hyperv_hypercall);
784 } else {
785 pd = &sc->sc_percpu[cpu0];
786 hv_pd = pd->md_cookie;
787 KASSERT(hv_pd != NULL && hv_pd->pd_idtvec > 0);
788 hyperv_idtvec = hv_pd->pd_idtvec;
789 }
790
791 hv_pd = kmem_zalloc(sizeof(*hv_pd), KM_SLEEP);
792 hv_pd->pd_idtvec = hyperv_idtvec;
793 pd = &sc->sc_percpu[cpu];
794 pd->md_cookie = (void *)hv_pd;
795 }
796
797 void
vmbus_deinit_interrupts_md(struct vmbus_softc * sc,cpuid_t cpu)798 vmbus_deinit_interrupts_md(struct vmbus_softc *sc, cpuid_t cpu)
799 {
800 struct vmbus_percpu_data *pd;
801 struct hyperv_percpu_data *hv_pd;
802 struct cpu_info *ci;
803 struct idt_vec *iv;
804
805 pd = &sc->sc_percpu[cpu];
806 hv_pd = pd->md_cookie;
807 KASSERT(hv_pd != NULL);
808
809 if (cpu == cpu_index(&cpu_info_primary) ||
810 idt_vec_is_pcpu()) {
811 ci = cpu_lookup(cpu);
812 iv = &ci->ci_idtvec;
813
814 if (hv_pd->pd_idtvec > 0) {
815 idt_vec_free(iv, hv_pd->pd_idtvec);
816 }
817 }
818
819 pd->md_cookie = NULL;
820 kmem_free(hv_pd, sizeof(*hv_pd));
821 }
822
823 void
vmbus_init_synic_md(struct vmbus_softc * sc,cpuid_t cpu)824 vmbus_init_synic_md(struct vmbus_softc *sc, cpuid_t cpu)
825 {
826 extern void Xintr_hyperv_hypercall(void);
827 struct vmbus_percpu_data *pd;
828 struct hyperv_percpu_data *hv_pd;
829 uint64_t val, orig;
830 uint32_t sint;
831 int hyperv_idtvec;
832
833 pd = &sc->sc_percpu[cpu];
834 hv_pd = pd->md_cookie;
835 hyperv_idtvec = hv_pd->pd_idtvec;
836
837 /*
838 * Setup the SynIC message.
839 */
840 orig = rdmsr(MSR_HV_SIMP);
841 val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
842 (atop(hyperv_dma_get_paddr(&pd->simp_dma)) << MSR_HV_SIMP_PGSHIFT);
843 wrmsr(MSR_HV_SIMP, val);
844
845 /*
846 * Setup the SynIC event flags.
847 */
848 orig = rdmsr(MSR_HV_SIEFP);
849 val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
850 (atop(hyperv_dma_get_paddr(&pd->siep_dma)) << MSR_HV_SIEFP_PGSHIFT);
851 wrmsr(MSR_HV_SIEFP, val);
852
853 /*
854 * Configure and unmask SINT for message and event flags.
855 */
856 sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
857 orig = rdmsr(sint);
858 val = hyperv_idtvec | MSR_HV_SINT_AUTOEOI |
859 (orig & MSR_HV_SINT_RSVD_MASK);
860 wrmsr(sint, val);
861
862 /*
863 * Configure and unmask SINT for timer.
864 */
865 sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
866 orig = rdmsr(sint);
867 val = hyperv_idtvec | MSR_HV_SINT_AUTOEOI |
868 (orig & MSR_HV_SINT_RSVD_MASK);
869 wrmsr(sint, val);
870
871 /*
872 * All done; enable SynIC.
873 */
874 orig = rdmsr(MSR_HV_SCONTROL);
875 val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
876 wrmsr(MSR_HV_SCONTROL, val);
877 }
878
879 void
vmbus_deinit_synic_md(struct vmbus_softc * sc,cpuid_t cpu)880 vmbus_deinit_synic_md(struct vmbus_softc *sc, cpuid_t cpu)
881 {
882 uint64_t orig;
883 uint32_t sint;
884
885 /*
886 * Disable SynIC.
887 */
888 orig = rdmsr(MSR_HV_SCONTROL);
889 wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
890
891 /*
892 * Mask message and event flags SINT.
893 */
894 sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
895 orig = rdmsr(sint);
896 wrmsr(sint, orig | MSR_HV_SINT_MASKED);
897
898 /*
899 * Mask timer SINT.
900 */
901 sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
902 orig = rdmsr(sint);
903 wrmsr(sint, orig | MSR_HV_SINT_MASKED);
904
905 /*
906 * Teardown SynIC message.
907 */
908 orig = rdmsr(MSR_HV_SIMP);
909 wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
910
911 /*
912 * Teardown SynIC event flags.
913 */
914 orig = rdmsr(MSR_HV_SIEFP);
915 wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
916 }
917
918 static int
hyperv_sysctl_setup(struct hyperv_softc * sc,const struct sysctlnode * hyperv_node)919 hyperv_sysctl_setup(struct hyperv_softc *sc,
920 const struct sysctlnode *hyperv_node)
921 {
922 int error;
923
924 error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
925 CTLFLAG_READONLY, CTLTYPE_STRING, "version", NULL,
926 NULL, 0, hyperv_version_str,
927 0, CTL_CREATE, CTL_EOL);
928 if (error)
929 return error;
930
931 error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
932 CTLFLAG_READONLY, CTLTYPE_STRING, "features", NULL,
933 NULL, 0, hyperv_features_str,
934 0, CTL_CREATE, CTL_EOL);
935 if (error)
936 return error;
937
938 error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
939 CTLFLAG_READONLY, CTLTYPE_STRING, "pm_features", NULL,
940 NULL, 0, hyperv_pm_features_str,
941 0, CTL_CREATE, CTL_EOL);
942 if (error)
943 return error;
944
945 error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
946 CTLFLAG_READONLY, CTLTYPE_STRING, "features3", NULL,
947 NULL, 0, hyperv_features3_str,
948 0, CTL_CREATE, CTL_EOL);
949 if (error)
950 return error;
951
952 return 0;
953 }
954
955 static int
hyperv_sysctl_setup_root(struct hyperv_softc * sc)956 hyperv_sysctl_setup_root(struct hyperv_softc *sc)
957 {
958 const struct sysctlnode *machdep_node, *hyperv_node;
959 int error;
960
961 error = sysctl_createv(&sc->sc_log, 0, NULL, &machdep_node,
962 CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
963 NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);
964 if (error)
965 goto fail;
966
967 error = sysctl_createv(&sc->sc_log, 0, &machdep_node, &hyperv_node,
968 CTLFLAG_PERMANENT, CTLTYPE_NODE, "hyperv", NULL,
969 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
970 if (error)
971 goto fail;
972
973 error = hyperv_sysctl_setup(sc, hyperv_node);
974 if (error)
975 goto fail;
976
977 return 0;
978
979 fail:
980 sysctl_teardown(&sc->sc_log);
981 sc->sc_log = NULL;
982 return error;
983 }
984
985 MODULE(MODULE_CLASS_DRIVER, hyperv, NULL);
986
987 #ifdef _MODULE
988 #include "ioconf.c"
989 #endif
990
991 static int
hyperv_modcmd(modcmd_t cmd,void * aux)992 hyperv_modcmd(modcmd_t cmd, void *aux)
993 {
994 int rv = 0;
995
996 switch (cmd) {
997 case MODULE_CMD_INIT:
998 #ifdef _MODULE
999 rv = config_init_component(cfdriver_ioconf_hyperv,
1000 cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
1001 #endif
1002 hyperv_init();
1003 break;
1004
1005 case MODULE_CMD_FINI:
1006 #ifdef _MODULE
1007 rv = config_fini_component(cfdriver_ioconf_hyperv,
1008 cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
1009 #endif
1010 break;
1011
1012 default:
1013 rv = ENOTTY;
1014 break;
1015 }
1016
1017 return rv;
1018 }
1019
1020 #if NVMBUS > 0
1021 /*
1022 * genfb at vmbus
1023 */
/*
 * Callback tables whose addresses are handed to the framebuffer driver
 * through the "pmf_callback" and "mode_callback" device properties.
 */
static struct genfb_pmf_callback pmf_cb;
static struct genfb_mode_callback mode_cb;
1026
/*
 * Mode-switch callback: nothing to do here, always reports success.
 */
static bool
x86_genfb_setmode(struct genfb_softc *sc, int newmode)
{
	return true;
}
1032
/*
 * Suspend callback: nothing to save, always reports success.
 */
static bool
x86_genfb_suspend(device_t dev, const pmf_qual_t *qual)
{
	return true;
}
1038
/*
 * Resume callback: restore the palette when genfb is configured into the
 * kernel.  Always reports success.
 */
static bool
x86_genfb_resume(device_t dev, const pmf_qual_t *qual)
{
#if NGENFB > 0
	struct genfb_vmbus_softc *sc = device_private(dev);

	genfb_restore_palette(&sc->sc_gen);
#endif
	return true;
}
1049
/*
 * Copy the boot loader's framebuffer description into the device property
 * dictionary of a framebuffer device.
 *
 *	dev	device being registered
 *	dict	its property dictionary
 *
 * Does nothing when the boot loader passed no BTINFO_FRAMEBUFFER record.
 * Geometry, address and the mode callback are only set when the record
 * carries a non-zero physical address.
 */
static void
populate_fbinfo(device_t dev, prop_dictionary_t dict)
{
#if NWSDISPLAY > 0 && NGENFB > 0
	struct rasops_info *ri = &x86_genfb_console_screen.scr_ri;
#endif
	const void *fbptr = lookup_bootinfo(BTINFO_FRAMEBUFFER);
	struct btinfo_framebuffer fbinfo;

	if (fbptr == NULL)
		return;

	/* Work on a local copy of the bootinfo record. */
	memcpy(&fbinfo, fbptr, sizeof(fbinfo));

	if (fbinfo.physaddr != 0) {
		prop_dictionary_set_uint32(dict, "width", fbinfo.width);
		prop_dictionary_set_uint32(dict, "height", fbinfo.height);
		prop_dictionary_set_uint8(dict, "depth", fbinfo.depth);
		prop_dictionary_set_uint16(dict, "linebytes", fbinfo.stride);

		prop_dictionary_set_uint64(dict, "address", fbinfo.physaddr);
#if NWSDISPLAY > 0 && NGENFB > 0
		/*
		 * The early console already mapped the framebuffer; pass
		 * that mapping on (the hardware one if a shadow is in use).
		 */
		if (ri->ri_bits != NULL) {
			prop_dictionary_set_uint64(dict, "virtual_address",
			    ri->ri_hwbits != NULL ?
			    (vaddr_t)ri->ri_hworigbits :
			    (vaddr_t)ri->ri_origbits);
		}
#endif
	}
#if notyet
	prop_dictionary_set_bool(dict, "splash",
	    (fbinfo.flags & BI_FB_SPLASH) != 0);
#endif
#if 0
	if (fbinfo.depth == 8) {
		gfb_cb.gcc_cookie = NULL;
		gfb_cb.gcc_set_mapreg = x86_genfb_set_mapreg;
		prop_dictionary_set_uint64(dict, "cmap_callback",
		    (uint64_t)(uintptr_t)&gfb_cb);
	}
#endif
	if (fbinfo.physaddr != 0) {
		mode_cb.gmc_setmode = x86_genfb_setmode;
		prop_dictionary_set_uint64(dict, "mode_callback",
		    (uint64_t)(uintptr_t)&mode_cb);
	}

#if NWSDISPLAY > 0 && NGENFB > 0
	if (device_is_a(dev, "genfb")) {
		/* Shadow framebuffer is on iff the console uses one. */
		prop_dictionary_set_bool(dict, "enable_shadowfb",
		    ri->ri_hwbits != NULL);

		x86_genfb_set_console_dev(dev);
#ifdef DDB
		db_trap_callback = x86_genfb_ddb_trap_callback;
#endif
	}
#endif
}
1110 #endif
1111
/*
 * Device registration hook.  For the first vmbus child whose type GUID is
 * hyperv_guid_video (and only while no console has been found yet), fill
 * in the framebuffer and console properties and mark the console as found.
 *
 *	dev	device being registered
 *	aux	attach arguments; treated as struct vmbus_attach_args when
 *		the parent is a vmbus
 *
 * Always returns NULL.
 */
device_t
device_hyperv_register(device_t dev, void *aux)
{
#if NVMBUS > 0
	device_t parent = device_parent(dev);

	if (parent && device_is_a(parent, "vmbus") && !x86_found_console) {
		struct vmbus_attach_args *aa = aux;

		if (memcmp(aa->aa_type, &hyperv_guid_video,
		    sizeof(*aa->aa_type)) == 0) {
			prop_dictionary_t dict = device_properties(dev);

			/* Initialize genfb for serial console */
			x86_genfb_init();

			/*
			 * framebuffer drivers other than genfb can work
			 * without the address property
			 */
			populate_fbinfo(dev, dict);

			/*
			 * Note the dangling "else": when the #if block is
			 * compiled in, the unconditional "is_console = true"
			 * below becomes the else branch.
			 */
#if 1 && NWSDISPLAY > 0 && NGENFB > 0
			/* XXX */
			if (device_is_a(dev, "genfb")) {
				prop_dictionary_set_bool(dict, "is_console",
				    genfb_is_console());
			} else
#endif
			prop_dictionary_set_bool(dict, "is_console", true);

			prop_dictionary_set_bool(dict, "clear-screen", false);
#if NWSDISPLAY > 0 && NGENFB > 0
			prop_dictionary_set_uint16(dict, "cursor-row",
			    x86_genfb_console_screen.scr_ri.ri_crow);
#endif
			pmf_cb.gpc_suspend = x86_genfb_suspend;
			pmf_cb.gpc_resume = x86_genfb_resume;
			prop_dictionary_set_uint64(dict, "pmf_callback",
			    (uint64_t)(uintptr_t)&pmf_cb);
			x86_found_console = true;
			return NULL;
		}
	}
#endif
	return NULL;
}
1159