1 /* $NetBSD: hypervisor.c,v 1.99 2025/04/30 05:15:08 imil Exp $ */
2 
3 /*
4  * Copyright (c) 2005 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  *
30  * Copyright (c) 2004 Christian Limpach.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
43  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
45  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
46  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
47  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
51  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52  */
53 
54 
55 #include <sys/cdefs.h>
56 __KERNEL_RCSID(0, "$NetBSD: hypervisor.c,v 1.99 2025/04/30 05:15:08 imil Exp $");
57 
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/device.h>
61 #include <sys/sysctl.h>
62 
63 #include "xenbus.h"
64 #include "xencons.h"
65 #include "isa.h"
66 #include "isadma.h"
67 #include "pci.h"
68 #include "acpica.h"
69 #include "kernfs.h"
70 
71 #include "opt_xen.h"
72 #include "opt_mpbios.h"
73 
74 #include <xen/xen.h>
75 #include <xen/hypervisor.h>
76 #include <xen/evtchn.h>
77 #include <xen/include/public/version.h>
78 #include <xen/include/public/vcpu.h>
79 #include <x86/pio.h>
80 #include <x86/machdep.h>
81 
82 #include <sys/cpu.h>
83 #include <sys/dirent.h>
84 #include <sys/stat.h>
85 #include <sys/tree.h>
86 #include <sys/vnode.h>
87 #include <miscfs/specfs/specdev.h>
88 #include <miscfs/kernfs/kernfs.h>
89 #include <xen/kernfs_machdep.h>
90 #include <dev/isa/isavar.h>
91 #include <xen/granttables.h>
92 #include <xen/vcpuvar.h>
93 #if NPCI > 0
94 #include <dev/pci/pcivar.h>
95 #if NACPICA > 0
96 #include <dev/acpi/acpivar.h>
97 #include <machine/mpconfig.h>
98 #include <xen/mpacpi.h>
99 #endif
100 #ifdef MPBIOS
101 #include <machine/mpbiosvar.h>
102 #endif
103 #endif /* NPCI */
104 
105 #if NXENBUS > 0
106 #include <xen/xenbus.h>
107 #endif
108 
109 #if NXENNET_HYPERVISOR > 0
110 #include <net/if.h>
111 #include <net/if_ether.h>
112 #include <net/if_media.h>
113 #include <xen/if_xennetvar.h>
114 #endif
115 
116 #if NXBD_HYPERVISOR > 0
117 #include <sys/buf.h>
118 #include <sys/disk.h>
119 #include <sys/bufq.h>
120 #include <dev/dkvar.h>
121 #include <xen/xbdvar.h>
122 #endif
123 
124 int       hypervisor_match(device_t, cfdata_t, void *);
125 void      hypervisor_attach(device_t, device_t, void *);
126 
127 CFATTACH_DECL_NEW(hypervisor, 0,
128     hypervisor_match, hypervisor_attach, NULL, NULL);
129 
130 static int hypervisor_print(void *, const char *);
131 
132 union hypervisor_attach_cookie {
133           const char *hac_device;                 /* first elem of all */
134 #if NXENCONS > 0
135           struct xencons_attach_args hac_xencons;
136 #endif
137 #if NXENBUS > 0
138           struct xenbus_attach_args hac_xenbus;
139 #endif
140 #if NXENNET_HYPERVISOR > 0
141           struct xennet_attach_args hac_xennet;
142 #endif
143 #if NXBD_HYPERVISOR > 0
144           struct xbd_attach_args hac_xbd;
145 #endif
146 #if NPCI > 0
147           struct pcibus_attach_args hac_pba;
148 #if defined(DOM0OPS) && NISA > 0
149           struct isabus_attach_args hac_iba;
150 #endif
151 #if NACPICA > 0
152           struct acpibus_attach_args hac_acpi;
153 #endif
154 #endif /* NPCI */
155           struct vcpu_attach_args hac_vcaa;
156 };
157 
/*
 * Set once an ISA bus has been attached.  hypervisor_attach() looks at
 * it after the PCI bus has been configured and, if it is still clear,
 * attaches an ISA bus itself.
 */
#if defined(XENPV) && defined(DOM0OPS)
int	isa_has_been_seen;
#if NISA > 0
struct x86_isa_chipset	x86_isa_chipset;
#endif /* NISA */
#endif /* XENPV && DOM0OPS */
168 
#if defined(XENPVHVM) || defined(XENPVH)
#include <xen/include/public/arch-x86/cpuid.h>
#include <xen/include/public/arch-x86/hvm/start_info.h>
#include <xen/include/public/hvm/hvm_op.h>
#include <xen/include/public/hvm/params.h>

#include <x86/bootinfo.h>

/* Entry points defined elsewhere carry an 'X' prefix. */
#define	IDTVEC(name)	__CONCAT(X, name)
typedef void (vector)(void);
extern vector IDTVEC(syscall);
extern vector IDTVEC(syscall32);
extern vector IDTVEC(osyscall);
extern vector *x86_exceptions[];

/* Handler installed on the event-channel upcall vector. */
extern vector IDTVEC(hypervisor_pvhvm_callback);
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

/* Shared info page: kernel virtual address and physical address. */
volatile shared_info_t *HYPERVISOR_shared_info __read_mostly;
paddr_t HYPERVISOR_shared_info_pa;

/* Start-of-day information. */
union start_info_union start_info_union __aligned(PAGE_SIZE);
struct hvm_start_info *hvm_start_info;

/* IDT vector used for event upcalls; assigned by xen_hvm_init_late(). */
static int xen_hvm_vec = 0;

#endif /* XENPVHVM || XENPVH */
195 
/* Result of XENVER_version, filled in by hypervisor_attach(). */
int	xen_version;
/* Set to true by init_xen_early() when the guest was booted as PVH. */
bool	pvh_boot = false;
198 
199 /* power management, for save/restore */
200 static bool hypervisor_suspend(device_t, const pmf_qual_t *);
201 static bool hypervisor_resume(device_t, const pmf_qual_t *);
202 
/*
 * Emulated-device unplug protocol (from FreeBSD): when reading
 * XEN_MAGIC_IOPORT yields XMI_MAGIC, a mask of XMI_UNPLUG_* bits may be
 * written back to the same port.
 */
#define XEN_MAGIC_IOPORT 0x10
enum {
	XMI_UNPLUG_IDE_DISKS		 = 1 << 0,
	XMI_UNPLUG_NICS			 = 1 << 1,
	XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 1 << 2,
	XMI_MAGIC			 = 0x49d2
};
211 
212 
213 #ifdef XENPVHVM
214 
/* True while event upcalls use a per-CPU vector; see xen_hvm_init_cpu(). */
bool xenhvm_use_percpu_callback = false;
216 
217 static void
xen_init_hypercall_page(void)218 xen_init_hypercall_page(void)
219 {
220           extern vaddr_t hypercall_page;
221           u_int descs[4];
222 
223           x86_cpuid(XEN_CPUID_LEAF(2), descs);
224 
225           /*
226            * Given 32 bytes per hypercall stub, and an optimistic number
227            * of 100 hypercalls ( the current max is 55), there shouldn't
228            * be any reason to spill over the arbitrary number of 1
229            * hypercall page. This is what we allocate in locore.S
230            * anyway. Make sure the allocation matches the registration.
231            */
232 
233           KASSERT(descs[0] == 1);
234 
235           /* XXX: vtophys(&hypercall_page) */
236           wrmsr(descs[1], (uintptr_t)&hypercall_page - KERNBASE);
237 }
238 
239 uint32_t hvm_start_paddr;
240 
241 void init_xen_early(void);
242 void
init_xen_early(void)243 init_xen_early(void)
244 {
245           const char *cmd_line;
246           if (!vm_guest_is_pvh())
247                     return;
248 
249           pvh_boot = true;
250 
251           hvm_start_info = (void *)((uintptr_t)hvm_start_paddr + KERNBASE);
252 
253           if (hvm_start_info->cmdline_paddr != 0) {
254                     cmd_line =
255                         (void *)((uintptr_t)hvm_start_info->cmdline_paddr + KERNBASE);
256                     strlcpy(xen_start_info.cmd_line, cmd_line,
257                         sizeof(xen_start_info.cmd_line));
258           } else {
259                     xen_start_info.cmd_line[0] = '\0';
260           }
261           xen_start_info.flags = hvm_start_info->flags;
262 
263           if (vm_guest != VM_GUEST_XENPVH)
264                     return;
265 
266           xen_init_hypercall_page();
267 
268           HYPERVISOR_shared_info = (void *)((uintptr_t)HYPERVISOR_shared_info_pa + KERNBASE);
269           struct xen_add_to_physmap xmap = {
270                     .domid = DOMID_SELF,
271                     .space = XENMAPSPACE_shared_info,
272                     .idx = 0, /* Important - XEN checks for this */
273                     .gpfn = atop(HYPERVISOR_shared_info_pa)
274           };
275 
276           int err;
277 
278           if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap)) < 0) {
279                     printk(
280                         "Xen HVM: Unable to register HYPERVISOR_shared_info %d\n", err);
281           }
282           delay_func = x86_delay = xen_delay;
283           x86_initclock_func = xen_initclocks;
284 }
285 
286 
287 static bool
xen_check_hypervisordev(void)288 xen_check_hypervisordev(void)
289 {
290           extern struct cfdata cfdata[];
291           for (int i = 0; cfdata[i].cf_name != NULL; i++) {
292                     if (strcasecmp("hypervisor", cfdata[i].cf_name) == 0) {
293                               switch(cfdata[i].cf_fstate) {
294                               case FSTATE_NOTFOUND:
295                               case FSTATE_FOUND:
296                               case FSTATE_STAR:
297                                         return true;
298                               default:
299                                         return false;
300                               }
301                     }
302           }
303           return 0;
304 }
305 
306 static int
xen_hvm_init_late(void)307 xen_hvm_init_late(void)
308 {
309           struct idt_vec *iv = &(cpu_info_primary.ci_idtvec);
310 
311           if (HYPERVISOR_xen_version(XENVER_version, NULL) < 0) {
312                     aprint_error("Xen HVM: hypercall page not working\n");
313                     return 0;
314           }
315           xen_init_features();
316 
317           /* Init various preset boot time data structures  */
318           /* XEN xenstore shared page address, event channel */
319           struct xen_hvm_param xen_hvm_param;
320 
321           xen_hvm_param.domid = DOMID_SELF;
322           xen_hvm_param.index = HVM_PARAM_STORE_PFN;
323 
324           if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) {
325                     aprint_error(
326                         "Xen HVM: Unable to obtain xenstore page address\n");
327                     return 0;
328           }
329 
330           /* Re-use PV field */
331           xen_start_info.store_mfn = xen_hvm_param.value;
332 
333           pmap_kenter_pa((vaddr_t) xenstore_interface, ptoa(xen_start_info.store_mfn),
334               VM_PROT_READ|VM_PROT_WRITE, 0);
335 
336           xen_hvm_param.domid = DOMID_SELF;
337           xen_hvm_param.index = HVM_PARAM_STORE_EVTCHN;
338 
339           if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) {
340                     aprint_error(
341                         "Xen HVM: Unable to obtain xenstore event channel\n");
342                     return 0;
343           }
344 
345           xen_start_info.store_evtchn = xen_hvm_param.value;
346 
347           /*
348            * First register callback: here's why
349            * http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=7b5b8ca7dffde866d851f0b87b994e0b13e5b867
350            */
351 
352           /*
353            * Check for XENFEAT_hvm_callback_vector. Can't proceed
354            * without it.
355            */
356           if (!xen_feature(XENFEAT_hvm_callback_vector)) {
357                     aprint_error("Xen HVM: XENFEAT_hvm_callback_vector"
358                         "not available, cannot proceed");
359                     return 0;
360           }
361 
362           /*
363            * prepare vector.
364            * We don't really care where it is, as long as it's free
365            */
366           xen_hvm_vec = idt_vec_alloc(iv, 129, 255);
367           idt_vec_set(iv, xen_hvm_vec, &IDTVEC(hypervisor_pvhvm_callback));
368 
369           events_default_setup();
370           return 1;
371 }
372 
373 int
xen_hvm_init(void)374 xen_hvm_init(void)
375 {
376           /*
377            * We need to setup the HVM interfaces early, so that we can
378            * properly setup the CPUs later (especially, all CPUs needs to
379            * run x86_cpuid() locally to get their vcpuid.
380            *
381            * For PVH, part of it has already been done.
382            */
383           if (vm_guest == VM_GUEST_XENPVH) {
384                     if (xen_hvm_init_late() == 0) {
385                               panic("hvm_init failed");
386                     }
387                     return 1;
388           }
389 
390           if (vm_guest != VM_GUEST_XENHVM)
391                     return 0;
392 
393           /* check if hypervisor was disabled with userconf */
394           if (!xen_check_hypervisordev())
395                     return 0;
396 
397           aprint_normal("Identified Guest XEN in HVM mode.\n");
398 
399           xen_init_hypercall_page();
400 
401           /* HYPERVISOR_shared_info */
402           struct xen_add_to_physmap xmap = {
403                     .domid = DOMID_SELF,
404                     .space = XENMAPSPACE_shared_info,
405                     .idx = 0, /* Important - XEN checks for this */
406                     .gpfn = atop(HYPERVISOR_shared_info_pa)
407           };
408 
409           if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0) {
410                     aprint_error(
411                         "Xen HVM: Unable to register HYPERVISOR_shared_info\n");
412                     return 0;
413           }
414 
415           /* HYPERVISOR_shared_info va,pa has been allocated in pmap_bootstrap() */
416           pmap_kenter_pa((vaddr_t) HYPERVISOR_shared_info,
417               HYPERVISOR_shared_info_pa, VM_PROT_READ|VM_PROT_WRITE, 0);
418 
419           if (xen_hvm_init_late() == 0)
420                     return 0;
421 
422           struct xen_hvm_param xen_hvm_param;
423           xen_hvm_param.domid = DOMID_SELF;
424           xen_hvm_param.index = HVM_PARAM_CONSOLE_PFN;
425 
426           if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) {
427                     aprint_debug(
428                         "Xen HVM: Unable to obtain xencons page address\n");
429                     xen_start_info.console.domU.mfn = 0;
430                     xen_start_info.console.domU.evtchn = -1;
431                     xencons_interface = 0;
432           } else {
433                     /* Re-use PV field */
434                     xen_start_info.console.domU.mfn = xen_hvm_param.value;
435 
436                     pmap_kenter_pa((vaddr_t) xencons_interface,
437                         ptoa(xen_start_info.console.domU.mfn),
438                         VM_PROT_READ|VM_PROT_WRITE, 0);
439 
440                     xen_hvm_param.domid = DOMID_SELF;
441                     xen_hvm_param.index = HVM_PARAM_CONSOLE_EVTCHN;
442 
443                     if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) {
444                               aprint_error(
445                                  "Xen HVM: Unable to obtain xencons event channel\n");
446                               return 0;
447                     }
448 
449                     xen_start_info.console.domU.evtchn = xen_hvm_param.value;
450           }
451 
452           /*
453            * PR port-amd64/55543
454            * workround for amazon's Xen 4.2: it looks like the Xen clock is not
455            * fully functional here. This version also doesn't support
456            * HVM_PARAM_CONSOLE_PFN.
457            */
458           if (xencons_interface != 0) {
459                     delay_func = x86_delay = xen_delay;
460                     x86_initclock_func = xen_initclocks;
461           }
462 
463           vm_guest = VM_GUEST_XENPVHVM; /* Be more specific */
464           return 1;
465 }
466 
467 int
xen_hvm_init_cpu(struct cpu_info * ci)468 xen_hvm_init_cpu(struct cpu_info *ci)
469 {
470           u_int32_t descs[4];
471           struct xen_hvm_param xen_hvm_param;
472           int error;
473           static bool again = 0;
474 
475           if (!vm_guest_is_xenpvh_or_pvhvm())
476                     return 0;
477 
478           KASSERT(ci == curcpu());
479 
480           descs[0] = 0;
481           x86_cpuid(XEN_CPUID_LEAF(4), descs);
482           if (descs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) {
483                     ci->ci_vcpuid = descs[1];
484           } else {
485                     aprint_debug_dev(ci->ci_dev,
486                         "Xen HVM: can't get VCPU id, falling back to ci_acpiid\n");
487                     ci->ci_vcpuid = ci->ci_acpiid;
488           }
489 
490           xen_map_vcpu(ci);
491 
492           /* Register event callback handler. */
493 
494           xen_hvm_param.domid = DOMID_SELF;
495           xen_hvm_param.index = HVM_PARAM_CALLBACK_IRQ;
496 
497           /* val[63:56] = 2, val[7:0] = vec */
498           xen_hvm_param.value = ((int64_t)0x2 << 56) | xen_hvm_vec;
499 
500           /* First try to set up a per-cpu vector. */
501           if (!again || xenhvm_use_percpu_callback) {
502                     struct xen_hvm_evtchn_upcall_vector xen_hvm_uvec;
503                     xen_hvm_uvec.vcpu = ci->ci_vcpuid;
504                     xen_hvm_uvec.vector = xen_hvm_vec;
505 
506                     xenhvm_use_percpu_callback = 1;
507                     error = HYPERVISOR_hvm_op(
508                         HVMOP_set_evtchn_upcall_vector, &xen_hvm_uvec);
509                     if (error < 0) {
510                               aprint_error_dev(ci->ci_dev,
511                                   "failed to set event upcall vector: %d\n", error);
512                               if (again)
513                                         panic("event upcall vector");
514                               aprint_error_dev(ci->ci_dev,
515                                   "falling back to global vector\n");
516                               xenhvm_use_percpu_callback = 0;
517                     } else {
518                               /*
519                                * From FreeBSD:
520                                * Trick toolstack to think we are enlightened
521                                */
522                               xen_hvm_param.value = 1;
523                               aprint_verbose_dev(ci->ci_dev,
524                                   "using event upcall vector: %d\n", xen_hvm_vec );
525                     }
526           }
527 
528           if (again)
529                     return 1;
530 
531           if (HYPERVISOR_hvm_op(HVMOP_set_param, &xen_hvm_param) < 0) {
532                     aprint_error_dev(ci->ci_dev,
533                         "Xen HVM: Unable to register event callback vector\n");
534                     vm_guest = VM_GUEST_XENHVM;
535                     return 0;
536           }
537           again = 1;
538           return 1;
539 }
540 
541 #endif /* XENPVHVM */
542 
543 /*
544  * Probe for the hypervisor; always succeeds.
545  */
546 int
hypervisor_match(device_t parent,cfdata_t match,void * aux)547 hypervisor_match(device_t parent, cfdata_t match, void *aux)
548 {
549           struct hypervisor_attach_args *haa = aux;
550 
551           /* Attach path sanity check */
552           if (strncmp(haa->haa_busname, "hypervisor", sizeof("hypervisor")) != 0)
553                     return 0;
554 
555 
556 #ifdef XENPVHVM
557           if (!vm_guest_is_xenpvh_or_pvhvm())
558                     return 0;
559 #endif
560           /* If we got here, it must mean we matched */
561           return 1;
562 }
563 
564 #if defined(MULTIPROCESSOR) && defined(XENPV)
565 static int
hypervisor_vcpu_print(void * aux,const char * parent)566 hypervisor_vcpu_print(void *aux, const char *parent)
567 {
568           /* Unconfigured cpus are ignored quietly. */
569           return (QUIET);
570 }
571 #endif /* MULTIPROCESSOR && XENPV */
572 
573 /*
574  * Attach the hypervisor.
575  */
576 void
hypervisor_attach(device_t parent,device_t self,void * aux)577 hypervisor_attach(device_t parent, device_t self, void *aux)
578 {
579 
580 #if NPCI >0
581 #ifdef PCI_BUS_FIXUP
582           int pci_maxbus = 0;
583 #endif
584 #endif /* NPCI */
585           union hypervisor_attach_cookie hac;
586           char xen_extra_version[XEN_EXTRAVERSION_LEN];
587           static char xen_version_string[20];
588           int rc;
589           const struct sysctlnode *node = NULL;
590 
591 #ifdef XENPVHVM
592           if (vm_guest == VM_GUEST_XENPVHVM) {
593                     /* disable emulated devices */
594                     if (inw(XEN_MAGIC_IOPORT) == XMI_MAGIC) {
595                               outw(XEN_MAGIC_IOPORT,
596                                   XMI_UNPLUG_IDE_DISKS | XMI_UNPLUG_NICS);
597                     } else {
598                               aprint_error_dev(self,
599                                   "Unable to disable emulated devices\n");
600                     }
601           }
602 #endif /* XENPVHVM */
603           xenkernfs_init();
604 
605           xen_version = HYPERVISOR_xen_version(XENVER_version, NULL);
606           memset(xen_extra_version, 0, sizeof(xen_extra_version));
607           HYPERVISOR_xen_version(XENVER_extraversion, xen_extra_version);
608           rc = snprintf(xen_version_string, 20, "%d.%d%s", XEN_MAJOR(xen_version),
609                     XEN_MINOR(xen_version), xen_extra_version);
610           aprint_normal(": Xen version %s\n", xen_version_string);
611           if (rc >= 20)
612                     aprint_debug(": xen_version_string truncated\n");
613 
614           sysctl_createv(NULL, 0, NULL, &node, 0,
615               CTLTYPE_NODE, "xen",
616               SYSCTL_DESCR("Xen top level node"),
617               NULL, 0, NULL, 0, CTL_MACHDEP, CTL_CREATE, CTL_EOL);
618 
619           if (node != NULL) {
620                     sysctl_createv(NULL, 0, &node, NULL, CTLFLAG_READONLY,
621                         CTLTYPE_STRING, "version",
622                         SYSCTL_DESCR("Xen hypervisor version"),
623                         NULL, 0, xen_version_string, 0, CTL_CREATE, CTL_EOL);
624           }
625 
626           aprint_verbose_dev(self, "features: ");
627 #define XEN_TST_F(n) \
628           if (xen_feature(XENFEAT_##n)) \
629                     aprint_verbose(" %s", #n);
630 
631           XEN_TST_F(writable_page_tables);
632           XEN_TST_F(writable_descriptor_tables);
633           XEN_TST_F(auto_translated_physmap);
634           XEN_TST_F(supervisor_mode_kernel);
635           XEN_TST_F(pae_pgdir_above_4gb);
636           XEN_TST_F(mmu_pt_update_preserve_ad);
637           XEN_TST_F(highmem_assist);
638           XEN_TST_F(gnttab_map_avail_bits);
639           XEN_TST_F(hvm_callback_vector);
640           XEN_TST_F(hvm_safe_pvclock);
641           XEN_TST_F(hvm_pirqs);
642 #undef XEN_TST_F
643           aprint_verbose("\n");
644 
645           xengnt_init();
646           events_init();
647 
648 #ifdef XENPV
649           memset(&hac, 0, sizeof(hac));
650           hac.hac_vcaa.vcaa_name = "vcpu";
651           hac.hac_vcaa.vcaa_caa.cpu_number = 0;
652           hac.hac_vcaa.vcaa_caa.cpu_role = CPU_ROLE_BP;
653           hac.hac_vcaa.vcaa_caa.cpu_func = NULL; /* See xen/x86/cpu.c:vcpu_attach() */
654           config_found(self, &hac.hac_vcaa, hypervisor_print,
655               CFARGS(.iattr = "xendevbus"));
656 
657 #ifdef MULTIPROCESSOR
658 
659           /*
660            * The xenstore contains the configured number of vcpus.
661            * The xenstore however, is not accessible until much later in
662            * the boot sequence. We therefore bruteforce check for
663            * allocated vcpus (See: cpu.c:vcpu_match()) by iterating
664            * through the maximum supported by NetBSD MP.
665            */
666           cpuid_t vcpuid;
667 
668           for (vcpuid = 1; vcpuid < maxcpus; vcpuid++) {
669                     memset(&hac, 0, sizeof(hac));
670                     hac.hac_vcaa.vcaa_name = "vcpu";
671                     hac.hac_vcaa.vcaa_caa.cpu_number = vcpuid;
672                     hac.hac_vcaa.vcaa_caa.cpu_role = CPU_ROLE_AP;
673                     hac.hac_vcaa.vcaa_caa.cpu_func = NULL; /* See xen/x86/cpu.c:vcpu_attach() */
674                     if (NULL == config_found(self, &hac.hac_vcaa,
675                                                    hypervisor_vcpu_print,
676                                                    CFARGS(.iattr = "xendevbus"))) {
677                               break;
678                     }
679           }
680 
681 #endif /* MULTIPROCESSOR */
682 #endif /* XENPV */
683 
684 #if NXENBUS > 0
685           extern struct x86_bus_dma_tag xenbus_bus_dma_tag;
686           memset(&hac, 0, sizeof(hac));
687           hac.hac_xenbus.xa_device = "xenbus";
688           hac.hac_xenbus.xa_dmat = &xenbus_bus_dma_tag;
689           config_found(self, &hac.hac_xenbus, hypervisor_print,
690               CFARGS(.iattr = "xendevbus"));
691 #endif
692 #if NXENCONS > 0
693           if (xencons_interface != 0 || vm_guest != VM_GUEST_XENPVHVM) {
694                     memset(&hac, 0, sizeof(hac));
695                     hac.hac_xencons.xa_device = "xencons";
696                     config_found(self, &hac.hac_xencons, hypervisor_print,
697                         CFARGS(.iattr = "xendevbus"));
698           }
699 #endif
700 
701 #if defined(DOM0OPS)
702 #if defined(XENPV)
703 #if NISADMA > 0 && NACPICA > 0
704         /*
705            * ACPI needs ISA DMA initialized before they start probing.
706            */
707           isa_dmainit(&x86_isa_chipset, x86_bus_space_io, &isa_bus_dma_tag,
708               self);
709 #endif
710 
711 #if NPCI > 0
712 #if NACPICA > 0
713           if (acpi_present) {
714                     memset(&hac, 0, sizeof(hac));
715                     hac.hac_acpi.aa_iot = x86_bus_space_io;
716                     hac.hac_acpi.aa_memt = x86_bus_space_mem;
717                     hac.hac_acpi.aa_pc = NULL;
718                     hac.hac_acpi.aa_pciflags =
719                               PCI_FLAGS_IO_OKAY | PCI_FLAGS_MEM_OKAY |
720                               PCI_FLAGS_MRL_OKAY | PCI_FLAGS_MRM_OKAY |
721                               PCI_FLAGS_MWI_OKAY;
722                     hac.hac_acpi.aa_ic = &x86_isa_chipset;
723                     hac.hac_acpi.aa_dmat = &pci_bus_dma_tag;
724 #ifdef _LP64
725                     hac.hac_acpi.aa_dmat64 = &pci_bus_dma64_tag;
726 #else
727                     hac.hac_acpi.aa_dmat64 = NULL;
728 #endif /* _LP64 */
729                     config_found(self, &hac.hac_acpi, NULL,
730                         CFARGS(.iattr = "acpibus"));
731           }
732 #endif /* NACPICA */
733           memset(&hac, 0, sizeof(hac));
734           hac.hac_pba.pba_iot = x86_bus_space_io;
735           hac.hac_pba.pba_memt = x86_bus_space_mem;
736           hac.hac_pba.pba_dmat = &pci_bus_dma_tag;
737 #ifdef _LP64
738           hac.hac_pba.pba_dmat64 = &pci_bus_dma64_tag;
739 #else
740           hac.hac_pba.pba_dmat64 = NULL;
741 #endif /* _LP64 */
742           hac.hac_pba.pba_flags = PCI_FLAGS_MEM_OKAY | PCI_FLAGS_IO_OKAY;
743           hac.hac_pba.pba_bridgetag = NULL;
744           hac.hac_pba.pba_bus = 0;
745 #if NACPICA > 0 && defined(ACPI_SCANPCI)
746           if (mpacpi_active)
747                     mp_pci_scan(self, &hac.hac_pba, pcibusprint);
748           else
749 #endif
750 #if defined(MPBIOS) && defined(MPBIOS_SCANPCI)
751           if (mpbios_scanned != 0)
752                     mp_pci_scan(self, &hac.hac_pba, pcibusprint);
753           else
754 #endif
755           config_found(self, &hac.hac_pba, pcibusprint,
756               CFARGS(.iattr = "pcibus"));
757 #if NACPICA > 0
758           if (mp_verbose)
759                     acpi_pci_link_state();
760 #endif
761 #if NISA > 0
762           if (isa_has_been_seen == 0) {
763                     memset(&hac, 0, sizeof(hac));
764                     hac.hac_iba._iba_busname = "isa";
765                     hac.hac_iba.iba_iot = x86_bus_space_io;
766                     hac.hac_iba.iba_memt = x86_bus_space_mem;
767                     hac.hac_iba.iba_dmat = &isa_bus_dma_tag;
768                     hac.hac_iba.iba_ic = NULL; /* No isa DMA yet */
769                     config_found(self, &hac.hac_iba, isabusprint,
770                         CFARGS(.iattr = "isabus"));
771           }
772 #endif /* NISA */
773 #endif /* NPCI */
774 #endif /* XENPV */
775 
776           if (xendomain_is_privileged()) {
777                     xenprivcmd_init();
778           }
779 #endif /* DOM0OPS */
780 
781           hypervisor_machdep_attach();
782 
783           if (!pmf_device_register(self, hypervisor_suspend, hypervisor_resume))
784                     aprint_error_dev(self, "couldn't establish power handler\n");
785 
786 }
787 
788 static bool
hypervisor_suspend(device_t dev,const pmf_qual_t * qual)789 hypervisor_suspend(device_t dev, const pmf_qual_t *qual)
790 {
791 #ifdef XENPV
792           events_suspend();
793           xengnt_suspend();
794 #endif
795           return true;
796 }
797 
798 static bool
hypervisor_resume(device_t dev,const pmf_qual_t * qual)799 hypervisor_resume(device_t dev, const pmf_qual_t *qual)
800 {
801 #ifdef XENPV
802           hypervisor_machdep_resume();
803 
804           xengnt_resume();
805           events_resume();
806 #endif
807           return true;
808 }
809 
810 static int
hypervisor_print(void * aux,const char * parent)811 hypervisor_print(void *aux, const char *parent)
812 {
813           union hypervisor_attach_cookie *hac = aux;
814 
815           if (parent)
816                     aprint_normal("%s at %s", hac->hac_device, parent);
817           return (UNCONF);
818 }
819 
820 #define DIR_MODE    (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
821 
822 kernfs_parentdir_t *kernxen_pkt;
823 
824 void
xenkernfs_init(void)825 xenkernfs_init(void)
826 {
827 #if NKERNFS > 0
828           kernfs_entry_t *dkt;
829 
830           KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
831           KERNFS_INITENTRY(dkt, DT_DIR, "xen", NULL, KFSsubdir, VDIR, DIR_MODE);
832           kernfs_addentry(NULL, dkt);
833           kernxen_pkt = KERNFS_ENTOPARENTDIR(dkt);
834 #endif
835 }
836 
837 /*
838  * setup Xen's vcpu_info. requires ci_vcpuid to be initialized.
839  */
840 void
xen_map_vcpu(struct cpu_info * ci)841 xen_map_vcpu(struct cpu_info *ci)
842 {
843           int size;
844           uintptr_t ptr;
845           struct vcpu_register_vcpu_info vcpu_info_op;
846           paddr_t ma;
847           int ret;
848 
849           if (ci->ci_vcpuid < XEN_LEGACY_MAX_VCPUS) {
850                     ci->ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[ci->ci_vcpuid];
851                     return;
852           }
853 
854           /*
855            * need to map it via VCPUOP_register_vcpu_info
856            * aligning to the smallest power-of-2 size which can contain
857            * vcpu_info ensures this. Also make sure it's cache-line aligned,
858            * for performances.
859            */
860           size = CACHE_LINE_SIZE;
861           while (size < sizeof(struct vcpu_info)) {
862                     size = size << 1;
863           }
864           ptr = (uintptr_t)uvm_km_alloc(kernel_map,
865                         sizeof(struct vcpu_info) + size - 1, 0,
866                         UVM_KMF_WIRED|UVM_KMF_ZERO);
867           ptr = roundup2(ptr, size);
868           ci->ci_vcpu = (struct vcpu_info *)ptr;
869 
870           pmap_extract_ma(pmap_kernel(), (ptr & ~PAGE_MASK), &ma);
871           vcpu_info_op.mfn = ma >> PAGE_SHIFT;
872           vcpu_info_op.offset = (ptr & PAGE_MASK);
873           vcpu_info_op.rsvd = 0;
874 
875           ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info,
876               ci->ci_vcpuid, &vcpu_info_op);
877           if (ret) {
878                     panic("VCPUOP_register_vcpu_info for %d failed: %d",
879                         ci->ci_vcpuid, ret);
880           }
881 }
882