1 /*-
2  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus.c 324572 2017-10-13 02:01:03Z sephe $");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
42 #include <sys/proc.h>
43 #include <sys/smp.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 #include <sys/taskqueue.h>
47 
48 #include <machine/bus.h>
49 #include <machine/intr_machdep.h>
50 #include <machine/resource.h>
51 #include <machine/apicvar.h>
52 #include <machine/md_var.h>
53 
54 #include <contrib/dev/acpica/include/acpi.h>
55 #include <dev/acpica/acpivar.h>
56 
57 #include <dev/hyperv/include/hyperv.h>
58 #include <dev/hyperv/include/vmbus_xact.h>
59 #include <dev/hyperv/vmbus/hyperv_reg.h>
60 #include <dev/hyperv/vmbus/hyperv_var.h>
61 #include <dev/hyperv/vmbus/vmbus_reg.h>
62 #include <dev/hyperv/vmbus/vmbus_var.h>
63 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
64 
65 #include "acpi_if.h"
66 #include "pcib_if.h"
67 #include "vmbus_if.h"
68 
69 #define VMBUS_GPADL_START		0xe1e10
70 
71 struct vmbus_msghc {
72 	struct vmbus_xact		*mh_xact;
73 	struct hypercall_postmsg_in	mh_inprm_save;
74 };
75 
76 static void			vmbus_identify(driver_t *, device_t);
77 static int			vmbus_probe(device_t);
78 static int			vmbus_attach(device_t);
79 static int			vmbus_detach(device_t);
80 static int			vmbus_read_ivar(device_t, device_t, int,
81 				    uintptr_t *);
82 static int			vmbus_child_pnpinfo_str(device_t, device_t,
83 				    char *, size_t);
84 static struct resource		*vmbus_alloc_resource(device_t dev,
85 				    device_t child, int type, int *rid,
86 				    rman_res_t start, rman_res_t end,
87 				    rman_res_t count, u_int flags);
88 static int			vmbus_alloc_msi(device_t bus, device_t dev,
89 				    int count, int maxcount, int *irqs);
90 static int			vmbus_release_msi(device_t bus, device_t dev,
91 				    int count, int *irqs);
92 static int			vmbus_alloc_msix(device_t bus, device_t dev,
93 				    int *irq);
94 static int			vmbus_release_msix(device_t bus, device_t dev,
95 				    int irq);
96 static int			vmbus_map_msi(device_t bus, device_t dev,
97 				    int irq, uint64_t *addr, uint32_t *data);
98 static uint32_t			vmbus_get_version_method(device_t, device_t);
99 static int			vmbus_probe_guid_method(device_t, device_t,
100 				    const struct hyperv_guid *);
101 static uint32_t			vmbus_get_vcpu_id_method(device_t bus,
102 				    device_t dev, int cpu);
103 static struct taskqueue		*vmbus_get_eventtq_method(device_t, device_t,
104 				    int);
105 
106 static int			vmbus_init(struct vmbus_softc *);
107 static int			vmbus_connect(struct vmbus_softc *, uint32_t);
108 static int			vmbus_req_channels(struct vmbus_softc *sc);
109 static void			vmbus_disconnect(struct vmbus_softc *);
110 static int			vmbus_scan(struct vmbus_softc *);
111 static void			vmbus_scan_teardown(struct vmbus_softc *);
112 static void			vmbus_scan_done(struct vmbus_softc *,
113 				    const struct vmbus_message *);
114 static void			vmbus_chanmsg_handle(struct vmbus_softc *,
115 				    const struct vmbus_message *);
116 static void			vmbus_msg_task(void *, int);
117 static void			vmbus_synic_setup(void *);
118 static void			vmbus_synic_teardown(void *);
119 static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
120 static int			vmbus_dma_alloc(struct vmbus_softc *);
121 static void			vmbus_dma_free(struct vmbus_softc *);
122 static int			vmbus_intr_setup(struct vmbus_softc *);
123 static void			vmbus_intr_teardown(struct vmbus_softc *);
124 static int			vmbus_doattach(struct vmbus_softc *);
125 static void			vmbus_event_proc_dummy(struct vmbus_softc *,
126 				    int);
127 
128 static struct vmbus_softc	*vmbus_sc;
129 
130 extern inthand_t IDTVEC(rsvd), IDTVEC(vmbus_isr);
131 
132 SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
133     "Hyper-V vmbus");
134 
135 static int			vmbus_pin_evttask = 1;
136 SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
137     &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");
138 
139 static const uint32_t		vmbus_version[] = {
140 	VMBUS_VERSION_WIN8_1,
141 	VMBUS_VERSION_WIN8,
142 	VMBUS_VERSION_WIN7,
143 	VMBUS_VERSION_WS2008
144 };
145 
146 static const vmbus_chanmsg_proc_t
147 vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
148 	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
149 	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
150 };
151 
152 static device_method_t vmbus_methods[] = {
153 	/* Device interface */
154 	DEVMETHOD(device_identify,		vmbus_identify),
155 	DEVMETHOD(device_probe,			vmbus_probe),
156 	DEVMETHOD(device_attach,		vmbus_attach),
157 	DEVMETHOD(device_detach,		vmbus_detach),
158 	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
159 	DEVMETHOD(device_suspend,		bus_generic_suspend),
160 	DEVMETHOD(device_resume,		bus_generic_resume),
161 
162 	/* Bus interface */
163 	DEVMETHOD(bus_add_child,		bus_generic_add_child),
164 	DEVMETHOD(bus_print_child,		bus_generic_print_child),
165 	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
166 	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
167 	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
168 	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
169 	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
170 	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
171 	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
172 	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
173 #if __FreeBSD_version >= 1100000
174 	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
175 #endif
176 
177 	/* pcib interface */
178 	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
179 	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
180 	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
181 	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
182 	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),
183 
184 	/* Vmbus interface */
185 	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
186 	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
187 	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
188 	DEVMETHOD(vmbus_get_event_taskq,	vmbus_get_eventtq_method),
189 
190 	DEVMETHOD_END
191 };
192 
193 static driver_t vmbus_driver = {
194 	"vmbus",
195 	vmbus_methods,
196 	sizeof(struct vmbus_softc)
197 };
198 
199 static devclass_t vmbus_devclass;
200 
201 DRIVER_MODULE(vmbus, pcib, vmbus_driver, vmbus_devclass, NULL, NULL);
202 DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, vmbus_devclass,
203     NULL, NULL);
204 
205 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
206 MODULE_DEPEND(vmbus, pci, 1, 1, 1);
207 MODULE_VERSION(vmbus, 1);
208 
209 static __inline struct vmbus_softc *
vmbus_get_softc(void)210 vmbus_get_softc(void)
211 {
212 	return vmbus_sc;
213 }
214 
215 void
vmbus_msghc_reset(struct vmbus_msghc * mh,size_t dsize)216 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
217 {
218 	struct hypercall_postmsg_in *inprm;
219 
220 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
221 		panic("invalid data size %zu", dsize);
222 
223 	inprm = vmbus_xact_req_data(mh->mh_xact);
224 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
225 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
226 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
227 	inprm->hc_dsize = dsize;
228 }
229 
230 struct vmbus_msghc *
vmbus_msghc_get(struct vmbus_softc * sc,size_t dsize)231 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
232 {
233 	struct vmbus_msghc *mh;
234 	struct vmbus_xact *xact;
235 
236 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
237 		panic("invalid data size %zu", dsize);
238 
239 	xact = vmbus_xact_get(sc->vmbus_xc,
240 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
241 	if (xact == NULL)
242 		return (NULL);
243 
244 	mh = vmbus_xact_priv(xact, sizeof(*mh));
245 	mh->mh_xact = xact;
246 
247 	vmbus_msghc_reset(mh, dsize);
248 	return (mh);
249 }
250 
251 void
vmbus_msghc_put(struct vmbus_softc * sc __unused,struct vmbus_msghc * mh)252 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
253 {
254 
255 	vmbus_xact_put(mh->mh_xact);
256 }
257 
258 void *
vmbus_msghc_dataptr(struct vmbus_msghc * mh)259 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
260 {
261 	struct hypercall_postmsg_in *inprm;
262 
263 	inprm = vmbus_xact_req_data(mh->mh_xact);
264 	return (inprm->hc_data);
265 }
266 
267 int
vmbus_msghc_exec_noresult(struct vmbus_msghc * mh)268 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
269 {
270 	sbintime_t time = SBT_1MS;
271 	struct hypercall_postmsg_in *inprm;
272 	bus_addr_t inprm_paddr;
273 	int i;
274 
275 	inprm = vmbus_xact_req_data(mh->mh_xact);
276 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
277 
278 	/*
279 	 * Save the input parameter so that we could restore the input
280 	 * parameter if the Hypercall failed.
281 	 *
282 	 * XXX
283 	 * Is this really necessary?!  i.e. Will the Hypercall ever
284 	 * overwrite the input parameter?
285 	 */
286 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
287 
288 	/*
289 	 * In order to cope with transient failures, e.g. insufficient
290 	 * resources on host side, we retry the post message Hypercall
291 	 * several times.  20 retries seem sufficient.
292 	 */
293 #define HC_RETRY_MAX	20
294 
295 	for (i = 0; i < HC_RETRY_MAX; ++i) {
296 		uint64_t status;
297 
298 		status = hypercall_post_message(inprm_paddr);
299 		if (status == HYPERCALL_STATUS_SUCCESS)
300 			return 0;
301 
302 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
303 		if (time < SBT_1S * 2)
304 			time *= 2;
305 
306 		/* Restore input parameter and try again */
307 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
308 	}
309 
310 #undef HC_RETRY_MAX
311 
312 	return EIO;
313 }
314 
315 int
vmbus_msghc_exec(struct vmbus_softc * sc __unused,struct vmbus_msghc * mh)316 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
317 {
318 	int error;
319 
320 	vmbus_xact_activate(mh->mh_xact);
321 	error = vmbus_msghc_exec_noresult(mh);
322 	if (error)
323 		vmbus_xact_deactivate(mh->mh_xact);
324 	return error;
325 }
326 
327 void
vmbus_msghc_exec_cancel(struct vmbus_softc * sc __unused,struct vmbus_msghc * mh)328 vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
329 {
330 
331 	vmbus_xact_deactivate(mh->mh_xact);
332 }
333 
334 const struct vmbus_message *
vmbus_msghc_wait_result(struct vmbus_softc * sc __unused,struct vmbus_msghc * mh)335 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
336 {
337 	size_t resp_len;
338 
339 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
340 }
341 
342 const struct vmbus_message *
vmbus_msghc_poll_result(struct vmbus_softc * sc __unused,struct vmbus_msghc * mh)343 vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
344 {
345 	size_t resp_len;
346 
347 	return (vmbus_xact_poll(mh->mh_xact, &resp_len));
348 }
349 
350 void
vmbus_msghc_wakeup(struct vmbus_softc * sc,const struct vmbus_message * msg)351 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
352 {
353 
354 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
355 }
356 
357 uint32_t
vmbus_gpadl_alloc(struct vmbus_softc * sc)358 vmbus_gpadl_alloc(struct vmbus_softc *sc)
359 {
360 	uint32_t gpadl;
361 
362 again:
363 	gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
364 	if (gpadl == 0)
365 		goto again;
366 	return (gpadl);
367 }
368 
369 static int
vmbus_connect(struct vmbus_softc * sc,uint32_t version)370 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
371 {
372 	struct vmbus_chanmsg_connect *req;
373 	const struct vmbus_message *msg;
374 	struct vmbus_msghc *mh;
375 	int error, done = 0;
376 
377 	mh = vmbus_msghc_get(sc, sizeof(*req));
378 	if (mh == NULL)
379 		return ENXIO;
380 
381 	req = vmbus_msghc_dataptr(mh);
382 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
383 	req->chm_ver = version;
384 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
385 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
386 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
387 
388 	error = vmbus_msghc_exec(sc, mh);
389 	if (error) {
390 		vmbus_msghc_put(sc, mh);
391 		return error;
392 	}
393 
394 	msg = vmbus_msghc_wait_result(sc, mh);
395 	done = ((const struct vmbus_chanmsg_connect_resp *)
396 	    msg->msg_data)->chm_done;
397 
398 	vmbus_msghc_put(sc, mh);
399 
400 	return (done ? 0 : EOPNOTSUPP);
401 }
402 
403 static int
vmbus_init(struct vmbus_softc * sc)404 vmbus_init(struct vmbus_softc *sc)
405 {
406 	int i;
407 
408 	for (i = 0; i < nitems(vmbus_version); ++i) {
409 		int error;
410 
411 		error = vmbus_connect(sc, vmbus_version[i]);
412 		if (!error) {
413 			sc->vmbus_version = vmbus_version[i];
414 			device_printf(sc->vmbus_dev, "version %u.%u\n",
415 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
416 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
417 			return 0;
418 		}
419 	}
420 	return ENXIO;
421 }
422 
423 static void
vmbus_disconnect(struct vmbus_softc * sc)424 vmbus_disconnect(struct vmbus_softc *sc)
425 {
426 	struct vmbus_chanmsg_disconnect *req;
427 	struct vmbus_msghc *mh;
428 	int error;
429 
430 	mh = vmbus_msghc_get(sc, sizeof(*req));
431 	if (mh == NULL) {
432 		device_printf(sc->vmbus_dev,
433 		    "can not get msg hypercall for disconnect\n");
434 		return;
435 	}
436 
437 	req = vmbus_msghc_dataptr(mh);
438 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
439 
440 	error = vmbus_msghc_exec_noresult(mh);
441 	vmbus_msghc_put(sc, mh);
442 
443 	if (error) {
444 		device_printf(sc->vmbus_dev,
445 		    "disconnect msg hypercall failed\n");
446 	}
447 }
448 
449 static int
vmbus_req_channels(struct vmbus_softc * sc)450 vmbus_req_channels(struct vmbus_softc *sc)
451 {
452 	struct vmbus_chanmsg_chrequest *req;
453 	struct vmbus_msghc *mh;
454 	int error;
455 
456 	mh = vmbus_msghc_get(sc, sizeof(*req));
457 	if (mh == NULL)
458 		return ENXIO;
459 
460 	req = vmbus_msghc_dataptr(mh);
461 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
462 
463 	error = vmbus_msghc_exec_noresult(mh);
464 	vmbus_msghc_put(sc, mh);
465 
466 	return error;
467 }
468 
469 static void
vmbus_scan_done_task(void * xsc,int pending __unused)470 vmbus_scan_done_task(void *xsc, int pending __unused)
471 {
472 	struct vmbus_softc *sc = xsc;
473 
474 	mtx_lock(&Giant);
475 	sc->vmbus_scandone = true;
476 	mtx_unlock(&Giant);
477 	wakeup(&sc->vmbus_scandone);
478 }
479 
480 static void
vmbus_scan_done(struct vmbus_softc * sc,const struct vmbus_message * msg __unused)481 vmbus_scan_done(struct vmbus_softc *sc,
482     const struct vmbus_message *msg __unused)
483 {
484 
485 	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
486 }
487 
488 static int
vmbus_scan(struct vmbus_softc * sc)489 vmbus_scan(struct vmbus_softc *sc)
490 {
491 	int error;
492 
493 	/*
494 	 * Identify, probe and attach for non-channel devices.
495 	 */
496 	bus_generic_probe(sc->vmbus_dev);
497 	bus_generic_attach(sc->vmbus_dev);
498 
499 	/*
500 	 * This taskqueue serializes vmbus devices' attach and detach
501 	 * for channel offer and rescind messages.
502 	 */
503 	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
504 	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
505 	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
506 	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
507 
508 	/*
509 	 * This taskqueue handles sub-channel detach, so that vmbus
510 	 * device's detach running in vmbus_devtq can drain its sub-
511 	 * channels.
512 	 */
513 	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
514 	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
515 	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
516 
517 	/*
518 	 * Start vmbus scanning.
519 	 */
520 	error = vmbus_req_channels(sc);
521 	if (error) {
522 		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
523 		    error);
524 		return (error);
525 	}
526 
527 	/*
528 	 * Wait for all vmbus devices from the initial channel offers to be
529 	 * attached.
530 	 */
531 	GIANT_REQUIRED;
532 	while (!sc->vmbus_scandone)
533 		mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);
534 
535 	if (bootverbose) {
536 		device_printf(sc->vmbus_dev, "device scan, probe and attach "
537 		    "done\n");
538 	}
539 	return (0);
540 }
541 
542 static void
vmbus_scan_teardown(struct vmbus_softc * sc)543 vmbus_scan_teardown(struct vmbus_softc *sc)
544 {
545 
546 	GIANT_REQUIRED;
547 	if (sc->vmbus_devtq != NULL) {
548 		mtx_unlock(&Giant);
549 		taskqueue_free(sc->vmbus_devtq);
550 		mtx_lock(&Giant);
551 		sc->vmbus_devtq = NULL;
552 	}
553 	if (sc->vmbus_subchtq != NULL) {
554 		mtx_unlock(&Giant);
555 		taskqueue_free(sc->vmbus_subchtq);
556 		mtx_lock(&Giant);
557 		sc->vmbus_subchtq = NULL;
558 	}
559 }
560 
561 static void
vmbus_chanmsg_handle(struct vmbus_softc * sc,const struct vmbus_message * msg)562 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
563 {
564 	vmbus_chanmsg_proc_t msg_proc;
565 	uint32_t msg_type;
566 
567 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
568 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
569 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
570 		    msg_type);
571 		return;
572 	}
573 
574 	msg_proc = vmbus_chanmsg_handlers[msg_type];
575 	if (msg_proc != NULL)
576 		msg_proc(sc, msg);
577 
578 	/* Channel specific processing */
579 	vmbus_chan_msgproc(sc, msg);
580 }
581 
582 static void
vmbus_msg_task(void * xsc,int pending __unused)583 vmbus_msg_task(void *xsc, int pending __unused)
584 {
585 	struct vmbus_softc *sc = xsc;
586 	volatile struct vmbus_message *msg;
587 
588 	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
589 	for (;;) {
590 		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
591 			/* No message */
592 			break;
593 		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
594 			/* Channel message */
595 			vmbus_chanmsg_handle(sc,
596 			    __DEVOLATILE(const struct vmbus_message *, msg));
597 		}
598 
599 		msg->msg_type = HYPERV_MSGTYPE_NONE;
600 		/*
601 		 * Make sure the write to msg_type (i.e. set to
602 		 * HYPERV_MSGTYPE_NONE) happens before we read the
603 		 * msg_flags and EOMing. Otherwise, the EOMing will
604 		 * not deliver any more messages since there is no
605 		 * empty slot
606 		 *
607 		 * NOTE:
608 		 * mb() is used here, since atomic_thread_fence_seq_cst()
609 		 * will become compiler fence on UP kernel.
610 		 */
611 		mb();
612 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
613 			/*
614 			 * This will cause message queue rescan to possibly
615 			 * deliver another msg from the hypervisor
616 			 */
617 			wrmsr(MSR_HV_EOM, 0);
618 		}
619 	}
620 }
621 
622 static __inline int
vmbus_handle_intr1(struct vmbus_softc * sc,struct trapframe * frame,int cpu)623 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
624 {
625 	volatile struct vmbus_message *msg;
626 	struct vmbus_message *msg_base;
627 
628 	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
629 
630 	/*
631 	 * Check event timer.
632 	 *
633 	 * TODO: move this to independent IDT vector.
634 	 */
635 	msg = msg_base + VMBUS_SINT_TIMER;
636 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
637 		msg->msg_type = HYPERV_MSGTYPE_NONE;
638 
639 		vmbus_et_intr(frame);
640 
641 		/*
642 		 * Make sure the write to msg_type (i.e. set to
643 		 * HYPERV_MSGTYPE_NONE) happens before we read the
644 		 * msg_flags and EOMing. Otherwise, the EOMing will
645 		 * not deliver any more messages since there is no
646 		 * empty slot
647 		 *
648 		 * NOTE:
649 		 * mb() is used here, since atomic_thread_fence_seq_cst()
650 		 * will become compiler fence on UP kernel.
651 		 */
652 		mb();
653 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
654 			/*
655 			 * This will cause message queue rescan to possibly
656 			 * deliver another msg from the hypervisor
657 			 */
658 			wrmsr(MSR_HV_EOM, 0);
659 		}
660 	}
661 
662 	/*
663 	 * Check events.  Hot path for network and storage I/O data; high rate.
664 	 *
665 	 * NOTE:
666 	 * As recommended by the Windows guest fellows, we check events before
667 	 * checking messages.
668 	 */
669 	sc->vmbus_event_proc(sc, cpu);
670 
671 	/*
672 	 * Check messages.  Mainly management stuffs; ultra low rate.
673 	 */
674 	msg = msg_base + VMBUS_SINT_MESSAGE;
675 	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
676 		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
677 		    VMBUS_PCPU_PTR(sc, message_task, cpu));
678 	}
679 
680 	return (FILTER_HANDLED);
681 }
682 
683 void
vmbus_handle_intr(struct trapframe * trap_frame)684 vmbus_handle_intr(struct trapframe *trap_frame)
685 {
686 	struct vmbus_softc *sc = vmbus_get_softc();
687 	int cpu = curcpu;
688 
689 	/*
690 	 * Disable preemption.
691 	 */
692 	critical_enter();
693 
694 	/*
695 	 * Do a little interrupt counting.
696 	 */
697 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
698 
699 	vmbus_handle_intr1(sc, trap_frame, cpu);
700 
701 	/*
702 	 * Enable preemption.
703 	 */
704 	critical_exit();
705 }
706 
707 static void
vmbus_synic_setup(void * xsc)708 vmbus_synic_setup(void *xsc)
709 {
710 	struct vmbus_softc *sc = xsc;
711 	int cpu = curcpu;
712 	uint64_t val, orig;
713 	uint32_t sint;
714 
715 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
716 		/* Save virtual processor id. */
717 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
718 	} else {
719 		/* Set virtual processor id to 0 for compatibility. */
720 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
721 	}
722 
723 	/*
724 	 * Setup the SynIC message.
725 	 */
726 	orig = rdmsr(MSR_HV_SIMP);
727 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
728 	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
729 	     MSR_HV_SIMP_PGSHIFT);
730 	wrmsr(MSR_HV_SIMP, val);
731 
732 	/*
733 	 * Setup the SynIC event flags.
734 	 */
735 	orig = rdmsr(MSR_HV_SIEFP);
736 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
737 	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
738 	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
739 	wrmsr(MSR_HV_SIEFP, val);
740 
741 
742 	/*
743 	 * Configure and unmask SINT for message and event flags.
744 	 */
745 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
746 	orig = rdmsr(sint);
747 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
748 	    (orig & MSR_HV_SINT_RSVD_MASK);
749 	wrmsr(sint, val);
750 
751 	/*
752 	 * Configure and unmask SINT for timer.
753 	 */
754 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
755 	orig = rdmsr(sint);
756 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
757 	    (orig & MSR_HV_SINT_RSVD_MASK);
758 	wrmsr(sint, val);
759 
760 	/*
761 	 * All done; enable SynIC.
762 	 */
763 	orig = rdmsr(MSR_HV_SCONTROL);
764 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
765 	wrmsr(MSR_HV_SCONTROL, val);
766 }
767 
768 static void
vmbus_synic_teardown(void * arg)769 vmbus_synic_teardown(void *arg)
770 {
771 	uint64_t orig;
772 	uint32_t sint;
773 
774 	/*
775 	 * Disable SynIC.
776 	 */
777 	orig = rdmsr(MSR_HV_SCONTROL);
778 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
779 
780 	/*
781 	 * Mask message and event flags SINT.
782 	 */
783 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
784 	orig = rdmsr(sint);
785 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
786 
787 	/*
788 	 * Mask timer SINT.
789 	 */
790 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
791 	orig = rdmsr(sint);
792 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
793 
794 	/*
795 	 * Teardown SynIC message.
796 	 */
797 	orig = rdmsr(MSR_HV_SIMP);
798 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
799 
800 	/*
801 	 * Teardown SynIC event flags.
802 	 */
803 	orig = rdmsr(MSR_HV_SIEFP);
804 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
805 }
806 
807 static int
vmbus_dma_alloc(struct vmbus_softc * sc)808 vmbus_dma_alloc(struct vmbus_softc *sc)
809 {
810 	bus_dma_tag_t parent_dtag;
811 	uint8_t *evtflags;
812 	int cpu;
813 
814 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
815 	CPU_FOREACH(cpu) {
816 		void *ptr;
817 
818 		/*
819 		 * Per-cpu messages and event flags.
820 		 */
821 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
822 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
823 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
824 		if (ptr == NULL)
825 			return ENOMEM;
826 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
827 
828 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
829 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
830 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
831 		if (ptr == NULL)
832 			return ENOMEM;
833 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
834 	}
835 
836 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
837 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
838 	if (evtflags == NULL)
839 		return ENOMEM;
840 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
841 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
842 	sc->vmbus_evtflags = evtflags;
843 
844 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
845 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
846 	if (sc->vmbus_mnf1 == NULL)
847 		return ENOMEM;
848 
849 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
850 	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
851 	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
852 	if (sc->vmbus_mnf2 == NULL)
853 		return ENOMEM;
854 
855 	return 0;
856 }
857 
858 static void
vmbus_dma_free(struct vmbus_softc * sc)859 vmbus_dma_free(struct vmbus_softc *sc)
860 {
861 	int cpu;
862 
863 	if (sc->vmbus_evtflags != NULL) {
864 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
865 		sc->vmbus_evtflags = NULL;
866 		sc->vmbus_rx_evtflags = NULL;
867 		sc->vmbus_tx_evtflags = NULL;
868 	}
869 	if (sc->vmbus_mnf1 != NULL) {
870 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
871 		sc->vmbus_mnf1 = NULL;
872 	}
873 	if (sc->vmbus_mnf2 != NULL) {
874 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
875 		sc->vmbus_mnf2 = NULL;
876 	}
877 
878 	CPU_FOREACH(cpu) {
879 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
880 			hyperv_dmamem_free(
881 			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
882 			    VMBUS_PCPU_GET(sc, message, cpu));
883 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
884 		}
885 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
886 			hyperv_dmamem_free(
887 			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
888 			    VMBUS_PCPU_GET(sc, event_flags, cpu));
889 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
890 		}
891 	}
892 }
893 
894 /**
895  * @brief Find a free IDT slot and setup the interrupt handler.
896  */
897 static int
vmbus_vector_alloc(void)898 vmbus_vector_alloc(void)
899 {
900 	int vector;
901 	uintptr_t func;
902 	struct gate_descriptor *ip;
903 
904 	/*
905 	 * Search backwards form the highest IDT vector available for use
906 	 * as vmbus channel callback vector. We install 'vmbus_isr'
907 	 * handler at that vector and use it to interrupt vcpus.
908 	 */
909 	vector = APIC_SPURIOUS_INT;
910 	while (--vector >= APIC_IPI_INTS) {
911 		ip = &idt[vector];
912 		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
913 		if (func == (uintptr_t)&IDTVEC(rsvd)) {
914 #ifdef __i386__
915 			setidt(vector , IDTVEC(vmbus_isr), SDT_SYS386IGT,
916 			    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
917 #else
918 			setidt(vector , IDTVEC(vmbus_isr), SDT_SYSIGT,
919 			    SEL_KPL, 0);
920 #endif
921 
922 			return (vector);
923 		}
924 	}
925 	return (0);
926 }
927 
928 /**
929  * @brief Restore the IDT slot to rsvd.
930  */
931 static void
vmbus_vector_free(int vector)932 vmbus_vector_free(int vector)
933 {
934 	uintptr_t func;
935 	struct gate_descriptor *ip;
936 
937 	if (vector == 0)
938 		return;
939 
940 	KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
941 	    ("invalid vector %d", vector));
942 
943 	ip = &idt[vector];
944 	func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
945 	KASSERT(func == (uintptr_t)&IDTVEC(vmbus_isr),
946 	    ("invalid vector %d", vector));
947 
948 	setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
949 }
950 
951 static void
vmbus_cpuset_setthread_task(void * xmask,int pending __unused)952 vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
953 {
954 	cpuset_t *mask = xmask;
955 	int error;
956 
957 	error = cpuset_setthread(curthread->td_tid, mask);
958 	if (error) {
959 		panic("curthread=%ju: can't pin; error=%d",
960 		    (uintmax_t)curthread->td_tid, error);
961 	}
962 }
963 
964 static int
vmbus_intr_setup(struct vmbus_softc * sc)965 vmbus_intr_setup(struct vmbus_softc *sc)
966 {
967 	int cpu;
968 
969 	CPU_FOREACH(cpu) {
970 		struct task cpuset_task;
971 		char buf[MAXCOMLEN + 1];
972 		cpuset_t cpu_mask;
973 
974 		/* Allocate an interrupt counter for Hyper-V interrupt */
975 		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
976 		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
977 
978 		/*
979 		 * Setup taskqueue to handle events.  Task will be per-
980 		 * channel.
981 		 */
982 		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
983 		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
984 		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
985 		taskqueue_start_threads(VMBUS_PCPU_PTR(sc, event_tq, cpu),
986 		    1, PI_NET, "hvevent%d", cpu);
987 
988 		if (vmbus_pin_evttask) {
989 			CPU_SETOF(cpu, &cpu_mask);
990 			TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
991 			    &cpu_mask);
992 			taskqueue_enqueue(VMBUS_PCPU_GET(sc, event_tq, cpu),
993 			    &cpuset_task);
994 			taskqueue_drain(VMBUS_PCPU_GET(sc, event_tq, cpu),
995 			    &cpuset_task);
996 		}
997 
998 		/*
999 		 * Setup tasks and taskqueues to handle messages.
1000 		 */
1001 		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
1002 		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
1003 		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
1004 		taskqueue_start_threads(VMBUS_PCPU_PTR(sc, message_tq, cpu), 1,
1005 		    PI_NET, "hvmsg%d", cpu);
1006 		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
1007 		    vmbus_msg_task, sc);
1008 
1009 		CPU_SETOF(cpu, &cpu_mask);
1010 		TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
1011 		    &cpu_mask);
1012 		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
1013 		    &cpuset_task);
1014 		taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
1015 		    &cpuset_task);
1016 	}
1017 
1018 	/*
1019 	 * All Hyper-V ISR required resources are setup, now let's find a
1020 	 * free IDT vector for Hyper-V ISR and set it up.
1021 	 */
1022 	sc->vmbus_idtvec = vmbus_vector_alloc();
1023 	if (sc->vmbus_idtvec == 0) {
1024 		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
1025 		return ENXIO;
1026 	}
1027 	if (bootverbose) {
1028 		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
1029 		    sc->vmbus_idtvec);
1030 	}
1031 	return 0;
1032 }
1033 
1034 static void
vmbus_intr_teardown(struct vmbus_softc * sc)1035 vmbus_intr_teardown(struct vmbus_softc *sc)
1036 {
1037 	int cpu;
1038 
1039 	vmbus_vector_free(sc->vmbus_idtvec);
1040 
1041 	CPU_FOREACH(cpu) {
1042 		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
1043 			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
1044 			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
1045 		}
1046 		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
1047 			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
1048 			    VMBUS_PCPU_PTR(sc, message_task, cpu));
1049 			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
1050 			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
1051 		}
1052 	}
1053 }
1054 
1055 static int
vmbus_read_ivar(device_t dev,device_t child,int index,uintptr_t * result)1056 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
1057 {
1058 	return (ENOENT);
1059 }
1060 
1061 static int
vmbus_child_pnpinfo_str(device_t dev,device_t child,char * buf,size_t buflen)1062 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
1063 {
1064 	const struct vmbus_channel *chan;
1065 	char guidbuf[HYPERV_GUID_STRLEN];
1066 
1067 	chan = vmbus_get_channel(child);
1068 	if (chan == NULL) {
1069 		/* Event timer device, which does not belong to a channel */
1070 		return (0);
1071 	}
1072 
1073 	strlcat(buf, "classid=", buflen);
1074 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
1075 	strlcat(buf, guidbuf, buflen);
1076 
1077 	strlcat(buf, " deviceid=", buflen);
1078 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
1079 	strlcat(buf, guidbuf, buflen);
1080 
1081 	return (0);
1082 }
1083 
1084 int
vmbus_add_child(struct vmbus_channel * chan)1085 vmbus_add_child(struct vmbus_channel *chan)
1086 {
1087 	struct vmbus_softc *sc = chan->ch_vmbus;
1088 	device_t parent = sc->vmbus_dev;
1089 
1090 	mtx_lock(&Giant);
1091 
1092 	chan->ch_dev = device_add_child(parent, NULL, -1);
1093 	if (chan->ch_dev == NULL) {
1094 		mtx_unlock(&Giant);
1095 		device_printf(parent, "device_add_child for chan%u failed\n",
1096 		    chan->ch_id);
1097 		return (ENXIO);
1098 	}
1099 	device_set_ivars(chan->ch_dev, chan);
1100 	device_probe_and_attach(chan->ch_dev);
1101 
1102 	mtx_unlock(&Giant);
1103 	return (0);
1104 }
1105 
1106 int
vmbus_delete_child(struct vmbus_channel * chan)1107 vmbus_delete_child(struct vmbus_channel *chan)
1108 {
1109 	int error = 0;
1110 
1111 	mtx_lock(&Giant);
1112 	if (chan->ch_dev != NULL) {
1113 		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1114 		    chan->ch_dev);
1115 		chan->ch_dev = NULL;
1116 	}
1117 	mtx_unlock(&Giant);
1118 	return (error);
1119 }
1120 
1121 static int
vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)1122 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1123 {
1124 	struct vmbus_softc *sc = arg1;
1125 	char verstr[16];
1126 
1127 	snprintf(verstr, sizeof(verstr), "%u.%u",
1128 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1129 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1130 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1131 }
1132 
1133 /*
1134  * We need the function to make sure the MMIO resource is allocated from the
1135  * ranges found in _CRS.
1136  *
1137  * For the release function, we can use bus_generic_release_resource().
1138  */
1139 static struct resource *
vmbus_alloc_resource(device_t dev,device_t child,int type,int * rid,rman_res_t start,rman_res_t end,rman_res_t count,u_int flags)1140 vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1141     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1142 {
1143 	device_t parent = device_get_parent(dev);
1144 	struct resource *res;
1145 
1146 #ifdef NEW_PCIB
1147 	if (type == SYS_RES_MEMORY) {
1148 		struct vmbus_softc *sc = device_get_softc(dev);
1149 
1150 		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1151 		    rid, start, end, count, flags);
1152 	} else
1153 #endif
1154 	{
1155 		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
1156 		    end, count, flags);
1157 	}
1158 
1159 	return (res);
1160 }
1161 
1162 static int
vmbus_alloc_msi(device_t bus,device_t dev,int count,int maxcount,int * irqs)1163 vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1164 {
1165 
1166 	return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
1167 	    irqs));
1168 }
1169 
1170 static int
vmbus_release_msi(device_t bus,device_t dev,int count,int * irqs)1171 vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1172 {
1173 
1174 	return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs));
1175 }
1176 
1177 static int
vmbus_alloc_msix(device_t bus,device_t dev,int * irq)1178 vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1179 {
1180 
1181 	return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
1182 }
1183 
1184 static int
vmbus_release_msix(device_t bus,device_t dev,int irq)1185 vmbus_release_msix(device_t bus, device_t dev, int irq)
1186 {
1187 
1188 	return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
1189 }
1190 
1191 static int
vmbus_map_msi(device_t bus,device_t dev,int irq,uint64_t * addr,uint32_t * data)1192 vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1193 	uint32_t *data)
1194 {
1195 
1196 	return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
1197 }
1198 
1199 static uint32_t
vmbus_get_version_method(device_t bus,device_t dev)1200 vmbus_get_version_method(device_t bus, device_t dev)
1201 {
1202 	struct vmbus_softc *sc = device_get_softc(bus);
1203 
1204 	return sc->vmbus_version;
1205 }
1206 
1207 static int
vmbus_probe_guid_method(device_t bus,device_t dev,const struct hyperv_guid * guid)1208 vmbus_probe_guid_method(device_t bus, device_t dev,
1209     const struct hyperv_guid *guid)
1210 {
1211 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1212 
1213 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1214 		return 0;
1215 	return ENXIO;
1216 }
1217 
1218 static uint32_t
vmbus_get_vcpu_id_method(device_t bus,device_t dev,int cpu)1219 vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1220 {
1221 	const struct vmbus_softc *sc = device_get_softc(bus);
1222 
1223 	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
1224 }
1225 
1226 static struct taskqueue *
vmbus_get_eventtq_method(device_t bus,device_t dev __unused,int cpu)1227 vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
1228 {
1229 	const struct vmbus_softc *sc = device_get_softc(bus);
1230 
1231 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
1232 	return (VMBUS_PCPU_GET(sc, event_tq, cpu));
1233 }
1234 
1235 #ifdef NEW_PCIB
/* vTPM base address; recorded _CRS ranges are trimmed to end below it. */
#define	VTPM_BASE_ADDR	0xfed40000
/* Boundary between the ranges taken by the 32-bit and the 64-bit pass. */
#define	FOUR_GB		(1ULL << 32)

/* Selects which ranges a _CRS walk records. */
enum parse_pass {
	parse_64,	/* ranges starting at or above FOUR_GB */
	parse_32	/* ranges starting below FOUR_GB */
};
1240 
1241 struct parse_context {
1242 	device_t vmbus_dev;
1243 	enum parse_pass pass;
1244 };
1245 
1246 static ACPI_STATUS
parse_crs(ACPI_RESOURCE * res,void * ctx)1247 parse_crs(ACPI_RESOURCE *res, void *ctx)
1248 {
1249 	const struct parse_context *pc = ctx;
1250 	device_t vmbus_dev = pc->vmbus_dev;
1251 
1252 	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1253 	UINT64 start, end;
1254 
1255 	switch (res->Type) {
1256 	case ACPI_RESOURCE_TYPE_ADDRESS32:
1257 		start = res->Data.Address32.Address.Minimum;
1258 		end = res->Data.Address32.Address.Maximum;
1259 		break;
1260 
1261 	case ACPI_RESOURCE_TYPE_ADDRESS64:
1262 		start = res->Data.Address64.Address.Minimum;
1263 		end = res->Data.Address64.Address.Maximum;
1264 		break;
1265 
1266 	default:
1267 		/* Unused types. */
1268 		return (AE_OK);
1269 	}
1270 
1271 	/*
1272 	 * We don't use <1MB addresses.
1273 	 */
1274 	if (end < 0x100000)
1275 		return (AE_OK);
1276 
1277 	/* Don't conflict with vTPM. */
1278 	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1279 		end = VTPM_BASE_ADDR - 1;
1280 
1281 	if ((pc->pass == parse_32 && start < FOUR_GB) ||
1282 	    (pc->pass == parse_64 && start >= FOUR_GB))
1283 		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
1284 		    start, end, 0);
1285 
1286 	return (AE_OK);
1287 }
1288 
1289 static void
vmbus_get_crs(device_t dev,device_t vmbus_dev,enum parse_pass pass)1290 vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1291 {
1292 	struct parse_context pc;
1293 	ACPI_STATUS status;
1294 
1295 	if (bootverbose)
1296 		device_printf(dev, "walking _CRS, pass=%d\n", pass);
1297 
1298 	pc.vmbus_dev = vmbus_dev;
1299 	pc.pass = pass;
1300 	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1301 			parse_crs, &pc);
1302 
1303 	if (bootverbose && ACPI_FAILURE(status))
1304 		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
1305 }
1306 
1307 static void
vmbus_get_mmio_res_pass(device_t dev,enum parse_pass pass)1308 vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1309 {
1310 	device_t acpi0, parent;
1311 
1312 	parent = device_get_parent(dev);
1313 
1314 	acpi0 = device_get_parent(parent);
1315 	if (strcmp("acpi0", device_get_nameunit(acpi0)) == 0) {
1316 		device_t *children;
1317 		int count;
1318 
1319 		/*
1320 		 * Try to locate VMBUS resources and find _CRS on them.
1321 		 */
1322 		if (device_get_children(acpi0, &children, &count) == 0) {
1323 			int i;
1324 
1325 			for (i = 0; i < count; ++i) {
1326 				if (!device_is_attached(children[i]))
1327 					continue;
1328 
1329 				if (strcmp("vmbus_res",
1330 				    device_get_name(children[i])) == 0)
1331 					vmbus_get_crs(children[i], dev, pass);
1332 			}
1333 			free(children, M_TEMP);
1334 		}
1335 
1336 		/*
1337 		 * Try to find _CRS on acpi.
1338 		 */
1339 		vmbus_get_crs(acpi0, dev, pass);
1340 	} else {
1341 		device_printf(dev, "not grandchild of acpi\n");
1342 	}
1343 
1344 	/*
1345 	 * Try to find _CRS on parent.
1346 	 */
1347 	vmbus_get_crs(parent, dev, pass);
1348 }
1349 
1350 static void
vmbus_get_mmio_res(device_t dev)1351 vmbus_get_mmio_res(device_t dev)
1352 {
1353 	struct vmbus_softc *sc = device_get_softc(dev);
1354 	/*
1355 	 * We walk the resources twice to make sure that: in the resource
1356 	 * list, the 32-bit resources appear behind the 64-bit resources.
1357 	 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
1358 	 * iterate through the list to find a range for a 64-bit BAR in
1359 	 * vmbus_alloc_resource(), we can make sure we try to use >4GB
1360 	 * ranges first.
1361 	 */
1362 	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1363 
1364 	vmbus_get_mmio_res_pass(dev, parse_64);
1365 	vmbus_get_mmio_res_pass(dev, parse_32);
1366 }
1367 
1368 static void
vmbus_free_mmio_res(device_t dev)1369 vmbus_free_mmio_res(device_t dev)
1370 {
1371 	struct vmbus_softc *sc = device_get_softc(dev);
1372 
1373 	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1374 }
1375 #endif	/* NEW_PCIB */
1376 
1377 static void
vmbus_identify(driver_t * driver,device_t parent)1378 vmbus_identify(driver_t *driver, device_t parent)
1379 {
1380 
1381 	if (device_get_unit(parent) != 0 || vm_guest != VM_GUEST_HV ||
1382 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1383 		return;
1384 	device_add_child(parent, "vmbus", -1);
1385 }
1386 
1387 static int
vmbus_probe(device_t dev)1388 vmbus_probe(device_t dev)
1389 {
1390 
1391 	if (device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1392 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1393 		return (ENXIO);
1394 
1395 	device_set_desc(dev, "Hyper-V Vmbus");
1396 	return (BUS_PROBE_DEFAULT);
1397 }
1398 
1399 /**
1400  * @brief Main vmbus driver initialization routine.
1401  *
1402  * Here, we
1403  * - initialize the vmbus driver context
1404  * - setup various driver entry points
1405  * - invoke the vmbus hv main init routine
1406  * - get the irq resource
1407  * - invoke the vmbus to add the vmbus root device
1408  * - setup the vmbus root device
1409  * - retrieve the channel offers
1410  */
1411 static int
vmbus_doattach(struct vmbus_softc * sc)1412 vmbus_doattach(struct vmbus_softc *sc)
1413 {
1414 	struct sysctl_oid_list *child;
1415 	struct sysctl_ctx_list *ctx;
1416 	int ret;
1417 
1418 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1419 		return (0);
1420 
1421 #ifdef NEW_PCIB
1422 	vmbus_get_mmio_res(sc->vmbus_dev);
1423 #endif
1424 
1425 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1426 
1427 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1428 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1429 	TAILQ_INIT(&sc->vmbus_prichans);
1430 	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1431 	TAILQ_INIT(&sc->vmbus_chans);
1432 	sc->vmbus_chmap = malloc(
1433 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1434 	    M_WAITOK | M_ZERO);
1435 
1436 	/*
1437 	 * Create context for "post message" Hypercalls
1438 	 */
1439 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1440 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1441 	    sizeof(struct vmbus_msghc));
1442 	if (sc->vmbus_xc == NULL) {
1443 		ret = ENXIO;
1444 		goto cleanup;
1445 	}
1446 
1447 	/*
1448 	 * Allocate DMA stuffs.
1449 	 */
1450 	ret = vmbus_dma_alloc(sc);
1451 	if (ret != 0)
1452 		goto cleanup;
1453 
1454 	/*
1455 	 * Setup interrupt.
1456 	 */
1457 	ret = vmbus_intr_setup(sc);
1458 	if (ret != 0)
1459 		goto cleanup;
1460 
1461 	/*
1462 	 * Setup SynIC.
1463 	 */
1464 	if (bootverbose)
1465 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1466 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1467 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1468 
1469 	/*
1470 	 * Initialize vmbus, e.g. connect to Hypervisor.
1471 	 */
1472 	ret = vmbus_init(sc);
1473 	if (ret != 0)
1474 		goto cleanup;
1475 
1476 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1477 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1478 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1479 	else
1480 		sc->vmbus_event_proc = vmbus_event_proc;
1481 
1482 	ret = vmbus_scan(sc);
1483 	if (ret != 0)
1484 		goto cleanup;
1485 
1486 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1487 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1488 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1489 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1490 	    vmbus_sysctl_version, "A", "vmbus version");
1491 
1492 	return (ret);
1493 
1494 cleanup:
1495 	vmbus_scan_teardown(sc);
1496 	vmbus_intr_teardown(sc);
1497 	vmbus_dma_free(sc);
1498 	if (sc->vmbus_xc != NULL) {
1499 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1500 		sc->vmbus_xc = NULL;
1501 	}
1502 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1503 	mtx_destroy(&sc->vmbus_prichan_lock);
1504 	mtx_destroy(&sc->vmbus_chan_lock);
1505 
1506 	return (ret);
1507 }
1508 
1509 static void
vmbus_event_proc_dummy(struct vmbus_softc * sc __unused,int cpu __unused)1510 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1511 {
1512 }
1513 
1514 static int
vmbus_attach(device_t dev)1515 vmbus_attach(device_t dev)
1516 {
1517 	vmbus_sc = device_get_softc(dev);
1518 	vmbus_sc->vmbus_dev = dev;
1519 
1520 	/*
1521 	 * Event processing logic will be configured:
1522 	 * - After the vmbus protocol version negotiation.
1523 	 * - Before we request channel offers.
1524 	 */
1525 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1526 
1527 	/*
1528 	 * If the system has already booted and thread
1529 	 * scheduling is possible indicated by the global
1530 	 * cold set to zero, we just call the driver
1531 	 * initialization directly.
1532 	 */
1533 	if (!cold)
1534 		vmbus_doattach(vmbus_sc);
1535 
1536 	return (0);
1537 }
1538 
1539 static int
vmbus_detach(device_t dev)1540 vmbus_detach(device_t dev)
1541 {
1542 	struct vmbus_softc *sc = device_get_softc(dev);
1543 
1544 	bus_generic_detach(dev);
1545 	vmbus_chan_destroy_all(sc);
1546 
1547 	vmbus_scan_teardown(sc);
1548 
1549 	vmbus_disconnect(sc);
1550 
1551 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1552 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1553 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1554 	}
1555 
1556 	vmbus_intr_teardown(sc);
1557 	vmbus_dma_free(sc);
1558 
1559 	if (sc->vmbus_xc != NULL) {
1560 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1561 		sc->vmbus_xc = NULL;
1562 	}
1563 
1564 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1565 	mtx_destroy(&sc->vmbus_prichan_lock);
1566 	mtx_destroy(&sc->vmbus_chan_lock);
1567 
1568 #ifdef NEW_PCIB
1569 	vmbus_free_mmio_res(dev);
1570 #endif
1571 
1572 	return (0);
1573 }
1574 
1575 static void
vmbus_sysinit(void * arg __unused)1576 vmbus_sysinit(void *arg __unused)
1577 {
1578 	struct vmbus_softc *sc = vmbus_get_softc();
1579 
1580 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1581 		return;
1582 
1583 	/*
1584 	 * If the system has already booted and thread
1585 	 * scheduling is possible, as indicated by the
1586 	 * global cold set to zero, we just call the driver
1587 	 * initialization directly.
1588 	 */
1589 	if (!cold)
1590 		vmbus_doattach(sc);
1591 }
/*
 * NOTE:
 * This has to run as the last step of SI_SUB_SMP, that is, once SMP has
 * been initialized.
 */
SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY,
    vmbus_sysinit, NULL);
1598