1 /*-
2 * Copyright (c) 2016-2017 Microsoft Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #ifdef NEW_PCIB
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/types.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/kernel.h>
38 #include <sys/queue.h>
39 #include <sys/lock.h>
40 #include <sys/sx.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/bus.h>
44 #include <sys/rman.h>
45 #include <sys/mutex.h>
46 #include <sys/errno.h>
47
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_kern.h>
51 #include <vm/pmap.h>
52
53 #include <machine/atomic.h>
54 #include <machine/bus.h>
55 #include <machine/frame.h>
56 #include <machine/pci_cfgreg.h>
57 #include <machine/resource.h>
58
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 #include <dev/pci/pcib_private.h>
64 #include "pcib_if.h"
65
66 #include <machine/intr_machdep.h>
67 #include <x86/apicreg.h>
68
69 #include <dev/hyperv/include/hyperv.h>
70 #include <dev/hyperv/include/hyperv_busdma.h>
71 #include <dev/hyperv/include/vmbus_xact.h>
72 #include <dev/hyperv/vmbus/vmbus_reg.h>
73 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
74
75 #include "vmbus_if.h"
76
/*
 * Compatibility definitions for kernels with __FreeBSD_version below
 * 1100000: provide the rman_res_t type and the RM_MAX_END limit locally
 * (presumably because those kernels do not define them -- confirm).
 */
#if __FreeBSD_version < 1100000
typedef u_long rman_res_t;
#define RM_MAX_END (~(rman_res_t)0)
#endif
81
/*
 * Minimal counting completion.  "done" counts events that have been posted
 * by complete() but not yet consumed by wait_for_completion(); waiters
 * sleep on the address of the structure itself.
 */
struct completion {
	unsigned int done;	/* posted-but-unconsumed event count */
	struct mtx lock;	/* protects "done" */
};
86
87 static void
init_completion(struct completion * c)88 init_completion(struct completion *c)
89 {
90 memset(c, 0, sizeof(*c));
91 mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
92 c->done = 0;
93 }
94
95 static void
free_completion(struct completion * c)96 free_completion(struct completion *c)
97 {
98 mtx_destroy(&c->lock);
99 }
100
101 static void
complete(struct completion * c)102 complete(struct completion *c)
103 {
104 mtx_lock(&c->lock);
105 c->done++;
106 mtx_unlock(&c->lock);
107 wakeup(c);
108 }
109
110 static void
wait_for_completion(struct completion * c)111 wait_for_completion(struct completion *c)
112 {
113 mtx_lock(&c->lock);
114 while (c->done == 0)
115 mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
116 c->done--;
117 mtx_unlock(&c->lock);
118 }
119
/*
 * Pack a protocol version into a 32-bit value: the major number goes in the
 * upper 16 bits and the minor number in the lower 16 bits.
 */
#define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor)))
121
/*
 * Protocol versions known to this driver.  PCI_PROTOCOL_VERSION_CURRENT is
 * the value placed in the version request sent to the host.
 */
enum {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1
};
126
/*
 * Layout of the config-space MMIO window.  Config-space accesses are made
 * at CFG_PAGE_OFFSET plus the register offset; CFG_PAGE_SIZE is whatever
 * remains of the window after that offset.
 * NOTE(review): the region below CFG_PAGE_OFFSET is presumably the "index
 * page" mentioned on hv_pcibus.config_lock -- confirm.
 */
#define PCI_CONFIG_MMIO_LENGTH 0x2000
#define CFG_PAGE_OFFSET 0x1000
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
130
131 /*
132 * Message Types
133 */
134
/*
 * Message type codes carried in struct pci_message.type.  Every code is
 * PCI_MESSAGE_BASE plus a small offset; offsets 2 and 3 are not defined
 * here.  PCI_MESSAGE_MAXIMUM is one past the last defined code.
 */
enum pci_message_type {
	/*
	 * Version 1.1
	 */
	PCI_MESSAGE_BASE		= 0x42490000,
	PCI_BUS_RELATIONS		= PCI_MESSAGE_BASE + 0x00,
	PCI_QUERY_BUS_RELATIONS		= PCI_MESSAGE_BASE + 0x01,
	PCI_POWER_STATE_CHANGE		= PCI_MESSAGE_BASE + 0x04,
	PCI_QUERY_RESOURCE_REQUIREMENTS	= PCI_MESSAGE_BASE + 0x05,
	PCI_QUERY_RESOURCE_RESOURCES	= PCI_MESSAGE_BASE + 0x06,
	PCI_BUS_D0ENTRY			= PCI_MESSAGE_BASE + 0x07,
	PCI_BUS_D0EXIT			= PCI_MESSAGE_BASE + 0x08,
	PCI_READ_BLOCK			= PCI_MESSAGE_BASE + 0x09,
	PCI_WRITE_BLOCK			= PCI_MESSAGE_BASE + 0x0A,
	PCI_EJECT			= PCI_MESSAGE_BASE + 0x0B,
	PCI_QUERY_STOP			= PCI_MESSAGE_BASE + 0x0C,
	PCI_REENABLE			= PCI_MESSAGE_BASE + 0x0D,
	PCI_QUERY_STOP_FAILED		= PCI_MESSAGE_BASE + 0x0E,
	PCI_EJECTION_COMPLETE		= PCI_MESSAGE_BASE + 0x0F,
	PCI_RESOURCES_ASSIGNED		= PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED		= PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK		= PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION	= PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE	= PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE	= PCI_MESSAGE_BASE + 0x15,
	PCI_MESSAGE_MAXIMUM
};
162
163 /*
164 * Structures defining the virtual PCI Express protocol.
165 */
166
/*
 * A protocol version, viewable either as one 32-bit value or as separate
 * 16-bit minor and major numbers.
 */
union pci_version {
	struct {
		uint16_t minor_version;
		uint16_t major_version;
	} parts;
	uint32_t version;
} __packed;
174
/*
 * Slot/function encoding exchanged with the host.  This representation is
 * the one used in Windows, which is what the Hyper-V parent partition
 * expects when the value is sent back and forth.  Note that the field
 * order differs from PCI_DEVFN(): here the 5-bit slot is declared first and
 * the 3-bit function second; devfn_to_wslot() and wslot_to_devfn() convert
 * between the two forms.
 */
union win_slot_encoding {
	struct {
		uint32_t slot:5;
		uint32_t func:3;
		uint32_t reserved:24;
	} bits;
	uint32_t val;
} __packed;
188
/*
 * Description of one PCI function as reported by the host in a
 * PCI_BUS_RELATIONS message.  The driver identifies a child by the
 * combination of wslot, v_id, d_id and ser.
 */
struct pci_func_desc {
	uint16_t v_id; /* vendor ID */
	uint16_t d_id; /* device ID */
	uint8_t rev;
	uint8_t prog_intf;
	uint8_t subclass;
	uint8_t base_class;
	uint32_t subsystem_id;
	union win_slot_encoding wslot;
	uint32_t ser; /* serial number */
} __packed;
200
/*
 * Interrupt description supplied by the guest inside a
 * struct pci_create_interrupt request.
 */
struct hv_msi_desc {
	uint8_t vector;
	uint8_t delivery_mode;
	uint16_t vector_count;
	uint32_t reserved;
	uint64_t cpu_mask;
} __packed;
208
/*
 * Interrupt descriptor returned by the host in
 * struct pci_create_int_response and handed back to it in
 * struct pci_delete_interrupt.
 */
struct tran_int_desc {
	uint16_t reserved;
	uint16_t vector_count;
	uint32_t data;
	uint64_t address;
} __packed;
215
/* Common leading field of every protocol message: its type code. */
struct pci_message {
	uint32_t type;	/* one of enum pci_message_type */
} __packed;
219
/* Guest-to-host message that addresses a single child function by wslot. */
struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;
224
/*
 * Leading part of an in-band message received from the host: the channel
 * packet header followed by the message type.
 */
struct pci_incoming_message {
	struct vmbus_chanpkt_hdr hdr;
	struct pci_message message_type;
} __packed;
229
/*
 * Leading part of a completion packet received from the host: the channel
 * packet header followed by a status code.
 */
struct pci_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status; /* negative values are failures */
} __packed;
234
/*
 * Driver-side wrapper around an outgoing request.  The address of this
 * structure is passed as the transaction id when the request is sent; when
 * the matching completion packet arrives, the channel callback converts the
 * id back into a pointer and calls completion_func(compl_ctxt, response,
 * size).  "message" marks where the request payload begins.
 */
struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
	    int resp_packet_size);
	void *compl_ctxt;

	struct pci_message message[0];
};
242
243 /*
244 * Specific message types supporting the PCI protocol.
245 */
246
/* PCI_QUERY_PROTOCOL_VERSION request: proposes a protocol version. */
struct pci_version_request {
	struct pci_message message_type;
	uint32_t protocol_version;
	uint32_t is_last_attempt:1;
	uint32_t reservedz:31;	/* presumably "reserved, must be zero" */
} __packed;
253
/*
 * PCI_BUS_D0ENTRY request: tells the host the bus is powered on and where
 * the config-space MMIO region starts.
 */
struct pci_bus_d0_entry {
	struct pci_message message_type;
	uint32_t reserved;
	uint64_t mmio_base;	/* start address of the config MMIO resource */
} __packed;
259
/*
 * PCI_BUS_RELATIONS message from the host: device_count function
 * descriptors follow the fixed part.
 */
struct pci_bus_relations {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc func[0];
} __packed;
265
/* Number of BAR slots tracked per function. */
#define MAX_NUM_BARS (PCIR_MAX_BAR_0 + 1)
/*
 * Completion for PCI_QUERY_RESOURCE_REQUIREMENTS: a struct pci_response
 * layout (header + status) extended with the probed value of each BAR.
 */
struct pci_q_res_req_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status; /* negative values are failures */
	uint32_t probed_bar[MAX_NUM_BARS];
} __packed;
272
/*
 * PCI_RESOURCES_ASSIGNED request for one child function.  This driver only
 * fills in the message type and wslot; everything else is sent as zero.
 */
struct pci_resources_assigned {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
	uint32_t msi_descriptors;
	uint32_t reserved[4];
} __packed;
280
/* Interrupt-creation request for the child function identified by wslot. */
struct pci_create_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc int_desc;
} __packed;
286
/*
 * Completion for an interrupt-creation request: the generic response
 * followed by the descriptor the host assigned.
 */
struct pci_create_int_response {
	struct pci_response response;
	uint32_t reserved;
	struct tran_int_desc int_desc;
} __packed;
292
/*
 * PCI_DELETE_INTERRUPT_MESSAGE request: names the child function and the
 * interrupt descriptor to release.
 */
struct pci_delete_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct tran_int_desc int_desc;
} __packed;
298
/*
 * In-band message from the host that targets one child function (used
 * here for PCI_EJECT).
 */
struct pci_dev_incoming {
	struct pci_incoming_message incoming;
	union win_slot_encoding wslot;
} __packed;
303
/*
 * PCI_EJECTION_COMPLETE message sent to the host after a child has been
 * removed.  The driver leaves "status" zeroed.
 */
struct pci_eject_response {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint32_t status;
} __packed;
309
310 /*
311 * Driver specific state.
312 */
313
/*
 * Bus life-cycle state.  A bus rescan after a relations update is only
 * performed once the state is hv_pcibus_installed.
 */
enum hv_pcibus_state {
	hv_pcibus_init = 0,
	hv_pcibus_installed,
};
318
/* Per-bus state for one Hyper-V virtual PCI bus. */
struct hv_pcibus {
	device_t pcib;		/* used as the device for diagnostics */
	device_t pci_bus;	/* bus whose children are deleted/rescanned */
	struct vmbus_pcib_softc *sc;	/* owning softc (channel, taskqueue) */

	/* Low 16 bits of the first reported child's serial number. */
	uint16_t pci_domain;

	enum hv_pcibus_state state;

	struct resource *cfg_res;	/* config-space MMIO resource */

	/*
	 * query_comp, when non-NULL, is completed once by the relations
	 * worker and then cleared.
	 * NOTE(review): query_completion is presumably the storage that
	 * query_comp points at -- the assignment is not in this chunk.
	 */
	struct completion query_completion, *query_comp;

	struct mtx config_lock; /* Avoid two threads writing index page */
	struct mtx device_list_lock; /* Protect lists below */
	TAILQ_HEAD(, hv_pci_dev) children;	/* known child functions */
	TAILQ_HEAD(, hv_dr_state) dr_list;	/* queued relations updates */

	/* When set, ejects and non-empty relations updates are ignored. */
	volatile int detaching;
};
339
/* Per-function state for one child reported by the host. */
struct hv_pci_dev {
	TAILQ_ENTRY(hv_pci_dev) link;	/* entry on hv_pcibus.children */

	struct pci_func_desc desc;	/* copy of the host's description */

	/*
	 * Scratch flag for relations processing: set on every child, then
	 * cleared on those the host still reports; the rest are deleted.
	 */
	bool reported_missing;

	struct hv_pcibus *hbus;		/* owning bus */
	struct task eject_task;		/* deferred eject handling */

	TAILQ_HEAD(, hv_irq_desc) irq_desc_list; /* interrupts to free */

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	uint32_t probed_bar[MAX_NUM_BARS];
};
358
/*
 * Work item for a "Device Relations" message from the host.  Such messages
 * must be processed in order, so each one is queued on the bus's dr_list
 * and one of these work items is enqueued on the softc's taskqueue to
 * process the list outside the channel callback.
 */
struct hv_dr_work {
	struct task task;
	struct hv_pcibus *bus;	/* bus whose dr_list should be processed */
};
367
/*
 * Queued copy of one relations message: device_count descriptors follow
 * the fixed part.
 */
struct hv_dr_state {
	TAILQ_ENTRY(hv_dr_state) link;	/* entry on hv_pcibus.dr_list */
	uint32_t device_count;
	struct pci_func_desc func[0];
};
373
/* One interrupt owned by a child function. */
struct hv_irq_desc {
	TAILQ_ENTRY(hv_irq_desc) link;	/* entry on hv_pci_dev.irq_desc_list */
	struct tran_int_desc desc;	/* sent back to the host on delete */
	int irq;
};
379
/*
 * devfn packing helpers: a devfn keeps the 5-bit slot number in bits 7..3
 * and the 3-bit function number in bits 2..0.
 */
#define PCI_DEVFN(s, f)	((((s) & 0x1f) << 3) | ((f) & 0x07))
#define PCI_SLOT(df)	(((df) >> 3) & 0x1f)
#define PCI_FUNC(df)	((df) & 0x07)
383
384 static uint32_t
devfn_to_wslot(unsigned int devfn)385 devfn_to_wslot(unsigned int devfn)
386 {
387 union win_slot_encoding wslot;
388
389 wslot.val = 0;
390 wslot.bits.slot = PCI_SLOT(devfn);
391 wslot.bits.func = PCI_FUNC(devfn);
392
393 return (wslot.val);
394 }
395
396 static unsigned int
wslot_to_devfn(uint32_t wslot)397 wslot_to_devfn(uint32_t wslot)
398 {
399 union win_slot_encoding encoding;
400 unsigned int slot;
401 unsigned int func;
402
403 encoding.val = wslot;
404
405 slot = encoding.bits.slot;
406 func = encoding.bits.func;
407
408 return (PCI_DEVFN(slot, func));
409 }
410
/* Per-device driver state (softc). */
struct vmbus_pcib_softc {
	struct vmbus_channel *chan;	/* channel used for all host messages */
	void *rx_buf;			/* default receive buffer for the callback */

	struct taskqueue *taskq;	/* runs relations-update work items */

	struct hv_pcibus *hbus;		/* bus state for this device */
};
419
/*
 * Device type GUID {44C4F61D-4444-4400-9D52-802E27EDE19F}, stored with the
 * first three groups byte-swapped relative to the textual form.
 */
static const struct hyperv_guid g_pass_through_dev_type = {
	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
};
425
/*
 * Context for a request whose only interesting result is a status code;
 * filled in by hv_pci_generic_compl().
 */
struct hv_pci_compl {
	struct completion host_event;	/* posted when the response arrives */
	int32_t completion_status;	/* response status, or -1 if too short */
};
430
/* Context for a resource-requirements query; see q_resource_requirements(). */
struct q_res_req_compl {
	struct completion host_event;	/* posted when the response arrives */
	struct hv_pci_dev *hpdev;	/* child whose probed_bar[] is filled in */
};
435
/* Context for an interrupt-creation request; see hv_pci_compose_compl(). */
struct compose_comp_ctxt {
	struct hv_pci_compl comp_pkt;	/* completion event and status */
	struct tran_int_desc int_desc;	/* descriptor returned by the host */
};
440
441 static void
hv_pci_generic_compl(void * context,struct pci_response * resp,int resp_packet_size)442 hv_pci_generic_compl(void *context, struct pci_response *resp,
443 int resp_packet_size)
444 {
445 struct hv_pci_compl *comp_pkt = context;
446
447 if (resp_packet_size >= sizeof(struct pci_response))
448 comp_pkt->completion_status = resp->status;
449 else
450 comp_pkt->completion_status = -1;
451
452 complete(&comp_pkt->host_event);
453 }
454
455 static void
q_resource_requirements(void * context,struct pci_response * resp,int resp_packet_size)456 q_resource_requirements(void *context, struct pci_response *resp,
457 int resp_packet_size)
458 {
459 struct q_res_req_compl *completion = context;
460 struct pci_q_res_req_response *q_res_req =
461 (struct pci_q_res_req_response *)resp;
462 int i;
463
464 if (resp->status < 0) {
465 printf("vmbus_pcib: failed to query resource requirements\n");
466 } else {
467 for (i = 0; i < MAX_NUM_BARS; i++)
468 completion->hpdev->probed_bar[i] =
469 q_res_req->probed_bar[i];
470 }
471
472 complete(&completion->host_event);
473 }
474
475 static void
hv_pci_compose_compl(void * context,struct pci_response * resp,int resp_packet_size)476 hv_pci_compose_compl(void *context, struct pci_response *resp,
477 int resp_packet_size)
478 {
479 struct compose_comp_ctxt *comp_pkt = context;
480 struct pci_create_int_response *int_resp =
481 (struct pci_create_int_response *)resp;
482
483 comp_pkt->comp_pkt.completion_status = resp->status;
484 comp_pkt->int_desc = int_resp->int_desc;
485 complete(&comp_pkt->comp_pkt.host_event);
486 }
487
488 static void
hv_int_desc_free(struct hv_pci_dev * hpdev,struct hv_irq_desc * hid)489 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
490 {
491 struct pci_delete_interrupt *int_pkt;
492 struct {
493 struct pci_packet pkt;
494 uint8_t buffer[sizeof(struct pci_delete_interrupt)];
495 } ctxt;
496
497 memset(&ctxt, 0, sizeof(ctxt));
498 int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
499 int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
500 int_pkt->wslot.val = hpdev->desc.wslot.val;
501 int_pkt->int_desc = hid->desc;
502
503 vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
504 int_pkt, sizeof(*int_pkt), 0);
505
506 free(hid, M_DEVBUF);
507 }
508
509 static void
hv_pci_delete_device(struct hv_pci_dev * hpdev)510 hv_pci_delete_device(struct hv_pci_dev *hpdev)
511 {
512 struct hv_pcibus *hbus = hpdev->hbus;
513 struct hv_irq_desc *hid, *tmp_hid;
514 device_t pci_dev;
515 int devfn;
516
517 devfn = wslot_to_devfn(hpdev->desc.wslot.val);
518
519 mtx_lock(&Giant);
520
521 pci_dev = pci_find_dbsf(hbus->pci_domain,
522 0, PCI_SLOT(devfn), PCI_FUNC(devfn));
523 if (pci_dev)
524 device_delete_child(hbus->pci_bus, pci_dev);
525
526 mtx_unlock(&Giant);
527
528 mtx_lock(&hbus->device_list_lock);
529 TAILQ_REMOVE(&hbus->children, hpdev, link);
530 mtx_unlock(&hbus->device_list_lock);
531
532 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
533 hv_int_desc_free(hpdev, hid);
534
535 free(hpdev, M_DEVBUF);
536 }
537
538 static struct hv_pci_dev *
new_pcichild_device(struct hv_pcibus * hbus,struct pci_func_desc * desc)539 new_pcichild_device(struct hv_pcibus *hbus, struct pci_func_desc *desc)
540 {
541 struct hv_pci_dev *hpdev;
542 struct pci_child_message *res_req;
543 struct q_res_req_compl comp_pkt;
544 struct {
545 struct pci_packet pkt;
546 uint8_t buffer[sizeof(struct pci_child_message)];
547 } ctxt;
548 int ret;
549
550 hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
551 hpdev->hbus = hbus;
552
553 TAILQ_INIT(&hpdev->irq_desc_list);
554
555 init_completion(&comp_pkt.host_event);
556 comp_pkt.hpdev = hpdev;
557
558 ctxt.pkt.compl_ctxt = &comp_pkt;
559 ctxt.pkt.completion_func = q_resource_requirements;
560
561 res_req = (struct pci_child_message *)&ctxt.pkt.message;
562 res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
563 res_req->wslot.val = desc->wslot.val;
564
565 ret = vmbus_chan_send(hbus->sc->chan,
566 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
567 res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt);
568 if (ret)
569 goto err;
570
571 wait_for_completion(&comp_pkt.host_event);
572 free_completion(&comp_pkt.host_event);
573
574 hpdev->desc = *desc;
575
576 mtx_lock(&hbus->device_list_lock);
577 if (TAILQ_EMPTY(&hbus->children))
578 hbus->pci_domain = desc->ser & 0xFFFF;
579 TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
580 mtx_unlock(&hbus->device_list_lock);
581 return (hpdev);
582 err:
583 free_completion(&comp_pkt.host_event);
584 free(hpdev, M_DEVBUF);
585 return (NULL);
586 }
587
588 #if __FreeBSD_version < 1100000
589
590 /* Old versions don't have BUS_RESCAN(). Let's copy it from FreeBSD 11. */
591
592 static struct pci_devinfo *
pci_identify_function(device_t pcib,device_t dev,int domain,int busno,int slot,int func,size_t dinfo_size)593 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
594 int slot, int func, size_t dinfo_size)
595 {
596 struct pci_devinfo *dinfo;
597
598 dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
599 if (dinfo != NULL)
600 pci_add_child(dev, dinfo);
601
602 return (dinfo);
603 }
604
/*
 * Rescan PCI bus "dev": add devices for functions that have appeared,
 * delete devices whose function no longer responds, and probe/attach the
 * newly added ones.
 *
 * Returns 0 on success, ENOMEM if the bookkeeping array cannot be
 * allocated, or the error from device_get_children().
 */
static int
pci_rescan(device_t dev)
{
/* Read config register n (width w) of the slot/function being scanned. */
#define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_softc *sc;
	device_t child, *devlist, *unchanged;
	int devcount, error, i, j, maxslots, oldcount;
	int busno, domain, s, f, pcifunchigh;
	uint8_t hdrtype;

	/* No need to check for ARI on a rescan. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error)
		return (error);
	/*
	 * unchanged[i] is set to devlist[i] when that existing child is
	 * still present on the bus; entries left NULL mark children to
	 * delete.
	 */
	if (devcount != 0) {
		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
		    M_NOWAIT | M_ZERO);
		if (unchanged == NULL) {
			free(devlist, M_TEMP);
			return (ENOMEM);
		}
	} else
		unchanged = NULL;

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* If function 0 is not present, skip to the next slot. */
		f = 0;
		if (REG(PCIR_VENDOR, 2) == 0xffff)
			continue;
		pcifunchigh = 0;
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Only multi-function devices are scanned beyond function 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = 0; f <= pcifunchigh; f++) {
			if (REG(PCIR_VENDOR, 2) == 0xffff)
				continue;

			/*
			 * Found a valid function. Check if a
			 * device_t for this device already exists.
			 */
			for (i = 0; i < devcount; i++) {
				child = devlist[i];
				if (child == NULL)
					continue;
				if (pci_get_slot(child) == s &&
				    pci_get_function(child) == f) {
					unchanged[i] = child;
					goto next_func;
				}
			}

			/* No existing child matches: add a new one. */
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    sizeof(struct pci_devinfo));
		next_func:;
		}
	}

	/* Remove devices that are no longer present. */
	for (i = 0; i < devcount; i++) {
		if (unchanged[i] != NULL)
			continue;
		device_delete_child(dev, devlist[i]);
	}

	free(devlist, M_TEMP);
	oldcount = devcount;

	/* Try to attach the devices just added. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error) {
		free(unchanged, M_TEMP);
		return (error);
	}

	/* A child found in unchanged[] existed before; skip it. */
	for (i = 0; i < devcount; i++) {
		for (j = 0; j < oldcount; j++) {
			if (devlist[i] == unchanged[j])
				goto next_device;
		}

		device_probe_and_attach(devlist[i]);
	next_device:;
	}

	free(unchanged, M_TEMP);
	free(devlist, M_TEMP);
	return (0);
#undef REG
}
702
703 #else
704
705 static int
pci_rescan(device_t dev)706 pci_rescan(device_t dev)
707 {
708 return (BUS_RESCAN(dev));
709 }
710
711 #endif
712
/*
 * Taskqueue handler that applies the most recent queued "device relations"
 * update to the bus.
 *
 * arg is the struct hv_dr_work allocated by hv_pci_devices_present(); it is
 * freed here.  All queued updates except the newest one are discarded.
 * Children the newest update no longer lists are deleted, children it
 * newly lists are created, and the PCI bus is rescanned if anything was
 * added and the bus is installed.
 */
static void
pci_devices_present_work(void *arg, int pending __unused)
{
	struct hv_dr_work *dr_wrk = arg;
	struct hv_dr_state *dr = NULL;
	struct hv_pcibus *hbus;
	uint32_t child_no;
	bool found;
	struct pci_func_desc *new_desc;
	struct hv_pci_dev *hpdev, *tmp_hpdev;
	struct completion *query_comp;
	bool need_rescan = false;

	hbus = dr_wrk->bus;
	free(dr_wrk, M_DEVBUF);

	/* Pull this off the queue and process it if it was the last one. */
	mtx_lock(&hbus->device_list_lock);
	while (!TAILQ_EMPTY(&hbus->dr_list)) {
		dr = TAILQ_FIRST(&hbus->dr_list);
		TAILQ_REMOVE(&hbus->dr_list, dr, link);

		/* Throw this away if the list still has stuff in it. */
		if (!TAILQ_EMPTY(&hbus->dr_list)) {
			free(dr, M_DEVBUF);
			continue;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	/* The queue was already empty: nothing to do. */
	if (!dr)
		return;

	/* First, mark all existing children as reported missing. */
	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link)
		hpdev->reported_missing = true;
	mtx_unlock(&hbus->device_list_lock);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		/* A child matches on slot, vendor, device and serial. */
		mtx_lock(&hbus->device_list_lock);
		TAILQ_FOREACH(hpdev, &hbus->children, link) {
			if ((hpdev->desc.wslot.val ==
			    new_desc->wslot.val) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
				break;
			}
		}
		mtx_unlock(&hbus->device_list_lock);

		if (!found) {
			/* The rescan is requested even if creation fails. */
			if (!need_rescan)
				need_rescan = true;

			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				printf("vmbus_pcib: failed to add a child\n");
		}
	}

	/* Remove missing device(s), if any */
	/*
	 * NOTE(review): this walk is done without device_list_lock;
	 * hv_pci_delete_device() takes the lock itself when unlinking.
	 */
	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
		if (hpdev->reported_missing)
			hv_pci_delete_device(hpdev);
	}

	/* Rescan the bus to find any new device, if necessary. */
	if (hbus->state == hv_pcibus_installed && need_rescan)
		pci_rescan(hbus->pci_bus);

	/* Wake up hv_pci_query_relations(), if it's waiting. */
	query_comp = hbus->query_comp;
	if (query_comp) {
		/* Cleared first so the waiter is only completed once. */
		hbus->query_comp = NULL;
		complete(query_comp);
	}

	free(dr, M_DEVBUF);
}
800
801 static struct hv_pci_dev *
get_pcichild_wslot(struct hv_pcibus * hbus,uint32_t wslot)802 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
803 {
804 struct hv_pci_dev *hpdev, *ret = NULL;
805
806 mtx_lock(&hbus->device_list_lock);
807 TAILQ_FOREACH(hpdev, &hbus->children, link) {
808 if (hpdev->desc.wslot.val == wslot) {
809 ret = hpdev;
810 break;
811 }
812 }
813 mtx_unlock(&hbus->device_list_lock);
814
815 return (ret);
816 }
817
818 static void
hv_pci_devices_present(struct hv_pcibus * hbus,struct pci_bus_relations * relations)819 hv_pci_devices_present(struct hv_pcibus *hbus,
820 struct pci_bus_relations *relations)
821 {
822 struct hv_dr_state *dr;
823 struct hv_dr_work *dr_wrk;
824 unsigned long dr_size;
825
826 if (hbus->detaching && relations->device_count > 0)
827 return;
828
829 dr_size = offsetof(struct hv_dr_state, func) +
830 (sizeof(struct pci_func_desc) * relations->device_count);
831 dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
832
833 dr->device_count = relations->device_count;
834 if (dr->device_count != 0)
835 memcpy(dr->func, relations->func,
836 sizeof(struct pci_func_desc) * dr->device_count);
837
838 mtx_lock(&hbus->device_list_lock);
839 TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
840 mtx_unlock(&hbus->device_list_lock);
841
842 dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
843 dr_wrk->bus = hbus;
844 TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
845 taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
846 }
847
848 static void
hv_eject_device_work(void * arg,int pending __unused)849 hv_eject_device_work(void *arg, int pending __unused)
850 {
851 struct hv_pci_dev *hpdev = arg;
852 union win_slot_encoding wslot = hpdev->desc.wslot;
853 struct hv_pcibus *hbus = hpdev->hbus;
854 struct pci_eject_response *eject_pkt;
855 struct {
856 struct pci_packet pkt;
857 uint8_t buffer[sizeof(struct pci_eject_response)];
858 } ctxt;
859
860 hv_pci_delete_device(hpdev);
861
862 memset(&ctxt, 0, sizeof(ctxt));
863 eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
864 eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
865 eject_pkt->wslot.val = wslot.val;
866 vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
867 eject_pkt, sizeof(*eject_pkt), 0);
868 }
869
870 static void
hv_pci_eject_device(struct hv_pci_dev * hpdev)871 hv_pci_eject_device(struct hv_pci_dev *hpdev)
872 {
873 struct hv_pcibus *hbus = hpdev->hbus;
874 struct taskqueue *taskq;
875
876 if (hbus->detaching)
877 return;
878
879 /*
880 * Push this task into the same taskqueue on which
881 * vmbus_pcib_attach() runs, so we're sure this task can't run
882 * concurrently with vmbus_pcib_attach().
883 */
884 TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
885 taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
886 taskqueue_enqueue(taskq, &hpdev->eject_task);
887 }
888
/*
 * Length assumed for the softc's receive buffer (sc->rx_buf); packets that
 * do not fit are received into a temporary, larger buffer.
 */
#define PCIB_PACKET_SIZE 0x100

/*
 * Channel callback: drain and dispatch every packet pending on "chan".
 *
 * arg is the struct vmbus_pcib_softc.  Completion packets are routed to
 * the completion function of the struct pci_packet whose address was used
 * as the transaction id.  In-band PCI_BUS_RELATIONS and PCI_EJECT messages
 * are handed to hv_pci_devices_present() and hv_pci_eject_device().
 */
static void
vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
{
	struct vmbus_pcib_softc *sc = arg;
	struct hv_pcibus *hbus = sc->hbus;

	void *buffer;
	int bufferlen = PCIB_PACKET_SIZE;

	struct pci_packet *comp_packet;
	struct pci_response *response;
	struct pci_incoming_message *new_msg;
	struct pci_bus_relations *bus_rel;
	struct pci_dev_incoming *dev_msg;
	struct hv_pci_dev *hpdev;

	buffer = sc->rx_buf;
	do {
		struct vmbus_chanpkt_hdr *pkt = buffer;
		uint32_t bytes_rxed;
		int ret;

		/* In: buffer capacity.  Out: length of the packet. */
		bytes_rxed = bufferlen;
		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);

		if (ret == ENOBUFS) {
			/* Handle large packet */
			/* Only a previously allocated temporary is freed. */
			if (bufferlen > PCIB_PACKET_SIZE) {
				free(buffer, M_DEVBUF);
				buffer = NULL;
			}

			/* alloc new buffer */
			buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
			bufferlen = bytes_rxed;

			/* Retry the receive with the larger buffer. */
			continue;
		}

		if (ret != 0) {
			/* ignore EIO or EAGAIN */
			break;
		}

		/* Skip packets that carry nothing beyond a bare response. */
		if (bytes_rxed <= sizeof(struct pci_response))
			continue;

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			/* The transaction id is the request's address. */
			comp_packet =
			    (struct pci_packet *)(uintptr_t)pkt->cph_xactid;
			response = (struct pci_response *)pkt;
			comp_packet->completion_func(comp_packet->compl_ctxt,
			    response, bytes_rxed);
			break;
		case VMBUS_CHANPKT_TYPE_INBAND:
			new_msg = (struct pci_incoming_message *)buffer;

			switch (new_msg->message_type.type) {
			case PCI_BUS_RELATIONS:
				bus_rel = (struct pci_bus_relations *)buffer;

				/* Messages listing no devices are ignored. */
				if (bus_rel->device_count == 0)
					break;

				/* Drop messages shorter than they claim. */
				if (bytes_rxed <
				    offsetof(struct pci_bus_relations, func) +
				    (sizeof(struct pci_func_desc) *
				    (bus_rel->device_count)))
					break;

				hv_pci_devices_present(hbus, bus_rel);
				break;

			case PCI_EJECT:
				dev_msg = (struct pci_dev_incoming *)buffer;
				hpdev = get_pcichild_wslot(hbus,
				    dev_msg->wslot.val);

				/* Ejects for unknown slots are ignored. */
				if (hpdev)
					hv_pci_eject_device(hpdev);

				break;
			default:
				printf("vmbus_pcib: Unknown msg type 0x%x\n",
				    new_msg->message_type.type);
				break;
			}
			break;
		default:
			printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
			    pkt->cph_type);
			break;
		}
	} while (1);

	/* Release the temporary buffer; sc->rx_buf is never freed here. */
	if (bufferlen > PCIB_PACKET_SIZE)
		free(buffer, M_DEVBUF);
}
990
991 static int
hv_pci_protocol_negotiation(struct hv_pcibus * hbus)992 hv_pci_protocol_negotiation(struct hv_pcibus *hbus)
993 {
994 struct pci_version_request *version_req;
995 struct hv_pci_compl comp_pkt;
996 struct {
997 struct pci_packet pkt;
998 uint8_t buffer[sizeof(struct pci_version_request)];
999 } ctxt;
1000 int ret;
1001
1002 init_completion(&comp_pkt.host_event);
1003
1004 ctxt.pkt.completion_func = hv_pci_generic_compl;
1005 ctxt.pkt.compl_ctxt = &comp_pkt;
1006 version_req = (struct pci_version_request *)&ctxt.pkt.message;
1007 version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
1008 version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT;
1009 version_req->is_last_attempt = 1;
1010
1011 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1012 VMBUS_CHANPKT_FLAG_RC, version_req, sizeof(*version_req),
1013 (uint64_t)(uintptr_t)&ctxt.pkt);
1014 if (ret)
1015 goto out;
1016
1017 wait_for_completion(&comp_pkt.host_event);
1018
1019 if (comp_pkt.completion_status < 0) {
1020 device_printf(hbus->pcib,
1021 "vmbus_pcib version negotiation failed: %x\n",
1022 comp_pkt.completion_status);
1023 ret = EPROTO;
1024 } else {
1025 ret = 0;
1026 }
1027 out:
1028 free_completion(&comp_pkt.host_event);
1029 return (ret);
1030 }
1031
1032 /* Ask the host to send along the list of child devices */
1033 static int
hv_pci_query_relations(struct hv_pcibus * hbus)1034 hv_pci_query_relations(struct hv_pcibus *hbus)
1035 {
1036 struct pci_message message;
1037 int ret;
1038
1039 message.type = PCI_QUERY_BUS_RELATIONS;
1040 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1041 &message, sizeof(message), 0);
1042 return (ret);
1043 }
1044
1045 static int
hv_pci_enter_d0(struct hv_pcibus * hbus)1046 hv_pci_enter_d0(struct hv_pcibus *hbus)
1047 {
1048 struct pci_bus_d0_entry *d0_entry;
1049 struct hv_pci_compl comp_pkt;
1050 struct {
1051 struct pci_packet pkt;
1052 uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
1053 } ctxt;
1054 int ret;
1055
1056 /*
1057 * Tell the host that the bus is ready to use, and moved into the
1058 * powered-on state. This includes telling the host which region
1059 * of memory-mapped I/O space has been chosen for configuration space
1060 * access.
1061 */
1062 init_completion(&comp_pkt.host_event);
1063
1064 ctxt.pkt.completion_func = hv_pci_generic_compl;
1065 ctxt.pkt.compl_ctxt = &comp_pkt;
1066
1067 d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
1068 memset(d0_entry, 0, sizeof(*d0_entry));
1069 d0_entry->message_type.type = PCI_BUS_D0ENTRY;
1070 d0_entry->mmio_base = rman_get_start(hbus->cfg_res);
1071
1072 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1073 VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
1074 (uint64_t)(uintptr_t)&ctxt.pkt);
1075 if (ret)
1076 goto out;
1077
1078 wait_for_completion(&comp_pkt.host_event);
1079
1080 if (comp_pkt.completion_status < 0) {
1081 device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
1082 ret = EPROTO;
1083 } else {
1084 ret = 0;
1085 }
1086
1087 out:
1088 free_completion(&comp_pkt.host_event);
1089 return (ret);
1090 }
1091
1092 /*
1093 * It looks this is only needed by Windows VM, but let's send the message too
1094 * just to make the host happy.
1095 */
1096 static int
hv_send_resources_allocated(struct hv_pcibus * hbus)1097 hv_send_resources_allocated(struct hv_pcibus *hbus)
1098 {
1099 struct pci_resources_assigned *res_assigned;
1100 struct hv_pci_compl comp_pkt;
1101 struct hv_pci_dev *hpdev;
1102 struct pci_packet *pkt;
1103 uint32_t wslot;
1104 int ret = 0;
1105
1106 pkt = malloc(sizeof(*pkt) + sizeof(*res_assigned),
1107 M_DEVBUF, M_WAITOK | M_ZERO);
1108
1109 for (wslot = 0; wslot < 256; wslot++) {
1110 hpdev = get_pcichild_wslot(hbus, wslot);
1111 if (!hpdev)
1112 continue;
1113
1114 init_completion(&comp_pkt.host_event);
1115
1116 memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned));
1117 pkt->completion_func = hv_pci_generic_compl;
1118 pkt->compl_ctxt = &comp_pkt;
1119
1120 res_assigned = (struct pci_resources_assigned *)&pkt->message;
1121 res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED;
1122 res_assigned->wslot.val = hpdev->desc.wslot.val;
1123
1124 ret = vmbus_chan_send(hbus->sc->chan,
1125 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1126 &pkt->message, sizeof(*res_assigned),
1127 (uint64_t)(uintptr_t)pkt);
1128 if (ret) {
1129 free_completion(&comp_pkt.host_event);
1130 break;
1131 }
1132
1133 wait_for_completion(&comp_pkt.host_event);
1134 free_completion(&comp_pkt.host_event);
1135
1136 if (comp_pkt.completion_status < 0) {
1137 ret = EPROTO;
1138 device_printf(hbus->pcib,
1139 "failed to send PCI_RESOURCES_ASSIGNED\n");
1140 break;
1141 }
1142 }
1143
1144 free(pkt, M_DEVBUF);
1145 return (ret);
1146 }
1147
1148 static int
hv_send_resources_released(struct hv_pcibus * hbus)1149 hv_send_resources_released(struct hv_pcibus *hbus)
1150 {
1151 struct pci_child_message pkt;
1152 struct hv_pci_dev *hpdev;
1153 uint32_t wslot;
1154 int ret;
1155
1156 for (wslot = 0; wslot < 256; wslot++) {
1157 hpdev = get_pcichild_wslot(hbus, wslot);
1158 if (!hpdev)
1159 continue;
1160
1161 pkt.message_type.type = PCI_RESOURCES_RELEASED;
1162 pkt.wslot.val = hpdev->desc.wslot.val;
1163
1164 ret = vmbus_chan_send(hbus->sc->chan,
1165 VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
1166 if (ret)
1167 return (ret);
1168 }
1169
1170 return (0);
1171 }
1172
1173 #define hv_cfg_read(x, s) \
1174 static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus, \
1175 bus_size_t offset) \
1176 { \
1177 return (bus_read_##s(bus->cfg_res, offset)); \
1178 }
1179
1180 #define hv_cfg_write(x, s) \
1181 static inline void hv_cfg_write_##s(struct hv_pcibus *bus, \
1182 bus_size_t offset, uint##x##_t val) \
1183 { \
1184 return (bus_write_##s(bus->cfg_res, offset, val)); \
1185 }
1186
1187 hv_cfg_read(8, 1)
1188 hv_cfg_read(16, 2)
1189 hv_cfg_read(32, 4)
1190
1191 hv_cfg_write(8, 1)
1192 hv_cfg_write(16, 2)
1193 hv_cfg_write(32, 4)
1194
1195 static void
_hv_pcifront_read_config(struct hv_pci_dev * hpdev,int where,int size,uint32_t * val)1196 _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
1197 uint32_t *val)
1198 {
1199 struct hv_pcibus *hbus = hpdev->hbus;
1200 bus_size_t addr = CFG_PAGE_OFFSET + where;
1201
1202 /*
1203 * If the attempt is to read the IDs or the ROM BAR, simulate that.
1204 */
1205 if (where + size <= PCIR_COMMAND) {
1206 memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
1207 } else if (where >= PCIR_REVID && where + size <=
1208 PCIR_CACHELNSZ) {
1209 memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
1210 PCIR_REVID, size);
1211 } else if (where >= PCIR_SUBVEND_0 && where + size <=
1212 PCIR_BIOS) {
1213 memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
1214 PCIR_SUBVEND_0, size);
1215 } else if (where >= PCIR_BIOS && where + size <=
1216 PCIR_CAP_PTR) {
1217 /* ROM BARs are unimplemented */
1218 *val = 0;
1219 } else if ((where >= PCIR_INTLINE && where + size <=
1220 PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
1221 /*
1222 * Interrupt Line and Interrupt PIN are hard-wired to zero
1223 * because this front-end only supports message-signaled
1224 * interrupts.
1225 */
1226 *val = 0;
1227 } else if (where + size <= CFG_PAGE_SIZE) {
1228 mtx_lock(&hbus->config_lock);
1229
1230 /* Choose the function to be read. */
1231 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1232
1233 /* Make sure the function was chosen before we start reading.*/
1234 mb();
1235
1236 /* Read from that function's config space. */
1237 switch (size) {
1238 case 1:
1239 *((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
1240 break;
1241 case 2:
1242 *((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
1243 break;
1244 default:
1245 *((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
1246 break;
1247 }
1248 /*
1249 * Make sure the write was done before we release the lock,
1250 * allowing consecutive reads/writes.
1251 */
1252 mb();
1253
1254 mtx_unlock(&hbus->config_lock);
1255 } else {
1256 /* Invalid config read: it's unlikely to reach here. */
1257 memset(val, 0, size);
1258 }
1259 }
1260
1261 static void
_hv_pcifront_write_config(struct hv_pci_dev * hpdev,int where,int size,uint32_t val)1262 _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
1263 uint32_t val)
1264 {
1265 struct hv_pcibus *hbus = hpdev->hbus;
1266 bus_size_t addr = CFG_PAGE_OFFSET + where;
1267
1268 /* SSIDs and ROM BARs are read-only */
1269 if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
1270 return;
1271
1272 if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
1273 mtx_lock(&hbus->config_lock);
1274
1275 /* Choose the function to be written. */
1276 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1277
1278 /* Make sure the function was chosen before we start writing.*/
1279 wmb();
1280
1281 /* Write to that function's config space. */
1282 switch (size) {
1283 case 1:
1284 hv_cfg_write_1(hbus, addr, (uint8_t)val);
1285 break;
1286 case 2:
1287 hv_cfg_write_2(hbus, addr, (uint16_t)val);
1288 break;
1289 default:
1290 hv_cfg_write_4(hbus, addr, (uint32_t)val);
1291 break;
1292 }
1293
1294 /*
1295 * Make sure the write was done before we release the lock,
1296 * allowing consecutive reads/writes.
1297 */
1298 mb();
1299
1300 mtx_unlock(&hbus->config_lock);
1301 } else {
1302 /* Invalid config write: it's unlikely to reach here. */
1303 return;
1304 }
1305 }
1306
1307 static void
vmbus_pcib_set_detaching(void * arg,int pending __unused)1308 vmbus_pcib_set_detaching(void *arg, int pending __unused)
1309 {
1310 struct hv_pcibus *hbus = arg;
1311
1312 atomic_set_int(&hbus->detaching, 1);
1313 }
1314
1315 static void
vmbus_pcib_pre_detach(struct hv_pcibus * hbus)1316 vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
1317 {
1318 struct task task;
1319
1320 TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);
1321
1322 /*
1323 * Make sure the channel callback won't push any possible new
1324 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
1325 */
1326 vmbus_chan_run_task(hbus->sc->chan, &task);
1327
1328 taskqueue_drain_all(hbus->sc->taskq);
1329 }
1330
1331
1332 /*
1333 * Standard probe entry point.
1334 *
1335 */
1336 static int
vmbus_pcib_probe(device_t dev)1337 vmbus_pcib_probe(device_t dev)
1338 {
1339 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
1340 &g_pass_through_dev_type) == 0) {
1341 device_set_desc(dev, "Hyper-V PCI Express Pass Through");
1342 return (BUS_PROBE_DEFAULT);
1343 }
1344 return (ENXIO);
1345 }
1346
/*
 * Standard attach entry point.
 *
 * Allocates and initializes the per-bus state (struct hv_pcibus), reserves
 * the config window, creates the taskqueue, opens the vmbus channel, runs
 * the host handshake (protocol negotiation, relations query, D0 entry,
 * resources-assigned notification) and finally adds and attaches the "pci"
 * child bus.
 *
 * Returns 0 on success.  On failure everything acquired so far is released
 * through the unwinding labels at the end of the function and an errno
 * value is returned.
 */
static int
vmbus_pcib_attach(device_t dev)
{
	const int pci_ring_size = (4 * PAGE_SIZE);
	const struct hyperv_guid *inst_guid;
	struct vmbus_channel *channel;
	struct vmbus_pcib_softc *sc;
	struct hv_pcibus *hbus;
	int rid = 0;
	int ret;

	hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
	hbus->pcib = dev;

	/*
	 * The PCI domain number is built from bytes 8 (high) and 9 (low) of
	 * the channel's instance GUID.
	 */
	channel = vmbus_get_channel(dev);
	inst_guid = vmbus_chan_guid_inst(channel);
	hbus->pci_domain = inst_guid->hv_guid[9] |
	    (inst_guid->hv_guid[8] << 8);

	mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
	mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
	TAILQ_INIT(&hbus->children);
	TAILQ_INIT(&hbus->dr_list);

	/*
	 * Reserve a page-aligned memory range of PCI_CONFIG_MMIO_LENGTH bytes
	 * anywhere in the address space for the config window.
	 */
	hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
	    0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));

	if (!hbus->cfg_res) {
		device_printf(dev, "failed to get resource for cfg window\n");
		ret = ENXIO;
		goto free_bus;
	}

	sc = device_get_softc(dev);
	sc->chan = channel;
	sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
	sc->hbus = hbus;

	/*
	 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
	 * messages. NB: we can't handle the messages in the channel callback
	 * directly, because the message handlers need to send new messages
	 * to the host and wait for the host's completion messages, which
	 * must also be handled by the channel callback.
	 */
	sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->taskq);
	taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");

	hbus->sc = sc;

	/* Waited on below, after the relations query has been sent. */
	init_completion(&hbus->query_completion);
	hbus->query_comp = &hbus->query_completion;

	ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
	    NULL, 0, vmbus_pcib_on_channel_callback, sc);
	if (ret)
		goto free_res;

	ret = hv_pci_protocol_negotiation(hbus);
	if (ret)
		goto vmbus_close;

	ret = hv_pci_query_relations(hbus);
	if (ret)
		goto vmbus_close;
	wait_for_completion(hbus->query_comp);

	ret = hv_pci_enter_d0(hbus);
	if (ret)
		goto vmbus_close;

	ret = hv_send_resources_allocated(hbus);
	if (ret)
		goto vmbus_close;

	hbus->pci_bus = device_add_child(dev, "pci", -1);
	if (!hbus->pci_bus) {
		device_printf(dev, "failed to create pci bus\n");
		ret = ENXIO;
		goto vmbus_close;
	}

	/* NOTE(review): the return value of bus_generic_attach() is ignored. */
	bus_generic_attach(dev);

	/* Detach only calls bus_generic_detach() once this state is set. */
	hbus->state = hv_pcibus_installed;

	return (0);

	/*
	 * Error unwinding, in reverse order of acquisition.
	 *
	 * NOTE(review): unlike detach, this path does not call
	 * hv_pci_devices_present() with an empty relations list, so any
	 * children already on hbus->children do not appear to be deleted
	 * before hbus is freed -- confirm.
	 */
vmbus_close:
	vmbus_pcib_pre_detach(hbus);
	vmbus_chan_close(sc->chan);
free_res:
	taskqueue_free(sc->taskq);
	free_completion(&hbus->query_completion);
	free(sc->rx_buf, M_DEVBUF);
	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
free_bus:
	mtx_destroy(&hbus->device_list_lock);
	mtx_destroy(&hbus->config_lock);
	free(hbus, M_DEVBUF);
	return (ret);
}
1455
1456 /*
1457 * Standard detach entry point
1458 */
1459 static int
vmbus_pcib_detach(device_t dev)1460 vmbus_pcib_detach(device_t dev)
1461 {
1462 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1463 struct hv_pcibus *hbus = sc->hbus;
1464 struct pci_message teardown_packet;
1465 struct pci_bus_relations relations;
1466 int ret;
1467
1468 vmbus_pcib_pre_detach(hbus);
1469
1470 if (hbus->state == hv_pcibus_installed)
1471 bus_generic_detach(dev);
1472
1473 /* Delete any children which might still exist. */
1474 memset(&relations, 0, sizeof(relations));
1475 hv_pci_devices_present(hbus, &relations);
1476
1477 ret = hv_send_resources_released(hbus);
1478 if (ret)
1479 device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");
1480
1481 teardown_packet.type = PCI_BUS_D0EXIT;
1482 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1483 &teardown_packet, sizeof(struct pci_message), 0);
1484 if (ret)
1485 device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");
1486
1487 taskqueue_drain_all(hbus->sc->taskq);
1488 vmbus_chan_close(sc->chan);
1489 taskqueue_free(sc->taskq);
1490
1491 free_completion(&hbus->query_completion);
1492 free(sc->rx_buf, M_DEVBUF);
1493 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1494
1495 mtx_destroy(&hbus->device_list_lock);
1496 mtx_destroy(&hbus->config_lock);
1497 free(hbus, M_DEVBUF);
1498
1499 return (0);
1500 }
1501
1502 static int
vmbus_pcib_read_ivar(device_t dev,device_t child,int which,uintptr_t * val)1503 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
1504 {
1505 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1506
1507 switch (which) {
1508 case PCIB_IVAR_DOMAIN:
1509 *val = sc->hbus->pci_domain;
1510 return (0);
1511
1512 case PCIB_IVAR_BUS:
1513 /* There is only bus 0. */
1514 *val = 0;
1515 return (0);
1516 }
1517 return (ENOENT);
1518 }
1519
1520 static int
vmbus_pcib_write_ivar(device_t dev,device_t child,int which,uintptr_t val)1521 vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
1522 {
1523 return (ENOENT);
1524 }
1525
1526 static struct resource *
vmbus_pcib_alloc_resource(device_t dev,device_t child,int type,int * rid,rman_res_t start,rman_res_t end,rman_res_t count,u_int flags)1527 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
1528 rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1529 {
1530 unsigned int bar_no;
1531 struct hv_pci_dev *hpdev;
1532 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1533 struct resource *res;
1534 unsigned int devfn;
1535
1536 if (type == PCI_RES_BUS)
1537 return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
1538 start, end, count, flags));
1539
1540 /* Devices with port I/O BAR are not supported. */
1541 if (type == SYS_RES_IOPORT)
1542 return (NULL);
1543
1544 if (type == SYS_RES_MEMORY) {
1545 devfn = PCI_DEVFN(pci_get_slot(child),
1546 pci_get_function(child));
1547 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1548 if (!hpdev)
1549 return (NULL);
1550
1551 bar_no = PCI_RID2BAR(*rid);
1552 if (bar_no >= MAX_NUM_BARS)
1553 return (NULL);
1554
1555 /* Make sure a 32-bit BAR gets a 32-bit address */
1556 if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
1557 end = ulmin(end, 0xFFFFFFFF);
1558 }
1559
1560 res = bus_generic_alloc_resource(dev, child, type, rid,
1561 start, end, count, flags);
1562 /*
1563 * If this is a request for a specific range, assume it is
1564 * correct and pass it up to the parent.
1565 */
1566 if (res == NULL && start + count - 1 == end)
1567 res = bus_generic_alloc_resource(dev, child, type, rid,
1568 start, end, count, flags);
1569 return (res);
1570 }
1571
1572 static int
vmbus_pcib_release_resource(device_t dev,device_t child,int type,int rid,struct resource * r)1573 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid,
1574 struct resource *r)
1575 {
1576 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1577
1578 if (type == PCI_RES_BUS)
1579 return (pci_domain_release_bus(sc->hbus->pci_domain, child,
1580 rid, r));
1581
1582 if (type == SYS_RES_IOPORT)
1583 return (EINVAL);
1584
1585 return (bus_generic_release_resource(dev, child, type, rid, r));
1586 }
1587
#if __FreeBSD_version >= 1100000
/*
 * bus_get_cpus method: answer on behalf of "dev" by asking bus_get_cpus()
 * about the bridge itself.
 */
static int
vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
    size_t setsize, cpuset_t *cpuset)
{
	int error;

	error = bus_get_cpus(pcib, op, setsize, cpuset);
	return (error);
}
#endif
1596
1597 static uint32_t
vmbus_pcib_read_config(device_t dev,u_int bus,u_int slot,u_int func,u_int reg,int bytes)1598 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
1599 u_int reg, int bytes)
1600 {
1601 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1602 struct hv_pci_dev *hpdev;
1603 unsigned int devfn = PCI_DEVFN(slot, func);
1604 uint32_t data = 0;
1605
1606 KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1607
1608 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1609 if (!hpdev)
1610 return (~0);
1611
1612 _hv_pcifront_read_config(hpdev, reg, bytes, &data);
1613
1614 return (data);
1615 }
1616
1617 static void
vmbus_pcib_write_config(device_t dev,u_int bus,u_int slot,u_int func,u_int reg,uint32_t data,int bytes)1618 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
1619 u_int reg, uint32_t data, int bytes)
1620 {
1621 struct vmbus_pcib_softc *sc = device_get_softc(dev);
1622 struct hv_pci_dev *hpdev;
1623 unsigned int devfn = PCI_DEVFN(slot, func);
1624
1625 KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1626
1627 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1628 if (!hpdev)
1629 return;
1630
1631 _hv_pcifront_write_config(hpdev, reg, bytes, data);
1632 }
1633
1634 static int
vmbus_pcib_route_intr(device_t pcib,device_t dev,int pin)1635 vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
1636 {
1637 /* We only support MSI/MSI-X and don't support INTx interrupt. */
1638 return (PCI_INVALID_IRQ);
1639 }
1640
1641 static int
vmbus_pcib_alloc_msi(device_t pcib,device_t dev,int count,int maxcount,int * irqs)1642 vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
1643 int maxcount, int *irqs)
1644 {
1645 return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
1646 irqs));
1647 }
1648
1649 static int
vmbus_pcib_release_msi(device_t pcib,device_t dev,int count,int * irqs)1650 vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
1651 {
1652 return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
1653 }
1654
1655 static int
vmbus_pcib_alloc_msix(device_t pcib,device_t dev,int * irq)1656 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
1657 {
1658 return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
1659 }
1660
1661 static int
vmbus_pcib_release_msix(device_t pcib,device_t dev,int irq)1662 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
1663 {
1664 return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
1665 }
1666
1667 #define MSI_INTEL_ADDR_DEST 0x000ff000
1668 #define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */
1669 #define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
1670
1671 static int
vmbus_pcib_map_msi(device_t pcib,device_t child,int irq,uint64_t * addr,uint32_t * data)1672 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
1673 uint64_t *addr, uint32_t *data)
1674 {
1675 unsigned int devfn;
1676 struct hv_pci_dev *hpdev;
1677
1678 uint64_t v_addr;
1679 uint32_t v_data;
1680 struct hv_irq_desc *hid, *tmp_hid;
1681 unsigned int cpu, vcpu_id;
1682 unsigned int vector;
1683
1684 struct vmbus_pcib_softc *sc = device_get_softc(pcib);
1685 struct pci_create_interrupt *int_pkt;
1686 struct compose_comp_ctxt comp;
1687 struct {
1688 struct pci_packet pkt;
1689 uint8_t buffer[sizeof(struct pci_create_interrupt)];
1690 } ctxt;
1691
1692 int ret;
1693
1694 devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
1695 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1696 if (!hpdev)
1697 return (ENOENT);
1698
1699 ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
1700 &v_addr, &v_data);
1701 if (ret)
1702 return (ret);
1703
1704 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
1705 if (hid->irq == irq) {
1706 TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
1707 hv_int_desc_free(hpdev, hid);
1708 break;
1709 }
1710 }
1711
1712 cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12;
1713 vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
1714 vector = v_data & MSI_INTEL_DATA_INTVEC;
1715
1716 init_completion(&comp.comp_pkt.host_event);
1717
1718 memset(&ctxt, 0, sizeof(ctxt));
1719 ctxt.pkt.completion_func = hv_pci_compose_compl;
1720 ctxt.pkt.compl_ctxt = ∁
1721
1722 int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message;
1723 int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
1724 int_pkt->wslot.val = hpdev->desc.wslot.val;
1725 int_pkt->int_desc.vector = vector;
1726 int_pkt->int_desc.vector_count = 1;
1727 int_pkt->int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED;
1728 int_pkt->int_desc.cpu_mask = 1ULL << vcpu_id;
1729
1730 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1731 VMBUS_CHANPKT_FLAG_RC, int_pkt, sizeof(*int_pkt),
1732 (uint64_t)(uintptr_t)&ctxt.pkt);
1733 if (ret) {
1734 free_completion(&comp.comp_pkt.host_event);
1735 return (ret);
1736 }
1737
1738 wait_for_completion(&comp.comp_pkt.host_event);
1739 free_completion(&comp.comp_pkt.host_event);
1740
1741 if (comp.comp_pkt.completion_status < 0)
1742 return (EPROTO);
1743
1744 *addr = comp.int_desc.address;
1745 *data = comp.int_desc.data;
1746
1747 hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
1748 hid->irq = irq;
1749 hid->desc = comp.int_desc;
1750 TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);
1751
1752 return (0);
1753 }
1754
/*
 * Method table of the driver.  Probe/attach/detach and the pcib methods
 * are implemented in this file; the remaining device and bus methods use
 * the bus_generic_*() defaults.
 */
static device_method_t vmbus_pcib_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,			vmbus_pcib_probe),
	DEVMETHOD(device_attach,		vmbus_pcib_attach),
	DEVMETHOD(device_detach,		vmbus_pcib_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,		vmbus_pcib_read_ivar),
	DEVMETHOD(bus_write_ivar,		vmbus_pcib_write_ivar),
	DEVMETHOD(bus_alloc_resource,		vmbus_pcib_alloc_resource),
	DEVMETHOD(bus_release_resource,		vmbus_pcib_release_resource),
	DEVMETHOD(bus_activate_resource,   bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
#if __FreeBSD_version >= 1100000
	/* Only defined under the same version check as its implementation. */
	DEVMETHOD(bus_get_cpus,			vmbus_pcib_get_cpus),
#endif

	/* pcib interface */
	DEVMETHOD(pcib_maxslots,		pcib_maxslots),
	DEVMETHOD(pcib_read_config,		vmbus_pcib_read_config),
	DEVMETHOD(pcib_write_config,		vmbus_pcib_write_config),
	DEVMETHOD(pcib_route_interrupt,		vmbus_pcib_route_intr),
	DEVMETHOD(pcib_alloc_msi,		vmbus_pcib_alloc_msi),
	DEVMETHOD(pcib_release_msi,		vmbus_pcib_release_msi),
	DEVMETHOD(pcib_alloc_msix,		vmbus_pcib_alloc_msix),
	DEVMETHOD(pcib_release_msix,		vmbus_pcib_release_msix),
	DEVMETHOD(pcib_map_msi,			vmbus_pcib_map_msi),

	DEVMETHOD_END
};
1790
static devclass_t pcib_devclass;

/*
 * Driver class "pcib" built from the method table above, with a softc of
 * type struct vmbus_pcib_softc.
 */
DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
    sizeof(struct vmbus_pcib_softc));
/* Register the driver on the vmbus bus. */
DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, pcib_devclass, 0, 0);
/* The module depends on the vmbus and pci modules. */
MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);
1798
1799 #endif /* NEW_PCIB */
1800