1 /*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/sys/amd64/vmm/io/ppt.c 325900 2017-11-16 18:22:03Z jhb $
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/io/ppt.c 325900 2017-11-16 18:22:03Z jhb $");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/pciio.h>
39 #include <sys/rman.h>
40 #include <sys/smp.h>
41 #include <sys/sysctl.h>
42
43 #include <dev/pci/pcivar.h>
44 #include <dev/pci/pcireg.h>
45
46 #include <machine/resource.h>
47
48 #include <machine/vmm.h>
49 #include <machine/vmm_dev.h>
50
51 #include "vmm_lapic.h"
52 #include "vmm_ktr.h"
53
54 #include "iommu.h"
55 #include "ppt.h"
56
57 /* XXX locking */
58
59 #define MAX_MSIMSGS 32
60
61 /*
62 * If the MSI-X table is located in the middle of a BAR then that MMIO
63 * region gets split into two segments - one segment above the MSI-X table
64 * and the other segment below the MSI-X table - with a hole in place of
65 * the MSI-X table so accesses to it can be trapped and emulated.
66 *
67 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
68 */
69 #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)
70
71 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
72
/*
 * Argument handed to pptintr() for one interrupt vector: the passthru
 * device the vector belongs to and the MSI address/data pair that
 * pptintr() forwards to lapic_intr_msi().
 */
struct pptintr_arg {
	struct pptdev	*pptdev;	/* device owning this vector */
	uint64_t	addr;		/* MSI address to inject */
	uint64_t	msg_data;	/* MSI data to inject */
};
78
79 struct pptseg {
80 vm_paddr_t gpa;
81 size_t len;
82 int wired;
83 };
84
85 struct pptdev {
86 device_t dev;
87 struct vm *vm; /* owner of this device */
88 TAILQ_ENTRY(pptdev) next;
89 struct pptseg mmio[MAX_MMIOSEGS];
90 struct {
91 int num_msgs; /* guest state */
92
93 int startrid; /* host state */
94 struct resource *res[MAX_MSIMSGS];
95 void *cookie[MAX_MSIMSGS];
96 struct pptintr_arg arg[MAX_MSIMSGS];
97 } msi;
98
99 struct {
100 int num_msgs;
101 int startrid;
102 int msix_table_rid;
103 struct resource *msix_table_res;
104 struct resource **res;
105 void **cookie;
106 struct pptintr_arg *arg;
107 } msix;
108 };
109
110 SYSCTL_DECL(_hw_vmm);
111 SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");
112
113 static int num_pptdevs;
114 SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
115 "number of pci passthru devices");
116
117 static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
118
119 static int
ppt_probe(device_t dev)120 ppt_probe(device_t dev)
121 {
122 int bus, slot, func;
123 struct pci_devinfo *dinfo;
124
125 dinfo = (struct pci_devinfo *)device_get_ivars(dev);
126
127 bus = pci_get_bus(dev);
128 slot = pci_get_slot(dev);
129 func = pci_get_function(dev);
130
131 /*
132 * To qualify as a pci passthrough device a device must:
133 * - be allowed by administrator to be used in this role
134 * - be an endpoint device
135 */
136 if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
137 return (ENXIO);
138 else if (vmm_is_pptdev(bus, slot, func))
139 return (0);
140 else
141 /*
142 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
143 * SR-IOV infrastructure specified as "ppt" passthrough devices.
144 * All normal devices that did not have "ppt" specified as their
145 * driver will not be matched by this.
146 */
147 return (BUS_PROBE_NOWILDCARD);
148 }
149
150 static int
ppt_attach(device_t dev)151 ppt_attach(device_t dev)
152 {
153 struct pptdev *ppt;
154
155 ppt = device_get_softc(dev);
156
157 iommu_remove_device(iommu_host_domain(), pci_get_rid(dev));
158 num_pptdevs++;
159 TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
160 ppt->dev = dev;
161
162 if (bootverbose)
163 device_printf(dev, "attached\n");
164
165 return (0);
166 }
167
168 static int
ppt_detach(device_t dev)169 ppt_detach(device_t dev)
170 {
171 struct pptdev *ppt;
172
173 ppt = device_get_softc(dev);
174
175 if (ppt->vm != NULL)
176 return (EBUSY);
177 num_pptdevs--;
178 TAILQ_REMOVE(&pptdev_list, ppt, next);
179 pci_disable_busmaster(dev);
180 iommu_add_device(iommu_host_domain(), pci_get_rid(dev));
181
182 return (0);
183 }
184
185 static device_method_t ppt_methods[] = {
186 /* Device interface */
187 DEVMETHOD(device_probe, ppt_probe),
188 DEVMETHOD(device_attach, ppt_attach),
189 DEVMETHOD(device_detach, ppt_detach),
190 {0, 0}
191 };
192
193 static devclass_t ppt_devclass;
194 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
195 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
196
197 static struct pptdev *
ppt_find(int bus,int slot,int func)198 ppt_find(int bus, int slot, int func)
199 {
200 device_t dev;
201 struct pptdev *ppt;
202 int b, s, f;
203
204 TAILQ_FOREACH(ppt, &pptdev_list, next) {
205 dev = ppt->dev;
206 b = pci_get_bus(dev);
207 s = pci_get_slot(dev);
208 f = pci_get_function(dev);
209 if (bus == b && slot == s && func == f)
210 return (ppt);
211 }
212 return (NULL);
213 }
214
215 static void
ppt_unmap_mmio(struct vm * vm,struct pptdev * ppt)216 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
217 {
218 int i;
219 struct pptseg *seg;
220
221 for (i = 0; i < MAX_MMIOSEGS; i++) {
222 seg = &ppt->mmio[i];
223 if (seg->len == 0)
224 continue;
225 (void)vm_unmap_mmio(vm, seg->gpa, seg->len);
226 bzero(seg, sizeof(struct pptseg));
227 }
228 }
229
230 static void
ppt_teardown_msi(struct pptdev * ppt)231 ppt_teardown_msi(struct pptdev *ppt)
232 {
233 int i, rid;
234 void *cookie;
235 struct resource *res;
236
237 if (ppt->msi.num_msgs == 0)
238 return;
239
240 for (i = 0; i < ppt->msi.num_msgs; i++) {
241 rid = ppt->msi.startrid + i;
242 res = ppt->msi.res[i];
243 cookie = ppt->msi.cookie[i];
244
245 if (cookie != NULL)
246 bus_teardown_intr(ppt->dev, res, cookie);
247
248 if (res != NULL)
249 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
250
251 ppt->msi.res[i] = NULL;
252 ppt->msi.cookie[i] = NULL;
253 }
254
255 if (ppt->msi.startrid == 1)
256 pci_release_msi(ppt->dev);
257
258 ppt->msi.num_msgs = 0;
259 }
260
261 static void
ppt_teardown_msix_intr(struct pptdev * ppt,int idx)262 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
263 {
264 int rid;
265 struct resource *res;
266 void *cookie;
267
268 rid = ppt->msix.startrid + idx;
269 res = ppt->msix.res[idx];
270 cookie = ppt->msix.cookie[idx];
271
272 if (cookie != NULL)
273 bus_teardown_intr(ppt->dev, res, cookie);
274
275 if (res != NULL)
276 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
277
278 ppt->msix.res[idx] = NULL;
279 ppt->msix.cookie[idx] = NULL;
280 }
281
282 static void
ppt_teardown_msix(struct pptdev * ppt)283 ppt_teardown_msix(struct pptdev *ppt)
284 {
285 int i;
286
287 if (ppt->msix.num_msgs == 0)
288 return;
289
290 for (i = 0; i < ppt->msix.num_msgs; i++)
291 ppt_teardown_msix_intr(ppt, i);
292
293 if (ppt->msix.msix_table_res) {
294 bus_release_resource(ppt->dev, SYS_RES_MEMORY,
295 ppt->msix.msix_table_rid,
296 ppt->msix.msix_table_res);
297 ppt->msix.msix_table_res = NULL;
298 ppt->msix.msix_table_rid = 0;
299 }
300
301 free(ppt->msix.res, M_PPTMSIX);
302 free(ppt->msix.cookie, M_PPTMSIX);
303 free(ppt->msix.arg, M_PPTMSIX);
304
305 pci_release_msi(ppt->dev);
306
307 ppt->msix.num_msgs = 0;
308 }
309
310 int
ppt_avail_devices(void)311 ppt_avail_devices(void)
312 {
313
314 return (num_pptdevs);
315 }
316
317 int
ppt_assigned_devices(struct vm * vm)318 ppt_assigned_devices(struct vm *vm)
319 {
320 struct pptdev *ppt;
321 int num;
322
323 num = 0;
324 TAILQ_FOREACH(ppt, &pptdev_list, next) {
325 if (ppt->vm == vm)
326 num++;
327 }
328 return (num);
329 }
330
331 boolean_t
ppt_is_mmio(struct vm * vm,vm_paddr_t gpa)332 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
333 {
334 int i;
335 struct pptdev *ppt;
336 struct pptseg *seg;
337
338 TAILQ_FOREACH(ppt, &pptdev_list, next) {
339 if (ppt->vm != vm)
340 continue;
341
342 for (i = 0; i < MAX_MMIOSEGS; i++) {
343 seg = &ppt->mmio[i];
344 if (seg->len == 0)
345 continue;
346 if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
347 return (TRUE);
348 }
349 }
350
351 return (FALSE);
352 }
353
354 int
ppt_assign_device(struct vm * vm,int bus,int slot,int func)355 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
356 {
357 struct pptdev *ppt;
358
359 ppt = ppt_find(bus, slot, func);
360 if (ppt != NULL) {
361 /*
362 * If this device is owned by a different VM then we
363 * cannot change its owner.
364 */
365 if (ppt->vm != NULL && ppt->vm != vm)
366 return (EBUSY);
367
368 pci_save_state(ppt->dev);
369 pcie_flr(ppt->dev,
370 max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
371 true);
372 pci_restore_state(ppt->dev);
373 ppt->vm = vm;
374 iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
375 return (0);
376 }
377 return (ENOENT);
378 }
379
380 int
ppt_unassign_device(struct vm * vm,int bus,int slot,int func)381 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
382 {
383 struct pptdev *ppt;
384
385 ppt = ppt_find(bus, slot, func);
386 if (ppt != NULL) {
387 /*
388 * If this device is not owned by this 'vm' then bail out.
389 */
390 if (ppt->vm != vm)
391 return (EBUSY);
392
393 pci_save_state(ppt->dev);
394 pcie_flr(ppt->dev,
395 max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
396 true);
397 pci_restore_state(ppt->dev);
398 ppt_unmap_mmio(vm, ppt);
399 ppt_teardown_msi(ppt);
400 ppt_teardown_msix(ppt);
401 iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
402 ppt->vm = NULL;
403 return (0);
404 }
405 return (ENOENT);
406 }
407
408 int
ppt_unassign_all(struct vm * vm)409 ppt_unassign_all(struct vm *vm)
410 {
411 struct pptdev *ppt;
412 int bus, slot, func;
413 device_t dev;
414
415 TAILQ_FOREACH(ppt, &pptdev_list, next) {
416 if (ppt->vm == vm) {
417 dev = ppt->dev;
418 bus = pci_get_bus(dev);
419 slot = pci_get_slot(dev);
420 func = pci_get_function(dev);
421 vm_unassign_pptdev(vm, bus, slot, func);
422 }
423 }
424
425 return (0);
426 }
427
428 int
ppt_map_mmio(struct vm * vm,int bus,int slot,int func,vm_paddr_t gpa,size_t len,vm_paddr_t hpa)429 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
430 vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
431 {
432 int i, error;
433 struct pptseg *seg;
434 struct pptdev *ppt;
435
436 ppt = ppt_find(bus, slot, func);
437 if (ppt != NULL) {
438 if (ppt->vm != vm)
439 return (EBUSY);
440
441 for (i = 0; i < MAX_MMIOSEGS; i++) {
442 seg = &ppt->mmio[i];
443 if (seg->len == 0) {
444 error = vm_map_mmio(vm, gpa, len, hpa);
445 if (error == 0) {
446 seg->gpa = gpa;
447 seg->len = len;
448 }
449 return (error);
450 }
451 }
452 return (ENOSPC);
453 }
454 return (ENOENT);
455 }
456
457 static int
pptintr(void * arg)458 pptintr(void *arg)
459 {
460 struct pptdev *ppt;
461 struct pptintr_arg *pptarg;
462
463 pptarg = arg;
464 ppt = pptarg->pptdev;
465
466 if (ppt->vm != NULL)
467 lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
468 else {
469 /*
470 * XXX
471 * This is not expected to happen - panic?
472 */
473 }
474
475 /*
476 * For legacy interrupts give other filters a chance in case
477 * the interrupt was not generated by the passthrough device.
478 */
479 if (ppt->msi.startrid == 0)
480 return (FILTER_STRAY);
481 else
482 return (FILTER_HANDLED);
483 }
484
485 int
ppt_setup_msi(struct vm * vm,int vcpu,int bus,int slot,int func,uint64_t addr,uint64_t msg,int numvec)486 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
487 uint64_t addr, uint64_t msg, int numvec)
488 {
489 int i, rid, flags;
490 int msi_count, startrid, error, tmp;
491 struct pptdev *ppt;
492
493 if (numvec < 0 || numvec > MAX_MSIMSGS)
494 return (EINVAL);
495
496 ppt = ppt_find(bus, slot, func);
497 if (ppt == NULL)
498 return (ENOENT);
499 if (ppt->vm != vm) /* Make sure we own this device */
500 return (EBUSY);
501
502 /* Free any allocated resources */
503 ppt_teardown_msi(ppt);
504
505 if (numvec == 0) /* nothing more to do */
506 return (0);
507
508 flags = RF_ACTIVE;
509 msi_count = pci_msi_count(ppt->dev);
510 if (msi_count == 0) {
511 startrid = 0; /* legacy interrupt */
512 msi_count = 1;
513 flags |= RF_SHAREABLE;
514 } else
515 startrid = 1; /* MSI */
516
517 /*
518 * The device must be capable of supporting the number of vectors
519 * the guest wants to allocate.
520 */
521 if (numvec > msi_count)
522 return (EINVAL);
523
524 /*
525 * Make sure that we can allocate all the MSI vectors that are needed
526 * by the guest.
527 */
528 if (startrid == 1) {
529 tmp = numvec;
530 error = pci_alloc_msi(ppt->dev, &tmp);
531 if (error)
532 return (error);
533 else if (tmp != numvec) {
534 pci_release_msi(ppt->dev);
535 return (ENOSPC);
536 } else {
537 /* success */
538 }
539 }
540
541 ppt->msi.startrid = startrid;
542
543 /*
544 * Allocate the irq resource and attach it to the interrupt handler.
545 */
546 for (i = 0; i < numvec; i++) {
547 ppt->msi.num_msgs = i + 1;
548 ppt->msi.cookie[i] = NULL;
549
550 rid = startrid + i;
551 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
552 &rid, flags);
553 if (ppt->msi.res[i] == NULL)
554 break;
555
556 ppt->msi.arg[i].pptdev = ppt;
557 ppt->msi.arg[i].addr = addr;
558 ppt->msi.arg[i].msg_data = msg + i;
559
560 error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
561 INTR_TYPE_NET | INTR_MPSAFE,
562 pptintr, NULL, &ppt->msi.arg[i],
563 &ppt->msi.cookie[i]);
564 if (error != 0)
565 break;
566 }
567
568 if (i < numvec) {
569 ppt_teardown_msi(ppt);
570 return (ENXIO);
571 }
572
573 return (0);
574 }
575
576 int
ppt_setup_msix(struct vm * vm,int vcpu,int bus,int slot,int func,int idx,uint64_t addr,uint64_t msg,uint32_t vector_control)577 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
578 int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
579 {
580 struct pptdev *ppt;
581 struct pci_devinfo *dinfo;
582 int numvec, alloced, rid, error;
583 size_t res_size, cookie_size, arg_size;
584
585 ppt = ppt_find(bus, slot, func);
586 if (ppt == NULL)
587 return (ENOENT);
588 if (ppt->vm != vm) /* Make sure we own this device */
589 return (EBUSY);
590
591 dinfo = device_get_ivars(ppt->dev);
592 if (!dinfo)
593 return (ENXIO);
594
595 /*
596 * First-time configuration:
597 * Allocate the MSI-X table
598 * Allocate the IRQ resources
599 * Set up some variables in ppt->msix
600 */
601 if (ppt->msix.num_msgs == 0) {
602 numvec = pci_msix_count(ppt->dev);
603 if (numvec <= 0)
604 return (EINVAL);
605
606 ppt->msix.startrid = 1;
607 ppt->msix.num_msgs = numvec;
608
609 res_size = numvec * sizeof(ppt->msix.res[0]);
610 cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
611 arg_size = numvec * sizeof(ppt->msix.arg[0]);
612
613 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
614 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
615 M_WAITOK | M_ZERO);
616 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
617
618 rid = dinfo->cfg.msix.msix_table_bar;
619 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
620 SYS_RES_MEMORY, &rid, RF_ACTIVE);
621
622 if (ppt->msix.msix_table_res == NULL) {
623 ppt_teardown_msix(ppt);
624 return (ENOSPC);
625 }
626 ppt->msix.msix_table_rid = rid;
627
628 alloced = numvec;
629 error = pci_alloc_msix(ppt->dev, &alloced);
630 if (error || alloced != numvec) {
631 ppt_teardown_msix(ppt);
632 return (error == 0 ? ENOSPC: error);
633 }
634 }
635
636 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
637 /* Tear down the IRQ if it's already set up */
638 ppt_teardown_msix_intr(ppt, idx);
639
640 /* Allocate the IRQ resource */
641 ppt->msix.cookie[idx] = NULL;
642 rid = ppt->msix.startrid + idx;
643 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
644 &rid, RF_ACTIVE);
645 if (ppt->msix.res[idx] == NULL)
646 return (ENXIO);
647
648 ppt->msix.arg[idx].pptdev = ppt;
649 ppt->msix.arg[idx].addr = addr;
650 ppt->msix.arg[idx].msg_data = msg;
651
652 /* Setup the MSI-X interrupt */
653 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
654 INTR_TYPE_NET | INTR_MPSAFE,
655 pptintr, NULL, &ppt->msix.arg[idx],
656 &ppt->msix.cookie[idx]);
657
658 if (error != 0) {
659 bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
660 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
661 ppt->msix.cookie[idx] = NULL;
662 ppt->msix.res[idx] = NULL;
663 return (ENXIO);
664 }
665 } else {
666 /* Masked, tear it down if it's already been set up */
667 ppt_teardown_msix_intr(ppt, idx);
668 }
669
670 return (0);
671 }
672