1 /*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/pciio.h>
39 #include <sys/rman.h>
40 #include <sys/smp.h>
41 #include <sys/sysctl.h>
42
43 #include <dev/pci/pcivar.h>
44 #include <dev/pci/pcireg.h>
45
46 #include <machine/resource.h>
47
48 #include <machine/vmm.h>
49 #include <machine/vmm_dev.h>
50
51 #include "vmm_lapic.h"
52 #include "vmm_ktr.h"
53
54 #include "iommu.h"
55 #include "ppt.h"
56
57 /* XXX locking */
58
/*
 * Upper bound on the number of MSI messages tracked per passthrough device.
 * It sizes the fixed msi.res/msi.cookie/msi.arg arrays in 'struct pptdev'
 * and is the largest 'numvec' accepted by ppt_setup_msi().
 */
#define MAX_MSIMSGS 32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
 */
#define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)

/*
 * Malloc type for the per-vector MSI-X arrays (msix.res, msix.cookie and
 * msix.arg) that ppt_setup_msix() allocates and ppt_teardown_msix() frees.
 */
MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
72
/*
 * Per-vector argument handed to the pptintr() interrupt filter.  One of
 * these is registered with bus_setup_intr() for every MSI/MSI-X vector.
 */
struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev *pptdev;	/* device that owns this vector */
	uint64_t addr;		/* address passed to lapic_intr_msi() */
	uint64_t msg_data;	/* message data passed to lapic_intr_msi() */
};
78
/*
 * One guest MMIO mapping belonging to a passthrough device.  A slot with
 * 'len' == 0 is treated as unused by the code in this file.
 */
struct pptseg {
	vm_paddr_t gpa;		/* guest physical address of the mapping */
	size_t len;		/* length of the mapping; 0 means free slot */
	int wired;		/* NOTE(review): not referenced in this file - confirm use */
};
84
/*
 * Softc of a passthrough device.  Every attached instance is linked on
 * 'pptdev_list'; 'vm' is NULL while the device is not assigned to a guest.
 */
struct pptdev {
	device_t dev;			/* underlying PCI device */
	struct vm *vm;			/* owner of this device */
	TAILQ_ENTRY(pptdev) next;	/* linkage on pptdev_list */
	struct pptseg mmio[MAX_MMIOSEGS]; /* guest MMIO mappings */
	/* MSI (or legacy interrupt) state, see ppt_setup_msi() */
	struct {
		int num_msgs;			/* guest state */

		/* 0 for a legacy interrupt, 1 for MSI */
		int startrid;			/* host state */
		struct resource *res[MAX_MSIMSGS];	/* IRQ resources */
		void *cookie[MAX_MSIMSGS];		/* bus_setup_intr() cookies */
		struct pptintr_arg arg[MAX_MSIMSGS];	/* filter arguments */
	} msi;

	/* MSI-X state, see ppt_setup_msix(); arrays hold 'num_msgs' entries */
	struct {
		int num_msgs;		/* 0 until first-time setup succeeds */
		int startrid;		/* rid of vector 0 */
		int msix_table_rid;	/* rid of the BAR holding the MSI-X table */
		struct resource *msix_table_res; /* that BAR's memory resource */
		struct resource **res;	/* per-vector IRQ resources */
		void **cookie;		/* per-vector bus_setup_intr() cookies */
		struct pptintr_arg *arg; /* per-vector filter arguments */
	} msix;
};
109
/* Sysctl subtree hw.vmm.ppt for passthrough device information. */
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");

/*
 * Number of attached passthrough devices: incremented in ppt_attach(),
 * decremented in ppt_detach() and exported read-only as hw.vmm.ppt.devices.
 */
static int num_pptdevs;
SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
    "number of pci passthru devices");

/* List of every attached passthrough device, linked through pptdev.next. */
static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
118
119 static int
ppt_probe(device_t dev)120 ppt_probe(device_t dev)
121 {
122 int bus, slot, func;
123 struct pci_devinfo *dinfo;
124
125 dinfo = (struct pci_devinfo *)device_get_ivars(dev);
126
127 bus = pci_get_bus(dev);
128 slot = pci_get_slot(dev);
129 func = pci_get_function(dev);
130
131 /*
132 * To qualify as a pci passthrough device a device must:
133 * - be allowed by administrator to be used in this role
134 * - be an endpoint device
135 */
136 if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
137 return (ENXIO);
138 else if (vmm_is_pptdev(bus, slot, func))
139 return (0);
140 else
141 /*
142 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
143 * SR-IOV infrastructure specified as "ppt" passthrough devices.
144 * All normal devices that did not have "ppt" specified as their
145 * driver will not be matched by this.
146 */
147 return (BUS_PROBE_NOWILDCARD);
148 }
149
150 static int
ppt_attach(device_t dev)151 ppt_attach(device_t dev)
152 {
153 struct pptdev *ppt;
154
155 ppt = device_get_softc(dev);
156
157 num_pptdevs++;
158 TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
159 ppt->dev = dev;
160
161 if (bootverbose)
162 device_printf(dev, "attached\n");
163
164 return (0);
165 }
166
167 static int
ppt_detach(device_t dev)168 ppt_detach(device_t dev)
169 {
170 struct pptdev *ppt;
171
172 ppt = device_get_softc(dev);
173
174 if (ppt->vm != NULL)
175 return (EBUSY);
176 num_pptdevs--;
177 TAILQ_REMOVE(&pptdev_list, ppt, next);
178
179 return (0);
180 }
181
/* newbus method table: only probe, attach and detach are implemented. */
static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ppt_probe),
	DEVMETHOD(device_attach, ppt_attach),
	DEVMETHOD(device_detach, ppt_detach),
	{0, 0}
};

/*
 * Register the "ppt" driver on the pci bus; the softc of each instance is
 * a 'struct pptdev'.
 */
static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
193
194 static struct pptdev *
ppt_find(int bus,int slot,int func)195 ppt_find(int bus, int slot, int func)
196 {
197 device_t dev;
198 struct pptdev *ppt;
199 int b, s, f;
200
201 TAILQ_FOREACH(ppt, &pptdev_list, next) {
202 dev = ppt->dev;
203 b = pci_get_bus(dev);
204 s = pci_get_slot(dev);
205 f = pci_get_function(dev);
206 if (bus == b && slot == s && func == f)
207 return (ppt);
208 }
209 return (NULL);
210 }
211
212 static void
ppt_unmap_mmio(struct vm * vm,struct pptdev * ppt)213 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
214 {
215 int i;
216 struct pptseg *seg;
217
218 for (i = 0; i < MAX_MMIOSEGS; i++) {
219 seg = &ppt->mmio[i];
220 if (seg->len == 0)
221 continue;
222 (void)vm_unmap_mmio(vm, seg->gpa, seg->len);
223 bzero(seg, sizeof(struct pptseg));
224 }
225 }
226
227 static void
ppt_teardown_msi(struct pptdev * ppt)228 ppt_teardown_msi(struct pptdev *ppt)
229 {
230 int i, rid;
231 void *cookie;
232 struct resource *res;
233
234 if (ppt->msi.num_msgs == 0)
235 return;
236
237 for (i = 0; i < ppt->msi.num_msgs; i++) {
238 rid = ppt->msi.startrid + i;
239 res = ppt->msi.res[i];
240 cookie = ppt->msi.cookie[i];
241
242 if (cookie != NULL)
243 bus_teardown_intr(ppt->dev, res, cookie);
244
245 if (res != NULL)
246 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
247
248 ppt->msi.res[i] = NULL;
249 ppt->msi.cookie[i] = NULL;
250 }
251
252 if (ppt->msi.startrid == 1)
253 pci_release_msi(ppt->dev);
254
255 ppt->msi.num_msgs = 0;
256 }
257
258 static void
ppt_teardown_msix_intr(struct pptdev * ppt,int idx)259 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
260 {
261 int rid;
262 struct resource *res;
263 void *cookie;
264
265 rid = ppt->msix.startrid + idx;
266 res = ppt->msix.res[idx];
267 cookie = ppt->msix.cookie[idx];
268
269 if (cookie != NULL)
270 bus_teardown_intr(ppt->dev, res, cookie);
271
272 if (res != NULL)
273 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
274
275 ppt->msix.res[idx] = NULL;
276 ppt->msix.cookie[idx] = NULL;
277 }
278
279 static void
ppt_teardown_msix(struct pptdev * ppt)280 ppt_teardown_msix(struct pptdev *ppt)
281 {
282 int i;
283
284 if (ppt->msix.num_msgs == 0)
285 return;
286
287 for (i = 0; i < ppt->msix.num_msgs; i++)
288 ppt_teardown_msix_intr(ppt, i);
289
290 if (ppt->msix.msix_table_res) {
291 bus_release_resource(ppt->dev, SYS_RES_MEMORY,
292 ppt->msix.msix_table_rid,
293 ppt->msix.msix_table_res);
294 ppt->msix.msix_table_res = NULL;
295 ppt->msix.msix_table_rid = 0;
296 }
297
298 free(ppt->msix.res, M_PPTMSIX);
299 free(ppt->msix.cookie, M_PPTMSIX);
300 free(ppt->msix.arg, M_PPTMSIX);
301
302 pci_release_msi(ppt->dev);
303
304 ppt->msix.num_msgs = 0;
305 }
306
307 int
ppt_avail_devices(void)308 ppt_avail_devices(void)
309 {
310
311 return (num_pptdevs);
312 }
313
314 int
ppt_assigned_devices(struct vm * vm)315 ppt_assigned_devices(struct vm *vm)
316 {
317 struct pptdev *ppt;
318 int num;
319
320 num = 0;
321 TAILQ_FOREACH(ppt, &pptdev_list, next) {
322 if (ppt->vm == vm)
323 num++;
324 }
325 return (num);
326 }
327
328 boolean_t
ppt_is_mmio(struct vm * vm,vm_paddr_t gpa)329 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
330 {
331 int i;
332 struct pptdev *ppt;
333 struct pptseg *seg;
334
335 TAILQ_FOREACH(ppt, &pptdev_list, next) {
336 if (ppt->vm != vm)
337 continue;
338
339 for (i = 0; i < MAX_MMIOSEGS; i++) {
340 seg = &ppt->mmio[i];
341 if (seg->len == 0)
342 continue;
343 if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
344 return (TRUE);
345 }
346 }
347
348 return (FALSE);
349 }
350
351 int
ppt_assign_device(struct vm * vm,int bus,int slot,int func)352 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
353 {
354 struct pptdev *ppt;
355
356 ppt = ppt_find(bus, slot, func);
357 if (ppt != NULL) {
358 /*
359 * If this device is owned by a different VM then we
360 * cannot change its owner.
361 */
362 if (ppt->vm != NULL && ppt->vm != vm)
363 return (EBUSY);
364
365 ppt->vm = vm;
366 iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
367 return (0);
368 }
369 return (ENOENT);
370 }
371
372 int
ppt_unassign_device(struct vm * vm,int bus,int slot,int func)373 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
374 {
375 struct pptdev *ppt;
376
377 ppt = ppt_find(bus, slot, func);
378 if (ppt != NULL) {
379 /*
380 * If this device is not owned by this 'vm' then bail out.
381 */
382 if (ppt->vm != vm)
383 return (EBUSY);
384 ppt_unmap_mmio(vm, ppt);
385 ppt_teardown_msi(ppt);
386 ppt_teardown_msix(ppt);
387 iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
388 ppt->vm = NULL;
389 return (0);
390 }
391 return (ENOENT);
392 }
393
394 int
ppt_unassign_all(struct vm * vm)395 ppt_unassign_all(struct vm *vm)
396 {
397 struct pptdev *ppt;
398 int bus, slot, func;
399 device_t dev;
400
401 TAILQ_FOREACH(ppt, &pptdev_list, next) {
402 if (ppt->vm == vm) {
403 dev = ppt->dev;
404 bus = pci_get_bus(dev);
405 slot = pci_get_slot(dev);
406 func = pci_get_function(dev);
407 vm_unassign_pptdev(vm, bus, slot, func);
408 }
409 }
410
411 return (0);
412 }
413
414 int
ppt_map_mmio(struct vm * vm,int bus,int slot,int func,vm_paddr_t gpa,size_t len,vm_paddr_t hpa)415 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
416 vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
417 {
418 int i, error;
419 struct pptseg *seg;
420 struct pptdev *ppt;
421
422 ppt = ppt_find(bus, slot, func);
423 if (ppt != NULL) {
424 if (ppt->vm != vm)
425 return (EBUSY);
426
427 for (i = 0; i < MAX_MMIOSEGS; i++) {
428 seg = &ppt->mmio[i];
429 if (seg->len == 0) {
430 error = vm_map_mmio(vm, gpa, len, hpa);
431 if (error == 0) {
432 seg->gpa = gpa;
433 seg->len = len;
434 }
435 return (error);
436 }
437 }
438 return (ENOSPC);
439 }
440 return (ENOENT);
441 }
442
443 static int
pptintr(void * arg)444 pptintr(void *arg)
445 {
446 struct pptdev *ppt;
447 struct pptintr_arg *pptarg;
448
449 pptarg = arg;
450 ppt = pptarg->pptdev;
451
452 if (ppt->vm != NULL)
453 lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
454 else {
455 /*
456 * XXX
457 * This is not expected to happen - panic?
458 */
459 }
460
461 /*
462 * For legacy interrupts give other filters a chance in case
463 * the interrupt was not generated by the passthrough device.
464 */
465 if (ppt->msi.startrid == 0)
466 return (FILTER_STRAY);
467 else
468 return (FILTER_HANDLED);
469 }
470
471 int
ppt_setup_msi(struct vm * vm,int vcpu,int bus,int slot,int func,uint64_t addr,uint64_t msg,int numvec)472 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
473 uint64_t addr, uint64_t msg, int numvec)
474 {
475 int i, rid, flags;
476 int msi_count, startrid, error, tmp;
477 struct pptdev *ppt;
478
479 if (numvec < 0 || numvec > MAX_MSIMSGS)
480 return (EINVAL);
481
482 ppt = ppt_find(bus, slot, func);
483 if (ppt == NULL)
484 return (ENOENT);
485 if (ppt->vm != vm) /* Make sure we own this device */
486 return (EBUSY);
487
488 /* Free any allocated resources */
489 ppt_teardown_msi(ppt);
490
491 if (numvec == 0) /* nothing more to do */
492 return (0);
493
494 flags = RF_ACTIVE;
495 msi_count = pci_msi_count(ppt->dev);
496 if (msi_count == 0) {
497 startrid = 0; /* legacy interrupt */
498 msi_count = 1;
499 flags |= RF_SHAREABLE;
500 } else
501 startrid = 1; /* MSI */
502
503 /*
504 * The device must be capable of supporting the number of vectors
505 * the guest wants to allocate.
506 */
507 if (numvec > msi_count)
508 return (EINVAL);
509
510 /*
511 * Make sure that we can allocate all the MSI vectors that are needed
512 * by the guest.
513 */
514 if (startrid == 1) {
515 tmp = numvec;
516 error = pci_alloc_msi(ppt->dev, &tmp);
517 if (error)
518 return (error);
519 else if (tmp != numvec) {
520 pci_release_msi(ppt->dev);
521 return (ENOSPC);
522 } else {
523 /* success */
524 }
525 }
526
527 ppt->msi.startrid = startrid;
528
529 /*
530 * Allocate the irq resource and attach it to the interrupt handler.
531 */
532 for (i = 0; i < numvec; i++) {
533 ppt->msi.num_msgs = i + 1;
534 ppt->msi.cookie[i] = NULL;
535
536 rid = startrid + i;
537 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
538 &rid, flags);
539 if (ppt->msi.res[i] == NULL)
540 break;
541
542 ppt->msi.arg[i].pptdev = ppt;
543 ppt->msi.arg[i].addr = addr;
544 ppt->msi.arg[i].msg_data = msg + i;
545
546 error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
547 INTR_TYPE_NET | INTR_MPSAFE,
548 pptintr, NULL, &ppt->msi.arg[i],
549 &ppt->msi.cookie[i]);
550 if (error != 0)
551 break;
552 }
553
554 if (i < numvec) {
555 ppt_teardown_msi(ppt);
556 return (ENXIO);
557 }
558
559 return (0);
560 }
561
562 int
ppt_setup_msix(struct vm * vm,int vcpu,int bus,int slot,int func,int idx,uint64_t addr,uint64_t msg,uint32_t vector_control)563 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
564 int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
565 {
566 struct pptdev *ppt;
567 struct pci_devinfo *dinfo;
568 int numvec, alloced, rid, error;
569 size_t res_size, cookie_size, arg_size;
570
571 ppt = ppt_find(bus, slot, func);
572 if (ppt == NULL)
573 return (ENOENT);
574 if (ppt->vm != vm) /* Make sure we own this device */
575 return (EBUSY);
576
577 dinfo = device_get_ivars(ppt->dev);
578 if (!dinfo)
579 return (ENXIO);
580
581 /*
582 * First-time configuration:
583 * Allocate the MSI-X table
584 * Allocate the IRQ resources
585 * Set up some variables in ppt->msix
586 */
587 if (ppt->msix.num_msgs == 0) {
588 numvec = pci_msix_count(ppt->dev);
589 if (numvec <= 0)
590 return (EINVAL);
591
592 ppt->msix.startrid = 1;
593 ppt->msix.num_msgs = numvec;
594
595 res_size = numvec * sizeof(ppt->msix.res[0]);
596 cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
597 arg_size = numvec * sizeof(ppt->msix.arg[0]);
598
599 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
600 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
601 M_WAITOK | M_ZERO);
602 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
603
604 rid = dinfo->cfg.msix.msix_table_bar;
605 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
606 SYS_RES_MEMORY, &rid, RF_ACTIVE);
607
608 if (ppt->msix.msix_table_res == NULL) {
609 ppt_teardown_msix(ppt);
610 return (ENOSPC);
611 }
612 ppt->msix.msix_table_rid = rid;
613
614 alloced = numvec;
615 error = pci_alloc_msix(ppt->dev, &alloced);
616 if (error || alloced != numvec) {
617 ppt_teardown_msix(ppt);
618 return (error == 0 ? ENOSPC: error);
619 }
620 }
621
622 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
623 /* Tear down the IRQ if it's already set up */
624 ppt_teardown_msix_intr(ppt, idx);
625
626 /* Allocate the IRQ resource */
627 ppt->msix.cookie[idx] = NULL;
628 rid = ppt->msix.startrid + idx;
629 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
630 &rid, RF_ACTIVE);
631 if (ppt->msix.res[idx] == NULL)
632 return (ENXIO);
633
634 ppt->msix.arg[idx].pptdev = ppt;
635 ppt->msix.arg[idx].addr = addr;
636 ppt->msix.arg[idx].msg_data = msg;
637
638 /* Setup the MSI-X interrupt */
639 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
640 INTR_TYPE_NET | INTR_MPSAFE,
641 pptintr, NULL, &ppt->msix.arg[idx],
642 &ppt->msix.cookie[idx]);
643
644 if (error != 0) {
645 bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
646 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
647 ppt->msix.cookie[idx] = NULL;
648 ppt->msix.res[idx] = NULL;
649 return (ENXIO);
650 }
651 } else {
652 /* Masked, tear it down if it's already been set up */
653 ppt_teardown_msix_intr(ppt, idx);
654 }
655
656 return (0);
657 }
658