1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2013 The FreeBSD Foundation
5 *
6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7 * under sponsorship from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/domainset.h>
35 #include <sys/malloc.h>
36 #include <sys/bus.h>
37 #include <sys/conf.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/ktr.h>
41 #include <sys/lock.h>
42 #include <sys/proc.h>
43 #include <sys/memdesc.h>
44 #include <sys/mutex.h>
45 #include <sys/sysctl.h>
46 #include <sys/rman.h>
47 #include <sys/taskqueue.h>
48 #include <sys/tree.h>
49 #include <sys/uio.h>
50 #include <sys/vmem.h>
51 #include <dev/pci/pcireg.h>
52 #include <dev/pci/pcivar.h>
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_kern.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_map.h>
59 #include <dev/iommu/iommu.h>
60 #include <machine/atomic.h>
61 #include <machine/bus.h>
62 #include <machine/md_var.h>
63 #include <machine/iommu.h>
64 #include <dev/iommu/busdma_iommu.h>
65
66 /*
67 * busdma_iommu.c, the implementation of the busdma(9) interface using
68 * IOMMU units from Intel VT-d.
69 */
70
71 static bool
iommu_bus_dma_is_dev_disabled(int domain,int bus,int slot,int func)72 iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
73 {
74 char str[128], *env;
75 int default_bounce;
76 bool ret;
77 static const char bounce_str[] = "bounce";
78 static const char iommu_str[] = "iommu";
79 static const char dmar_str[] = "dmar"; /* compatibility */
80
81 default_bounce = 0;
82 env = kern_getenv("hw.busdma.default");
83 if (env != NULL) {
84 if (strcmp(env, bounce_str) == 0)
85 default_bounce = 1;
86 else if (strcmp(env, iommu_str) == 0 ||
87 strcmp(env, dmar_str) == 0)
88 default_bounce = 0;
89 freeenv(env);
90 }
91
92 snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
93 domain, bus, slot, func);
94 env = kern_getenv(str);
95 if (env == NULL)
96 return (default_bounce != 0);
97 if (strcmp(env, bounce_str) == 0)
98 ret = true;
99 else if (strcmp(env, iommu_str) == 0 ||
100 strcmp(env, dmar_str) == 0)
101 ret = false;
102 else
103 ret = default_bounce != 0;
104 freeenv(env);
105 return (ret);
106 }
107
108 /*
109 * Given original device, find the requester ID that will be seen by
110 * the IOMMU unit and used for page table lookup. PCI bridges may take
111 * ownership of transactions from downstream devices, so it may not be
112 * the same as the BSF of the target device. In those cases, all
113 * devices downstream of the bridge must share a single mapping
114 * domain, and must collectively be assigned to use either IOMMU or
115 * bounce mapping.
116 */
117 device_t
iommu_get_requester(device_t dev,uint16_t * rid)118 iommu_get_requester(device_t dev, uint16_t *rid)
119 {
120 devclass_t pci_class;
121 device_t l, pci, pcib, pcip, pcibp, requester;
122 int cap_offset;
123 uint16_t pcie_flags;
124 bool bridge_is_pcie;
125
126 pci_class = devclass_find("pci");
127 l = requester = dev;
128
129 *rid = pci_get_rid(dev);
130
131 /*
132 * Walk the bridge hierarchy from the target device to the
133 * host port to find the translating bridge nearest the IOMMU
134 * unit.
135 */
136 for (;;) {
137 pci = device_get_parent(l);
138 KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
139 "for %s", device_get_name(dev), device_get_name(l)));
140 KASSERT(device_get_devclass(pci) == pci_class,
141 ("iommu_get_requester(%s): non-pci parent %s for %s",
142 device_get_name(dev), device_get_name(pci),
143 device_get_name(l)));
144
145 pcib = device_get_parent(pci);
146 KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
147 "for %s", device_get_name(dev), device_get_name(pci)));
148
149 /*
150 * The parent of our "bridge" isn't another PCI bus,
151 * so pcib isn't a PCI->PCI bridge but rather a host
152 * port, and the requester ID won't be translated
153 * further.
154 */
155 pcip = device_get_parent(pcib);
156 if (device_get_devclass(pcip) != pci_class)
157 break;
158 pcibp = device_get_parent(pcip);
159
160 if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
161 /*
162 * Do not stop the loop even if the target
163 * device is PCIe, because it is possible (but
164 * unlikely) to have a PCI->PCIe bridge
165 * somewhere in the hierarchy.
166 */
167 l = pcib;
168 } else {
169 /*
170 * Device is not PCIe, it cannot be seen as a
171 * requester by IOMMU unit. Check whether the
172 * bridge is PCIe.
173 */
174 bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
175 &cap_offset) == 0;
176 requester = pcib;
177
178 /*
179 * Check for a buggy PCIe/PCI bridge that
180 * doesn't report the express capability. If
181 * the bridge above it is express but isn't a
182 * PCI bridge, then we know pcib is actually a
183 * PCIe/PCI bridge.
184 */
185 if (!bridge_is_pcie && pci_find_cap(pcibp,
186 PCIY_EXPRESS, &cap_offset) == 0) {
187 pcie_flags = pci_read_config(pcibp,
188 cap_offset + PCIER_FLAGS, 2);
189 if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
190 PCIEM_TYPE_PCI_BRIDGE)
191 bridge_is_pcie = true;
192 }
193
194 if (bridge_is_pcie) {
195 /*
196 * The current device is not PCIe, but
197 * the bridge above it is. This is a
198 * PCIe->PCI bridge. Assume that the
199 * requester ID will be the secondary
200 * bus number with slot and function
201 * set to zero.
202 *
203 * XXX: Doesn't handle the case where
204 * the bridge is PCIe->PCI-X, and the
205 * bridge will only take ownership of
206 * requests in some cases. We should
207 * provide context entries with the
208 * same page tables for taken and
209 * non-taken transactions.
210 */
211 *rid = PCI_RID(pci_get_bus(l), 0, 0);
212 l = pcibp;
213 } else {
214 /*
215 * Neither the device nor the bridge
216 * above it are PCIe. This is a
217 * conventional PCI->PCI bridge, which
218 * will use the bridge's BSF as the
219 * requester ID.
220 */
221 *rid = pci_get_rid(pcib);
222 l = pcib;
223 }
224 }
225 }
226 return (requester);
227 }
228
229 struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit * unit,device_t dev,bool rmrr)230 iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
231 {
232 device_t requester;
233 struct iommu_ctx *ctx;
234 bool disabled;
235 uint16_t rid;
236
237 requester = iommu_get_requester(dev, &rid);
238
239 /*
240 * If the user requested the IOMMU disabled for the device, we
241 * cannot disable the IOMMU unit, due to possibility of other
242 * devices on the same IOMMU unit still requiring translation.
243 * Instead provide the identity mapping for the device
244 * context.
245 */
246 disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
247 pci_get_bus(requester), pci_get_slot(requester),
248 pci_get_function(requester));
249 ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
250 if (ctx == NULL)
251 return (NULL);
252 if (disabled) {
253 /*
254 * Keep the first reference on context, release the
255 * later refs.
256 */
257 IOMMU_LOCK(unit);
258 if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
259 ctx->flags |= IOMMU_CTX_DISABLED;
260 IOMMU_UNLOCK(unit);
261 } else {
262 iommu_free_ctx_locked(unit, ctx);
263 }
264 ctx = NULL;
265 }
266 return (ctx);
267 }
268
269 struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)270 iommu_get_dev_ctx(device_t dev)
271 {
272 struct iommu_unit *unit;
273
274 unit = iommu_find(dev, bootverbose);
275 /* Not in scope of any IOMMU ? */
276 if (unit == NULL)
277 return (NULL);
278 if (!unit->dma_enabled)
279 return (NULL);
280
281 #if defined(__amd64__) || defined(__i386__)
282 dmar_quirks_pre_use(unit);
283 dmar_instantiate_rmrr_ctxs(unit);
284 #endif
285
286 return (iommu_instantiate_ctx(unit, dev, false));
287 }
288
289 bus_dma_tag_t
iommu_get_dma_tag(device_t dev,device_t child)290 iommu_get_dma_tag(device_t dev, device_t child)
291 {
292 struct iommu_ctx *ctx;
293 bus_dma_tag_t res;
294
295 ctx = iommu_get_dev_ctx(child);
296 if (ctx == NULL)
297 return (NULL);
298
299 res = (bus_dma_tag_t)ctx->tag;
300 return (res);
301 }
302
303 bool
bus_dma_iommu_set_buswide(device_t dev)304 bus_dma_iommu_set_buswide(device_t dev)
305 {
306 struct iommu_unit *unit;
307 device_t parent;
308 u_int busno, slot, func;
309
310 parent = device_get_parent(dev);
311 if (device_get_devclass(parent) != devclass_find("pci"))
312 return (false);
313 unit = iommu_find(dev, bootverbose);
314 if (unit == NULL)
315 return (false);
316 busno = pci_get_bus(dev);
317 slot = pci_get_slot(dev);
318 func = pci_get_function(dev);
319 if (slot != 0 || func != 0) {
320 if (bootverbose) {
321 device_printf(dev,
322 "iommu%d pci%d:%d:%d requested buswide busdma\n",
323 unit->unit, busno, slot, func);
324 }
325 return (false);
326 }
327 iommu_set_buswide_ctx(unit, busno);
328 return (true);
329 }
330
331 void
iommu_set_buswide_ctx(struct iommu_unit * unit,u_int busno)332 iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
333 {
334
335 MPASS(busno <= PCI_BUSMAX);
336 IOMMU_LOCK(unit);
337 unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
338 1 << (busno % (NBBY * sizeof(uint32_t)));
339 IOMMU_UNLOCK(unit);
340 }
341
342 bool
iommu_is_buswide_ctx(struct iommu_unit * unit,u_int busno)343 iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
344 {
345
346 MPASS(busno <= PCI_BUSMAX);
347 return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
348 (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
349 }
350
/*
 * Allocation type used in this file for DMA map structures and for the
 * per-tag segment arrays.
 */
static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

/*
 * Forward declaration: the load path queues maps for a deferred retry
 * before the definition of this function appears in the file.
 */
static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);
355
356 static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent,bus_size_t alignment,bus_addr_t boundary,bus_addr_t lowaddr,bus_addr_t highaddr,bus_dma_filter_t * filter,void * filterarg,bus_size_t maxsize,int nsegments,bus_size_t maxsegsz,int flags,bus_dma_lock_t * lockfunc,void * lockfuncarg,bus_dma_tag_t * dmat)357 iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
358 bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
359 bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
360 int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
361 void *lockfuncarg, bus_dma_tag_t *dmat)
362 {
363 struct bus_dma_tag_iommu *newtag, *oldtag;
364 int error;
365
366 *dmat = NULL;
367 error = common_bus_dma_tag_create(parent != NULL ?
368 &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
369 boundary, lowaddr, highaddr, filter, filterarg, maxsize,
370 nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
371 sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
372 if (error != 0)
373 goto out;
374
375 oldtag = (struct bus_dma_tag_iommu *)parent;
376 newtag->common.impl = &bus_dma_iommu_impl;
377 newtag->ctx = oldtag->ctx;
378 newtag->owner = oldtag->owner;
379
380 *dmat = (bus_dma_tag_t)newtag;
381 out:
382 CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
383 __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
384 error);
385 return (error);
386 }
387
388 static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)389 iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
390 {
391
392 return (0);
393 }
394
395 static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)396 iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
397 {
398 struct bus_dma_tag_iommu *dmat, *parent;
399 struct bus_dma_tag_iommu *dmat_copy __unused;
400 int error;
401
402 error = 0;
403 dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;
404
405 if (dmat != NULL) {
406 if (dmat->map_count != 0) {
407 error = EBUSY;
408 goto out;
409 }
410 while (dmat != NULL) {
411 parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
412 if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
413 1) {
414 if (dmat == dmat->ctx->tag)
415 iommu_free_ctx(dmat->ctx);
416 free(dmat->segments, M_IOMMU_DMAMAP);
417 free(dmat, M_DEVBUF);
418 dmat = parent;
419 } else
420 dmat = NULL;
421 }
422 }
423 out:
424 CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
425 return (error);
426 }
427
428 static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat,vm_paddr_t buf,bus_size_t buflen)429 iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
430 {
431
432 return (false);
433 }
434
435 static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat,int flags,bus_dmamap_t * mapp)436 iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
437 {
438 struct bus_dma_tag_iommu *tag;
439 struct bus_dmamap_iommu *map;
440
441 tag = (struct bus_dma_tag_iommu *)dmat;
442 map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
443 DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
444 if (map == NULL) {
445 *mapp = NULL;
446 return (ENOMEM);
447 }
448 if (tag->segments == NULL) {
449 tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
450 tag->common.nsegments, M_IOMMU_DMAMAP,
451 DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
452 if (tag->segments == NULL) {
453 free(map, M_IOMMU_DMAMAP);
454 *mapp = NULL;
455 return (ENOMEM);
456 }
457 }
458 IOMMU_DMAMAP_INIT(map);
459 TAILQ_INIT(&map->map_entries);
460 map->tag = tag;
461 map->locked = true;
462 map->cansleep = false;
463 tag->map_count++;
464 *mapp = (bus_dmamap_t)map;
465
466 return (0);
467 }
468
469 static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat,bus_dmamap_t map1)470 iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
471 {
472 struct bus_dma_tag_iommu *tag;
473 struct bus_dmamap_iommu *map;
474
475 tag = (struct bus_dma_tag_iommu *)dmat;
476 map = (struct bus_dmamap_iommu *)map1;
477 if (map != NULL) {
478 IOMMU_DMAMAP_LOCK(map);
479 if (!TAILQ_EMPTY(&map->map_entries)) {
480 IOMMU_DMAMAP_UNLOCK(map);
481 return (EBUSY);
482 }
483 IOMMU_DMAMAP_DESTROY(map);
484 free(map, M_IOMMU_DMAMAP);
485 }
486 tag->map_count--;
487 return (0);
488 }
489
490
491 static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat,void ** vaddr,int flags,bus_dmamap_t * mapp)492 iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
493 bus_dmamap_t *mapp)
494 {
495 struct bus_dma_tag_iommu *tag;
496 struct bus_dmamap_iommu *map;
497 int error, mflags;
498 vm_memattr_t attr;
499
500 error = iommu_bus_dmamap_create(dmat, flags, mapp);
501 if (error != 0)
502 return (error);
503
504 mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
505 mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
506 attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
507 VM_MEMATTR_DEFAULT;
508
509 tag = (struct bus_dma_tag_iommu *)dmat;
510 map = (struct bus_dmamap_iommu *)*mapp;
511
512 if (tag->common.maxsize < PAGE_SIZE &&
513 tag->common.alignment <= tag->common.maxsize &&
514 attr == VM_MEMATTR_DEFAULT) {
515 *vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
516 DOMAINSET_PREF(tag->common.domain), mflags);
517 map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
518 } else {
519 *vaddr = (void *)kmem_alloc_attr_domainset(
520 DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
521 mflags, 0ul, BUS_SPACE_MAXADDR, attr);
522 map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
523 }
524 if (*vaddr == NULL) {
525 iommu_bus_dmamap_destroy(dmat, *mapp);
526 *mapp = NULL;
527 return (ENOMEM);
528 }
529 return (0);
530 }
531
532 static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat,void * vaddr,bus_dmamap_t map1)533 iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
534 {
535 struct bus_dma_tag_iommu *tag;
536 struct bus_dmamap_iommu *map;
537
538 tag = (struct bus_dma_tag_iommu *)dmat;
539 map = (struct bus_dmamap_iommu *)map1;
540
541 if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
542 free(vaddr, M_DEVBUF);
543 map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
544 } else {
545 KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
546 ("iommu_bus_dmamem_free for non alloced map %p", map));
547 kmem_free((vm_offset_t)vaddr, tag->common.maxsize);
548 map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
549 }
550
551 iommu_bus_dmamap_destroy(dmat, map1);
552 }
553
554 static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu * tag,struct bus_dmamap_iommu * map,vm_page_t * ma,int offset,bus_size_t buflen,int flags,bus_dma_segment_t * segs,int * segp,struct iommu_map_entries_tailq * entries)555 iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
556 struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
557 int flags, bus_dma_segment_t *segs, int *segp,
558 struct iommu_map_entries_tailq *entries)
559 {
560 struct iommu_ctx *ctx;
561 struct iommu_domain *domain;
562 struct iommu_map_entry *entry;
563 bus_size_t buflen1;
564 int error, e_flags, idx, gas_flags, seg;
565
566 KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
567 if (segs == NULL)
568 segs = tag->segments;
569 ctx = tag->ctx;
570 domain = ctx->domain;
571 e_flags = IOMMU_MAP_ENTRY_READ |
572 ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
573 seg = *segp;
574 error = 0;
575 idx = 0;
576 while (buflen > 0) {
577 seg++;
578 if (seg >= tag->common.nsegments) {
579 error = EFBIG;
580 break;
581 }
582 buflen1 = buflen > tag->common.maxsegsz ?
583 tag->common.maxsegsz : buflen;
584
585 /*
586 * (Too) optimistically allow split if there are more
587 * then one segments left.
588 */
589 gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
590 if (seg + 1 < tag->common.nsegments)
591 gas_flags |= IOMMU_MF_CANSPLIT;
592
593 error = iommu_gas_map(domain, &tag->common, buflen1,
594 offset, e_flags, gas_flags, ma + idx, &entry);
595 if (error != 0)
596 break;
597 /* Update buflen1 in case buffer split. */
598 if (buflen1 > entry->end - entry->start - offset)
599 buflen1 = entry->end - entry->start - offset;
600
601 KASSERT(vm_addr_align_ok(entry->start + offset,
602 tag->common.alignment),
603 ("alignment failed: ctx %p start 0x%jx offset %x "
604 "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
605 (uintmax_t)tag->common.alignment));
606 KASSERT(entry->end <= tag->common.lowaddr ||
607 entry->start >= tag->common.highaddr,
608 ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
609 "lowaddr 0x%jx highaddr 0x%jx", ctx,
610 (uintmax_t)entry->start, (uintmax_t)entry->end,
611 (uintmax_t)tag->common.lowaddr,
612 (uintmax_t)tag->common.highaddr));
613 KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
614 tag->common.boundary),
615 ("boundary failed: ctx %p start 0x%jx end 0x%jx "
616 "boundary 0x%jx", ctx, (uintmax_t)entry->start,
617 (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
618 KASSERT(buflen1 <= tag->common.maxsegsz,
619 ("segment too large: ctx %p start 0x%jx end 0x%jx "
620 "buflen1 0x%jx maxsegsz 0x%jx", ctx,
621 (uintmax_t)entry->start, (uintmax_t)entry->end,
622 (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));
623
624 KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
625 ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
626 TAILQ_INSERT_TAIL(entries, entry, dmamap_link);
627
628 segs[seg].ds_addr = entry->start + offset;
629 segs[seg].ds_len = buflen1;
630
631 idx += OFF_TO_IDX(offset + buflen1);
632 offset += buflen1;
633 offset &= IOMMU_PAGE_MASK;
634 buflen -= buflen1;
635 }
636 if (error == 0)
637 *segp = seg;
638 return (error);
639 }
640
641 static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu * tag,struct bus_dmamap_iommu * map,vm_page_t * ma,int offset,bus_size_t buflen,int flags,bus_dma_segment_t * segs,int * segp)642 iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
643 struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
644 int flags, bus_dma_segment_t *segs, int *segp)
645 {
646 struct iommu_ctx *ctx;
647 struct iommu_domain *domain;
648 struct iommu_map_entries_tailq entries;
649 int error;
650
651 ctx = tag->ctx;
652 domain = ctx->domain;
653 atomic_add_long(&ctx->loads, 1);
654
655 TAILQ_INIT(&entries);
656 error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
657 buflen, flags, segs, segp, &entries);
658 if (error == 0) {
659 IOMMU_DMAMAP_LOCK(map);
660 TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
661 IOMMU_DMAMAP_UNLOCK(map);
662 } else if (!TAILQ_EMPTY(&entries)) {
663 /*
664 * The busdma interface does not allow us to report
665 * partial buffer load, so unfortunately we have to
666 * revert all work done.
667 */
668 IOMMU_DOMAIN_LOCK(domain);
669 TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
670 IOMMU_DOMAIN_UNLOCK(domain);
671 taskqueue_enqueue(domain->iommu->delayed_taskqueue,
672 &domain->unload_task);
673 }
674
675 if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
676 !map->cansleep)
677 error = EINPROGRESS;
678 if (error == EINPROGRESS)
679 iommu_bus_schedule_dmamap(domain->iommu, map);
680 return (error);
681 }
682
683 static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat,bus_dmamap_t map1,struct vm_page ** ma,bus_size_t tlen,int ma_offs,int flags,bus_dma_segment_t * segs,int * segp)684 iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
685 struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
686 bus_dma_segment_t *segs, int *segp)
687 {
688 struct bus_dma_tag_iommu *tag;
689 struct bus_dmamap_iommu *map;
690
691 tag = (struct bus_dma_tag_iommu *)dmat;
692 map = (struct bus_dmamap_iommu *)map1;
693 return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
694 flags, segs, segp));
695 }
696
697 static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat,bus_dmamap_t map1,vm_paddr_t buf,bus_size_t buflen,int flags,bus_dma_segment_t * segs,int * segp)698 iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
699 vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
700 int *segp)
701 {
702 struct bus_dma_tag_iommu *tag;
703 struct bus_dmamap_iommu *map;
704 vm_page_t *ma, fma;
705 vm_paddr_t pstart, pend, paddr;
706 int error, i, ma_cnt, mflags, offset;
707
708 tag = (struct bus_dma_tag_iommu *)dmat;
709 map = (struct bus_dmamap_iommu *)map1;
710 pstart = trunc_page(buf);
711 pend = round_page(buf + buflen);
712 offset = buf & PAGE_MASK;
713 ma_cnt = OFF_TO_IDX(pend - pstart);
714 mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
715 ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
716 if (ma == NULL)
717 return (ENOMEM);
718 fma = NULL;
719 for (i = 0; i < ma_cnt; i++) {
720 paddr = pstart + ptoa(i);
721 ma[i] = PHYS_TO_VM_PAGE(paddr);
722 if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
723 /*
724 * If PHYS_TO_VM_PAGE() returned NULL or the
725 * vm_page was not initialized we'll use a
726 * fake page.
727 */
728 if (fma == NULL) {
729 fma = malloc(sizeof(struct vm_page) * ma_cnt,
730 M_DEVBUF, M_ZERO | mflags);
731 if (fma == NULL) {
732 free(ma, M_DEVBUF);
733 return (ENOMEM);
734 }
735 }
736 vm_page_initfake(&fma[i], pstart + ptoa(i),
737 VM_MEMATTR_DEFAULT);
738 ma[i] = &fma[i];
739 }
740 }
741 error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
742 flags, segs, segp);
743 free(fma, M_DEVBUF);
744 free(ma, M_DEVBUF);
745 return (error);
746 }
747
748 static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat,bus_dmamap_t map1,void * buf,bus_size_t buflen,pmap_t pmap,int flags,bus_dma_segment_t * segs,int * segp)749 iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
750 bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
751 int *segp)
752 {
753 struct bus_dma_tag_iommu *tag;
754 struct bus_dmamap_iommu *map;
755 vm_page_t *ma, fma;
756 vm_paddr_t pstart, pend, paddr;
757 int error, i, ma_cnt, mflags, offset;
758
759 tag = (struct bus_dma_tag_iommu *)dmat;
760 map = (struct bus_dmamap_iommu *)map1;
761 pstart = trunc_page((vm_offset_t)buf);
762 pend = round_page((vm_offset_t)buf + buflen);
763 offset = (vm_offset_t)buf & PAGE_MASK;
764 ma_cnt = OFF_TO_IDX(pend - pstart);
765 mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
766 ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
767 if (ma == NULL)
768 return (ENOMEM);
769 fma = NULL;
770 for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
771 if (pmap == kernel_pmap)
772 paddr = pmap_kextract(pstart);
773 else
774 paddr = pmap_extract(pmap, pstart);
775 ma[i] = PHYS_TO_VM_PAGE(paddr);
776 if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
777 /*
778 * If PHYS_TO_VM_PAGE() returned NULL or the
779 * vm_page was not initialized we'll use a
780 * fake page.
781 */
782 if (fma == NULL) {
783 fma = malloc(sizeof(struct vm_page) * ma_cnt,
784 M_DEVBUF, M_ZERO | mflags);
785 if (fma == NULL) {
786 free(ma, M_DEVBUF);
787 return (ENOMEM);
788 }
789 }
790 vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
791 ma[i] = &fma[i];
792 }
793 }
794 error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
795 flags, segs, segp);
796 free(ma, M_DEVBUF);
797 free(fma, M_DEVBUF);
798 return (error);
799 }
800
801 static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat,bus_dmamap_t map1,struct memdesc * mem,bus_dmamap_callback_t * callback,void * callback_arg)802 iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
803 struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
804 {
805 struct bus_dmamap_iommu *map;
806
807 if (map1 == NULL)
808 return;
809 map = (struct bus_dmamap_iommu *)map1;
810 map->mem = *mem;
811 map->tag = (struct bus_dma_tag_iommu *)dmat;
812 map->callback = callback;
813 map->callback_arg = callback_arg;
814 }
815
816 static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat,bus_dmamap_t map1,bus_dma_segment_t * segs,int nsegs,int error)817 iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
818 bus_dma_segment_t *segs, int nsegs, int error)
819 {
820 struct bus_dma_tag_iommu *tag;
821 struct bus_dmamap_iommu *map;
822
823 tag = (struct bus_dma_tag_iommu *)dmat;
824 map = (struct bus_dmamap_iommu *)map1;
825
826 if (!map->locked) {
827 KASSERT(map->cansleep,
828 ("map not locked and not sleepable context %p", map));
829
830 /*
831 * We are called from the delayed context. Relock the
832 * driver.
833 */
834 (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
835 map->locked = true;
836 }
837
838 if (segs == NULL)
839 segs = tag->segments;
840 return (segs);
841 }
842
/*
 * The limitations of the busdma KPI force the iommu to perform the actual
 * unload, consisting of the unmapping of the map entries' page tables,
 * from the delayed context on i386, since page table page mapping
 * might require a sleep to be successful.  The unfortunate
 * consequence is that the DMA requests can be served some time after
 * the bus_dmamap_unload() call returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
853 static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat,bus_dmamap_t map1)854 iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
855 {
856 struct bus_dma_tag_iommu *tag;
857 struct bus_dmamap_iommu *map;
858 struct iommu_ctx *ctx;
859 struct iommu_domain *domain;
860 struct iommu_map_entries_tailq entries;
861
862 tag = (struct bus_dma_tag_iommu *)dmat;
863 map = (struct bus_dmamap_iommu *)map1;
864 ctx = tag->ctx;
865 domain = ctx->domain;
866 atomic_add_long(&ctx->unloads, 1);
867
868 TAILQ_INIT(&entries);
869 IOMMU_DMAMAP_LOCK(map);
870 TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
871 IOMMU_DMAMAP_UNLOCK(map);
872 #if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
873 IOMMU_DOMAIN_LOCK(domain);
874 TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
875 IOMMU_DOMAIN_UNLOCK(domain);
876 taskqueue_enqueue(domain->iommu->delayed_taskqueue,
877 &domain->unload_task);
878 #else
879 THREAD_NO_SLEEPING();
880 iommu_domain_unload(domain, &entries, false);
881 THREAD_SLEEPING_OK();
882 KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
883 #endif
884 }
885
886 static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat,bus_dmamap_t map,bus_dmasync_op_t op)887 iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
888 bus_dmasync_op_t op)
889 {
890 }
891
/*
 * Method table binding the busdma operations to the IOMMU implementations
 * defined in this file.  Tags created by iommu_bus_dma_tag_create() point
 * at it through common.impl, and bus_dma_iommu_load_ident() compares
 * against its address to recognize IOMMU tags.
 */
struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
};
909
910 static void
iommu_bus_task_dmamap(void * arg,int pending)911 iommu_bus_task_dmamap(void *arg, int pending)
912 {
913 struct bus_dma_tag_iommu *tag;
914 struct bus_dmamap_iommu *map;
915 struct iommu_unit *unit;
916
917 unit = arg;
918 IOMMU_LOCK(unit);
919 while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
920 TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
921 IOMMU_UNLOCK(unit);
922 tag = map->tag;
923 map->cansleep = true;
924 map->locked = false;
925 bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
926 &map->mem, map->callback, map->callback_arg,
927 BUS_DMA_WAITOK);
928 map->cansleep = false;
929 if (map->locked) {
930 (tag->common.lockfunc)(tag->common.lockfuncarg,
931 BUS_DMA_UNLOCK);
932 } else
933 map->locked = true;
934 map->cansleep = false;
935 IOMMU_LOCK(unit);
936 }
937 IOMMU_UNLOCK(unit);
938 }
939
940 static void
iommu_bus_schedule_dmamap(struct iommu_unit * unit,struct bus_dmamap_iommu * map)941 iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
942 {
943
944 map->locked = false;
945 IOMMU_LOCK(unit);
946 TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
947 IOMMU_UNLOCK(unit);
948 taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
949 }
950
951 int
iommu_init_busdma(struct iommu_unit * unit)952 iommu_init_busdma(struct iommu_unit *unit)
953 {
954 int error;
955
956 unit->dma_enabled = 1;
957 error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
958 if (error == 0) /* compatibility */
959 TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
960 TAILQ_INIT(&unit->delayed_maps);
961 TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
962 unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
963 taskqueue_thread_enqueue, &unit->delayed_taskqueue);
964 taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
965 "iommu%d busdma taskq", unit->unit);
966 return (0);
967 }
968
969 void
iommu_fini_busdma(struct iommu_unit * unit)970 iommu_fini_busdma(struct iommu_unit *unit)
971 {
972
973 if (unit->delayed_taskqueue == NULL)
974 return;
975
976 taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
977 taskqueue_free(unit->delayed_taskqueue);
978 unit->delayed_taskqueue = NULL;
979 }
980
981 int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat,bus_dmamap_t map1,vm_paddr_t start,vm_size_t length,int flags)982 bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
983 vm_paddr_t start, vm_size_t length, int flags)
984 {
985 struct bus_dma_tag_common *tc;
986 struct bus_dma_tag_iommu *tag;
987 struct bus_dmamap_iommu *map;
988 struct iommu_ctx *ctx;
989 struct iommu_domain *domain;
990 struct iommu_map_entry *entry;
991 vm_page_t *ma;
992 vm_size_t i;
993 int error;
994 bool waitok;
995
996 MPASS((start & PAGE_MASK) == 0);
997 MPASS((length & PAGE_MASK) == 0);
998 MPASS(length > 0);
999 MPASS(start + length >= start);
1000 MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);
1001
1002 tc = (struct bus_dma_tag_common *)dmat;
1003 if (tc->impl != &bus_dma_iommu_impl)
1004 return (0);
1005
1006 tag = (struct bus_dma_tag_iommu *)dmat;
1007 ctx = tag->ctx;
1008 domain = ctx->domain;
1009 map = (struct bus_dmamap_iommu *)map1;
1010 waitok = (flags & BUS_DMA_NOWAIT) != 0;
1011
1012 entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
1013 if (entry == NULL)
1014 return (ENOMEM);
1015 entry->start = start;
1016 entry->end = start + length;
1017 ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
1018 M_WAITOK : M_NOWAIT);
1019 if (ma == NULL) {
1020 iommu_gas_free_entry(entry);
1021 return (ENOMEM);
1022 }
1023 for (i = 0; i < atop(length); i++) {
1024 ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
1025 VM_MEMATTR_DEFAULT);
1026 }
1027 error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
1028 ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
1029 IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
1030 if (error == 0) {
1031 IOMMU_DMAMAP_LOCK(map);
1032 TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
1033 IOMMU_DMAMAP_UNLOCK(map);
1034 } else {
1035 iommu_gas_free_entry(entry);
1036 }
1037 for (i = 0; i < atop(length); i++)
1038 vm_page_putfake(ma[i]);
1039 free(ma, M_TEMP);
1040 return (error);
1041 }
1042
1043 static void
iommu_domain_unload_task(void * arg,int pending)1044 iommu_domain_unload_task(void *arg, int pending)
1045 {
1046 struct iommu_domain *domain;
1047 struct iommu_map_entries_tailq entries;
1048
1049 domain = arg;
1050 TAILQ_INIT(&entries);
1051
1052 for (;;) {
1053 IOMMU_DOMAIN_LOCK(domain);
1054 TAILQ_SWAP(&domain->unload_entries, &entries,
1055 iommu_map_entry, dmamap_link);
1056 IOMMU_DOMAIN_UNLOCK(domain);
1057 if (TAILQ_EMPTY(&entries))
1058 break;
1059 iommu_domain_unload(domain, &entries, true);
1060 }
1061 }
1062
1063 void
iommu_domain_init(struct iommu_unit * unit,struct iommu_domain * domain,const struct iommu_domain_map_ops * ops)1064 iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
1065 const struct iommu_domain_map_ops *ops)
1066 {
1067
1068 domain->ops = ops;
1069 domain->iommu = unit;
1070
1071 TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
1072 RB_INIT(&domain->rb_root);
1073 TAILQ_INIT(&domain->unload_entries);
1074 mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
1075 }
1076
1077 void
iommu_domain_fini(struct iommu_domain * domain)1078 iommu_domain_fini(struct iommu_domain *domain)
1079 {
1080
1081 mtx_destroy(&domain->lock);
1082 }
1083