xref: /freebsd-13-stable/sys/x86/iommu/intel_ctx.c (revision d4a04574875cbbdb8bf2600b4618cd02b781e833)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/malloc.h>
35 #include <sys/bus.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/ktr.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/memdesc.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/rwlock.h>
45 #include <sys/rman.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 #include <sys/tree.h>
49 #include <sys/uio.h>
50 #include <sys/vmem.h>
51 #include <vm/vm.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_pager.h>
57 #include <vm/vm_map.h>
58 #include <contrib/dev/acpica/include/acpi.h>
59 #include <contrib/dev/acpica/include/accommon.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <machine/atomic.h>
63 #include <machine/bus.h>
64 #include <machine/md_var.h>
65 #include <machine/specialreg.h>
66 #include <x86/include/busdma_impl.h>
67 #include <dev/iommu/busdma_iommu.h>
68 #include <x86/iommu/intel_reg.h>
69 #include <x86/iommu/intel_dmar.h>
70 
/* malloc(9) types used for the context and domain allocations in this file. */
71 static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
72 static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");
73 
/* Forward declarations for helpers that are used before their definition. */
74 static void dmar_unref_domain_locked(struct dmar_unit *dmar,
75     struct dmar_domain *domain);
76 static void dmar_domain_destroy(struct dmar_domain *domain);
77 
78 static void
dmar_ensure_ctx_page(struct dmar_unit * dmar,int bus)79 dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
80 {
81 	struct sf_buf *sf;
82 	dmar_root_entry_t *re;
83 	vm_page_t ctxm;
84 
85 	/*
86 	 * Allocated context page must be linked.
87 	 */
88 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
89 	if (ctxm != NULL)
90 		return;
91 
92 	/*
93 	 * Page not present, allocate and link.  Note that other
94 	 * thread might execute this sequence in parallel.  This
95 	 * should be safe, because the context entries written by both
96 	 * threads are equal.
97 	 */
98 	TD_PREP_PINNED_ASSERT;
99 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
100 	    IOMMU_PGF_WAITOK);
101 	re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
102 	re += bus;
103 	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
104 	    VM_PAGE_TO_PHYS(ctxm)));
105 	dmar_flush_root_to_ram(dmar, re);
106 	dmar_unmap_pgtbl(sf);
107 	TD_PINNED_ASSERT;
108 }
109 
110 static dmar_ctx_entry_t *
dmar_map_ctx_entry(struct dmar_ctx * ctx,struct sf_buf ** sfp)111 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
112 {
113 	struct dmar_unit *dmar;
114 	dmar_ctx_entry_t *ctxp;
115 
116 	dmar = CTX2DMAR(ctx);
117 
118 	ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
119 	    IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
120 	ctxp += ctx->context.rid & 0xff;
121 	return (ctxp);
122 }
123 
124 static void
device_tag_init(struct dmar_ctx * ctx,device_t dev)125 device_tag_init(struct dmar_ctx *ctx, device_t dev)
126 {
127 	struct dmar_domain *domain;
128 	bus_addr_t maxaddr;
129 
130 	domain = CTX2DOM(ctx);
131 	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
132 	ctx->context.tag->common.ref_count = 1; /* Prevent free */
133 	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
134 	ctx->context.tag->common.boundary = 0;
135 	ctx->context.tag->common.lowaddr = maxaddr;
136 	ctx->context.tag->common.highaddr = maxaddr;
137 	ctx->context.tag->common.maxsize = maxaddr;
138 	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
139 	ctx->context.tag->common.maxsegsz = maxaddr;
140 	ctx->context.tag->ctx = CTX2IOCTX(ctx);
141 	ctx->context.tag->owner = dev;
142 }
143 
144 static void
ctx_id_entry_init_one(dmar_ctx_entry_t * ctxp,struct dmar_domain * domain,vm_page_t ctx_root)145 ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
146     vm_page_t ctx_root)
147 {
148 	/*
149 	 * For update due to move, the store is not atomic.  It is
150 	 * possible that DMAR read upper doubleword, while low
151 	 * doubleword is not yet updated.  The domain id is stored in
152 	 * the upper doubleword, while the table pointer in the lower.
153 	 *
154 	 * There is no good solution, for the same reason it is wrong
155 	 * to clear P bit in the ctx entry for update.
156 	 */
157 	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
158 	    domain->awlvl);
159 	if (ctx_root == NULL) {
160 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
161 	} else {
162 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
163 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
164 		    DMAR_CTX1_P);
165 	}
166 }
167 
168 static void
ctx_id_entry_init(struct dmar_ctx * ctx,dmar_ctx_entry_t * ctxp,bool move,int busno)169 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
170     int busno)
171 {
172 	struct dmar_unit *unit;
173 	struct dmar_domain *domain;
174 	vm_page_t ctx_root;
175 	int i;
176 
177 	domain = CTX2DOM(ctx);
178 	unit = DOM2DMAR(domain);
179 	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
180 	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
181 	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
182 	    pci_get_function(ctx->context.tag->owner),
183 	    ctxp->ctx1, ctxp->ctx2));
184 
185 	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
186 	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
187 		KASSERT(domain->pgtbl_obj == NULL,
188 		    ("ctx %p non-null pgtbl_obj", ctx));
189 		ctx_root = NULL;
190 	} else {
191 		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0,
192 		    IOMMU_PGF_NOALLOC);
193 	}
194 
195 	if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
196 		MPASS(!move);
197 		for (i = 0; i <= PCI_BUSMAX; i++) {
198 			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
199 		}
200 	} else {
201 		ctx_id_entry_init_one(ctxp, domain, ctx_root);
202 	}
203 	dmar_flush_ctx_to_ram(unit, ctxp);
204 }
205 
206 static int
dmar_flush_for_ctx_entry(struct dmar_unit * dmar,bool force)207 dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
208 {
209 	int error;
210 
211 	/*
212 	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
213 	 * Mode Consideration" and do the (global) invalidation of the
214 	 * negative TLB entries.
215 	 */
216 	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
217 		return (0);
218 	if (dmar->qi_enabled) {
219 		dmar_qi_invalidate_ctx_glob_locked(dmar);
220 		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
221 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
222 		return (0);
223 	}
224 	error = dmar_inv_ctx_glob(dmar);
225 	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
226 		error = dmar_inv_iotlb_glob(dmar);
227 	return (error);
228 }
229 
230 static int
domain_init_rmrr(struct dmar_domain * domain,device_t dev,int bus,int slot,int func,int dev_domain,int dev_busno,const void * dev_path,int dev_path_len)231 domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
232     int slot, int func, int dev_domain, int dev_busno,
233     const void *dev_path, int dev_path_len)
234 {
235 	struct iommu_map_entries_tailq rmrr_entries;
236 	struct iommu_map_entry *entry, *entry1;
237 	vm_page_t *ma;
238 	iommu_gaddr_t start, end;
239 	vm_pindex_t size, i;
240 	int error, error1;
241 
242 	if (!dmar_rmrr_enable)
243 		return (0);
244 
245 	error = 0;
246 	TAILQ_INIT(&rmrr_entries);
247 	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
248 	    dev_path_len, &rmrr_entries);
249 	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
250 		/*
251 		 * VT-d specification requires that the start of an
252 		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
253 		 * anything into the start and end fields.  Truncate
254 		 * and round as neccesary.
255 		 *
256 		 * We also allow the overlapping RMRR entries, see
257 		 * iommu_gas_alloc_region().
258 		 */
259 		start = entry->start;
260 		end = entry->end;
261 		if (bootverbose)
262 			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
263 			    domain->iodom.iommu->unit, bus, slot, func,
264 			    (uintmax_t)start, (uintmax_t)end);
265 		entry->start = trunc_page(start);
266 		entry->end = round_page(end);
267 		if (entry->start == entry->end) {
268 			/* Workaround for some AMI (?) BIOSes */
269 			if (bootverbose) {
270 				if (dev != NULL)
271 					device_printf(dev, "");
272 				printf("pci%d:%d:%d ", bus, slot, func);
273 				printf("BIOS bug: dmar%d RMRR "
274 				    "region (%jx, %jx) corrected\n",
275 				    domain->iodom.iommu->unit, start, end);
276 			}
277 			entry->end += DMAR_PAGE_SIZE * 0x20;
278 		}
279 		size = OFF_TO_IDX(entry->end - entry->start);
280 		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
281 		for (i = 0; i < size; i++) {
282 			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
283 			    VM_MEMATTR_DEFAULT);
284 		}
285 		error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
286 		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
287 		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
288 		/*
289 		 * Non-failed RMRR entries are owned by context rb
290 		 * tree.  Get rid of the failed entry, but do not stop
291 		 * the loop.  Rest of the parsed RMRR entries are
292 		 * loaded and removed on the context destruction.
293 		 */
294 		if (error1 == 0 && entry->end != entry->start) {
295 			IOMMU_LOCK(domain->iodom.iommu);
296 			domain->refs++; /* XXXKIB prevent free */
297 			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
298 			IOMMU_UNLOCK(domain->iodom.iommu);
299 		} else {
300 			if (error1 != 0) {
301 				if (dev != NULL)
302 					device_printf(dev, "");
303 				printf("pci%d:%d:%d ", bus, slot, func);
304 				printf(
305 			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
306 				    domain->iodom.iommu->unit, start, end,
307 				    error1);
308 				error = error1;
309 			}
310 			TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
311 			iommu_gas_free_entry(entry);
312 		}
313 		for (i = 0; i < size; i++)
314 			vm_page_putfake(ma[i]);
315 		free(ma, M_TEMP);
316 	}
317 	return (error);
318 }
319 
320 /*
321  * PCI memory address space is shared between memory-mapped devices (MMIO) and
322  * host memory (which may be remapped by an IOMMU).  Device accesses to an
323  * address within a memory aperture in a PCIe root port will be treated as
324  * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
325  * address space of the root port's memory apertures in the address space used
326  * by the IOMMU for remapping.
327  */
328 static int
dmar_reserve_pci_regions(struct dmar_domain * domain,device_t dev)329 dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
330 {
331 	struct iommu_domain *iodom;
332 	device_t root;
333 	uint32_t val;
334 	uint64_t base, limit;
335 	int error;
336 
337 	iodom = DOM2IODOM(domain);
338 
339 	root = pci_find_pcie_root_port(dev);
340 	if (root == NULL)
341 		return (0);
342 
343 	/* Disable downstream memory */
344 	base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
345 	limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
346 	error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
347 	if (bootverbose || error != 0)
348 		device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
349 		    base, limit + 1, error);
350 	if (error != 0)
351 		return (error);
352 
353 	/* Disable downstream prefetchable memory */
354 	val = pci_read_config(root, PCIR_PMBASEL_1, 2);
355 	if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
356 		if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
357 			base = PCI_PPBMEMBASE(
358 			    pci_read_config(root, PCIR_PMBASEH_1, 4),
359 			    val);
360 			limit = PCI_PPBMEMLIMIT(
361 			    pci_read_config(root, PCIR_PMLIMITH_1, 4),
362 			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
363 		} else {
364 			base = PCI_PPBMEMBASE(0, val);
365 			limit = PCI_PPBMEMLIMIT(0,
366 			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
367 		}
368 		error = iommu_gas_reserve_region_extend(iodom, base,
369 		    limit + 1);
370 		if (bootverbose || error != 0)
371 			device_printf(dev, "DMAR reserve [%#jx-%#jx] "
372 			    "(error %d)\n", base, limit + 1, error);
373 		if (error != 0)
374 			return (error);
375 	}
376 
377 	return (error);
378 }
379 
380 static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit * dmar,bool id_mapped)381 dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
382 {
383 	struct iommu_domain *iodom;
384 	struct iommu_unit *unit;
385 	struct dmar_domain *domain;
386 	int error, id, mgaw;
387 
388 	id = alloc_unr(dmar->domids);
389 	if (id == -1)
390 		return (NULL);
391 	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
392 	iodom = DOM2IODOM(domain);
393 	unit = DMAR2IOMMU(dmar);
394 	domain->domain = id;
395 	LIST_INIT(&domain->contexts);
396 	iommu_domain_init(unit, iodom, &dmar_domain_map_ops);
397 
398 	domain->dmar = dmar;
399 
400 	/*
401 	 * For now, use the maximal usable physical address of the
402 	 * installed memory to calculate the mgaw on id_mapped domain.
403 	 * It is useful for the identity mapping, and less so for the
404 	 * virtualized bus address space.
405 	 */
406 	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
407 	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
408 	error = domain_set_agaw(domain, mgaw);
409 	if (error != 0)
410 		goto fail;
411 	if (!id_mapped)
412 		/* Use all supported address space for remapping. */
413 		domain->iodom.end = 1ULL << (domain->agaw - 1);
414 
415 	iommu_gas_init_domain(DOM2IODOM(domain));
416 
417 	if (id_mapped) {
418 		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
419 			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
420 			    domain->iodom.end);
421 		}
422 		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
423 	} else {
424 		error = domain_alloc_pgtbl(domain);
425 		if (error != 0)
426 			goto fail;
427 		/* Disable local apic region access */
428 		error = iommu_gas_reserve_region(iodom, 0xfee00000,
429 		    0xfeefffff + 1, &iodom->msi_entry);
430 		if (error != 0)
431 			goto fail;
432 	}
433 	return (domain);
434 
435 fail:
436 	dmar_domain_destroy(domain);
437 	return (NULL);
438 }
439 
440 static struct dmar_ctx *
dmar_ctx_alloc(struct dmar_domain * domain,uint16_t rid)441 dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
442 {
443 	struct dmar_ctx *ctx;
444 
445 	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
446 	ctx->context.domain = DOM2IODOM(domain);
447 	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
448 	    M_DMAR_CTX, M_WAITOK | M_ZERO);
449 	ctx->context.rid = rid;
450 	ctx->refs = 1;
451 	return (ctx);
452 }
453 
454 static void
dmar_ctx_link(struct dmar_ctx * ctx)455 dmar_ctx_link(struct dmar_ctx *ctx)
456 {
457 	struct dmar_domain *domain;
458 
459 	domain = CTX2DOM(ctx);
460 	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
461 	KASSERT(domain->refs >= domain->ctx_cnt,
462 	    ("dom %p ref underflow %d %d", domain, domain->refs,
463 	    domain->ctx_cnt));
464 	domain->refs++;
465 	domain->ctx_cnt++;
466 	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
467 }
468 
469 static void
dmar_ctx_unlink(struct dmar_ctx * ctx)470 dmar_ctx_unlink(struct dmar_ctx *ctx)
471 {
472 	struct dmar_domain *domain;
473 
474 	domain = CTX2DOM(ctx);
475 	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
476 	KASSERT(domain->refs > 0,
477 	    ("domain %p ctx dtr refs %d", domain, domain->refs));
478 	KASSERT(domain->ctx_cnt >= domain->refs,
479 	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
480 	    domain->refs, domain->ctx_cnt));
481 	domain->refs--;
482 	domain->ctx_cnt--;
483 	LIST_REMOVE(ctx, link);
484 }
485 
486 static void
dmar_domain_destroy(struct dmar_domain * domain)487 dmar_domain_destroy(struct dmar_domain *domain)
488 {
489 	struct iommu_domain *iodom;
490 	struct dmar_unit *dmar;
491 
492 	iodom = DOM2IODOM(domain);
493 
494 	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
495 	    ("unfinished unloads %p", domain));
496 	KASSERT(LIST_EMPTY(&domain->contexts),
497 	    ("destroying dom %p with contexts", domain));
498 	KASSERT(domain->ctx_cnt == 0,
499 	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
500 	KASSERT(domain->refs == 0,
501 	    ("destroying dom %p with refs %d", domain, domain->refs));
502 	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
503 		DMAR_DOMAIN_LOCK(domain);
504 		iommu_gas_fini_domain(iodom);
505 		DMAR_DOMAIN_UNLOCK(domain);
506 	}
507 	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
508 		if (domain->pgtbl_obj != NULL)
509 			DMAR_DOMAIN_PGLOCK(domain);
510 		domain_free_pgtbl(domain);
511 	}
512 	iommu_domain_fini(iodom);
513 	dmar = DOM2DMAR(domain);
514 	free_unr(dmar->domids, domain->domain);
515 	free(domain, M_DMAR_DOMAIN);
516 }
517 
518 static struct dmar_ctx *
dmar_get_ctx_for_dev1(struct dmar_unit * dmar,device_t dev,uint16_t rid,int dev_domain,int dev_busno,const void * dev_path,int dev_path_len,bool id_mapped,bool rmrr_init)519 dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
520     int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
521     bool id_mapped, bool rmrr_init)
522 {
523 	struct dmar_domain *domain, *domain1;
524 	struct dmar_ctx *ctx, *ctx1;
525 	struct iommu_unit *unit __diagused;
526 	dmar_ctx_entry_t *ctxp;
527 	struct sf_buf *sf;
528 	int bus, slot, func, error;
529 	bool enable;
530 
531 	if (dev != NULL) {
532 		bus = pci_get_bus(dev);
533 		slot = pci_get_slot(dev);
534 		func = pci_get_function(dev);
535 	} else {
536 		bus = PCI_RID2BUS(rid);
537 		slot = PCI_RID2SLOT(rid);
538 		func = PCI_RID2FUNC(rid);
539 	}
540 	enable = false;
541 	TD_PREP_PINNED_ASSERT;
542 	unit = DMAR2IOMMU(dmar);
543 	DMAR_LOCK(dmar);
544 	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
545 	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
546 	    slot, func));
547 	ctx = dmar_find_ctx_locked(dmar, rid);
548 	error = 0;
549 	if (ctx == NULL) {
550 		/*
551 		 * Perform the allocations which require sleep or have
552 		 * higher chance to succeed if the sleep is allowed.
553 		 */
554 		DMAR_UNLOCK(dmar);
555 		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
556 		domain1 = dmar_domain_alloc(dmar, id_mapped);
557 		if (domain1 == NULL) {
558 			TD_PINNED_ASSERT;
559 			return (NULL);
560 		}
561 		if (!id_mapped) {
562 			error = domain_init_rmrr(domain1, dev, bus,
563 			    slot, func, dev_domain, dev_busno, dev_path,
564 			    dev_path_len);
565 			if (error == 0)
566 				error = dmar_reserve_pci_regions(domain1, dev);
567 			if (error != 0) {
568 				dmar_domain_destroy(domain1);
569 				TD_PINNED_ASSERT;
570 				return (NULL);
571 			}
572 		}
573 		ctx1 = dmar_ctx_alloc(domain1, rid);
574 		ctxp = dmar_map_ctx_entry(ctx1, &sf);
575 		DMAR_LOCK(dmar);
576 
577 		/*
578 		 * Recheck the contexts, other thread might have
579 		 * already allocated needed one.
580 		 */
581 		ctx = dmar_find_ctx_locked(dmar, rid);
582 		if (ctx == NULL) {
583 			domain = domain1;
584 			ctx = ctx1;
585 			dmar_ctx_link(ctx);
586 			ctx->context.tag->owner = dev;
587 			device_tag_init(ctx, dev);
588 
589 			/*
590 			 * This is the first activated context for the
591 			 * DMAR unit.  Enable the translation after
592 			 * everything is set up.
593 			 */
594 			if (LIST_EMPTY(&dmar->domains))
595 				enable = true;
596 			LIST_INSERT_HEAD(&dmar->domains, domain, link);
597 			ctx_id_entry_init(ctx, ctxp, false, bus);
598 			if (dev != NULL) {
599 				device_printf(dev,
600 			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
601 				    "agaw %d %s-mapped\n",
602 				    dmar->iommu.unit, dmar->segment, bus, slot,
603 				    func, rid, domain->domain, domain->mgaw,
604 				    domain->agaw, id_mapped ? "id" : "re");
605 			}
606 			dmar_unmap_pgtbl(sf);
607 		} else {
608 			dmar_unmap_pgtbl(sf);
609 			dmar_domain_destroy(domain1);
610 			/* Nothing needs to be done to destroy ctx1. */
611 			free(ctx1, M_DMAR_CTX);
612 			domain = CTX2DOM(ctx);
613 			ctx->refs++; /* tag referenced us */
614 		}
615 	} else {
616 		domain = CTX2DOM(ctx);
617 		if (ctx->context.tag->owner == NULL)
618 			ctx->context.tag->owner = dev;
619 		ctx->refs++; /* tag referenced us */
620 	}
621 
622 	error = dmar_flush_for_ctx_entry(dmar, enable);
623 	if (error != 0) {
624 		dmar_free_ctx_locked(dmar, ctx);
625 		TD_PINNED_ASSERT;
626 		return (NULL);
627 	}
628 
629 	/*
630 	 * The dmar lock was potentially dropped between check for the
631 	 * empty context list and now.  Recheck the state of GCMD_TE
632 	 * to avoid unneeded command.
633 	 */
634 	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
635 		error = dmar_enable_translation(dmar);
636 		if (error == 0) {
637 			if (bootverbose) {
638 				printf("dmar%d: enabled translation\n",
639 				    dmar->iommu.unit);
640 			}
641 		} else {
642 			printf("dmar%d: enabling translation failed, "
643 			    "error %d\n", dmar->iommu.unit, error);
644 			dmar_free_ctx_locked(dmar, ctx);
645 			TD_PINNED_ASSERT;
646 			return (NULL);
647 		}
648 	}
649 	DMAR_UNLOCK(dmar);
650 	TD_PINNED_ASSERT;
651 	return (ctx);
652 }
653 
654 struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit * dmar,device_t dev,uint16_t rid,bool id_mapped,bool rmrr_init)655 dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
656     bool id_mapped, bool rmrr_init)
657 {
658 	int dev_domain, dev_path_len, dev_busno;
659 
660 	dev_domain = pci_get_domain(dev);
661 	dev_path_len = dmar_dev_depth(dev);
662 	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
663 	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
664 	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
665 	    dev_path, dev_path_len, id_mapped, rmrr_init));
666 }
667 
668 struct dmar_ctx *
dmar_get_ctx_for_devpath(struct dmar_unit * dmar,uint16_t rid,int dev_domain,int dev_busno,const void * dev_path,int dev_path_len,bool id_mapped,bool rmrr_init)669 dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
670     int dev_domain, int dev_busno,
671     const void *dev_path, int dev_path_len,
672     bool id_mapped, bool rmrr_init)
673 {
674 
675 	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
676 	    dev_path, dev_path_len, id_mapped, rmrr_init));
677 }
678 
679 int
dmar_move_ctx_to_domain(struct dmar_domain * domain,struct dmar_ctx * ctx)680 dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
681 {
682 	struct dmar_unit *dmar;
683 	struct dmar_domain *old_domain;
684 	dmar_ctx_entry_t *ctxp;
685 	struct sf_buf *sf;
686 	int error;
687 
688 	dmar = domain->dmar;
689 	old_domain = CTX2DOM(ctx);
690 	if (domain == old_domain)
691 		return (0);
692 	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
693 	    ("domain %p %u moving between dmars %u %u", domain,
694 	    domain->domain, old_domain->iodom.iommu->unit,
695 	    domain->iodom.iommu->unit));
696 	TD_PREP_PINNED_ASSERT;
697 
698 	ctxp = dmar_map_ctx_entry(ctx, &sf);
699 	DMAR_LOCK(dmar);
700 	dmar_ctx_unlink(ctx);
701 	ctx->context.domain = &domain->iodom;
702 	dmar_ctx_link(ctx);
703 	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
704 	dmar_unmap_pgtbl(sf);
705 	error = dmar_flush_for_ctx_entry(dmar, true);
706 	/* If flush failed, rolling back would not work as well. */
707 	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
708 	    dmar->iommu.unit, ctx->context.rid, old_domain->domain,
709 	    domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
710 	    "id" : "re");
711 	dmar_unref_domain_locked(dmar, old_domain);
712 	TD_PINNED_ASSERT;
713 	return (error);
714 }
715 
716 static void
dmar_unref_domain_locked(struct dmar_unit * dmar,struct dmar_domain * domain)717 dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
718 {
719 
720 	DMAR_ASSERT_LOCKED(dmar);
721 	KASSERT(domain->refs >= 1,
722 	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
723 	    domain->refs));
724 	KASSERT(domain->refs > domain->ctx_cnt,
725 	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
726 	    domain->refs, domain->ctx_cnt));
727 
728 	if (domain->refs > 1) {
729 		domain->refs--;
730 		DMAR_UNLOCK(dmar);
731 		return;
732 	}
733 
734 	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
735 	    ("lost ref on RMRR domain %p", domain));
736 
737 	LIST_REMOVE(domain, link);
738 	DMAR_UNLOCK(dmar);
739 
740 	taskqueue_drain(dmar->iommu.delayed_taskqueue,
741 	    &domain->iodom.unload_task);
742 	dmar_domain_destroy(domain);
743 }
744 
745 void
dmar_free_ctx_locked(struct dmar_unit * dmar,struct dmar_ctx * ctx)746 dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
747 {
748 	struct sf_buf *sf;
749 	dmar_ctx_entry_t *ctxp;
750 	struct dmar_domain *domain;
751 
752 	DMAR_ASSERT_LOCKED(dmar);
753 	KASSERT(ctx->refs >= 1,
754 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
755 
756 	/*
757 	 * If our reference is not last, only the dereference should
758 	 * be performed.
759 	 */
760 	if (ctx->refs > 1) {
761 		ctx->refs--;
762 		DMAR_UNLOCK(dmar);
763 		return;
764 	}
765 
766 	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
767 	    ("lost ref on disabled ctx %p", ctx));
768 
769 	/*
770 	 * Otherwise, the context entry must be cleared before the
771 	 * page table is destroyed.  The mapping of the context
772 	 * entries page could require sleep, unlock the dmar.
773 	 */
774 	DMAR_UNLOCK(dmar);
775 	TD_PREP_PINNED_ASSERT;
776 	ctxp = dmar_map_ctx_entry(ctx, &sf);
777 	DMAR_LOCK(dmar);
778 	KASSERT(ctx->refs >= 1,
779 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
780 
781 	/*
782 	 * Other thread might have referenced the context, in which
783 	 * case again only the dereference should be performed.
784 	 */
785 	if (ctx->refs > 1) {
786 		ctx->refs--;
787 		DMAR_UNLOCK(dmar);
788 		dmar_unmap_pgtbl(sf);
789 		TD_PINNED_ASSERT;
790 		return;
791 	}
792 
793 	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
794 	    ("lost ref on disabled ctx %p", ctx));
795 
796 	/*
797 	 * Clear the context pointer and flush the caches.
798 	 * XXXKIB: cannot do this if any RMRR entries are still present.
799 	 */
800 	dmar_pte_clear(&ctxp->ctx1);
801 	ctxp->ctx2 = 0;
802 	dmar_flush_ctx_to_ram(dmar, ctxp);
803 	dmar_inv_ctx_glob(dmar);
804 	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
805 		if (dmar->qi_enabled)
806 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
807 		else
808 			dmar_inv_iotlb_glob(dmar);
809 	}
810 	dmar_unmap_pgtbl(sf);
811 	domain = CTX2DOM(ctx);
812 	dmar_ctx_unlink(ctx);
813 	free(ctx->context.tag, M_DMAR_CTX);
814 	free(ctx, M_DMAR_CTX);
815 	dmar_unref_domain_locked(dmar, domain);
816 	TD_PINNED_ASSERT;
817 }
818 
/*
 * Unlocked entry point for dropping a context reference: take the lock
 * of the owning unit and let dmar_free_ctx_locked() do the work, which
 * also releases the lock.
 */
void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *owner;

	owner = CTX2DMAR(ctx);
	DMAR_LOCK(owner);
	dmar_free_ctx_locked(owner, ctx);
}
828 
829 /*
830  * Returns with the domain locked.
831  */
832 struct dmar_ctx *
dmar_find_ctx_locked(struct dmar_unit * dmar,uint16_t rid)833 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
834 {
835 	struct dmar_domain *domain;
836 	struct dmar_ctx *ctx;
837 
838 	DMAR_ASSERT_LOCKED(dmar);
839 
840 	LIST_FOREACH(domain, &dmar->domains, link) {
841 		LIST_FOREACH(ctx, &domain->contexts, link) {
842 			if (ctx->context.rid == rid)
843 				return (ctx);
844 		}
845 	}
846 	return (NULL);
847 }
848 
849 void
dmar_domain_free_entry(struct iommu_map_entry * entry,bool free)850 dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
851 {
852 	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
853 		iommu_gas_free_region(entry);
854 	else
855 		iommu_gas_free_space(entry);
856 	if (free)
857 		iommu_gas_free_entry(entry);
858 	else
859 		entry->flags = 0;
860 }
861 
862 /*
863  * If the given value for "free" is true, then the caller must not be using
864  * the entry's dmamap_link field.
865  */
866 void
iommu_domain_unload_entry(struct iommu_map_entry * entry,bool free,bool cansleep)867 iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
868     bool cansleep)
869 {
870 	struct dmar_domain *domain;
871 	struct dmar_unit *unit;
872 
873 	domain = IODOM2DOM(entry->domain);
874 	unit = DOM2DMAR(domain);
875 
876 	/*
877 	 * If "free" is false, then the IOTLB invalidation must be performed
878 	 * synchronously.  Otherwise, the caller might free the entry before
879 	 * dmar_qi_task() is finished processing it.
880 	 */
881 	if (unit->qi_enabled) {
882 		if (free) {
883 			DMAR_LOCK(unit);
884 			dmar_qi_invalidate_locked(domain, entry, true);
885 			DMAR_UNLOCK(unit);
886 		} else {
887 			dmar_qi_invalidate_sync(domain, entry->start,
888 			    entry->end - entry->start, cansleep);
889 			dmar_domain_free_entry(entry, false);
890 		}
891 	} else {
892 		domain_flush_iotlb_sync(domain, entry->start, entry->end -
893 		    entry->start);
894 		dmar_domain_free_entry(entry, free);
895 	}
896 }
897 
898 static bool
dmar_domain_unload_emit_wait(struct dmar_domain * domain,struct iommu_map_entry * entry)899 dmar_domain_unload_emit_wait(struct dmar_domain *domain,
900     struct iommu_map_entry *entry)
901 {
902 
903 	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
904 		return (true);
905 	return (domain->batch_no++ % dmar_batch_coalesce == 0);
906 }
907 
908 void
iommu_domain_unload(struct iommu_domain * iodom,struct iommu_map_entries_tailq * entries,bool cansleep)909 iommu_domain_unload(struct iommu_domain *iodom,
910     struct iommu_map_entries_tailq *entries, bool cansleep)
911 {
912 	struct dmar_domain *domain;
913 	struct dmar_unit *unit;
914 	struct iommu_map_entry *entry, *entry1;
915 	int error __diagused;
916 
917 	domain = IODOM2DOM(iodom);
918 	unit = DOM2DMAR(domain);
919 
920 	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
921 		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
922 		    ("not mapped entry %p %p", domain, entry));
923 		error = iodom->ops->unmap(iodom, entry->start, entry->end -
924 		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
925 		KASSERT(error == 0, ("unmap %p error %d", domain, error));
926 		if (!unit->qi_enabled) {
927 			domain_flush_iotlb_sync(domain, entry->start,
928 			    entry->end - entry->start);
929 			TAILQ_REMOVE(entries, entry, dmamap_link);
930 			dmar_domain_free_entry(entry, true);
931 		}
932 	}
933 	if (TAILQ_EMPTY(entries))
934 		return;
935 
936 	KASSERT(unit->qi_enabled, ("loaded entry left"));
937 	DMAR_LOCK(unit);
938 	while ((entry = TAILQ_FIRST(entries)) != NULL) {
939 		TAILQ_REMOVE(entries, entry, dmamap_link);
940 		dmar_qi_invalidate_locked(domain, entry,
941 		    dmar_domain_unload_emit_wait(domain, entry));
942 	}
943 	DMAR_UNLOCK(unit);
944 }
945 
946 struct iommu_ctx *
iommu_get_ctx(struct iommu_unit * iommu,device_t dev,uint16_t rid,bool id_mapped,bool rmrr_init)947 iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
948     bool id_mapped, bool rmrr_init)
949 {
950 	struct dmar_unit *dmar;
951 	struct dmar_ctx *ret;
952 
953 	dmar = IOMMU2DMAR(iommu);
954 
955 	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
956 
957 	return (CTX2IOCTX(ret));
958 }
959 
/*
 * Generic IOMMU entry point for dropping a context reference with the
 * unit lock held: convert both arguments to their DMAR types and call
 * dmar_free_ctx_locked().
 */
void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{

	dmar_free_ctx_locked(IOMMU2DMAR(iommu), IOCTX2CTX(context));
}
971 
/*
 * Generic IOMMU entry point for dropping a context reference without the
 * unit lock held: convert to the DMAR context and call dmar_free_ctx().
 */
void
iommu_free_ctx(struct iommu_ctx *context)
{

	dmar_free_ctx(IOCTX2CTX(context));
}
981