xref: /freebsd-13-stable/sys/amd64/vmm/intel/vtd.c (revision 3bc80996974a61a4223eae4c1ccd47b6ee32a48a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/kernel.h>
32 #include <sys/systm.h>
33 #include <sys/malloc.h>
34 
35 #include <vm/vm.h>
36 #include <vm/pmap.h>
37 
38 #include <dev/pci/pcireg.h>
39 
40 #include <machine/vmparam.h>
41 #include <contrib/dev/acpica/include/acpi.h>
42 
43 #include "io/iommu.h"
44 
45 /*
46  * Documented in the "Intel Virtualization Technology for Directed I/O",
47  * Architecture Spec, September 2008.
48  */
49 
/* Bit 0 of a DRHD 'Flags' field: the INCLUDE_PCI_ALL flag. */
#define VTD_DRHD_INCLUDE_PCI_ALL(Flags)  ((Flags) & 0x1)
51 
52 /* Section 10.4 "Register Descriptions" */
/*
 * Overlay for the first registers of a remapping unit's register window.
 * Every field is volatile because it is accessed as device memory.
 */
struct vtdmap {
	volatile uint32_t	version;	/* 0x00 */
	volatile uint32_t	res0;		/* 0x04: keeps 'cap' at 0x08 */
	volatile uint64_t	cap;		/* 0x08: see VTD_CAP_* */
	volatile uint64_t	ext_cap;	/* 0x10: see VTD_ECAP_* */
	volatile uint32_t	gcr;		/* 0x18: commands, VTD_GCR_* */
	volatile uint32_t	gsr;		/* 0x1c: status, VTD_GSR_* */
	volatile uint64_t	rta;		/* 0x20: root table address */
	volatile uint64_t	ccr;		/* 0x28: context cmd, VTD_CCR_* */
};
63 
/*
 * Field extractors for the capability register ('cap').
 *
 * SAGAW - bitmask of supported adjusted guest address widths
 * ND    - encoded number of domains, decoded by vtd_max_domains()
 * CM    - caching mode
 * SPS   - bitmask of supported super page sizes
 * RWBF  - when set, vtd_wbflush() must issue a write-buffer flush
 */
#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

/*
 * Field extractors for the extended capability register ('ext_cap').
 *
 * DI        - when set, vtd_add_device() uses VTD_CTX_TT_ALL
 * COHERENCY - when clear, vtd_wbflush() invalidates the CPU caches
 * IRO       - offset of the IOTLB registers in units of 16 bytes
 */
#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

/* Command bits written to 'gcr'. */
#define	VTD_GCR_WBF		(1 << 27)	/* write buffer flush */
#define	VTD_GCR_SRTP		(1 << 30)	/* set root table pointer */
#define	VTD_GCR_TE		(1U << 31)	/* translation enable */

/* Status bits read from 'gsr'; each mirrors the 'gcr' bit above. */
#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

/* Context command register ('ccr') bits. */
#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

/* IOTLB invalidate register bits. */
#define	VTD_IIR_IVT		(1UL << 63)	/* invalidate IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
/* Presumably the bit position of the domain id field - TODO confirm. */
#define	VTD_IIR_DOMAIN_P	32

/* Root and context table entry bits. */
#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

/* Page table entry bits and the mask selecting the address field. */
#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)

/*
 * Index of the first of the two 64-bit words that make up the context
 * entry selected by the low 8 bits of a requester id.
 */
#define VTD_RID2IDX(rid)	(((rid) & 0xff) * 2)
103 
104 struct domain {
105 	uint64_t	*ptp;		/* first level page table page */
106 	int		pt_levels;	/* number of page table levels */
107 	int		addrwidth;	/* 'AW' field in context entry */
108 	int		spsmask;	/* supported super page sizes */
109 	u_int		id;		/* domain id */
110 	vm_paddr_t	maxaddr;	/* highest address to be mapped */
111 	SLIST_ENTRY(domain) next;
112 };
113 
114 static SLIST_HEAD(, domain) domhead;
115 
116 #define	DRHD_MAX_UNITS	16
117 static ACPI_DMAR_HARDWARE_UNIT	*drhds[DRHD_MAX_UNITS];
118 static int			drhd_num;
119 static struct vtdmap		*vtdmaps[DRHD_MAX_UNITS];
120 static int			max_domains;
121 typedef int			(*drhd_ident_func_t)(void);
122 
123 static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
124 static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
125 
126 static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
127 
128 static int
vtd_max_domains(struct vtdmap * vtdmap)129 vtd_max_domains(struct vtdmap *vtdmap)
130 {
131 	int nd;
132 
133 	nd = VTD_CAP_ND(vtdmap->cap);
134 
135 	switch (nd) {
136 	case 0:
137 		return (16);
138 	case 1:
139 		return (64);
140 	case 2:
141 		return (256);
142 	case 3:
143 		return (1024);
144 	case 4:
145 		return (4 * 1024);
146 	case 5:
147 		return (16 * 1024);
148 	case 6:
149 		return (64 * 1024);
150 	default:
151 		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
152 	}
153 }
154 
155 static u_int
domain_id(void)156 domain_id(void)
157 {
158 	u_int id;
159 	struct domain *dom;
160 
161 	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
162 	for (id = 1; id < max_domains; id++) {
163 		SLIST_FOREACH(dom, &domhead, next) {
164 			if (dom->id == id)
165 				break;
166 		}
167 		if (dom == NULL)
168 			break;		/* found it */
169 	}
170 
171 	if (id >= max_domains)
172 		panic("domain ids exhausted");
173 
174 	return (id);
175 }
176 
177 static struct vtdmap *
vtd_device_scope(uint16_t rid)178 vtd_device_scope(uint16_t rid)
179 {
180 	int i, remaining, pathremaining;
181 	char *end, *pathend;
182 	struct vtdmap *vtdmap;
183 	ACPI_DMAR_HARDWARE_UNIT *drhd;
184 	ACPI_DMAR_DEVICE_SCOPE *device_scope;
185 	ACPI_DMAR_PCI_PATH *path;
186 
187 	for (i = 0; i < drhd_num; i++) {
188 		drhd = drhds[i];
189 
190 		if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
191 			/*
192 			 * From Intel VT-d arch spec, version 3.0:
193 			 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported
194 			 * for a Segment, it must be enumerated by BIOS after all other
195 			 * DRHD structures for the same Segment.
196 			 */
197 			vtdmap = vtdmaps[i];
198 			return(vtdmap);
199 		}
200 
201 		end = (char *)drhd + drhd->Header.Length;
202 		remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT);
203 		while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) {
204 			device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
205 			remaining -= device_scope->Length;
206 
207 			switch (device_scope->EntryType){
208 				/* 0x01 and 0x02 are PCI device entries */
209 				case 0x01:
210 				case 0x02:
211 					break;
212 				default:
213 					continue;
214 			}
215 
216 			if (PCI_RID2BUS(rid) != device_scope->Bus)
217 				continue;
218 
219 			pathend = (char *)device_scope + device_scope->Length;
220 			pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE);
221 			while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) {
222 				path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining);
223 				pathremaining -= sizeof(ACPI_DMAR_PCI_PATH);
224 
225 				if (PCI_RID2SLOT(rid) != path->Device)
226 					continue;
227 				if (PCI_RID2FUNC(rid) != path->Function)
228 					continue;
229 
230 				vtdmap = vtdmaps[i];
231 				return (vtdmap);
232 			}
233 		}
234 	}
235 
236 	/* No matching scope */
237 	return (NULL);
238 }
239 
240 static void
vtd_wbflush(struct vtdmap * vtdmap)241 vtd_wbflush(struct vtdmap *vtdmap)
242 {
243 
244 	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
245 		pmap_invalidate_cache();
246 
247 	if (VTD_CAP_RWBF(vtdmap->cap)) {
248 		vtdmap->gcr = VTD_GCR_WBF;
249 		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
250 			;
251 	}
252 }
253 
254 static void
vtd_ctx_global_invalidate(struct vtdmap * vtdmap)255 vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
256 {
257 
258 	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
259 	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
260 		;
261 }
262 
263 static void
vtd_iotlb_global_invalidate(struct vtdmap * vtdmap)264 vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
265 {
266 	int offset;
267 	volatile uint64_t *iotlb_reg, val;
268 
269 	vtd_wbflush(vtdmap);
270 
271 	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
272 	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
273 
274 	*iotlb_reg =  VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
275 		      VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
276 
277 	while (1) {
278 		val = *iotlb_reg;
279 		if ((val & VTD_IIR_IVT) == 0)
280 			break;
281 	}
282 }
283 
284 static void
vtd_translation_enable(struct vtdmap * vtdmap)285 vtd_translation_enable(struct vtdmap *vtdmap)
286 {
287 
288 	vtdmap->gcr = VTD_GCR_TE;
289 	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
290 		;
291 }
292 
293 static void
vtd_translation_disable(struct vtdmap * vtdmap)294 vtd_translation_disable(struct vtdmap *vtdmap)
295 {
296 
297 	vtdmap->gcr = 0;
298 	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
299 		;
300 }
301 
302 static int
vtd_init(void)303 vtd_init(void)
304 {
305 	int i, units, remaining, tmp;
306 	struct vtdmap *vtdmap;
307 	vm_paddr_t ctx_paddr;
308 	char *end, envname[32];
309 	unsigned long mapaddr;
310 	ACPI_STATUS status;
311 	ACPI_TABLE_DMAR *dmar;
312 	ACPI_DMAR_HEADER *hdr;
313 	ACPI_DMAR_HARDWARE_UNIT *drhd;
314 
315 	/*
316 	 * Allow the user to override the ACPI DMAR table by specifying the
317 	 * physical address of each remapping unit.
318 	 *
319 	 * The following example specifies two remapping units at
320 	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
321 	 * set vtd.regmap.0.addr=0xfed90000
322 	 * set vtd.regmap.1.addr=0xfeda0000
323 	 */
324 	for (units = 0; units < DRHD_MAX_UNITS; units++) {
325 		snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
326 		if (getenv_ulong(envname, &mapaddr) == 0)
327 			break;
328 		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
329 	}
330 
331 	if (units > 0)
332 		goto skip_dmar;
333 
334 	/* Search for DMAR table. */
335 	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
336 	if (ACPI_FAILURE(status))
337 		return (ENXIO);
338 
339 	end = (char *)dmar + dmar->Header.Length;
340 	remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
341 	while (remaining > sizeof(ACPI_DMAR_HEADER)) {
342 		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
343 		if (hdr->Length > remaining)
344 			break;
345 		/*
346 		 * From Intel VT-d arch spec, version 1.3:
347 		 * BIOS implementations must report mapping structures
348 		 * in numerical order, i.e. All remapping structures of
349 		 * type 0 (DRHD) enumerated before remapping structures of
350 		 * type 1 (RMRR) and so forth.
351 		 */
352 		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
353 			break;
354 
355 		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
356 		drhds[units] = drhd;
357 		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
358 		if (++units >= DRHD_MAX_UNITS)
359 			break;
360 		remaining -= hdr->Length;
361 	}
362 
363 	if (units <= 0)
364 		return (ENXIO);
365 
366 skip_dmar:
367 	drhd_num = units;
368 
369 	max_domains = 64 * 1024; /* maximum valid value */
370 	for (i = 0; i < drhd_num; i++){
371 		vtdmap = vtdmaps[i];
372 
373 		if (VTD_CAP_CM(vtdmap->cap) != 0)
374 			panic("vtd_init: invalid caching mode");
375 
376 		/* take most compatible (minimum) value */
377 		if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
378 			max_domains = tmp;
379 	}
380 
381 	/*
382 	 * Set up the root-table to point to the context-entry tables
383 	 */
384 	for (i = 0; i < 256; i++) {
385 		ctx_paddr = vtophys(ctx_tables[i]);
386 		if (ctx_paddr & PAGE_MASK)
387 			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
388 
389 		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
390 	}
391 
392 	return (0);
393 }
394 
/*
 * Counterpart of vtd_init().  Intentionally empty: vtd_init() only fills
 * in statically allocated tables, so there is nothing to release.
 */
static void
vtd_cleanup(void)
{

}
399 
400 static void
vtd_enable(void)401 vtd_enable(void)
402 {
403 	int i;
404 	struct vtdmap *vtdmap;
405 
406 	for (i = 0; i < drhd_num; i++) {
407 		vtdmap = vtdmaps[i];
408 		vtd_wbflush(vtdmap);
409 
410 		/* Update the root table address */
411 		vtdmap->rta = vtophys(root_table);
412 		vtdmap->gcr = VTD_GCR_SRTP;
413 		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
414 			;
415 
416 		vtd_ctx_global_invalidate(vtdmap);
417 		vtd_iotlb_global_invalidate(vtdmap);
418 
419 		vtd_translation_enable(vtdmap);
420 	}
421 }
422 
423 static void
vtd_disable(void)424 vtd_disable(void)
425 {
426 	int i;
427 	struct vtdmap *vtdmap;
428 
429 	for (i = 0; i < drhd_num; i++) {
430 		vtdmap = vtdmaps[i];
431 		vtd_translation_disable(vtdmap);
432 	}
433 }
434 
435 static void
vtd_add_device(void * arg,uint16_t rid)436 vtd_add_device(void *arg, uint16_t rid)
437 {
438 	int idx;
439 	uint64_t *ctxp;
440 	struct domain *dom = arg;
441 	vm_paddr_t pt_paddr;
442 	struct vtdmap *vtdmap;
443 	uint8_t bus;
444 
445 	bus = PCI_RID2BUS(rid);
446 	ctxp = ctx_tables[bus];
447 	pt_paddr = vtophys(dom->ptp);
448 	idx = VTD_RID2IDX(rid);
449 
450 	if (ctxp[idx] & VTD_CTX_PRESENT) {
451 		panic("vtd_add_device: device %x is already owned by "
452 		      "domain %d", rid,
453 		      (uint16_t)(ctxp[idx + 1] >> 8));
454 	}
455 
456 	if ((vtdmap = vtd_device_scope(rid)) == NULL)
457 		panic("vtd_add_device: device %x is not in scope for "
458 		      "any DMA remapping unit", rid);
459 
460 	/*
461 	 * Order is important. The 'present' bit is set only after all fields
462 	 * of the context pointer are initialized.
463 	 */
464 	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
465 
466 	if (VTD_ECAP_DI(vtdmap->ext_cap))
467 		ctxp[idx] = VTD_CTX_TT_ALL;
468 	else
469 		ctxp[idx] = 0;
470 
471 	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
472 
473 	/*
474 	 * 'Not Present' entries are not cached in either the Context Cache
475 	 * or in the IOTLB, so there is no need to invalidate either of them.
476 	 */
477 }
478 
479 static void
vtd_remove_device(void * arg,uint16_t rid)480 vtd_remove_device(void *arg, uint16_t rid)
481 {
482 	int i, idx;
483 	uint64_t *ctxp;
484 	struct vtdmap *vtdmap;
485 	uint8_t bus;
486 
487 	bus = PCI_RID2BUS(rid);
488 	ctxp = ctx_tables[bus];
489 	idx = VTD_RID2IDX(rid);
490 
491 	/*
492 	 * Order is important. The 'present' bit is must be cleared first.
493 	 */
494 	ctxp[idx] = 0;
495 	ctxp[idx + 1] = 0;
496 
497 	/*
498 	 * Invalidate the Context Cache and the IOTLB.
499 	 *
500 	 * XXX use device-selective invalidation for Context Cache
501 	 * XXX use domain-selective invalidation for IOTLB
502 	 */
503 	for (i = 0; i < drhd_num; i++) {
504 		vtdmap = vtdmaps[i];
505 		vtd_ctx_global_invalidate(vtdmap);
506 		vtd_iotlb_global_invalidate(vtdmap);
507 	}
508 }
509 
510 #define	CREATE_MAPPING	0
511 #define	REMOVE_MAPPING	1
512 
513 static uint64_t
vtd_update_mapping(void * arg,vm_paddr_t gpa,vm_paddr_t hpa,uint64_t len,int remove)514 vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
515 		   int remove)
516 {
517 	struct domain *dom;
518 	int i, spshift, ptpshift, ptpindex, nlevels;
519 	uint64_t spsize, *ptp;
520 
521 	dom = arg;
522 	ptpindex = 0;
523 	ptpshift = 0;
524 
525 	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__,
526 	    gpa, len));
527 	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond "
528 	    "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr));
529 
530 	if (gpa & PAGE_MASK)
531 		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
532 
533 	if (hpa & PAGE_MASK)
534 		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
535 
536 	if (len & PAGE_MASK)
537 		panic("vtd_create_mapping: unaligned len 0x%0lx", len);
538 
539 	/*
540 	 * Compute the size of the mapping that we can accommodate.
541 	 *
542 	 * This is based on three factors:
543 	 * - supported super page size
544 	 * - alignment of the region starting at 'gpa' and 'hpa'
545 	 * - length of the region 'len'
546 	 */
547 	spshift = 48;
548 	for (i = 3; i >= 0; i--) {
549 		spsize = 1UL << spshift;
550 		if ((dom->spsmask & (1 << i)) != 0 &&
551 		    (gpa & (spsize - 1)) == 0 &&
552 		    (hpa & (spsize - 1)) == 0 &&
553 		    (len >= spsize)) {
554 			break;
555 		}
556 		spshift -= 9;
557 	}
558 
559 	ptp = dom->ptp;
560 	nlevels = dom->pt_levels;
561 	while (--nlevels >= 0) {
562 		ptpshift = 12 + nlevels * 9;
563 		ptpindex = (gpa >> ptpshift) & 0x1FF;
564 
565 		/* We have reached the leaf mapping */
566 		if (spshift >= ptpshift) {
567 			break;
568 		}
569 
570 		/*
571 		 * We are working on a non-leaf page table page.
572 		 *
573 		 * Create a downstream page table page if necessary and point
574 		 * to it from the current page table.
575 		 */
576 		if (ptp[ptpindex] == 0) {
577 			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
578 			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
579 		}
580 
581 		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
582 	}
583 
584 	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
585 		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
586 
587 	/*
588 	 * Update the 'gpa' -> 'hpa' mapping
589 	 */
590 	if (remove) {
591 		ptp[ptpindex] = 0;
592 	} else {
593 		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
594 
595 		if (nlevels > 0)
596 			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
597 	}
598 
599 	return (1UL << ptpshift);
600 }
601 
602 static uint64_t
vtd_create_mapping(void * arg,vm_paddr_t gpa,vm_paddr_t hpa,uint64_t len)603 vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
604 {
605 
606 	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
607 }
608 
609 static uint64_t
vtd_remove_mapping(void * arg,vm_paddr_t gpa,uint64_t len)610 vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
611 {
612 
613 	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
614 }
615 
616 static void
vtd_invalidate_tlb(void * dom)617 vtd_invalidate_tlb(void *dom)
618 {
619 	int i;
620 	struct vtdmap *vtdmap;
621 
622 	/*
623 	 * Invalidate the IOTLB.
624 	 * XXX use domain-selective invalidation for IOTLB
625 	 */
626 	for (i = 0; i < drhd_num; i++) {
627 		vtdmap = vtdmaps[i];
628 		vtd_iotlb_global_invalidate(vtdmap);
629 	}
630 }
631 
632 static void *
vtd_create_domain(vm_paddr_t maxaddr)633 vtd_create_domain(vm_paddr_t maxaddr)
634 {
635 	struct domain *dom;
636 	vm_paddr_t addr;
637 	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
638 	struct vtdmap *vtdmap;
639 
640 	if (drhd_num <= 0)
641 		panic("vtd_create_domain: no dma remapping hardware available");
642 
643 	/*
644 	 * Calculate AGAW.
645 	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
646 	 */
647 	addr = 0;
648 	for (gaw = 0; addr < maxaddr; gaw++)
649 		addr = 1ULL << gaw;
650 
651 	res = (gaw - 12) % 9;
652 	if (res == 0)
653 		agaw = gaw;
654 	else
655 		agaw = gaw + 9 - res;
656 
657 	if (agaw > 64)
658 		agaw = 64;
659 
660 	/*
661 	 * Select the smallest Supported AGAW and the corresponding number
662 	 * of page table levels.
663 	 */
664 	pt_levels = 2;
665 	sagaw = 30;
666 	addrwidth = 0;
667 
668 	tmp = ~0;
669 	for (i = 0; i < drhd_num; i++) {
670 		vtdmap = vtdmaps[i];
671 		/* take most compatible value */
672 		tmp &= VTD_CAP_SAGAW(vtdmap->cap);
673 	}
674 
675 	for (i = 0; i < 5; i++) {
676 		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
677 			break;
678 		pt_levels++;
679 		addrwidth++;
680 		sagaw += 9;
681 		if (sagaw > 64)
682 			sagaw = 64;
683 	}
684 
685 	if (i >= 5) {
686 		panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
687 		      tmp, agaw);
688 	}
689 
690 	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
691 	dom->pt_levels = pt_levels;
692 	dom->addrwidth = addrwidth;
693 	dom->id = domain_id();
694 	dom->maxaddr = maxaddr;
695 	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
696 	if ((uintptr_t)dom->ptp & PAGE_MASK)
697 		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
698 
699 #ifdef notyet
700 	/*
701 	 * XXX superpage mappings for the iommu do not work correctly.
702 	 *
703 	 * By default all physical memory is mapped into the host_domain.
704 	 * When a VM is allocated wired memory the pages belonging to it
705 	 * are removed from the host_domain and added to the vm's domain.
706 	 *
707 	 * If the page being removed was mapped using a superpage mapping
708 	 * in the host_domain then we need to demote the mapping before
709 	 * removing the page.
710 	 *
711 	 * There is not any code to deal with the demotion at the moment
712 	 * so we disable superpage mappings altogether.
713 	 */
714 	dom->spsmask = ~0;
715 	for (i = 0; i < drhd_num; i++) {
716 		vtdmap = vtdmaps[i];
717 		/* take most compatible value */
718 		dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
719 	}
720 #endif
721 
722 	SLIST_INSERT_HEAD(&domhead, dom, next);
723 
724 	return (dom);
725 }
726 
727 static void
vtd_free_ptp(uint64_t * ptp,int level)728 vtd_free_ptp(uint64_t *ptp, int level)
729 {
730 	int i;
731 	uint64_t *nlp;
732 
733 	if (level > 1) {
734 		for (i = 0; i < 512; i++) {
735 			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
736 				continue;
737 			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
738 				continue;
739 			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
740 			vtd_free_ptp(nlp, level - 1);
741 		}
742 	}
743 
744 	bzero(ptp, PAGE_SIZE);
745 	free(ptp, M_VTD);
746 }
747 
748 static void
vtd_destroy_domain(void * arg)749 vtd_destroy_domain(void *arg)
750 {
751 	struct domain *dom;
752 
753 	dom = arg;
754 
755 	SLIST_REMOVE(&domhead, dom, domain, next);
756 	vtd_free_ptp(dom->ptp, dom->pt_levels);
757 	free(dom, M_VTD);
758 }
759 
760 const struct iommu_ops iommu_ops_intel = {
761 	.init = vtd_init,
762 	.cleanup = vtd_cleanup,
763 	.enable = vtd_enable,
764 	.disable = vtd_disable,
765 	.create_domain = vtd_create_domain,
766 	.destroy_domain = vtd_destroy_domain,
767 	.create_mapping = vtd_create_mapping,
768 	.remove_mapping = vtd_remove_mapping,
769 	.add_device = vtd_add_device,
770 	.remove_device = vtd_remove_device,
771 	.invalidate_tlb = vtd_invalidate_tlb,
772 };
773