xref: /freebsd-13-stable/sys/kern/subr_intr.c (revision ffe9a1987bb7d57b21bb19e20fd9f66dd98e7a78)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
/*
 *	New-style Interrupt Framework
 *
 *  TODO: - add support for disconnected PICs.
 *        - support enabling IPIs (PPIs) on other CPUs that have already
 *          been started.
 *        - complete the support for removable PICs.
 */
36 
37 #include "opt_ddb.h"
38 #include "opt_hwpmc_hooks.h"
39 #include "opt_iommu.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/bitstring.h>
44 #include <sys/bus.h>
45 #include <sys/conf.h>
46 #include <sys/cpuset.h>
47 #include <sys/interrupt.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mutex.h>
52 #include <sys/proc.h>
53 #include <sys/queue.h>
54 #include <sys/rman.h>
55 #include <sys/sched.h>
56 #include <sys/smp.h>
57 #include <sys/sysctl.h>
58 #include <sys/syslog.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <sys/vmmeter.h>
62 #ifdef HWPMC_HOOKS
63 #include <sys/pmckern.h>
64 #endif
65 
66 #include <machine/atomic.h>
67 #include <machine/cpu.h>
68 #include <machine/intr.h>
69 #include <machine/smp.h>
70 #include <machine/stdarg.h>
71 
72 #ifdef DDB
73 #include <ddb/ddb.h>
74 #endif
75 
76 #ifdef IOMMU
77 #include <dev/iommu/iommu_msi.h>
78 #endif
79 
80 #include "pic_if.h"
81 #include "msi_if.h"
82 
83 #define	INTRNAME_LEN	(2*MAXCOMLEN + 1)
84 
85 #ifdef DEBUG
86 #define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
87     printf(fmt,##args); } while (0)
88 #else
89 #define debugf(fmt, args...)
90 #endif
91 
92 MALLOC_DECLARE(M_INTRNG);
93 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
94 
95 /* Main interrupt handler called from assembler -> 'hidden' for C code. */
96 void intr_irq_handler(struct trapframe *tf);
97 
98 /* Root interrupt controller stuff. */
99 device_t intr_irq_root_dev;
100 static intr_irq_filter_t *irq_root_filter;
101 static void *irq_root_arg;
102 static u_int irq_root_ipicount;
103 
104 struct intr_pic_child {
105 	SLIST_ENTRY(intr_pic_child)	 pc_next;
106 	struct intr_pic			*pc_pic;
107 	intr_child_irq_filter_t		*pc_filter;
108 	void				*pc_filter_arg;
109 	uintptr_t			 pc_start;
110 	uintptr_t			 pc_length;
111 };
112 
113 /* Interrupt controller definition. */
114 struct intr_pic {
115 	SLIST_ENTRY(intr_pic)	pic_next;
116 	intptr_t		pic_xref;	/* hardware identification */
117 	device_t		pic_dev;
118 /* Only one of FLAG_PIC or FLAG_MSI may be set */
119 #define	FLAG_PIC	(1 << 0)
120 #define	FLAG_MSI	(1 << 1)
121 #define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
122 	u_int			pic_flags;
123 	struct mtx		pic_child_lock;
124 	SLIST_HEAD(, intr_pic_child) pic_children;
125 };
126 
127 static struct mtx pic_list_lock;
128 static SLIST_HEAD(, intr_pic) pic_list;
129 
130 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);
131 
132 /* Interrupt source definition. */
133 static struct mtx isrc_table_lock;
134 static struct intr_irqsrc **irq_sources;
135 u_int irq_next_free;
136 
137 #ifdef SMP
138 #ifdef EARLY_AP_STARTUP
139 static bool irq_assign_cpu = true;
140 #else
141 static bool irq_assign_cpu = false;
142 #endif
143 #endif
144 
145 u_int intr_nirq = NIRQ;
146 SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
147     "Number of IRQs");
148 
149 /* Data for MI statistics reporting. */
150 u_long *intrcnt;
151 char *intrnames;
152 size_t sintrcnt;
153 size_t sintrnames;
154 int nintrcnt;
155 static bitstr_t *intrcnt_bitmap;
156 
157 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
158 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
159 static struct intr_map_data * intr_map_get_map_data(u_int res_id);
160 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
161     struct intr_map_data **data);
162 
163 /*
164  *  Interrupt framework initialization routine.
165  */
166 static void
intr_irq_init(void * dummy __unused)167 intr_irq_init(void *dummy __unused)
168 {
169 
170 	SLIST_INIT(&pic_list);
171 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
172 
173 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
174 
175 	/*
176 	 * - 2 counters for each I/O interrupt.
177 	 * - MAXCPU counters for each IPI counters for SMP.
178 	 */
179 	nintrcnt = intr_nirq * 2;
180 #ifdef SMP
181 	nintrcnt += INTR_IPI_COUNT * MAXCPU;
182 #endif
183 
184 	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTRNG,
185 	    M_WAITOK | M_ZERO);
186 	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTRNG,
187 	    M_WAITOK | M_ZERO);
188 	sintrcnt = nintrcnt * sizeof(u_long);
189 	sintrnames = nintrcnt * INTRNAME_LEN;
190 
191 	/* Allocate the bitmap tracking counter allocations. */
192 	intrcnt_bitmap = bit_alloc(nintrcnt, M_INTRNG, M_WAITOK | M_ZERO);
193 
194 	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
195 	    M_INTRNG, M_WAITOK | M_ZERO);
196 }
197 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
198 
199 static void
intrcnt_setname(const char * name,int index)200 intrcnt_setname(const char *name, int index)
201 {
202 
203 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
204 	    INTRNAME_LEN - 1, name);
205 }
206 
207 /*
208  *  Update name for interrupt source with interrupt event.
209  */
210 static void
intrcnt_updatename(struct intr_irqsrc * isrc)211 intrcnt_updatename(struct intr_irqsrc *isrc)
212 {
213 
214 	/* QQQ: What about stray counter name? */
215 	mtx_assert(&isrc_table_lock, MA_OWNED);
216 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
217 }
218 
219 /*
220  *  Virtualization for interrupt source interrupt counter increment.
221  */
222 static inline void
isrc_increment_count(struct intr_irqsrc * isrc)223 isrc_increment_count(struct intr_irqsrc *isrc)
224 {
225 
226 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
227 		atomic_add_long(&isrc->isrc_count[0], 1);
228 	else
229 		isrc->isrc_count[0]++;
230 }
231 
232 /*
233  *  Virtualization for interrupt source interrupt stray counter increment.
234  */
235 static inline void
isrc_increment_straycount(struct intr_irqsrc * isrc)236 isrc_increment_straycount(struct intr_irqsrc *isrc)
237 {
238 
239 	isrc->isrc_count[1]++;
240 }
241 
242 /*
243  *  Virtualization for interrupt source interrupt name update.
244  */
245 static void
isrc_update_name(struct intr_irqsrc * isrc,const char * name)246 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
247 {
248 	char str[INTRNAME_LEN];
249 
250 	mtx_assert(&isrc_table_lock, MA_OWNED);
251 
252 	if (name != NULL) {
253 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
254 		intrcnt_setname(str, isrc->isrc_index);
255 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
256 		    name);
257 		intrcnt_setname(str, isrc->isrc_index + 1);
258 	} else {
259 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
260 		intrcnt_setname(str, isrc->isrc_index);
261 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
262 		intrcnt_setname(str, isrc->isrc_index + 1);
263 	}
264 }
265 
266 /*
267  *  Virtualization for interrupt source interrupt counters setup.
268  */
269 static void
isrc_setup_counters(struct intr_irqsrc * isrc)270 isrc_setup_counters(struct intr_irqsrc *isrc)
271 {
272 	int index;
273 
274 	mtx_assert(&isrc_table_lock, MA_OWNED);
275 
276 	/*
277 	 * Allocate two counter values, the second tracking "stray" interrupts.
278 	 */
279 	bit_ffc_area(intrcnt_bitmap, nintrcnt, 2, &index);
280 	if (index == -1)
281 		panic("Failed to allocate 2 counters. Array exhausted?");
282 	bit_nset(intrcnt_bitmap, index, index + 1);
283 	isrc->isrc_index = index;
284 	isrc->isrc_count = &intrcnt[index];
285 	isrc_update_name(isrc, NULL);
286 }
287 
288 /*
289  *  Virtualization for interrupt source interrupt counters release.
290  */
291 static void
isrc_release_counters(struct intr_irqsrc * isrc)292 isrc_release_counters(struct intr_irqsrc *isrc)
293 {
294 	int idx = isrc->isrc_index;
295 
296 	mtx_assert(&isrc_table_lock, MA_OWNED);
297 
298 	bit_nclear(intrcnt_bitmap, idx, idx + 1);
299 }
300 
301 #ifdef SMP
302 /*
303  *  Virtualization for interrupt source IPI counters setup.
304  */
305 u_long *
intr_ipi_setup_counters(const char * name)306 intr_ipi_setup_counters(const char *name)
307 {
308 	u_int index, i;
309 	char str[INTRNAME_LEN];
310 
311 	mtx_lock(&isrc_table_lock);
312 
313 	/*
314 	 * We should never have a problem finding MAXCPU contiguous counters,
315 	 * in practice. Interrupts will be allocated sequentially during boot,
316 	 * so the array should fill from low to high index. Once reserved, the
317 	 * IPI counters will never be released. Similarly, we will not need to
318 	 * allocate more IPIs once the system is running.
319 	 */
320 	bit_ffc_area(intrcnt_bitmap, nintrcnt, MAXCPU, &index);
321 	if (index == -1)
322 		panic("Failed to allocate %d counters. Array exhausted?",
323 		    MAXCPU);
324 	bit_nset(intrcnt_bitmap, index, index + MAXCPU - 1);
325 	for (i = 0; i < MAXCPU; i++) {
326 		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
327 		intrcnt_setname(str, index + i);
328 	}
329 	mtx_unlock(&isrc_table_lock);
330 	return (&intrcnt[index]);
331 }
332 #endif
333 
334 /*
335  *  Main interrupt dispatch handler. It's called straight
336  *  from the assembler, where CPU interrupt is served.
337  */
338 void
intr_irq_handler(struct trapframe * tf)339 intr_irq_handler(struct trapframe *tf)
340 {
341 	struct trapframe * oldframe;
342 	struct thread * td;
343 
344 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
345 
346 	VM_CNT_INC(v_intr);
347 	critical_enter();
348 	td = curthread;
349 	oldframe = td->td_intr_frame;
350 	td->td_intr_frame = tf;
351 	irq_root_filter(irq_root_arg);
352 	td->td_intr_frame = oldframe;
353 	critical_exit();
354 #ifdef HWPMC_HOOKS
355 	if (pmc_hook && TRAPF_USERMODE(tf) &&
356 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
357 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
358 #endif
359 }
360 
361 int
intr_child_irq_handler(struct intr_pic * parent,uintptr_t irq)362 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
363 {
364 	struct intr_pic_child *child;
365 	bool found;
366 
367 	found = false;
368 	mtx_lock_spin(&parent->pic_child_lock);
369 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
370 		if (child->pc_start <= irq &&
371 		    irq < (child->pc_start + child->pc_length)) {
372 			found = true;
373 			break;
374 		}
375 	}
376 	mtx_unlock_spin(&parent->pic_child_lock);
377 
378 	if (found)
379 		return (child->pc_filter(child->pc_filter_arg, irq));
380 
381 	return (FILTER_STRAY);
382 }
383 
384 /*
385  *  interrupt controller dispatch function for interrupts. It should
386  *  be called straight from the interrupt controller, when associated interrupt
387  *  source is learned.
388  */
389 int
intr_isrc_dispatch(struct intr_irqsrc * isrc,struct trapframe * tf)390 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
391 {
392 
393 	KASSERT(isrc != NULL, ("%s: no source", __func__));
394 
395 	isrc_increment_count(isrc);
396 
397 #ifdef INTR_SOLO
398 	if (isrc->isrc_filter != NULL) {
399 		int error;
400 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
401 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
402 		if (error == FILTER_HANDLED)
403 			return (0);
404 	} else
405 #endif
406 	if (isrc->isrc_event != NULL) {
407 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
408 			return (0);
409 	}
410 
411 	isrc_increment_straycount(isrc);
412 	return (EINVAL);
413 }
414 
415 /*
416  *  Alloc unique interrupt number (resource handle) for interrupt source.
417  *
418  *  There could be various strategies how to allocate free interrupt number
419  *  (resource handle) for new interrupt source.
420  *
421  *  1. Handles are always allocated forward, so handles are not recycled
422  *     immediately. However, if only one free handle left which is reused
423  *     constantly...
424  */
425 static inline int
isrc_alloc_irq(struct intr_irqsrc * isrc)426 isrc_alloc_irq(struct intr_irqsrc *isrc)
427 {
428 	u_int irq;
429 
430 	mtx_assert(&isrc_table_lock, MA_OWNED);
431 
432 	if (irq_next_free >= intr_nirq)
433 		return (ENOSPC);
434 
435 	for (irq = irq_next_free; irq < intr_nirq; irq++) {
436 		if (irq_sources[irq] == NULL)
437 			goto found;
438 	}
439 	for (irq = 0; irq < irq_next_free; irq++) {
440 		if (irq_sources[irq] == NULL)
441 			goto found;
442 	}
443 
444 	irq_next_free = intr_nirq;
445 	return (ENOSPC);
446 
447 found:
448 	isrc->isrc_irq = irq;
449 	irq_sources[irq] = isrc;
450 
451 	irq_next_free = irq + 1;
452 	if (irq_next_free >= intr_nirq)
453 		irq_next_free = 0;
454 	return (0);
455 }
456 
457 /*
458  *  Free unique interrupt number (resource handle) from interrupt source.
459  */
460 static inline int
isrc_free_irq(struct intr_irqsrc * isrc)461 isrc_free_irq(struct intr_irqsrc *isrc)
462 {
463 
464 	mtx_assert(&isrc_table_lock, MA_OWNED);
465 
466 	if (isrc->isrc_irq >= intr_nirq)
467 		return (EINVAL);
468 	if (irq_sources[isrc->isrc_irq] != isrc)
469 		return (EINVAL);
470 
471 	irq_sources[isrc->isrc_irq] = NULL;
472 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
473 
474 	/*
475 	 * If we are recovering from the state irq_sources table is full,
476 	 * then the following allocation should check the entire table. This
477 	 * will ensure maximum separation of allocation order from release
478 	 * order.
479 	 */
480 	if (irq_next_free >= intr_nirq)
481 		irq_next_free = 0;
482 
483 	return (0);
484 }
485 
486 /*
487  *  Initialize interrupt source and register it into global interrupt table.
488  */
489 int
intr_isrc_register(struct intr_irqsrc * isrc,device_t dev,u_int flags,const char * fmt,...)490 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
491     const char *fmt, ...)
492 {
493 	int error;
494 	va_list ap;
495 
496 	bzero(isrc, sizeof(struct intr_irqsrc));
497 	isrc->isrc_dev = dev;
498 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
499 	isrc->isrc_flags = flags;
500 
501 	va_start(ap, fmt);
502 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
503 	va_end(ap);
504 
505 	mtx_lock(&isrc_table_lock);
506 	error = isrc_alloc_irq(isrc);
507 	if (error != 0) {
508 		mtx_unlock(&isrc_table_lock);
509 		return (error);
510 	}
511 	/*
512 	 * Setup interrupt counters, but not for IPI sources. Those are setup
513 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
514 	 * our counter pool.
515 	 */
516 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
517 		isrc_setup_counters(isrc);
518 	mtx_unlock(&isrc_table_lock);
519 	return (0);
520 }
521 
522 /*
523  *  Deregister interrupt source from global interrupt table.
524  */
525 int
intr_isrc_deregister(struct intr_irqsrc * isrc)526 intr_isrc_deregister(struct intr_irqsrc *isrc)
527 {
528 	int error;
529 
530 	mtx_lock(&isrc_table_lock);
531 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
532 		isrc_release_counters(isrc);
533 	error = isrc_free_irq(isrc);
534 	mtx_unlock(&isrc_table_lock);
535 	return (error);
536 }
537 
538 #ifdef SMP
539 /*
540  *  A support function for a PIC to decide if provided ISRC should be inited
541  *  on given cpu. The logic of INTR_ISRCF_BOUND flag and isrc_cpu member of
542  *  struct intr_irqsrc is the following:
543  *
544  *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
545  *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
546  *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
547  */
548 bool
intr_isrc_init_on_cpu(struct intr_irqsrc * isrc,u_int cpu)549 intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
550 {
551 
552 	if (isrc->isrc_handlers == 0)
553 		return (false);
554 	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
555 		return (false);
556 	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
557 		return (CPU_ISSET(cpu, &isrc->isrc_cpu));
558 
559 	CPU_SET(cpu, &isrc->isrc_cpu);
560 	return (true);
561 }
562 #endif
563 
564 #ifdef INTR_SOLO
565 /*
566  *  Setup filter into interrupt source.
567  */
568 static int
iscr_setup_filter(struct intr_irqsrc * isrc,const char * name,intr_irq_filter_t * filter,void * arg,void ** cookiep)569 iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
570     intr_irq_filter_t *filter, void *arg, void **cookiep)
571 {
572 
573 	if (filter == NULL)
574 		return (EINVAL);
575 
576 	mtx_lock(&isrc_table_lock);
577 	/*
578 	 * Make sure that we do not mix the two ways
579 	 * how we handle interrupt sources.
580 	 */
581 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
582 		mtx_unlock(&isrc_table_lock);
583 		return (EBUSY);
584 	}
585 	isrc->isrc_filter = filter;
586 	isrc->isrc_arg = arg;
587 	isrc_update_name(isrc, name);
588 	mtx_unlock(&isrc_table_lock);
589 
590 	*cookiep = isrc;
591 	return (0);
592 }
593 #endif
594 
595 /*
596  *  Interrupt source pre_ithread method for MI interrupt framework.
597  */
598 static void
intr_isrc_pre_ithread(void * arg)599 intr_isrc_pre_ithread(void *arg)
600 {
601 	struct intr_irqsrc *isrc = arg;
602 
603 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
604 }
605 
606 /*
607  *  Interrupt source post_ithread method for MI interrupt framework.
608  */
609 static void
intr_isrc_post_ithread(void * arg)610 intr_isrc_post_ithread(void *arg)
611 {
612 	struct intr_irqsrc *isrc = arg;
613 
614 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
615 }
616 
617 /*
618  *  Interrupt source post_filter method for MI interrupt framework.
619  */
620 static void
intr_isrc_post_filter(void * arg)621 intr_isrc_post_filter(void *arg)
622 {
623 	struct intr_irqsrc *isrc = arg;
624 
625 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
626 }
627 
628 /*
629  *  Interrupt source assign_cpu method for MI interrupt framework.
630  */
631 static int
intr_isrc_assign_cpu(void * arg,int cpu)632 intr_isrc_assign_cpu(void *arg, int cpu)
633 {
634 #ifdef SMP
635 	struct intr_irqsrc *isrc = arg;
636 	int error;
637 
638 	mtx_lock(&isrc_table_lock);
639 	if (cpu == NOCPU) {
640 		CPU_ZERO(&isrc->isrc_cpu);
641 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
642 	} else {
643 		CPU_SETOF(cpu, &isrc->isrc_cpu);
644 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
645 	}
646 
647 	/*
648 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
649 	 * re-balance it to another CPU or enable it on more CPUs. However,
650 	 * PIC is expected to change isrc_cpu appropriately to keep us well
651 	 * informed if the call is successful.
652 	 */
653 	if (irq_assign_cpu) {
654 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
655 		if (error) {
656 			CPU_ZERO(&isrc->isrc_cpu);
657 			mtx_unlock(&isrc_table_lock);
658 			return (error);
659 		}
660 	}
661 	mtx_unlock(&isrc_table_lock);
662 	return (0);
663 #else
664 	return (EOPNOTSUPP);
665 #endif
666 }
667 
668 /*
669  *  Create interrupt event for interrupt source.
670  */
671 static int
isrc_event_create(struct intr_irqsrc * isrc)672 isrc_event_create(struct intr_irqsrc *isrc)
673 {
674 	struct intr_event *ie;
675 	int error;
676 
677 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
678 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
679 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
680 	if (error)
681 		return (error);
682 
683 	mtx_lock(&isrc_table_lock);
684 	/*
685 	 * Make sure that we do not mix the two ways
686 	 * how we handle interrupt sources. Let contested event wins.
687 	 */
688 #ifdef INTR_SOLO
689 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
690 #else
691 	if (isrc->isrc_event != NULL) {
692 #endif
693 		mtx_unlock(&isrc_table_lock);
694 		intr_event_destroy(ie);
695 		return (isrc->isrc_event != NULL ? EBUSY : 0);
696 	}
697 	isrc->isrc_event = ie;
698 	mtx_unlock(&isrc_table_lock);
699 
700 	return (0);
701 }
702 #ifdef notyet
703 /*
704  *  Destroy interrupt event for interrupt source.
705  */
706 static void
707 isrc_event_destroy(struct intr_irqsrc *isrc)
708 {
709 	struct intr_event *ie;
710 
711 	mtx_lock(&isrc_table_lock);
712 	ie = isrc->isrc_event;
713 	isrc->isrc_event = NULL;
714 	mtx_unlock(&isrc_table_lock);
715 
716 	if (ie != NULL)
717 		intr_event_destroy(ie);
718 }
719 #endif
720 /*
721  *  Add handler to interrupt source.
722  */
723 static int
724 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
725     driver_filter_t filter, driver_intr_t handler, void *arg,
726     enum intr_type flags, void **cookiep)
727 {
728 	int error;
729 
730 	if (isrc->isrc_event == NULL) {
731 		error = isrc_event_create(isrc);
732 		if (error)
733 			return (error);
734 	}
735 
736 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
737 	    arg, intr_priority(flags), flags, cookiep);
738 	if (error == 0) {
739 		mtx_lock(&isrc_table_lock);
740 		intrcnt_updatename(isrc);
741 		mtx_unlock(&isrc_table_lock);
742 	}
743 
744 	return (error);
745 }
746 
747 /*
748  *  Lookup interrupt controller locked.
749  */
750 static inline struct intr_pic *
751 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
752 {
753 	struct intr_pic *pic;
754 
755 	mtx_assert(&pic_list_lock, MA_OWNED);
756 
757 	if (dev == NULL && xref == 0)
758 		return (NULL);
759 
760 	/* Note that pic->pic_dev is never NULL on registered PIC. */
761 	SLIST_FOREACH(pic, &pic_list, pic_next) {
762 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
763 		    (flags & FLAG_TYPE_MASK))
764 			continue;
765 
766 		if (dev == NULL) {
767 			if (xref == pic->pic_xref)
768 				return (pic);
769 		} else if (xref == 0 || pic->pic_xref == 0) {
770 			if (dev == pic->pic_dev)
771 				return (pic);
772 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
773 				return (pic);
774 	}
775 	return (NULL);
776 }
777 
778 /*
779  *  Lookup interrupt controller.
780  */
781 static struct intr_pic *
782 pic_lookup(device_t dev, intptr_t xref, int flags)
783 {
784 	struct intr_pic *pic;
785 
786 	mtx_lock(&pic_list_lock);
787 	pic = pic_lookup_locked(dev, xref, flags);
788 	mtx_unlock(&pic_list_lock);
789 	return (pic);
790 }
791 
792 /*
793  *  Create interrupt controller.
794  */
795 static struct intr_pic *
796 pic_create(device_t dev, intptr_t xref, int flags)
797 {
798 	struct intr_pic *pic;
799 
800 	mtx_lock(&pic_list_lock);
801 	pic = pic_lookup_locked(dev, xref, flags);
802 	if (pic != NULL) {
803 		mtx_unlock(&pic_list_lock);
804 		return (pic);
805 	}
806 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
807 	if (pic == NULL) {
808 		mtx_unlock(&pic_list_lock);
809 		return (NULL);
810 	}
811 	pic->pic_xref = xref;
812 	pic->pic_dev = dev;
813 	pic->pic_flags = flags;
814 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
815 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
816 	mtx_unlock(&pic_list_lock);
817 
818 	return (pic);
819 }
820 #ifdef notyet
821 /*
822  *  Destroy interrupt controller.
823  */
824 static void
825 pic_destroy(device_t dev, intptr_t xref, int flags)
826 {
827 	struct intr_pic *pic;
828 
829 	mtx_lock(&pic_list_lock);
830 	pic = pic_lookup_locked(dev, xref, flags);
831 	if (pic == NULL) {
832 		mtx_unlock(&pic_list_lock);
833 		return;
834 	}
835 	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
836 	mtx_unlock(&pic_list_lock);
837 
838 	free(pic, M_INTRNG);
839 }
840 #endif
841 /*
842  *  Register interrupt controller.
843  */
844 struct intr_pic *
845 intr_pic_register(device_t dev, intptr_t xref)
846 {
847 	struct intr_pic *pic;
848 
849 	if (dev == NULL)
850 		return (NULL);
851 	pic = pic_create(dev, xref, FLAG_PIC);
852 	if (pic == NULL)
853 		return (NULL);
854 
855 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
856 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
857 	return (pic);
858 }
859 
860 /*
861  *  Unregister interrupt controller.
862  */
863 int
864 intr_pic_deregister(device_t dev, intptr_t xref)
865 {
866 
867 	panic("%s: not implemented", __func__);
868 }
869 
870 /*
871  *  Mark interrupt controller (itself) as a root one.
872  *
873  *  Note that only an interrupt controller can really know its position
874  *  in interrupt controller's tree. So root PIC must claim itself as a root.
875  *
876  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
877  *  page 30:
878  *    "The root of the interrupt tree is determined when traversal
879  *     of the interrupt tree reaches an interrupt controller node without
880  *     an interrupts property and thus no explicit interrupt parent."
881  */
882 int
883 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
884     void *arg, u_int ipicount)
885 {
886 	struct intr_pic *pic;
887 
888 	pic = pic_lookup(dev, xref, FLAG_PIC);
889 	if (pic == NULL) {
890 		device_printf(dev, "not registered\n");
891 		return (EINVAL);
892 	}
893 
894 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
895 	    ("%s: Found a non-PIC controller: %s", __func__,
896 	     device_get_name(pic->pic_dev)));
897 
898 	if (filter == NULL) {
899 		device_printf(dev, "filter missing\n");
900 		return (EINVAL);
901 	}
902 
903 	/*
904 	 * Only one interrupt controllers could be on the root for now.
905 	 * Note that we further suppose that there is not threaded interrupt
906 	 * routine (handler) on the root. See intr_irq_handler().
907 	 */
908 	if (intr_irq_root_dev != NULL) {
909 		device_printf(dev, "another root already set\n");
910 		return (EBUSY);
911 	}
912 
913 	intr_irq_root_dev = dev;
914 	irq_root_filter = filter;
915 	irq_root_arg = arg;
916 	irq_root_ipicount = ipicount;
917 
918 	debugf("irq root set to %s\n", device_get_nameunit(dev));
919 	return (0);
920 }
921 
922 /*
923  * Add a handler to manage a sub range of a parents interrupts.
924  */
925 int
926 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
927     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
928     uintptr_t length)
929 {
930 	struct intr_pic *parent_pic;
931 	struct intr_pic_child *newchild;
932 #ifdef INVARIANTS
933 	struct intr_pic_child *child;
934 #endif
935 
936 	/* Find the parent PIC */
937 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
938 	if (parent_pic == NULL)
939 		return (ENXIO);
940 
941 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
942 	newchild->pc_pic = pic;
943 	newchild->pc_filter = filter;
944 	newchild->pc_filter_arg = arg;
945 	newchild->pc_start = start;
946 	newchild->pc_length = length;
947 
948 	mtx_lock_spin(&parent_pic->pic_child_lock);
949 #ifdef INVARIANTS
950 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
951 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
952 		    __func__));
953 	}
954 #endif
955 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
956 	mtx_unlock_spin(&parent_pic->pic_child_lock);
957 
958 	return (0);
959 }
960 
961 static int
962 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
963     struct intr_irqsrc **isrc)
964 {
965 	struct intr_pic *pic;
966 	struct intr_map_data_msi *msi;
967 
968 	if (data == NULL)
969 		return (EINVAL);
970 
971 	pic = pic_lookup(dev, xref,
972 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
973 	if (pic == NULL)
974 		return (ESRCH);
975 
976 	switch (data->type) {
977 	case INTR_MAP_DATA_MSI:
978 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
979 		    ("%s: Found a non-MSI controller: %s", __func__,
980 		     device_get_name(pic->pic_dev)));
981 		msi = (struct intr_map_data_msi *)data;
982 		*isrc = msi->isrc;
983 		return (0);
984 
985 	default:
986 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
987 		    ("%s: Found a non-PIC controller: %s", __func__,
988 		     device_get_name(pic->pic_dev)));
989 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
990 	}
991 }
992 
993 bool
994 intr_is_per_cpu(struct resource *res)
995 {
996 	u_int res_id;
997 	struct intr_irqsrc *isrc;
998 
999 	res_id = (u_int)rman_get_start(res);
1000 	isrc = intr_map_get_isrc(res_id);
1001 
1002 	if (isrc == NULL)
1003 		panic("Attempt to get isrc for non-active resource id: %u\n",
1004 		    res_id);
1005 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
1006 }
1007 
1008 int
1009 intr_activate_irq(device_t dev, struct resource *res)
1010 {
1011 	device_t map_dev;
1012 	intptr_t map_xref;
1013 	struct intr_map_data *data;
1014 	struct intr_irqsrc *isrc;
1015 	u_int res_id;
1016 	int error;
1017 
1018 	KASSERT(rman_get_start(res) == rman_get_end(res),
1019 	    ("%s: more interrupts in resource", __func__));
1020 
1021 	res_id = (u_int)rman_get_start(res);
1022 	if (intr_map_get_isrc(res_id) != NULL)
1023 		panic("Attempt to double activation of resource id: %u\n",
1024 		    res_id);
1025 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
1026 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
1027 	if (error != 0) {
1028 		free(data, M_INTRNG);
1029 		/* XXX TODO DISCONECTED PICs */
1030 		/* if (error == EINVAL) return(0); */
1031 		return (error);
1032 	}
1033 	intr_map_set_isrc(res_id, isrc);
1034 	rman_set_virtual(res, data);
1035 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1036 }
1037 
1038 int
1039 intr_deactivate_irq(device_t dev, struct resource *res)
1040 {
1041 	struct intr_map_data *data;
1042 	struct intr_irqsrc *isrc;
1043 	u_int res_id;
1044 	int error;
1045 
1046 	KASSERT(rman_get_start(res) == rman_get_end(res),
1047 	    ("%s: more interrupts in resource", __func__));
1048 
1049 	res_id = (u_int)rman_get_start(res);
1050 	isrc = intr_map_get_isrc(res_id);
1051 	if (isrc == NULL)
1052 		panic("Attempt to deactivate non-active resource id: %u\n",
1053 		    res_id);
1054 
1055 	data = rman_get_virtual(res);
1056 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1057 	intr_map_set_isrc(res_id, NULL);
1058 	rman_set_virtual(res, NULL);
1059 	free(data, M_INTRNG);
1060 	return (error);
1061 }
1062 
1063 int
1064 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1065     driver_intr_t hand, void *arg, int flags, void **cookiep)
1066 {
1067 	int error;
1068 	struct intr_map_data *data;
1069 	struct intr_irqsrc *isrc;
1070 	const char *name;
1071 	u_int res_id;
1072 
1073 	KASSERT(rman_get_start(res) == rman_get_end(res),
1074 	    ("%s: more interrupts in resource", __func__));
1075 
1076 	res_id = (u_int)rman_get_start(res);
1077 	isrc = intr_map_get_isrc(res_id);
1078 	if (isrc == NULL) {
1079 		/* XXX TODO DISCONECTED PICs */
1080 		return (EINVAL);
1081 	}
1082 
1083 	data = rman_get_virtual(res);
1084 	name = device_get_nameunit(dev);
1085 
1086 #ifdef INTR_SOLO
1087 	/*
1088 	 * Standard handling is done through MI interrupt framework. However,
1089 	 * some interrupts could request solely own special handling. This
1090 	 * non standard handling can be used for interrupt controllers without
1091 	 * handler (filter only), so in case that interrupt controllers are
1092 	 * chained, MI interrupt framework is called only in leaf controller.
1093 	 *
1094 	 * Note that root interrupt controller routine is served as well,
1095 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1096 	 */
1097 	if (flags & INTR_SOLO && hand != NULL) {
1098 		debugf("irq %u cannot solo on %s\n", irq, name);
1099 		return (EINVAL);
1100 	}
1101 
1102 	if (flags & INTR_SOLO) {
1103 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1104 		    arg, cookiep);
1105 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1106 		    name);
1107 	} else
1108 #endif
1109 		{
1110 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1111 		    cookiep);
1112 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1113 	}
1114 	if (error != 0)
1115 		return (error);
1116 
1117 	mtx_lock(&isrc_table_lock);
1118 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1119 	if (error == 0) {
1120 		isrc->isrc_handlers++;
1121 		if (isrc->isrc_handlers == 1)
1122 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1123 	}
1124 	mtx_unlock(&isrc_table_lock);
1125 	if (error != 0)
1126 		intr_event_remove_handler(*cookiep);
1127 	return (error);
1128 }
1129 
1130 int
1131 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1132 {
1133 	int error;
1134 	struct intr_map_data *data;
1135 	struct intr_irqsrc *isrc;
1136 	u_int res_id;
1137 
1138 	KASSERT(rman_get_start(res) == rman_get_end(res),
1139 	    ("%s: more interrupts in resource", __func__));
1140 
1141 	res_id = (u_int)rman_get_start(res);
1142 	isrc = intr_map_get_isrc(res_id);
1143 	if (isrc == NULL || isrc->isrc_handlers == 0)
1144 		return (EINVAL);
1145 
1146 	data = rman_get_virtual(res);
1147 
1148 #ifdef INTR_SOLO
1149 	if (isrc->isrc_filter != NULL) {
1150 		if (isrc != cookie)
1151 			return (EINVAL);
1152 
1153 		mtx_lock(&isrc_table_lock);
1154 		isrc->isrc_filter = NULL;
1155 		isrc->isrc_arg = NULL;
1156 		isrc->isrc_handlers = 0;
1157 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1158 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1159 		isrc_update_name(isrc, NULL);
1160 		mtx_unlock(&isrc_table_lock);
1161 		return (0);
1162 	}
1163 #endif
1164 	if (isrc != intr_handler_source(cookie))
1165 		return (EINVAL);
1166 
1167 	error = intr_event_remove_handler(cookie);
1168 	if (error == 0) {
1169 		mtx_lock(&isrc_table_lock);
1170 		isrc->isrc_handlers--;
1171 		if (isrc->isrc_handlers == 0)
1172 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1173 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1174 		intrcnt_updatename(isrc);
1175 		mtx_unlock(&isrc_table_lock);
1176 	}
1177 	return (error);
1178 }
1179 
1180 int
1181 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1182     const char *descr)
1183 {
1184 	int error;
1185 	struct intr_irqsrc *isrc;
1186 	u_int res_id;
1187 
1188 	KASSERT(rman_get_start(res) == rman_get_end(res),
1189 	    ("%s: more interrupts in resource", __func__));
1190 
1191 	res_id = (u_int)rman_get_start(res);
1192 	isrc = intr_map_get_isrc(res_id);
1193 	if (isrc == NULL || isrc->isrc_handlers == 0)
1194 		return (EINVAL);
1195 #ifdef INTR_SOLO
1196 	if (isrc->isrc_filter != NULL) {
1197 		if (isrc != cookie)
1198 			return (EINVAL);
1199 
1200 		mtx_lock(&isrc_table_lock);
1201 		isrc_update_name(isrc, descr);
1202 		mtx_unlock(&isrc_table_lock);
1203 		return (0);
1204 	}
1205 #endif
1206 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1207 	if (error == 0) {
1208 		mtx_lock(&isrc_table_lock);
1209 		intrcnt_updatename(isrc);
1210 		mtx_unlock(&isrc_table_lock);
1211 	}
1212 	return (error);
1213 }
1214 
1215 #ifdef SMP
1216 int
1217 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1218 {
1219 	struct intr_irqsrc *isrc;
1220 	u_int res_id;
1221 
1222 	KASSERT(rman_get_start(res) == rman_get_end(res),
1223 	    ("%s: more interrupts in resource", __func__));
1224 
1225 	res_id = (u_int)rman_get_start(res);
1226 	isrc = intr_map_get_isrc(res_id);
1227 	if (isrc == NULL || isrc->isrc_handlers == 0)
1228 		return (EINVAL);
1229 #ifdef INTR_SOLO
1230 	if (isrc->isrc_filter != NULL)
1231 		return (intr_isrc_assign_cpu(isrc, cpu));
1232 #endif
1233 	return (intr_event_bind(isrc->isrc_event, cpu));
1234 }
1235 
1236 /*
1237  * Return the CPU that the next interrupt source should use.
1238  * For now just returns the next CPU according to round-robin.
1239  */
1240 u_int
1241 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1242 {
1243 	u_int cpu;
1244 
1245 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1246 	if (!irq_assign_cpu || mp_ncpus == 1) {
1247 		cpu = PCPU_GET(cpuid);
1248 
1249 		if (CPU_ISSET(cpu, cpumask))
1250 			return (curcpu);
1251 
1252 		return (CPU_FFS(cpumask) - 1);
1253 	}
1254 
1255 	do {
1256 		last_cpu++;
1257 		if (last_cpu > mp_maxid)
1258 			last_cpu = 0;
1259 	} while (!CPU_ISSET(last_cpu, cpumask));
1260 	return (last_cpu);
1261 }
1262 
1263 #ifndef EARLY_AP_STARTUP
1264 /*
1265  *  Distribute all the interrupt sources among the available
1266  *  CPUs once the AP's have been launched.
1267  */
1268 static void
1269 intr_irq_shuffle(void *arg __unused)
1270 {
1271 	struct intr_irqsrc *isrc;
1272 	u_int i;
1273 
1274 	if (mp_ncpus == 1)
1275 		return;
1276 
1277 	mtx_lock(&isrc_table_lock);
1278 	irq_assign_cpu = true;
1279 	for (i = 0; i < intr_nirq; i++) {
1280 		isrc = irq_sources[i];
1281 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1282 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1283 			continue;
1284 
1285 		if (isrc->isrc_event != NULL &&
1286 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1287 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1288 			panic("%s: CPU inconsistency", __func__);
1289 
1290 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1291 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1292 
1293 		/*
1294 		 * We are in wicked position here if the following call fails
1295 		 * for bound ISRC. The best thing we can do is to clear
1296 		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1297 		 */
1298 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1299 			CPU_ZERO(&isrc->isrc_cpu);
1300 	}
1301 	mtx_unlock(&isrc_table_lock);
1302 }
1303 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1304 #endif /* !EARLY_AP_STARTUP */
1305 
1306 #else
1307 u_int
1308 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1309 {
1310 
1311 	return (PCPU_GET(cpuid));
1312 }
1313 #endif /* SMP */
1314 
1315 /*
1316  * Allocate memory for new intr_map_data structure.
1317  * Initialize common fields.
1318  */
1319 struct intr_map_data *
1320 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1321 {
1322 	struct intr_map_data *data;
1323 
1324 	data = malloc(len, M_INTRNG, flags);
1325 	data->type = type;
1326 	data->len = len;
1327 	return (data);
1328 }
1329 
1330 void intr_free_intr_map_data(struct intr_map_data *data)
1331 {
1332 
1333 	free(data, M_INTRNG);
1334 }
1335 
1336 /*
1337  *  Register a MSI/MSI-X interrupt controller
1338  */
1339 int
1340 intr_msi_register(device_t dev, intptr_t xref)
1341 {
1342 	struct intr_pic *pic;
1343 
1344 	if (dev == NULL)
1345 		return (EINVAL);
1346 	pic = pic_create(dev, xref, FLAG_MSI);
1347 	if (pic == NULL)
1348 		return (ENOMEM);
1349 
1350 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1351 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1352 	return (0);
1353 }
1354 
1355 int
1356 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1357     int maxcount, int *irqs)
1358 {
1359 	struct iommu_domain *domain;
1360 	struct intr_irqsrc **isrc;
1361 	struct intr_pic *pic;
1362 	device_t pdev;
1363 	struct intr_map_data_msi *msi;
1364 	int err, i;
1365 
1366 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1367 	if (pic == NULL)
1368 		return (ESRCH);
1369 
1370 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1371 	    ("%s: Found a non-MSI controller: %s", __func__,
1372 	     device_get_name(pic->pic_dev)));
1373 
1374 	/*
1375 	 * If this is the first time we have used this context ask the
1376 	 * interrupt controller to map memory the msi source will need.
1377 	 */
1378 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1379 	if (err != 0)
1380 		return (err);
1381 
1382 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1383 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1384 	if (err != 0) {
1385 		free(isrc, M_INTRNG);
1386 		return (err);
1387 	}
1388 
1389 	for (i = 0; i < count; i++) {
1390 		isrc[i]->isrc_iommu = domain;
1391 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1392 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1393 		msi-> isrc = isrc[i];
1394 
1395 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1396 		    (struct intr_map_data *)msi);
1397 	}
1398 	free(isrc, M_INTRNG);
1399 
1400 	return (err);
1401 }
1402 
1403 int
1404 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1405     int *irqs)
1406 {
1407 	struct intr_irqsrc **isrc;
1408 	struct intr_pic *pic;
1409 	struct intr_map_data_msi *msi;
1410 	int i, err;
1411 
1412 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1413 	if (pic == NULL)
1414 		return (ESRCH);
1415 
1416 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1417 	    ("%s: Found a non-MSI controller: %s", __func__,
1418 	     device_get_name(pic->pic_dev)));
1419 
1420 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1421 
1422 	for (i = 0; i < count; i++) {
1423 		msi = (struct intr_map_data_msi *)
1424 		    intr_map_get_map_data(irqs[i]);
1425 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1426 		    ("%s: irq %d map data is not MSI", __func__,
1427 		    irqs[i]));
1428 		isrc[i] = msi->isrc;
1429 	}
1430 
1431 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1432 
1433 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1434 
1435 	for (i = 0; i < count; i++) {
1436 		if (isrc[i] != NULL)
1437 			intr_unmap_irq(irqs[i]);
1438 	}
1439 
1440 	free(isrc, M_INTRNG);
1441 	return (err);
1442 }
1443 
1444 int
1445 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1446 {
1447 	struct iommu_domain *domain;
1448 	struct intr_irqsrc *isrc;
1449 	struct intr_pic *pic;
1450 	device_t pdev;
1451 	struct intr_map_data_msi *msi;
1452 	int err;
1453 
1454 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1455 	if (pic == NULL)
1456 		return (ESRCH);
1457 
1458 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1459 	    ("%s: Found a non-MSI controller: %s", __func__,
1460 	     device_get_name(pic->pic_dev)));
1461 
1462 	/*
1463 	 * If this is the first time we have used this context ask the
1464 	 * interrupt controller to map memory the msi source will need.
1465 	 */
1466 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1467 	if (err != 0)
1468 		return (err);
1469 
1470 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1471 	if (err != 0)
1472 		return (err);
1473 
1474 	isrc->isrc_iommu = domain;
1475 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1476 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1477 	msi->isrc = isrc;
1478 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1479 	return (0);
1480 }
1481 
1482 int
1483 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1484 {
1485 	struct intr_irqsrc *isrc;
1486 	struct intr_pic *pic;
1487 	struct intr_map_data_msi *msi;
1488 	int err;
1489 
1490 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1491 	if (pic == NULL)
1492 		return (ESRCH);
1493 
1494 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1495 	    ("%s: Found a non-MSI controller: %s", __func__,
1496 	     device_get_name(pic->pic_dev)));
1497 
1498 	msi = (struct intr_map_data_msi *)
1499 	    intr_map_get_map_data(irq);
1500 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1501 	    ("%s: irq %d map data is not MSI", __func__,
1502 	    irq));
1503 	isrc = msi->isrc;
1504 	if (isrc == NULL) {
1505 		intr_unmap_irq(irq);
1506 		return (EINVAL);
1507 	}
1508 
1509 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1510 
1511 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1512 	intr_unmap_irq(irq);
1513 
1514 	return (err);
1515 }
1516 
1517 int
1518 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1519     uint64_t *addr, uint32_t *data)
1520 {
1521 	struct intr_irqsrc *isrc;
1522 	struct intr_pic *pic;
1523 	int err;
1524 
1525 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1526 	if (pic == NULL)
1527 		return (ESRCH);
1528 
1529 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1530 	    ("%s: Found a non-MSI controller: %s", __func__,
1531 	     device_get_name(pic->pic_dev)));
1532 
1533 	isrc = intr_map_get_isrc(irq);
1534 	if (isrc == NULL)
1535 		return (EINVAL);
1536 
1537 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1538 
1539 #ifdef IOMMU
1540 	if (isrc->isrc_iommu != NULL)
1541 		iommu_translate_msi(isrc->isrc_iommu, addr);
1542 #endif
1543 
1544 	return (err);
1545 }
1546 
/*
 * Intentionally empty routine; declared right here because no prototype
 * is otherwise visible at this point.
 */
void	dosoftints(void);

void
dosoftints(void)
{

}
1552 
1553 #ifdef SMP
1554 /*
1555  *  Init interrupt controller on another CPU.
1556  */
1557 void
1558 intr_pic_init_secondary(void)
1559 {
1560 
1561 	/*
1562 	 * QQQ: Only root PIC is aware of other CPUs ???
1563 	 */
1564 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1565 
1566 	//mtx_lock(&isrc_table_lock);
1567 	PIC_INIT_SECONDARY(intr_irq_root_dev);
1568 	//mtx_unlock(&isrc_table_lock);
1569 }
1570 #endif
1571 
1572 #ifdef DDB
1573 DB_SHOW_COMMAND(irqs, db_show_irqs)
1574 {
1575 	u_int i, irqsum;
1576 	u_long num;
1577 	struct intr_irqsrc *isrc;
1578 
1579 	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1580 		isrc = irq_sources[i];
1581 		if (isrc == NULL)
1582 			continue;
1583 
1584 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1585 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1586 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1587 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1588 		irqsum += num;
1589 	}
1590 	db_printf("irq total %u\n", irqsum);
1591 }
1592 #endif
1593 
1594 /*
1595  * Interrupt mapping table functions.
1596  *
 * Please keep this part separate; it can be transformed into an
 * extension of standard resources.
1599  */
1600 struct intr_map_entry
1601 {
1602 	device_t 		dev;
1603 	intptr_t 		xref;
1604 	struct intr_map_data 	*map_data;
1605 	struct intr_irqsrc 	*isrc;
1606 	/* XXX TODO DISCONECTED PICs */
1607 	/*int			flags */
1608 };
1609 
1610 /* XXX Convert irq_map[] to dynamicaly expandable one. */
1611 static struct intr_map_entry **irq_map;
1612 static u_int irq_map_count;
1613 static u_int irq_map_first_free_idx;
1614 static struct mtx irq_map_lock;
1615 
1616 static struct intr_irqsrc *
1617 intr_map_get_isrc(u_int res_id)
1618 {
1619 	struct intr_irqsrc *isrc;
1620 
1621 	isrc = NULL;
1622 	mtx_lock(&irq_map_lock);
1623 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1624 		isrc = irq_map[res_id]->isrc;
1625 	mtx_unlock(&irq_map_lock);
1626 
1627 	return (isrc);
1628 }
1629 
1630 static void
1631 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1632 {
1633 
1634 	mtx_lock(&irq_map_lock);
1635 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1636 		irq_map[res_id]->isrc = isrc;
1637 	mtx_unlock(&irq_map_lock);
1638 }
1639 
1640 /*
1641  * Get a copy of intr_map_entry data
1642  */
1643 static struct intr_map_data *
1644 intr_map_get_map_data(u_int res_id)
1645 {
1646 	struct intr_map_data *data;
1647 
1648 	data = NULL;
1649 	mtx_lock(&irq_map_lock);
1650 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1651 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1652 	data = irq_map[res_id]->map_data;
1653 	mtx_unlock(&irq_map_lock);
1654 
1655 	return (data);
1656 }
1657 
1658 /*
1659  * Get a copy of intr_map_entry data
1660  */
1661 static void
1662 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1663     struct intr_map_data **data)
1664 {
1665 	size_t len;
1666 
1667 	len = 0;
1668 	mtx_lock(&irq_map_lock);
1669 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1670 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1671 	if (irq_map[res_id]->map_data != NULL)
1672 		len = irq_map[res_id]->map_data->len;
1673 	mtx_unlock(&irq_map_lock);
1674 
1675 	if (len == 0)
1676 		*data = NULL;
1677 	else
1678 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1679 	mtx_lock(&irq_map_lock);
1680 	if (irq_map[res_id] == NULL)
1681 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1682 	if (len != 0) {
1683 		if (len != irq_map[res_id]->map_data->len)
1684 			panic("Resource id: %u has changed.\n", res_id);
1685 		memcpy(*data, irq_map[res_id]->map_data, len);
1686 	}
1687 	*map_dev = irq_map[res_id]->dev;
1688 	*map_xref = irq_map[res_id]->xref;
1689 	mtx_unlock(&irq_map_lock);
1690 }
1691 
1692 /*
1693  * Allocate and fill new entry in irq_map table.
1694  */
1695 u_int
1696 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1697 {
1698 	u_int i;
1699 	struct intr_map_entry *entry;
1700 
1701 	/* Prepare new entry first. */
1702 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1703 
1704 	entry->dev = dev;
1705 	entry->xref = xref;
1706 	entry->map_data = data;
1707 	entry->isrc = NULL;
1708 
1709 	mtx_lock(&irq_map_lock);
1710 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1711 		if (irq_map[i] == NULL) {
1712 			irq_map[i] = entry;
1713 			irq_map_first_free_idx = i + 1;
1714 			mtx_unlock(&irq_map_lock);
1715 			return (i);
1716 		}
1717 	}
1718 	for (i = 0; i < irq_map_first_free_idx; i++) {
1719 		if (irq_map[i] == NULL) {
1720 			irq_map[i] = entry;
1721 			irq_map_first_free_idx = i + 1;
1722 			mtx_unlock(&irq_map_lock);
1723 			return (i);
1724 		}
1725 	}
1726 	mtx_unlock(&irq_map_lock);
1727 
1728 	/* XXX Expand irq_map table */
1729 	panic("IRQ mapping table is full.");
1730 }
1731 
1732 /*
1733  * Remove and free mapping entry.
1734  */
1735 void
1736 intr_unmap_irq(u_int res_id)
1737 {
1738 	struct intr_map_entry *entry;
1739 
1740 	mtx_lock(&irq_map_lock);
1741 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1742 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1743 	entry = irq_map[res_id];
1744 	irq_map[res_id] = NULL;
1745 	irq_map_first_free_idx = res_id;
1746 	mtx_unlock(&irq_map_lock);
1747 	intr_free_intr_map_data(entry->map_data);
1748 	free(entry, M_INTRNG);
1749 }
1750 
1751 /*
1752  * Clone mapping entry.
1753  */
1754 u_int
1755 intr_map_clone_irq(u_int old_res_id)
1756 {
1757 	device_t map_dev;
1758 	intptr_t map_xref;
1759 	struct intr_map_data *data;
1760 
1761 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1762 	return (intr_map_irq(map_dev, map_xref, data));
1763 }
1764 
1765 static void
1766 intr_map_init(void *dummy __unused)
1767 {
1768 
1769 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1770 
1771 	irq_map_count = 2 * intr_nirq;
1772 	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
1773 	    M_INTRNG, M_WAITOK | M_ZERO);
1774 }
1775 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1776