xref: /freebsd-13-stable/sys/x86/x86/intr_machdep.c (revision b5f0f20e9bd1999d9c606f2a20a08a54c7f5ccfe)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
/*
 * Machine-dependent interrupt code for x86.  On x86 we have to deal
 * with different PICs.  Thus, we use the passed-in vector to look up
 * the interrupt source associated with that vector.  The interrupt source
 * describes which PIC the source belongs to and includes methods to handle
 * that source.
 */
35 
36 #include "opt_atpic.h"
37 #include "opt_ddb.h"
38 #include "opt_smp.h"
39 
40 #include <sys/param.h>
41 #include <sys/bus.h>
42 #include <sys/interrupt.h>
43 #include <sys/ktr.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/queue.h>
50 #include <sys/sbuf.h>
51 #include <sys/smp.h>
52 #include <sys/sx.h>
53 #include <sys/sysctl.h>
54 #include <sys/syslog.h>
55 #include <sys/systm.h>
56 #include <sys/taskqueue.h>
57 #include <sys/vmmeter.h>
58 #include <machine/clock.h>
59 #include <machine/intr_machdep.h>
60 #include <machine/smp.h>
61 #ifdef DDB
62 #include <ddb/ddb.h>
63 #endif
64 
65 #ifndef DEV_ATPIC
66 #include <machine/segments.h>
67 #include <machine/frame.h>
68 #include <dev/ic/i8259.h>
69 #include <x86/isa/icu.h>
70 #include <isa/isareg.h>
71 #endif
72 
73 #include <vm/vm.h>
74 
75 #define	MAX_STRAY_LOG	5
76 
77 typedef void (*mask_fn)(void *);
78 
79 static int intrcnt_index;
80 static struct intsrc **interrupt_sources;
81 #ifdef SMP
82 static struct intsrc **interrupt_sorted;
83 static int intrbalance;
84 SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RWTUN, &intrbalance, 0,
85     "Interrupt auto-balance interval (seconds).  Zero disables.");
86 static struct timeout_task intrbalance_task;
87 #endif
88 static struct sx intrsrc_lock;
89 static struct mtx intrpic_lock;
90 static struct mtx intrcnt_lock;
91 static TAILQ_HEAD(pics_head, pic) pics;
92 u_int num_io_irqs;
93 
94 #if defined(SMP) && !defined(EARLY_AP_STARTUP)
95 static int assign_cpu;
96 #endif
97 
98 #define	INTRNAME_LEN	(MAXCOMLEN + 1)
99 u_long *intrcnt;
100 char *intrnames;
101 size_t sintrcnt = sizeof(intrcnt);
102 size_t sintrnames = sizeof(intrnames);
103 int nintrcnt;
104 
105 static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources");
106 
107 static int	intr_assign_cpu(void *arg, int cpu);
108 static void	intr_disable_src(void *arg);
109 static void	intr_init(void *__dummy);
110 static int	intr_pic_registered(struct pic *pic);
111 static void	intrcnt_setname(const char *name, int index);
112 static void	intrcnt_updatename(struct intsrc *is);
113 static void	intrcnt_register(struct intsrc *is);
114 
115 /*
116  * SYSINIT levels for SI_SUB_INTR:
117  *
118  * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init
119  * SI_ORDER_SECOND: Xen PICs
120  * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges
121  * SI_ORDER_FOURTH: Add 8259A PICs
122  * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources
123  * SI_ORDER_MIDDLE: SMP interrupt counters
124  * SI_ORDER_ANY: Enable interrupts on BSP
125  */
126 
127 static int
intr_pic_registered(struct pic * pic)128 intr_pic_registered(struct pic *pic)
129 {
130 	struct pic *p;
131 
132 	TAILQ_FOREACH(p, &pics, pics) {
133 		if (p == pic)
134 			return (1);
135 	}
136 	return (0);
137 }
138 
139 /*
140  * Register a new interrupt controller (PIC).  This is to support suspend
141  * and resume where we suspend/resume controllers rather than individual
142  * sources.  This also allows controllers with no active sources (such as
143  * 8259As in a system using the APICs) to participate in suspend and resume.
144  */
145 int
intr_register_pic(struct pic * pic)146 intr_register_pic(struct pic *pic)
147 {
148 	int error;
149 
150 	mtx_lock(&intrpic_lock);
151 	if (intr_pic_registered(pic))
152 		error = EBUSY;
153 	else {
154 		TAILQ_INSERT_TAIL(&pics, pic, pics);
155 		error = 0;
156 	}
157 	mtx_unlock(&intrpic_lock);
158 	return (error);
159 }
160 
161 /*
162  * Allocate interrupt source arrays and register interrupt sources
163  * once the number of interrupts is known.
164  */
165 static void
intr_init_sources(void * arg)166 intr_init_sources(void *arg)
167 {
168 	struct pic *pic;
169 
170 	MPASS(num_io_irqs > 0);
171 
172 	interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources),
173 	    M_INTR, M_WAITOK | M_ZERO);
174 #ifdef SMP
175 	interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted),
176 	    M_INTR, M_WAITOK | M_ZERO);
177 #endif
178 
179 	/*
180 	 * - 1 ??? dummy counter.
181 	 * - 2 counters for each I/O interrupt.
182 	 * - 1 counter for each CPU for lapic timer.
183 	 * - 1 counter for each CPU for the Hyper-V vmbus driver.
184 	 * - 8 counters for each CPU for IPI counters for SMP.
185 	 */
186 	nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2;
187 #ifdef COUNT_IPIS
188 	if (mp_ncpus > 1)
189 		nintrcnt += 8 * mp_ncpus;
190 #endif
191 	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK |
192 	    M_ZERO);
193 	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTR, M_WAITOK |
194 	    M_ZERO);
195 	sintrcnt = nintrcnt * sizeof(u_long);
196 	sintrnames = nintrcnt * INTRNAME_LEN;
197 
198 	intrcnt_setname("???", 0);
199 	intrcnt_index = 1;
200 
201 	/*
202 	 * NB: intrpic_lock is not held here to avoid LORs due to
203 	 * malloc() in intr_register_source().  However, we are still
204 	 * single-threaded at this point in startup so the list of
205 	 * PICs shouldn't change.
206 	 */
207 	TAILQ_FOREACH(pic, &pics, pics) {
208 		if (pic->pic_register_sources != NULL)
209 			pic->pic_register_sources(pic);
210 	}
211 }
212 SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources,
213     NULL);
214 
215 /*
216  * Register a new interrupt source with the global interrupt system.
217  * The global interrupts need to be disabled when this function is
218  * called.
219  */
220 int
intr_register_source(struct intsrc * isrc)221 intr_register_source(struct intsrc *isrc)
222 {
223 	int error, vector;
224 
225 	KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
226 	vector = isrc->is_pic->pic_vector(isrc);
227 	KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector,
228 	    num_io_irqs));
229 	if (interrupt_sources[vector] != NULL)
230 		return (EEXIST);
231 	error = intr_event_create(&isrc->is_event, isrc, 0, vector,
232 	    intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
233 	    (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
234 	    vector);
235 	if (error)
236 		return (error);
237 	sx_xlock(&intrsrc_lock);
238 	if (interrupt_sources[vector] != NULL) {
239 		sx_xunlock(&intrsrc_lock);
240 		intr_event_destroy(isrc->is_event);
241 		return (EEXIST);
242 	}
243 	intrcnt_register(isrc);
244 	interrupt_sources[vector] = isrc;
245 	isrc->is_handlers = 0;
246 	sx_xunlock(&intrsrc_lock);
247 	return (0);
248 }
249 
250 struct intsrc *
intr_lookup_source(int vector)251 intr_lookup_source(int vector)
252 {
253 
254 	if (vector < 0 || vector >= num_io_irqs)
255 		return (NULL);
256 	return (interrupt_sources[vector]);
257 }
258 
259 int
intr_add_handler(const char * name,int vector,driver_filter_t filter,driver_intr_t handler,void * arg,enum intr_type flags,void ** cookiep,int domain)260 intr_add_handler(const char *name, int vector, driver_filter_t filter,
261     driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
262     int domain)
263 {
264 	struct intsrc *isrc;
265 	int error;
266 
267 	isrc = intr_lookup_source(vector);
268 	if (isrc == NULL)
269 		return (EINVAL);
270 	error = intr_event_add_handler(isrc->is_event, name, filter, handler,
271 	    arg, intr_priority(flags), flags, cookiep);
272 	if (error == 0) {
273 		sx_xlock(&intrsrc_lock);
274 		intrcnt_updatename(isrc);
275 		isrc->is_handlers++;
276 		if (isrc->is_handlers == 1) {
277 			isrc->is_domain = domain;
278 			isrc->is_pic->pic_enable_intr(isrc);
279 			isrc->is_pic->pic_enable_source(isrc);
280 		}
281 		sx_xunlock(&intrsrc_lock);
282 	}
283 	return (error);
284 }
285 
286 int
intr_remove_handler(void * cookie)287 intr_remove_handler(void *cookie)
288 {
289 	struct intsrc *isrc;
290 	int error;
291 
292 	isrc = intr_handler_source(cookie);
293 	error = intr_event_remove_handler(cookie);
294 	if (error == 0) {
295 		sx_xlock(&intrsrc_lock);
296 		isrc->is_handlers--;
297 		if (isrc->is_handlers == 0) {
298 			isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
299 			isrc->is_pic->pic_disable_intr(isrc);
300 		}
301 		intrcnt_updatename(isrc);
302 		sx_xunlock(&intrsrc_lock);
303 	}
304 	return (error);
305 }
306 
307 int
intr_config_intr(int vector,enum intr_trigger trig,enum intr_polarity pol)308 intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
309 {
310 	struct intsrc *isrc;
311 
312 	isrc = intr_lookup_source(vector);
313 	if (isrc == NULL)
314 		return (EINVAL);
315 	return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
316 }
317 
318 static void
intr_disable_src(void * arg)319 intr_disable_src(void *arg)
320 {
321 	struct intsrc *isrc;
322 
323 	isrc = arg;
324 	isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
325 }
326 
327 void
intr_execute_handlers(struct intsrc * isrc,struct trapframe * frame)328 intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
329 {
330 	struct intr_event *ie;
331 	int vector;
332 
333 	/*
334 	 * We count software interrupts when we process them.  The
335 	 * code here follows previous practice, but there's an
336 	 * argument for counting hardware interrupts when they're
337 	 * processed too.
338 	 */
339 	(*isrc->is_count)++;
340 	VM_CNT_INC(v_intr);
341 
342 	ie = isrc->is_event;
343 
344 	/*
345 	 * XXX: We assume that IRQ 0 is only used for the ISA timer
346 	 * device (clk).
347 	 */
348 	vector = isrc->is_pic->pic_vector(isrc);
349 	if (vector == 0)
350 		clkintr_pending = 1;
351 
352 	/*
353 	 * For stray interrupts, mask and EOI the source, bump the
354 	 * stray count, and log the condition.
355 	 */
356 	if (intr_event_handle(ie, frame) != 0) {
357 		isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
358 		(*isrc->is_straycount)++;
359 		if (*isrc->is_straycount < MAX_STRAY_LOG)
360 			log(LOG_ERR, "stray irq%d\n", vector);
361 		else if (*isrc->is_straycount == MAX_STRAY_LOG)
362 			log(LOG_CRIT,
363 			    "too many stray irq %d's: not logging anymore\n",
364 			    vector);
365 	}
366 }
367 
368 void
intr_resume(bool suspend_cancelled)369 intr_resume(bool suspend_cancelled)
370 {
371 	struct pic *pic;
372 
373 #ifndef DEV_ATPIC
374 	atpic_reset();
375 #endif
376 	mtx_lock(&intrpic_lock);
377 	TAILQ_FOREACH(pic, &pics, pics) {
378 		if (pic->pic_resume != NULL)
379 			pic->pic_resume(pic, suspend_cancelled);
380 	}
381 	mtx_unlock(&intrpic_lock);
382 }
383 
384 void
intr_suspend(void)385 intr_suspend(void)
386 {
387 	struct pic *pic;
388 
389 	mtx_lock(&intrpic_lock);
390 	TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) {
391 		if (pic->pic_suspend != NULL)
392 			pic->pic_suspend(pic);
393 	}
394 	mtx_unlock(&intrpic_lock);
395 }
396 
397 static int
intr_assign_cpu(void * arg,int cpu)398 intr_assign_cpu(void *arg, int cpu)
399 {
400 #ifdef SMP
401 	struct intsrc *isrc;
402 	int error;
403 
404 #ifdef EARLY_AP_STARTUP
405 	MPASS(mp_ncpus == 1 || smp_started);
406 
407 	/* Nothing to do if there is only a single CPU. */
408 	if (mp_ncpus > 1 && cpu != NOCPU) {
409 #else
410 	/*
411 	 * Don't do anything during early boot.  We will pick up the
412 	 * assignment once the APs are started.
413 	 */
414 	if (assign_cpu && cpu != NOCPU) {
415 #endif
416 		isrc = arg;
417 		sx_xlock(&intrsrc_lock);
418 		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
419 		if (error == 0)
420 			isrc->is_cpu = cpu;
421 		sx_xunlock(&intrsrc_lock);
422 	} else
423 		error = 0;
424 	return (error);
425 #else
426 	return (EOPNOTSUPP);
427 #endif
428 }
429 
430 static void
431 intrcnt_setname(const char *name, int index)
432 {
433 
434 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
435 	    INTRNAME_LEN - 1, name);
436 }
437 
438 static void
439 intrcnt_updatename(struct intsrc *is)
440 {
441 
442 	intrcnt_setname(is->is_event->ie_fullname, is->is_index);
443 }
444 
445 static void
446 intrcnt_register(struct intsrc *is)
447 {
448 	char straystr[INTRNAME_LEN];
449 
450 	KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
451 	mtx_lock_spin(&intrcnt_lock);
452 	MPASS(intrcnt_index + 2 <= nintrcnt);
453 	is->is_index = intrcnt_index;
454 	intrcnt_index += 2;
455 	snprintf(straystr, sizeof(straystr), "stray irq%d",
456 	    is->is_pic->pic_vector(is));
457 	intrcnt_updatename(is);
458 	is->is_count = &intrcnt[is->is_index];
459 	intrcnt_setname(straystr, is->is_index + 1);
460 	is->is_straycount = &intrcnt[is->is_index + 1];
461 	mtx_unlock_spin(&intrcnt_lock);
462 }
463 
464 void
465 intrcnt_add(const char *name, u_long **countp)
466 {
467 
468 	mtx_lock_spin(&intrcnt_lock);
469 	MPASS(intrcnt_index < nintrcnt);
470 	*countp = &intrcnt[intrcnt_index];
471 	intrcnt_setname(name, intrcnt_index);
472 	intrcnt_index++;
473 	mtx_unlock_spin(&intrcnt_lock);
474 }
475 
476 static void
477 intr_init(void *dummy __unused)
478 {
479 
480 	TAILQ_INIT(&pics);
481 	mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF);
482 	sx_init(&intrsrc_lock, "intrsrc");
483 	mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
484 }
485 SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);
486 
487 static void
488 intr_init_final(void *dummy __unused)
489 {
490 
491 	/*
492 	 * Enable interrupts on the BSP after all of the interrupt
493 	 * controllers are initialized.  Device interrupts are still
494 	 * disabled in the interrupt controllers until interrupt
495 	 * handlers are registered.  Interrupts are enabled on each AP
496 	 * after their first context switch.
497 	 */
498 	enable_intr();
499 }
500 SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL);
501 
502 #ifndef DEV_ATPIC
503 /* Initialize the two 8259A's to a known-good shutdown state. */
504 void
505 atpic_reset(void)
506 {
507 
508 	outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
509 	outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
510 	outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID));
511 	outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE);
512 	outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
513 	outb(IO_ICU1, OCW3_SEL | OCW3_RR);
514 
515 	outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
516 	outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
517 	outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID);
518 	outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE);
519 	outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
520 	outb(IO_ICU2, OCW3_SEL | OCW3_RR);
521 }
522 #endif
523 
524 /* Add a description to an active interrupt handler. */
525 int
526 intr_describe(u_int vector, void *ih, const char *descr)
527 {
528 	struct intsrc *isrc;
529 	int error;
530 
531 	isrc = intr_lookup_source(vector);
532 	if (isrc == NULL)
533 		return (EINVAL);
534 	error = intr_event_describe_handler(isrc->is_event, ih, descr);
535 	if (error)
536 		return (error);
537 	intrcnt_updatename(isrc);
538 	return (0);
539 }
540 
541 void
542 intr_reprogram(void)
543 {
544 	struct intsrc *is;
545 	u_int v;
546 
547 	sx_xlock(&intrsrc_lock);
548 	for (v = 0; v < num_io_irqs; v++) {
549 		is = interrupt_sources[v];
550 		if (is == NULL)
551 			continue;
552 		if (is->is_pic->pic_reprogram_pin != NULL)
553 			is->is_pic->pic_reprogram_pin(is);
554 	}
555 	sx_xunlock(&intrsrc_lock);
556 }
557 
#ifdef DDB
/*
 * DDB "show irqs" command: dump the interrupt event of every
 * registered interrupt source.  The "v" modifier requests verbose
 * output.  The walk stops early once the pager is quit.
 */
DB_SHOW_COMMAND(irqs, db_show_irqs)
{
	struct intsrc *isrc;
	u_int irq;
	int verbose;

	verbose = (strcmp(modif, "v") == 0) ? 1 : 0;
	for (irq = 0; irq < num_io_irqs && !db_pager_quit; irq++) {
		isrc = interrupt_sources[irq];
		if (isrc != NULL)
			db_dump_intr_event(isrc->is_event, verbose);
	}
}
#endif
578 
579 #ifdef SMP
580 /*
581  * Support for balancing interrupt sources across CPUs.  For now we just
582  * allocate CPUs round-robin.
583  */
584 
585 cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
586 static int current_cpu[MAXMEMDOM];
587 
588 static void
589 intr_init_cpus(void)
590 {
591 	int i;
592 
593 	for (i = 0; i < vm_ndomains; i++) {
594 		current_cpu[i] = 0;
595 		if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
596 		    !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
597 			intr_next_cpu(i);
598 	}
599 }
600 
601 /*
602  * Return the CPU that the next interrupt source should use.  For now
603  * this just returns the next local APIC according to round-robin.
604  */
605 u_int
606 intr_next_cpu(int domain)
607 {
608 	u_int apic_id;
609 
610 #ifdef EARLY_AP_STARTUP
611 	MPASS(mp_ncpus == 1 || smp_started);
612 	if (mp_ncpus == 1)
613 		return (PCPU_GET(apic_id));
614 #else
615 	/* Leave all interrupts on the BSP during boot. */
616 	if (!assign_cpu)
617 		return (PCPU_GET(apic_id));
618 #endif
619 
620 	mtx_lock_spin(&icu_lock);
621 	apic_id = cpu_apic_ids[current_cpu[domain]];
622 	do {
623 		current_cpu[domain]++;
624 		if (current_cpu[domain] > mp_maxid)
625 			current_cpu[domain] = 0;
626 	} while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
627 	    !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]));
628 	mtx_unlock_spin(&icu_lock);
629 	return (apic_id);
630 }
631 
632 /* Attempt to bind the specified IRQ to the specified CPU. */
633 int
634 intr_bind(u_int vector, u_char cpu)
635 {
636 	struct intsrc *isrc;
637 
638 	isrc = intr_lookup_source(vector);
639 	if (isrc == NULL)
640 		return (EINVAL);
641 	return (intr_event_bind(isrc->is_event, cpu));
642 }
643 
644 /*
645  * Add a CPU to our mask of valid CPUs that can be destinations of
646  * interrupts.
647  */
648 void
649 intr_add_cpu(u_int cpu)
650 {
651 
652 	if (cpu >= MAXCPU)
653 		panic("%s: Invalid CPU ID", __func__);
654 	if (bootverbose)
655 		printf("INTR: Adding local APIC %d as a target\n",
656 		    cpu_apic_ids[cpu]);
657 
658 	CPU_SET(cpu, &intr_cpus);
659 }
660 
661 #ifdef EARLY_AP_STARTUP
662 static void
663 intr_smp_startup(void *arg __unused)
664 {
665 
666 	intr_init_cpus();
667 	return;
668 }
669 SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup,
670     NULL);
671 
672 #else
673 /*
674  * Distribute all the interrupt sources among the available CPUs once the
675  * AP's have been launched.
676  */
677 static void
678 intr_shuffle_irqs(void *arg __unused)
679 {
680 	struct intsrc *isrc;
681 	u_int cpu, i;
682 
683 	intr_init_cpus();
684 	/* Don't bother on UP. */
685 	if (mp_ncpus == 1)
686 		return;
687 
688 	/* Round-robin assign a CPU to each enabled source. */
689 	sx_xlock(&intrsrc_lock);
690 	assign_cpu = 1;
691 	for (i = 0; i < num_io_irqs; i++) {
692 		isrc = interrupt_sources[i];
693 		if (isrc != NULL && isrc->is_handlers > 0) {
694 			/*
695 			 * If this event is already bound to a CPU,
696 			 * then assign the source to that CPU instead
697 			 * of picking one via round-robin.  Note that
698 			 * this is careful to only advance the
699 			 * round-robin if the CPU assignment succeeds.
700 			 */
701 			cpu = isrc->is_event->ie_cpu;
702 			if (cpu == NOCPU)
703 				cpu = current_cpu[isrc->is_domain];
704 			if (isrc->is_pic->pic_assign_cpu(isrc,
705 			    cpu_apic_ids[cpu]) == 0) {
706 				isrc->is_cpu = cpu;
707 				if (isrc->is_event->ie_cpu == NOCPU)
708 					intr_next_cpu(isrc->is_domain);
709 			}
710 		}
711 	}
712 	sx_xunlock(&intrsrc_lock);
713 }
714 SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs,
715     NULL);
716 #endif
717 
718 /*
719  * TODO: Export this information in a non-MD fashion, integrate with vmstat -i.
720  */
721 static int
722 sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
723 {
724 	struct sbuf sbuf;
725 	struct intsrc *isrc;
726 	u_int i;
727 	int error;
728 
729 	error = sysctl_wire_old_buffer(req, 0);
730 	if (error != 0)
731 		return (error);
732 
733 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
734 	sx_slock(&intrsrc_lock);
735 	for (i = 0; i < num_io_irqs; i++) {
736 		isrc = interrupt_sources[i];
737 		if (isrc == NULL)
738 			continue;
739 		sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n",
740 		    isrc->is_event->ie_fullname,
741 		    isrc->is_index,
742 		    isrc->is_cpu,
743 		    isrc->is_domain,
744 		    *isrc->is_count);
745 	}
746 
747 	sx_sunlock(&intrsrc_lock);
748 	error = sbuf_finish(&sbuf);
749 	sbuf_delete(&sbuf);
750 	return (error);
751 }
752 SYSCTL_PROC(_hw, OID_AUTO, intrs,
753     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
754     0, 0, sysctl_hw_intrs, "A",
755     "interrupt:number @cpu: count");
756 
757 /*
758  * Compare two, possibly NULL, entries in the interrupt source array
759  * by load.
760  */
761 static int
762 intrcmp(const void *one, const void *two)
763 {
764 	const struct intsrc *i1, *i2;
765 
766 	i1 = *(const struct intsrc * const *)one;
767 	i2 = *(const struct intsrc * const *)two;
768 	if (i1 != NULL && i2 != NULL)
769 		return (*i1->is_count - *i2->is_count);
770 	if (i1 != NULL)
771 		return (1);
772 	if (i2 != NULL)
773 		return (-1);
774 	return (0);
775 }
776 
777 /*
778  * Balance IRQs across available CPUs according to load.
779  */
780 static void
781 intr_balance(void *dummy __unused, int pending __unused)
782 {
783 	struct intsrc *isrc;
784 	int interval;
785 	u_int cpu;
786 	int i;
787 
788 	interval = intrbalance;
789 	if (interval == 0)
790 		goto out;
791 
792 	/*
793 	 * Sort interrupts according to count.
794 	 */
795 	sx_xlock(&intrsrc_lock);
796 	memcpy(interrupt_sorted, interrupt_sources, num_io_irqs *
797 	    sizeof(interrupt_sorted[0]));
798 	qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]),
799 	    intrcmp);
800 
801 	/*
802 	 * Restart the scan from the same location to avoid moving in the
803 	 * common case.
804 	 */
805 	intr_init_cpus();
806 
807 	/*
808 	 * Assign round-robin from most loaded to least.
809 	 */
810 	for (i = num_io_irqs - 1; i >= 0; i--) {
811 		isrc = interrupt_sorted[i];
812 		if (isrc == NULL  || isrc->is_event->ie_cpu != NOCPU)
813 			continue;
814 		cpu = current_cpu[isrc->is_domain];
815 		intr_next_cpu(isrc->is_domain);
816 		if (isrc->is_cpu != cpu &&
817 		    isrc->is_pic->pic_assign_cpu(isrc,
818 		    cpu_apic_ids[cpu]) == 0)
819 			isrc->is_cpu = cpu;
820 	}
821 	sx_xunlock(&intrsrc_lock);
822 out:
823 	taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task,
824 	    interval ? hz * interval : hz * 60);
825 
826 }
827 
828 static void
829 intr_balance_init(void *dummy __unused)
830 {
831 
832 	TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance,
833 	    NULL);
834 	taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz);
835 }
836 SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL);
837 
838 #else
839 /*
840  * Always route interrupts to the current processor in the UP case.
841  */
842 u_int
843 intr_next_cpu(int domain)
844 {
845 
846 	return (PCPU_GET(apic_id));
847 }
848 #endif
849