1 /* $NetBSD: machdep.c,v 1.849 2025/05/05 16:57:41 imil Exp $ */
2
3 /*
4 * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009, 2017
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace
10 * Simulation Facility NASA Ames Research Center, by Julio M. Merino Vidal,
11 * by Andrew Doran, and by Maxime Villard.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*
36 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
37 * All rights reserved.
38 *
39 * This code is derived from software contributed to Berkeley by
40 * William Jolitz.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)machdep.c 7.4 (Berkeley) 6/3/91
67 */
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.849 2025/05/05 16:57:41 imil Exp $");
71
72 #include "opt_beep.h"
73 #include "opt_compat_freebsd.h"
74 #include "opt_compat_netbsd.h"
75 #include "opt_cpureset_delay.h"
76 #include "opt_ddb.h"
77 #include "opt_kgdb.h"
78 #include "opt_mtrr.h"
79 #include "opt_modular.h"
80 #include "opt_multiboot.h"
81 #include "opt_multiprocessor.h"
82 #include "opt_physmem.h"
83 #include "opt_realmem.h"
84 #include "opt_user_ldt.h"
85 #include "opt_xen.h"
86 #include "isa.h"
87 #include "pci.h"
88
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/signal.h>
92 #include <sys/signalvar.h>
93 #include <sys/kernel.h>
94 #include <sys/cpu.h>
95 #include <sys/exec.h>
96 #include <sys/fcntl.h>
97 #include <sys/reboot.h>
98 #include <sys/conf.h>
99 #include <sys/kauth.h>
100 #include <sys/msgbuf.h>
101 #include <sys/mount.h>
102 #include <sys/syscallargs.h>
103 #include <sys/core.h>
104 #include <sys/kcore.h>
105 #include <sys/ucontext.h>
106 #include <sys/ras.h>
107 #include <sys/ksyms.h>
108 #include <sys/device.h>
109 #include <sys/timevar.h>
110
111 #ifdef KGDB
112 #include <sys/kgdb.h>
113 #endif
114
115 #include <dev/cons.h>
116 #include <dev/mm.h>
117
118 #include <uvm/uvm.h>
119 #include <uvm/uvm_page.h>
120
121 #include <sys/sysctl.h>
122
123 #include <x86/efi.h>
124
125 #include <machine/cpu.h>
126 #include <machine/cpu_rng.h>
127 #include <machine/cpufunc.h>
128 #include <machine/cpuvar.h>
129 #include <machine/gdt.h>
130 #include <machine/intr.h>
131 #include <machine/kcore.h>
132 #include <machine/pio.h>
133 #include <machine/psl.h>
134 #include <machine/reg.h>
135 #include <machine/specialreg.h>
136 #include <machine/bootinfo.h>
137 #include <machine/mtrr.h>
138 #include <machine/pmap_private.h>
139 #include <x86/x86/tsc.h>
140
141 #include <x86/bootspace.h>
142 #include <x86/fpu.h>
143 #include <x86/dbregs.h>
144 #include <x86/machdep.h>
145
146 #include <machine/multiboot.h>
147
148 #ifdef XEN
149 #include <xen/evtchn.h>
150 #include <xen/xen.h>
151 #include <xen/hypervisor.h>
152 #endif
153
154 #include <dev/isa/isareg.h>
155 #include <machine/isa_machdep.h>
156 #include <dev/ic/i8042reg.h>
157
158 #include <ddb/db_active.h>
159
160 #ifdef DDB
161 #include <machine/db_machdep.h>
162 #include <ddb/db_extern.h>
163 #endif
164
165 #include "acpica.h"
166 #include "bioscall.h"
167
168 #if NBIOSCALL > 0
169 #include <machine/bioscall.h>
170 #endif
171
172 #if NACPICA > 0
173 #include <dev/acpi/acpivar.h>
174 #define ACPI_MACHDEP_PRIVATE
175 #include <machine/acpi_machdep.h>
176 #else
177 #include <machine/i82489var.h>
178 #endif
179
180 #include "isa.h"
181 #include "isadma.h"
182 #include "ksyms.h"
183
184 #include "cardbus.h"
185 #if NCARDBUS > 0
186 /* For rbus_min_start hint. */
187 #include <sys/bus.h>
188 #include <dev/cardbus/rbus.h>
189 #include <machine/rbus_machdep.h>
190 #endif
191
192 #include "mca.h"
193 #if NMCA > 0
194 #include <machine/mca_machdep.h> /* for mca_busprobe() */
195 #endif
196
197 #ifdef MULTIPROCESSOR /* XXX */
198 #include <machine/mpbiosvar.h> /* XXX */
199 #endif /* XXX */
200
/* the following is used externally (sysctl_hw) */
char machine[] = "i386";		/* CPU "architecture" */
char machine_arch[] = "i386";		/* machine == machine_arch */

/*
 * Delay, in milliseconds, that cpu_reboot() waits before calling
 * cpu_reset().  The CPURESET_DELAY build option overrides the default.
 */
#ifdef CPURESET_DELAY
int cpureset_delay = CPURESET_DELAY;
#else
int cpureset_delay = 2000; /* default to 2s */
#endif

#ifdef MTRR
const struct mtrr_funcs *mtrr_funcs;
#endif

/* CPU feature/quirk flags; not assigned in this part of the file. */
int cpu_class;
int use_pae;
int i386_fpu_fdivbug;

int i386_use_fxsave;
int i386_has_sse;
int i386_has_sse2;

/*
 * Virtual and physical addresses for the IDT, GDT and LDT; not assigned
 * in this part of the file.
 */
vaddr_t idt_vaddr;
paddr_t idt_paddr;
vaddr_t gdt_vaddr;
paddr_t gdt_paddr;
vaddr_t ldt_vaddr;
paddr_t ldt_paddr;

vaddr_t pentium_idt_vaddr;

/* Submap used for physio; allocated by cpu_startup(). */
struct vm_map *phys_map = NULL;

extern struct bootspace bootspace;

extern paddr_t lowmem_rsvd;
extern paddr_t avail_start, avail_end;
#ifdef XENPV
extern paddr_t pmap_pa_start, pmap_pa_end;
void hypervisor_callback(void);
void failsafe_callback(void);
#endif

/*
 * Size of memory segments, before any memory is stolen.
 */
phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
int mem_cluster_cnt = 0;

void init_bootspace(void);
void init386(paddr_t);
void initgdt(union descriptor *);

static void i386_proc0_pcb_ldt_init(void);

/*
 * esym:  virtual address of the end of the symbol table, or NULL.
 * eblob: virtual address of the end of the boot-loaded modules.
 * On non-XENPV kernels both are filled in by native_loader().
 */
int *esym;
int *eblob;
extern int boothowto;
259
260 #ifndef XENPV
261
/*
 * Base memory reported by BIOS.  A non-zero REALBASEMEM build option
 * takes precedence over the value passed in by the boot loader.
 */
#ifndef REALBASEMEM
int biosbasemem = 0;
#else
int biosbasemem = REALBASEMEM;
#endif

/*
 * Extended memory reported by BIOS.  A non-zero REALEXTMEM build option
 * takes precedence over the value passed in by the boot loader.
 */
#ifndef REALEXTMEM
int biosextmem = 0;
#else
int biosextmem = REALEXTMEM;
#endif

/*
 * Set to 1 by native_loader() when biosbasemem and/or biosextmem was
 * taken from the boot loader instead of from the build options.
 */
int biosmem_implicit;
278
/*
 * Representation of the bootinfo structure constructed by a NetBSD native
 * boot loader.  Only used by native_loader().
 */
struct bootinfo_source {
	uint32_t bs_naddrs;	/* number of entries in bs_addrs[] */
	void *bs_addrs[1];	/* Actually longer; each entry is read as a
				 * struct btinfo_common * by native_loader() */
};

/* Only called by locore.S; no need to be in a header file. */
void native_loader(int, int, struct bootinfo_source *, paddr_t, int, int);
290
/*
 * Called as one of the very first things during system startup (just after
 * the boot loader gave control to the kernel image), this routine is in
 * charge of retrieving the parameters passed in by the boot loader and
 * storing them in the appropriate kernel variables.
 *
 * WARNING: Because the kernel has not yet relocated itself to KERNBASE,
 * special care has to be taken when accessing memory because absolute
 * addresses (referring to kernel symbols) do not work.  So:
 *
 * 1) Avoid jumps to absolute addresses (such as gotos and switches).
 * 2) To access global variables use their physical address, which
 *    can be obtained using the RELOC macro.
 *
 * Parameters:
 *	bl_boothowto	stored into boothowto.
 *	bl_bootdev	not used by this function.
 *	bl_bootinfo	table of bootinfo entries to copy, or NULL.
 *	bl_esym		physical end of the symbol table, or 0 if none.
 *	bl_biosextmem	used only if biosextmem is still 0.
 *	bl_biosbasemem	used only if biosbasemem is still 0.
 */
void
native_loader(int bl_boothowto, int bl_bootdev,
    struct bootinfo_source *bl_bootinfo, paddr_t bl_esym,
    int bl_biosextmem, int bl_biosbasemem)
{
	/* Turn the link-time address of x into a pointer usable right now. */
#define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE))

	*RELOC(int *, &boothowto) = bl_boothowto;

	/*
	 * The boot loader provides a physical, non-relocated address
	 * for the symbols table's end.  We need to convert it to a
	 * virtual address.
	 */
	if (bl_esym != 0)
		*RELOC(int **, &esym) = (int *)((vaddr_t)bl_esym + KERNBASE);
	else
		*RELOC(int **, &esym) = 0;

	/*
	 * Copy bootinfo entries (if any) from the boot loader's
	 * representation to the kernel's bootinfo space.
	 */
	if (bl_bootinfo != NULL) {
		size_t i;
		uint8_t *data;
		struct bootinfo *bidest;
		struct btinfo_modulelist *bi;

		bidest = RELOC(struct bootinfo *, &bootinfo);

		/* 'data' is the write cursor inside bidest->bi_data[]. */
		data = &bidest->bi_data[0];

		for (i = 0; i < bl_bootinfo->bs_naddrs; i++) {
			struct btinfo_common *bc;

			bc = bl_bootinfo->bs_addrs[i];

			/*
			 * Stop at the first entry that would not fit within
			 * BOOTINFO_MAXSIZE bytes; it and all later entries
			 * are dropped.
			 */
			if ((data + bc->len) >
			    (&bidest->bi_data[0] + BOOTINFO_MAXSIZE))
				break;

			memcpy(data, bc, bc->len);
			/*
			 * If any modules were loaded, record where they
			 * end.  We'll need to skip over them.
			 */
			bi = (struct btinfo_modulelist *)data;
			if (bi->common.type == BTINFO_MODULELIST) {
				*RELOC(int **, &eblob) =
				    (int *)(bi->endpa + KERNBASE);
			}
			data += bc->len;
		}
		/* Number of entries actually copied. */
		bidest->bi_nentries = i;
	}

	/*
	 * Configure biosbasemem and biosextmem only if they were not
	 * explicitly given during the kernel's build.
	 */
	if (*RELOC(int *, &biosbasemem) == 0) {
		*RELOC(int *, &biosbasemem) = bl_biosbasemem;
		*RELOC(int *, &biosmem_implicit) = 1;
	}
	if (*RELOC(int *, &biosextmem) == 0) {
		*RELOC(int *, &biosextmem) = bl_biosextmem;
		*RELOC(int *, &biosmem_implicit) = 1;
	}
#undef RELOC
}
376
377 #endif /* XENPV */
378
/*
 * Machine-dependent startup code.
 *
 * Maps and initializes the kernel message buffer, allocates the physio
 * submap, prints the banner, sets up the GDT, proc0's PCB/LDT and the
 * primary CPU's TSS, and finally calls x86_startup().
 */
void
cpu_startup(void)
{
	int x, y;
	vaddr_t minaddr, maxaddr;
	psize_t sz;

	/*
	 * For console drivers that require uvm and pmap to be initialized,
	 * we'll give them one more chance here...
	 */
	consinit();

	/*
	 * Initialize error message buffer (at end of core).
	 */
	if (msgbuf_p_cnt == 0)
		panic("msgbuf paddr map has not been set up");
	/* Sum the sizes of all physical message buffer segments into sz. */
	for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz)
		continue;

	/* Reserve virtual address space only; pages are entered below. */
	msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY);
	if (msgbuf_vaddr == 0)
		panic("failed to valloc msgbuf_vaddr");

	/*
	 * Enter every page of every segment back to back at msgbuf_vaddr.
	 * sz is rebuilt here as the number of bytes actually mapped.
	 */
	for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) {
		for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE)
			pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz,
			    msgbuf_p_seg[y].paddr + x * PAGE_SIZE,
			    VM_PROT_READ|VM_PROT_WRITE, 0);
	}

	pmap_update(pmap_kernel());

	initmsgbuf((void *)msgbuf_vaddr, sz);

#ifdef MULTIBOOT
	multiboot1_print_info();
	multiboot2_print_info();
#endif

#if NCARDBUS > 0
	/* Tell RBUS how much RAM we have, so it can use heuristics. */
	rbus_min_start_hint(ctob((psize_t)physmem));
#endif

	minaddr = 0;

	/*
	 * Allocate a submap for physio
	 */
	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
	    VM_PHYS_SIZE, 0, false, NULL);

	/* Say hello. */
	banner();

	/* Safe for i/o port / memory space allocation to use malloc now. */
#if NISA > 0 || NPCI > 0
	x86_bus_space_mallocok();
#endif

	gdt_init();
	i386_proc0_pcb_ldt_init();

	/* Allocate the primary CPU's TSS and, on non-XENPV, load it. */
	cpu_init_tss(&cpu_info_primary);
#ifndef XENPV
	ltr(cpu_info_primary.ci_tss_sel);
#endif

	x86_startup();
}
454
455 /*
456 * Set up proc0's PCB and LDT.
457 */
458 static void
i386_proc0_pcb_ldt_init(void)459 i386_proc0_pcb_ldt_init(void)
460 {
461 struct lwp *l = &lwp0;
462 struct pcb *pcb = lwp_getpcb(l);
463
464 pcb->pcb_cr0 = rcr0() & ~CR0_TS;
465 pcb->pcb_esp0 = uvm_lwp_getuarea(l) + USPACE - 16;
466 pcb->pcb_iopl = IOPL_KPL;
467 l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1;
468 memcpy(&pcb->pcb_fsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_fsd));
469 memcpy(&pcb->pcb_gsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_gsd));
470 pcb->pcb_dbregs = NULL;
471
472 #ifndef XENPV
473 lldt(GSEL(GLDT_SEL, SEL_KPL));
474 #else
475 HYPERVISOR_fpu_taskswitch(1);
476 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0);
477 #endif
478 }
479
480 #ifdef XENPV
481 /* used in assembly */
482 void i386_switch_context(lwp_t *);
483 void i386_tls_switch(lwp_t *);
484
485 /*
486 * Switch context:
487 * - switch stack pointer for user->kernel transition
488 */
489 void
i386_switch_context(lwp_t * l)490 i386_switch_context(lwp_t *l)
491 {
492 struct pcb *pcb;
493
494 pcb = lwp_getpcb(l);
495
496 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0);
497
498 struct physdev_set_iopl set_iopl;
499 set_iopl.iopl = pcb->pcb_iopl;
500 HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
501 }
502
503 void
i386_tls_switch(lwp_t * l)504 i386_tls_switch(lwp_t *l)
505 {
506 struct cpu_info *ci = curcpu();
507 struct pcb *pcb = lwp_getpcb(l);
508
509 /*
510 * Raise the IPL to IPL_HIGH. XXX Still needed?
511 */
512 (void)splhigh();
513
514 /* Update TLS segment pointers */
515 update_descriptor(&ci->ci_gdt[GUFS_SEL],
516 (union descriptor *)&pcb->pcb_fsd);
517 update_descriptor(&ci->ci_gdt[GUGS_SEL],
518 (union descriptor *)&pcb->pcb_gsd);
519 }
520 #endif /* XENPV */
521
/* XXX */
/* IDTVEC(name) pastes an 'X' prefix onto name, e.g. IDTVEC(foo) -> Xfoo. */
#define IDTVEC(name) __CONCAT(X, name)
/* Function type used for the extern entry points declared with IDTVEC(). */
typedef void (vector)(void);
525
526 #ifndef XENPV
527 static void tss_init(struct i386tss *, void *, void *);
528
529 static void
tss_init(struct i386tss * tss,void * stack,void * func)530 tss_init(struct i386tss *tss, void *stack, void *func)
531 {
532 KASSERT(curcpu()->ci_pmap == pmap_kernel());
533
534 memset(tss, 0, sizeof *tss);
535 tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16);
536 tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
537 tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL);
538 tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL);
539 tss->tss_gs = tss->__tss_es = tss->__tss_ds =
540 tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL);
541 /* %cr3 contains the value associated to pmap_kernel */
542 tss->tss_cr3 = rcr3();
543 tss->tss_esp = (int)((char *)stack + USPACE - 16);
544 tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
545 tss->__tss_eflags = PSL_MBO | PSL_NT; /* XXX not needed? */
546 tss->__tss_eip = (int)func;
547 }
548
extern vector IDTVEC(tss_trap08);
#if defined(DDB) && defined(MULTIPROCESSOR)
extern vector Xintr_ddbipi, Xintr_x2apic_ddbipi;
extern int ddb_vec;
#endif

/*
 * Install task gates for 'ci' so that selected vectors run on their own
 * TSS and stack.
 *
 * Always: vector 8 is routed through GTRAPTSS_SEL to ci's dblflt_tss,
 * which starts at IDTVEC(tss_trap08) on a freshly allocated wired stack.
 * With DDB and MULTIPROCESSOR: vector ddb_vec is routed through
 * GIPITSS_SEL to ci's ddbipi_tss in the same way.
 *
 * Note that the GDT entries are written into ci's GDT, while the IDT
 * that is modified is always the primary CPU's.
 *
 * NOTE(review): the results of uvm_km_alloc() are not checked here.
 */
void
cpu_set_tss_gates(struct cpu_info *ci)
{
	struct segment_descriptor sd;
	void *doubleflt_stack;
	idt_descriptor_t *idt;

	/* Dedicated USPACE-sized wired stack for the vector 8 task. */
	doubleflt_stack = (void *)uvm_km_alloc(kernel_map, USPACE, 0,
	    UVM_KMF_WIRED);
	tss_init(&ci->ci_tss->dblflt_tss, doubleflt_stack, IDTVEC(tss_trap08));

	/* Describe that TSS in this CPU's GDT. */
	setsegment(&sd, &ci->ci_tss->dblflt_tss, sizeof(struct i386tss) - 1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);
	ci->ci_gdt[GTRAPTSS_SEL].sd = sd;

	/* Point IDT vector 8 at it with a task gate. */
	idt = cpu_info_primary.ci_idtvec.iv_idt;
	set_idtgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
	    GSEL(GTRAPTSS_SEL, SEL_KPL));

#if defined(DDB) && defined(MULTIPROCESSOR)
	/*
	 * Set up separate handler for the DDB IPI, so that it doesn't
	 * stomp on a possibly corrupted stack.
	 *
	 * XXX overwriting the gate set in db_machine_init.
	 * Should rearrange the code so that it's set only once.
	 */
	void *ddbipi_stack;

	ddbipi_stack = (void *)uvm_km_alloc(kernel_map, USPACE, 0,
	    UVM_KMF_WIRED);
	/* The entry point depends on whether x2APIC mode is in use. */
	tss_init(&ci->ci_tss->ddbipi_tss, ddbipi_stack,
	    x2apic_mode ? Xintr_x2apic_ddbipi : Xintr_ddbipi);

	setsegment(&sd, &ci->ci_tss->ddbipi_tss, sizeof(struct i386tss) - 1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);
	ci->ci_gdt[GIPITSS_SEL].sd = sd;

	set_idtgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
	    GSEL(GIPITSS_SEL, SEL_KPL));
#endif
}
597 #endif /* XENPV */
598
599 /*
600 * Set up TSS and I/O bitmap.
601 */
602 void
cpu_init_tss(struct cpu_info * ci)603 cpu_init_tss(struct cpu_info *ci)
604 {
605 struct cpu_tss *cputss;
606
607 cputss = (struct cpu_tss *)uvm_km_alloc(kernel_map,
608 sizeof(struct cpu_tss), 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
609
610 cputss->tss.tss_iobase = IOMAP_INVALOFF << 16;
611 #ifndef XENPV
612 cputss->tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
613 cputss->tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
614 cputss->tss.tss_cr3 = rcr3();
615 #endif
616
617 ci->ci_tss = cputss;
618 #ifndef XENPV
619 ci->ci_tss_sel = tss_alloc(&cputss->tss);
620 #endif
621 }
622
623 void *
getframe(struct lwp * l,int sig,int * onstack)624 getframe(struct lwp *l, int sig, int *onstack)
625 {
626 struct proc *p = l->l_proc;
627 struct trapframe *tf = l->l_md.md_regs;
628
629 /* Do we need to jump onto the signal stack? */
630 *onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0
631 && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
632 if (*onstack)
633 return (char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size;
634 return (void *)tf->tf_esp;
635 }
636
637 /*
638 * Build context to run handler in. We invoke the handler
639 * directly, only returning via the trampoline. Note the
640 * trampoline version numbers are coordinated with machine-
641 * dependent code in libc.
642 */
643 void
buildcontext(struct lwp * l,int sel,void * catcher,void * fp)644 buildcontext(struct lwp *l, int sel, void *catcher, void *fp)
645 {
646 struct trapframe *tf = l->l_md.md_regs;
647
648 tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
649 tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
650 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
651 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
652 tf->tf_eip = (int)catcher;
653 tf->tf_cs = GSEL(sel, SEL_UPL);
654 tf->tf_eflags &= ~PSL_CLEARSIG;
655 tf->tf_esp = (int)fp;
656 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
657
658 /* Ensure FP state is reset. */
659 fpu_sigreset(l);
660 }
661
/*
 * Deliver the signal described by 'ksi' to the current lwp.
 *
 * Builds a struct sigframe_siginfo on the stack chosen by getframe(),
 * copies it out to user space and redirects the lwp's trapframe to the
 * handler.  'mask' is the signal mask recorded in the saved ucontext.
 *
 * Called with p->p_lock held.  The lock is dropped around the context
 * capture and the copyout, and is held again on return.  If the
 * copyout fails the process is terminated with SIGILL.
 */
void
sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
	/* Use the big code segment if code is mapped above the limit. */
	int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
	    GUCODEBIG_SEL : GUCODE_SEL;
	struct sigacts *ps = p->p_sigacts;
	int onstack, error;
	int sig = ksi->ksi_signo;
	/* fp: user-space frame address; frame: kernel copy being built. */
	struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame;
	/* Read the handler before sendsig_reset() is called below. */
	sig_t catcher = SIGACTION(p, sig).sa_handler;

	KASSERT(mutex_owned(p->p_lock));

	/* Make room for one frame below the chosen stack top and align it. */
	fp--;
	fp = (struct sigframe_siginfo *)((uintptr_t)fp & ~STACK_ALIGNBYTES);

	memset(&frame, 0, sizeof(frame));
	frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp;
	frame.sf_signum = sig;
	/* These pointers refer to the user-space copy, not to 'frame'. */
	frame.sf_sip = &fp->sf_si;
	frame.sf_ucp = &fp->sf_uc;
	frame.sf_si._info = ksi->ksi_info;
	frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM;
	frame.sf_uc.uc_sigmask = *mask;
	frame.sf_uc.uc_link = l->l_ctxlink;
	/* Record whether the lwp was already on the signal stack. */
	frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK)
	    ? _UC_SETSTACK : _UC_CLRSTACK;

	sendsig_reset(l, sig);

	/* p_lock is not held while capturing the context and copying out. */
	mutex_exit(p->p_lock);
	cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
	error = copyout(&frame, fp, sizeof(frame));
	mutex_enter(p->p_lock);

	if (error != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	buildcontext(l, sel, catcher, fp);

	/* Remember that we're now on the signal stack. */
	if (onstack)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
}
715
716 static void
maybe_dump(int howto)717 maybe_dump(int howto)
718 {
719 int s;
720
721 /* Disable interrupts. */
722 s = splhigh();
723
724 /* Do a dump if requested. */
725 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
726 dumpsys();
727
728 splx(s);
729 }
730
/*
 * Halt, power down or reboot the machine according to 'howto' (RB_*
 * flags).  'bootstr' is not used by this function.  Does not return.
 *
 * Sequence: optional dump, sync/unmount/detach (unless RB_NOSYNC or a
 * panic is in progress), pmf shutdown, shutdown hooks, optional
 * power-down, optional halt with wait for a key press, then CPU reset.
 */
void
cpu_reboot(int howto, char *bootstr)
{
	/* Ensures vfs_sync_all() runs at most once across calls. */
	static bool syncdone = false;
	int s = IPL_NONE;

	/* Still cold: force a halt and go straight to the shutdown hooks. */
	if (cold) {
		howto |= RB_HALT;
		goto haltsys;
	}

	boothowto = howto;

	/* XXX used to dump after vfs_shutdown() and before
	 * detaching devices / shutdown hooks / pmf_system_shutdown().
	 */
	maybe_dump(howto);

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by syncing or unmounting the file systems.
	 */
	if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) {
		if (!syncdone) {
			syncdone = true;
			/* XXX used to force unmount as well, here */
			vfs_sync_all(curlwp);
		}

		/*
		 * Repeat until none of unmounting, detaching devices or
		 * forcibly unmounting one file system reports progress.
		 */
		while (vfs_unmountall1(curlwp, false, false) ||
		    config_detach_all(boothowto) ||
		    vfs_unmount_forceone(curlwp))
			; /* do nothing */
	} else {
		if (!db_active)
			suspendsched();
	}

	pmf_system_shutdown(boothowto);

	s = splhigh();

	/* amd64 maybe_dump() */

haltsys:
	doshutdownhooks();

	if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
#if NACPICA > 0
		/* Restore the IPL saved above, if splhigh() was reached. */
		if (s != IPL_NONE)
			splx(s);

		acpi_enter_sleep_state(ACPI_STATE_S5);
#else
		__USE(s);
#endif
#ifdef XEN
		if (vm_guest == VM_GUEST_XENPV ||
		    vm_guest == VM_GUEST_XENPVH ||
		    vm_guest == VM_GUEST_XENPVHVM)
			HYPERVISOR_shutdown();
#endif /* XEN */
	}

#ifdef MULTIPROCESSOR
	cpu_broadcast_halt();
#endif /* MULTIPROCESSOR */

	if (howto & RB_HALT) {
#if NACPICA > 0
		acpi_disable();
#endif

		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");

#ifdef BEEP_ONHALT
		{
			int c;
			/* Alternate tone and silence BEEP_ONHALT_COUNT times. */
			for (c = BEEP_ONHALT_COUNT; c > 0; c--) {
				sysbeep(BEEP_ONHALT_PITCH,
				    BEEP_ONHALT_PERIOD * hz / 1000);
				delay(BEEP_ONHALT_PERIOD * 1000);
				sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000);
				delay(BEEP_ONHALT_PERIOD * 1000);
			}
		}
#endif

		cnpollc(1);	/* for proper keyboard command handling */
		if (cngetc() == 0) {
			/* no console attached, so just hlt */
			printf("No keyboard - cannot reboot after all.\n");
			for(;;) {
				x86_hlt();
			}
		}
		cnpollc(0);
	}

	printf("rebooting...\n");
	/* cpureset_delay is in milliseconds; delay() is given value * 1000. */
	if (cpureset_delay > 0)
		delay(cpureset_delay * 1000);
	cpu_reset();
	/* Spin in case cpu_reset() returns. */
	for(;;) ;
	/*NOTREACHED*/
}
839
840 /*
841 * Clear registers on exec
842 */
843 void
setregs(struct lwp * l,struct exec_package * pack,vaddr_t stack)844 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
845 {
846 struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
847 struct pcb *pcb = lwp_getpcb(l);
848 struct trapframe *tf;
849
850 #ifdef USER_LDT
851 pmap_ldt_cleanup(l);
852 #endif
853
854 fpu_clear(l, pack->ep_osversion >= 699002600
855 ? __INITIAL_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
856
857 memcpy(&pcb->pcb_fsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_fsd));
858 memcpy(&pcb->pcb_gsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_gsd));
859
860 x86_dbregs_clear(l);
861
862 tf = l->l_md.md_regs;
863 memset(tf, 0, sizeof(*tf));
864
865 tf->tf_trapno = T_ASTFLT;
866 tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
867 tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
868 tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
869 tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
870 tf->tf_edi = 0;
871 tf->tf_esi = 0;
872 tf->tf_ebp = 0;
873 tf->tf_ebx = l->l_proc->p_psstrp;
874 tf->tf_edx = 0;
875 tf->tf_ecx = 0;
876 tf->tf_eax = 0;
877 tf->tf_eip = pack->ep_entry;
878 tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
879 LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL);
880 tf->tf_eflags = PSL_USERSET;
881 tf->tf_esp = stack;
882 tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
883 }
884
/*
 * Initialize segments and descriptor tables
 */

/* gdtstore is pointed at the temporary GDT storage by initgdt(). */
union descriptor *gdtstore, *ldtstore;
union descriptor *pentium_idt;
extern vaddr_t lwp0uarea;
892
893 void
setgate(struct gate_descriptor * gd,void * func,int args,int type,int dpl,int sel)894 setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
895 int sel)
896 {
897
898 gd->gd_looffset = (int)func;
899 gd->gd_selector = sel;
900 gd->gd_stkcpy = args;
901 gd->gd_xx = 0;
902 gd->gd_type = type;
903 gd->gd_dpl = dpl;
904 gd->gd_p = 1;
905 gd->gd_hioffset = (int)func >> 16;
906 }
907
908 void
unsetgate(struct gate_descriptor * gd)909 unsetgate(struct gate_descriptor *gd)
910 {
911
912 gd->gd_p = 0;
913 gd->gd_hioffset = 0;
914 gd->gd_looffset = 0;
915 gd->gd_selector = 0;
916 gd->gd_xx = 0;
917 gd->gd_stkcpy = 0;
918 gd->gd_type = 0;
919 gd->gd_dpl = 0;
920 }
921
922 void
setregion(struct region_descriptor * rd,void * base,size_t limit)923 setregion(struct region_descriptor *rd, void *base, size_t limit)
924 {
925
926 rd->rd_limit = (int)limit;
927 rd->rd_base = (int)base;
928 }
929
930 void
setsegment(struct segment_descriptor * sd,const void * base,size_t limit,int type,int dpl,int def32,int gran)931 setsegment(struct segment_descriptor *sd, const void *base, size_t limit,
932 int type, int dpl, int def32, int gran)
933 {
934
935 sd->sd_lolimit = (int)limit;
936 sd->sd_lobase = (int)base;
937 sd->sd_type = type;
938 sd->sd_dpl = dpl;
939 sd->sd_p = 1;
940 sd->sd_hilimit = (int)limit >> 16;
941 sd->sd_xx = 0;
942 sd->sd_def32 = def32;
943 sd->sd_gran = gran;
944 sd->sd_hibase = (int)base >> 24;
945 }
946
947 /* XXX */
948 extern vector IDTVEC(syscall);
949 extern vector *IDTVEC(exceptions)[];
950 #ifdef XENPV
951 extern union descriptor tmpgdt[];
952 #endif
953
954 void
cpu_init_idt(struct cpu_info * ci)955 cpu_init_idt(struct cpu_info *ci)
956 {
957 struct region_descriptor region;
958 struct idt_vec *iv;
959 idt_descriptor_t *idt;
960
961 iv = &ci->ci_idtvec;
962 idt = iv->iv_idt_pentium;
963 setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1);
964 lidt(®ion);
965 }
966
967 /*
968 * initgdt(tgdt)
969 *
970 * Initialize a temporary Global Descriptor Table (GDT) using
971 * storage space at tgdt.
972 *
973 * 1. Set up segment descriptors for our purposes, including a
974 * CPU-local segment descriptor pointing at &cpu_info_primary.
975 *
976 * 2. Load the address into the Global Descriptor Table Register.
977 *
978 * 3. Set up segment selectors for all the segment registers using
979 * it so that %fs-relative addressing works for the CPU-local
980 * data.
981 *
 *	After this point, CPUVAR(...), curcpu(), and curlwp will work.
983 *
984 * Eventually the kernel will switch to a second temporary GDT
985 * allocated with pmap_bootstrap_valloc in pmap_bootstrap, and
986 * then to permanent GDT allocated with uvm_km(9) in gdt_init.
987 * But the first temporary GDT is needed now to get us going with
988 * early access to curcpu() and curlwp before we enter kernel
989 * main.
990 *
991 * XXX The purpose of each of the segment descriptors should be
992 * written down somewhere in a single place that can be cross-
993 * referenced.
994 *
995 * References:
996 *
997 * - Intel 64 and IA-32 Architectures Software Developer's Manual,
998 * Volume 3: System Programming Guide, Order Number 325384,
999 * April 2022, Sec. 3.5.1 `Segment Descriptor Tables',
1000 * pp. 3-14 through 3-16.
1001 */
1002 void
initgdt(union descriptor * tgdt)1003 initgdt(union descriptor *tgdt)
1004 {
1005 KASSERT(tgdt != NULL);
1006
1007 gdtstore = tgdt;
1008 #ifdef XENPV
1009 u_long frames[16];
1010 #else
1011 struct region_descriptor region;
1012 memset(gdtstore, 0, NGDT * sizeof(*gdtstore));
1013 #endif
1014
1015 /* make gdt gates and memory segments */
1016 setsegment(&gdtstore[GCODE_SEL].sd, 0, 0xfffff,
1017 SDT_MEMERA, SEL_KPL, 1, 1);
1018 setsegment(&gdtstore[GDATA_SEL].sd, 0, 0xfffff,
1019 SDT_MEMRWA, SEL_KPL, 1, 1);
1020 setsegment(&gdtstore[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1,
1021 SDT_MEMERA, SEL_UPL, 1, 1);
1022 setsegment(&gdtstore[GUCODEBIG_SEL].sd, 0, 0xfffff,
1023 SDT_MEMERA, SEL_UPL, 1, 1);
1024 setsegment(&gdtstore[GUDATA_SEL].sd, 0, 0xfffff,
1025 SDT_MEMRWA, SEL_UPL, 1, 1);
1026 #if NBIOSCALL > 0 && !defined(XENPV)
1027 /* bios trampoline GDT entries */
1028 setsegment(&gdtstore[GBIOSCODE_SEL].sd, 0, 0xfffff,
1029 SDT_MEMERA, SEL_KPL, 0, 0);
1030 setsegment(&gdtstore[GBIOSDATA_SEL].sd, 0, 0xfffff,
1031 SDT_MEMRWA, SEL_KPL, 0, 0);
1032 #endif
1033 setsegment(&gdtstore[GCPU_SEL].sd, &cpu_info_primary,
1034 sizeof(struct cpu_info) - 1, SDT_MEMRWA, SEL_KPL, 1, 0);
1035
1036 #ifndef XENPV
1037 setregion(®ion, gdtstore, NGDT * sizeof(gdtstore[0]) - 1);
1038 lgdt(®ion);
1039 #else /* !XENPV */
1040 /*
1041 * We jumpstart the bootstrap process a bit so we can update
1042 * page permissions. This is done redundantly later from
1043 * x86_xpmap.c:xen_locore() - harmless.
1044 */
1045 xpmap_phys_to_machine_mapping =
1046 (unsigned long *)xen_start_info.mfn_list;
1047
1048 frames[0] = xpmap_ptom((uint32_t)gdtstore - KERNBASE) >> PAGE_SHIFT;
1049 { /*
1050 * Enter the gdt page RO into the kernel map. We can't
1051 * use pmap_kenter_pa() here, because %fs is not
1052 * usable until the gdt is loaded, and %fs is used as
1053 * the base pointer for curcpu() and curlwp(), both of
1054 * which are in the callpath of pmap_kenter_pa().
1055 * So we mash up our own - this is MD code anyway.
1056 */
1057 extern pt_entry_t xpmap_pg_nx;
1058 pt_entry_t pte;
1059
1060 pte = pmap_pa2pte((vaddr_t)gdtstore - KERNBASE);
1061 pte |= xpmap_pg_nx | PTE_P;
1062
1063 if (HYPERVISOR_update_va_mapping((vaddr_t)gdtstore, pte,
1064 UVMF_INVLPG) < 0) {
1065 panic("gdt page RO update failed.\n");
1066 }
1067 }
1068
1069 if (HYPERVISOR_set_gdt(frames, NGDT /* XXX is it right ? */))
1070 panic("HYPERVISOR_set_gdt failed!\n");
1071
1072 lgdt_finish();
1073 #endif /* !XENPV */
1074 }
1075
#if !defined(XENPV) && NBIOSCALL > 0
/*
 * init386_pte0:
 *
 *	Enter physical page 4 (listed in init386's low memory reservations
 *	as the temporary page table for 0MB-4MB) at the virtual address
 *	returned by vtopte(0), with all protections, and zero it.
 */
static void
init386_pte0(void)
{
	const paddr_t pt_pa = 4 * PAGE_SIZE;
	const vaddr_t pt_va = (vaddr_t)vtopte(0);

	pmap_kenter_pa(pt_va, pt_pa, VM_PROT_ALL, 0);
	pmap_update(pmap_kernel());

	/* The page may hold stale contents; wipe it before it is used. */
	memset((void *)pt_va, 0, PAGE_SIZE);
}
#endif /* !XENPV && NBIOSCALL > 0 */
1091
#ifndef XENPV
/*
 * init386_ksyms:
 *
 *	Hand the kernel symbol table to ksyms_addsyms_elf().  The possible
 *	sources are tried in this order, stopping at the first that applies:
 *
 *	  1. multiboot 1, then multiboot 2 (MULTIBOOT kernels only);
 *	  2. the area following `end', for a pvh_boot guest that is not
 *	     VM_GUEST_XENPVH (XEN kernels only);
 *	  3. the BTINFO_SYMTAB bootinfo record;
 *	  4. the area following `end', using the int stored at `end' as
 *	     the first argument.
 *
 *	Compiles to an empty function unless NKSYMS, DDB or MODULAR is
 *	configured.
 */
static void
init386_ksyms(void)
{
#if NKSYMS || defined(DDB) || defined(MODULAR)
	extern int end;
	struct btinfo_symtab *bi_sym;

#ifdef DDB
	db_machine_init();
#endif

#if defined(MULTIBOOT)
	/* Either multiboot flavour may already have supplied the symbols. */
	if (multiboot1_ksyms_addsyms_elf() || multiboot2_ksyms_addsyms_elf())
		return;
#endif

#ifdef XEN
	if (pvh_boot && vm_guest != VM_GUEST_XENPVH) {
		ksyms_addsyms_elf(0, ((int *)&end) + 1, esym);
		return;
	}
#endif

	bi_sym = lookup_bootinfo(BTINFO_SYMTAB);
	if (bi_sym != NULL) {
		/*
		 * The record's start/end are offset by KERNBASE before use
		 * (presumably the bootloader stores them as physical
		 * addresses -- confirm against the boot code).
		 */
		bi_sym->ssym += KERNBASE;
		bi_sym->esym += KERNBASE;
		ksyms_addsyms_elf(bi_sym->nsym, (int *)bi_sym->ssym,
		    (int *)bi_sym->esym);
		return;
	}

	/* No bootinfo record: fall back to what sits right after `end'. */
	ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym);
#endif
}
#endif /* XENPV */
1130
1131 void
init_bootspace(void)1132 init_bootspace(void)
1133 {
1134 extern char __rodata_start;
1135 extern char __data_start;
1136 extern char __kernel_end;
1137 size_t i = 0;
1138
1139 memset(&bootspace, 0, sizeof(bootspace));
1140
1141 bootspace.head.va = KERNTEXTOFF;
1142 bootspace.head.pa = KERNTEXTOFF - KERNBASE;
1143 bootspace.head.sz = 0;
1144
1145 bootspace.segs[i].type = BTSEG_TEXT;
1146 bootspace.segs[i].va = KERNTEXTOFF;
1147 bootspace.segs[i].pa = KERNTEXTOFF - KERNBASE;
1148 bootspace.segs[i].sz = (size_t)&__rodata_start - KERNTEXTOFF;
1149 i++;
1150
1151 bootspace.segs[i].type = BTSEG_RODATA;
1152 bootspace.segs[i].va = (vaddr_t)&__rodata_start;
1153 bootspace.segs[i].pa = (paddr_t)(vaddr_t)&__rodata_start - KERNBASE;
1154 bootspace.segs[i].sz = (size_t)&__data_start - (size_t)&__rodata_start;
1155 i++;
1156
1157 bootspace.segs[i].type = BTSEG_DATA;
1158 bootspace.segs[i].va = (vaddr_t)&__data_start;
1159 bootspace.segs[i].pa = (paddr_t)(vaddr_t)&__data_start - KERNBASE;
1160 bootspace.segs[i].sz = (size_t)&__kernel_end - (size_t)&__data_start;
1161 i++;
1162
1163 bootspace.boot.va = (vaddr_t)&__kernel_end;
1164 bootspace.boot.pa = (paddr_t)(vaddr_t)&__kernel_end - KERNBASE;
1165 bootspace.boot.sz = (size_t)(atdevbase + IOM_SIZE) -
1166 (size_t)&__kernel_end;
1167
1168 /* Virtual address of the top level page */
1169 bootspace.pdir = (vaddr_t)(PDPpaddr + KERNBASE);
1170 }
1171
1172 void
init386(paddr_t first_avail)1173 init386(paddr_t first_avail)
1174 {
1175 extern void consinit(void);
1176 int x;
1177 #ifndef XENPV
1178 extern paddr_t local_apic_pa;
1179 union descriptor *tgdt;
1180 struct region_descriptor region;
1181 #if NBIOSCALL > 0
1182 extern int biostramp_image_size;
1183 extern u_char biostramp_image[];
1184 #endif
1185 #endif /* !XENPV */
1186 struct pcb *pcb;
1187 struct idt_vec *iv;
1188 idt_descriptor_t *idt;
1189
1190 KASSERT(first_avail % PAGE_SIZE == 0);
1191
1192 #ifdef XENPV
1193 KASSERT(HYPERVISOR_shared_info != NULL);
1194 cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
1195 #endif
1196
1197 #ifdef XEN
1198 if (pvh_boot)
1199 xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
1200 #endif
1201
1202 uvm_lwp_setuarea(&lwp0, lwp0uarea);
1203
1204 cpu_probe(&cpu_info_primary);
1205
1206 /*
1207 * Initialize the no-execute bit on cpu0, if supported.
1208 *
1209 * Note: The call to cpu_init_msrs for secondary CPUs happens
1210 * in cpu_hatch.
1211 */
1212 cpu_init_msrs(&cpu_info_primary, true);
1213
1214 #ifndef XENPV
1215 cpu_speculation_init(&cpu_info_primary);
1216 #endif
1217
1218 #ifdef PAE
1219 use_pae = 1;
1220 #else
1221 use_pae = 0;
1222 #endif
1223
1224 pcb = lwp_getpcb(&lwp0);
1225 #ifdef XENPV
1226 pcb->pcb_cr3 = PDPpaddr;
1227 #endif
1228
1229 #if defined(PAE) && !defined(XENPV)
1230 /*
1231 * Save VA and PA of L3 PD of boot processor (for Xen, this is done
1232 * in xen_locore())
1233 */
1234 cpu_info_primary.ci_pae_l3_pdirpa = rcr3();
1235 cpu_info_primary.ci_pae_l3_pdir = (pd_entry_t *)(rcr3() + KERNBASE);
1236 #endif
1237
1238 uvm_md_init();
1239
1240 /*
1241 * Start with 2 color bins -- this is just a guess to get us
1242 * started. We'll recolor when we determine the largest cache
1243 * sizes on the system.
1244 */
1245 uvmexp.ncolors = 2;
1246
1247 avail_start = first_avail;
1248
1249 #ifndef XENPV
1250 /*
1251 * Low memory reservations:
1252 * Page 0: BIOS data
1253 * Page 1: BIOS callback
1254 * Page 2: MP bootstrap code (MP_TRAMPOLINE)
1255 * Page 3: ACPI wakeup code (ACPI_WAKEUP_ADDR)
1256 * Page 4: Temporary page table for 0MB-4MB
1257 * Page 5: Temporary page directory
1258 */
1259 lowmem_rsvd = 6 * PAGE_SIZE;
1260 #else /* !XENPV */
1261 /* Parse Xen command line (replace bootinfo) */
1262 xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
1263
1264 /* Use the dummy page as a gdt */
1265 extern vaddr_t xen_dummy_page;
1266 gdtstore = (void *)xen_dummy_page;
1267
1268 /* Determine physical address space */
1269 avail_end = ctob((paddr_t)xen_start_info.nr_pages);
1270 pmap_pa_start = (KERNTEXTOFF - KERNBASE);
1271 pmap_pa_end = pmap_pa_start + ctob((paddr_t)xen_start_info.nr_pages);
1272 mem_clusters[0].start = avail_start;
1273 mem_clusters[0].size = avail_end - avail_start;
1274 mem_cluster_cnt++;
1275 physmem += xen_start_info.nr_pages;
1276 uvmexp.wired += atop(avail_start);
1277
1278 /*
1279 * initgdt() has to be done before consinit(), so that %fs is properly
1280 * initialised. initgdt() uses pmap_kenter_pa so it can't be called
1281 * before the above variables are set.
1282 */
1283 initgdt(gdtstore);
1284
1285 mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM);
1286 #endif /* XENPV */
1287
1288 #if NISA > 0 || NPCI > 0
1289 x86_bus_space_init();
1290 #endif
1291
1292 consinit(); /* XXX SHOULD NOT BE DONE HERE */
1293
1294 #ifdef DEBUG_MEMLOAD
1295 printf("mem_cluster_count: %d\n", mem_cluster_cnt);
1296 #endif
1297
1298 /*
1299 * Call pmap initialization to make new kernel address space.
1300 * We must do this before loading pages into the VM system.
1301 */
1302 pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);
1303
1304 /*
1305 * Initialize RNG to get entropy ASAP either from CPU
1306 * RDRAND/RDSEED or from seed on disk. Constraints:
1307 *
1308 * - Must happen after cpu_init_msrs so that curcpu() and
1309 * curlwp work.
1310 *
1311 * - Must happen after consinit so we have the opportunity to
1312 * print useful feedback.
1313 *
1314 * - On KASLR kernels, must happen after pmap_bootstrap because
1315 * x86_rndseed requires access to the direct map.
1316 */
1317 cpu_rng_init();
1318 x86_rndseed();
1319
1320 #ifndef XENPV
1321 /* Initialize the memory clusters. */
1322 init_x86_clusters();
1323
1324 /* Internalize the physical pages into the VM system. */
1325 init_x86_vm(avail_start);
1326 #else /* !XENPV */
1327 uvm_page_physload(atop(avail_start), atop(avail_end),
1328 atop(avail_start), atop(avail_end),
1329 VM_FREELIST_DEFAULT);
1330
1331 /* Reclaim the boot gdt page - see locore.s */
1332 {
1333 extern pt_entry_t xpmap_pg_nx;
1334 pt_entry_t pte;
1335
1336 pte = pmap_pa2pte((vaddr_t)tmpgdt - KERNBASE);
1337 pte |= PTE_W | xpmap_pg_nx | PTE_P;
1338
1339 if (HYPERVISOR_update_va_mapping((vaddr_t)tmpgdt, pte, UVMF_INVLPG) < 0) {
1340 panic("tmpgdt page relaim RW update failed.\n");
1341 }
1342 }
1343 #endif /* !XENPV */
1344
1345 init_x86_msgbuf();
1346
1347 #if !defined(XENPV) && NBIOSCALL > 0
1348 /*
1349 * XXX Remove this
1350 *
1351 * Setup a temporary Page Table Entry to allow identity mappings of
1352 * the real mode address. This is required by bioscall.
1353 */
1354 init386_pte0();
1355
1356 KASSERT(biostramp_image_size <= PAGE_SIZE);
1357 pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, (paddr_t)BIOSTRAMP_BASE,
1358 VM_PROT_ALL, 0);
1359 pmap_update(pmap_kernel());
1360 memcpy((void *)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size);
1361
1362 /* Needed early, for bioscall() */
1363 cpu_info_primary.ci_pmap = pmap_kernel();
1364 #endif
1365
1366 #ifndef XENPV
1367 pmap_kenter_pa(local_apic_va, local_apic_pa,
1368 VM_PROT_READ|VM_PROT_WRITE, 0);
1369 pmap_update(pmap_kernel());
1370 memset((void *)local_apic_va, 0, PAGE_SIZE);
1371 #endif
1372
1373 pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
1374 pmap_kenter_pa(gdt_vaddr, gdt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
1375 pmap_kenter_pa(ldt_vaddr, ldt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
1376 pmap_update(pmap_kernel());
1377 memset((void *)idt_vaddr, 0, PAGE_SIZE);
1378 memset((void *)gdt_vaddr, 0, PAGE_SIZE);
1379 memset((void *)ldt_vaddr, 0, PAGE_SIZE);
1380
1381 pmap_kenter_pa(pentium_idt_vaddr, idt_paddr, VM_PROT_READ, 0);
1382 pmap_update(pmap_kernel());
1383 iv = &(cpu_info_primary.ci_idtvec);
1384 idt_vec_init_cpu_md(iv, cpu_index(&cpu_info_primary));
1385 idt = (idt_descriptor_t *)iv->iv_idt;
1386
1387 #ifndef XENPV
1388 /*
1389 * Switch from the initial temporary GDT that was allocated on
1390 * the stack by our caller, start. That temporary GDT will be
1391 * popped off the stack when init386 returns before start calls
1392 * main, so we need to use a second temporary GDT allocated in
1393 * pmap_bootstrap with pmap_bootstrap_valloc/palloc to make
1394 * sure at least the CPU-local data area, used by CPUVAR(...),
1395 * curcpu(), and curlwp via %fs-relative addressing, will
1396 * continue to work.
1397 *
1398 * Later, in gdt_init via cpu_startup, we will finally allocate
1399 * a permanent GDT with uvm_km(9).
1400 *
1401 * The content of the second temporary GDT is the same as the
1402 * content of the initial GDT, initialized in initgdt, except
1403 * for the address of the LDT, which is also that we are also
1404 * switching to a new temporary LDT at a new address.
1405 */
1406 tgdt = gdtstore;
1407 gdtstore = (union descriptor *)gdt_vaddr;
1408 ldtstore = (union descriptor *)ldt_vaddr;
1409
1410 memcpy(gdtstore, tgdt, NGDT * sizeof(*gdtstore));
1411
1412 setsegment(&gdtstore[GLDT_SEL].sd, ldtstore,
1413 NLDT * sizeof(ldtstore[0]) - 1, SDT_SYSLDT, SEL_KPL, 0, 0);
1414 #else
1415 HYPERVISOR_set_callbacks(
1416 GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback,
1417 GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
1418
1419 ldtstore = (union descriptor *)ldt_vaddr;
1420 #endif /* XENPV */
1421
1422 /* make ldt gates and memory segments */
1423 ldtstore[LUCODE_SEL] = gdtstore[GUCODE_SEL];
1424 ldtstore[LUCODEBIG_SEL] = gdtstore[GUCODEBIG_SEL];
1425 ldtstore[LUDATA_SEL] = gdtstore[GUDATA_SEL];
1426
1427 /* exceptions */
1428 for (x = 0; x < 32; x++) {
1429 /* Reset to default. Special cases below */
1430 int sel;
1431 #ifdef XENPV
1432 sel = SEL_XEN;
1433 #else
1434 sel = SEL_KPL;
1435 #endif /* XENPV */
1436
1437 idt_vec_reserve(iv, x);
1438
1439 switch (x) {
1440 #ifdef XENPV
1441 case 2: /* NMI */
1442 case 18: /* MCA */
1443 sel |= 0x4; /* Auto EOI/mask */
1444 break;
1445 #endif /* XENPV */
1446 case 3:
1447 case 4:
1448 sel = SEL_UPL;
1449 break;
1450 default:
1451 break;
1452 }
1453 set_idtgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386IGT,
1454 sel, GSEL(GCODE_SEL, SEL_KPL));
1455 }
1456
1457 /* new-style interrupt gate for syscalls */
1458 idt_vec_reserve(iv, 128);
1459 set_idtgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL,
1460 GSEL(GCODE_SEL, SEL_KPL));
1461
1462 #ifndef XENPV
1463 /*
1464 * Activate the second temporary GDT, allocated in
1465 * pmap_bootstrap with pmap_bootstrap_valloc/palloc, and
1466 * initialized with the content of the initial temporary GDT in
1467 * initgdt, plus an updated LDT.
1468 *
1469 * This ensures the %fs-relative addressing for the CPU-local
1470 * area used by CPUVAR(...), curcpu(), and curlwp will continue
1471 * to work after init386 returns and the initial temporary GDT
1472 * is popped off, before we call main and later create a
1473 * permanent GDT in gdt_init via cpu_startup.
1474 */
1475 setregion(®ion, gdtstore, NGDT * sizeof(gdtstore[0]) - 1);
1476 lgdt(®ion);
1477 #endif
1478
1479 lldt(GSEL(GLDT_SEL, SEL_KPL));
1480 cpu_init_idt(&cpu_info_primary);
1481
1482 #ifdef XENPV
1483 xen_init_ksyms();
1484 #else /* XENPV */
1485 #ifdef XEN
1486 if (vm_guest == VM_GUEST_XENPVH)
1487 xen_init_ksyms();
1488 else
1489 #endif /* XEN */
1490 init386_ksyms();
1491 #endif /* XENPV */
1492
1493 #if NMCA > 0
1494 /*
1495 * check for MCA bus, needed to be done before ISA stuff - if
1496 * MCA is detected, ISA needs to use level triggered interrupts
1497 * by default
1498 * And we do not search for MCA using bioscall() on EFI systems
1499 * that lacks it (they lack MCA too, anyway).
1500 */
1501 if (lookup_bootinfo(BTINFO_EFI) == NULL && vm_guest != VM_GUEST_XENPVH)
1502 mca_busprobe();
1503 #endif
1504
1505 #ifdef XENPV
1506 extern int tmpstk;
1507 cpu_info_primary.ci_intrstack = &tmpstk;
1508 events_default_setup();
1509 #else
1510 intr_default_setup();
1511 #endif
1512
1513 splraise(IPL_HIGH);
1514 x86_enable_intr();
1515
1516 #ifdef DDB
1517 if (boothowto & RB_KDB)
1518 Debugger();
1519 #endif
1520 #ifdef KGDB
1521 kgdb_port_init();
1522 if (boothowto & RB_KDB) {
1523 kgdb_debug_init = 1;
1524 kgdb_connect(1);
1525 }
1526 #endif
1527
1528 if (physmem < btoc(2 * 1024 * 1024)) {
1529 printf("warning: too little memory available; "
1530 "have %lu bytes, want %lu bytes\n"
1531 "running in degraded mode\n"
1532 "press a key to confirm\n\n",
1533 (unsigned long)ptoa(physmem), 2*1024*1024UL);
1534 cngetc();
1535 }
1536
1537 pcb->pcb_dbregs = NULL;
1538 x86_dbregs_init();
1539 }
1540
1541 #include <dev/ic/mc146818reg.h> /* for NVRAM POST */
1542 #include <i386/isa/nvram.h> /* for NVRAM POST */
1543
1544 void
cpu_reset(void)1545 cpu_reset(void)
1546 {
1547 #ifdef XENPV
1548 HYPERVISOR_reboot();
1549 for (;;);
1550 #else /* XENPV */
1551 struct region_descriptor region;
1552 idt_descriptor_t *idt;
1553
1554 idt = (idt_descriptor_t *)cpu_info_primary.ci_idtvec.iv_idt;
1555 x86_disable_intr();
1556
1557 /*
1558 * Ensure the NVRAM reset byte contains something vaguely sane.
1559 */
1560
1561 outb(IO_RTC, NVRAM_RESET);
1562 outb(IO_RTC+1, NVRAM_RESET_RST);
1563
1564 /*
1565 * Reset AMD Geode SC1100.
1566 *
1567 * 1) Write PCI Configuration Address Register (0xcf8) to
1568 * select Function 0, Register 0x44: Bridge Configuration,
1569 * GPIO and LPC Configuration Register Space, Reset
1570 * Control Register.
1571 *
1572 * 2) Write 0xf to PCI Configuration Data Register (0xcfc)
1573 * to reset IDE controller, IDE bus, and PCI bus, and
1574 * to trigger a system-wide reset.
1575 *
1576 * See AMD Geode SC1100 Processor Data Book, Revision 2.0,
1577 * sections 6.3.1, 6.3.2, and 6.4.1.
1578 */
1579 if (cpu_info_primary.ci_signature == 0x540) {
1580 outl(0xcf8, 0x80009044);
1581 outl(0xcfc, 0xf);
1582 }
1583
1584 x86_reset();
1585
1586 /*
1587 * Try to cause a triple fault and watchdog reset by making the IDT
1588 * invalid and causing a fault.
1589 */
1590 memset((void *)idt, 0, NIDT * sizeof(idt[0]));
1591 setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1);
1592 lidt(®ion);
1593 breakpoint();
1594
1595 #if 0
1596 /*
1597 * Try to cause a triple fault and watchdog reset by unmapping the
1598 * entire address space and doing a TLB flush.
1599 */
1600 memset((void *)PTD, 0, PAGE_SIZE);
1601 tlbflush();
1602 #endif
1603
1604 for (;;);
1605 #endif /* XENPV */
1606 }
1607
1608 void
cpu_getmcontext(struct lwp * l,mcontext_t * mcp,unsigned int * flags)1609 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
1610 {
1611 const struct trapframe *tf = l->l_md.md_regs;
1612 __greg_t *gr = mcp->__gregs;
1613 __greg_t ras_eip;
1614
1615 /* Save register context. */
1616 gr[_REG_GS] = tf->tf_gs;
1617 gr[_REG_FS] = tf->tf_fs;
1618 gr[_REG_ES] = tf->tf_es;
1619 gr[_REG_DS] = tf->tf_ds;
1620 gr[_REG_EFL] = tf->tf_eflags;
1621
1622 gr[_REG_EDI] = tf->tf_edi;
1623 gr[_REG_ESI] = tf->tf_esi;
1624 gr[_REG_EBP] = tf->tf_ebp;
1625 gr[_REG_EBX] = tf->tf_ebx;
1626 gr[_REG_EDX] = tf->tf_edx;
1627 gr[_REG_ECX] = tf->tf_ecx;
1628 gr[_REG_EAX] = tf->tf_eax;
1629 gr[_REG_EIP] = tf->tf_eip;
1630 gr[_REG_CS] = tf->tf_cs;
1631 gr[_REG_ESP] = tf->tf_esp;
1632 gr[_REG_UESP] = tf->tf_esp;
1633 gr[_REG_SS] = tf->tf_ss;
1634 gr[_REG_TRAPNO] = tf->tf_trapno;
1635 gr[_REG_ERR] = tf->tf_err;
1636
1637 if ((ras_eip = (__greg_t)ras_lookup(l->l_proc,
1638 (void *) gr[_REG_EIP])) != -1)
1639 gr[_REG_EIP] = ras_eip;
1640
1641 *flags |= _UC_CPU;
1642
1643 mcp->_mc_tlsbase = (uintptr_t)l->l_private;
1644 *flags |= _UC_TLSBASE;
1645
1646 /*
1647 * Save floating point register context.
1648 *
1649 * If the cpu doesn't support fxsave we must still write to
1650 * the entire 512 byte area - otherwise we leak kernel memory
1651 * contents to userspace.
1652 * It wouldn't matter if we were doing the copyout here.
1653 * So we might as well convert to fxsave format.
1654 */
1655 __CTASSERT(sizeof (struct fxsave) ==
1656 sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
1657 process_read_fpregs_xmm(l, (struct fxsave *)
1658 &mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
1659 memset(&mcp->__fpregs.__fp_pad, 0, sizeof mcp->__fpregs.__fp_pad);
1660 *flags |= _UC_FXSAVE | _UC_FPU;
1661 }
1662
1663 int
cpu_mcontext_validate(struct lwp * l,const mcontext_t * mcp)1664 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
1665 {
1666 const __greg_t *gr = mcp->__gregs;
1667 struct trapframe *tf = l->l_md.md_regs;
1668
1669 /*
1670 * Check for security violations. If we're returning
1671 * to protected mode, the CPU will validate the segment
1672 * registers automatically and generate a trap on
1673 * violations. We handle the trap, rather than doing
1674 * all of the checking here.
1675 */
1676 if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) ||
1677 !USERMODE(gr[_REG_CS]))
1678 return EINVAL;
1679
1680 return 0;
1681 }
1682
1683 int
cpu_setmcontext(struct lwp * l,const mcontext_t * mcp,unsigned int flags)1684 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
1685 {
1686 struct trapframe *tf = l->l_md.md_regs;
1687 const __greg_t *gr = mcp->__gregs;
1688 struct proc *p = l->l_proc;
1689 int error;
1690
1691 /* Restore register context, if any. */
1692 if ((flags & _UC_CPU) != 0) {
1693 error = cpu_mcontext_validate(l, mcp);
1694 if (error)
1695 return error;
1696
1697 tf->tf_gs = gr[_REG_GS];
1698 tf->tf_fs = gr[_REG_FS];
1699 tf->tf_es = gr[_REG_ES];
1700 tf->tf_ds = gr[_REG_DS];
1701 /* Only change the user-alterable part of eflags */
1702 tf->tf_eflags &= ~PSL_USER;
1703 tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER);
1704
1705 tf->tf_edi = gr[_REG_EDI];
1706 tf->tf_esi = gr[_REG_ESI];
1707 tf->tf_ebp = gr[_REG_EBP];
1708 tf->tf_ebx = gr[_REG_EBX];
1709 tf->tf_edx = gr[_REG_EDX];
1710 tf->tf_ecx = gr[_REG_ECX];
1711 tf->tf_eax = gr[_REG_EAX];
1712 tf->tf_eip = gr[_REG_EIP];
1713 tf->tf_cs = gr[_REG_CS];
1714 tf->tf_esp = gr[_REG_UESP];
1715 tf->tf_ss = gr[_REG_SS];
1716 }
1717
1718 if ((flags & _UC_TLSBASE) != 0)
1719 lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
1720
1721 /* Restore floating point register context, if given. */
1722 if ((flags & _UC_FPU) != 0) {
1723 __CTASSERT(sizeof (struct fxsave) ==
1724 sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
1725 __CTASSERT(sizeof (struct save87) ==
1726 sizeof mcp->__fpregs.__fp_reg_set.__fpchip_state);
1727
1728 if (flags & _UC_FXSAVE) {
1729 process_write_fpregs_xmm(l, (const struct fxsave *)
1730 &mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
1731 } else {
1732 process_write_fpregs_s87(l, (const struct save87 *)
1733 &mcp->__fpregs.__fp_reg_set.__fpchip_state);
1734 }
1735 }
1736
1737 mutex_enter(p->p_lock);
1738 if (flags & _UC_SETSTACK)
1739 l->l_sigstk.ss_flags |= SS_ONSTACK;
1740 if (flags & _UC_CLRSTACK)
1741 l->l_sigstk.ss_flags &= ~SS_ONSTACK;
1742 mutex_exit(p->p_lock);
1743 return (0);
1744 }
1745
1746 #define DEV_IO 14 /* iopl for compat_10 */
1747
1748 int
mm_md_open(dev_t dev,int flag,int mode,struct lwp * l)1749 mm_md_open(dev_t dev, int flag, int mode, struct lwp *l)
1750 {
1751
1752 switch (minor(dev)) {
1753 case DEV_IO:
1754 /*
1755 * This is done by i386_iopl(3) now.
1756 *
1757 * #if defined(COMPAT_10) || defined(COMPAT_FREEBSD)
1758 */
1759 if (flag & FWRITE) {
1760 struct trapframe *fp;
1761 int error;
1762
1763 error = kauth_authorize_machdep(l->l_cred,
1764 KAUTH_MACHDEP_IOPL, NULL, NULL, NULL, NULL);
1765 if (error)
1766 return (error);
1767 fp = curlwp->l_md.md_regs;
1768 fp->tf_eflags |= PSL_IOPL;
1769 }
1770 break;
1771 default:
1772 break;
1773 }
1774 return 0;
1775 }
1776
#ifdef PAE
/*
 * cpu_alloc_l3_page:
 *
 *	Allocate the per-CPU L3 page directory of ci and map it into the
 *	kernel.  On return ci_pae_l3_pdirpa holds the physical address of
 *	the page and ci_pae_l3_pdir a kernel virtual address mapping it
 *	read/write.  Panics if either allocation fails.
 */
void
cpu_alloc_l3_page(struct cpu_info *ci)
{
	struct pglist plist;
	struct vm_page *page;
	int error;

	KASSERT(ci != NULL);

	/*
	 * Get one physical page for the L3 PD.  Since cr3 is only 32 bits
	 * wide, its PA must reside below the 4GB boundary.
	 */
	error = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &plist,
	    1, 0);
	page = TAILQ_FIRST(&plist);
	if (error != 0 || page == NULL)
		panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n",
		    __func__, cpu_index(ci), error);

	ci->ci_pae_l3_pdirpa = VM_PAGE_TO_PHYS(page);

	/* Reserve kernel VA only; the page above is entered just below. */
	ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
	if (ci->ci_pae_l3_pdir == NULL)
		panic("%s: failed to allocate L3 PD for CPU %d\n",
		    __func__, cpu_index(ci));

	pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
	    VM_PROT_READ | VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());
}
#endif /* PAE */
1811
1812 static void
idt_vec_copy(struct idt_vec * dst,struct idt_vec * src)1813 idt_vec_copy(struct idt_vec *dst, struct idt_vec *src)
1814 {
1815 idt_descriptor_t *idt_dst;
1816
1817 idt_dst = dst->iv_idt;
1818 memcpy(idt_dst, src->iv_idt, PAGE_SIZE);
1819 memcpy(dst->iv_allocmap, src->iv_allocmap, sizeof(dst->iv_allocmap));
1820 }
1821
1822 void
idt_vec_init_cpu_md(struct idt_vec * iv,cpuid_t cid)1823 idt_vec_init_cpu_md(struct idt_vec *iv, cpuid_t cid)
1824 {
1825 vaddr_t va_idt, va_pentium_idt;
1826 struct vm_page *pg;
1827
1828 if (idt_vec_is_pcpu() &&
1829 cid != cpu_index(&cpu_info_primary)) {
1830 va_idt = uvm_km_alloc(kernel_map, PAGE_SIZE,
1831 0, UVM_KMF_VAONLY);
1832 pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
1833 if (pg == NULL) {
1834 panic("failed to allocate pcpu idt PA");
1835 }
1836 pmap_kenter_pa(va_idt, VM_PAGE_TO_PHYS(pg),
1837 VM_PROT_READ|VM_PROT_WRITE, 0);
1838 pmap_update(pmap_kernel());
1839
1840 memset((void *)va_idt, 0, PAGE_SIZE);
1841
1842 /* pentium f00f bug stuff */
1843 va_pentium_idt = uvm_km_alloc(kernel_map, PAGE_SIZE,
1844 0, UVM_KMF_VAONLY);
1845 pmap_kenter_pa(va_pentium_idt, VM_PAGE_TO_PHYS(pg),
1846 VM_PROT_READ, 0);
1847 pmap_update(pmap_kernel());
1848
1849 iv->iv_idt = (void *)va_idt;
1850 iv->iv_idt_pentium = (void *)va_pentium_idt;
1851
1852 idt_vec_copy(iv, &(cpu_info_primary.ci_idtvec));
1853 } else {
1854 iv->iv_idt = (void *)idt_vaddr;
1855 iv->iv_idt_pentium = (void *)pentium_idt_vaddr;
1856 }
1857 }
1858