xref: /dragonfly/sys/platform/pc64/x86_64/vm_machdep.c (revision dba7cc785cea9d1507bd9fed277dd6f702f0382f)
1 /*-
2  * Copyright (c) 1982, 1986 The Regents of the University of California.
3  * Copyright (c) 1989, 1990 William Jolitz
4  * Copyright (c) 1994 John Dyson
5  * Copyright (c) 2008-2018 The DragonFly Project.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * the Systems Programming Group of the University of Utah Computer
10  * Science Department, and William Jolitz.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *        This product includes software developed by the University of
23  *        California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *        from: @(#)vm_machdep.c        7.3 (Berkeley) 5/13/91
41  *        Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
42  * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $
43  */
44 
45 #include "opt_reset.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/malloc.h>
50 #include <sys/proc.h>
51 #include <sys/buf.h>
52 #include <sys/interrupt.h>
53 #include <sys/vnode.h>
54 #include <sys/vmmeter.h>
55 #include <sys/kernel.h>
56 #include <sys/sysctl.h>
57 #include <sys/unistd.h>
58 #include <sys/lwp.h>
59 
60 #include <machine/clock.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <machine/smp.h>
64 #include <machine/pcb.h>
65 #include <machine/pcb_ext.h>
66 #include <machine/segments.h>
67 #include <machine/globaldata.h>         /* npxthread */
68 #include <machine/specialreg.h>
69 
70 #include <vm/vm.h>
71 #include <vm/vm_param.h>
72 #include <sys/lock.h>
73 #include <vm/vm_kern.h>
74 #include <vm/vm_page.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_extern.h>
77 
78 #include <sys/thread2.h>
79 
80 #include <bus/isa/isa.h>
81 
82 static void         cpu_reset_real (void);
83 
84 static int spectre_mitigation = -1;
85 static int spectre_support = 0;
86 static int spectre_mode = 0;
87 SYSCTL_INT(_machdep, OID_AUTO, spectre_mode, CTLFLAG_RD,
88           &spectre_mode, 0, "current Spectre enablements");
89 
90 static int mds_mitigation = -1;
91 static int mds_support = 0;
92 static int mds_mode = 0;
93 SYSCTL_INT(_machdep, OID_AUTO, mds_mode, CTLFLAG_RD,
94           &mds_mode, 0, "current MDS enablements");
95 
96 /*
97  * Finish a fork operation, with lwp lp2 nearly set up.
98  * Copy and update the pcb, set up the stack so that the child
99  * ready to run and return to user mode.
100  */
101 void
cpu_fork(struct lwp * lp1,struct lwp * lp2,int flags)102 cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
103 {
104           struct pcb *pcb2;
105           struct pmap *pmap2;
106 
107           if ((flags & RFPROC) == 0) {
108                     if ((flags & RFMEM) == 0) {
109                               /*
110                                * Unshare user LDT.  > 1 test is MPSAFE.  While
111                                * it can potentially race a 2->1 transition, the
112                                * worst that happens is that we do an unnecessary
113                                * ldt replacement.
114                                */
115                               struct pcb *pcb1 = lp1->lwp_thread->td_pcb;
116                               struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;
117 
118                               if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
119                                         pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len);
120                                         user_ldt_free(pcb1);
121                                         pcb1->pcb_ldt = pcb_ldt;
122                                         set_user_ldt(pcb1);
123                               }
124                     }
125                     return;
126           }
127 
128           /* Ensure that lp1's pcb is up to date. */
129           if (mdcpu->gd_npxthread == lp1->lwp_thread)
130                     npxsave(lp1->lwp_thread->td_savefpu);
131 
132           /*
133            * Copy lp1's PCB.  This really only applies to the
134            * debug registers and FP state, but its faster to just copy the
135            * whole thing.  Because we only save the PCB at switchout time,
136            * the register state may not be current.
137            */
138           pcb2 = lp2->lwp_thread->td_pcb;
139           *pcb2 = *lp1->lwp_thread->td_pcb;
140 
141           /*
142            * Create a new fresh stack for the new process.
143            * Copy the trap frame for the return to user mode as if from a
144            * syscall.  This copies the user mode register values.
145            *
146            * pcb_rsp must allocate an additional call-return pointer below
147            * the trap frame which will be restored by cpu_heavy_restore from
148            * PCB_RIP, and the thread's td_sp pointer must allocate an
149            * additonal two quadwords below the pcb_rsp call-return pointer to
150            * hold the LWKT restore function pointer and rflags.
151            *
152            * The LWKT restore function pointer must be set to cpu_heavy_restore,
153            * which is our standard heavy-weight process switch-in function.
154            * YYY eventually we should shortcut fork_return and fork_trampoline
155            * to use the LWKT restore function directly so we can get rid of
156            * all the extra crap we are setting up.
157            */
158           lp2->lwp_md.md_regs = (struct trapframe *)pcb2 - 1;
159           bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs));
160 
161           /*
162            * Set registers for trampoline to user mode.  Leave space for the
163            * return address on stack.  These are the kernel mode register values.
164            *
165            * Set the new pmap CR3.  If the new process uses isolated VM spaces,
166            * also set the isolated CR3.
167            */
168           pmap2 = vmspace_pmap(lp2->lwp_proc->p_vmspace);
169           pcb2->pcb_cr3 = vtophys(pmap2->pm_pml4);
170           if ((pcb2->pcb_flags & PCB_ISOMMU) && pmap2->pm_pmlpv_iso) {
171                     pcb2->pcb_cr3_iso = vtophys(pmap2->pm_pml4_iso);
172           } else {
173                     pcb2->pcb_flags &= ~PCB_ISOMMU;
174                     pcb2->pcb_cr3_iso = 0;
175           }
176 
177 #if 0
178           /*
179            * Per-process spectre mitigation (future)
180            */
181           pcb2->pcb_flags &= ~(PCB_IBRS1 | PCB_IBRS2);
182           switch (spectre_mitigation) {
183           case 1:
184                     pcb2->pcb_flags |= PCB_IBRS1;
185                     break;
186           case 2:
187                     pcb2->pcb_flags |= PCB_IBRS2;
188                     break;
189           default:
190                     break;
191           }
192 #endif
193 
194           pcb2->pcb_rbx = (unsigned long)fork_return;       /* fork_trampoline argument */
195           pcb2->pcb_rbp = 0;
196           pcb2->pcb_rsp = (unsigned long)lp2->lwp_md.md_regs - sizeof(void *);
197           pcb2->pcb_r12 = (unsigned long)lp2;               /* fork_trampoline argument */
198           pcb2->pcb_r13 = 0;
199           pcb2->pcb_r14 = 0;
200           pcb2->pcb_r15 = 0;
201           pcb2->pcb_rip = (unsigned long)fork_trampoline;
202           lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_rsp - sizeof(void *));
203           *(u_int64_t *)lp2->lwp_thread->td_sp = PSL_USER;
204           lp2->lwp_thread->td_sp -= sizeof(void *);
205           *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore;
206 
207           /*
208            * pcb2->pcb_ldt:   duplicated below, if necessary.
209            * pcb2->pcb_savefpu:         cloned above.
210            * pcb2->pcb_flags: cloned above
211            * pcb2->pcb_onfault:         cloned above (always NULL here).
212            * pcb2->pcb_onfault_sp:cloned above (dont care)
213            */
214 
215           /*
216            * XXX don't copy the i/o pages.  this should probably be fixed.
217            */
218           pcb2->pcb_ext = NULL;
219 
220         /* Copy the LDT, if necessary. */
221         if (pcb2->pcb_ldt != NULL) {
222                     if (flags & RFMEM) {
223                               atomic_add_int(&pcb2->pcb_ldt->ldt_refcnt, 1);
224                     } else {
225                               pcb2->pcb_ldt = user_ldt_alloc(pcb2,
226                                                                    pcb2->pcb_ldt->ldt_len);
227                     }
228         }
229           bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls,
230                 sizeof(lp2->lwp_thread->td_tls));
231           /*
232            * Now, cpu_switch() can schedule the new lwp.
233            * pcb_rsp is loaded pointing to the cpu_switch() stack frame
234            * containing the return address when exiting cpu_switch.
235            * This will normally be to fork_trampoline(), which will have
236            * %rbx loaded with the new lwp's pointer.  fork_trampoline()
237            * will set up a stack to call fork_return(lp, frame); to complete
238            * the return to user-mode.
239            */
240 }
241 
242 /*
243  * Prepare new lwp to return to the address specified in params.
244  */
245 int
cpu_prepare_lwp(struct lwp * lp,struct lwp_params * params)246 cpu_prepare_lwp(struct lwp *lp, struct lwp_params *params)
247 {
248           struct trapframe *regs = lp->lwp_md.md_regs;
249           void *bad_return = NULL;
250           int error;
251 
252           regs->tf_rip = (long)params->lwp_func;
253           regs->tf_rsp = (long)params->lwp_stack;
254           /* Set up argument for function call */
255           regs->tf_rdi = (long)params->lwp_arg;
256 
257           /*
258            * Set up fake return address.  As the lwp function may never return,
259            * we simply copy out a NULL pointer and force the lwp to receive
260            * a SIGSEGV if it returns anyways.
261            */
262           regs->tf_rsp -= sizeof(void *);
263           error = copyout(&bad_return, (void *)regs->tf_rsp, sizeof(bad_return));
264           if (error)
265                     return (error);
266 
267           cpu_set_fork_handler(lp,
268               (void (*)(void *, struct trapframe *))generic_lwp_return, lp);
269           return (0);
270 }
271 
272 /*
273  * Intercept the return address from a freshly forked process that has NOT
274  * been scheduled yet.
275  *
276  * This is needed to make kernel threads stay in kernel mode.
277  */
278 void
cpu_set_fork_handler(struct lwp * lp,void (* func)(void *,struct trapframe *),void * arg)279 cpu_set_fork_handler(struct lwp *lp, void (*func)(void *, struct trapframe *),
280                          void *arg)
281 {
282           /*
283            * Note that the trap frame follows the args, so the function
284            * is really called like this:  func(arg, frame);
285            */
286           lp->lwp_thread->td_pcb->pcb_rbx = (long)func;     /* function */
287           lp->lwp_thread->td_pcb->pcb_r12 = (long)arg;      /* first arg */
288 }
289 
290 void
cpu_set_thread_handler(thread_t td,void (* rfunc)(void),void * func,void * arg)291 cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg)
292 {
293           td->td_pcb->pcb_rbx = (long)func;
294           td->td_pcb->pcb_r12 = (long)arg;
295           td->td_switch = cpu_lwkt_switch;
296           td->td_sp -= sizeof(void *);
297           *(void **)td->td_sp = rfunc;  /* exit function on return */
298           td->td_sp -= sizeof(void *);
299           *(void **)td->td_sp = cpu_kthread_restore;
300 }
301 
302 void
cpu_lwp_exit(void)303 cpu_lwp_exit(void)
304 {
305           struct thread *td = curthread;
306           struct pcb *pcb;
307 
308           pcb = td->td_pcb;
309 
310           /* Some x86 functionality was dropped */
311           KKASSERT(pcb->pcb_ext == NULL);
312 
313           /*
314            * disable all hardware breakpoints
315            */
316         if (pcb->pcb_flags & PCB_DBREGS) {
317                 reset_dbregs();
318                 pcb->pcb_flags &= ~PCB_DBREGS;
319         }
320           td->td_gd->gd_cnt.v_swtch++;
321 
322           crit_enter_quick(td);
323           if (td->td_flags & TDF_TSLEEPQ)
324                     tsleep_remove(td);
325           lwkt_deschedule_self(td);
326           lwkt_remove_tdallq(td);
327           cpu_thread_exit();
328 }
329 
330 /*
331  * Terminate the current thread.  The caller must have already acquired
332  * the thread's rwlock and placed it on a reap list or otherwise notified
333  * a reaper of its existance.  We set a special assembly switch function which
334  * releases td_rwlock after it has cleaned up the MMU state and switched
335  * out the stack.
336  *
337  * Must be caller from a critical section and with the thread descheduled.
338  */
339 void
cpu_thread_exit(void)340 cpu_thread_exit(void)
341 {
342           npxexit();
343           curthread->td_switch = cpu_exit_switch;
344           curthread->td_flags |= TDF_EXITING;
345           lwkt_switch();
346           panic("cpu_thread_exit: lwkt_switch() unexpectedly returned");
347 }
348 
/*
 * Public reset entry point; all of the work is done by cpu_reset_real().
 */
void
cpu_reset(void)
{
	cpu_reset_real();
}
354 
355 static void
cpu_reset_real(void)356 cpu_reset_real(void)
357 {
358           /*
359            * Attempt to do a CPU reset via the keyboard controller,
360            * do not turn off the GateA20, as any machine that fails
361            * to do the reset here would then end up in no man's land.
362            */
363 
364 #if !defined(BROKEN_KEYBOARD_RESET)
365           outb(IO_KBD + 4, 0xFE);
366           DELAY(500000);      /* wait 0.5 sec to see if that did it */
367           kprintf("Keyboard reset did not work, attempting CPU shutdown\n");
368           DELAY(1000000);     /* wait 1 sec for kprintf to complete */
369 #endif
370 #if 0 /* JG */
371           /* force a shutdown by unmapping entire address space ! */
372           bzero((caddr_t) PTD, PAGE_SIZE);
373 #endif
374 
375           /* "good night, sweet prince .... <THUNK!>" */
376           cpu_invltlb();
377           /* NOTREACHED */
378           while(1);
379 }
380 
381 static void
swi_vm(void * arg,void * frame)382 swi_vm(void *arg, void *frame)
383 {
384           if (busdma_swi_pending != 0)
385                     busdma_swi();
386 }
387 
388 static void
swi_vm_setup(void * arg)389 swi_vm_setup(void *arg)
390 {
391           register_swi_mp(SWI_VM, swi_vm, NULL, "swi_vm", NULL, 0);
392 }
393 
394 SYSINIT(swi_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY, swi_vm_setup, NULL);
395 
/*
 * Prototype for the (non-static) mitigation setup routine.
 *
 * NOTE: Besides its normal use, this routine is also called after a
 *	 successful microcode reload on cpu 0.
 */
void	mitigation_vm_setup(void *arg);
401 
402 /*
403  * Check for IBPB and IBRS support
404  *
405  * This bits also specify desired modes in the spectre_mitigation sysctl.
406  */
407 #define IBRS_SUPPORTED                  0x0001
408 #define STIBP_SUPPORTED                 0x0002
409 #define IBPB_SUPPORTED                  0x0004
410 #define IBRS_AUTO_SUPPORTED   0x0008
411 #define STIBP_AUTO_SUPPORTED  0x0010
412 #define IBRS_PREFERRED_REQUEST          0x0020
413 
414 static
415 int
spectre_check_support(void)416 spectre_check_support(void)
417 {
418           uint32_t p[4];
419           int rv = 0;
420 
421           /*
422            * Spectre mitigation hw bits
423            *
424            * IBRS             Indirect Branch Restricted Speculation   (isolation)
425            * STIBP  Single Thread Indirect Branch Prediction (isolation)
426            * IBPB             Branch Prediction Barrier                (barrier)
427            *
428            * IBRS and STIBP must be toggled (enabled on entry to kernel,
429            * disabled on exit, as well as disabled during any MWAIT/HLT).
430            * When *_AUTO bits are available, IBRS and STIBP may be left
431            * turned on and do not have to be toggled on kernel entry/exit.
432            * Be sure to clear before going idle (else hyperthread performance
433            * will drop).
434            *
435            * All this shit has enormous overhead.  IBPB in particular, and
436            * non-auto modes are disabled by default.
437            */
438           if (cpu_vendor_id == CPU_VENDOR_INTEL) {
439                     p[0] = 0;
440                     p[1] = 0;
441                     p[2] = 0;
442                     p[3] = 0;
443                     cpuid_count(7, 0, p);
444                     if (p[3] & CPUID_STDEXT3_IBPB)
445                               rv |= IBRS_SUPPORTED | IBPB_SUPPORTED;
446                     if (p[3] & CPUID_STDEXT3_STIBP)
447                               rv |= STIBP_SUPPORTED;
448 
449                     /*
450                      * 0x80000008 p[1] bit 12 indicates IBPB support
451                      *
452                      * This bit might be set even though STDEXT3_IBPB is not set.
453                      */
454                     p[0] = 0;
455                     p[1] = 0;
456                     p[2] = 0;
457                     p[3] = 0;
458                     do_cpuid(0x80000008U, p);
459                     if (p[1] & CPUID_CAPEX_IBPB)
460                               rv |= IBPB_SUPPORTED;
461           } else if (cpu_vendor_id == CPU_VENDOR_AMD) {
462                     /*
463                      * 0x80000008
464                      *        p[1] bit 12 indicates IBPB support
465                      *        p[1] bit 14 indicates IBRS support
466                      *        p[1] bit 15 indicates STIBP support
467                      *
468                      *        p[1] bit 16 indicates IBRS auto support
469                      *        p[1] bit 17 indicates STIBP auto support
470                      *        p[1] bit 18 indicates processor prefers using
471                      *                  IBRS instead of retpoline.
472                      */
473                     p[0] = 0;
474                     p[1] = 0;
475                     p[2] = 0;
476                     p[3] = 0;
477                     do_cpuid(0x80000008U, p);
478                     if (p[1] & CPUID_CAPEX_IBPB)
479                               rv |= IBPB_SUPPORTED;
480                     if (p[1] & CPUID_CAPEX_IBRS)
481                               rv |= IBRS_SUPPORTED;
482                     if (p[1] & CPUID_CAPEX_STIBP)
483                               rv |= STIBP_SUPPORTED;
484 
485                     if (p[1] & CPUID_CAPEX_IBRS_ALWAYSON)
486                               rv |= IBRS_AUTO_SUPPORTED;
487                     if (p[1] & CPUID_CAPEX_STIBP_ALWAYSON)
488                               rv |= STIBP_AUTO_SUPPORTED;
489                     if (p[1] & CPUID_CAPEX_PREFER_IBRS)
490                               rv |= IBRS_PREFERRED_REQUEST;
491           }
492 
493           return rv;
494 }
495 
496 /*
497  * Iterate CPUs and adjust MSR for global operations, since
498  * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
499  */
500 #define CHECK(flag) (spectre_mitigation & spectre_support & (flag))
501 
502 static
503 void
spectre_sysctl_changed(void)504 spectre_sysctl_changed(void)
505 {
506           globaldata_t save_gd;
507           struct trampframe *tr;
508           int spec_ctrl;
509           int spec_mask;
510           int mode;
511           int n;
512 
513           spec_mask = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
514                         SPEC_CTRL_DUMMY_ENABLE | SPEC_CTRL_DUMMY_IBPB;
515 
516           /*
517            * Fixup state
518            */
519           mode = 0;
520           save_gd = mycpu;
521           for (n = 0; n < ncpus; ++n) {
522                     lwkt_setcpu_self(globaldata_find(n));
523                     cpu_ccfence();
524                     tr = &pscpu->trampoline;
525 
526                     /*
527                      * Make sure we are cleaned out.
528                      *
529                      * XXX cleanup, reusing globals inside the loop (they get
530                      * set to the same thing each loop)
531                      *
532                      * [0] kernel entry (idle exit)
533                      * [1] kernel exit  (idle entry)
534                      */
535                     tr->tr_pcb_spec_ctrl[0] &= ~spec_mask;
536                     tr->tr_pcb_spec_ctrl[1] &= ~spec_mask;
537 
538                     /*
539                      * Don't try to parse if not available
540                      */
541                     if (spectre_mitigation < 0)
542                               continue;
543 
544                     /*
545                      * IBRS mode.  Auto overrides toggling.
546                      *
547                      * Only set the ENABLE flag if we have to toggle something
548                      * on entry and exit.
549                      */
550                     spec_ctrl = 0;
551                     if (CHECK(IBRS_AUTO_SUPPORTED)) {
552                               spec_ctrl |= SPEC_CTRL_IBRS;
553                               mode |= IBRS_AUTO_SUPPORTED;
554                     } else if (CHECK(IBRS_SUPPORTED)) {
555                               spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_DUMMY_ENABLE;
556                               mode |= IBRS_SUPPORTED;
557                     }
558                     if (CHECK(STIBP_AUTO_SUPPORTED)) {
559                               spec_ctrl |= SPEC_CTRL_STIBP;
560                               mode |= STIBP_AUTO_SUPPORTED;
561                     } else if (CHECK(STIBP_SUPPORTED)) {
562                               spec_ctrl |= SPEC_CTRL_STIBP | SPEC_CTRL_DUMMY_ENABLE;
563                               mode |= STIBP_SUPPORTED;
564                     }
565 
566                     /*
567                      * IBPB requested and supported.
568                      */
569                     if (CHECK(IBPB_SUPPORTED)) {
570                               spec_ctrl |= SPEC_CTRL_DUMMY_IBPB;
571                               mode |= IBPB_SUPPORTED;
572                     }
573 
574                     /*
575                      * Update the MSR if the cpu supports the modes to ensure
576                      * proper disablement if the user disabled the mode.
577                      */
578                     if (spectre_support & (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
579                                             STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED)) {
580                               wrmsr(MSR_SPEC_CTRL,
581                                     spec_ctrl & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
582                     }
583 
584                     /*
585                      * Update spec_ctrl fields in the trampoline.
586                      *
587                      * [0] on-kernel-entry (on-idle-exit)
588                      * [1] on-kernel-exit  (on-idle-entry)
589                      *
590                      * When auto mode is supported we leave the bit set, otherwise
591                      * we clear the bits.
592                      */
593                     tr->tr_pcb_spec_ctrl[0] |= spec_ctrl;
594                     if (CHECK(IBRS_AUTO_SUPPORTED) == 0)
595                               spec_ctrl &= ~SPEC_CTRL_IBRS;
596                     if (CHECK(STIBP_AUTO_SUPPORTED) == 0)
597                               spec_ctrl &= ~SPEC_CTRL_STIBP;
598                     tr->tr_pcb_spec_ctrl[1] |= spec_ctrl;
599 
600                     /*
601                      * Make sure we set this on the first loop.  It will be
602                      * the same value on remaining loops.
603                      */
604                     spectre_mode = mode;
605           }
606           lwkt_setcpu_self(save_gd);
607           cpu_ccfence();
608 
609           /*
610            * Console message on mitigation mode change
611            */
612           kprintf("Spectre: support=(");
613           if (spectre_support == 0) {
614                     kprintf(" none");
615           } else {
616                     if (spectre_support & IBRS_SUPPORTED)
617                               kprintf(" IBRS");
618                     if (spectre_support & STIBP_SUPPORTED)
619                               kprintf(" STIBP");
620                     if (spectre_support & IBPB_SUPPORTED)
621                               kprintf(" IBPB");
622                     if (spectre_support & IBRS_AUTO_SUPPORTED)
623                               kprintf(" IBRS_AUTO");
624                     if (spectre_support & STIBP_AUTO_SUPPORTED)
625                               kprintf(" STIBP_AUTO");
626                     if (spectre_support & IBRS_PREFERRED_REQUEST)
627                               kprintf(" IBRS_REQUESTED");
628           }
629           kprintf(" ) req=%04x operating=(", (uint16_t)spectre_mitigation);
630           if (spectre_mode == 0) {
631                     kprintf(" none");
632           } else {
633                     if (spectre_mode & IBRS_SUPPORTED)
634                               kprintf(" IBRS");
635                     if (spectre_mode & STIBP_SUPPORTED)
636                               kprintf(" STIBP");
637                     if (spectre_mode & IBPB_SUPPORTED)
638                               kprintf(" IBPB");
639                     if (spectre_mode & IBRS_AUTO_SUPPORTED)
640                               kprintf(" IBRS_AUTO");
641                     if (spectre_mode & STIBP_AUTO_SUPPORTED)
642                               kprintf(" STIBP_AUTO");
643                     if (spectre_mode & IBRS_PREFERRED_REQUEST)
644                               kprintf(" IBRS_REQUESTED");
645           }
646           kprintf(" )\n");
647 }
648 
649 #undef CHECK
650 
651 /*
652  * User changes sysctl value
653  */
654 static int
sysctl_spectre_mitigation(SYSCTL_HANDLER_ARGS)655 sysctl_spectre_mitigation(SYSCTL_HANDLER_ARGS)
656 {
657           char buf[128];
658           char *ptr;
659           char *iter;
660           size_t len;
661           int spectre;
662           int error = 0;
663           int loop = 0;
664 
665           /*
666            * Return current operating mode or support.
667            */
668           if (oidp->oid_kind & CTLFLAG_WR)
669                     spectre = spectre_mode;
670           else
671                     spectre = spectre_support;
672 
673           spectre &= (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
674                         STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED |
675                         IBPB_SUPPORTED);
676           while (spectre) {
677                     if (error)
678                               break;
679                     if (loop++) {
680                               error = SYSCTL_OUT(req, " ", 1);
681                               if (error)
682                                         break;
683                     }
684                     if (spectre & IBRS_SUPPORTED) {
685                               spectre &= ~IBRS_SUPPORTED;
686                               error = SYSCTL_OUT(req, "IBRS", 4);
687                     } else
688                     if (spectre & IBRS_AUTO_SUPPORTED) {
689                               spectre &= ~IBRS_AUTO_SUPPORTED;
690                               error = SYSCTL_OUT(req, "IBRS_AUTO", 9);
691                     } else
692                     if (spectre & STIBP_SUPPORTED) {
693                               spectre &= ~STIBP_SUPPORTED;
694                               error = SYSCTL_OUT(req, "STIBP", 5);
695                     } else
696                     if (spectre & STIBP_AUTO_SUPPORTED) {
697                               spectre &= ~STIBP_AUTO_SUPPORTED;
698                               error = SYSCTL_OUT(req, "STIBP_AUTO", 10);
699                     } else
700                     if (spectre & IBPB_SUPPORTED) {
701                               spectre &= ~IBPB_SUPPORTED;
702                               error = SYSCTL_OUT(req, "IBPB", 4);
703                     }
704           }
705           if (loop == 0) {
706                     error = SYSCTL_OUT(req, "NONE", 4);
707           }
708 
709           if (error || req->newptr == NULL)
710                     return error;
711           if ((oidp->oid_kind & CTLFLAG_WR) == 0)
712                     return error;
713 
714           /*
715            * Change current operating mode
716            */
717           len = req->newlen - req->newidx;
718           if (len >= sizeof(buf)) {
719                     error = EINVAL;
720                     len = 0;
721           } else {
722                     error = SYSCTL_IN(req, buf, len);
723           }
724           buf[len] = 0;
725           iter = &buf[0];
726           spectre = 0;
727 
728           while (error == 0 && iter) {
729                     ptr = strsep(&iter, " ,\t\r\n");
730                     if (*ptr == 0)
731                               continue;
732                     if (strcasecmp(ptr, "NONE") == 0)
733                               spectre |= 0;
734                     else if (strcasecmp(ptr, "IBRS") == 0)
735                               spectre |= IBRS_SUPPORTED;
736                     else if (strcasecmp(ptr, "IBRS_AUTO") == 0)
737                               spectre |= IBRS_AUTO_SUPPORTED;
738                     else if (strcasecmp(ptr, "STIBP") == 0)
739                               spectre |= STIBP_SUPPORTED;
740                     else if (strcasecmp(ptr, "STIBP_AUTO") == 0)
741                               spectre |= STIBP_AUTO_SUPPORTED;
742                     else if (strcasecmp(ptr, "IBPB") == 0)
743                               spectre |= IBPB_SUPPORTED;
744                     else
745                               error = ENOENT;
746           }
747           if (error == 0) {
748                     spectre_mitigation = spectre;
749                     spectre_sysctl_changed();
750           }
751           return error;
752 }
753 
754 SYSCTL_PROC(_machdep, OID_AUTO, spectre_mitigation,
755           CTLTYPE_STRING | CTLFLAG_RW,
756           0, 0, sysctl_spectre_mitigation, "A", "Spectre exploit mitigation");
757 SYSCTL_PROC(_machdep, OID_AUTO, spectre_support,
758           CTLTYPE_STRING | CTLFLAG_RD,
759           0, 0, sysctl_spectre_mitigation, "A", "Spectre supported features");
760 
761 /*
762  * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
763  *         updated.  Microcode updates must be applied to all cpus
764  *         for support to be recognized.
765  */
766 static void
spectre_vm_setup(void * arg)767 spectre_vm_setup(void *arg)
768 {
769           int inconsistent = 0;
770           int supmask;
771 
772           /*
773            * Fetch tunable in auto mode
774            */
775           if (spectre_mitigation < 0) {
776                     TUNABLE_INT_FETCH("machdep.spectre_mitigation",
777                                           &spectre_mitigation);
778           }
779 
780           if ((supmask = spectre_check_support()) != 0) {
781                     /*
782                      * Must be supported on all cpus before we
783                      * can enable it.  Returns silently if it
784                      * isn't.
785                      *
786                      * NOTE! arg != NULL indicates we were called
787                      *         from cpuctl after a successful microcode
788                      *         update.
789                      */
790                     if (arg != NULL) {
791                               globaldata_t save_gd;
792                               int n;
793 
794                               save_gd = mycpu;
795                               for (n = 0; n < ncpus; ++n) {
796                                         lwkt_setcpu_self(globaldata_find(n));
797                                         cpu_ccfence();
798                                         if (spectre_check_support() !=
799                                             supmask) {
800                                                   inconsistent = 1;
801                                                   break;
802                                         }
803                               }
804                               lwkt_setcpu_self(save_gd);
805                               cpu_ccfence();
806                     }
807           }
808 
809           /*
810            * Be silent while microcode is being loaded on various CPUs,
811            * until all done.
812            */
813           if (inconsistent) {
814                     spectre_mitigation = -1;
815                     spectre_support = 0;
816                     return;
817           }
818 
819           /*
820            * IBRS support
821            */
822           spectre_support = supmask;
823 
824           /*
825            * Enable spectre_mitigation, set defaults if -1, adjust
826            * tuned value according to support if not.
827            *
828            * NOTE!  We do not enable IBPB for user->kernel transitions
829            *          by default, so this code is commented out for now.
830            */
831           if (spectre_support) {
832                     if (spectre_mitigation < 0) {
833                               spectre_mitigation = 0;
834 
835                               /*
836                                * IBRS toggling not currently recommended as a
837                                * default.
838                                */
839                               if (spectre_support & IBRS_AUTO_SUPPORTED)
840                                         spectre_mitigation |= IBRS_AUTO_SUPPORTED;
841                               else if (spectre_support & IBRS_SUPPORTED)
842                                         spectre_mitigation |= 0;
843 
844                               /*
845                                * STIBP toggling not currently recommended as a
846                                * default.
847                                */
848                               if (spectre_support & STIBP_AUTO_SUPPORTED)
849                                         spectre_mitigation |= STIBP_AUTO_SUPPORTED;
850                               else if (spectre_support & STIBP_SUPPORTED)
851                                         spectre_mitigation |= 0;
852 
853                               /*
854                                * IBPB adds enormous (~2uS) overhead to system
855                                * calls etc, we do not enable it by default.
856                                */
857                               if (spectre_support & IBPB_SUPPORTED)
858                                         spectre_mitigation |= 0;
859                     }
860           } else {
861                     spectre_mitigation = -1;
862           }
863 
864           /*
865            * Disallow sysctl changes when there is no support (otherwise
866            * the wrmsr will cause a protection fault).
867            */
868           if (spectre_mitigation < 0)
869                     sysctl___machdep_spectre_mitigation.oid_kind &= ~CTLFLAG_WR;
870           else
871                     sysctl___machdep_spectre_mitigation.oid_kind |= CTLFLAG_WR;
872 
873           spectre_sysctl_changed();
874 }
875 
/*
 * Bit flags returned by mds_check_support() and stored in mds_support,
 * mds_mitigation and mds_mode.
 */
#define MDS_AVX512_4VNNIW_SUPPORTED	0x0001	/* CPUID_STDEXT3_AVX5124VNNIW */
#define MDS_AVX512_4FMAPS_SUPPORTED	0x0002	/* CPUID_STDEXT3_AVX5124FMAPS */
#define MDS_MD_CLEAR_SUPPORTED		0x0004	/* CPUID_STDEXT3_MD_CLEAR */
#define MDS_TSX_FORCE_ABORT_SUPPORTED	0x0008	/* CPUID_STDEXT3_TSXFA */
#define MDS_NOT_REQUIRED		0x8000	/* non-Intel cpu or MDS_NO set */
881 
882 static
883 int
mds_check_support(void)884 mds_check_support(void)
885 {
886           uint64_t msr;
887           uint32_t p[4];
888           int rv = 0;
889 
890           /*
891            * MDS mitigation hw bits
892            *
893            * MD_CLEAR         Use microcode-supported verf insn.  This is the
894            *                  only mode we really support.
895            */
896           if (cpu_vendor_id == CPU_VENDOR_INTEL) {
897                     p[0] = 0;
898                     p[1] = 0;
899                     p[2] = 0;
900                     p[3] = 0;
901                     cpuid_count(7, 0, p);
902 
903                     /*
904                      * Some hypervisors fail to implement
905                      * MSR_IA32_ARCH_CAPABILITIES.
906                      */
907                     if (p[3] & CPUID_STDEXT3_ARCH_CAP) {
908                               msr = 0;
909                               if (rdmsr_safe(MSR_IA32_ARCH_CAPABILITIES, &msr)) {
910                                         kprintf("Warning: MSR_IA32_ARCH_CAPABILITIES "
911                                                   "cannot be accessed\n");
912                               }
913                               if (msr & IA32_ARCH_CAP_MDS_NO)
914                                         rv = MDS_NOT_REQUIRED;
915                     }
916                     if (p[3] & CPUID_STDEXT3_AVX5124VNNIW)
917                               rv |= MDS_AVX512_4VNNIW_SUPPORTED;
918                     if (p[3] & CPUID_STDEXT3_AVX5124FMAPS)
919                               rv |= MDS_AVX512_4FMAPS_SUPPORTED;
920                     if (p[3] & CPUID_STDEXT3_MD_CLEAR)
921                               rv |= MDS_MD_CLEAR_SUPPORTED;
922                     if (p[3] & CPUID_STDEXT3_TSXFA)
923                               rv |= MDS_TSX_FORCE_ABORT_SUPPORTED;
924           } else {
925                     rv = MDS_NOT_REQUIRED;
926           }
927 
928           return rv;
929 }
930 
931 /*
932  * Iterate CPUs and adjust MSR for global operations, since
933  * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
934  */
935 #define CHECK(flag) (mds_mitigation & mds_support & (flag))
936 
937 static
938 void
mds_sysctl_changed(void)939 mds_sysctl_changed(void)
940 {
941           globaldata_t save_gd;
942           struct trampframe *tr;
943           int spec_ctrl;
944           int spec_mask;
945           int mode;
946           int n;
947 
948           spec_mask = SPEC_CTRL_MDS_ENABLE;
949 
950           /*
951            * Fixup state
952            */
953           mode = 0;
954           save_gd = mycpu;
955           for (n = 0; n < ncpus; ++n) {
956                     lwkt_setcpu_self(globaldata_find(n));
957                     cpu_ccfence();
958                     tr = &pscpu->trampoline;
959 
960                     /*
961                      * Make sure we are cleaned out.
962                      *
963                      * XXX cleanup, reusing globals inside the loop (they get
964                      * set to the same thing each loop)
965                      *
966                      * [0] kernel entry (idle exit)
967                      * [1] kernel exit  (idle entry)
968                      */
969                     tr->tr_pcb_spec_ctrl[0] &= ~spec_mask;
970                     tr->tr_pcb_spec_ctrl[1] &= ~spec_mask;
971 
972                     /*
973                      * Don't try to parse if not available
974                      */
975                     if (mds_mitigation < 0)
976                               continue;
977 
978                     spec_ctrl = 0;
979                     if (CHECK(MDS_MD_CLEAR_SUPPORTED)) {
980                               spec_ctrl |= SPEC_CTRL_MDS_ENABLE;
981                               mode |= MDS_MD_CLEAR_SUPPORTED;
982                     }
983 
984                     /*
985                      * Update spec_ctrl fields in the trampoline.
986                      *
987                      * [0] on-kernel-entry (on-idle-exit)
988                      * [1] on-kernel-exit  (on-idle-entry)
989                      *
990                      * The MDS stuff is only needed on kernel-exit or idle-entry
991                      */
992                     /* tr->tr_pcb_spec_ctrl[0] |= spec_ctrl; */
993                     tr->tr_pcb_spec_ctrl[1] |= spec_ctrl;
994 
995                     /*
996                      * Make sure we set this on the first loop.  It will be
997                      * the same value on remaining loops.
998                      */
999                     mds_mode = mode;
1000           }
1001           lwkt_setcpu_self(save_gd);
1002           cpu_ccfence();
1003 
1004           /*
1005            * Console message on mitigation mode change
1006            */
1007           kprintf("MDS: support=(");
1008           if (mds_support == 0) {
1009                     kprintf(" none");
1010           } else {
1011                     if (mds_support & MDS_AVX512_4VNNIW_SUPPORTED)
1012                               kprintf(" AVX512_4VNNIW");
1013                     if (mds_support & MDS_AVX512_4FMAPS_SUPPORTED)
1014                               kprintf(" AVX512_4FMAPS");
1015                     if (mds_support & MDS_MD_CLEAR_SUPPORTED)
1016                               kprintf(" MD_CLEAR");
1017                     if (mds_support & MDS_TSX_FORCE_ABORT_SUPPORTED)
1018                               kprintf(" TSX_FORCE_ABORT");
1019                     if (mds_support & MDS_NOT_REQUIRED)
1020                               kprintf(" MDS_NOT_REQUIRED");
1021           }
1022           kprintf(" ) req=%04x operating=(", (uint16_t)mds_mitigation);
1023           if (mds_mode == 0) {
1024                     kprintf(" none");
1025           } else {
1026                     if (mds_mode & MDS_AVX512_4VNNIW_SUPPORTED)
1027                               kprintf(" AVX512_4VNNIW");
1028                     if (mds_mode & MDS_AVX512_4FMAPS_SUPPORTED)
1029                               kprintf(" AVX512_4FMAPS");
1030                     if (mds_mode & MDS_MD_CLEAR_SUPPORTED)
1031                               kprintf(" MD_CLEAR");
1032                     if (mds_mode & MDS_TSX_FORCE_ABORT_SUPPORTED)
1033                               kprintf(" TSX_FORCE_ABORT");
1034                     if (mds_mode & MDS_NOT_REQUIRED)
1035                               kprintf(" MDS_NOT_REQUIRED");
1036           }
1037           kprintf(" )\n");
1038 }
1039 
1040 #undef CHECK
1041 
1042 /*
1043  * User changes sysctl value
1044  */
1045 static int
sysctl_mds_mitigation(SYSCTL_HANDLER_ARGS)1046 sysctl_mds_mitigation(SYSCTL_HANDLER_ARGS)
1047 {
1048           char buf[128];
1049           char *ptr;
1050           char *iter;
1051           size_t len;
1052           int mds;
1053           int error = 0;
1054           int loop = 0;
1055 
1056           /*
1057            * Return current operating mode or support.
1058            */
1059           if (oidp->oid_kind & CTLFLAG_WR)
1060                     mds = mds_mode;
1061           else
1062                     mds = mds_support;
1063 
1064           mds &= MDS_AVX512_4VNNIW_SUPPORTED |
1065                  MDS_AVX512_4FMAPS_SUPPORTED |
1066                  MDS_MD_CLEAR_SUPPORTED |
1067                  MDS_TSX_FORCE_ABORT_SUPPORTED |
1068                  MDS_NOT_REQUIRED;
1069 
1070           while (mds) {
1071                     if (error)
1072                               break;
1073                     if (loop++) {
1074                               error = SYSCTL_OUT(req, " ", 1);
1075                               if (error)
1076                                         break;
1077                     }
1078                     if (mds & MDS_AVX512_4VNNIW_SUPPORTED) {
1079                               mds &= ~MDS_AVX512_4VNNIW_SUPPORTED;
1080                               error = SYSCTL_OUT(req, "AVX512_4VNNIW", 13);
1081                     } else
1082                     if (mds & MDS_AVX512_4FMAPS_SUPPORTED) {
1083                               mds &= ~MDS_AVX512_4FMAPS_SUPPORTED;
1084                               error = SYSCTL_OUT(req, "AVX512_4FMAPS", 13);
1085                     } else
1086                     if (mds & MDS_MD_CLEAR_SUPPORTED) {
1087                               mds &= ~MDS_MD_CLEAR_SUPPORTED;
1088                               error = SYSCTL_OUT(req, "MD_CLEAR", 8);
1089                     } else
1090                     if (mds & MDS_TSX_FORCE_ABORT_SUPPORTED) {
1091                               mds &= ~MDS_TSX_FORCE_ABORT_SUPPORTED;
1092                               error = SYSCTL_OUT(req, "TSX_FORCE_ABORT", 15);
1093                     } else
1094                     if (mds & MDS_NOT_REQUIRED) {
1095                               mds &= ~MDS_NOT_REQUIRED;
1096                               error = SYSCTL_OUT(req, "MDS_NOT_REQUIRED", 16);
1097                     }
1098           }
1099           if (loop == 0) {
1100                     error = SYSCTL_OUT(req, "NONE", 4);
1101           }
1102 
1103           if (error || req->newptr == NULL)
1104                     return error;
1105           if ((oidp->oid_kind & CTLFLAG_WR) == 0)
1106                     return error;
1107 
1108           /*
1109            * Change current operating mode
1110            */
1111           len = req->newlen - req->newidx;
1112           if (len >= sizeof(buf)) {
1113                     error = EINVAL;
1114                     len = 0;
1115           } else {
1116                     error = SYSCTL_IN(req, buf, len);
1117           }
1118           buf[len] = 0;
1119           iter = &buf[0];
1120           mds = 0;
1121 
1122           while (error == 0 && iter) {
1123                     ptr = strsep(&iter, " ,\t\r\n");
1124                     if (*ptr == 0)
1125                               continue;
1126                     if (strcasecmp(ptr, "NONE") == 0)
1127                               mds |= 0;
1128                     else if (strcasecmp(ptr, "AVX512_4VNNIW") == 0)
1129                               mds |= MDS_AVX512_4VNNIW_SUPPORTED;
1130                     else if (strcasecmp(ptr, "AVX512_4FMAPS") == 0)
1131                               mds |= MDS_AVX512_4FMAPS_SUPPORTED;
1132                     else if (strcasecmp(ptr, "MD_CLEAR") == 0)
1133                               mds |= MDS_MD_CLEAR_SUPPORTED;
1134                     else if (strcasecmp(ptr, "TSX_FORCE_ABORT") == 0)
1135                               mds |= MDS_TSX_FORCE_ABORT_SUPPORTED;
1136                     else if (strcasecmp(ptr, "MDS_NOT_REQUIRED") == 0)
1137                               mds |= MDS_NOT_REQUIRED;
1138                     else
1139                               error = ENOENT;
1140           }
1141           if (error == 0) {
1142                     mds_mitigation = mds;
1143                     mds_sysctl_changed();
1144           }
1145           return error;
1146 }
1147 
1148 SYSCTL_PROC(_machdep, OID_AUTO, mds_mitigation,
1149           CTLTYPE_STRING | CTLFLAG_RW,
1150           0, 0, sysctl_mds_mitigation, "A", "MDS exploit mitigation");
1151 SYSCTL_PROC(_machdep, OID_AUTO, mds_support,
1152           CTLTYPE_STRING | CTLFLAG_RD,
1153           0, 0, sysctl_mds_mitigation, "A", "MDS supported features");
1154 
1155 /*
1156  * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
1157  *         updated.  Microcode updates must be applied to all cpus
1158  *         for support to be recognized.
1159  */
1160 static void
mds_vm_setup(void * arg)1161 mds_vm_setup(void *arg)
1162 {
1163           int inconsistent = 0;
1164           int supmask;
1165 
1166           /*
1167            * Fetch tunable in auto mode
1168            */
1169           if (mds_mitigation < 0) {
1170                     TUNABLE_INT_FETCH("machdep.mds_mitigation", &mds_mitigation);
1171           }
1172 
1173           if ((supmask = mds_check_support()) != 0) {
1174                     /*
1175                      * Must be supported on all cpus before we
1176                      * can enable it.  Returns silently if it
1177                      * isn't.
1178                      *
1179                      * NOTE! arg != NULL indicates we were called
1180                      *         from cpuctl after a successful microcode
1181                      *         update.
1182                      */
1183                     if (arg != NULL) {
1184                               globaldata_t save_gd;
1185                               int n;
1186 
1187                               save_gd = mycpu;
1188                               for (n = 0; n < ncpus; ++n) {
1189                                         lwkt_setcpu_self(globaldata_find(n));
1190                                         cpu_ccfence();
1191                                         if (mds_check_support() != supmask) {
1192                                                   inconsistent = 1;
1193                                                   break;
1194                                         }
1195                               }
1196                               lwkt_setcpu_self(save_gd);
1197                               cpu_ccfence();
1198                     }
1199           }
1200 
1201           /*
1202            * Be silent while microcode is being loaded on various CPUs,
1203            * until all done.
1204            */
1205           if (inconsistent) {
1206                     mds_mitigation = -1;
1207                     mds_support = 0;
1208                     return;
1209           }
1210 
1211           /*
1212            * IBRS support
1213            */
1214           mds_support = supmask;
1215 
1216           /*
1217            * Enable mds_mitigation, set defaults if -1, adjust
1218            * tuned value according to support if not.
1219            *
1220            * NOTE!  MDS is not enabled by default.
1221            */
1222           if (mds_support) {
1223                     if (mds_mitigation < 0) {
1224                               mds_mitigation = 0;
1225 
1226                               if ((mds_support & MDS_NOT_REQUIRED) == 0 &&
1227                                   (mds_support & MDS_MD_CLEAR_SUPPORTED)) {
1228                                         /* mds_mitigation |= MDS_MD_CLEAR_SUPPORTED; */
1229                               }
1230                     }
1231           } else {
1232                     mds_mitigation = -1;
1233           }
1234 
1235           /*
1236            * Disallow sysctl changes when there is no support (otherwise
1237            * the wrmsr will cause a protection fault).
1238            */
1239           if (mds_mitigation < 0)
1240                     sysctl___machdep_mds_mitigation.oid_kind &= ~CTLFLAG_WR;
1241           else
1242                     sysctl___machdep_mds_mitigation.oid_kind |= CTLFLAG_WR;
1243 
1244           mds_sysctl_changed();
1245 }
1246 
/*
 * Run the spectre setup followed by the MDS setup, handing arg through
 * to both unchanged.
 *
 * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
 *	 updated.  Microcode updates must be applied to all cpus
 *	 for support to be recognized.
 */
void
mitigation_vm_setup(void *arg)
{
	spectre_vm_setup(arg);	/* spectre first */
	mds_vm_setup(arg);	/* then MDS */
}

/* Boot-time invocation, arg is NULL. */
SYSINIT(mitigation_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
    mitigation_vm_setup, NULL);
1261 
1262 /*
1263  * platform-specific vmspace initialization (nothing for x86_64)
1264  */
1265 void
cpu_vmspace_alloc(struct vmspace * vm __unused)1266 cpu_vmspace_alloc(struct vmspace *vm __unused)
1267 {
1268 }
1269 
1270 void
cpu_vmspace_free(struct vmspace * vm __unused)1271 cpu_vmspace_free(struct vmspace *vm __unused)
1272 {
1273 }
1274 
1275 int
kvm_access_check(vm_offset_t saddr,vm_offset_t eaddr,int prot)1276 kvm_access_check(vm_offset_t saddr, vm_offset_t eaddr, int prot)
1277 {
1278           vm_offset_t addr;
1279 
1280           if (saddr < KvaStart)
1281                     return EFAULT;
1282           if (eaddr >= KvaEnd)
1283                     return EFAULT;
1284           for (addr = saddr; addr < eaddr; addr += PAGE_SIZE)  {
1285                     if (pmap_kextract(addr) == 0)
1286                               return EFAULT;
1287           }
1288           if (!kernacc((caddr_t)saddr, eaddr - saddr, prot))
1289                     return EFAULT;
1290           return 0;
1291 }
1292 
1293 #if 0
1294 
1295 void _test_frame_enter(struct trapframe *frame);
1296 void _test_frame_exit(struct trapframe *frame);
1297 
1298 void
1299 _test_frame_enter(struct trapframe *frame)
1300 {
1301           thread_t td = curthread;
1302 
1303           if (ISPL(frame->tf_cs) == SEL_UPL) {
1304                     KKASSERT(td->td_lwp);
1305                 KASSERT(td->td_lwp->lwp_md.md_regs == frame,
1306                         ("_test_frame_exit: Frame mismatch %p %p",
1307                               td->td_lwp->lwp_md.md_regs, frame));
1308               td->td_lwp->lwp_saveusp = (void *)frame->tf_rsp;
1309               td->td_lwp->lwp_saveupc = (void *)frame->tf_rip;
1310           }
1311           if ((char *)frame < td->td_kstack ||
1312               (char *)frame > td->td_kstack + td->td_kstack_size) {
1313                     panic("_test_frame_exit: frame not on kstack %p kstack=%p",
1314                               frame, td->td_kstack);
1315           }
1316 }
1317 
1318 void
1319 _test_frame_exit(struct trapframe *frame)
1320 {
1321           thread_t td = curthread;
1322 
1323           if (ISPL(frame->tf_cs) == SEL_UPL) {
1324                     KKASSERT(td->td_lwp);
1325                 KASSERT(td->td_lwp->lwp_md.md_regs == frame,
1326                         ("_test_frame_exit: Frame mismatch %p %p",
1327                               td->td_lwp->lwp_md.md_regs, frame));
1328                     if (td->td_lwp->lwp_saveusp != (void *)frame->tf_rsp) {
1329                               kprintf("_test_frame_exit: %s:%d usp mismatch %p/%p\n",
1330                                         td->td_comm, td->td_proc->p_pid,
1331                                         td->td_lwp->lwp_saveusp,
1332                                         (void *)frame->tf_rsp);
1333                     }
1334                     if (td->td_lwp->lwp_saveupc != (void *)frame->tf_rip) {
1335                               kprintf("_test_frame_exit: %s:%d upc mismatch %p/%p\n",
1336                                         td->td_comm, td->td_proc->p_pid,
1337                                         td->td_lwp->lwp_saveupc,
1338                                         (void *)frame->tf_rip);
1339                     }
1340 
1341                     /*
1342                      * adulterate the fields to catch entries that
1343                      * don't run through test_frame_enter
1344                      */
1345                     td->td_lwp->lwp_saveusp =
1346                               (void *)~(intptr_t)td->td_lwp->lwp_saveusp;
1347                     td->td_lwp->lwp_saveupc =
1348                               (void *)~(intptr_t)td->td_lwp->lwp_saveupc;
1349           }
1350           if ((char *)frame < td->td_kstack ||
1351               (char *)frame > td->td_kstack + td->td_kstack_size) {
1352                     panic("_test_frame_exit: frame not on kstack %p kstack=%p",
1353                               frame, td->td_kstack);
1354           }
1355 }
1356 
1357 #endif
1358