1 /* $NetBSD: fpu.c,v 1.91 2025/04/28 13:01:27 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. All
5 * rights reserved.
6 *
7 * This code is derived from software developed for The NetBSD Foundation
8 * by Andrew Doran and Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1991 The Regents of the University of California.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)npx.c 7.2 (Berkeley) 5/12/91
61 */
62
63 /*
64 * Copyright (c) 1994, 1995, 1998 Charles M. Hannum. All rights reserved.
65 * Copyright (c) 1990 William Jolitz.
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 * 3. All advertising materials mentioning features or use of this software
76 * must display the following acknowledgement:
77 * This product includes software developed by the University of
78 * California, Berkeley and its contributors.
79 * 4. Neither the name of the University nor the names of its contributors
80 * may be used to endorse or promote products derived from this software
81 * without specific prior written permission.
82 *
83 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
84 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
87 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93 * SUCH DAMAGE.
94 *
95 * @(#)npx.c 7.2 (Berkeley) 5/12/91
96 */
97
98 #include <sys/cdefs.h>
99 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.91 2025/04/28 13:01:27 riastradh Exp $");
100
101 #include "opt_ddb.h"
102 #include "opt_multiprocessor.h"
103
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/conf.h>
107 #include <sys/cpu.h>
108 #include <sys/file.h>
109 #include <sys/proc.h>
110 #include <sys/kernel.h>
111 #include <sys/sysctl.h>
112 #include <sys/xcall.h>
113
114 #include <machine/cpu.h>
115 #include <machine/cpuvar.h>
116 #include <machine/cputypes.h>
117 #include <machine/intr.h>
118 #include <machine/cpufunc.h>
119 #include <machine/pcb.h>
120 #include <machine/trap.h>
121 #include <machine/specialreg.h>
122 #include <x86/cpu.h>
123 #include <x86/fpu.h>
124
125 #ifdef DDB
126 #include <ddb/ddb.h>
127 #endif
128
129 #ifdef XENPV
130 #define clts() HYPERVISOR_fpu_taskswitch(0)
131 #define stts() HYPERVISOR_fpu_taskswitch(1)
132 #endif
133
134 void fpu_handle_deferred(void);
135 void fpu_switch(struct lwp *, struct lwp *);
136
137 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
138
139 static const union savefpu safe_fpu_storage __aligned(64) = {
140 .sv_xmm = {
141 .fx_mxcsr = __SAFE_MXCSR__,
142 },
143 };
144 static const union savefpu zero_fpu_storage __aligned(64);
145
146 static const void *safe_fpu __read_mostly = &safe_fpu_storage;
147 static const void *zero_fpu __read_mostly = &zero_fpu_storage;
148
149 /*
150 * x86_fpu_save_separate_p()
151 *
152 * True if we allocate the FPU save space separately, outside the
153 * struct pcb itself, because it doesn't fit in a single page.
154 */
155 bool
x86_fpu_save_separate_p(void)156 x86_fpu_save_separate_p(void)
157 {
158
159 return x86_fpu_save_size >
160 PAGE_SIZE - offsetof(struct pcb, pcb_savefpusmall);
161 }
162
163 static inline union savefpu *
fpu_lwp_area(struct lwp * l)164 fpu_lwp_area(struct lwp *l)
165 {
166 struct pcb *pcb = lwp_getpcb(l);
167 union savefpu *area = pcb->pcb_savefpu;
168
169 KASSERT((l->l_flag & LW_SYSTEM) == 0);
170 if (l == curlwp) {
171 fpu_save();
172 }
173 KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));
174
175 return area;
176 }
177
178 static inline void
fpu_save_lwp(struct lwp * l)179 fpu_save_lwp(struct lwp *l)
180 {
181 struct pcb *pcb = lwp_getpcb(l);
182 union savefpu *area = pcb->pcb_savefpu;
183 int s;
184
185 s = splvm();
186 if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
187 KASSERT((l->l_flag & LW_SYSTEM) == 0);
188 fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
189 l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
190 }
191 splx(s);
192 }
193
194 /*
195 * Bring curlwp's FPU state in memory. It will get installed back in the CPU
196 * when returning to userland.
197 */
198 void
fpu_save(void)199 fpu_save(void)
200 {
201 fpu_save_lwp(curlwp);
202 }
203
/*
 * fpuinit(ci)
 *
 *	Initialize the FPU with fninit, bracketed by clts()/stts().
 *	The ci argument is not used.
 */
void
fpuinit(struct cpu_info *ci)
{

	/*
	 * Possibly redundant, because the FPU is initialized again for
	 * each process -- but it is harmless.
	 */
	clts();
	fninit();
	stts();
}
215
216 /*
217 * fpuinit_mxcsr_mask()
218 *
219 * Called once by cpu_init on the primary CPU. Initializes
220 * x86_fpu_mxcsr_mask based on the initial FPU state, and
221 * initializes save_fpu and zero_fpu if necessary when the
222 * hardware's FPU save size is larger than union savefpu.
223 *
224 * XXX Rename this function!
225 */
226 void
fpuinit_mxcsr_mask(void)227 fpuinit_mxcsr_mask(void)
228 {
229 /*
230 * If the CPU's x86 fpu save size is larger than union savefpu,
231 * we have to allocate larger buffers for the safe and zero FPU
232 * states used here and by fpu_kern_enter/leave.
233 *
234 * Note: This is NOT the same as x86_fpu_save_separate_p(),
235 * which may have a little more space than union savefpu.
236 */
237 const bool allocfpusave = x86_fpu_save_size > sizeof(union savefpu);
238 vaddr_t va;
239
240 #if defined XENPV
241 if (x86_fpu_save_separate_p()) {
242 /*
243 * XXX Temporary workaround for PR kern/59371 until we
244 * work out the implications.
245 */
246 panic("NetBSD/xen does not support fpu save size %u",
247 x86_fpu_save_size);
248 }
249 #elif defined __i386__
250 if (x86_fpu_save_separate_p()) {
251 /*
252 * XXX Need to teach cpu_uarea_alloc/free to allocate a
253 * separate fpu save space, and make pcb_savefpu a
254 * pointer indirection -- currently only done on amd64,
255 * not on i386.
256 *
257 * But the primary motivation on amd64 is the 8192-byte
258 * TILEDATA state for Intel AMX (Advanced Matrix
259 * Extensions), which doesn't work in 32-bit mode
260 * anyway, so on such machines we ought to just disable
261 * it in the first place and keep x86_fpu_save_size
262 * down:
263 *
264 * While Intel AMX instructions can be executed
265 * only in 64-bit mode, instructions of the XSAVE
266 * feature set can operate on TILECFG and TILEDATA
267 * in any mode. It is recommended that only
268 * 64-bit operating systems enable Intel AMX by
269 * setting XCR0[18:17].
270 *
271 * --Intel 64 and IA-32 Architectures Software
272 * Developer's Manual, Volume 1: Basic
273 * Architecture, Order Number: 253665-087US, March
274 * 2025, Sec. 13.3 `Enabling the XSAVE feature set
275 * and XSAVE-enabled features', p. 13-6.
276 * https://cdrdv2.intel.com/v1/dl/getContent/671436
277 * https://web.archive.org/web/20250404141850/https://cdrdv2-public.intel.com/851056/253665-087-sdm-vol-1.pdf
278 * https://web.archive.org/web/20250404141850if_/https://cdrdv2-public.intel.com/851056/253665-087-sdm-vol-1.pdf#page=324
279 */
280 panic("NetBSD/i386 does not support fpu save size %u",
281 x86_fpu_save_size);
282 }
283 #endif
284
285 #ifndef XENPV
286 union savefpu fpusave_stack __aligned(64);
287 union savefpu *fpusave;
288 u_long psl;
289
290 /*
291 * Allocate a temporary save space from the stack if it fits,
292 * or from the heap otherwise, so we can query its mxcsr mask.
293 */
294 if (allocfpusave) {
295 /*
296 * Need 64-byte alignment for XSAVE instructions.
297 * kmem_* doesn't guarantee that and we don't have a
298 * handy posix_memalign in the kernel unless we hack it
299 * ourselves with vmem(9), so just ask for page
300 * alignment with uvm_km(9).
301 */
302 __CTASSERT(PAGE_SIZE >= 64);
303 va = uvm_km_alloc(kernel_map, x86_fpu_save_size, PAGE_SIZE,
304 UVM_KMF_WIRED|UVM_KMF_ZERO|UVM_KMF_WAITVA);
305 fpusave = (void *)va;
306 } else {
307 fpusave = &fpusave_stack;
308 memset(fpusave, 0, sizeof(*fpusave));
309 }
310
311 /* Disable interrupts, and enable FPU */
312 psl = x86_read_psl();
313 x86_disable_intr();
314 clts();
315
316 /* Fill in the FPU area */
317 fxsave(fpusave);
318
319 /* Restore previous state */
320 stts();
321 x86_write_psl(psl);
322
323 if (fpusave->sv_xmm.fx_mxcsr_mask == 0) {
324 x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
325 } else {
326 x86_fpu_mxcsr_mask = fpusave->sv_xmm.fx_mxcsr_mask;
327 }
328
329 /*
330 * Free the temporary save space.
331 */
332 if (allocfpusave) {
333 uvm_km_free(kernel_map, va, x86_fpu_save_size, UVM_KMF_WIRED);
334 fpusave = NULL;
335 va = 0;
336 }
337 #else
338 /*
339 * XXX XXX XXX: On Xen the FXSAVE above faults. That's because
340 * &fpusave is not 16-byte aligned. Stack alignment problem
341 * somewhere, it seems.
342 */
343 x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
344 #endif
345
346 /*
347 * If necessary, allocate FPU save spaces for safe or zero FPU
348 * state, for fpu_kern_enter/leave.
349 */
350 if (allocfpusave) {
351 __CTASSERT(PAGE_SIZE >= 64);
352
353 va = uvm_km_alloc(kernel_map, x86_fpu_save_size, PAGE_SIZE,
354 UVM_KMF_WIRED|UVM_KMF_ZERO|UVM_KMF_WAITVA);
355 memcpy((void *)va, &safe_fpu_storage,
356 sizeof(safe_fpu_storage));
357 uvm_km_protect(kernel_map, va, x86_fpu_save_size,
358 VM_PROT_READ);
359 safe_fpu = (void *)va;
360
361 va = uvm_km_alloc(kernel_map, x86_fpu_save_size, PAGE_SIZE,
362 UVM_KMF_WIRED|UVM_KMF_ZERO|UVM_KMF_WAITVA);
363 /*
364 * No initialization -- just want zeroes! In fact we
365 * could share this with other all-zero pages.
366 */
367 uvm_km_protect(kernel_map, va, x86_fpu_save_size,
368 VM_PROT_READ);
369 zero_fpu = (void *)va;
370 }
371 }
372
/*
 * Work around AMD FPUs not restoring FIP, FDP and FOP on fxrstor and
 * xrstor when FSW.ES=0, which leaks other threads' execution history.
 */
static inline void
fpu_errata_amd(void)
{
	uint16_t fsw;

	/*
	 * The pointers are cleared by hand with a dummy load of zero
	 * (fldummy), done unconditionally, whatever FSW.ES is.
	 *
	 * To keep that load from faulting, the ES bit of the x87
	 * status word is cleared first if it is set.
	 *
	 * Newer generations of AMD CPUs advertise, through
	 * CPUID_Fn80000008_EBX[2], that FIP/FDP/FOP are restored (same
	 * behavior as Intel).  That bit is not consulted here.
	 */
	fnstsw(&fsw);
	if ((fsw & 0x80) != 0)
		fnclex();
	fldummy();
}
398
/*
 * XS64(x): name of the save/restore primitive to call.  On x86_64 it
 * is x##64 when the caller's local `is_64bit' is true and plain x
 * otherwise; on other builds it is always plain x.
 */
#ifndef __x86_64__
#define XS64(x) x
#else
#define XS64(x) (is_64bit ? x##64 : x)
#endif
404
405 void
fpu_area_save(void * area,uint64_t xsave_features,bool is_64bit)406 fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
407 {
408 switch (x86_fpu_save) {
409 case FPU_SAVE_FSAVE:
410 fnsave(area);
411 break;
412 case FPU_SAVE_FXSAVE:
413 XS64(fxsave)(area);
414 break;
415 case FPU_SAVE_XSAVE:
416 XS64(xsave)(area, xsave_features);
417 break;
418 case FPU_SAVE_XSAVEOPT:
419 XS64(xsaveopt)(area, xsave_features);
420 break;
421 }
422
423 stts();
424 }
425
426 void
fpu_area_restore(const void * area,uint64_t xsave_features,bool is_64bit)427 fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
428 {
429 clts();
430
431 switch (x86_fpu_save) {
432 case FPU_SAVE_FSAVE:
433 frstor(area);
434 break;
435 case FPU_SAVE_FXSAVE:
436 if (cpu_vendor == CPUVENDOR_AMD)
437 fpu_errata_amd();
438 XS64(fxrstor)(area);
439 break;
440 case FPU_SAVE_XSAVE:
441 case FPU_SAVE_XSAVEOPT:
442 if (cpu_vendor == CPUVENDOR_AMD)
443 fpu_errata_amd();
444 XS64(xrstor)(area, xsave_features);
445 break;
446 }
447 }
448
449 void
fpu_handle_deferred(void)450 fpu_handle_deferred(void)
451 {
452 struct pcb *pcb = lwp_getpcb(curlwp);
453 fpu_area_restore(pcb->pcb_savefpu, x86_xsave_features,
454 !(curlwp->l_proc->p_flag & PK_32));
455 }
456
457 void
fpu_switch(struct lwp * oldlwp,struct lwp * newlwp)458 fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
459 {
460 struct cpu_info *ci __diagused = curcpu();
461 struct pcb *pcb;
462
463 KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d",
464 cpu_index(ci), ci->ci_ilevel);
465
466 if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
467 KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
468 pcb = lwp_getpcb(oldlwp);
469 fpu_area_save(pcb->pcb_savefpu, x86_xsave_features,
470 !(oldlwp->l_proc->p_flag & PK_32));
471 oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
472 }
473 KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
474 }
475
476 void
fpu_lwp_fork(struct lwp * l1,struct lwp * l2)477 fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
478 {
479 struct pcb *pcb2 = lwp_getpcb(l2);
480 union savefpu *fpu_save;
481
482 /* Kernel threads have no FPU. */
483 if (__predict_false(l2->l_flag & LW_SYSTEM)) {
484 return;
485 }
486
487 /* For init(8). */
488 if (__predict_false(l1->l_flag & LW_SYSTEM)) {
489 memset(pcb2->pcb_savefpu, 0, x86_fpu_save_size);
490 return;
491 }
492
493 fpu_save = fpu_lwp_area(l1);
494 memcpy(pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size);
495 l2->l_md.md_flags &= ~MDL_FPU_IN_CPU;
496 }
497
498 void
fpu_lwp_abandon(struct lwp * l)499 fpu_lwp_abandon(struct lwp *l)
500 {
501 int s;
502
503 KASSERT(l == curlwp);
504 s = splvm();
505 l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
506 stts();
507 splx(s);
508 }
509
510 /* -------------------------------------------------------------------------- */
511
512 /*
513 * fpu_kern_enter()
514 *
515 * Begin using the FPU. Raises to splvm, disabling most
516 * interrupts and rendering the thread non-preemptible; caller
517 * should not use this for long periods of time, and must call
518 * fpu_kern_leave() afterward. Non-recursive -- you cannot call
519 * fpu_kern_enter() again without calling fpu_kern_leave() first.
520 *
521 * Must be used only at IPL_VM or below -- never in IPL_SCHED or
522 * IPL_HIGH interrupt handlers.
523 */
524 void
fpu_kern_enter(void)525 fpu_kern_enter(void)
526 {
527 struct lwp *l = curlwp;
528 struct cpu_info *ci;
529 int s;
530
531 s = splvm();
532
533 ci = curcpu();
534 #if 0
535 /*
536 * Can't assert this because if the caller holds a spin lock at
537 * IPL_VM, and previously held and released a spin lock at
538 * higher IPL, the IPL remains raised above IPL_VM.
539 */
540 KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d",
541 ci->ci_ilevel);
542 #endif
543 KASSERT(ci->ci_kfpu_spl == -1);
544 ci->ci_kfpu_spl = s;
545
546 /*
547 * If we are in a softint and have a pinned lwp, the fpu state is that
548 * of the pinned lwp, so save it there.
549 */
550 while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
551 l = l->l_switchto;
552 fpu_save_lwp(l);
553
554 /*
555 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
556 * otherwise the CPU will trap if we try to use the FPU under
557 * the false impression that there has been a task switch since
558 * the last FPU usage requiring that we save the FPU state.
559 */
560 clts();
561
562 /*
563 * Zero the FPU registers and install safe control words.
564 */
565 fpu_area_restore(safe_fpu, x86_xsave_features, /*is_64bit*/false);
566 }
567
568 /*
569 * fpu_kern_leave()
570 *
571 * End using the FPU after fpu_kern_enter().
572 */
573 void
fpu_kern_leave(void)574 fpu_kern_leave(void)
575 {
576 struct cpu_info *ci = curcpu();
577 int s;
578
579 #if 0
580 /*
581 * Can't assert this because if the caller holds a spin lock at
582 * IPL_VM, and previously held and released a spin lock at
583 * higher IPL, the IPL remains raised above IPL_VM.
584 */
585 KASSERT(ci->ci_ilevel == IPL_VM || cold);
586 #endif
587 KASSERT(ci->ci_kfpu_spl != -1);
588
589 /*
590 * Zero the fpu registers; otherwise we might leak secrets
591 * through Spectre-class attacks to userland, even if there are
592 * no bugs in fpu state management.
593 */
594 fpu_area_restore(zero_fpu, x86_xsave_features, /*is_64bit*/false);
595
596 /*
597 * Set CR0_TS again so that the kernel can't accidentally use
598 * the FPU.
599 */
600 stts();
601
602 s = ci->ci_kfpu_spl;
603 ci->ci_kfpu_spl = -1;
604 splx(s);
605 }
606
607 /* -------------------------------------------------------------------------- */
608
609 /*
610 * The following table is used to ensure that the FPE_... value
611 * that is passed as a trapcode to the signal handler of the user
612 * process does not have more than one bit set.
613 *
614 * Multiple bits may be set if SSE simd instructions generate errors
615 * on more than one value or if the user process modifies the control
616 * word while a status word bit is already set (which this is a sign
617 * of bad coding).
618 * We have no choice than to narrow them down to one bit, since we must
619 * not send a trapcode that is not exactly one of the FPE_ macros.
620 *
621 * The mechanism has a static table with 127 entries. Each combination
622 * of the 7 FPU status word exception bits directly translates to a
623 * position in this table, where a single FPE_... value is stored.
624 * This FPE_... value stored there is considered the "most important"
625 * of the exception bits and will be sent as the signal code. The
626 * precedence of the bits is based upon Intel Document "Numerical
627 * Applications", Chapter "Special Computational Situations".
628 *
629 * The code to choose one of these values does these steps:
630 * 1) Throw away status word bits that cannot be masked.
631 * 2) Throw away the bits currently masked in the control word,
632 * assuming the user isn't interested in them anymore.
633 * 3) Reinsert status word bit 7 (stack fault) if it is set, which
634 * cannot be masked but must be preserved.
635 * 'Stack fault' is a sub-class of 'invalid operation'.
636 * 4) Use the remaining bits to point into the trapcode table.
637 *
638 * The 6 maskable bits in order of their preference, as stated in the
639 * above referenced Intel manual:
640 * 1 Invalid operation (FP_X_INV)
641 * 1a Stack underflow
642 * 1b Stack overflow
643 * 1c Operand of unsupported format
644 * 1d SNaN operand.
645 * 2 QNaN operand (not an exception, irrelevant here)
646 * 3 Any other invalid-operation not mentioned above or zero divide
647 * (FP_X_INV, FP_X_DZ)
648 * 4 Denormal operand (FP_X_DNML)
649 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
650 * 6 Inexact result (FP_X_IMP)
651 *
652 * NB: the above seems to mix up the mxscr error bits and the x87 ones.
653 * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx
654 * status.
655 *
656 * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
657 * are swapped).
658 *
659 * This table assumes that any stack fault is cleared - so that an INVOP
660 * fault will only be reported as FLTSUB once.
661 * This might not happen if the mask is being changed.
662 */
663 #define FPE_xxx1(f) (f & EN_SW_INVOP \
664 ? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
665 : f & EN_SW_ZERODIV ? FPE_FLTDIV \
666 : f & EN_SW_DENORM ? FPE_FLTUND \
667 : f & EN_SW_OVERFLOW ? FPE_FLTOVF \
668 : f & EN_SW_UNDERFLOW ? FPE_FLTUND \
669 : f & EN_SW_PRECLOSS ? FPE_FLTRES \
670 : f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
671 #define FPE_xxx2(f) FPE_xxx1(f), FPE_xxx1((f + 1))
672 #define FPE_xxx4(f) FPE_xxx2(f), FPE_xxx2((f + 2))
673 #define FPE_xxx8(f) FPE_xxx4(f), FPE_xxx4((f + 4))
674 #define FPE_xxx16(f) FPE_xxx8(f), FPE_xxx8((f + 8))
675 #define FPE_xxx32(f) FPE_xxx16(f), FPE_xxx16((f + 16))
676 static const uint8_t fpetable[128] = {
677 FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
678 };
679 #undef FPE_xxx1
680 #undef FPE_xxx2
681 #undef FPE_xxx4
682 #undef FPE_xxx8
683 #undef FPE_xxx16
684 #undef FPE_xxx32
685
686 /*
687 * This is a synchronous trap on either an x87 instruction (due to an unmasked
688 * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
689 * to an error on the instruction itself.
690 *
691 * If trap actually generates a signal, then the fpu state is saved and then
692 * copied onto the lwp's user-stack, and then recovered from there when the
693 * signal returns.
694 *
695 * All this code needs to do is save the reason for the trap. For x87 traps the
696 * status word bits need clearing to stop the trap re-occurring. For SSE traps
697 * the mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
698 *
699 * We come here with interrupts disabled.
700 */
701 void
fputrap(struct trapframe * frame)702 fputrap(struct trapframe *frame)
703 {
704 uint32_t statbits;
705 ksiginfo_t ksi;
706
707 if (__predict_false(!USERMODE(frame->tf_cs))) {
708 register_t ip = X86_TF_RIP(frame);
709 char where[128];
710
711 #ifdef DDB
712 db_symstr(where, sizeof(where), (db_expr_t)ip, DB_STGY_PROC);
713 #else
714 snprintf(where, sizeof(where), "%p", (void *)ip);
715 #endif
716 panic("fpu trap from kernel at %s, trapframe %p\n", where,
717 frame);
718 }
719
720 KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);
721
722 if (frame->tf_trapno == T_XMM) {
723 uint32_t mxcsr;
724 x86_stmxcsr(&mxcsr);
725 statbits = mxcsr;
726 /* Clear the sticky status bits */
727 mxcsr &= ~0x3f;
728 x86_ldmxcsr(&mxcsr);
729
730 /* Remove masked interrupts and non-status bits */
731 statbits &= ~(statbits >> 7) & 0x3f;
732 /* Mark this is an XMM status */
733 statbits |= 0x10000;
734 } else {
735 uint16_t cw, sw;
736 /* Get current control and status words */
737 fnstcw(&cw);
738 fnstsw(&sw);
739 /* Clear any pending exceptions from status word */
740 fnclex();
741
742 /* Remove masked interrupts */
743 statbits = sw & ~(cw & 0x3f);
744 }
745
746 /* Doesn't matter now if we get pre-empted */
747 x86_enable_intr();
748
749 KSI_INIT_TRAP(&ksi);
750 ksi.ksi_signo = SIGFPE;
751 ksi.ksi_addr = (void *)X86_TF_RIP(frame);
752 ksi.ksi_code = fpetable[statbits & 0x7f];
753 ksi.ksi_trap = statbits;
754 (*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
755 }
756
757 void
fpudna(struct trapframe * frame)758 fpudna(struct trapframe *frame)
759 {
760 #ifdef XENPV
761 /*
762 * Xen produes spurious fpudna traps, just do nothing.
763 */
764 if (USERMODE(frame->tf_cs)) {
765 clts();
766 return;
767 }
768 #endif
769 panic("fpudna from %s, ip %p, trapframe %p",
770 USERMODE(frame->tf_cs) ? "userland" : "kernel",
771 (void *)X86_TF_RIP(frame), frame);
772 }
773
774 /* -------------------------------------------------------------------------- */
775
776 static inline void
fpu_xstate_reload(union savefpu * fpu_save,uint64_t xstate)777 fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
778 {
779 /*
780 * Force a reload of the given xstate during the next XRSTOR.
781 */
782 if (x86_fpu_save >= FPU_SAVE_XSAVE) {
783 fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
784 }
785 }
786
787 void
fpu_set_default_cw(struct lwp * l,unsigned int x87_cw)788 fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
789 {
790 union savefpu *fpu_save = fpu_lwp_area(l);
791 struct pcb *pcb = lwp_getpcb(l);
792
793 if (i386_use_fxsave) {
794 fpu_save->sv_xmm.fx_cw = x87_cw;
795 if (x87_cw != __INITIAL_NPXCW__) {
796 fpu_xstate_reload(fpu_save, XCR0_X87);
797 }
798 } else {
799 fpu_save->sv_87.s87_cw = x87_cw;
800 }
801 pcb->pcb_fpu_dflt_cw = x87_cw;
802 }
803
804 void
fpu_clear(struct lwp * l,unsigned int x87_cw)805 fpu_clear(struct lwp *l, unsigned int x87_cw)
806 {
807 union savefpu *fpu_save;
808 struct pcb *pcb;
809
810 KASSERT(l == curlwp);
811 fpu_save = fpu_lwp_area(l);
812
813 switch (x86_fpu_save) {
814 case FPU_SAVE_FSAVE:
815 memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
816 fpu_save->sv_87.s87_tw = 0xffff;
817 fpu_save->sv_87.s87_cw = x87_cw;
818 break;
819 case FPU_SAVE_FXSAVE:
820 memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
821 fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
822 fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
823 fpu_save->sv_xmm.fx_cw = x87_cw;
824 break;
825 case FPU_SAVE_XSAVE:
826 case FPU_SAVE_XSAVEOPT:
827 memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
828 fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
829 fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
830 fpu_save->sv_xmm.fx_cw = x87_cw;
831 if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
832 fpu_xstate_reload(fpu_save, XCR0_X87);
833 }
834 break;
835 }
836
837 pcb = lwp_getpcb(l);
838 pcb->pcb_fpu_dflt_cw = x87_cw;
839 }
840
841 void
fpu_sigreset(struct lwp * l)842 fpu_sigreset(struct lwp *l)
843 {
844 union savefpu *fpu_save = fpu_lwp_area(l);
845 struct pcb *pcb = lwp_getpcb(l);
846
847 /*
848 * For signal handlers the register values don't matter. Just reset
849 * a few fields.
850 */
851 if (i386_use_fxsave) {
852 fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
853 fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
854 fpu_save->sv_xmm.fx_tw = 0;
855 fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
856 } else {
857 fpu_save->sv_87.s87_tw = 0xffff;
858 fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
859 }
860 }
861
862 void
process_write_fpregs_xmm(struct lwp * l,const struct fxsave * fpregs)863 process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
864 {
865 union savefpu *fpu_save = fpu_lwp_area(l);
866
867 if (i386_use_fxsave) {
868 memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
869
870 /*
871 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
872 */
873 fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
874 fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;
875
876 fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
877 } else {
878 process_xmm_to_s87(fpregs, &fpu_save->sv_87);
879 }
880 }
881
882 void
process_write_fpregs_s87(struct lwp * l,const struct save87 * fpregs)883 process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
884 {
885 union savefpu *fpu_save = fpu_lwp_area(l);
886
887 if (i386_use_fxsave) {
888 process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
889 fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
890 } else {
891 memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
892 }
893 }
894
895 void
process_read_fpregs_xmm(struct lwp * l,struct fxsave * fpregs)896 process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
897 {
898 union savefpu *fpu_save = fpu_lwp_area(l);
899
900 if (i386_use_fxsave) {
901 memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
902 } else {
903 memset(fpregs, 0, sizeof(*fpregs));
904 process_s87_to_xmm(&fpu_save->sv_87, fpregs);
905 }
906 }
907
908 void
process_read_fpregs_s87(struct lwp * l,struct save87 * fpregs)909 process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
910 {
911 union savefpu *fpu_save = fpu_lwp_area(l);
912
913 if (i386_use_fxsave) {
914 memset(fpregs, 0, sizeof(*fpregs));
915 process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
916 } else {
917 memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
918 }
919 }
920
921 int
process_read_xstate(struct lwp * l,struct xstate * xstate)922 process_read_xstate(struct lwp *l, struct xstate *xstate)
923 {
924 union savefpu *fpu_save = fpu_lwp_area(l);
925
926 if (x86_fpu_save == FPU_SAVE_FSAVE) {
927 /* Convert from legacy FSAVE format. */
928 memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
929 process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);
930
931 /* We only got x87 data. */
932 xstate->xs_rfbm = XCR0_X87;
933 xstate->xs_xstate_bv = XCR0_X87;
934 return 0;
935 }
936
937 /* Copy the legacy area. */
938 memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
939 sizeof(xstate->xs_fxsave));
940
941 if (x86_fpu_save == FPU_SAVE_FXSAVE) {
942 /* FXSAVE means we've got x87 + SSE data. */
943 xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
944 xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
945 return 0;
946 }
947
948 /* Copy the bitmap indicating which states are available. */
949 xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
950 xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
951 KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));
952
953 #define COPY_COMPONENT(xcr0_val, xsave_val, field) \
954 if (xstate->xs_xstate_bv & xcr0_val) { \
955 KASSERT(x86_xsave_offsets[xsave_val] \
956 >= sizeof(struct xsave_header)); \
957 KASSERT(x86_xsave_sizes[xsave_val] \
958 >= sizeof(xstate->field)); \
959 memcpy(&xstate->field, \
960 (char*)fpu_save + x86_xsave_offsets[xsave_val], \
961 sizeof(xstate->field)); \
962 }
963
964 COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
965 COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
966 COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
967 COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
968
969 #undef COPY_COMPONENT
970
971 return 0;
972 }
973
974 int
process_verify_xstate(const struct xstate * xstate)975 process_verify_xstate(const struct xstate *xstate)
976 {
977 /* xstate_bv must be a subset of RFBM */
978 if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
979 return EINVAL;
980
981 switch (x86_fpu_save) {
982 case FPU_SAVE_FSAVE:
983 if ((xstate->xs_rfbm & ~XCR0_X87))
984 return EINVAL;
985 break;
986 case FPU_SAVE_FXSAVE:
987 if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
988 return EINVAL;
989 break;
990 default:
991 /* Verify whether no unsupported features are enabled */
992 if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0)
993 return EINVAL;
994 }
995
996 return 0;
997 }
998
999 int
process_write_xstate(struct lwp * l,const struct xstate * xstate)1000 process_write_xstate(struct lwp *l, const struct xstate *xstate)
1001 {
1002 union savefpu *fpu_save = fpu_lwp_area(l);
1003
1004 /* Convert data into legacy FSAVE format. */
1005 if (x86_fpu_save == FPU_SAVE_FSAVE) {
1006 if (xstate->xs_xstate_bv & XCR0_X87)
1007 process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
1008 return 0;
1009 }
1010
1011 /* If XSAVE is supported, make sure that xstate_bv is set correctly. */
1012 if (x86_fpu_save >= FPU_SAVE_XSAVE) {
1013 /*
1014 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
1015 * fpu_save->sv_xsave_hdr.xsh_xstate_bv"
1016 */
1017 fpu_save->sv_xsave_hdr.xsh_xstate_bv =
1018 (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
1019 xstate->xs_xstate_bv;
1020 }
1021
1022 if (xstate->xs_xstate_bv & XCR0_X87) {
1023 /*
1024 * X87 state is split into two areas, interspersed with SSE
1025 * data.
1026 */
1027 memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
1028 memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
1029 sizeof(xstate->xs_fxsave.fx_87_ac));
1030 }
1031
1032 /*
1033 * Copy MXCSR if either SSE or AVX state is requested, to match the
1034 * XSAVE behavior for those flags.
1035 */
1036 if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
1037 /*
1038 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
1039 */
1040 fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
1041 & x86_fpu_mxcsr_mask;
1042 fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
1043 fpu_save->sv_xmm.fx_mxcsr_mask;
1044 }
1045
1046 if (xstate->xs_xstate_bv & XCR0_SSE) {
1047 memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
1048 xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm));
1049 }
1050
1051 #define COPY_COMPONENT(xcr0_val, xsave_val, field) \
1052 if (xstate->xs_xstate_bv & xcr0_val) { \
1053 KASSERT(x86_xsave_offsets[xsave_val] \
1054 >= sizeof(struct xsave_header)); \
1055 KASSERT(x86_xsave_sizes[xsave_val] \
1056 >= sizeof(xstate->field)); \
1057 memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val], \
1058 &xstate->field, sizeof(xstate->field)); \
1059 }
1060
1061 COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
1062 COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
1063 COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
1064 COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
1065
1066 #undef COPY_COMPONENT
1067
1068 return 0;
1069 }
1070