1 /* $NetBSD: fp_complete.c,v 1.33 2025/03/16 22:34:36 thorpej Exp $ */
2 
3 /*-
4  * Copyright (c) 2001 Ross Harvey
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *        This product includes software developed by the NetBSD
18  *        Foundation, Inc. and its contributors.
19  * 4. Neither the name of The NetBSD Foundation nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include "opt_ddb.h"
37 
38 #include <sys/cdefs.h>                            /* RCS ID & Copyright macro defns */
39 
40 __KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.33 2025/03/16 22:34:36 thorpej Exp $");
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/atomic.h>
46 #include <sys/evcnt.h>
47 
48 #include <machine/cpu.h>
49 #include <machine/fpu.h>
50 #include <machine/reg.h>
51 #include <machine/alpha.h>
52 #include <machine/alpha_instruction.h>
53 
54 #include <lib/libkern/softfloat.h>
55 
/*
 * Validate our assumptions about bit positions.
 *
 * Code in this file converts between several flag layouts with fixed
 * shifts instead of per-bit tests; each group below pins one of those
 * relationships at compile time.
 */

/* AESR exception summary bits are the FP_X_* flags shifted left by one. */
__CTASSERT(ALPHA_AESR_INV == (FP_X_INV << 1));
__CTASSERT(ALPHA_AESR_DZE == (FP_X_DZ  << 1));
__CTASSERT(ALPHA_AESR_OVF == (FP_X_OFL << 1));
__CTASSERT(ALPHA_AESR_UNF == (FP_X_UFL << 1));
__CTASSERT(ALPHA_AESR_INE == (FP_X_IMP << 1));
__CTASSERT(ALPHA_AESR_IOV == (FP_X_IOV << 1));

/* FP_C trap enable bits are likewise the FP_X_* flags shifted left by one. */
__CTASSERT(IEEE_TRAP_ENABLE_INV == (FP_X_INV << 1));
__CTASSERT(IEEE_TRAP_ENABLE_DZE == (FP_X_DZ  << 1));
__CTASSERT(IEEE_TRAP_ENABLE_OVF == (FP_X_OFL << 1));
__CTASSERT(IEEE_TRAP_ENABLE_UNF == (FP_X_UFL << 1));
__CTASSERT(IEEE_TRAP_ENABLE_INE == (FP_X_IMP << 1));

/*
 * FPCR trap disable bits: IMP/UFL sit (61 - 3) bits above their FP_X_*
 * position, OFL/DZ/INV sit (49 - 0) bits above theirs.  fp_c_to_fpcr_1()
 * uses exactly these two shifts.
 */
__CTASSERT((uint64_t)FP_X_IMP << (61 - 3) == FPCR_INED);
__CTASSERT((uint64_t)FP_X_UFL << (61 - 3) == FPCR_UNFD);
__CTASSERT((uint64_t)FP_X_OFL << (49 - 0) == FPCR_OVFD);
__CTASSERT((uint64_t)FP_X_DZ  << (49 - 0) == FPCR_DZED);
__CTASSERT((uint64_t)FP_X_INV << (49 - 0) == FPCR_INVD);

/* The FP_C bits kept in md_flags must be exactly the architected FP_C set. */
__CTASSERT(FP_C_ALLBITS == MDLWP_FP_C);
79 
#define	TSWINSIZE 4	/* size of trap shadow window in uint32_t units */

/*
 * Opcode class masks.  Each mask has one bit per major opcode number
 * (bit N <=> opcode N), as the per-line opcode annotations below show.
 * NOTE(review): the consumers are outside this part of the file;
 * presumably they test (1UL << opcode) against these masks while
 * scanning the trap shadow -- confirm there.
 */

/*	Set Name		Opcodes				AARM C.* Symbols  */

#define	CPUREG_CLASS		(0xfUL << 0x10)		/* INT[ALSM]	    */
#define	FPUREG_CLASS		(0xfUL << 0x14)		/* ITFP, FLT[ILV] */
#define	CHECKFUNCTIONCODE	(1UL << 0x18)		/* MISC		    */
#define	TRAPSHADOWBOUNDARY	(1UL << 0x00 |		/* PAL		    */\
				 1UL << 0x19 |		/* \PAL\	    */\
				 1UL << 0x1a |		/* JSR		    */\
				 1UL << 0x1b |		/* \PAL\	    */\
				 1UL << 0x1d |		/* \PAL\	    */\
				 1UL << 0x1e |		/* \PAL\	    */\
				 1UL << 0x1f |		/* \PAL\	    */\
				 0xffffUL << 0x30 |	/* branch ops	    */\
				 CHECKFUNCTIONCODE)
96 
/*
 * Assemble a floating-point bit pattern of the given total width from its
 * sign bit, exponent field (expwidth bits wide), most significant fraction
 * bit and the remaining fraction bits.
 *
 * The whole expansion is parenthesized so the result can be combined with
 * any other operator without the '|' chain being re-associated.
 */
#define	MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \
	((uint ## width ## _t)(sign) << ((width) - 1)			|\
	 (uint ## width ## _t)(exp)  << ((width) - 1 - (expwidth))	|\
	 (uint ## width ## _t)(msb)  << ((width) - 1 - (expwidth) - 1)	|\
	 (uint ## width ## _t)(rest_of_frac))

/* Quiet NaNs: sign clear, exponent all ones, top fraction bit set. */
#define	FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
#define	FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)
105 
/* True when *v has a zero exponent field and a non-zero fraction. */
#define IS_SUBNORMAL(v)	((v)->exp == 0 && (v)->frac != 0)

/*
 * Operand filter: if the lwp has IEEE_MAP_DMZ set in md_flags and *v is
 * subnormal, clear its fraction (the sign is left alone).  The expansion
 * ends in a bare "else" so that the ';' written after the invocation
 * completes the statement.
 */
#define	PREFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_DMZ	\
				     && IS_SUBNORMAL(v))		\
					 (v)->frac = 0; else

/*
 * Result filter: same as above, but keyed on IEEE_MAP_UMZ.
 */
#define	POSTFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_UMZ	\
				      && IS_SUBNORMAL(v))		\
					  (v)->frac = 0; else

	/* Alpha returns 2.0 for true, all zeroes for false. */

#define CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L)

	/*
	 * Yield sw with the bits selected by m replaced by the bits of
	 * (hw >> offs), i.e. bits are taken from the hw argument and
	 * merged into the sw argument.
	 * NOTE(review): no use of this macro is visible in this part of
	 * the file; presumably sw is an FP_C word and hw an FPCR value.
	 */

#define	CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m)))
123 
/*
 * Event counters.
 * NOTE(review): fpevent_use/fpevent_reuse are not referenced in this part
 * of the file; their meaning should be confirmed where they are attached.
 */
static struct evcnt fpevent_use;
static struct evcnt fpevent_reuse;

/*
 * Temporary trap shadow instrumentation. The [un]resolved counters
 * could be kept permanently, as they provide information on whether
 * user code has met AARM trap shadow generation requirements.
 */

static struct evcnt ts_scans;	   /* trap shadow scans */
static struct evcnt ts_insns;	   /* total scanned insns */
static struct evcnt ts_insns_max;  /* per-scan high water mark */
static struct evcnt ts_resolved;   /* cases trigger pc found */
static struct evcnt ts_unresolved; /* cases it wasn't, code problems? */

static struct evcnt fp_ill_opc;	   /* unexpected op codes */
/* Note: this_cannot_happen() bumps fp_ill_func on every call. */
static struct evcnt fp_ill_func;   /* unexpected function codes */
static struct evcnt fp_ill_anyop;  /* this "cannot happen" */

static struct evcnt fp_vax;	   /* traps from VAX FP insns */

/*
 * Record of unexpected instructions seen by the emulator.
 * this_cannot_happen() sets bit N of uop for an offending major opcode N.
 */
struct alpha_shadow {
	uint64_t uop;		/* bit mask of unexpected opcodes */
	uint32_t ufunc;		/* bit mask of unexpected functions */
} alpha_shadow;
149 
/* Forward declarations for the handlers referenced by the tables below. */
static float64 float64_unk(float64, float64);
static float64 compare_un(float64, float64);
static float64 compare_eq(float64, float64);
static float64 compare_lt(float64, float64);
static float64 compare_le(float64, float64);
static void cvt_qs_ts_st_gf_qf(uint32_t, struct lwp *);
static void cvt_gd(uint32_t, struct lwp *);
static void cvt_qt_dg_qg(uint32_t, struct lwp *);
static void cvt_tq_gq(uint32_t, struct lwp *);

/*
 * Single-precision binary ops, indexed by the instruction's opclass field.
 * alpha_fp_interpret() only indexes this with opclass 0..3.
 */
static float32 (*swfp_s[])(float32, float32) = {
	float32_add, float32_sub, float32_mul, float32_div,
};

/*
 * Double-precision binary ops, indexed by opclass.  alpha_fp_interpret()
 * handles opclass 11 (sqrt) and opclass >= 12 (conversions) itself, so
 * the indices used here are 0..10; the reserved slots return 0 via
 * float64_unk().
 */
static float64 (*swfp_t[])(float64, float64) = {
	float64_add, float64_sub, float64_mul, float64_div,
	compare_un,    compare_eq,    compare_lt,    compare_le,
	float64_unk, float64_unk, float64_unk, float64_unk
};

/* Conversion handlers, indexed by (opclass - 12). */
static void (*swfp_cvt[])(uint32_t, struct lwp *) = {
	cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
};
173 
174 static void
this_cannot_happen(int what_cannot_happen,int64_t bits)175 this_cannot_happen(int what_cannot_happen, int64_t bits)
176 {
177           static int total;
178           alpha_instruction inst;
179           static uint64_t reported;
180 
181           inst.bits = bits;
182           atomic_inc_ulong(&fp_ill_func.ev_count);
183           if (bits != -1)
184                     alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
185           if (1UL << what_cannot_happen & reported)
186                     return;
187           reported |= 1UL << what_cannot_happen;
188           if (total >= 1000)
189                     return;   /* right now, this return "cannot happen" */
190           ++total;
191           if (bits)
192                     printf("FP instruction %x\n", (unsigned int)bits);
193           printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported,
194               alpha_shadow.uop);
195           printf("Please report this to port-alpha-maintainer@NetBSD.org\n");
196 }
197 
198 static inline void
sts(unsigned int rn,s_float * v,struct lwp * l)199 sts(unsigned int rn, s_float *v, struct lwp *l)
200 {
201           alpha_sts(rn, v);
202           PREFILTER_SUBNORMAL(l, v);
203 }
204 
205 static inline void
stt(unsigned int rn,t_float * v,struct lwp * l)206 stt(unsigned int rn, t_float *v, struct lwp *l)
207 {
208           alpha_stt(rn, v);
209           PREFILTER_SUBNORMAL(l, v);
210 }
211 
212 static inline void
lds(unsigned int rn,s_float * v,struct lwp * l)213 lds(unsigned int rn, s_float *v, struct lwp *l)
214 {
215           POSTFILTER_SUBNORMAL(l, v);
216           alpha_lds(rn, v);
217 }
218 
219 static inline void
ldt(unsigned int rn,t_float * v,struct lwp * l)220 ldt(unsigned int rn, t_float *v, struct lwp *l)
221 {
222           POSTFILTER_SUBNORMAL(l, v);
223           alpha_ldt(rn, v);
224 }
225 
226 static float64
compare_lt(float64 a,float64 b)227 compare_lt(float64 a, float64 b)
228 {
229           return CMP_RESULT(float64_lt_quiet(a, b));
230 }
231 
232 static float64
compare_le(float64 a,float64 b)233 compare_le(float64 a, float64 b)
234 {
235           return CMP_RESULT(float64_le_quiet(a, b));
236 }
237 
238 static float64
compare_un(float64 a,float64 b)239 compare_un(float64 a, float64 b)
240 {
241           if (float64_is_nan(a) | float64_is_nan(b)) {
242                     if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
243                               float_set_invalid();
244                     return CMP_RESULT(1);
245           }
246           return CMP_RESULT(0);
247 }
248 
249 static float64
compare_eq(float64 a,float64 b)250 compare_eq(float64 a, float64 b)
251 {
252           return CMP_RESULT(float64_eq(a, b));
253 }
254 /*
255  * A note regarding the VAX FP ops.
256  *
257  * The AARM gives us complete leeway to set or not set status flags on VAX
258  * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
259  * flags by IEEE rules.  Many ops are common to d/f/g and s/t source types.
260  * For the purely vax ones, it's hard to imagine ever running them.
261  * (Generated VAX fp ops with completion flags? Hmm.)  We are careful never
262  * to panic, assert, or print unlimited output based on a path through the
263  * decoder, so weird cases don't become security issues.
264  */
265 static void
cvt_qs_ts_st_gf_qf(uint32_t inst_bits,struct lwp * l)266 cvt_qs_ts_st_gf_qf(uint32_t inst_bits, struct lwp *l)
267 {
268           t_float tfb, tfc;
269           s_float sfb, sfc;
270           alpha_instruction inst;
271 
272           inst.bits = inst_bits;
273           /*
274            * cvtst and cvtts have the same opcode, function, and source.  The
275            * distinction for cvtst is hidden in the illegal modifier combinations.
276            * We decode even the non-/s modifier, so that the fix-up-always mode
277            * works on ev6 and later. The rounding bits are unused and fixed for
278            * cvtst, so we check those too.
279            */
280           switch(inst.float_format.function) {
281           case op_cvtst:
282           case op_cvtst_u:
283                     sts(inst.float_detail.fb, &sfb, l);
284                     tfc.i = float32_to_float64(sfb.i);
285                     ldt(inst.float_detail.fc, &tfc, l);
286                     return;
287           }
288           if(inst.float_detail.src == 2) {
289                     stt(inst.float_detail.fb, &tfb, l);
290                     sfc.i = float64_to_float32(tfb.i);
291                     lds(inst.float_detail.fc, &sfc, l);
292                     return;
293           }
294           /* 0: S/F */
295           /* 1:  /D */
296           /* 3: Q/Q */
297           this_cannot_happen(5, inst.generic_format.opcode);
298           tfc.i = FLOAT64QNAN;
299           ldt(inst.float_detail.fc, &tfc, l);
300           return;
301 }
302 
303 static void
cvt_gd(uint32_t inst_bits,struct lwp * l)304 cvt_gd(uint32_t inst_bits, struct lwp *l)
305 {
306           t_float tfb, tfc;
307           alpha_instruction inst;
308 
309           inst.bits = inst_bits;
310           stt(inst.float_detail.fb, &tfb, l);
311           (void) float64_to_float32(tfb.i);
312           l->l_md.md_flags &= ~NETBSD_FLAG_TO_FP_C(FP_X_IMP);
313           tfc.i = float64_add(tfb.i, (float64)0);
314           ldt(inst.float_detail.fc, &tfc, l);
315 }
316 
317 static void
cvt_qt_dg_qg(uint32_t inst_bits,struct lwp * l)318 cvt_qt_dg_qg(uint32_t inst_bits, struct lwp *l)
319 {
320           t_float tfb, tfc;
321           alpha_instruction inst;
322 
323           inst.bits = inst_bits;
324           switch(inst.float_detail.src) {
325           case 0:   /* S/F */
326                     this_cannot_happen(3, inst.bits);
327                     /* fall thru */
328           case 1: /* D */
329                     /* VAX dirty 0's and reserved ops => UNPREDICTABLE */
330                     /* We've done what's important by just not trapping */
331                     tfc.i = 0;
332                     break;
333           case 2: /* T/G */
334                     this_cannot_happen(4, inst.bits);
335                     tfc.i = 0;
336                     break;
337           case 3:   /* Q/Q */
338                     stt(inst.float_detail.fb, &tfb, l);
339                     tfc.i = int64_to_float64(tfb.i);
340                     break;
341           }
342           alpha_ldt(inst.float_detail.fc, &tfc);
343 }
344 /*
345  * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
346  *      unfortunate habit of always returning the nontrapping result.
347  * XXX: there are several apparent AARM/AAH disagreements, as well as
348  *      the issue of trap handler pc and trapping results.
349  */
350 static void
cvt_tq_gq(uint32_t inst_bits,struct lwp * l)351 cvt_tq_gq(uint32_t inst_bits, struct lwp *l)
352 {
353           t_float tfb, tfc;
354           alpha_instruction inst;
355 
356           inst.bits = inst_bits;
357           stt(inst.float_detail.fb, &tfb, l);
358           tfc.i = tfb.sign ? float64_to_int64(tfb.i) : float64_to_uint64(tfb.i);
359           alpha_ldt(inst.float_detail.fc, &tfc);  /* yes, ldt */
360 }
361 
362 static uint64_t
fp_c_to_fpcr_1(uint64_t fpcr,uint64_t fp_c)363 fp_c_to_fpcr_1(uint64_t fpcr, uint64_t fp_c)
364 {
365           uint64_t disables;
366 
367           /*
368            * It's hard to arrange for conforming bit fields, because the FP_C
369            * and the FPCR are both architected, with specified (and relatively
370            * scrambled) bit numbers. Defining an internal unscrambled FP_C
371            * wouldn't help much, because every user exception requires the
372            * architected bit order in the sigcontext.
373            *
374            * Programs that fiddle with the fpcr exception bits (instead of fp_c)
375            * will lose, because those bits can be and usually are subsetted;
376            * the official home is in the fp_c. Furthermore, the kernel puts
377            * phony enables (it lies :-) in the fpcr in order to get control when
378            * it is necessary to initially set a sticky bit.
379            */
380 
381           fpcr &= FPCR_DYN_RM;
382 
383           /*
384            * enable traps = case where flag bit is clear AND program wants a trap
385            *
386            * enables = ~flags & mask
387            * disables = ~(~flags | mask)
388            * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
389            */
390           disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c);
391 
392           fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
393           fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);
394 
395           fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
396           fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
397           if (fp_c & FP_C_MIRRORED)
398                     fpcr |= FPCR_SUM;
399           if (fp_c & IEEE_MAP_UMZ)
400                     fpcr |= FPCR_UNDZ | FPCR_UNFD;
401           fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
402           return fpcr;
403 }
404 
405 static void
fp_c_to_fpcr(struct lwp * l)406 fp_c_to_fpcr(struct lwp *l)
407 {
408           alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), l->l_md.md_flags));
409 }
410 
411 void
alpha_write_fp_c(struct lwp * l,uint64_t fp_c)412 alpha_write_fp_c(struct lwp *l, uint64_t fp_c)
413 {
414           uint64_t md_flags;
415 
416           fp_c &= MDLWP_FP_C;
417           md_flags = l->l_md.md_flags;
418           if ((md_flags & MDLWP_FP_C) == fp_c)
419                     return;
420           l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c;
421           kpreempt_disable();
422           if (md_flags & MDLWP_FPACTIVE) {
423                     alpha_pal_wrfen(1);
424                     fp_c_to_fpcr(l);
425                     alpha_pal_wrfen(0);
426           } else {
427                     struct pcb *pcb = l->l_addr;
428 
429                     pcb->pcb_fp.fpr_cr =
430                         fp_c_to_fpcr_1(pcb->pcb_fp.fpr_cr, l->l_md.md_flags);
431           }
432           kpreempt_enable();
433 }
434 
435 uint64_t
alpha_read_fp_c(struct lwp * l)436 alpha_read_fp_c(struct lwp *l)
437 {
438           /*
439            * A possibly-desirable EV6-specific optimization would deviate from
440            * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
441            * but in a transparent way. Some of the code for that would need to
442            * go right here.
443            */
444           return l->l_md.md_flags & MDLWP_FP_C;
445 }
446 
447 static float64
float64_unk(float64 a,float64 b)448 float64_unk(float64 a, float64 b)
449 {
450           return 0;
451 }
452 
453 /*
454  * The real function field encodings for IEEE and VAX FP instructions.
455  *
456  * Since there is only one operand type field, the cvtXX instructions
457  * require a variety of special cases, and these have to be analyzed as
458  * they don't always fit into the field descriptions in AARM section I.
459  *
460  * Lots of staring at bits in the appendix shows what's really going on.
461  *
462  *           |             |
463  * 15 14 13|12 11 10 09|08 07 06 05
464  * --------======------============
465  *  TRAP   : RND : SRC : FUNCTION  :
466  *  0  0  0:. . .:. . . . . . . . . . . . Imprecise
467  *  0  0  1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
 *           |                                     /V overflow enable (if int output)
469  *  0  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
470  *  0  1  1|. . .:. . . . . . . . . . . . Unsupported
471  *  1  0  0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
472  *  1  0  1|. . .:. . . . . . . . . . . ./SU
473  *           |                                     /SV
474  *  1  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
475  *  1  1  1|. . .:. . . . . . . . . . . ./SUI (if FP output)          (IEEE only)
476  *           |                                     /SVI (if int output)   (IEEE only)
477  *  S  I  UV: In other words: bits 15:13 are S:I:UV, except that _usually_
478  *           |  not all combinations are valid.
479  *           |             |
480  * 15 14 13|12 11 10 09|08 07 06 05
481  * --------======------============
482  *  TRAP   : RND : SRC : FUNCTION  :
483  *           | 0    0 . . . . . . . . . . . ./C Chopped
484  *           : 0    1 . . . . . . . . . . . ./M Minus Infinity
485  *           | 1    0 . . . . . . . . . . . .   Normal
486  *           : 1    1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
487  *           |             |
488  * 15 14 13|12 11 10 09|08 07 06 05
489  * --------======------============
490  *  TRAP   : RND : SRC : FUNCTION  :
491  *                     0 0. . . . . . . . . . S/F
492  *                     0 1. . . . . . . . . . -/D
493  *                     1 0. . . . . . . . . . T/G
494  *                     1 1. . . . . . . . . . Q/Q
495  *           |             |
496  * 15 14 13|12 11 10 09|08 07 06 05
497  * --------======------============
498  *  TRAP   : RND : SRC : FUNCTION  :
499  *                             0  0  0  0 . . . addX
500  *                             0  0  0  1 . . . subX
501  *                             0  0  1  0 . . . mulX
502  *                             0  0  1  1 . . . divX
503  *                             0  1  0  0 . . . cmpXun
504  *                             0  1  0  1 . . . cmpXeq
505  *                             0  1  1  0 . . . cmpXlt
506  *                             0  1  1  1 . . . cmpXle
507  *                             1  0  0  0 . . . reserved
508  *                             1  0  0  1 . . . reserved
509  *                             1  0  1  0 . . . sqrt[fg] (op_fix, not exactly "vax")
510  *                             1  0  1  1 . . . sqrt[st] (op_fix, not exactly "ieee")
511  *                             1  1  0  0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
512  *                             1  1  0  1 . . . cvtXd   (vax only)
513  *                             1  1  1  0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
514  *                             1  1  1  1 . . . cvtXq/q (cvttq, cvtgq)
515  *           |             |
516  * 15 14 13|12 11 10 09|08 07 06 05       the twilight zone
517  * --------======------============
518  *  TRAP   : RND : SRC : FUNCTION  :
519  * /s /i /u  x  x  1  0  1  1  0  0 . . . cvtts, /siu only 0, 1, 5, 7
520  *  0  1  0  1  0  1  0  1  1  0  0 . . . cvtst   (src == T (!)) 2ac NOT /S
521  *  1  1  0  1  0  1  0  1  1  0  0 . . . cvtst/s (src == T (!)) 6ac
522  *  x  0  x  x  x  x  0        1  1  1  1 . . . cvttq/_ (src == T)
523  */
524 
525 static void
print_fp_instruction(unsigned long pc,struct lwp * l,uint32_t bits)526 print_fp_instruction(unsigned long pc, struct lwp *l, uint32_t bits)
527 {
528 #if defined(DDB)
529           char buf[32];
530           struct alpha_print_instruction_context ctx = {
531                     .insn.bits = bits,
532                     .pc = pc,
533                     .buf = buf,
534                     .bufsize = sizeof(buf),
535           };
536 
537           (void) alpha_print_instruction(&ctx);
538 
539           printf("INSN [%s:%d] @0x%lx -> %s\n",
540               l->l_proc->p_comm, l->l_proc->p_pid, ctx.pc, ctx.buf);
541 #else
542           alpha_instruction insn = {
543                     .bits = bits,
544           };
545           printf("INSN [%s:%d] @0x%lx -> opc=0x%x func=0x%x fa=%d fb=%d fc=%d\n",
546               l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
547               insn.float_format.opcode, insn.float_format.function,
548               insn.float_format.fa, insn.float_format.fb, insn.float_format.fc);
549           printf("INSN [%s:%d] @0x%lx -> trp=0x%x rnd=0x%x src=0x%x fn=0x%x\n",
550               l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
551               insn.float_detail.trp, insn.float_detail.rnd,
552               insn.float_detail.src, insn.float_detail.opclass);
553 #endif /* DDB */
554 }
555 
556 static void
alpha_fp_interpret(unsigned long pc,struct lwp * l,uint32_t bits)557 alpha_fp_interpret(unsigned long pc, struct lwp *l, uint32_t bits)
558 {
559           s_float sfa, sfb, sfc;
560           t_float tfa, tfb, tfc;
561           alpha_instruction inst;
562 
563           if (alpha_fp_complete_debug) {
564                     print_fp_instruction(pc, l, bits);
565           }
566 
567           inst.bits = bits;
568           switch(inst.generic_format.opcode) {
569           default:
570                     /* this "cannot happen" */
571                     atomic_inc_ulong(&fp_ill_opc.ev_count);
572                     this_cannot_happen(2, inst.bits);
573                     return;
574           case op_any_float:
575                     if (inst.float_format.function == op_cvtql_sv ||
576                         inst.float_format.function == op_cvtql_v) {
577                               alpha_stt(inst.float_detail.fb, &tfb);
578                               sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
579                               alpha_lds(inst.float_detail.fc, &sfc);
580                               float_raise(FP_X_INV);
581                     } else {
582                               atomic_inc_ulong(&fp_ill_anyop.ev_count);
583                               this_cannot_happen(3, inst.bits);
584                     }
585                     break;
586           case op_vax_float:
587                     atomic_inc_ulong(&fp_vax.ev_count);
588                     /* FALLTHROUGH */             /* XXX */
589           case op_ieee_float:
590           case op_fix_float:
591                     switch(inst.float_detail.src) {
592                     case op_src_sf:
593                               sts(inst.float_detail.fb, &sfb, l);
594                               if (inst.float_detail.opclass == 11)
595                                         sfc.i = float32_sqrt(sfb.i);
596                               else if (inst.float_detail.opclass & ~3) {
597                                         this_cannot_happen(1, inst.bits);
598                                         sfc.i = FLOAT32QNAN;
599                               } else {
600                                         sts(inst.float_detail.fa, &sfa, l);
601                                         sfc.i = (*swfp_s[inst.float_detail.opclass])(
602                                             sfa.i, sfb.i);
603                               }
604                               lds(inst.float_detail.fc, &sfc, l);
605                               break;
606                     case op_src_xd:
607                     case op_src_tg:
608                               if (inst.float_detail.opclass >= 12)
609                                         (*swfp_cvt[inst.float_detail.opclass - 12])(
610                                             inst.bits, l);
611                               else {
612                                         stt(inst.float_detail.fb, &tfb, l);
613                                         if (inst.float_detail.opclass == 11)
614                                                   tfc.i = float64_sqrt(tfb.i);
615                                         else {
616                                                   stt(inst.float_detail.fa, &tfa, l);
617                                                   tfc.i = (*swfp_t[inst.float_detail
618                                                       .opclass])(tfa.i, tfb.i);
619                                         }
620                                         ldt(inst.float_detail.fc, &tfc, l);
621                               }
622                               break;
623                     case op_src_qq:
624                               float_raise(FP_X_IMP);
625                               break;
626                     }
627           }
628 }
629 
/*
 * Complete, in software, the floating-point instruction at trigger_pc.
 *
 * trigger_pc: user address of the instruction to emulate.
 * l: the lwp on whose behalf the instruction is emulated.
 * ucode: written only when SIGFPE is returned; receives the newly raised
 *        exception flags that the program has asked to trap on.
 *
 * Returns 0 if no signal is needed, SIGSEGV if the instruction word could
 * not be fetched, or SIGFPE if the emulation raised an enabled exception.
 */
int
alpha_fp_complete_at(unsigned long trigger_pc, struct lwp *l, uint64_t *ucode)
{
	int needsig;
	alpha_instruction inst;
	uint64_t rm, fpcr, orig_fpcr;
	uint64_t orig_flags, new_flags, changed_flags, md_flags;

	/* Fetch the instruction word from user space; a non-zero return is failure. */
	if (__predict_false(ufetch_32((void *)trigger_pc, &inst.bits))) {
		this_cannot_happen(6, -1);
		return SIGSEGV;
	}
	/*
	 * From here until kpreempt_enable() the FPU is used directly:
	 * preemption stays off and FP access is enabled via wrfen.
	 */
	kpreempt_disable();
	if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
		fpu_load();
	}
	alpha_pal_wrfen(1);
	/*
	 * Alpha FLOAT instructions can override the rounding mode on a
	 * per-instruction basis.  If necessary, lie about the dynamic
	 * rounding mode so emulation software need go to only one place
	 * for it, and so we don't have to lock any memory locations or
	 * pass a third parameter to every SoftFloat entry point.
	 *
	 * N.B. the rounding mode field of the FLOAT format instructions
	 * matches that of the FPCR *except* for the value 3, which means
	 * "dynamic" rounding mode (i.e. what is programmed into the FPCR).
	 */
	orig_fpcr = fpcr = alpha_read_fpcr();
	rm = inst.float_detail.rnd;
	if (__predict_false(rm != 3 /* dynamic */ &&
			    rm != __SHIFTOUT(fpcr, FPCR_DYN_RM))) {
		fpcr = (fpcr & ~FPCR_DYN_RM) | __SHIFTIN(rm, FPCR_DYN_RM);
		alpha_write_fpcr(fpcr);
	}
	/* Snapshot the sticky flags so newly raised ones can be identified. */
	orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);

	alpha_fp_interpret(trigger_pc, l, inst.bits);

	md_flags = l->l_md.md_flags;

	new_flags = FP_C_TO_NETBSD_FLAG(md_flags);
	changed_flags = orig_flags ^ new_flags;
	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
	/*
	 * Rebuild the FPCR from the updated flags.  orig_fpcr is passed so
	 * that the original dynamic rounding mode, not a per-instruction
	 * override installed above, ends up back in the register.
	 */
	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
	/* Only newly raised flags whose traps are enabled warrant a signal. */
	needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags);
	alpha_pal_wrfen(0);
	kpreempt_enable();
	if (__predict_false(needsig)) {
		*ucode = needsig;
		return SIGFPE;
	}
	return 0;
}
684 
685 int
alpha_fp_complete(u_long a0,u_long a1,struct lwp * l,uint64_t * ucode)686 alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode)
687 {
688           uint64_t op_class;
689           alpha_instruction inst;
690           /* "trigger_pc" is Compaq's term for the earliest faulting op */
691           alpha_instruction *trigger_pc, *usertrap_pc;
692           alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];
693           long insn_count = 0;
694           int sig;
695 
696           if (alpha_fp_complete_debug) {
697                     printf("%s: [%s:%d] a0[AESR]=0x%lx a1[regmask]=0x%lx "
698                            "FPCR=0x%lx FP_C=0x%lx\n",
699                         __func__, l->l_proc->p_comm, l->l_proc->p_pid,
700                         a0, a1, alpha_read_fpcr(),
701                         l->l_md.md_flags & (MDLWP_FP_C|MDLWP_FPACTIVE));
702           }
703 
704           pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
705           trigger_pc = pc - 1;          /* for ALPHA_AMASK_PAT case */
706 
707           /*
708            * Start out with the code mirroring the exception flags
709            * (FP_X_*).  Shift right 1 bit to discard SWC to achieve
710            * this.
711            */
712           *ucode = a0 >> 1;
713 
714           if (cpu_amask & ALPHA_AMASK_PAT) {
715                     if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) != 0 ||
716                         alpha_fp_sync_complete) {
717                               sig = alpha_fp_complete_at((u_long)trigger_pc, l,
718                                   ucode);
719                               goto resolved;
720                     }
721           }
722           if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) == 0)
723                     goto unresolved;
724 /*
725  * At this point we are somewhere in the trap shadow of one or more instruc-
726  * tions that have trapped with software completion specified.  We have a mask
727  * of the registers written by trapping instructions.
728  *
729  * Now step backwards through the trap shadow, clearing bits in the
730  * destination write mask until the trigger instruction is found, and
731  * interpret this one instruction in SW. If a SIGFPE is not required, back up
732  * the PC until just after this instruction and restart. This will execute all
733  * trap shadow instructions between the trigger pc and the trap pc twice.
734  */
735           trigger_pc = 0;
736           win_begin = pc;
737           atomic_inc_ulong(&ts_scans.ev_count);
738           for (--pc; a1; --pc) {
739                     insn_count++;
740                     if (pc < win_begin) {
741                               win_begin = pc - TSWINSIZE + 1;
742                               if (copyin(win_begin, tsw, sizeof tsw)) {
743                                         /* sigh, try to get just one */
744                                         win_begin = pc;
745                                         if (copyin(win_begin, tsw, 4)) {
746                                                   /*
747                                                    * We're off the rails here; don't
748                                                    * bother updating the FP_C.
749                                                    */
750                                                   return SIGSEGV;
751                                         }
752                               }
753                     }
754                     assert(win_begin <= pc && !((long)pc  & 3));
755                     inst = tsw[pc - win_begin];
756                     op_class = 1UL << inst.generic_format.opcode;
757                     if (op_class & FPUREG_CLASS) {
758                               a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
759                               trigger_pc = pc;
760                     } else if (op_class & CPUREG_CLASS) {
761                               a1 &= ~(1UL << inst.operate_generic_format.rc);
762                               trigger_pc = pc;
763                     } else if (op_class & TRAPSHADOWBOUNDARY) {
764                               if (op_class & CHECKFUNCTIONCODE) {
765                                         if (inst.mem_format.displacement == op_trapb ||
766                                             inst.mem_format.displacement == op_excb)
767                                                   break;    /* code breaks AARM rules */
768                               } else
769                                         break; /* code breaks AARM rules */
770                     }
771                     /* Some shadow-safe op, probably load, store, or FPTI class */
772           }
773           if (insn_count > atomic_load_relaxed(&ts_insns_max.ev_count)) {
774                     atomic_store_relaxed(&ts_insns_max.ev_count, insn_count);
775           }
776           atomic_add_long(&ts_insns.ev_count, insn_count);
777           if (__predict_true(trigger_pc != 0 && a1 == 0)) {
778                     atomic_inc_ulong(&ts_resolved.ev_count);
779                     sig = alpha_fp_complete_at((u_long)trigger_pc, l, ucode);
780                     goto resolved;
781           } else {
782                     atomic_inc_ulong(&ts_unresolved.ev_count);
783           }
784 
785  unresolved: /* obligatory statement */;
786           /*
787            * *ucode contains the exception bits (FP_X_*).  We need to
788            * update the FP_C and FPCR, and send a signal for any new
789            * trap that is enabled.
790            */
791           uint64_t orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
792           uint64_t new_flags = orig_flags | *ucode;
793           uint64_t changed_flags = orig_flags ^ new_flags;
794           KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
795 
796           l->l_md.md_flags |= NETBSD_FLAG_TO_FP_C(new_flags);
797 
798           kpreempt_disable();
799           if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
800                     fpu_load();
801           }
802           alpha_pal_wrfen(1);
803           uint64_t orig_fpcr = alpha_read_fpcr();
804           alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, l->l_md.md_flags));
805           uint64_t needsig =
806               changed_flags & FP_C_TO_NETBSD_MASK(l->l_md.md_flags);
807           alpha_pal_wrfen(0);
808           kpreempt_enable();
809 
810           if (__predict_false(needsig)) {
811                     *ucode = needsig;
812                     return SIGFPE;
813           }
814           return 0;
815 
816  resolved:
817           if (sig) {
818                     usertrap_pc = trigger_pc + 1;
819                     l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
820           }
821           return sig;
822 }
823 
824 /*
825  * Initialize FP handling.
826  */
827 void
alpha_fp_init(void)828 alpha_fp_init(void)
829 {
830           evcnt_attach_dynamic_nozero(&fpevent_use, EVCNT_TYPE_MISC, NULL,
831               "FP", "proc use");
832           evcnt_attach_dynamic_nozero(&fpevent_reuse, EVCNT_TYPE_MISC, NULL,
833               "FP", "proc re-use");
834 
835           evcnt_attach_dynamic_nozero(&ts_scans, EVCNT_TYPE_MISC, NULL,
836               "FP", "TS scans");
837           evcnt_attach_dynamic_nozero(&ts_insns, EVCNT_TYPE_MISC, NULL,
838               "FP", "TS total insns");
839           evcnt_attach_dynamic_nozero(&ts_insns_max, EVCNT_TYPE_MISC, NULL,
840               "FP", "TS max single-scan insns");
841           evcnt_attach_dynamic_nozero(&ts_resolved, EVCNT_TYPE_MISC, NULL,
842               "FP", "TS resolved");
843           evcnt_attach_dynamic_nozero(&ts_unresolved, EVCNT_TYPE_MISC, NULL,
844               "FP", "TS unresolved");
845 
846           evcnt_attach_dynamic_nozero(&fp_ill_opc, EVCNT_TYPE_MISC, NULL,
847               "FP", "illegal op code");
848           evcnt_attach_dynamic_nozero(&fp_ill_func, EVCNT_TYPE_MISC, NULL,
849               "FP", "illegal function code");
850           evcnt_attach_dynamic_nozero(&fp_ill_anyop, EVCNT_TYPE_MISC, NULL,
851               "FP", "illegal any_float function code");
852 }
853 
854 /*
855  * Load the float-point context for the current lwp.
856  */
857 void
fpu_state_load(struct lwp * l,u_int flags)858 fpu_state_load(struct lwp *l, u_int flags)
859 {
860           struct pcb * const pcb = lwp_getpcb(l);
861           KASSERT(l == curlwp);
862 
863 #ifdef MULTIPROCESSOR
864           /*
865            * If the LWP got switched to another CPU, pcu_switchpoint would have
866            * called state_release to clear MDLWP_FPACTIVE.  Now that we are back
867            * on the CPU that has our FP context, set MDLWP_FPACTIVE again.
868            */
869           if (flags & PCU_REENABLE) {
870                     KASSERT(flags & PCU_VALID);
871                     l->l_md.md_flags |= MDLWP_FPACTIVE;
872                     return;
873           }
874 #else
875           KASSERT((flags & PCU_REENABLE) == 0);
876 #endif
877 
878           /*
879            * Instrument FP usage -- if a process had not previously
880            * used FP, mark it as having used FP for the first time,
881            * and count this event.
882            *
883            * If a process has used FP, count a "used FP, and took
884            * a trap to use it again" event.
885            */
886           if ((flags & PCU_VALID) == 0) {
887                     atomic_inc_ulong(&fpevent_use.ev_count);
888           } else {
889                     atomic_inc_ulong(&fpevent_reuse.ev_count);
890           }
891 
892           if (alpha_fp_complete_debug) {
893                     printf("%s: [%s:%d] loading FPCR=0x%lx\n",
894                         __func__, l->l_proc->p_comm, l->l_proc->p_pid,
895                         pcb->pcb_fp.fpr_cr);
896           }
897           alpha_pal_wrfen(1);
898           restorefpstate(&pcb->pcb_fp);
899           alpha_pal_wrfen(0);
900 
901           l->l_md.md_flags |= MDLWP_FPACTIVE;
902 }
903 
904 /*
905  * Save the FPU state.
906  */
907 
908 void
fpu_state_save(struct lwp * l)909 fpu_state_save(struct lwp *l)
910 {
911           struct pcb * const pcb = lwp_getpcb(l);
912 
913           alpha_pal_wrfen(1);
914           savefpstate(&pcb->pcb_fp);
915           alpha_pal_wrfen(0);
916           if (alpha_fp_complete_debug) {
917                     printf("%s: [%s:%d] saved FPCR=0x%lx\n",
918                         __func__, l->l_proc->p_comm, l->l_proc->p_pid,
919                         pcb->pcb_fp.fpr_cr);
920           }
921 }
922 
923 /*
924  * Release the FPU.
925  */
926 void
fpu_state_release(struct lwp * l)927 fpu_state_release(struct lwp *l)
928 {
929           l->l_md.md_flags &= ~MDLWP_FPACTIVE;
930 }
931