xref: /NextBSD/lib/libc/sparc64/fpu/fpu.c (revision 4557fabb34e865d7f40be64b39c9e34fa41dbb60)
1 /*
2  * Copyright (c) 1992, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This software was developed by the Computer Systems Engineering group
6  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7  * contributed to Berkeley.
8  *
9  * All advertising materials mentioning features or use of this software
10  * must display the following acknowledgement:
11  *	This product includes software developed by the University of
12  *	California, Lawrence Berkeley Laboratory.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 /*-
39  * Copyright 2001 by Thomas Moestl <tmm@FreeBSD.org>.  All rights reserved.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
51  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
52  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
53  * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
54  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
55  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
56  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
58  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
59  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
60  *
61  *	@(#)fpu.c	8.1 (Berkeley) 6/11/93
62  *	$NetBSD: fpu.c,v 1.11 2000/12/06 01:47:50 mrg Exp $
63  */
64 
65 #include <sys/cdefs.h>
66 __FBSDID("$FreeBSD$");
67 
68 #include <sys/param.h>
69 
70 #include "namespace.h"
71 #include <errno.h>
72 #include <signal.h>
73 #ifdef FPU_DEBUG
74 #include <stdio.h>
75 #endif
76 #include <stdlib.h>
77 #include <unistd.h>
78 #include "un-namespace.h"
79 #include "libc_private.h"
80 
81 #include <machine/fp.h>
82 #include <machine/frame.h>
83 #include <machine/fsr.h>
84 #include <machine/instr.h>
85 #include <machine/pcb.h>
86 #include <machine/tstate.h>
87 
88 #include "__sparc_utrap_private.h"
89 #include "fpu_emu.h"
90 #include "fpu_extern.h"
91 
92 /*
93  * Translate current exceptions into `first' exception.  The
94  * bits go the wrong way for ffs() (0x10 is most important, etc).
95  * There are only 5, so do it the obvious way.
96  */
97 #define	X1(x) x
98 #define	X2(x) x,x
99 #define	X4(x) x,x,x,x
100 #define	X8(x) X4(x),X4(x)
101 #define	X16(x) X8(x),X8(x)
102 
103 static const char cx_to_trapx[] = {
104 	X1(FSR_NX),
105 	X2(FSR_DZ),
106 	X4(FSR_UF),
107 	X8(FSR_OF),
108 	X16(FSR_NV)
109 };
110 
111 #ifdef FPU_DEBUG
112 #ifdef FPU_DEBUG_MASK
113 int __fpe_debug = FPU_DEBUG_MASK;
114 #else
115 int __fpe_debug = 0;
116 #endif
117 #endif	/* FPU_DEBUG */
118 
119 static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t,
120     u_long);
121 
122 /*
123  * Need to use an fpstate on the stack; we could switch, so we cannot safely
124  * modify the pcb one, it might get overwritten.
125  */
126 int
__fpu_exception(struct utrapframe * uf)127 __fpu_exception(struct utrapframe *uf)
128 {
129 	struct fpemu fe;
130 	u_long fsr, tstate;
131 	u_int insn;
132 	int sig;
133 
134 	fsr = uf->uf_fsr;
135 
136 	switch (FSR_GET_FTT(fsr)) {
137 	case FSR_FTT_NONE:
138 		__utrap_write("lost FPU trap type\n");
139 		return (0);
140 	case FSR_FTT_IEEE:
141 		return (SIGFPE);
142 	case FSR_FTT_SEQERR:
143 		__utrap_write("FPU sequence error\n");
144 		return (SIGFPE);
145 	case FSR_FTT_HWERR:
146 		__utrap_write("FPU hardware error\n");
147 		return (SIGFPE);
148 	case FSR_FTT_UNFIN:
149 	case FSR_FTT_UNIMP:
150 		break;
151 	default:
152 		__utrap_write("unknown FPU error\n");
153 		return (SIGFPE);
154 	}
155 
156 	fe.fe_fsr = fsr & ~FSR_FTT_MASK;
157 	insn = *(u_int32_t *)uf->uf_pc;
158 	if (IF_OP(insn) != IOP_MISC || (IF_F3_OP3(insn) != INS2_FPop1 &&
159 	    IF_F3_OP3(insn) != INS2_FPop2))
160 		__utrap_panic("bogus FP fault");
161 	tstate = uf->uf_state;
162 	sig = __fpu_execute(uf, &fe, insn, tstate);
163 	if (sig != 0)
164 		return (sig);
165 	__asm __volatile("ldx %0, %%fsr" : : "m" (fe.fe_fsr));
166 	return (0);
167 }
168 
169 #ifdef FPU_DEBUG
170 /*
171  * Dump a `fpn' structure.
172  */
173 void
__fpu_dumpfpn(struct fpn * fp)174 __fpu_dumpfpn(struct fpn *fp)
175 {
176 	static const char *const class[] = {
177 		"SNAN", "QNAN", "ZERO", "NUM", "INF"
178 	};
179 
180 	printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2],
181 		fp->fp_sign ? '-' : ' ',
182 		fp->fp_mant[0],	fp->fp_mant[1],
183 		fp->fp_mant[2], fp->fp_mant[3],
184 		fp->fp_exp);
185 }
186 #endif
187 
/*
 * Per-operand-type mask of low register number bits that RN_DECODE()
 * clears; indexed by the FTYPE_* operand type.
 */
static const int opmask[] = {0, 0, 1, 3, 1};

/*
 * Decode a 5 bit register field depending on the operand type: types from
 * FTYPE_DBL upwards are run through INSFPdq_RN() and have the bits in
 * opmask[] stripped, everything below is used unchanged.
 */
#define	RN_DECODE(tp, rn) \
	((tp) >= FTYPE_DBL ? (INSFPdq_RN(rn) & ~opmask[tp]) : (rn))

/*
 * Helper for writing the case labels in __fpu_execute().  Only the op3 and
 * opf fields of the instruction are built, as these are the only ones that
 * need to match.
 */
#define	FOP(op3, opf) \
	(((op3) << IF_F3_OP3_SHIFT) | ((opf) << IF_F3_OPF_SHIFT))
200 
201 /*
202  * Implement a move operation for all supported operand types. The additional
203  * nand and xor parameters will be applied to the upper 32 bit word of the
204  * source operand. This allows to implement fabs and fneg (for fp operands
205  * only!) using this functions, too, by passing (1U << 31) for one of the
206  * parameters, and 0 for the other.
207  */
208 static void
__fpu_mov(struct fpemu * fe,int type,int rd,int rs2,u_int32_t nand,u_int32_t xor)209 __fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand,
210     u_int32_t xor)
211 {
212 
213 	if (type == FTYPE_INT || type == FTYPE_SNG)
214 		__fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor);
215 	else {
216 		/*
217 		 * Need to use the double versions to be able to access
218 		 * the upper 32 fp registers.
219 		 */
220 		__fpu_setreg64(rd, (__fpu_getreg64(rs2) &
221 		    ~((u_int64_t)nand << 32)) ^ ((u_int64_t)xor << 32));
222 		if (type == FTYPE_EXT)
223 			__fpu_setreg64(rd + 2, __fpu_getreg64(rs2 + 2));
224 	}
225 }
226 
/*
 * Conditional move: copy rs2 to rd (see __fpu_mov()) only when the
 * condition field of `insn' equals the condition code value `fcc' handed
 * in by the caller; otherwise do nothing.
 *
 * NOTE(review): this is a plain equality test between the instruction's
 * cond field and the raw condition code value -- confirm that this is the
 * intended way to evaluate the FMOVcc condition.
 */
static __inline void
__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs2,
    u_int32_t insn, int fcc)
{

	if (IF_F4_COND(insn) != fcc)
		return;
	__fpu_mov(fe, type, rd, rs2, 0, 0);
}
235 
236 static int
__fpu_cmpck(struct fpemu * fe)237 __fpu_cmpck(struct fpemu *fe)
238 {
239 	u_long fsr;
240 	int cx;
241 
242 	/*
243 	 * The only possible exception here is NV; catch it
244 	 * early and get out, as there is no result register.
245 	 */
246 	cx = fe->fe_cx;
247 	fsr = fe->fe_fsr | (cx << FSR_CEXC_SHIFT);
248 	if (cx != 0) {
249 		if (fsr & (FSR_NV << FSR_TEM_SHIFT)) {
250 			fe->fe_fsr = (fsr & ~FSR_FTT_MASK) |
251 			    FSR_FTT(FSR_FTT_IEEE);
252 			return (SIGFPE);
253 		}
254 		fsr |= FSR_NV << FSR_AEXC_SHIFT;
255 	}
256 	fe->fe_fsr = fsr;
257 	return (0);
258 }
259 
260 /*
261  * Execute an FPU instruction (one that runs entirely in the FPU; not
262  * FBfcc or STF, for instance).  On return, fe->fe_fs->fs_fsr will be
263  * modified to reflect the setting the hardware would have left.
264  *
265  * Note that we do not catch all illegal opcodes, so you can, for instance,
266  * multiply two integers this way.
267  */
268 static int
__fpu_execute(struct utrapframe * uf,struct fpemu * fe,u_int32_t insn,u_long tstate)269 __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn,
270     u_long tstate)
271 {
272 	struct fpn *fp;
273 	int opf, rs1, rs2, rd, type, mask, cx, cond __unused;
274 	u_long reg, fsr;
275 	u_int space[4];
276 
277 	/*
278 	 * `Decode' and execute instruction.  Start with no exceptions.
279 	 * The type of almost any OPF opcode is in the bottom two bits, so we
280 	 * squish them out here.
281 	 */
282 	opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) |
283 	    IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2));
284 	type = IF_F3_OPF(insn) & 3;
285 	rs1 = RN_DECODE(type, IF_F3_RS1(insn));
286 	rs2 = RN_DECODE(type, IF_F3_RS2(insn));
287 	rd = RN_DECODE(type, IF_F3_RD(insn));
288 	cond = 0;
289 #ifdef notdef
290 	if ((rs1 | rs2 | rd) & opmask[type])
291 		return (SIGILL);
292 #endif
293 	fsr = fe->fe_fsr;
294 	fe->fe_fsr &= ~FSR_CEXC_MASK;
295 	fe->fe_cx = 0;
296 	switch (opf) {
297 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))):
298 		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC0(fsr));
299 		return (0);
300 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))):
301 		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC1(fsr));
302 		return (0);
303 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))):
304 		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC2(fsr));
305 		return (0);
306 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))):
307 		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC3(fsr));
308 		return (0);
309 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_ICC)):
310 		__fpu_ccmov(fe, type, rd, rs2, insn,
311 		    (tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT);
312 		return (0);
313 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)):
314 		__fpu_ccmov(fe, type, rd, rs2, insn,
315 		    (tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT));
316 		return (0);
317 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)):
318 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
319 		if (reg == 0)
320 			__fpu_mov(fe, type, rd, rs2, 0, 0);
321 		return (0);
322 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)):
323 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
324 		if (reg <= 0)
325 			__fpu_mov(fe, type, rd, rs2, 0, 0);
326 		return (0);
327 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)):
328 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
329 		if (reg < 0)
330 			__fpu_mov(fe, type, rd, rs2, 0, 0);
331 		return (0);
332 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)):
333 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
334 		if (reg != 0)
335 			__fpu_mov(fe, type, rd, rs2, 0, 0);
336 		return (0);
337 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)):
338 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
339 		if (reg > 0)
340 			__fpu_mov(fe, type, rd, rs2, 0, 0);
341 		return (0);
342 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)):
343 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
344 		if (reg >= 0)
345 			__fpu_mov(fe, type, rd, rs2, 0, 0);
346 		return (0);
347 	case FOP(INS2_FPop2, INSFP2_FCMP):
348 		__fpu_explode(fe, &fe->fe_f1, type, rs1);
349 		__fpu_explode(fe, &fe->fe_f2, type, rs2);
350 		__fpu_compare(fe, 0, IF_F3_CC(insn));
351 		return (__fpu_cmpck(fe));
352 	case FOP(INS2_FPop2, INSFP2_FCMPE):
353 		__fpu_explode(fe, &fe->fe_f1, type, rs1);
354 		__fpu_explode(fe, &fe->fe_f2, type, rs2);
355 		__fpu_compare(fe, 1, IF_F3_CC(insn));
356 		return (__fpu_cmpck(fe));
357 	case FOP(INS2_FPop1, INSFP1_FMOV):
358 		__fpu_mov(fe, type, rd, rs2, 0, 0);
359 		return (0);
360 	case FOP(INS2_FPop1, INSFP1_FNEG):
361 		__fpu_mov(fe, type, rd, rs2, 0, (1U << 31));
362 		return (0);
363 	case FOP(INS2_FPop1, INSFP1_FABS):
364 		__fpu_mov(fe, type, rd, rs2, (1U << 31), 0);
365 		return (0);
366 	case FOP(INS2_FPop1, INSFP1_FSQRT):
367 		__fpu_explode(fe, &fe->fe_f1, type, rs2);
368 		fp = __fpu_sqrt(fe);
369 		break;
370 	case FOP(INS2_FPop1, INSFP1_FADD):
371 		__fpu_explode(fe, &fe->fe_f1, type, rs1);
372 		__fpu_explode(fe, &fe->fe_f2, type, rs2);
373 		fp = __fpu_add(fe);
374 		break;
375 	case FOP(INS2_FPop1, INSFP1_FSUB):
376 		__fpu_explode(fe, &fe->fe_f1, type, rs1);
377 		__fpu_explode(fe, &fe->fe_f2, type, rs2);
378 		fp = __fpu_sub(fe);
379 		break;
380 	case FOP(INS2_FPop1, INSFP1_FMUL):
381 		__fpu_explode(fe, &fe->fe_f1, type, rs1);
382 		__fpu_explode(fe, &fe->fe_f2, type, rs2);
383 		fp = __fpu_mul(fe);
384 		break;
385 	case FOP(INS2_FPop1, INSFP1_FDIV):
386 		__fpu_explode(fe, &fe->fe_f1, type, rs1);
387 		__fpu_explode(fe, &fe->fe_f2, type, rs2);
388 		fp = __fpu_div(fe);
389 		break;
390 	case FOP(INS2_FPop1, INSFP1_FsMULd):
391 	case FOP(INS2_FPop1, INSFP1_FdMULq):
392 		if (type == FTYPE_EXT)
393 			return (SIGILL);
394 		__fpu_explode(fe, &fe->fe_f1, type, rs1);
395 		__fpu_explode(fe, &fe->fe_f2, type, rs2);
396 		type++;	/* single to double, or double to quad */
397 		/*
398 		 * Recalculate rd (the old type applied for the source regs
399 		 * only, the target one has a different size).
400 		 */
401 		rd = RN_DECODE(type, IF_F3_RD(insn));
402 		fp = __fpu_mul(fe);
403 		break;
404 	case FOP(INS2_FPop1, INSFP1_FxTOs):
405 	case FOP(INS2_FPop1, INSFP1_FxTOd):
406 	case FOP(INS2_FPop1, INSFP1_FxTOq):
407 		type = FTYPE_LNG;
408 		rs2 = RN_DECODE(type, IF_F3_RS2(insn));
409 		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
410 		/* sneaky; depends on instruction encoding */
411 		type = (IF_F3_OPF(insn) >> 2) & 3;
412 		rd = RN_DECODE(type, IF_F3_RD(insn));
413 		break;
414 	case FOP(INS2_FPop1, INSFP1_FTOx):
415 		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
416 		type = FTYPE_LNG;
417 		rd = RN_DECODE(type, IF_F3_RD(insn));
418 		break;
419 	case FOP(INS2_FPop1, INSFP1_FTOs):
420 	case FOP(INS2_FPop1, INSFP1_FTOd):
421 	case FOP(INS2_FPop1, INSFP1_FTOq):
422 	case FOP(INS2_FPop1, INSFP1_FTOi):
423 		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
424 		/* sneaky; depends on instruction encoding */
425 		type = (IF_F3_OPF(insn) >> 2) & 3;
426 		rd = RN_DECODE(type, IF_F3_RD(insn));
427 		break;
428 	default:
429 		return (SIGILL);
430 	}
431 
432 	/*
433 	 * ALU operation is complete.  Collapse the result and then check
434 	 * for exceptions.  If we got any, and they are enabled, do not
435 	 * alter the destination register, just stop with an exception.
436 	 * Otherwise set new current exceptions and accrue.
437 	 */
438 	__fpu_implode(fe, fp, type, space);
439 	cx = fe->fe_cx;
440 	if (cx != 0) {
441 		mask = (fsr >> FSR_TEM_SHIFT) & FSR_TEM_MASK;
442 		if (cx & mask) {
443 			/* not accrued??? */
444 			fsr = (fsr & ~FSR_FTT_MASK) |
445 			    FSR_FTT(FSR_FTT_IEEE) |
446 			    FSR_CEXC(cx_to_trapx[(cx & mask) - 1]);
447 			return (SIGFPE);
448 		}
449 		fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT);
450 	}
451 	fe->fe_fsr = fsr;
452 	if (type == FTYPE_INT || type == FTYPE_SNG)
453 		__fpu_setreg(rd, space[0]);
454 	else {
455 		__fpu_setreg64(rd, ((u_int64_t)space[0] << 32) | space[1]);
456 		if (type == FTYPE_EXT)
457 			__fpu_setreg64(rd + 2,
458 			    ((u_int64_t)space[2] << 32) | space[3]);
459 	}
460 	return (0);	/* success */
461 }
462