1 /*        $NetBSD: fpu.c,v 1.27 2020/04/16 05:44:43 skrll Exp $       */
2 
3 /*
4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Matthew Fredette.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * FPU handling for NetBSD/hppa.
34  */
35 
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.27 2020/04/16 05:44:43 skrll Exp $");
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/signalvar.h>
43 
44 #include <uvm/uvm_extern.h>
45 
46 #include <machine/cpufunc.h>
47 #include <machine/frame.h>
48 #include <machine/reg.h>
49 #include <machine/pcb.h>
50 #include <machine/pmap.h>
51 
52 #include <hppa/hppa/machdep.h>
53 
54 #include "../spmath/float.h"
55 #include "../spmath/fpudispatch.h"
56 
/* Complete instruction words. */
#define	OPCODE_NOP		0x08000240
#define	OPCODE_COPR_0_0		0x30000000

/*
 * Bits tested in a coprocessor load/store instruction word by
 * hppa_fpu_ls().  OPCODE_INDEXED selects the indexed form (otherwise
 * the short-displacement form is assumed), OPCODE_DOUBLE selects a
 * doubleword rather than a word access, and OPCODE_STORE selects a
 * store rather than a load.
 */
#define	OPCODE_INDEXED		0x00001000
#define	OPCODE_DOUBLE		0x08000000
#define	OPCODE_STORE		0x00000200

/*
 * Completer bits.  The _S/_M/_SM names are used when decoding the
 * indexed form, the _MB/_MA names when decoding the short-displacement
 * form; OPCODE_CMPLT is the mask covering both completer bits.
 */
#define	OPCODE_CMPLT_S		0x00002000
#define	OPCODE_CMPLT_M		0x00000020
#define	OPCODE_CMPLT_SM		(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT_MB		OPCODE_CMPLT_M
#define	OPCODE_CMPLT_MA		OPCODE_CMPLT_SM
#define	OPCODE_CMPLT		(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
71 
72 /* This is nonzero iff we're using a hardware FPU. */
73 int fpu_present;
74 
75 /* If we have any FPU, this is its version. */
76 u_int fpu_version;
77 
78 /* The number of times we have had to switch the FPU context. */
79 u_int fpu_csw;
80 
81 /* In locore.S, this swaps states in and out of the FPU. */
82 void hppa_fpu_swapout(struct pcb *);
83 void hppa_fpu_swap(struct fpreg *, struct fpreg *);
84 
85 static int hppa_fpu_ls(struct trapframe *, struct lwp *);
86 
87 /*
88  * Given a trapframe and a general register number, the
89  * FRAME_REG macro returns a pointer to that general
90  * register.  The _frame_reg_positions array is a lookup
91  * table, since the general registers aren't in order
92  * in a trapframe.
93  *
94  * NB: this more or less assumes that all members of
95  * struct trapframe are u_ints.
96  */
97 #define FRAME_REG(f, reg, r0) \
98           ((reg) == 0 ? (&r0) : ((&(f)->tf_t1) + _frame_reg_positions[reg]))
99 #define _FRAME_POSITION(f)    \
100           ((&((struct trapframe *) 0)->f) - (&((struct trapframe *) 0)->tf_t1))
101 const int _frame_reg_positions[32] = {
102           -1,                                     /* r0 */
103           _FRAME_POSITION(tf_r1),
104           _FRAME_POSITION(tf_rp),                 /* r2 */
105           _FRAME_POSITION(tf_r3),
106           _FRAME_POSITION(tf_r4),
107           _FRAME_POSITION(tf_r5),
108           _FRAME_POSITION(tf_r6),
109           _FRAME_POSITION(tf_r7),
110           _FRAME_POSITION(tf_r8),
111           _FRAME_POSITION(tf_r9),
112           _FRAME_POSITION(tf_r10),
113           _FRAME_POSITION(tf_r11),
114           _FRAME_POSITION(tf_r12),
115           _FRAME_POSITION(tf_r13),
116           _FRAME_POSITION(tf_r14),
117           _FRAME_POSITION(tf_r15),
118           _FRAME_POSITION(tf_r16),
119           _FRAME_POSITION(tf_r17),
120           _FRAME_POSITION(tf_r18),
121           _FRAME_POSITION(tf_t4),                 /* r19 */
122           _FRAME_POSITION(tf_t3),                 /* r20 */
123           _FRAME_POSITION(tf_t2),                 /* r21 */
124           _FRAME_POSITION(tf_t1),                 /* r22 */
125           _FRAME_POSITION(tf_arg3),     /* r23 */
126           _FRAME_POSITION(tf_arg2),     /* r24 */
127           _FRAME_POSITION(tf_arg1),     /* r25 */
128           _FRAME_POSITION(tf_arg0),     /* r26 */
129           _FRAME_POSITION(tf_dp),                 /* r27 */
130           _FRAME_POSITION(tf_ret0),     /* r28 */
131           _FRAME_POSITION(tf_ret1),     /* r29 */
132           _FRAME_POSITION(tf_sp),                 /* r30 */
133           _FRAME_POSITION(tf_r31),
134 };
135 
136 /*
137  * Bootstraps the FPU.
138  */
139 void
hppa_fpu_bootstrap(u_int ccr_enable)140 hppa_fpu_bootstrap(u_int ccr_enable)
141 {
142           uint32_t junk[2];
143           uint32_t vers[2];
144 
145           /* See if we have a present and functioning hardware FPU. */
146           fpu_present = (ccr_enable & HPPA_FPUS) == HPPA_FPUS;
147           if (!fpu_present) {
148                     fpu_csw = 0;
149                     curcpu()->ci_fpu_state = 0;
150 
151                     return;
152           }
153 
154           KASSERT(fpu_present);
155           /* Initialize the FPU and get its version. */
156 
157           /*
158            * We track what process has the FPU,
159            * and how many times we have to swap
160            * in and out.
161            */
162 
163           /*
164            * The PA-RISC 1.1 Architecture manual is
165            * pretty clear that the copr,0,0 must be
166            * wrapped in double word stores of fr0,
167            * otherwise its operation is undefined.
168            */
169           __asm volatile(
170                     "         ldo       %0, %%r22 \n"
171                     "         fstds     %%fr0, 0(%%r22)     \n"
172                     "         ldo       %1, %%r22 \n"
173                     "         copr,0,0            \n"
174                     "         fstds     %%fr0, 0(%%r22)     \n"
175                     : "=m" (junk), "=m" (vers) : : "r22");
176 
177           /*
178            * Now mark that no process has the FPU,
179            * and disable it, so the first time it
180            * gets used the process' state gets
181            * swapped in.
182            */
183           fpu_csw = 0;
184           curcpu()->ci_fpu_state = 0;
185           mtctl(ccr_enable & (CCR_MASK ^ HPPA_FPUS), CR_CCR);
186 
187           fpu_version = vers[0];
188 }
189 
190 /*
191  * If the given LWP has its state in the FPU,
192  * flush that state out into the LWP's PCB.
193  */
194 void
hppa_fpu_flush(struct lwp * l)195 hppa_fpu_flush(struct lwp *l)
196 {
197           struct trapframe *tf = l->l_md.md_regs;
198           struct pcb *pcb = lwp_getpcb(l);
199           struct cpu_info *ci = curcpu();
200 
201           if (!fpu_present)
202                     return;
203 
204           /*
205            * If this process' state is currently in hardware, swap it out.
206            */
207 
208           if (ci->ci_fpu_state == 0 ||
209               ci->ci_fpu_state != tf->tf_cr30) {
210                     return;
211           }
212 
213           hppa_fpu_swapout(pcb);
214           ci->ci_fpu_state = 0;
215 }
216 
217 /*
218  * This emulates a coprocessor load/store instruction.
219  */
220 static int
hppa_fpu_ls(struct trapframe * frame,struct lwp * l)221 hppa_fpu_ls(struct trapframe *frame, struct lwp *l)
222 {
223           struct pcb *pcb = lwp_getpcb(l);
224           u_int inst, inst_b, inst_x, inst_s, inst_t;
225           int log2size;
226           u_int *base;
227           u_int offset, index, im5;
228           void *fpreg;
229           u_int r0 = 0;
230           int error;
231 
232           /*
233            * Get the instruction that we're emulating,
234            * and break it down.  Using HP bit notation,
235            * b is a five-bit field starting at bit 10,
236            * x is a five-bit field starting at bit 15,
237            * s is a two-bit field starting at bit 17,
238            * and t is a five-bit field starting at bit 31.
239            */
240           inst = frame->tf_iir;
241           __asm volatile(
242                     "         extru %4, 10, 5, %1 \n"
243                     "         extru %4, 15, 5, %2 \n"
244                     "         extru %4, 17, 2, %3 \n"
245                     "         extru %4, 31, 5, %4 \n"
246                     : "=r" (inst_b), "=r" (inst_x), "=r" (inst_s), "=r" (inst_t)
247                     : "r" (inst));
248 
249           /*
250            * The space must be the user's space, else we
251            * segfault.
252            */
253           if (inst_s != pcb->pcb_space)
254                     return EFAULT;
255 
256           /* See whether or not this is a doubleword load/store. */
257           log2size = (inst & OPCODE_DOUBLE) ? 3 : 2;
258 
259           /* Get the floating point register. */
260           fpreg = ((char *)pcb->pcb_fpregs) + (inst_t << log2size);
261 
262           /* Get the base register. */
263           base = FRAME_REG(frame, inst_b, r0);
264 
265           /* Dispatch on whether or not this is an indexed load/store. */
266           if (inst & OPCODE_INDEXED) {
267 
268                     /* Get the index register value. */
269                     index = *FRAME_REG(frame, inst_x, r0);
270 
271                     /* Dispatch on the completer. */
272                     switch (inst & OPCODE_CMPLT) {
273                     case OPCODE_CMPLT_S:
274                               offset = *base + (index << log2size);
275                               break;
276                     case OPCODE_CMPLT_M:
277                               offset = *base;
278                               *base = *base + index;
279                               break;
280                     case OPCODE_CMPLT_SM:
281                               offset = *base;
282                               *base = *base + (index << log2size);
283                               break;
284                     default:
285                               offset = *base + index;
286                               break;
287                     }
288           } else {
289 
290                     /* Do a low_sign_ext(x, 5). */
291                     im5 = inst_x >> 1;
292                     if (inst_x & 1)
293                               im5 |= 0xfffffff0;
294 
295                     /* Dispatch on the completer. */
296                     switch (inst & OPCODE_CMPLT) {
297                     case OPCODE_CMPLT_MB:
298                               offset = *base + im5;
299                               *base = *base + im5;
300                               break;
301                     case OPCODE_CMPLT_MA:
302                               offset = *base;
303                               *base = *base + im5;
304                               break;
305                     default:
306                               offset = *base + im5;
307                               break;
308                     }
309           }
310 
311           /*
312            * The offset we calculated must be the same as the
313            * offset in the IOR.
314            */
315           KASSERT(offset == frame->tf_ior);
316 
317           /* Perform the load or store. */
318           error = (inst & OPCODE_STORE) ?
319                     copyout(fpreg, (void *) offset, 1 << log2size) :
320                     copyin((const void *) offset, fpreg, 1 << log2size);
321           return error;
322 }
323 
324 /*
325  * This is called to emulate an instruction.
326  */
327 void
hppa_fpu_emulate(struct trapframe * frame,struct lwp * l,u_int inst)328 hppa_fpu_emulate(struct trapframe *frame, struct lwp *l, u_int inst)
329 {
330           struct pcb *pcb = lwp_getpcb(l);
331           u_int opcode, class, sub;
332           u_int *fpregs;
333           int exception;
334           ksiginfo_t ksi;
335 
336           /*
337            * If the process' state is in any hardware FPU,
338            * flush it out - we need to operate on it.
339            */
340           hppa_fpu_flush(l);
341 
342           /*
343            * Get the instruction that we're emulating,
344            * and break it down.  Using HP bit notation,
345            * the class is a two-bit field starting at
346            * bit 22, the opcode is a 6-bit field starting
347            * at bit 5, and sub for a class 1 instruction
348            * is a two bit field starting at bit 16, else
349            * it is a three bit field starting at bit 18.
350            */
351 #if 0
352           __asm volatile(
353                     "         extru %3, 22, 2, %1 \n"
354                     "         extru %3, 5, 6, %0  \n"
355                     "         extru %3, 18, 3, %2 \n"
356                     "         comib,<> 1, %1, 0   \n"
357                     "         extru %3, 16, 2, %2 \n"
358                     : "=r" (opcode), "=r" (class), "=r" (sub)
359                     : "r" (inst));
360 #else
361           opcode = (inst >> (31 - 5)) & 0x3f;
362           class = (inst >> (31 - 22)) & 0x3;
363           if (class == 1) {
364                     sub = (inst >> (31 - 16)) & 3;
365           } else {
366                     sub = (inst >> (31 - 18)) & 7;
367           }
368 #endif
369 
370           /* Get this LWP's FPU registers. */
371           fpregs = (u_int *)pcb->pcb_fpregs;
372 
373           /* Dispatch on the opcode. */
374           switch (opcode) {
375           case 0x09:
376           case 0x0b:
377                     if (hppa_fpu_ls(frame, l) != 0) {
378                               KSI_INIT_TRAP(&ksi);
379                               ksi.ksi_signo = SIGSEGV;
380                               ksi.ksi_code = SEGV_MAPERR;
381                               ksi.ksi_trap = T_DTLBMISS;
382                               ksi.ksi_addr = (void *)frame->tf_iioq_head;
383                               trapsignal(l, &ksi);
384                     }
385                     return;
386           case 0x0c:
387                     exception = decode_0c(inst, class, sub, fpregs);
388                     break;
389           case 0x0e:
390                     exception = decode_0e(inst, class, sub, fpregs);
391                     break;
392           case 0x06:
393                     exception = decode_06(inst, fpregs);
394                     break;
395           case 0x26:
396                     exception = decode_26(inst, fpregs);
397                     break;
398           default:
399                     exception = UNIMPLEMENTEDEXCEPTION;
400                     break;
401         }
402 
403           if (exception) {
404                     KSI_INIT_TRAP(&ksi);
405                     if (exception & UNIMPLEMENTEDEXCEPTION) {
406                               ksi.ksi_signo = SIGILL;
407                               ksi.ksi_code = ILL_COPROC;
408                     } else {
409                               ksi.ksi_signo = SIGFPE;
410                               if (exception & INVALIDEXCEPTION) {
411                                         ksi.ksi_code = FPE_FLTINV;
412                               } else if (exception & DIVISIONBYZEROEXCEPTION) {
413                                         ksi.ksi_code = FPE_FLTDIV;
414                               } else if (exception & OVERFLOWEXCEPTION) {
415                                         ksi.ksi_code = FPE_FLTOVF;
416                               } else if (exception & UNDERFLOWEXCEPTION) {
417                                         ksi.ksi_code = FPE_FLTUND;
418                               } else if (exception & INEXACTEXCEPTION) {
419                                         ksi.ksi_code = FPE_FLTRES;
420                               }
421                     }
422                     ksi.ksi_trap = T_EMULATION;
423                     ksi.ksi_addr = (void *)frame->tf_iioq_head;
424                     trapsignal(l, &ksi);
425           }
426 }
427