xref: /dragonfly/contrib/gcc-8.0/libgcc/soft-fp/op-2.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1 /* Software floating-point emulation.
2    Basic two-word fraction declaration and manipulation.
3    Copyright (C) 1997-2016 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Richard Henderson (rth@cygnus.com),
6                       Jakub Jelinek (jj@ultra.linux.cz),
7                       David S. Miller (davem@redhat.com) and
8                       Peter Maydell (pmaydell@chiark.greenend.org.uk).
9 
10    The GNU C Library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14 
15    In addition to the permissions in the GNU Lesser General Public
16    License, the Free Software Foundation gives you unlimited
17    permission to link the compiled version of this file into
18    combinations with other programs, and to distribute those
19    combinations without any restriction coming from the use of this
20    file.  (The Lesser General Public License restrictions do apply in
21    other respects; for example, they cover modification of the file,
22    and distribution when not linked into a combine executable.)
23 
24    The GNU C Library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28 
29    You should have received a copy of the GNU Lesser General Public
30    License along with the GNU C Library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32 
33 #ifndef SOFT_FP_OP_2_H
34 #define SOFT_FP_OP_2_H        1
35 
36 #define _FP_FRAC_DECL_2(X)                                  \
37   _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
38 #define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
39 #define _FP_FRAC_SET_2(X, I)  __FP_FRAC_SET_2 (X, I)
40 #define _FP_FRAC_HIGH_2(X)    (X##_f1)
41 #define _FP_FRAC_LOW_2(X)     (X##_f0)
42 #define _FP_FRAC_WORD_2(X, w) (X##_f##w)
43 
44 #define _FP_FRAC_SLL_2(X, N)                                                    \
45   (void) (((N) < _FP_W_TYPE_SIZE)                                               \
46             ? ({                                                                          \
47                 if (__builtin_constant_p (N) && (N) == 1)                       \
48                     {                                                                     \
49                       X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
50                       X##_f0 += X##_f0;                                         \
51                     }                                                                     \
52                 else                                                                      \
53                     {                                                                     \
54                       X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
55                       X##_f0 <<= (N);                                           \
56                     }                                                                     \
57                 0;                                                              \
58               })                                                                          \
59             : ({                                                                          \
60                 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);           \
61                 X##_f0 = 0;                                                     \
62               }))
63 
64 
65 #define _FP_FRAC_SRL_2(X, N)                                                    \
66   (void) (((N) < _FP_W_TYPE_SIZE)                                               \
67             ? ({                                                                          \
68                 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
69                 X##_f1 >>= (N);                                                           \
70               })                                                                          \
71             : ({                                                                          \
72                 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);           \
73                 X##_f1 = 0;                                                     \
74               }))
75 
76 /* Right shift with sticky-lsb.  */
77 #define _FP_FRAC_SRST_2(X, S, N, sz)                                            \
78   (void) (((N) < _FP_W_TYPE_SIZE)                                               \
79             ? ({                                                                          \
80                 S = (__builtin_constant_p (N) && (N) == 1                       \
81                        ? X##_f0 & 1                                                       \
82                        : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);             \
83                 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
84                 X##_f1 >>= (N);                                                           \
85               })                                                                          \
86             : ({                                                                          \
87                 S = ((((N) == _FP_W_TYPE_SIZE                                   \
88                          ? 0                                                    \
89                          : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))               \
90                         | X##_f0) != 0);                                                  \
91                 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));                   \
92                 X##_f1 = 0;                                                     \
93               }))
94 
95 #define _FP_FRAC_SRS_2(X, N, sz)                                                \
96   (void) (((N) < _FP_W_TYPE_SIZE)                                               \
97             ? ({                                                                          \
98                 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
99                               | (__builtin_constant_p (N) && (N) == 1           \
100                                  ? X##_f0 & 1                                             \
101                                  : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
102                 X##_f1 >>= (N);                                                           \
103               })                                                                          \
104             : ({                                                                          \
105                 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)           \
106                               | ((((N) == _FP_W_TYPE_SIZE                       \
107                                    ? 0                                          \
108                                    : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))     \
109                                   | X##_f0) != 0));                                       \
110                 X##_f1 = 0;                                                     \
111               }))
112 
113 #define _FP_FRAC_ADDI_2(X, I) \
114   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
115 
116 #define _FP_FRAC_ADD_2(R, X, Y)         \
117   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
118 
119 #define _FP_FRAC_SUB_2(R, X, Y)         \
120   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
121 
122 #define _FP_FRAC_DEC_2(X, Y)  \
123   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
124 
125 #define _FP_FRAC_CLZ_2(R, X)                      \
126   do                                                        \
127     {                                                       \
128       if (X##_f1)                                 \
129           __FP_CLZ ((R), X##_f1);                           \
130       else                                                  \
131           {                                                 \
132             __FP_CLZ ((R), X##_f0);               \
133             (R) += _FP_W_TYPE_SIZE;               \
134           }                                                 \
135     }                                                       \
136   while (0)
137 
138 /* Predicates.  */
139 #define _FP_FRAC_NEGP_2(X)    ((_FP_WS_TYPE) X##_f1 < 0)
140 #define _FP_FRAC_ZEROP_2(X)   ((X##_f1 | X##_f0) == 0)
141 #define _FP_FRAC_OVERP_2(fs, X)         (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
142 #define _FP_FRAC_CLEAR_OVERP_2(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
143 #define _FP_FRAC_HIGHBIT_DW_2(fs, X)    \
144   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
145 #define _FP_FRAC_EQ_2(X, Y)   (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
146 #define _FP_FRAC_GT_2(X, Y)   \
147   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
148 #define _FP_FRAC_GE_2(X, Y)   \
149   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
150 
151 #define _FP_ZEROFRAC_2                  0, 0
152 #define _FP_MINFRAC_2                   0, 1
153 #define _FP_MAXFRAC_2                   (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
154 
155 /* Internals.  */
156 
157 #define __FP_FRAC_SET_2(X, I1, I0)      (X##_f0 = I0, X##_f1 = I1)
158 
159 #define __FP_CLZ_2(R, xh, xl)                     \
160   do                                                        \
161     {                                                       \
162       if (xh)                                               \
163           __FP_CLZ ((R), xh);                     \
164       else                                                  \
165           {                                                 \
166             __FP_CLZ ((R), xl);                             \
167             (R) += _FP_W_TYPE_SIZE;               \
168           }                                                 \
169     }                                                       \
170   while (0)
171 
172 #if 0
173 
174 # ifndef __FP_FRAC_ADDI_2
175 #  define __FP_FRAC_ADDI_2(xh, xl, i)   \
176   (xh += ((xl += i) < i))
177 # endif
178 # ifndef __FP_FRAC_ADD_2
179 #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
180   (rh = xh + yh + ((rl = xl + yl) < xl))
181 # endif
182 # ifndef __FP_FRAC_SUB_2
183 #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
184   (rh = xh - yh - ((rl = xl - yl) > xl))
185 # endif
186 # ifndef __FP_FRAC_DEC_2
187 #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)                   \
188   do                                                                  \
189     {                                                                 \
190       UWtype __FP_FRAC_DEC_2_t = xl;                        \
191       xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t);          \
192     }                                                                 \
193   while (0)
194 # endif
195 
196 #else
197 
198 # undef __FP_FRAC_ADDI_2
199 # define __FP_FRAC_ADDI_2(xh, xl, i)    add_ssaaaa (xh, xl, xh, xl, 0, i)
200 # undef __FP_FRAC_ADD_2
201 # define __FP_FRAC_ADD_2                add_ssaaaa
202 # undef __FP_FRAC_SUB_2
203 # define __FP_FRAC_SUB_2                sub_ddmmss
204 # undef __FP_FRAC_DEC_2
205 # define __FP_FRAC_DEC_2(xh, xl, yh, yl)          \
206   sub_ddmmss (xh, xl, xh, xl, yh, yl)
207 
208 #endif
209 
210 /* Unpack the raw bits of a native fp value.  Do not classify or
211    normalize the data.  */
212 
213 #define _FP_UNPACK_RAW_2(fs, X, val)                        \
214   do                                                                  \
215     {                                                                 \
216       union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo;  \
217       _FP_UNPACK_RAW_2_flo.flt = (val);                     \
218                                                                       \
219       X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0;             \
220       X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1;             \
221       X##_e  = _FP_UNPACK_RAW_2_flo.bits.exp;               \
222       X##_s  = _FP_UNPACK_RAW_2_flo.bits.sign;              \
223     }                                                                 \
224   while (0)
225 
226 #define _FP_UNPACK_RAW_2_P(fs, X, val)                      \
227   do                                                                  \
228     {                                                                 \
229       union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo          \
230           = (union _FP_UNION_##fs *) (val);                 \
231                                                                       \
232       X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0;          \
233       X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1;          \
234       X##_e  = _FP_UNPACK_RAW_2_P_flo->bits.exp;  \
235       X##_s  = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
236     }                                                                 \
237   while (0)
238 
239 
240 /* Repack the raw bits of a native fp value.  */
241 
242 #define _FP_PACK_RAW_2(fs, val, X)                \
243   do                                                        \
244     {                                                       \
245       union _FP_UNION_##fs _FP_PACK_RAW_2_flo;    \
246                                                             \
247       _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0;     \
248       _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1;     \
249       _FP_PACK_RAW_2_flo.bits.exp   = X##_e;      \
250       _FP_PACK_RAW_2_flo.bits.sign  = X##_s;      \
251                                                             \
252       (val) = _FP_PACK_RAW_2_flo.flt;             \
253     }                                                       \
254   while (0)
255 
256 #define _FP_PACK_RAW_2_P(fs, val, X)                        \
257   do                                                                  \
258     {                                                                 \
259       union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo  \
260           = (union _FP_UNION_##fs *) (val);                 \
261                                                                       \
262       _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0;  \
263       _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1;  \
264       _FP_PACK_RAW_2_P_flo->bits.exp   = X##_e;             \
265       _FP_PACK_RAW_2_P_flo->bits.sign  = X##_s;             \
266     }                                                                 \
267   while (0)
268 
269 
270 /* Multiplication algorithms: */
271 
272 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
273 
274 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)              \
275   do                                                                                      \
276     {                                                                                     \
277       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b);                     \
278       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c);                     \
279                                                                                           \
280       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0),           \
281               X##_f0, Y##_f0);                                                            \
282       doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
283               X##_f0, Y##_f1);                                                            \
284       doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
285               X##_f1, Y##_f0);                                                            \
286       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),           \
287               X##_f1, Y##_f1);                                                            \
288                                                                                           \
289       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),          \
290                            _FP_FRAC_WORD_4 (R, 1), 0,                           \
291                            _FP_MUL_MEAT_DW_2_wide_b_f1,                         \
292                            _FP_MUL_MEAT_DW_2_wide_b_f0,                         \
293                            _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),      \
294                            _FP_FRAC_WORD_4 (R, 1));                                       \
295       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),          \
296                            _FP_FRAC_WORD_4 (R, 1), 0,                           \
297                            _FP_MUL_MEAT_DW_2_wide_c_f1,                         \
298                            _FP_MUL_MEAT_DW_2_wide_c_f0,                         \
299                            _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),      \
300                            _FP_FRAC_WORD_4 (R, 1));                                       \
301     }                                                                                     \
302   while (0)
303 
304 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                           \
305   do                                                                                      \
306     {                                                                                     \
307       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z);                                  \
308                                                                                           \
309       _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z,     \
310                                     X, Y, doit);                                \
311                                                                                           \
312       /* Normalize since we know where the msb of the multiplicands   \
313            were (bit B), we know that the msb of the of the product is          \
314            at either 2B or 2B-1.  */                                            \
315       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1,           \
316                           2*(wfracbits));                                                 \
317       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0);            \
318       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1);            \
319     }                                                                                     \
320   while (0)
321 
322 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
323    Do only 3 multiplications instead of four. This one is for machines
324    where multiplication is much more expensive than subtraction.  */
325 
326 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)                   \
327   do                                                                                      \
328     {                                                                                     \
329       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b);                          \
330       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c);                          \
331       _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d;                                 \
332       int _FP_MUL_MEAT_DW_2_wide_3mul_c1;                                       \
333       int _FP_MUL_MEAT_DW_2_wide_3mul_c2;                                       \
334                                                                                           \
335       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1;             \
336       _FP_MUL_MEAT_DW_2_wide_3mul_c1                                            \
337           = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0;                          \
338       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1;             \
339       _FP_MUL_MEAT_DW_2_wide_3mul_c2                                            \
340           = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0;                          \
341       doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0),    \
342               X##_f0, Y##_f0);                                                            \
343       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1),           \
344               _FP_MUL_MEAT_DW_2_wide_3mul_b_f0,                                 \
345               _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);                                \
346       doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                                   \
347               _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1);                \
348                                                                                           \
349       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0                                          \
350           &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2;                                   \
351       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1                                          \
352           &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1;                                   \
353       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),          \
354                            _FP_FRAC_WORD_4 (R, 1),                                        \
355                            (_FP_MUL_MEAT_DW_2_wide_3mul_c1                      \
356                               & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0,             \
357                            _FP_MUL_MEAT_DW_2_wide_3mul_d,                       \
358                            0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
359       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),         \
360                               _FP_MUL_MEAT_DW_2_wide_3mul_b_f0);                \
361       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),         \
362                               _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);                \
363       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),          \
364                            _FP_FRAC_WORD_4 (R, 1),                                        \
365                            0, _FP_MUL_MEAT_DW_2_wide_3mul_d,                    \
366                            _FP_FRAC_WORD_4 (R, 0));                                       \
367       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),          \
368                            _FP_FRAC_WORD_4 (R, 1), 0,                           \
369                            _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                    \
370                            _FP_MUL_MEAT_DW_2_wide_3mul_c_f0);                   \
371       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),          \
372                            _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                    \
373                            _FP_MUL_MEAT_DW_2_wide_3mul_c_f0,                    \
374                            _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2));     \
375     }                                                                                     \
376   while (0)
377 
378 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)            \
379   do                                                                                      \
380     {                                                                                     \
381       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z);                             \
382                                                                                           \
383       _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits),                                 \
384                                            _FP_MUL_MEAT_2_wide_3mul_z,                    \
385                                            X, Y, doit);                                   \
386                                                                                           \
387       /* Normalize since we know where the msb of the multiplicands   \
388            were (bit B), we know that the msb of the of the product is          \
389            at either 2B or 2B-1.  */                                            \
390       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z,                     \
391                           (wfracbits)-1, 2*(wfracbits));                        \
392       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0);                 \
393       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1);                 \
394     }                                                                                     \
395   while (0)
396 
397 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
398   do                                                                  \
399     {                                                                 \
400       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2];                \
401       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2];                \
402       _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0;                  \
403       _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1;                  \
404       _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0;                  \
405       _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1;                  \
406                                                                       \
407       mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,  \
408                      _FP_MUL_MEAT_DW_2_gmp_y, 2);           \
409     }                                                                 \
410   while (0)
411 
412 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                                  \
413   do                                                                                      \
414     {                                                                                     \
415       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z);                                   \
416                                                                                           \
417       _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y);          \
418                                                                                           \
419       /* Normalize since we know where the msb of the multiplicands   \
420            were (bit B), we know that the msb of the of the product is          \
421            at either 2B or 2B-1.  */                                            \
422       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1,            \
423                           2*(wfracbits));                                                 \
424       R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0];                                       \
425       R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1];                                       \
426     }                                                                                     \
427   while (0)
428 
429 /* Do at most 120x120=240 bits multiplication using double floating
430    point multiplication.  This is useful if floating point
431    multiplication has much bigger throughput than integer multiply.
432    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
433    between 106 and 120 only.
434    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
435    SETFETZ is a macro which will disable all FPU exceptions and set rounding
436    towards zero,  RESETFE should optionally reset it back.  */
437 
438 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
439   do                                                                                      \
440     {                                                                                     \
441       static const double _const[] =                                            \
442           {                                                                               \
443             /* 2^-24 */ 5.9604644775390625e-08,                                 \
444             /* 2^-48 */ 3.5527136788005009e-15,                                 \
445             /* 2^-72 */ 2.1175823681357508e-22,                                 \
446             /* 2^-96 */ 1.2621774483536189e-29,                                 \
447             /* 2^28 */ 2.68435456e+08,                                          \
448             /* 2^4 */ 1.600000e+01,                                             \
449             /* 2^-20 */ 9.5367431640625e-07,                                    \
450             /* 2^-44 */ 5.6843418860808015e-14,                                 \
451             /* 2^-68 */ 3.3881317890172014e-21,                                 \
452             /* 2^-92 */ 2.0194839173657902e-28,                                 \
453             /* 2^-116 */ 1.2037062152420224e-35                                 \
454           };                                                                              \
455       double _a240, _b240, _c240, _d240, _e240, _f240,                          \
456           _g240, _h240, _i240, _j240, _k240;                                    \
457       union { double d; UDItype i; } _l240, _m240, _n240, _o240,      \
458                                                _p240, _q240, _r240, _s240;      \
459       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;           \
460                                                                                           \
461       _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120,    \
462                                "wfracbits out of range");                       \
463                                                                                           \
464       setfetz;                                                                            \
465                                                                                           \
466       _e240 = (double) (long) (X##_f0 & 0xffffff);                              \
467       _j240 = (double) (long) (Y##_f0 & 0xffffff);                              \
468       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);            \
469       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);            \
470       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
471       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
472       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);             \
473       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);             \
474       _a240 = (double) (long) (X##_f1 >> 32);                                   \
475       _f240 = (double) (long) (Y##_f1 >> 32);                                   \
476       _e240 *= _const[3];                                                       \
477       _j240 *= _const[3];                                                       \
478       _d240 *= _const[2];                                                       \
479       _i240 *= _const[2];                                                       \
480       _c240 *= _const[1];                                                       \
481       _h240 *= _const[1];                                                       \
482       _b240 *= _const[0];                                                       \
483       _g240 *= _const[0];                                                       \
484       _s240.d =                                                                       _e240*_j240; \
485       _r240.d =                                                       _d240*_j240 + _e240*_i240; \
486       _q240.d =                                     _c240*_j240 + _d240*_i240 + _e240*_h240; \
487       _p240.d =                   _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
488       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
489       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;          \
490       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;              \
491       _l240.d = _a240*_g240 + _b240*_f240;                                      \
492       _k240 =   _a240*_f240;                                                    \
493       _r240.d += _s240.d;                                                       \
494       _q240.d += _r240.d;                                                       \
495       _p240.d += _q240.d;                                                       \
496       _o240.d += _p240.d;                                                       \
497       _n240.d += _o240.d;                                                       \
498       _m240.d += _n240.d;                                                       \
499       _l240.d += _m240.d;                                                       \
500       _k240 += _l240.d;                                                                   \
501       _s240.d -= ((_const[10]+_s240.d)-_const[10]);                             \
502       _r240.d -= ((_const[9]+_r240.d)-_const[9]);                     \
503       _q240.d -= ((_const[8]+_q240.d)-_const[8]);                     \
504       _p240.d -= ((_const[7]+_p240.d)-_const[7]);                     \
505       _o240.d += _const[7];                                                     \
506       _n240.d += _const[6];                                                     \
507       _m240.d += _const[5];                                                     \
508       _l240.d += _const[4];                                                     \
509       if (_s240.d != 0.0)                                                       \
510           _y240 = 1;                                                                      \
511       if (_r240.d != 0.0)                                                       \
512           _y240 = 1;                                                                      \
513       if (_q240.d != 0.0)                                                       \
514           _y240 = 1;                                                                      \
515       if (_p240.d != 0.0)                                                       \
516           _y240 = 1;                                                                      \
517       _t240 = (DItype) _k240;                                                   \
518       _u240 = _l240.i;                                                                    \
519       _v240 = _m240.i;                                                                    \
520       _w240 = _n240.i;                                                                    \
521       _x240 = _o240.i;                                                                    \
522       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))                              \
523                     | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));         \
524       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))                 \
525                     | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
526                     | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
527                     | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))  \
528                     | _y240);                                                   \
529       resetfe;                                                                            \
530     }                                                                                     \
531   while (0)
532 
533 /* Division algorithms: */
534 
535 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                        \
536   do                                                                                      \
537     {                                                                                     \
538       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2;                                      \
539       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1;                                      \
540       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0;                                      \
541       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1;                                      \
542       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0;                                      \
543       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1;                                      \
544       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0;                                      \
545       if (_FP_FRAC_GE_2 (X, Y))                                                           \
546           {                                                                               \
547             _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1;                             \
548             _FP_DIV_MEAT_2_udiv_n_f1                                            \
549               = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;                  \
550             _FP_DIV_MEAT_2_udiv_n_f0                                            \
551               = X##_f0 << (_FP_W_TYPE_SIZE - 1);                                \
552           }                                                                               \
553       else                                                                                \
554           {                                                                               \
555             R##_e--;                                                                      \
556             _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1;                                  \
557             _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0;                                  \
558             _FP_DIV_MEAT_2_udiv_n_f0 = 0;                                                 \
559           }                                                                               \
560                                                                                           \
561       /* Normalize, i.e. make the most significant bit of the                   \
562            denominator set.  */                                                           \
563       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);                                  \
564                                                                                           \
565       udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1,                             \
566                       _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1,       \
567                       Y##_f1);                                                            \
568       umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0,  \
569                      R##_f1, Y##_f0);                                           \
570       _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0;            \
571       if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r))         \
572           {                                                                               \
573             R##_f1--;                                                                     \
574             _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                           \
575                                 _FP_DIV_MEAT_2_udiv_r);                         \
576             if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)                        \
577                 && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                        \
578                                         _FP_DIV_MEAT_2_udiv_r))                           \
579               {                                                                           \
580                 R##_f1--;                                                                 \
581                 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                       \
582                                     _FP_DIV_MEAT_2_udiv_r);                     \
583               }                                                                           \
584           }                                                                               \
585       _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m);  \
586                                                                                           \
587       if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1)                                   \
588           {                                                                               \
589             /* This is a special case, not an optimization            \
590                (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype).        \
591                As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y,                \
592                R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we        \
593                know what kind of bits it is (sticky, guard, round),   \
594                we don't care.  We also don't care what the reminder is,         \
595                because the guard bit will be set anyway.  -jj */                \
596             R##_f0 = -1;                                                                  \
597           }                                                                               \
598       else                                                                                \
599           {                                                                               \
600             udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1,                       \
601                           _FP_DIV_MEAT_2_udiv_r_f1,                                       \
602                           _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1);                    \
603             umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1,                                \
604                          _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0);             \
605             _FP_DIV_MEAT_2_udiv_r_f0 = 0;                                                 \
606             if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                           \
607                                    _FP_DIV_MEAT_2_udiv_r))                      \
608               {                                                                           \
609                 R##_f0--;                                                                 \
610                 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                       \
611                                     _FP_DIV_MEAT_2_udiv_r);                     \
612                 if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)                    \
613                       && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                  \
614                                             _FP_DIV_MEAT_2_udiv_r))             \
615                     {                                                                     \
616                       R##_f0--;                                                           \
617                       _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
618                                           _FP_DIV_MEAT_2_udiv_r);               \
619                     }                                                                     \
620               }                                                                           \
621             if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r,                          \
622                                     _FP_DIV_MEAT_2_udiv_m))                     \
623               R##_f0 |= _FP_WORK_STICKY;                                                  \
624           }                                                                               \
625     }                                                                                     \
626   while (0)
627 
628 
629 /* Square root algorithms:
630    We have just one right now, maybe Newton approximation
631    should be added for those machines where division is fast.  */
632 
633 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                                \
634   do                                                                            \
635     {                                                                           \
636       while (q)                                                                 \
637           {                                                                     \
638             T##_f1 = S##_f1 + (q);                                    \
639             if (T##_f1 <= X##_f1)                                               \
640               {                                                                 \
641                 S##_f1 = T##_f1 + (q);                                \
642                 X##_f1 -= T##_f1;                                               \
643                 R##_f1 += (q);                                                  \
644               }                                                                 \
645             _FP_FRAC_SLL_2 (X, 1);                                    \
646             (q) >>= 1;                                                          \
647           }                                                                     \
648       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);                  \
649       while ((q) != _FP_WORK_ROUND)                                   \
650           {                                                                     \
651             T##_f0 = S##_f0 + (q);                                    \
652             T##_f1 = S##_f1;                                          \
653             if (T##_f1 < X##_f1                                                 \
654                 || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))  \
655               {                                                                 \
656                 S##_f0 = T##_f0 + (q);                                \
657                 S##_f1 += (T##_f0 > S##_f0);                          \
658                 _FP_FRAC_DEC_2 (X, T);                                \
659                 R##_f0 += (q);                                                  \
660               }                                                                 \
661             _FP_FRAC_SLL_2 (X, 1);                                    \
662             (q) >>= 1;                                                          \
663           }                                                                     \
664       if (X##_f0 | X##_f1)                                            \
665           {                                                                     \
666             if (S##_f1 < X##_f1                                                 \
667                 || (S##_f1 == X##_f1 && S##_f0 < X##_f0))             \
668               R##_f0 |= _FP_WORK_ROUND;                               \
669             R##_f0 |= _FP_WORK_STICKY;                                \
670           }                                                                     \
671     }                                                                           \
672   while (0)
673 
674 
675 /* Assembly/disassembly for converting to/from integral types.
676    No shifting or overflow handled here.  */
677 
678 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)          \
679   (void) (((rsize) <= _FP_W_TYPE_SIZE)            \
680             ? ({ (r) = X##_f0; })                           \
681             : ({                                            \
682                 (r) = X##_f1;                     \
683                 (r) <<= _FP_W_TYPE_SIZE;                    \
684                 (r) += X##_f0;                              \
685               }))
686 
687 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)       \
688   do                                                        \
689     {                                                       \
690       X##_f0 = (r);                               \
691       X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE        \
692                     ? 0                                     \
693                     : (r) >> _FP_W_TYPE_SIZE);    \
694     }                                                       \
695   while (0)
696 
697 /* Convert FP values between word sizes.  */
698 
699 #define _FP_FRAC_COPY_1_2(D, S)                   (D##_f = S##_f0)
700 
701 #define _FP_FRAC_COPY_2_1(D, S)                   ((D##_f0 = S##_f), (D##_f1 = 0))
702 
703 #define _FP_FRAC_COPY_2_2(D, S)                   _FP_FRAC_COPY_2 (D, S)
704 
705 #endif /* !SOFT_FP_OP_2_H */
706