1 /* Copyright (C) 2007-2022 Free Software Foundation, Inc.
2 
3 This file is part of GCC.
4 
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9 
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13 for more details.
14 
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18 
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22 <http://www.gnu.org/licenses/>.  */
23 
24 #ifndef __BIDECIMAL_H
25 #define __BIDECIMAL_H
26 
27 #include "bid_conf.h"
28 #include "bid_functions.h"
29 
30 #define __BID_INLINE__ static __inline
31 
32 /*********************************************************************
33  *
34  *      Logical Shift Macros
35  *
36  *********************************************************************/
37 
38 #define __shr_128(Q, A, k)                        \
39 {                                                 \
40      (Q).w[0] = (A).w[0] >> k;                      \
41            (Q).w[0] |= (A).w[1] << (64-k);               \
42            (Q).w[1] = (A).w[1] >> k;                    \
43 }
44 
45 #define __shr_128_long(Q, A, k)                   \
46 {                                                 \
47           if((k)<64) {                                  \
48      (Q).w[0] = (A).w[0] >> k;                    \
49            (Q).w[0] |= (A).w[1] << (64-k);              \
50            (Q).w[1] = (A).w[1] >> k;                    \
51           }                                             \
52           else {                                        \
53            (Q).w[0] = (A).w[1]>>((k)-64);               \
54            (Q).w[1] = 0;                                \
55           }                                             \
56 }
57 
58 #define __shl_128_long(Q, A, k)                   \
59 {                                                 \
60           if((k)<64) {                                  \
61      (Q).w[1] = (A).w[1] << k;                    \
62            (Q).w[1] |= (A).w[0] >> (64-k);              \
63            (Q).w[0] = (A).w[0] << k;                    \
64           }                                             \
65           else {                                        \
66            (Q).w[1] = (A).w[0]<<((k)-64);               \
67            (Q).w[0] = 0;                                \
68           }                                             \
69 }
70 
71 #define __low_64(Q)  (Q).w[0]
72 /*********************************************************************
73  *
74  *      String Macros
75  *
76  *********************************************************************/
77 #define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
78 /*********************************************************************
79  *
80  *      Compare Macros
81  *
82  *********************************************************************/
83 // greater than
84 //  return 0 if A<=B
85 //  non-zero if A>B
86 #define __unsigned_compare_gt_128(A, B)  \
87     ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
88 // greater-or-equal
89 #define __unsigned_compare_ge_128(A, B)  \
90     ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
91 #define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
92 // tighten exponent range
93 #define __tight_bin_range_128(bp, P, bin_expon)  \
94 {                                                \
95 UINT64 M;                                        \
96           M = 1;                                       \
97           (bp) = (bin_expon);                          \
98           if((bp)<63) {                                \
99             M <<= ((bp)+1);                            \
100             if((P).w[0] >= M) (bp)++; }                 \
101           else if((bp)>64) {                           \
102             M <<= ((bp)+1-64);                         \
103             if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
104                 (bp)++; }                              \
105           else if((P).w[1]) (bp)++;                    \
106 }
107 /*********************************************************************
108  *
109  *      Add/Subtract Macros
110  *
111  *********************************************************************/
112 // add 64-bit value to 128-bit
113 #define __add_128_64(R128, A128, B64)    \
114 {                                        \
115 UINT64 R64H;                             \
116           R64H = (A128).w[1];                 \
117           (R128).w[0] = (B64) + (A128).w[0];     \
118           if((R128).w[0] < (B64))               \
119             R64H ++;                           \
120     (R128).w[1] = R64H;                  \
121 }
122 // subtract 64-bit value from 128-bit
123 #define __sub_128_64(R128, A128, B64)    \
124 {                                        \
125 UINT64 R64H;                             \
126           R64H = (A128).w[1];                  \
127           if((A128).w[0] < (B64))               \
128             R64H --;                           \
129     (R128).w[1] = R64H;                  \
130           (R128).w[0] = (A128).w[0] - (B64);     \
131 }
132 // add 128-bit value to 128-bit
133 // assume no carry-out
134 #define __add_128_128(R128, A128, B128)  \
135 {                                        \
136 UINT128 Q128;                            \
137           Q128.w[1] = (A128).w[1]+(B128).w[1]; \
138           Q128.w[0] = (B128).w[0] + (A128).w[0];  \
139           if(Q128.w[0] < (B128).w[0])            \
140             Q128.w[1] ++;                      \
141     (R128).w[1] = Q128.w[1];             \
142     (R128).w[0] = Q128.w[0];               \
143 }
144 #define __sub_128_128(R128, A128, B128)  \
145 {                                        \
146 UINT128 Q128;                            \
147           Q128.w[1] = (A128).w[1]-(B128).w[1]; \
148           Q128.w[0] = (A128).w[0] - (B128).w[0];  \
149           if((A128).w[0] < (B128).w[0])          \
150             Q128.w[1] --;                      \
151     (R128).w[1] = Q128.w[1];             \
152     (R128).w[0] = Q128.w[0];               \
153 }
154 #define __add_carry_out(S, CY, X, Y)    \
155 {                                      \
156 UINT64 X1=X;                           \
157           S = X + Y;                         \
158           CY = (S<X1) ? 1 : 0;                \
159 }
160 #define __add_carry_in_out(S, CY, X, Y, CI)    \
161 {                                             \
162 UINT64 X1;                                    \
163           X1 = X + CI;                              \
164           S = X1 + Y;                               \
165           CY = ((S<X1) || (X1<CI)) ? 1 : 0;          \
166 }
167 #define __sub_borrow_out(S, CY, X, Y)    \
168 {                                      \
169 UINT64 X1=X;                           \
170           S = X - Y;                         \
171           CY = (S>X1) ? 1 : 0;                \
172 }
173 #define __sub_borrow_in_out(S, CY, X, Y, CI)    \
174 {                                             \
175 UINT64 X1, X0=X;                              \
176           X1 = X - CI;                              \
177           S = X1 - Y;                               \
178           CY = ((S>X1) || (X1>X0)) ? 1 : 0;          \
179 }
180 // increment C128 and check for rounding overflow:
181 // if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
182 #define INCREMENT(C_128, exp)                                           \
183 {                                                                       \
184   C_128.w[0]++;                                                         \
185   if (C_128.w[0] == 0) C_128.w[1]++;                                    \
186   if (C_128.w[1] == 0x0001ed09bead87c0ull &&                            \
187       C_128.w[0] == 0x378d8e6400000000ull) {                            \
188     exp++;                                                              \
189     C_128.w[1] = 0x0000314dc6448d93ull;                                 \
190     C_128.w[0] = 0x38c15b0a00000000ull;                                 \
191   }                                                                     \
192 }
193 // decrement C128 and check for rounding underflow, but only at the
194 // boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
195 // and decrement the exponent
196 #define DECREMENT(C_128, exp)                                           \
197 {                                                                       \
198   C_128.w[0]--;                                                         \
199   if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--;                \
200   if (C_128.w[1] == 0x0000314dc6448d93ull &&                            \
201       C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {                 \
202     exp--;                                                              \
203     C_128.w[1] = 0x0001ed09bead87c0ull;                                 \
204     C_128.w[0] = 0x378d8e63ffffffffull;                                 \
205   }                                                                     \
206 }
207 
208  /*********************************************************************
209  *
210  *      Multiply Macros
211  *
212  *********************************************************************/
213 #define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
214 /***************************************
215  *  Signed, Full 64x64-bit Multiply
216  ***************************************/
217 #define __imul_64x64_to_128(P, CX, CY)  \
218 {                                       \
219 UINT64 SX, SY;                          \
220    __mul_64x64_to_128(P, CX, CY);       \
221                                         \
222    SX = ((SINT64)(CX))>>63;             \
223    SY = ((SINT64)(CY))>>63;             \
224    SX &= CY;   SY &= CX;                \
225                                         \
226    (P).w[1] = (P).w[1] - SX - SY;       \
227 }
228 /***************************************
229  *  Signed, Full 64x128-bit Multiply
230  ***************************************/
231 #define __imul_64x128_full(Ph, Ql, A, B)          \
232 {                                                 \
233 UINT128 ALBL, ALBH, QM2, QM;                      \
234                                                   \
235           __imul_64x64_to_128(ALBH, (A), (B).w[1]);     \
236           __imul_64x64_to_128(ALBL, (A), (B).w[0]);      \
237                                                   \
238           (Ql).w[0] = ALBL.w[0];                          \
239           QM.w[0] = ALBL.w[1];                           \
240           QM.w[1] = ((SINT64)ALBL.w[1])>>63;            \
241     __add_128_128(QM2, ALBH, QM);                 \
242           (Ql).w[1] = QM2.w[0];                          \
243     Ph = QM2.w[1];                                \
244 }
245 /*****************************************************
246  *      Unsigned Multiply Macros
247  *****************************************************/
248 // get full 64x64bit product
249 //
250 #define __mul_64x64_to_128(P, CX, CY)   \
251 {                                       \
252 UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
253           CXH = (CX) >> 32;                     \
254           CXL = (UINT32)(CX);                   \
255           CYH = (CY) >> 32;                     \
256           CYL = (UINT32)(CY);                   \
257                                                 \
258     PM = CXH*CYL;                         \
259           PH = CXH*CYH;                         \
260           PL = CXL*CYL;                         \
261           PM2 = CXL*CYH;                        \
262           PH += (PM>>32);                       \
263           PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
264                                           \
265           (P).w[1] = PH + (PM>>32);             \
266           (P).w[0] = (PM<<32)+(UINT32)PL;       \
267 }
268 // get full 64x64bit product
269 // Note:
270 // This macro is used for CX < 2^61, CY < 2^61
271 //
272 #define __mul_64x64_to_128_fast(P, CX, CY)   \
273 {                                       \
274 UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
275           CXH = (CX) >> 32;                   \
276           CXL = (UINT32)(CX);                 \
277           CYH = (CY) >> 32;                   \
278           CYL = (UINT32)(CY);                 \
279                                               \
280     PM = CXH*CYL;                       \
281           PL = CXL*CYL;                       \
282           PH = CXH*CYH;                       \
283           PM += CXL*CYH;                      \
284           PM += (PL>>32);                     \
285                                         \
286           (P).w[1] = PH + (PM>>32);           \
287           (P).w[0] = (PM<<32)+(UINT32)PL;      \
288 }
289 // used for CX< 2^60
290 #define __sqr64_fast(P, CX)   \
291 {                                       \
292 UINT64 CXH, CXL, PL, PH, PM;            \
293           CXH = (CX) >> 32;                   \
294           CXL = (UINT32)(CX);                 \
295                                               \
296     PM = CXH*CXL;                       \
297           PL = CXL*CXL;                       \
298           PH = CXH*CXH;                       \
299           PM += PM;                           \
300           PM += (PL>>32);                     \
301                                         \
302           (P).w[1] = PH + (PM>>32);           \
303           (P).w[0] = (PM<<32)+(UINT32)PL;     \
304 }
305 // get full 64x64bit product
306 // Note:
307 // This implementation is used for CX < 2^61, CY < 2^61
308 //
309 #define __mul_64x64_to_64_high_fast(P, CX, CY)   \
310 {                                       \
311 UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
312           CXH = (CX) >> 32;                   \
313           CXL = (UINT32)(CX);                 \
314           CYH = (CY) >> 32;                   \
315           CYL = (UINT32)(CY);                 \
316                                               \
317     PM = CXH*CYL;                       \
318           PL = CXL*CYL;                       \
319           PH = CXH*CYH;                       \
320           PM += CXL*CYH;                      \
321           PM += (PL>>32);                     \
322                                         \
323           (P) = PH + (PM>>32);                \
324 }
325 // get full 64x64bit product
326 //
327 #define __mul_64x64_to_128_full(P, CX, CY)     \
328 {                                         \
329 UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
330           CXH = (CX) >> 32;                     \
331           CXL = (UINT32)(CX);                   \
332           CYH = (CY) >> 32;                     \
333           CYL = (UINT32)(CY);                   \
334                                                 \
335     PM = CXH*CYL;                         \
336           PH = CXH*CYH;                         \
337           PL = CXL*CYL;                         \
338           PM2 = CXL*CYH;                        \
339           PH += (PM>>32);                       \
340           PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
341                                           \
342           (P).w[1] = PH + (PM>>32);             \
343           (P).w[0] = (PM<<32)+(UINT32)PL;        \
344 }
345 #define __mul_128x128_high(Q, A, B)               \
346 {                                                 \
347 UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
348                                                   \
349           __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
350           __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
351           __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
352           __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
353                                                   \
354     __add_128_128(QM, ALBH, AHBL);                \
355     __add_128_64(QM2, QM, ALBL.w[1]);             \
356     __add_128_64((Q), AHBH, QM2.w[1]);            \
357 }
358 #define __mul_128x128_full(Qh, Ql, A, B)          \
359 {                                                 \
360 UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
361                                                   \
362           __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
363           __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
364           __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
365           __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
366                                                   \
367     __add_128_128(QM, ALBH, AHBL);                \
368           (Ql).w[0] = ALBL.w[0];                          \
369     __add_128_64(QM2, QM, ALBL.w[1]);             \
370     __add_128_64((Qh), AHBH, QM2.w[1]);           \
371           (Ql).w[1] = QM2.w[0];                          \
372 }
373 #define __mul_128x128_low(Ql, A, B)               \
374 {                                                 \
375 UINT128 ALBL;                                     \
376 UINT64 QM64;                                      \
377                                                   \
378           __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
379           QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1];   \
380                                                   \
381           (Ql).w[0] = ALBL.w[0];                          \
382           (Ql).w[1] = QM64 + ALBL.w[1];                 \
383 }
384 #define __mul_64x128_low(Ql, A, B)                \
385 {                                                 \
386   UINT128 ALBL, ALBH, QM2;                        \
387   __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
388   __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
389   (Ql).w[0] = ALBL.w[0];                          \
390   __add_128_64(QM2, ALBH, ALBL.w[1]);             \
391   (Ql).w[1] = QM2.w[0];                           \
392 }
393 #define __mul_64x128_full(Ph, Ql, A, B)           \
394 {                                                 \
395 UINT128 ALBL, ALBH, QM2;                          \
396                                                   \
397           __mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
398           __mul_64x64_to_128(ALBL, (A), (B).w[0]);       \
399                                                   \
400           (Ql).w[0] = ALBL.w[0];                          \
401     __add_128_64(QM2, ALBH, ALBL.w[1]);           \
402           (Ql).w[1] = QM2.w[0];                          \
403     Ph = QM2.w[1];                                \
404 }
405 #define __mul_64x128_to_192(Q, A, B)              \
406 {                                                 \
407 UINT128 ALBL, ALBH, QM2;                          \
408                                                   \
409           __mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
410           __mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
411                                                   \
412           (Q).w[0] = ALBL.w[0];                         \
413     __add_128_64(QM2, ALBH, ALBL.w[1]);           \
414           (Q).w[1] = QM2.w[0];                          \
415     (Q).w[2] = QM2.w[1];                          \
416 }
417 #define __mul_64x128_to192(Q, A, B)          \
418 {                                             \
419 UINT128 ALBL, ALBH, QM2;                      \
420                                               \
421     __mul_64x64_to_128(ALBH, (A), (B).w[1]);  \
422     __mul_64x64_to_128(ALBL, (A), (B).w[0]);  \
423                                               \
424     (Q).w[0] = ALBL.w[0];                    \
425     __add_128_64(QM2, ALBH, ALBL.w[1]);       \
426     (Q).w[1] = QM2.w[0];                     \
427     (Q).w[2] = QM2.w[1];                     \
428 }
429 #define __mul_128x128_to_256(P256, A, B)                         \
430 {                                                                \
431 UINT128 Qll, Qlh;                                                \
432 UINT64 Phl, Phh, CY1, CY2;                                         \
433                                                                  \
434    __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
435    __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
436   (P256).w[0] = Qll.w[0];                                        \
437              __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
438              __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);    \
439              (P256).w[3] = Phh + CY2;                                   \
440 }
441 //
442 // For better performance, will check A.w[1] against 0,
443 //                         but not B.w[1]
444 // Use this macro accordingly
445 #define __mul_128x128_to_256_check_A(P256, A, B)                   \
446 {                                                                  \
447 UINT128 Qll, Qlh;                                                  \
448 UINT64 Phl, Phh, CY1, CY2;                                           \
449                                                                    \
450    __mul_64x128_full(Phl, Qll, A.w[0], B);                          \
451   (P256).w[0] = Qll.w[0];                                        \
452    if(A.w[1])  {                                                   \
453              __mul_64x128_full(Phh, Qlh, A.w[1], B);                     \
454              __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
455              __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);   \
456              (P256).w[3] = Phh + CY2;   }                              \
457    else  {                                                         \
458              (P256).w[1] = Qll.w[1];                                  \
459              (P256).w[2] = Phl;                                       \
460              (P256).w[3] = 0;  }                                      \
461 }
462 #define __mul_64x192_to_256(lP, lA, lB)                      \
463 {                                                         \
464 UINT128 lP0,lP1,lP2;                                      \
465 UINT64 lC;                                                 \
466           __mul_64x64_to_128(lP0, lA, (lB).w[0]);              \
467           __mul_64x64_to_128(lP1, lA, (lB).w[1]);              \
468           __mul_64x64_to_128(lP2, lA, (lB).w[2]);              \
469           (lP).w[0] = lP0.w[0];                                \
470           __add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);      \
471           __add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
472           (lP).w[3] = lP2.w[1] + lC;                           \
473 }
474 #define __mul_64x256_to_320(P, A, B)                    \
475 {                                                       \
476 UINT128 lP0,lP1,lP2,lP3;                                \
477 UINT64 lC;                                               \
478           __mul_64x64_to_128(lP0, A, (B).w[0]);             \
479           __mul_64x64_to_128(lP1, A, (B).w[1]);             \
480           __mul_64x64_to_128(lP2, A, (B).w[2]);             \
481           __mul_64x64_to_128(lP3, A, (B).w[3]);             \
482           (P).w[0] = lP0.w[0];                               \
483           __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
484           __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
485           __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
486           (P).w[4] = lP3.w[1] + lC;                          \
487 }
488 #define __mul_192x192_to_384(P, A, B)                          \
489 {                                                              \
490 UINT256 P0,P1,P2;                                              \
491 UINT64 CY;                                                      \
492           __mul_64x192_to_256(P0, (A).w[0], B);                   \
493           __mul_64x192_to_256(P1, (A).w[1], B);                   \
494           __mul_64x192_to_256(P2, (A).w[2], B);                   \
495           (P).w[0] = P0.w[0];                                  \
496           __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
497           __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
498           __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
499           (P).w[4] = P1.w[3] + CY;                              \
500           __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
501           __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
502           __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
503           (P).w[5] = P2.w[3] + CY;                              \
504 }
505 #define __mul_64x320_to_384(P, A, B)                    \
506 {                                                       \
507 UINT128 lP0,lP1,lP2,lP3,lP4;                            \
508 UINT64 lC;                                               \
509           __mul_64x64_to_128(lP0, A, (B).w[0]);             \
510           __mul_64x64_to_128(lP1, A, (B).w[1]);             \
511           __mul_64x64_to_128(lP2, A, (B).w[2]);             \
512           __mul_64x64_to_128(lP3, A, (B).w[3]);             \
513           __mul_64x64_to_128(lP4, A, (B).w[4]);             \
514           (P).w[0] = lP0.w[0];                               \
515           __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
516           __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
517           __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
518           __add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC); \
519           (P).w[5] = lP4.w[1] + lC;                          \
520 }
521 // A*A
522 // Full 128x128-bit product
523 #define __sqr128_to_256(P256, A)                                 \
524 {                                                                \
525 UINT128 Qll, Qlh, Qhh;                                           \
526 UINT64 TMP_C1, TMP_C2;                                 \
527                                                                  \
528    __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                      \
529    __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                      \
530    Qhh.w[1] += (Qlh.w[1]>>63);                                   \
531    Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                \
532    Qlh.w[0] += Qlh.w[0];                                         \
533    __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                      \
534                                                                  \
535    __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);      \
536    (P256).w[0] = Qll.w[0];                                       \
537    __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1);    \
538    (P256).w[3] = Qhh.w[1]+TMP_C2;                                         \
539 }
540 #define __mul_128x128_to_256_low_high(PQh, PQl, A, B)            \
541 {                                                                \
542 UINT128 Qll, Qlh;                                                \
543 UINT64 Phl, Phh, C1, C2;                                         \
544                                                                  \
545    __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
546    __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
547   (PQl).w[0] = Qll.w[0];                                        \
548              __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);      \
549              __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);    \
550              (PQh).w[1] = Phh + C2;                                   \
551 }
552 #define __mul_256x256_to_512(P, A, B)                          \
553 {                                                              \
554 UINT512 P0,P1,P2,P3;                                           \
555 UINT64 CY;                                                      \
556           __mul_64x256_to_320(P0, (A).w[0], B);                   \
557           __mul_64x256_to_320(P1, (A).w[1], B);                   \
558           __mul_64x256_to_320(P2, (A).w[2], B);                   \
559           __mul_64x256_to_320(P3, (A).w[3], B);                   \
560           (P).w[0] = P0.w[0];                                  \
561           __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
562           __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
563           __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
564           __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
565           (P).w[5] = P1.w[4] + CY;                              \
566           __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
567           __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
568           __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
569           __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
570           (P).w[6] = P2.w[4] + CY;                              \
571           __add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);     \
572           __add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY);   \
573           __add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY);   \
574           __add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY);   \
575           (P).w[7] = P3.w[4] + CY;                              \
576 }
577 #define __mul_192x256_to_448(P, A, B)                          \
578 {                                                              \
579 UINT512 P0,P1,P2;                                           \
580 UINT64 CY;                                                      \
581           __mul_64x256_to_320(P0, (A).w[0], B);                   \
582           __mul_64x256_to_320(P1, (A).w[1], B);                   \
583           __mul_64x256_to_320(P2, (A).w[2], B);                   \
584           (P).w[0] = P0.w[0];                                  \
585           __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
586           __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
587           __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
588           __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
589           (P).w[5] = P1.w[4] + CY;                              \
590           __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
591           __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
592           __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
593           __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
594           (P).w[6] = P2.w[4] + CY;                              \
595 }
596 #define __mul_320x320_to_640(P, A, B)                          \
597 {                                                              \
598 UINT512 P0,P1,P2,P3;                                           \
599 UINT64 CY;                                                     \
600           __mul_256x256_to_512((P), (A), B);                   \
601           __mul_64x256_to_320(P1, (A).w[4], B);                   \
602           __mul_64x256_to_320(P2, (B).w[4], A);                   \
603           __mul_64x64_to_128(P3, (A).w[4], (B).w[4]);               \
604           __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
605           __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
606           __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
607           __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
608           __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
609           P3.w[1] += CY;                                       \
610           __add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);      \
611           __add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY); \
612           __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY); \
613           __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY); \
614           __add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY); \
615           (P).w[9] = P3.w[1] + CY;                             \
616 }
617 #define __mul_384x384_to_768(P, A, B)                          \
618 {                                                              \
619 UINT512 P0,P1,P2,P3;                                           \
620 UINT64 CY;                                                     \
621           __mul_320x320_to_640((P), (A), B);                         \
622           __mul_64x320_to_384(P1, (A).w[5], B);                   \
623           __mul_64x320_to_384(P2, (B).w[5], A);                   \
624           __mul_64x64_to_128(P3, (A).w[5], (B).w[5]);               \
625           __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
626           __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
627           __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
628           __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
629           __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
630           __add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY); \
631           P3.w[1] += CY;                                       \
632           __add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);      \
633           __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY); \
634           __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY); \
635           __add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY); \
636           __add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY); \
637           __add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY); \
638           (P).w[11] = P3.w[1] + CY;                             \
639 }
640 #define __mul_64x128_short(Ql, A, B)              \
641 {                                                 \
642 UINT64 ALBH_L;                                    \
643                                                   \
644           __mul_64x64_to_64(ALBH_L, (A),(B).w[1]);      \
645           __mul_64x64_to_128((Ql), (A), (B).w[0]);       \
646                                                   \
647           (Ql).w[1] += ALBH_L;                          \
648 }
649 #define __scale128_10(D,_TMP)                            \
650 {                                                        \
651 UINT128 _TMP2,_TMP8;                                     \
652             _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);       \
653             _TMP2.w[0] = _TMP.w[0]<<1;                         \
654             _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);       \
655             _TMP8.w[0] = _TMP.w[0]<<3;                         \
656             __add_128_128(D, _TMP2, _TMP8);                    \
657 }
658 // 64x64-bit product
659 #define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
660 {                                                  \
661   UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;     \
662   CXH = (CX64) >> 32;                              \
663   CXL = (UINT32)(CX64);                            \
664   CYH = (CY64) >> 32;                              \
665   CYL = (UINT32)(CY64);                            \
666   PM = CXH*CYL;                                    \
667   PH = CXH*CYH;                                    \
668   PL = CXL*CYL;                                    \
669   PM2 = CXL*CYH;                                   \
670   PH += (PM>>32);                                  \
671   PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
672   (P128).w[1] = PH + (PM>>32);                     \
673   (P128).w[0] = (PM<<32)+(UINT32)PL;                \
674 }
675 // 64x64-bit product
676 #define __mul_64x64_to_128HIGH(P64, CX64, CY64)  \
677 {                                                  \
678   UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;     \
679   CXH = (CX64) >> 32;                              \
680   CXL = (UINT32)(CX64);                            \
681   CYH = (CY64) >> 32;                              \
682   CYL = (UINT32)(CY64);                            \
683   PM = CXH*CYL;                                    \
684   PH = CXH*CYH;                                    \
685   PL = CXL*CYL;                                    \
686   PM2 = CXL*CYH;                                   \
687   PH += (PM>>32);                                  \
688   PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
689   P64 = PH + (PM>>32);                     \
690 }
691 #define __mul_128x64_to_128(Q128, A64, B128)        \
692 {                                                  \
693   UINT64 ALBH_L;                                   \
694   ALBH_L = (A64) * (B128).w[1];                    \
695   __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]);   \
696   (Q128).w[1] += ALBH_L;                           \
697 }
698 // might simplify by calculating just QM2.w[0]
699 #define __mul_64x128_to_128(Ql, A, B)           \
700 {                                                 \
701   UINT128 ALBL, ALBH, QM2;                        \
702   __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
703   __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
704   (Ql).w[0] = ALBL.w[0];                          \
705   __add_128_64(QM2, ALBH, ALBL.w[1]);             \
706   (Ql).w[1] = QM2.w[0];                           \
707 }
708 /*********************************************************************
709  *
710  *      BID Pack/Unpack Macros
711  *
712  *********************************************************************/
713 /////////////////////////////////////////
714 // BID64 definitions
715 ////////////////////////////////////////
716 #define DECIMAL_MAX_EXPON_64  767
717 #define DECIMAL_EXPONENT_BIAS 398
718 #define MAX_FORMAT_DIGITS     16
719 /////////////////////////////////////////
720 // BID128 definitions
721 ////////////////////////////////////////
722 #define DECIMAL_MAX_EXPON_128  12287
723 #define DECIMAL_EXPONENT_BIAS_128  6176
724 #define MAX_FORMAT_DIGITS_128      34
725 /////////////////////////////////////////
726 // BID32 definitions
727 ////////////////////////////////////////
728 #define DECIMAL_MAX_EXPON_32  191
729 #define DECIMAL_EXPONENT_BIAS_32  101
730 #define MAX_FORMAT_DIGITS_32      7
731 ////////////////////////////////////////
732 // Constant Definitions
733 ///////////////////////////////////////
734 #define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
735 #define INFINITY_MASK64         0x7800000000000000ull
736 #define SINFINITY_MASK64        0xf800000000000000ull
737 #define SSNAN_MASK64            0xfc00000000000000ull
738 #define NAN_MASK64              0x7c00000000000000ull
739 #define SNAN_MASK64             0x7e00000000000000ull
740 #define QUIET_MASK64            0xfdffffffffffffffull
741 #define LARGE_COEFF_MASK64      0x0007ffffffffffffull
742 #define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
743 #define SMALL_COEFF_MASK64      0x001fffffffffffffull
744 #define EXPONENT_MASK64         0x3ff
745 #define EXPONENT_SHIFT_LARGE64  51
746 #define EXPONENT_SHIFT_SMALL64  53
747 #define LARGEST_BID64           0x77fb86f26fc0ffffull
748 #define SMALLEST_BID64          0xf7fb86f26fc0ffffull
749 #define SMALL_COEFF_MASK128     0x0001ffffffffffffull
750 #define LARGE_COEFF_MASK128     0x00007fffffffffffull
751 #define EXPONENT_MASK128        0x3fff
752 #define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
753 #define LARGEST_BID128_LOW      0x378d8e63ffffffffull
754 #define SPECIAL_ENCODING_MASK32 0x60000000ul
755 #define INFINITY_MASK32         0x78000000ul
756 #define LARGE_COEFF_MASK32      0x007ffffful
757 #define LARGE_COEFF_HIGH_BIT32  0x00800000ul
758 #define SMALL_COEFF_MASK32      0x001ffffful
759 #define EXPONENT_MASK32         0xff
760 #define LARGEST_BID32           0x77f8967f
761 #define NAN_MASK32              0x7c000000
762 #define SNAN_MASK32             0x7e000000
763 #define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
764 #define BINARY_EXPONENT_BIAS  0x3ff
765 #define UPPER_EXPON_LIMIT     51
766 // data needed for BID pack/unpack macros
767 extern UINT64 round_const_table[][19];
768 extern UINT128 reciprocals10_128[];
769 extern int recip_scale[];
770 extern UINT128 power10_table_128[];
771 extern int estimate_decimal_digits[];
772 extern int estimate_bin_expon[];
773 extern UINT64 power10_index_binexp[];
774 extern int short_recip_scale[];
775 extern UINT64 reciprocals10_64[];
776 extern UINT128 power10_index_binexp_128[];
777 extern UINT128 round_const_table_128[][36];
778 
779 
780 //////////////////////////////////////////////
781 //  Status Flag Handling
782 /////////////////////////////////////////////
783 #define __set_status_flags(fpsc, status)  *(fpsc) |= status
784 #define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)
785 
786 __BID_INLINE__ UINT64
unpack_BID64(UINT64 * psign_x,int * pexponent_x,UINT64 * pcoefficient_x,UINT64 x)787 unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
788                 UINT64 * pcoefficient_x, UINT64 x) {
789   UINT64 tmp, coeff;
790 
791   *psign_x = x & 0x8000000000000000ull;
792 
793   if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
794     // special encodings
795     // coefficient
796     coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;
797 
798     if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
799       *pexponent_x = 0;
800       *pcoefficient_x = x & 0xfe03ffffffffffffull;
801       if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
802           *pcoefficient_x = x & 0xfe00000000000000ull;
803       if ((x & NAN_MASK64) == INFINITY_MASK64)
804           *pcoefficient_x = x & SINFINITY_MASK64;
805       return 0;     // NaN or Infinity
806     }
807     // check for non-canonical values
808     if (coeff >= 10000000000000000ull)
809       coeff = 0;
810     *pcoefficient_x = coeff;
811     // get exponent
812     tmp = x >> EXPONENT_SHIFT_LARGE64;
813     *pexponent_x = (int) (tmp & EXPONENT_MASK64);
814     return coeff;
815   }
816   // exponent
817   tmp = x >> EXPONENT_SHIFT_SMALL64;
818   *pexponent_x = (int) (tmp & EXPONENT_MASK64);
819   // coefficient
820   *pcoefficient_x = (x & SMALL_COEFF_MASK64);
821 
822   return *pcoefficient_x;
823 }
824 
825 //
826 //   BID64 pack macro (general form)
827 //
828 __BID_INLINE__ UINT64
get_BID64(UINT64 sgn,int expon,UINT64 coeff,int rmode,unsigned * fpsc)829 get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
830              unsigned *fpsc) {
831   UINT128 Stemp, Q_low;
832   UINT64 QH, r, mask, C64, remainder_h, CY, carry;
833   int extra_digits, amount, amount2;
834   unsigned status;
835 
836   if (coeff > 9999999999999999ull) {
837     expon++;
838     coeff = 1000000000000000ull;
839   }
840   // check for possible underflow/overflow
841   if (((unsigned) expon) >= 3 * 256) {
842     if (expon < 0) {
843       // underflow
844       if (expon + MAX_FORMAT_DIGITS < 0) {
845 #ifdef SET_STATUS_FLAGS
846           __set_status_flags (fpsc,
847                                   UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
848 #endif
849 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
850 #ifndef IEEE_ROUND_NEAREST
851           if (rmode == ROUNDING_DOWN && sgn)
852             return 0x8000000000000001ull;
853           if (rmode == ROUNDING_UP && !sgn)
854             return 1ull;
855 #endif
856 #endif
857           // result is 0
858           return sgn;
859       }
860 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
861 #ifndef IEEE_ROUND_NEAREST
862       if (sgn && (unsigned) (rmode - 1) < 2)
863           rmode = 3 - rmode;
864 #endif
865 #endif
866       // get digits to be shifted out
867       extra_digits = -expon;
868       coeff += round_const_table[rmode][extra_digits];
869 
870       // get coeff*(2^M[extra_digits])/10^extra_digits
871       __mul_64x128_full (QH, Q_low, coeff,
872                                reciprocals10_128[extra_digits]);
873 
874       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
875       amount = recip_scale[extra_digits];
876 
877       C64 = QH >> amount;
878 
879 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
880 #ifndef IEEE_ROUND_NEAREST
881       if (rmode == 0)         //ROUNDING_TO_NEAREST
882 #endif
883           if (C64 & 1) {
884             // check whether fractional part of initial_P/10^extra_digits is exactly .5
885 
886             // get remainder
887             amount2 = 64 - amount;
888             remainder_h = 0;
889             remainder_h--;
890             remainder_h >>= amount2;
891             remainder_h = remainder_h & QH;
892 
893             if (!remainder_h
894                 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
895                       || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
896                           && Q_low.w[0] <
897                           reciprocals10_128[extra_digits].w[0]))) {
898               C64--;
899             }
900           }
901 #endif
902 
903 #ifdef SET_STATUS_FLAGS
904 
905       if (is_inexact (fpsc))
906           __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
907       else {
908           status = INEXACT_EXCEPTION;
909           // get remainder
910           remainder_h = QH << (64 - amount);
911 
912           switch (rmode) {
913           case ROUNDING_TO_NEAREST:
914           case ROUNDING_TIES_AWAY:
915             // test whether fractional part is 0
916             if (remainder_h == 0x8000000000000000ull
917                 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
918                       || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
919                           && Q_low.w[0] <
920                           reciprocals10_128[extra_digits].w[0])))
921               status = EXACT_STATUS;
922             break;
923           case ROUNDING_DOWN:
924           case ROUNDING_TO_ZERO:
925             if (!remainder_h
926                 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
927                       || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
928                           && Q_low.w[0] <
929                           reciprocals10_128[extra_digits].w[0])))
930               status = EXACT_STATUS;
931             break;
932           default:
933             // round up
934             __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
935                                  reciprocals10_128[extra_digits].w[0]);
936             __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
937                                     reciprocals10_128[extra_digits].w[1], CY);
938             if ((remainder_h >> (64 - amount)) + carry >=
939                 (((UINT64) 1) << amount))
940               status = EXACT_STATUS;
941           }
942 
943           if (status != EXACT_STATUS)
944             __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
945       }
946 
947 #endif
948 
949       return sgn | C64;
950     }
951     while (coeff < 1000000000000000ull && expon >= 3 * 256) {
952       expon--;
953       coeff = (coeff << 3) + (coeff << 1);
954     }
955     if (expon > DECIMAL_MAX_EXPON_64) {
956 #ifdef SET_STATUS_FLAGS
957       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
958 #endif
959       // overflow
960       r = sgn | INFINITY_MASK64;
961       switch (rmode) {
962       case ROUNDING_DOWN:
963           if (!sgn)
964             r = LARGEST_BID64;
965           break;
966       case ROUNDING_TO_ZERO:
967           r = sgn | LARGEST_BID64;
968           break;
969       case ROUNDING_UP:
970           // round up
971           if (sgn)
972             r = SMALLEST_BID64;
973       }
974       return r;
975     }
976   }
977 
978   mask = 1;
979   mask <<= EXPONENT_SHIFT_SMALL64;
980 
981   // check whether coefficient fits in 10*5+3 bits
982   if (coeff < mask) {
983     r = expon;
984     r <<= EXPONENT_SHIFT_SMALL64;
985     r |= (coeff | sgn);
986     return r;
987   }
988   // special format
989 
990   // eliminate the case coeff==10^16 after rounding
991   if (coeff == 10000000000000000ull) {
992     r = expon + 1;
993     r <<= EXPONENT_SHIFT_SMALL64;
994     r |= (1000000000000000ull | sgn);
995     return r;
996   }
997 
998   r = expon;
999   r <<= EXPONENT_SHIFT_LARGE64;
1000   r |= (sgn | SPECIAL_ENCODING_MASK64);
1001   // add coeff, without leading bits
1002   mask = (mask >> 2) - 1;
1003   coeff &= mask;
1004   r |= coeff;
1005 
1006   return r;
1007 }
1008 
1009 
1010 
1011 
1012 //
1013 //   No overflow/underflow checking
1014 //
1015 __BID_INLINE__ UINT64
fast_get_BID64(UINT64 sgn,int expon,UINT64 coeff)1016 fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1017   UINT64 r, mask;
1018 
1019   mask = 1;
1020   mask <<= EXPONENT_SHIFT_SMALL64;
1021 
1022   // check whether coefficient fits in 10*5+3 bits
1023   if (coeff < mask) {
1024     r = expon;
1025     r <<= EXPONENT_SHIFT_SMALL64;
1026     r |= (coeff | sgn);
1027     return r;
1028   }
1029   // special format
1030 
1031   // eliminate the case coeff==10^16 after rounding
1032   if (coeff == 10000000000000000ull) {
1033     r = expon + 1;
1034     r <<= EXPONENT_SHIFT_SMALL64;
1035     r |= (1000000000000000ull | sgn);
1036     return r;
1037   }
1038 
1039   r = expon;
1040   r <<= EXPONENT_SHIFT_LARGE64;
1041   r |= (sgn | SPECIAL_ENCODING_MASK64);
1042   // add coeff, without leading bits
1043   mask = (mask >> 2) - 1;
1044   coeff &= mask;
1045   r |= coeff;
1046 
1047   return r;
1048 }
1049 
1050 
1051 //
1052 //   no underflow checking
1053 //
1054 __BID_INLINE__ UINT64
fast_get_BID64_check_OF(UINT64 sgn,int expon,UINT64 coeff,int rmode,unsigned * fpsc)1055 fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
1056                                unsigned *fpsc) {
1057   UINT64 r, mask;
1058 
1059   if (((unsigned) expon) >= 3 * 256 - 1) {
1060     if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
1061       expon = 3 * 256;
1062       coeff = 1000000000000000ull;
1063     }
1064 
1065     if (((unsigned) expon) >= 3 * 256) {
1066       while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1067           expon--;
1068           coeff = (coeff << 3) + (coeff << 1);
1069       }
1070       if (expon > DECIMAL_MAX_EXPON_64) {
1071 #ifdef SET_STATUS_FLAGS
1072           __set_status_flags (fpsc,
1073                                   OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1074 #endif
1075           // overflow
1076           r = sgn | INFINITY_MASK64;
1077           switch (rmode) {
1078           case ROUNDING_DOWN:
1079             if (!sgn)
1080               r = LARGEST_BID64;
1081             break;
1082           case ROUNDING_TO_ZERO:
1083             r = sgn | LARGEST_BID64;
1084             break;
1085           case ROUNDING_UP:
1086             // round up
1087             if (sgn)
1088               r = SMALLEST_BID64;
1089           }
1090           return r;
1091       }
1092     }
1093   }
1094 
1095   mask = 1;
1096   mask <<= EXPONENT_SHIFT_SMALL64;
1097 
1098   // check whether coefficient fits in 10*5+3 bits
1099   if (coeff < mask) {
1100     r = expon;
1101     r <<= EXPONENT_SHIFT_SMALL64;
1102     r |= (coeff | sgn);
1103     return r;
1104   }
1105   // special format
1106 
1107   // eliminate the case coeff==10^16 after rounding
1108   if (coeff == 10000000000000000ull) {
1109     r = expon + 1;
1110     r <<= EXPONENT_SHIFT_SMALL64;
1111     r |= (1000000000000000ull | sgn);
1112     return r;
1113   }
1114 
1115   r = expon;
1116   r <<= EXPONENT_SHIFT_LARGE64;
1117   r |= (sgn | SPECIAL_ENCODING_MASK64);
1118   // add coeff, without leading bits
1119   mask = (mask >> 2) - 1;
1120   coeff &= mask;
1121   r |= coeff;
1122 
1123   return r;
1124 }
1125 
1126 
1127 //
1128 //   No overflow/underflow checking
1129 //   or checking for coefficients equal to 10^16 (after rounding)
1130 //
1131 __BID_INLINE__ UINT64
very_fast_get_BID64(UINT64 sgn,int expon,UINT64 coeff)1132 very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1133   UINT64 r, mask;
1134 
1135   mask = 1;
1136   mask <<= EXPONENT_SHIFT_SMALL64;
1137 
1138   // check whether coefficient fits in 10*5+3 bits
1139   if (coeff < mask) {
1140     r = expon;
1141     r <<= EXPONENT_SHIFT_SMALL64;
1142     r |= (coeff | sgn);
1143     return r;
1144   }
1145   // special format
1146   r = expon;
1147   r <<= EXPONENT_SHIFT_LARGE64;
1148   r |= (sgn | SPECIAL_ENCODING_MASK64);
1149   // add coeff, without leading bits
1150   mask = (mask >> 2) - 1;
1151   coeff &= mask;
1152   r |= coeff;
1153 
1154   return r;
1155 }
1156 
1157 //
1158 //   No overflow/underflow checking or checking for coefficients above 2^53
1159 //
1160 __BID_INLINE__ UINT64
very_fast_get_BID64_small_mantissa(UINT64 sgn,int expon,UINT64 coeff)1161 very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
1162   // no UF/OF
1163   UINT64 r;
1164 
1165   r = expon;
1166   r <<= EXPONENT_SHIFT_SMALL64;
1167   r |= (coeff | sgn);
1168   return r;
1169 }
1170 
1171 
1172 //
1173 // This pack macro is used when underflow is known to occur
1174 //
1175 __BID_INLINE__ UINT64
get_BID64_UF(UINT64 sgn,int expon,UINT64 coeff,UINT64 R,int rmode,unsigned * fpsc)1176 get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
1177                 unsigned *fpsc) {
1178   UINT128 C128, Q_low, Stemp;
1179   UINT64 C64, remainder_h, QH, carry, CY;
1180   int extra_digits, amount, amount2;
1181   unsigned status;
1182 
1183   // underflow
1184   if (expon + MAX_FORMAT_DIGITS < 0) {
1185 #ifdef SET_STATUS_FLAGS
1186     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1187 #endif
1188 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1189 #ifndef IEEE_ROUND_NEAREST
1190     if (rmode == ROUNDING_DOWN && sgn)
1191       return 0x8000000000000001ull;
1192     if (rmode == ROUNDING_UP && !sgn)
1193       return 1ull;
1194 #endif
1195 #endif
1196     // result is 0
1197     return sgn;
1198   }
1199   // 10*coeff
1200   coeff = (coeff << 3) + (coeff << 1);
1201 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1202 #ifndef IEEE_ROUND_NEAREST
1203   if (sgn && (unsigned) (rmode - 1) < 2)
1204     rmode = 3 - rmode;
1205 #endif
1206 #endif
1207   if (R)
1208     coeff |= 1;
1209   // get digits to be shifted out
1210   extra_digits = 1 - expon;
1211   C128.w[0] = coeff + round_const_table[rmode][extra_digits];
1212 
1213   // get coeff*(2^M[extra_digits])/10^extra_digits
1214   __mul_64x128_full (QH, Q_low, C128.w[0],
1215                          reciprocals10_128[extra_digits]);
1216 
1217   // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1218   amount = recip_scale[extra_digits];
1219 
1220   C64 = QH >> amount;
1221   //__shr_128(C128, Q_high, amount);
1222 
1223 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1224 #ifndef IEEE_ROUND_NEAREST
1225   if (rmode == 0)   //ROUNDING_TO_NEAREST
1226 #endif
1227     if (C64 & 1) {
1228       // check whether fractional part of initial_P/10^extra_digits is exactly .5
1229 
1230       // get remainder
1231       amount2 = 64 - amount;
1232       remainder_h = 0;
1233       remainder_h--;
1234       remainder_h >>= amount2;
1235       remainder_h = remainder_h & QH;
1236 
1237       if (!remainder_h
1238             && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1239                 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1240                       && Q_low.w[0] <
1241                       reciprocals10_128[extra_digits].w[0]))) {
1242           C64--;
1243       }
1244     }
1245 #endif
1246 
1247 #ifdef SET_STATUS_FLAGS
1248 
1249   if (is_inexact (fpsc))
1250     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1251   else {
1252     status = INEXACT_EXCEPTION;
1253     // get remainder
1254     remainder_h = QH << (64 - amount);
1255 
1256     switch (rmode) {
1257     case ROUNDING_TO_NEAREST:
1258     case ROUNDING_TIES_AWAY:
1259       // test whether fractional part is 0
1260       if (remainder_h == 0x8000000000000000ull
1261             && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1262                 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1263                       && Q_low.w[0] <
1264                       reciprocals10_128[extra_digits].w[0])))
1265           status = EXACT_STATUS;
1266       break;
1267     case ROUNDING_DOWN:
1268     case ROUNDING_TO_ZERO:
1269       if (!remainder_h
1270             && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1271                 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1272                       && Q_low.w[0] <
1273                       reciprocals10_128[extra_digits].w[0])))
1274           status = EXACT_STATUS;
1275       break;
1276     default:
1277       // round up
1278       __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1279                            reciprocals10_128[extra_digits].w[0]);
1280       __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1281                                 reciprocals10_128[extra_digits].w[1], CY);
1282       if ((remainder_h >> (64 - amount)) + carry >=
1283             (((UINT64) 1) << amount))
1284           status = EXACT_STATUS;
1285     }
1286 
1287     if (status != EXACT_STATUS)
1288       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1289   }
1290 
1291 #endif
1292 
1293   return sgn | C64;
1294 
1295 }
1296 
1297 
1298 
1299 //
1300 //   This pack macro doesnot check for coefficients above 2^53
1301 //
1302 __BID_INLINE__ UINT64
get_BID64_small_mantissa(UINT64 sgn,int expon,UINT64 coeff,int rmode,unsigned * fpsc)1303 get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
1304                                 int rmode, unsigned *fpsc) {
1305   UINT128 C128, Q_low, Stemp;
1306   UINT64 r, mask, C64, remainder_h, QH, carry, CY;
1307   int extra_digits, amount, amount2;
1308   unsigned status;
1309 
1310   // check for possible underflow/overflow
1311   if (((unsigned) expon) >= 3 * 256) {
1312     if (expon < 0) {
1313       // underflow
1314       if (expon + MAX_FORMAT_DIGITS < 0) {
1315 #ifdef SET_STATUS_FLAGS
1316           __set_status_flags (fpsc,
1317                                   UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1318 #endif
1319 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1320 #ifndef IEEE_ROUND_NEAREST
1321           if (rmode == ROUNDING_DOWN && sgn)
1322             return 0x8000000000000001ull;
1323           if (rmode == ROUNDING_UP && !sgn)
1324             return 1ull;
1325 #endif
1326 #endif
1327           // result is 0
1328           return sgn;
1329       }
1330 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1331 #ifndef IEEE_ROUND_NEAREST
1332       if (sgn && (unsigned) (rmode - 1) < 2)
1333           rmode = 3 - rmode;
1334 #endif
1335 #endif
1336       // get digits to be shifted out
1337       extra_digits = -expon;
1338       C128.w[0] = coeff + round_const_table[rmode][extra_digits];
1339 
1340       // get coeff*(2^M[extra_digits])/10^extra_digits
1341       __mul_64x128_full (QH, Q_low, C128.w[0],
1342                                reciprocals10_128[extra_digits]);
1343 
1344       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1345       amount = recip_scale[extra_digits];
1346 
1347       C64 = QH >> amount;
1348 
1349 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1350 #ifndef IEEE_ROUND_NEAREST
1351       if (rmode == 0)         //ROUNDING_TO_NEAREST
1352 #endif
1353           if (C64 & 1) {
1354             // check whether fractional part of initial_P/10^extra_digits is exactly .5
1355 
1356             // get remainder
1357             amount2 = 64 - amount;
1358             remainder_h = 0;
1359             remainder_h--;
1360             remainder_h >>= amount2;
1361             remainder_h = remainder_h & QH;
1362 
1363             if (!remainder_h
1364                 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1365                       || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1366                           && Q_low.w[0] <
1367                           reciprocals10_128[extra_digits].w[0]))) {
1368               C64--;
1369             }
1370           }
1371 #endif
1372 
1373 #ifdef SET_STATUS_FLAGS
1374 
1375       if (is_inexact (fpsc))
1376           __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1377       else {
1378           status = INEXACT_EXCEPTION;
1379           // get remainder
1380           remainder_h = QH << (64 - amount);
1381 
1382           switch (rmode) {
1383           case ROUNDING_TO_NEAREST:
1384           case ROUNDING_TIES_AWAY:
1385             // test whether fractional part is 0
1386             if (remainder_h == 0x8000000000000000ull
1387                 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1388                       || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1389                           && Q_low.w[0] <
1390                           reciprocals10_128[extra_digits].w[0])))
1391               status = EXACT_STATUS;
1392             break;
1393           case ROUNDING_DOWN:
1394           case ROUNDING_TO_ZERO:
1395             if (!remainder_h
1396                 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1397                       || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1398                           && Q_low.w[0] <
1399                           reciprocals10_128[extra_digits].w[0])))
1400               status = EXACT_STATUS;
1401             break;
1402           default:
1403             // round up
1404             __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1405                                  reciprocals10_128[extra_digits].w[0]);
1406             __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1407                                     reciprocals10_128[extra_digits].w[1], CY);
1408             if ((remainder_h >> (64 - amount)) + carry >=
1409                 (((UINT64) 1) << amount))
1410               status = EXACT_STATUS;
1411           }
1412 
1413           if (status != EXACT_STATUS)
1414             __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1415       }
1416 
1417 #endif
1418 
1419       return sgn | C64;
1420     }
1421 
1422     while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1423       expon--;
1424       coeff = (coeff << 3) + (coeff << 1);
1425     }
1426     if (expon > DECIMAL_MAX_EXPON_64) {
1427 #ifdef SET_STATUS_FLAGS
1428       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1429 #endif
1430       // overflow
1431       r = sgn | INFINITY_MASK64;
1432       switch (rmode) {
1433       case ROUNDING_DOWN:
1434           if (!sgn)
1435             r = LARGEST_BID64;
1436           break;
1437       case ROUNDING_TO_ZERO:
1438           r = sgn | LARGEST_BID64;
1439           break;
1440       case ROUNDING_UP:
1441           // round up
1442           if (sgn)
1443             r = SMALLEST_BID64;
1444       }
1445       return r;
1446     } else {
1447       mask = 1;
1448       mask <<= EXPONENT_SHIFT_SMALL64;
1449       if (coeff >= mask) {
1450           r = expon;
1451           r <<= EXPONENT_SHIFT_LARGE64;
1452           r |= (sgn | SPECIAL_ENCODING_MASK64);
1453           // add coeff, without leading bits
1454           mask = (mask >> 2) - 1;
1455           coeff &= mask;
1456           r |= coeff;
1457           return r;
1458       }
1459     }
1460   }
1461 
1462   r = expon;
1463   r <<= EXPONENT_SHIFT_SMALL64;
1464   r |= (coeff | sgn);
1465 
1466   return r;
1467 }
1468 
1469 
1470 /*****************************************************************************
1471 *
1472 *    BID128 pack/unpack macros
1473 *
1474 *****************************************************************************/
1475 
1476 //
1477 //   Macro for handling BID128 underflow
1478 //         sticky bit given as additional argument
1479 //
1480 __BID_INLINE__ UINT128 *
handle_UF_128_rem(UINT128 * pres,UINT64 sgn,int expon,UINT128 CQ,UINT64 R,unsigned * prounding_mode,unsigned * fpsc)1481 handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
1482                        UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
1483   UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
1484   UINT64 carry, CY;
1485   int ed2, amount;
1486   unsigned rmode, status;
1487 
1488   // UF occurs
1489   if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1490 #ifdef SET_STATUS_FLAGS
1491     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1492 #endif
1493     pres->w[1] = sgn;
1494     pres->w[0] = 0;
1495 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1496 #ifndef IEEE_ROUND_NEAREST
1497     if ((sgn && *prounding_mode == ROUNDING_DOWN)
1498           || (!sgn && *prounding_mode == ROUNDING_UP))
1499       pres->w[0] = 1ull;
1500 #endif
1501 #endif
1502     return pres;
1503   }
1504   // CQ *= 10
1505   CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
1506   CQ2.w[0] = CQ.w[0] << 1;
1507   CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
1508   CQ8.w[0] = CQ.w[0] << 3;
1509   __add_128_128 (CQ, CQ2, CQ8);
1510 
1511   // add remainder
1512   if (R)
1513     CQ.w[0] |= 1;
1514 
1515   ed2 = 1 - expon;
1516   // add rounding constant to CQ
1517 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1518 #ifndef IEEE_ROUND_NEAREST
1519   rmode = *prounding_mode;
1520   if (sgn && (unsigned) (rmode - 1) < 2)
1521     rmode = 3 - rmode;
1522 #else
1523   rmode = 0;
1524 #endif
1525 #else
1526   rmode = 0;
1527 #endif
1528   T128 = round_const_table_128[rmode][ed2];
1529   __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1530   CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1531 
1532   TP128 = reciprocals10_128[ed2];
1533   __mul_128x128_full (Qh, Ql, CQ, TP128);
1534   amount = recip_scale[ed2];
1535 
1536   if (amount >= 64) {
1537     CQ.w[0] = Qh.w[1] >> (amount - 64);
1538     CQ.w[1] = 0;
1539   } else {
1540     __shr_128 (CQ, Qh, amount);
1541   }
1542 
1543 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1544 #ifndef IEEE_ROUND_NEAREST
1545   if (!(*prounding_mode))
1546 #endif
1547     if (CQ.w[0] & 1) {
1548       // check whether fractional part of initial_P/10^ed1 is exactly .5
1549 
1550       // get remainder
1551       __shl_128_long (Qh1, Qh, (128 - amount));
1552 
1553       if (!Qh1.w[1] && !Qh1.w[0]
1554             && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1555                 || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1556                       && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
1557           CQ.w[0]--;
1558       }
1559     }
1560 #endif
1561 
1562 #ifdef SET_STATUS_FLAGS
1563 
1564   if (is_inexact (fpsc))
1565     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1566   else {
1567     status = INEXACT_EXCEPTION;
1568     // get remainder
1569     __shl_128_long (Qh1, Qh, (128 - amount));
1570 
1571     switch (rmode) {
1572     case ROUNDING_TO_NEAREST:
1573     case ROUNDING_TIES_AWAY:
1574       // test whether fractional part is 0
1575       if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1576             && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1577                 || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1578                       && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1579           status = EXACT_STATUS;
1580       break;
1581     case ROUNDING_DOWN:
1582     case ROUNDING_TO_ZERO:
1583       if ((!Qh1.w[1]) && (!Qh1.w[0])
1584             && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1585                 || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1586                       && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1587           status = EXACT_STATUS;
1588       break;
1589     default:
1590       // round up
1591       __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1592                            reciprocals10_128[ed2].w[0]);
1593       __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1594                                 reciprocals10_128[ed2].w[1], CY);
1595       __shr_128_long (Qh, Qh1, (128 - amount));
1596       Tmp.w[0] = 1;
1597       Tmp.w[1] = 0;
1598       __shl_128_long (Tmp1, Tmp, amount);
1599       Qh.w[0] += carry;
1600       if (Qh.w[0] < carry)
1601           Qh.w[1]++;
1602       if (__unsigned_compare_ge_128 (Qh, Tmp1))
1603           status = EXACT_STATUS;
1604     }
1605 
1606     if (status != EXACT_STATUS)
1607       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1608   }
1609 
1610 #endif
1611 
1612   pres->w[1] = sgn | CQ.w[1];
1613   pres->w[0] = CQ.w[0];
1614 
1615   return pres;
1616 
1617 }
1618 
1619 
1620 //
1621 //   Macro for handling BID128 underflow
1622 //
1623 __BID_INLINE__ UINT128 *
handle_UF_128(UINT128 * pres,UINT64 sgn,int expon,UINT128 CQ,unsigned * prounding_mode,unsigned * fpsc)1624 handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
1625                  unsigned *prounding_mode, unsigned *fpsc) {
1626   UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
1627   UINT64 carry, CY;
1628   int ed2, amount;
1629   unsigned rmode, status;
1630 
1631   // UF occurs
1632   if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1633 #ifdef SET_STATUS_FLAGS
1634     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1635 #endif
1636     pres->w[1] = sgn;
1637     pres->w[0] = 0;
1638 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1639 #ifndef IEEE_ROUND_NEAREST
1640     if ((sgn && *prounding_mode == ROUNDING_DOWN)
1641           || (!sgn && *prounding_mode == ROUNDING_UP))
1642       pres->w[0] = 1ull;
1643 #endif
1644 #endif
1645     return pres;
1646   }
1647 
1648   ed2 = 0 - expon;
1649   // add rounding constant to CQ
1650 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1651 #ifndef IEEE_ROUND_NEAREST
1652   rmode = *prounding_mode;
1653   if (sgn && (unsigned) (rmode - 1) < 2)
1654     rmode = 3 - rmode;
1655 #else
1656   rmode = 0;
1657 #endif
1658 #else
1659   rmode = 0;
1660 #endif
1661 
1662   T128 = round_const_table_128[rmode][ed2];
1663   __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1664   CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1665 
1666   TP128 = reciprocals10_128[ed2];
1667   __mul_128x128_full (Qh, Ql, CQ, TP128);
1668   amount = recip_scale[ed2];
1669 
1670   if (amount >= 64) {
1671     CQ.w[0] = Qh.w[1] >> (amount - 64);
1672     CQ.w[1] = 0;
1673   } else {
1674     __shr_128 (CQ, Qh, amount);
1675   }
1676 
1677 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1678 #ifndef IEEE_ROUND_NEAREST
1679   if (!(*prounding_mode))
1680 #endif
1681     if (CQ.w[0] & 1) {
1682       // check whether fractional part of initial_P/10^ed1 is exactly .5
1683 
1684       // get remainder
1685       __shl_128_long (Qh1, Qh, (128 - amount));
1686 
1687       if (!Qh1.w[1] && !Qh1.w[0]
1688             && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1689                 || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1690                       && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
1691           CQ.w[0]--;
1692       }
1693     }
1694 #endif
1695 
1696 #ifdef SET_STATUS_FLAGS
1697 
1698   if (is_inexact (fpsc))
1699     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1700   else {
1701     status = INEXACT_EXCEPTION;
1702     // get remainder
1703     __shl_128_long (Qh1, Qh, (128 - amount));
1704 
1705     switch (rmode) {
1706     case ROUNDING_TO_NEAREST:
1707     case ROUNDING_TIES_AWAY:
1708       // test whether fractional part is 0
1709       if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1710             && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1711                 || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1712                       && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1713           status = EXACT_STATUS;
1714       break;
1715     case ROUNDING_DOWN:
1716     case ROUNDING_TO_ZERO:
1717       if ((!Qh1.w[1]) && (!Qh1.w[0])
1718             && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1719                 || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1720                       && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1721           status = EXACT_STATUS;
1722       break;
1723     default:
1724       // round up
1725       __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1726                            reciprocals10_128[ed2].w[0]);
1727       __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1728                                 reciprocals10_128[ed2].w[1], CY);
1729       __shr_128_long (Qh, Qh1, (128 - amount));
1730       Tmp.w[0] = 1;
1731       Tmp.w[1] = 0;
1732       __shl_128_long (Tmp1, Tmp, amount);
1733       Qh.w[0] += carry;
1734       if (Qh.w[0] < carry)
1735           Qh.w[1]++;
1736       if (__unsigned_compare_ge_128 (Qh, Tmp1))
1737           status = EXACT_STATUS;
1738     }
1739 
1740     if (status != EXACT_STATUS)
1741       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1742   }
1743 
1744 #endif
1745 
1746   pres->w[1] = sgn | CQ.w[1];
1747   pres->w[0] = CQ.w[0];
1748 
1749   return pres;
1750 
1751 }
1752 
1753 
1754 
1755 //
1756 //  BID128 unpack, input passed by value
1757 //
1758 __BID_INLINE__ UINT64
unpack_BID128_value(UINT64 * psign_x,int * pexponent_x,UINT128 * pcoefficient_x,UINT128 x)1759 unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
1760                          UINT128 * pcoefficient_x, UINT128 x) {
1761   UINT128 coeff, T33, T34;
1762   UINT64 ex;
1763 
1764   *psign_x = (x.w[1]) & 0x8000000000000000ull;
1765 
1766   // special encodings
1767   if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1768     if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1769       // non-canonical input
1770       pcoefficient_x->w[0] = 0;
1771       pcoefficient_x->w[1] = 0;
1772       ex = (x.w[1]) >> 47;
1773       *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1774       return 0;
1775     }
1776     // 10^33
1777     T33 = power10_table_128[33];
1778     /*coeff.w[0] = x.w[0];
1779        coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
1780        pcoefficient_x->w[0] = x.w[0];
1781        pcoefficient_x->w[1] = x.w[1];
1782        if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
1783        pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */
1784 
1785     pcoefficient_x->w[0] = x.w[0];
1786     pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
1787     if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33)) // non-canonical
1788     {
1789       pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
1790       pcoefficient_x->w[0] = 0;
1791     } else
1792       pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
1793     if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
1794       pcoefficient_x->w[0] = 0;
1795       pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
1796     }
1797     *pexponent_x = 0;
1798     return 0;       // NaN or Infinity
1799   }
1800 
1801   coeff.w[0] = x.w[0];
1802   coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;
1803 
1804   // 10^34
1805   T34 = power10_table_128[34];
1806   // check for non-canonical values
1807   if (__unsigned_compare_ge_128 (coeff, T34))
1808     coeff.w[0] = coeff.w[1] = 0;
1809 
1810   pcoefficient_x->w[0] = coeff.w[0];
1811   pcoefficient_x->w[1] = coeff.w[1];
1812 
1813   ex = (x.w[1]) >> 49;
1814   *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1815 
1816   return coeff.w[0] | coeff.w[1];
1817 }
1818 
1819 
1820 //
1821 //  BID128 unpack, input pased by reference
1822 //
1823 __BID_INLINE__ UINT64
unpack_BID128(UINT64 * psign_x,int * pexponent_x,UINT128 * pcoefficient_x,UINT128 * px)1824 unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
1825                  UINT128 * pcoefficient_x, UINT128 * px) {
1826   UINT128 coeff, T33, T34;
1827   UINT64 ex;
1828 
1829   *psign_x = (px->w[1]) & 0x8000000000000000ull;
1830 
1831   // special encodings
1832   if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1833     if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1834       // non-canonical input
1835       pcoefficient_x->w[0] = 0;
1836       pcoefficient_x->w[1] = 0;
1837       ex = (px->w[1]) >> 47;
1838       *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1839       return 0;
1840     }
1841     // 10^33
1842     T33 = power10_table_128[33];
1843     coeff.w[0] = px->w[0];
1844     coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
1845     pcoefficient_x->w[0] = px->w[0];
1846     pcoefficient_x->w[1] = px->w[1];
1847     if (__unsigned_compare_ge_128 (coeff, T33)) { // non-canonical
1848       pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
1849       pcoefficient_x->w[0] = 0;
1850     }
1851     *pexponent_x = 0;
1852     return 0;       // NaN or Infinity
1853   }
1854 
1855   coeff.w[0] = px->w[0];
1856   coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;
1857 
1858   // 10^34
1859   T34 = power10_table_128[34];
1860   // check for non-canonical values
1861   if (__unsigned_compare_ge_128 (coeff, T34))
1862     coeff.w[0] = coeff.w[1] = 0;
1863 
1864   pcoefficient_x->w[0] = coeff.w[0];
1865   pcoefficient_x->w[1] = coeff.w[1];
1866 
1867   ex = (px->w[1]) >> 49;
1868   *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1869 
1870   return coeff.w[0] | coeff.w[1];
1871 }
1872 
1873 //
1874 //   Pack macro checks for overflow, but not underflow
1875 //
1876 __BID_INLINE__ UINT128 *
get_BID128_very_fast_OF(UINT128 * pres,UINT64 sgn,int expon,UINT128 coeff,unsigned * prounding_mode,unsigned * fpsc)1877 get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
1878                                UINT128 coeff, unsigned *prounding_mode,
1879                                unsigned *fpsc) {
1880   UINT128 T;
1881   UINT64 tmp, tmp2;
1882 
1883   if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1884 
1885     if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
1886       T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
1887       while (__unsigned_compare_gt_128 (T, coeff)
1888                && expon > DECIMAL_MAX_EXPON_128) {
1889           coeff.w[1] =
1890             (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
1891             (coeff.w[0] >> 63);
1892           tmp2 = coeff.w[0] << 3;
1893           coeff.w[0] = (coeff.w[0] << 1) + tmp2;
1894           if (coeff.w[0] < tmp2)
1895             coeff.w[1]++;
1896 
1897           expon--;
1898       }
1899     }
1900     if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1901       // OF
1902 #ifdef SET_STATUS_FLAGS
1903       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1904 #endif
1905 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1906 #ifndef IEEE_ROUND_NEAREST
1907       if (*prounding_mode == ROUNDING_TO_ZERO
1908             || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
1909                                                                        &&
1910                                                                        *prounding_mode
1911                                                                        ==
1912                                                                        ROUNDING_DOWN))
1913       {
1914           pres->w[1] = sgn | LARGEST_BID128_HIGH;
1915           pres->w[0] = LARGEST_BID128_LOW;
1916       } else
1917 #endif
1918 #endif
1919       {
1920           pres->w[1] = sgn | INFINITY_MASK64;
1921           pres->w[0] = 0;
1922       }
1923       return pres;
1924     }
1925   }
1926 
1927   pres->w[0] = coeff.w[0];
1928   tmp = expon;
1929   tmp <<= 49;
1930   pres->w[1] = sgn | tmp | coeff.w[1];
1931 
1932   return pres;
1933 }
1934 
1935 
1936 //
1937 //   No overflow/underflow checks
1938 //   No checking for coefficient == 10^34 (rounding artifact)
1939 //
1940 __BID_INLINE__ UINT128 *
get_BID128_very_fast(UINT128 * pres,UINT64 sgn,int expon,UINT128 coeff)1941 get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
1942                           UINT128 coeff) {
1943   UINT64 tmp;
1944 
1945   pres->w[0] = coeff.w[0];
1946   tmp = expon;
1947   tmp <<= 49;
1948   pres->w[1] = sgn | tmp | coeff.w[1];
1949 
1950   return pres;
1951 }
1952 
1953 //
1954 //   No overflow/underflow checks
1955 //
1956 __BID_INLINE__ UINT128 *
get_BID128_fast(UINT128 * pres,UINT64 sgn,int expon,UINT128 coeff)1957 get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
1958   UINT64 tmp;
1959 
1960   // coeff==10^34?
1961   if (coeff.w[1] == 0x0001ed09bead87c0ull
1962       && coeff.w[0] == 0x378d8e6400000000ull) {
1963     expon++;
1964     // set coefficient to 10^33
1965     coeff.w[1] = 0x0000314dc6448d93ull;
1966     coeff.w[0] = 0x38c15b0a00000000ull;
1967   }
1968 
1969   pres->w[0] = coeff.w[0];
1970   tmp = expon;
1971   tmp <<= 49;
1972   pres->w[1] = sgn | tmp | coeff.w[1];
1973 
1974   return pres;
1975 }
1976 
1977 //
1978 //   General BID128 pack macro
1979 //
1980 __BID_INLINE__ UINT128 *
get_BID128(UINT128 * pres,UINT64 sgn,int expon,UINT128 coeff,unsigned * prounding_mode,unsigned * fpsc)1981 get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
1982               unsigned *prounding_mode, unsigned *fpsc) {
1983   UINT128 T;
1984   UINT64 tmp, tmp2;
1985 
1986   // coeff==10^34?
1987   if (coeff.w[1] == 0x0001ed09bead87c0ull
1988       && coeff.w[0] == 0x378d8e6400000000ull) {
1989     expon++;
1990     // set coefficient to 10^33
1991     coeff.w[1] = 0x0000314dc6448d93ull;
1992     coeff.w[0] = 0x38c15b0a00000000ull;
1993   }
1994   // check OF, UF
1995   if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
1996     // check UF
1997     if (expon < 0) {
1998       return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
1999                                   fpsc);
2000     }
2001 
2002     if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
2003       T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
2004       while (__unsigned_compare_gt_128 (T, coeff)
2005                && expon > DECIMAL_MAX_EXPON_128) {
2006           coeff.w[1] =
2007             (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
2008             (coeff.w[0] >> 63);
2009           tmp2 = coeff.w[0] << 3;
2010           coeff.w[0] = (coeff.w[0] << 1) + tmp2;
2011           if (coeff.w[0] < tmp2)
2012             coeff.w[1]++;
2013 
2014           expon--;
2015       }
2016     }
2017     if (expon > DECIMAL_MAX_EXPON_128) {
2018       if (!(coeff.w[1] | coeff.w[0])) {
2019           pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
2020           pres->w[0] = 0;
2021           return pres;
2022       }
2023       // OF
2024 #ifdef SET_STATUS_FLAGS
2025       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2026 #endif
2027 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2028 #ifndef IEEE_ROUND_NEAREST
2029       if (*prounding_mode == ROUNDING_TO_ZERO
2030             || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
2031                                                                        &&
2032                                                                        *prounding_mode
2033                                                                        ==
2034                                                                        ROUNDING_DOWN))
2035       {
2036           pres->w[1] = sgn | LARGEST_BID128_HIGH;
2037           pres->w[0] = LARGEST_BID128_LOW;
2038       } else
2039 #endif
2040 #endif
2041       {
2042           pres->w[1] = sgn | INFINITY_MASK64;
2043           pres->w[0] = 0;
2044       }
2045       return pres;
2046     }
2047   }
2048 
2049   pres->w[0] = coeff.w[0];
2050   tmp = expon;
2051   tmp <<= 49;
2052   pres->w[1] = sgn | tmp | coeff.w[1];
2053 
2054   return pres;
2055 }
2056 
2057 
2058 //
2059 //  Macro used for conversions from string
2060 //        (no additional arguments given for rounding mode, status flags)
2061 //
2062 __BID_INLINE__ UINT128 *
get_BID128_string(UINT128 * pres,UINT64 sgn,int expon,UINT128 coeff)2063 get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
2064   UINT128 D2, D8;
2065   UINT64 tmp;
2066   unsigned rmode = 0, status;
2067 
2068   // coeff==10^34?
2069   if (coeff.w[1] == 0x0001ed09bead87c0ull
2070       && coeff.w[0] == 0x378d8e6400000000ull) {
2071     expon++;
2072     // set coefficient to 10^33
2073     coeff.w[1] = 0x0000314dc6448d93ull;
2074     coeff.w[0] = 0x38c15b0a00000000ull;
2075   }
2076   // check OF, UF
2077   if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
2078     // check UF
2079     if (expon < 0)
2080       return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);
2081 
2082     // OF
2083 
2084     if (expon < DECIMAL_MAX_EXPON_128 + 34) {
2085       while (expon > DECIMAL_MAX_EXPON_128 &&
2086                (coeff.w[1] < power10_table_128[33].w[1] ||
2087                 (coeff.w[1] == power10_table_128[33].w[1]
2088                  && coeff.w[0] < power10_table_128[33].w[0]))) {
2089           D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
2090           D2.w[0] = coeff.w[0] << 1;
2091           D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
2092           D8.w[0] = coeff.w[0] << 3;
2093 
2094           __add_128_128 (coeff, D2, D8);
2095           expon--;
2096       }
2097     } else if (!(coeff.w[0] | coeff.w[1]))
2098       expon = DECIMAL_MAX_EXPON_128;
2099 
2100     if (expon > DECIMAL_MAX_EXPON_128) {
2101       pres->w[1] = sgn | INFINITY_MASK64;
2102       pres->w[0] = 0;
2103       switch (rmode) {
2104       case ROUNDING_DOWN:
2105           if (!sgn) {
2106             pres->w[1] = LARGEST_BID128_HIGH;
2107             pres->w[0] = LARGEST_BID128_LOW;
2108           }
2109           break;
2110       case ROUNDING_TO_ZERO:
2111           pres->w[1] = sgn | LARGEST_BID128_HIGH;
2112           pres->w[0] = LARGEST_BID128_LOW;
2113           break;
2114       case ROUNDING_UP:
2115           // round up
2116           if (sgn) {
2117             pres->w[1] = sgn | LARGEST_BID128_HIGH;
2118             pres->w[0] = LARGEST_BID128_LOW;
2119           }
2120           break;
2121       }
2122 
2123       return pres;
2124     }
2125   }
2126 
2127   pres->w[0] = coeff.w[0];
2128   tmp = expon;
2129   tmp <<= 49;
2130   pres->w[1] = sgn | tmp | coeff.w[1];
2131 
2132   return pres;
2133 }
2134 
2135 
2136 
2137 /*****************************************************************************
2138 *
2139 *    BID32 pack/unpack macros
2140 *
2141 *****************************************************************************/
2142 
2143 
2144 __BID_INLINE__ UINT32
unpack_BID32(UINT32 * psign_x,int * pexponent_x,UINT32 * pcoefficient_x,UINT32 x)2145 unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
2146                 UINT32 * pcoefficient_x, UINT32 x) {
2147   UINT32 tmp;
2148 
2149   *psign_x = x & 0x80000000;
2150 
2151   if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
2152     // special encodings
2153     if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
2154       *pcoefficient_x = x & 0xfe0fffff;
2155       if ((x & 0x000fffff) >= 1000000)
2156           *pcoefficient_x = x & 0xfe000000;
2157       if ((x & NAN_MASK32) == INFINITY_MASK32)
2158           *pcoefficient_x = x & 0xf8000000;
2159       *pexponent_x = 0;
2160       return 0;     // NaN or Infinity
2161     }
2162     // coefficient
2163     *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
2164     // check for non-canonical value
2165     if (*pcoefficient_x >= 10000000)
2166       *pcoefficient_x = 0;
2167     // get exponent
2168     tmp = x >> 21;
2169     *pexponent_x = tmp & EXPONENT_MASK32;
2170     return 1;
2171   }
2172   // exponent
2173   tmp = x >> 23;
2174   *pexponent_x = tmp & EXPONENT_MASK32;
2175   // coefficient
2176   *pcoefficient_x = (x & LARGE_COEFF_MASK32);
2177 
2178   return *pcoefficient_x;
2179 }
2180 
2181 //
2182 //   General pack macro for BID32
2183 //
2184 __BID_INLINE__ UINT32
get_BID32(UINT32 sgn,int expon,UINT64 coeff,int rmode,unsigned * fpsc)2185 get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
2186              unsigned *fpsc) {
2187   UINT128 Q;
2188   UINT64 C64, remainder_h, carry, Stemp;
2189   UINT32 r, mask;
2190   int extra_digits, amount, amount2;
2191   unsigned status;
2192 
2193   if (coeff > 9999999ull) {
2194     expon++;
2195     coeff = 1000000ull;
2196   }
2197   // check for possible underflow/overflow
2198   if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2199     if (expon < 0) {
2200       // underflow
2201       if (expon + MAX_FORMAT_DIGITS_32 < 0) {
2202 #ifdef SET_STATUS_FLAGS
2203           __set_status_flags (fpsc,
2204                                   UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2205 #endif
2206 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2207 #ifndef IEEE_ROUND_NEAREST
2208           if (rmode == ROUNDING_DOWN && sgn)
2209             return 0x80000001;
2210           if (rmode == ROUNDING_UP && !sgn)
2211             return 1;
2212 #endif
2213 #endif
2214           // result is 0
2215           return sgn;
2216       }
2217       // get digits to be shifted out
2218 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2219       rmode = 0;
2220 #endif
2221 #ifdef IEEE_ROUND_NEAREST
2222       rmode = 0;
2223 #endif
2224 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2225 #ifndef IEEE_ROUND_NEAREST
2226       if (sgn && (unsigned) (rmode - 1) < 2)
2227           rmode = 3 - rmode;
2228 #endif
2229 #endif
2230 
2231       extra_digits = -expon;
2232       coeff += round_const_table[rmode][extra_digits];
2233 
2234       // get coeff*(2^M[extra_digits])/10^extra_digits
2235       __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);
2236 
2237       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
2238       amount = short_recip_scale[extra_digits];
2239 
2240       C64 = Q.w[1] >> amount;
2241 
2242 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2243 #ifndef IEEE_ROUND_NEAREST
2244       if (rmode == 0)         //ROUNDING_TO_NEAREST
2245 #endif
2246           if (C64 & 1) {
2247             // check whether fractional part of initial_P/10^extra_digits is exactly .5
2248 
2249             // get remainder
2250             amount2 = 64 - amount;
2251             remainder_h = 0;
2252             remainder_h--;
2253             remainder_h >>= amount2;
2254             remainder_h = remainder_h & Q.w[1];
2255 
2256             if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
2257               C64--;
2258             }
2259           }
2260 #endif
2261 
2262 #ifdef SET_STATUS_FLAGS
2263 
2264       if (is_inexact (fpsc))
2265           __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
2266       else {
2267           status = INEXACT_EXCEPTION;
2268           // get remainder
2269           remainder_h = Q.w[1] << (64 - amount);
2270 
2271           switch (rmode) {
2272           case ROUNDING_TO_NEAREST:
2273           case ROUNDING_TIES_AWAY:
2274             // test whether fractional part is 0
2275             if (remainder_h == 0x8000000000000000ull
2276                 && (Q.w[0] < reciprocals10_64[extra_digits]))
2277               status = EXACT_STATUS;
2278             break;
2279           case ROUNDING_DOWN:
2280           case ROUNDING_TO_ZERO:
2281             if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
2282               status = EXACT_STATUS;
2283             break;
2284           default:
2285             // round up
2286             __add_carry_out (Stemp, carry, Q.w[0],
2287                                  reciprocals10_64[extra_digits]);
2288             if ((remainder_h >> (64 - amount)) + carry >=
2289                 (((UINT64) 1) << amount))
2290               status = EXACT_STATUS;
2291           }
2292 
2293           if (status != EXACT_STATUS)
2294             __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
2295       }
2296 
2297 #endif
2298 
2299       return sgn | (UINT32) C64;
2300     }
2301 
2302     while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
2303       coeff = (coeff << 3) + (coeff << 1);
2304       expon--;
2305     }
2306     if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2307       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2308       // overflow
2309       r = sgn | INFINITY_MASK32;
2310       switch (rmode) {
2311       case ROUNDING_DOWN:
2312           if (!sgn)
2313             r = LARGEST_BID32;
2314           break;
2315       case ROUNDING_TO_ZERO:
2316           r = sgn | LARGEST_BID32;
2317           break;
2318       case ROUNDING_UP:
2319           // round up
2320           if (sgn)
2321             r = sgn | LARGEST_BID32;
2322       }
2323       return r;
2324     }
2325   }
2326 
2327   mask = 1 << 23;
2328 
2329   // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2330   if (coeff < mask) {
2331     r = expon;
2332     r <<= 23;
2333     r |= ((UINT32) coeff | sgn);
2334     return r;
2335   }
2336   // special format
2337 
2338   r = expon;
2339   r <<= 21;
2340   r |= (sgn | SPECIAL_ENCODING_MASK32);
2341   // add coeff, without leading bits
2342   mask = (1 << 21) - 1;
2343   r |= (((UINT32) coeff) & mask);
2344 
2345   return r;
2346 }
2347 
2348 
2349 
2350 //
2351 //   no overflow/underflow checks
2352 //
2353 __BID_INLINE__ UINT32
very_fast_get_BID32(UINT32 sgn,int expon,UINT32 coeff)2354 very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
2355   UINT32 r, mask;
2356 
2357   mask = 1 << 23;
2358 
2359   // check whether coefficient fits in 10*2+3 bits
2360   if (coeff < mask) {
2361     r = expon;
2362     r <<= 23;
2363     r |= (coeff | sgn);
2364     return r;
2365   }
2366   // special format
2367   r = expon;
2368   r <<= 21;
2369   r |= (sgn | SPECIAL_ENCODING_MASK32);
2370   // add coeff, without leading bits
2371   mask = (1 << 21) - 1;
2372   coeff &= mask;
2373   r |= coeff;
2374 
2375   return r;
2376 }
2377 
2378 
2379 
2380 /*************************************************************
2381  *
2382  *************************************************************/
2383 typedef
2384 ALIGN (16)
2385      struct {
2386        UINT64 w[6];
2387      } UINT384;
2388      typedef ALIGN (16)
2389      struct {
2390        UINT64 w[8];
2391      } UINT512;
2392 
2393 // #define P                               34
2394 #define MASK_STEERING_BITS              0x6000000000000000ull
2395 #define MASK_BINARY_EXPONENT1           0x7fe0000000000000ull
2396 #define MASK_BINARY_SIG1                0x001fffffffffffffull
2397 #define MASK_BINARY_EXPONENT2           0x1ff8000000000000ull
2398     //used to take G[2:w+3] (sec 3.3)
2399 #define MASK_BINARY_SIG2                0x0007ffffffffffffull
2400     //used to mask out G4:T0 (sec 3.3)
2401 #define MASK_BINARY_OR2                 0x0020000000000000ull
2402     //used to prefix 8+G4 to T (sec 3.3)
2403 #define UPPER_EXPON_LIMIT               51
2404 #define MASK_EXP                        0x7ffe000000000000ull
2405 #define MASK_SPECIAL                    0x7800000000000000ull
2406 #define MASK_NAN                        0x7c00000000000000ull
2407 #define MASK_SNAN                       0x7e00000000000000ull
2408 #define MASK_ANY_INF                    0x7c00000000000000ull
2409 #define MASK_INF                        0x7800000000000000ull
2410 #define MASK_SIGN                       0x8000000000000000ull
2411 #define MASK_COEFF                      0x0001ffffffffffffull
2412 #define BIN_EXP_BIAS                    (0x1820ull << 49)
2413 
2414 #define EXP_MIN                         0x0000000000000000ull
2415    // EXP_MIN = (-6176 + 6176) << 49
2416 #define EXP_MAX                         0x5ffe000000000000ull
2417   // EXP_MAX = (6111 + 6176) << 49
2418 #define EXP_MAX_P1                      0x6000000000000000ull
2419   // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2420 #define EXP_P1                          0x0002000000000000ull
2421   // EXP_ P1= 1 << 49
2422 #define expmin                            -6176
2423   // min unbiased exponent
2424 #define expmax                            6111
2425   // max unbiased exponent
2426 #define expmin16                          -398
2427   // min unbiased exponent
2428 #define expmax16                          369
2429   // max unbiased exponent
2430 
2431 #define SIGNMASK32 0x80000000
2432 #define BID64_SIG_MAX 0x002386F26FC0ffffull
2433 #define SIGNMASK64    0x8000000000000000ull
2434 
2435 // typedef unsigned int FPSC; // floating-point status and control
2436           // bit31:
2437           // bit30:
2438           // bit29:
2439           // bit28:
2440           // bit27:
2441           // bit26:
2442           // bit25:
2443           // bit24:
2444           // bit23:
2445           // bit22:
2446           // bit21:
2447           // bit20:
2448           // bit19:
2449           // bit18:
2450           // bit17:
2451           // bit16:
2452           // bit15:
2453           // bit14: RC:2
2454           // bit13: RC:1
2455           // bit12: RC:0
2456           // bit11: PM
2457           // bit10: UM
2458           // bit9:  OM
2459           // bit8:  ZM
2460           // bit7:  DM
2461           // bit6:  IM
2462           // bit5:  PE
2463           // bit4:  UE
2464           // bit3:  OE
2465           // bit2:  ZE
2466           // bit1:  DE
2467           // bit0:  IE
2468 
2469 #define ROUNDING_MODE_MASK    0x00007000
2470 
2471      typedef struct _DEC_DIGITS {
2472        unsigned int digits;
2473        UINT64 threshold_hi;
2474        UINT64 threshold_lo;
2475        unsigned int digits1;
2476      } DEC_DIGITS;
2477 
2478      extern DEC_DIGITS nr_digits[];
2479      extern UINT64 midpoint64[];
2480      extern UINT128 midpoint128[];
2481      extern UINT192 midpoint192[];
2482      extern UINT256 midpoint256[];
2483      extern UINT64 ten2k64[];
2484      extern UINT128 ten2k128[];
2485      extern UINT256 ten2k256[];
2486      extern UINT128 ten2mk128[];
2487      extern UINT64 ten2mk64[];
2488      extern UINT128 ten2mk128trunc[];
2489      extern int shiftright128[];
2490      extern UINT64 maskhigh128[];
2491      extern UINT64 maskhigh128M[];
2492      extern UINT64 maskhigh192M[];
2493      extern UINT64 maskhigh256M[];
2494      extern UINT64 onehalf128[];
2495      extern UINT64 onehalf128M[];
2496      extern UINT64 onehalf192M[];
2497      extern UINT64 onehalf256M[];
2498      extern UINT128 ten2mk128M[];
2499      extern UINT128 ten2mk128truncM[];
2500      extern UINT192 ten2mk192truncM[];
2501      extern UINT256 ten2mk256truncM[];
2502      extern int shiftright128M[];
2503      extern int shiftright192M[];
2504      extern int shiftright256M[];
2505      extern UINT192 ten2mk192M[];
2506      extern UINT256 ten2mk256M[];
2507      extern unsigned char char_table2[];
2508      extern unsigned char char_table3[];
2509 
2510      extern UINT64 ten2m3k64[];
2511      extern unsigned int shift_ten2m3k64[];
2512      extern UINT128 ten2m3k128[];
2513      extern unsigned int shift_ten2m3k128[];
2514 
2515 
2516 
2517 /***************************************************************************
2518  *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2519  ***************************************************************************/
2520 
2521      extern UINT64 Kx64[];
2522      extern unsigned int Ex64m64[];
2523      extern UINT64 half64[];
2524      extern UINT64 mask64[];
2525      extern UINT64 ten2mxtrunc64[];
2526 
2527      extern UINT128 Kx128[];
2528      extern unsigned int Ex128m128[];
2529      extern UINT64 half128[];
2530      extern UINT64 mask128[];
2531      extern UINT128 ten2mxtrunc128[];
2532 
2533      extern UINT192 Kx192[];
2534      extern unsigned int Ex192m192[];
2535      extern UINT64 half192[];
2536      extern UINT64 mask192[];
2537      extern UINT192 ten2mxtrunc192[];
2538 
2539      extern UINT256 Kx256[];
2540      extern unsigned int Ex256m256[];
2541      extern UINT64 half256[];
2542      extern UINT64 mask256[];
2543      extern UINT256 ten2mxtrunc256[];
2544 
2545      typedef union __bid64_128 {
2546        UINT64 b64;
2547        UINT128 b128;
2548      } BID64_128;
2549 
2550      BID64_128 bid_fma (unsigned int P0,
2551                               BID64_128 x1, unsigned int P1,
2552                               BID64_128 y1, unsigned int P2,
2553                               BID64_128 z1, unsigned int P3,
2554                               unsigned int rnd_mode, FPSC * fpsc);
2555 
2556 #define         P16     16
2557 #define         P34     34
2558 
2559      union __int_double {
2560        UINT64 i;
2561        double d;
2562      };
2563      typedef union __int_double int_double;
2564 
2565 
2566      union __int_float {
2567        UINT32 i;
2568        float d;
2569      };
2570      typedef union __int_float int_float;
2571 
2572 #define SWAP(A,B,T) {\
2573         T = A; \
2574         A = B; \
2575         B = T; \
2576 }
2577 
2578 // this macro will find coefficient_x to be in [2^A, 2^(A+1) )
2579 // ie it knows that it is A bits long
2580 #define NUMBITS(A, coefficient_x, tempx){\
2581       temp_x.d=(float)coefficient_x;\
2582       A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
2583 }
2584 
2585      enum class_types {
2586        signalingNaN,
2587        quietNaN,
2588        negativeInfinity,
2589        negativeNormal,
2590        negativeSubnormal,
2591        negativeZero,
2592        positiveZero,
2593        positiveSubnormal,
2594        positiveNormal,
2595        positiveInfinity
2596      };
2597 
2598      typedef union {
2599        UINT64 ui64;
2600        double d;
2601      } BID_UI64DOUBLE;
2602 
2603 #endif
2604