1*         $NetBSD: bindec.sa,v 1.5 2001/12/09 01:43:13 briggs Exp $
2
3*         MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4*         M68000 Hi-Performance Microprocessor Division
5*         M68040 Software Package
6*
7*         M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8*         All rights reserved.
9*
10*         THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11*         To the maximum extent permitted by applicable law,
12*         MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13*         INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14*         PARTICULAR PURPOSE and any warranty against infringement with
15*         regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16*         and any accompanying written materials.
17*
18*         To the maximum extent permitted by applicable law,
19*         IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20*         (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21*         PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22*         OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23*         SOFTWARE.  Motorola assumes no responsibility for the maintenance
24*         and support of the SOFTWARE.
25*
26*         You are hereby granted a copyright license to use, modify, and
27*         distribute the SOFTWARE so long as this entire notice is retained
28*         without alteration in any modified and/or redistributed versions,
29*         and that such modified versions are clearly identified as such.
30*         No licenses are granted by implication, estoppel or otherwise
31*         under any patents or trademarks of Motorola, Inc.
32
33*
34*         bindec.sa 3.4 1/3/91
35*
36*         bindec
37*
38*         Description:
39*                   Converts an input in extended precision format
40*                   to bcd format.
41*
42*         Input:
43*                   a0 points to the input extended precision value
44*                   value in memory; d0 contains the k-factor sign-extended
45*                   to 32-bits.  The input may be either normalized,
46*                   unnormalized, or denormalized.
47*
48*         Output:   result in the FP_SCR1 space on the stack.
49*
50*         Saves and Modifies: D2-D7,A2,FP2
51*
52*         Algorithm:
53*
54*         A1.       Set RM and size ext;  Set SIGMA = sign of input.
55*                   The k-factor is saved for use in d7. Clear the
56*                   BINDEC_FLG for separating normalized/denormalized
57*                   input.  If input is unnormalized or denormalized,
58*                   normalize it.
59*
60*         A2.       Set X = abs(input).
61*
62*         A3.       Compute ILOG.
63*                   ILOG is the log base 10 of the input value.  It is
64*                   approximated by adding e + 0.f when the original
65*                   value is viewed as 2^^e * 1.f in extended precision.
66*                   This value is stored in d6.
67*
68*         A4.       Clr INEX bit.
69*                   The operation in A3 above may have set INEX2.
70*
71*         A5.       Set ICTR = 0;
72*                   ICTR is a flag used in A13.  It must be set before the
73*                   loop entry A6.
74*
75*         A6.       Calculate LEN.
76*                   LEN is the number of digits to be displayed.  The
77*                   k-factor can dictate either the total number of digits,
78*                   if it is a positive number, or the number of digits
79*                   after the decimal point which are to be included as
80*                   significant.  See the 68882 manual for examples.
81*                   If LEN is computed to be greater than 17, set OPERR in
82*                   USER_FPSR.  LEN is stored in d4.
83*
84*         A7.       Calculate SCALE.
85*                   SCALE is equal to 10^ISCALE, where ISCALE is the number
86*                   of decimal places needed to insure LEN integer digits
87*                   in the output before conversion to bcd. LAMBDA is the
88*                   sign of ISCALE, used in A9. Fp1 contains
89*                   10^^(abs(ISCALE)) using a rounding mode which is a
90*                   function of the original rounding mode and the signs
91*                   of ISCALE and X.  A table is given in the code.
92*
93*         A8.       Clr INEX; Force RZ.
94*                   The operation in A3 above may have set INEX2.
95*                   RZ mode is forced for the scaling operation to insure
96*                   only one rounding error.  The grs bits are collected in
97*                   the INEX flag for use in A10.
98*
99*         A9.       Scale X -> Y.
100*                   The mantissa is scaled to the desired number of
101*                   significant digits.  The excess digits are collected
102*                   in INEX2.
103*
104*         A10.      Or in INEX.
105*                   If INEX is set, round error occurred.  This is
106*                   compensated for by 'or-ing' in the INEX2 flag to
107*                   the lsb of Y.
108*
109*         A11.      Restore original FPCR; set size ext.
110*                   Perform FINT operation in the user's rounding mode.
111*                   Keep the size to extended.
112*
113*         A12.      Calculate YINT = FINT(Y) according to user's rounding
114*                   mode.  The FPSP routine sintd0 is used.  The output
115*                   is in fp0.
116*
117*         A13.      Check for LEN digits.
118*                   If the int operation results in more than LEN digits,
119*                   or less than LEN -1 digits, adjust ILOG and repeat from
120*                   A6.  This test occurs only on the first pass.  If the
121*                   result is exactly 10^LEN, decrement ILOG and divide
122*                   the mantissa by 10.
123*
124*         A14.      Convert the mantissa to bcd.
125*                   The binstr routine is used to convert the LEN digit
126*                   mantissa to bcd in memory.  The input to binstr is
127*                   to be a fraction; i.e. (mantissa)/10^LEN and adjusted
128*                   such that the decimal point is to the left of bit 63.
129*                   The bcd digits are stored in the correct position in
130*                   the final string area in memory.
131*
132*         A15.      Convert the exponent to bcd.
133*                   As in A14 above, the exp is converted to bcd and the
134*                   digits are stored in the final string.
135*                   Test the length of the final exponent string.  If the
136*                   length is 4, set operr.
137*
138*         A16.      Write sign bits to final string.
139*
140*         Implementation Notes:
141*
142*         The registers are used as follows:
143*
144*                   d0: scratch; LEN input to binstr
145*                   d1: scratch
146*                   d2: upper 32-bits of mantissa for binstr
147*                   d3: scratch;lower 32-bits of mantissa for binstr
148*                   d4: LEN
149*                   d5: LAMBDA/ICTR
150*                   d6: ILOG
151*                   d7: k-factor
152*                   a0: ptr for original operand/final result
153*                   a1: scratch pointer
154*                   a2: pointer to FP_X; abs(original value) in ext
155*                   fp0: scratch
156*                   fp1: scratch
157*                   fp2: scratch
158*                   F_SCR1:
159*                   F_SCR2:
160*                   L_SCR1:
161*                   L_SCR2:
162*
163
164BINDEC    IDNT    2,1 Motorola 040 Floating Point Software Package
165
166          include   fpsp.h
167
168          section   8
169
170* Constants in extended precision
171LOG2      dc.l      $3FFD0000,$9A209A84,$FBCFF798,$00000000
172LOG2UP1   dc.l      $3FFD0000,$9A209A84,$FBCFF799,$00000000
173
174* Constants in single precision
175FONE      dc.l      $3F800000,$00000000,$00000000,$00000000
176FTWO      dc.l      $40000000,$00000000,$00000000,$00000000
177FTEN      dc.l      $41200000,$00000000,$00000000,$00000000
178F4933     dc.l      $459A2800,$00000000,$00000000,$00000000
179
180RBDTBL    dc.b      0,0,0,0
181          dc.b      3,3,2,2
182          dc.b      3,2,2,3
183          dc.b      2,3,3,2
184
185          xref      binstr
186          xref      sintdo
187          xref      ptenrn,ptenrm,ptenrp
188
189          xdef      bindec
190          xdef      sc_mul
191bindec:
192          movem.l   d2-d7/a2,-(a7)
193          fmovem.x fp0-fp2,-(a7)
194
195* A1. Set RM and size ext. Set SIGMA = sign input;
196*     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
197*     separating  normalized/denormalized input.  If the input
198*     is a denormalized number, set the BINDEC_FLG memory word
199*     to signal denorm.  If the input is unnormalized, normalize
200*     the input and test for denormalized result.
201*
202          fmove.l   #rm_mode,FPCR       ;set RM and ext
203          move.l    (a0),L_SCR2(a6)     ;save exponent for sign check
204          move.l    d0,d7               ;move k-factor to d7
205          clr.b     BINDEC_FLG(a6)      ;clr norm/denorm flag
206          move.w    STAG(a6),d0         ;get stag
207          andi.w    #$e000,d0 ;isolate stag bits
208          beq       A2_str              ;if zero, input is norm
209*
210* Normalize the denorm
211*
212un_de_norm:
213          move.w    (a0),d0
214          andi.w    #$7fff,d0 ;strip sign of normalized exp
215          move.l    4(a0),d1
216          move.l    8(a0),d2
217norm_loop:
218          sub.w     #1,d0
219          add.l     d2,d2
220          addx.l    d1,d1
221          tst.l     d1
222          bge.b     norm_loop
223*
224* Test if the normalized input is denormalized
225*
226          tst.w     d0
227          bgt.b     pos_exp             ;if greater than zero, it is a norm
228          st        BINDEC_FLG(a6)      ;set flag for denorm
229pos_exp:
230          andi.w    #$7fff,d0 ;strip sign of normalized exp
231          move.w    d0,(a0)
232          move.l    d1,4(a0)
233          move.l    d2,8(a0)
234
235* A2. Set X = abs(input).
236*
237A2_str:
238          move.l    (a0),FP_SCR2(a6) ; move input to work space
239          move.l    4(a0),FP_SCR2+4(a6) ; move input to work space
240          move.l    8(a0),FP_SCR2+8(a6) ; move input to work space
241          andi.l    #$7fffffff,FP_SCR2(a6) ;create abs(X)
242
243* A3. Compute ILOG.
244*     ILOG is the log base 10 of the input value.  It is approx-
245*     imated by adding e + 0.f when the original value is viewed
246*     as 2^^e * 1.f in extended precision.  This value is stored
247*     in d6.
248*
249* Register usage:
250*         Input/Output
251*         d0: k-factor/exponent
252*         d2: x/x
253*         d3: x/x
254*         d4: x/x
255*         d5: x/x
256*         d6: x/ILOG
257*         d7: k-factor/Unchanged
258*         a0: ptr for original operand/final result
259*         a1: x/x
260*         a2: x/x
261*         fp0: x/float(ILOG)
262*         fp1: x/x
263*         fp2: x/x
264*         F_SCR1:x/x
265*         F_SCR2:Abs(X)/Abs(X) with $3fff exponent
266*         L_SCR1:x/x
267*         L_SCR2:first word of X packed/Unchanged
268
269          tst.b     BINDEC_FLG(a6)      ;check for denorm
270          beq.b     A3_cont             ;if clr, continue with norm
271          move.l    #-4933,d6 ;force ILOG = -4933
272          bra.b     A4_str
273A3_cont:
274          move.w    FP_SCR2(a6),d0      ;move exp to d0
275          move.w    #$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
276          fmove.x   FP_SCR2(a6),fp0     ;now fp0 has 1.f
277          sub.w     #$3fff,d0 ;strip off bias
278          fadd.w    d0,fp0              ;add in exp
279          fsub.s    FONE,fp0  ;subtract off 1.0
280          fbge.w    pos_res             ;if pos, branch
281          fmul.x    LOG2UP1,fp0         ;if neg, mul by LOG2UP1
282          fmove.l   fp0,d6              ;put ILOG in d6 as a lword
283          bra.b     A4_str              ;go move out ILOG
284pos_res:
285          fmul.x    LOG2,fp0  ;if pos, mul by LOG2
286          fmove.l   fp0,d6              ;put ILOG in d6 as a lword
287
288
289* A4. Clr INEX bit.
290*     The operation in A3 above may have set INEX2.
291
292A4_str:
293          fmove.l   #0,FPSR             ;zero all of fpsr - nothing needed
294
295
296* A5. Set ICTR = 0;
297*     ICTR is a flag used in A13.  It must be set before the
298*     loop entry A6. The lower word of d5 is used for ICTR.
299
300          clr.w     d5                  ;clear ICTR
301
302
303* A6. Calculate LEN.
304*     LEN is the number of digits to be displayed.  The k-factor
305*     can dictate either the total number of digits, if it is
306*     a positive number, or the number of digits after the
307*     original decimal point which are to be included as
308*     significant.  See the 68882 manual for examples.
309*     If LEN is computed to be greater than 17, set OPERR in
310*     USER_FPSR.  LEN is stored in d4.
311*
312* Register usage:
313*         Input/Output
314*         d0: exponent/Unchanged
315*         d2: x/x/scratch
316*         d3: x/x
317*         d4: exc picture/LEN
318*         d5: ICTR/Unchanged
319*         d6: ILOG/Unchanged
320*         d7: k-factor/Unchanged
321*         a0: ptr for original operand/final result
322*         a1: x/x
323*         a2: x/x
324*         fp0: float(ILOG)/Unchanged
325*         fp1: x/x
326*         fp2: x/x
327*         F_SCR1:x/x
328*         F_SCR2:Abs(X) with $3fff exponent/Unchanged
329*         L_SCR1:x/x
330*         L_SCR2:first word of X packed/Unchanged
331
332A6_str:
333          tst.l     d7                  ;branch on sign of k
334          ble.b     k_neg               ;if k <= 0, LEN = ILOG + 1 - k
335          move.l    d7,d4               ;if k > 0, LEN = k
336          bra.b     len_ck              ;skip to LEN check
337k_neg:
338          move.l    d6,d4               ;first load ILOG to d4
339          sub.l     d7,d4               ;subtract off k
340          addq.l    #1,d4               ;add in the 1
341len_ck:
342          tst.l     d4                  ;LEN check: branch on sign of LEN
343          ble.b     LEN_ng              ;if neg, set LEN = 1
344          cmp.l     #17,d4              ;test if LEN > 17
345          ble.b     A7_str              ;if not, forget it
346          move.l    #17,d4              ;set max LEN = 17
347          tst.l     d7                  ;if negative, never set OPERR
348          ble.b     A7_str              ;if positive, continue
349          or.l      #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
350          bra.b     A7_str              ;finished here
351LEN_ng:
352          moveq.l   #1,d4               ;min LEN is 1
353
354
355* A7. Calculate SCALE.
356*     SCALE is equal to 10^ISCALE, where ISCALE is the number
357*     of decimal places needed to insure LEN integer digits
358*     in the output before conversion to bcd. LAMBDA is the sign
359*     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
360*     the rounding mode as given in the following table (see
361*     Coonen, p. 7.23 as ref.; however, the SCALE variable is
362*     of opposite sign in bindec.sa from Coonen).
363*
364*         Initial                                           USE
365*         FPCR[6:5] LAMBDA    SIGN(X)             FPCR[6:5]
366*         ----------------------------------------------
367*          RN       00           0         0                00/0      RN
368*          RN       00           0         1                00/0      RN
369*          RN       00           1         0                00/0      RN
370*          RN       00           1         1                00/0      RN
371*          RZ       01           0         0                11/3      RP
372*          RZ       01           0         1                11/3      RP
373*          RZ       01           1         0                10/2      RM
374*          RZ       01           1         1                10/2      RM
375*          RM       10           0         0                11/3      RP
376*          RM       10           0         1                10/2      RM
377*          RM       10           1         0                10/2      RM
378*          RM       10           1         1                11/3      RP
379*          RP       11           0         0                10/2      RM
380*          RP       11           0         1                11/3      RP
381*          RP       11           1         0                11/3      RP
382*          RP       11           1         1                10/2      RM
383*
384* Register usage:
385*         Input/Output
386*         d0: exponent/scratch - final is 0
387*         d2: x/0 or 24 for A9
388*         d3: x/scratch - offset ptr into PTENRM array
389*         d4: LEN/Unchanged
390*         d5: 0/ICTR:LAMBDA
391*         d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
392*         d7: k-factor/Unchanged
393*         a0: ptr for original operand/final result
394*         a1: x/ptr to PTENRM array
395*         a2: x/x
396*         fp0: float(ILOG)/Unchanged
397*         fp1: x/10^ISCALE
398*         fp2: x/x
399*         F_SCR1:x/x
400*         F_SCR2:Abs(X) with $3fff exponent/Unchanged
401*         L_SCR1:x/x
402*         L_SCR2:first word of X packed/Unchanged
403
404A7_str:
405          tst.l     d7                  ;test sign of k
406          bgt.b     k_pos               ;if pos and > 0, skip this
407          cmp.l     d6,d7               ;test k - ILOG
408          blt.b     k_pos               ;if ILOG >= k, skip this
409          move.l    d7,d6               ;if ((k<0) & (ILOG < k)) ILOG = k
410k_pos:
411          move.l    d6,d0               ;calc ILOG + 1 - LEN in d0
412          addq.l    #1,d0               ;add the 1
413          sub.l     d4,d0               ;sub off LEN
414          swap      d5                  ;use upper word of d5 for LAMBDA
415          clr.w     d5                  ;set it zero initially
416          clr.w     d2                  ;set up d2 for very small case
417          tst.l     d0                  ;test sign of ISCALE
418          bge.b     iscale              ;if pos, skip next inst
419          addq.w    #1,d5               ;if neg, set LAMBDA true
420          cmp.l     #$ffffecd4,d0       ;test iscale <= -4908
421          bgt.b     no_inf              ;if false, skip rest
422          addi.l    #24,d0              ;add in 24 to iscale
423          move.l    #24,d2              ;put 24 in d2 for A9
424no_inf:
425          neg.l     d0                  ;and take abs of ISCALE
426iscale:
427          fmove.s   FONE,fp1  ;init fp1 to 1
428          bfextu    USER_FPCR(a6){26:2},d1 ;get initial rmode bits
429          add.w     d1,d1               ;put them in bits 2:1
430          add.w     d5,d1               ;add in LAMBDA
431          add.w     d1,d1               ;put them in bits 3:1
432          tst.l     L_SCR2(a6)          ;test sign of original x
433          bge.b     x_pos               ;if pos, don't set bit 0
434          addq.l    #1,d1               ;if neg, set bit 0
435x_pos:
436          lea.l     RBDTBL,a2 ;load rbdtbl base
437          move.b    (a2,d1),d3          ;load d3 with new rmode
438          lsl.l     #4,d3               ;put bits in proper position
439          fmove.l   d3,fpcr             ;load bits into fpu
440          lsr.l     #4,d3               ;put bits in proper position
441          tst.b     d3                  ;decode new rmode for pten table
442          bne.b     not_rn              ;if zero, it is RN
443          lea.l     PTENRN,a1 ;load a1 with RN table base
444          bra.b     rmode               ;exit decode
445not_rn:
446          lsr.b     #1,d3               ;get lsb in carry
447          bcc.b     not_rp              ;if carry clear, it is RM
448          lea.l     PTENRP,a1 ;load a1 with RP table base
449          bra.b     rmode               ;exit decode
450not_rp:
451          lea.l     PTENRM,a1 ;load a1 with RM table base
452rmode:
453          clr.l     d3                  ;clr table index
454e_loop:
455          lsr.l     #1,d0               ;shift next bit into carry
456          bcc.b     e_next              ;if zero, skip the mul
457          fmul.x    (a1,d3),fp1         ;mul by 10**(d3_bit_no)
458e_next:
459          add.l     #12,d3              ;inc d3 to next pwrten table entry
460          tst.l     d0                  ;test if ISCALE is zero
461          bne.b     e_loop              ;if not, loop
462
463
464* A8. Clr INEX; Force RZ.
465*     The operation in A3 above may have set INEX2.
466*     RZ mode is forced for the scaling operation to insure
467*     only one rounding error.  The grs bits are collected in
468*     the INEX flag for use in A10.
469*
470* Register usage:
471*         Input/Output
472
473          fmove.l   #0,FPSR             ;clr INEX
474          fmove.l   #rz_mode,FPCR       ;set RZ rounding mode
475
476
477* A9. Scale X -> Y.
478*     The mantissa is scaled to the desired number of significant
479*     digits.  The excess digits are collected in INEX2. If mul,
480*     Check d2 for excess 10 exponential value.  If not zero,
481*     the iscale value would have caused the pwrten calculation
482*     to overflow.  Only a negative iscale can cause this, so
483*     multiply by 10^(d2), which is now only allowed to be 24,
484*     with a multiply by 10^8 and 10^16, which is exact since
485*     10^24 is exact.  If the input was denormalized, we must
486*     create a busy stack frame with the mul command and the
487*     two operands, and allow the fpu to complete the multiply.
488*
489* Register usage:
490*         Input/Output
491*         d0: FPCR with RZ mode/Unchanged
492*         d2: 0 or 24/unchanged
493*         d3: x/x
494*         d4: LEN/Unchanged
495*         d5: ICTR:LAMBDA
496*         d6: ILOG/Unchanged
497*         d7: k-factor/Unchanged
498*         a0: ptr for original operand/final result
499*         a1: ptr to PTENRM array/Unchanged
500*         a2: x/x
501*         fp0: float(ILOG)/X adjusted for SCALE (Y)
502*         fp1: 10^ISCALE/Unchanged
503*         fp2: x/x
504*         F_SCR1:x/x
505*         F_SCR2:Abs(X) with $3fff exponent/Unchanged
506*         L_SCR1:x/x
507*         L_SCR2:first word of X packed/Unchanged
508
509A9_str:
510          fmove.x   (a0),fp0  ;load X from memory
511          fabs.x    fp0                 ;use abs(X)
512          tst.w     d5                  ;LAMBDA is in lower word of d5
513          bne.b     short_sc_mul        ;if neg (LAMBDA = 1), scale by mul
514          fdiv.x    fp1,fp0             ;calculate X / SCALE -> Y to fp0
515          bra.b     A10_st              ;branch to A10
516
517sc_mul:
518short_sc_mul:
519          tst.b     BINDEC_FLG(a6)      ;check for denorm
520          beq.b     A9_norm             ;if norm, continue with mul
521          fmovem.x fp1,-(a7)  ;load ETEMP with 10^ISCALE
522          move.l    8(a0),-(a7)         ;load FPTEMP with input arg
523          move.l    4(a0),-(a7)
524          move.l    (a0),-(a7)
525          move.l    #18,d3              ;load count for busy stack
526A9_loop:
527          clr.l     -(a7)               ;clear lword on stack
528          dbf.w     d3,A9_loop
529          move.b    VER_TMP(a6),(a7) ;write current version number
530          move.b    #BUSY_SIZE-4,1(a7) ;write current busy size
531          move.b    #$10,$44(a7)        ;set fcefpte[15] bit
532          move.w    #$0023,$40(a7)      ;load cmdreg1b with mul command
533          move.b    #$fe,$8(a7)         ;load all 1s to cu savepc
534          frestore (a7)+                ;restore frame to fpu for completion
535          fmul.x    36(a1),fp0          ;multiply fp0 by 10^8
536          fmul.x    48(a1),fp0          ;multiply fp0 by 10^16
537          bra.b     A10_st
538A9_norm:
539          tst.w     d2                  ;test for small exp case
540          beq.b     A9_con              ;if zero, continue as normal
541          fmul.x    36(a1),fp0          ;multiply fp0 by 10^8
542          fmul.x    48(a1),fp0          ;multiply fp0 by 10^16
543A9_con:
544          fmul.x    fp1,fp0             ;calculate X * SCALE -> Y to fp0
545
546
547* A10. Or in INEX.
548*      If INEX is set, round error occurred.  This is compensated
549*      for by 'or-ing' in the INEX2 flag to the lsb of Y.
550*
551* Register usage:
552*         Input/Output
553*         d0: FPCR with RZ mode/FPSR with INEX2 isolated
554*         d2: x/x
555*         d3: x/x
556*         d4: LEN/Unchanged
557*         d5: ICTR:LAMBDA
558*         d6: ILOG/Unchanged
559*         d7: k-factor/Unchanged
560*         a0: ptr for original operand/final result
561*         a1: ptr to PTENxx array/Unchanged
562*         a2: x/ptr to FP_SCR2(a6)
563*         fp0: Y/Y with lsb adjusted
564*         fp1: 10^ISCALE/Unchanged
565*         fp2: x/x
566
567A10_st:
568          fmove.l   FPSR,d0             ;get FPSR
569          fmove.x   fp0,FP_SCR2(a6)     ;move Y to memory
570          lea.l     FP_SCR2(a6),a2      ;load a2 with ptr to FP_SCR2
571          btst.l    #9,d0               ;check if INEX2 set
572          beq.b     A11_st              ;if clear, skip rest
573          ori.l     #1,8(a2)  ;or in 1 to lsb of mantissa
574          fmove.x   FP_SCR2(a6),fp0     ;write adjusted Y back to fpu
575
576
577* A11. Restore original FPCR; set size ext.
578*      Perform FINT operation in the user's rounding mode.  Keep
579*      the size to extended.  The sintdo entry point in the sint
580*      routine expects the FPCR value to be in USER_FPCR for
581*      mode and precision.  The original FPCR is saved in L_SCR1.
582
583A11_st:
584          move.l    USER_FPCR(a6),L_SCR1(a6) ;save it for later
585          andi.l    #$00000030,USER_FPCR(a6) ;set size to ext,
586*                                                 ;block exceptions
587
588
589* A12. Calculate YINT = FINT(Y) according to user's rounding mode.
590*      The FPSP routine sintd0 is used.  The output is in fp0.
591*
592* Register usage:
593*         Input/Output
594*         d0: FPSR with AINEX cleared/FPCR with size set to ext
595*         d2: x/x/scratch
596*         d3: x/x
597*         d4: LEN/Unchanged
598*         d5: ICTR:LAMBDA/Unchanged
599*         d6: ILOG/Unchanged
600*         d7: k-factor/Unchanged
601*         a0: ptr for original operand/src ptr for sintdo
602*         a1: ptr to PTENxx array/Unchanged
603*         a2: ptr to FP_SCR2(a6)/Unchanged
604*         a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
605*         fp0: Y/YINT
606*         fp1: 10^ISCALE/Unchanged
607*         fp2: x/x
608*         F_SCR1:x/x
609*         F_SCR2:Y adjusted for inex/Y with original exponent
610*         L_SCR1:x/original USER_FPCR
611*         L_SCR2:first word of X packed/Unchanged
612
613A12_st:
614          movem.l   d0-d1/a0-a1,-(a7)   ;save regs used by sintd0
615          move.l    L_SCR1(a6),-(a7)
616          move.l    L_SCR2(a6),-(a7)
617          lea.l     FP_SCR2(a6),a0                ;a0 is ptr to F_SCR2(a6)
618          fmove.x   fp0,(a0)            ;move Y to memory at FP_SCR2(a6)
619          tst.l     L_SCR2(a6)                    ;test sign of original operand
620          bge.b     do_fint                       ;if pos, use Y
621          or.l      #$80000000,(a0)               ;if neg, use -Y
622do_fint:
623          move.l    USER_FPSR(a6),-(a7)
624          bsr       sintdo                        ;sint routine returns int in fp0
625          move.b    (a7),USER_FPSR(a6)
626          add.l     #4,a7
627          move.l    (a7)+,L_SCR2(a6)
628          move.l    (a7)+,L_SCR1(a6)
629          movem.l   (a7)+,d0-d1/a0-a1   ;restore regs used by sint
630          move.l    L_SCR2(a6),FP_SCR2(a6)        ;restore original exponent
631          move.l    L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
632
633
634* A13. Check for LEN digits.
635*      If the int operation results in more than LEN digits,
636*      or less than LEN -1 digits, adjust ILOG and repeat from
637*      A6.  This test occurs only on the first pass.  If the
638*      result is exactly 10^LEN, decrement ILOG and divide
639*      the mantissa by 10.  The calculation of 10^LEN cannot
640*      be inexact, since all powers of ten upto 10^27 are exact
641*      in extended precision, so the use of a previous power-of-ten
642*      table will introduce no error.
643*
644*
645* Register usage:
646*         Input/Output
647*         d0: FPCR with size set to ext/scratch final = 0
648*         d2: x/x
649*         d3: x/scratch final = x
650*         d4: LEN/LEN adjusted
651*         d5: ICTR:LAMBDA/LAMBDA:ICTR
652*         d6: ILOG/ILOG adjusted
653*         d7: k-factor/Unchanged
654*         a0: pointer into memory for packed bcd string formation
655*         a1: ptr to PTENxx array/Unchanged
656*         a2: ptr to FP_SCR2(a6)/Unchanged
657*         fp0: int portion of Y/abs(YINT) adjusted
658*         fp1: 10^ISCALE/Unchanged
659*         fp2: x/10^LEN
660*         F_SCR1:x/x
661*         F_SCR2:Y with original exponent/Unchanged
662*         L_SCR1:original USER_FPCR/Unchanged
663*         L_SCR2:first word of X packed/Unchanged
664
665A13_st:
666          swap      d5                  ;put ICTR in lower word of d5
667          tst.w     d5                  ;check if ICTR = 0
668          bne       not_zr              ;if non-zero, go to second test
669*
670* Compute 10^(LEN-1)
671*
672          fmove.s   FONE,fp2  ;init fp2 to 1.0
673          move.l    d4,d0               ;put LEN in d0
674          subq.l    #1,d0               ;d0 = LEN -1
675          clr.l     d3                  ;clr table index
676l_loop:
677          lsr.l     #1,d0               ;shift next bit into carry
678          bcc.b     l_next              ;if zero, skip the mul
679          fmul.x    (a1,d3),fp2         ;mul by 10**(d3_bit_no)
680l_next:
681          add.l     #12,d3              ;inc d3 to next pwrten table entry
682          tst.l     d0                  ;test if LEN is zero
683          bne.b     l_loop              ;if not, loop
684*
685* 10^LEN-1 is computed for this test and A14.  If the input was
686* denormalized, check only the case in which YINT > 10^LEN.
687*
688          tst.b     BINDEC_FLG(a6)      ;check if input was norm
689          beq.b     A13_con             ;if norm, continue with checking
690          fabs.x    fp0                 ;take abs of YINT
691          bra       test_2
692*
693* Compare abs(YINT) to 10^(LEN-1) and 10^LEN
694*
695A13_con:
696          fabs.x    fp0                 ;take abs of YINT
697          fcmp.x    fp2,fp0             ;compare abs(YINT) with 10^(LEN-1)
698          fbge.w    test_2              ;if greater, do next test
699          subq.l    #1,d6               ;subtract 1 from ILOG
700          move.w    #1,d5               ;set ICTR
701          fmove.l   #rm_mode,FPCR       ;set rmode to RM
702          fmul.s    FTEN,fp2  ;compute 10^LEN
703          bra.w     A6_str              ;return to A6 and recompute YINT
704test_2:
705          fmul.s    FTEN,fp2  ;compute 10^LEN
706          fcmp.x    fp2,fp0             ;compare abs(YINT) with 10^LEN
707          fblt.w    A14_st              ;if less, all is ok, go to A14
708          fbgt.w    fix_ex              ;if greater, fix and redo
709          fdiv.s    FTEN,fp0  ;if equal, divide by 10
710          addq.l    #1,d6               ; and inc ILOG
711          bra.b     A14_st              ; and continue elsewhere
712fix_ex:
713          addq.l    #1,d6               ;increment ILOG by 1
714          move.w    #1,d5               ;set ICTR
715          fmove.l   #rm_mode,FPCR       ;set rmode to RM
716          bra.w     A6_str              ;return to A6 and recompute YINT
717*
718* Since ICTR <> 0, we have already been through one adjustment,
719* and shouldn't have another; this is to check if abs(YINT) = 10^LEN
720* 10^LEN is again computed using whatever table is in a1 since the
721* value calculated cannot be inexact.
722*
723not_zr:
724          fmove.s   FONE,fp2  ;init fp2 to 1.0
725          move.l    d4,d0               ;put LEN in d0
726          clr.l     d3                  ;clr table index
727z_loop:
728          lsr.l     #1,d0               ;shift next bit into carry
729          bcc.b     z_next              ;if zero, skip the mul
730          fmul.x    (a1,d3),fp2         ;mul by 10**(d3_bit_no)
731z_next:
732          add.l     #12,d3              ;inc d3 to next pwrten table entry
733          tst.l     d0                  ;test if LEN is zero
734          bne.b     z_loop              ;if not, loop
735          fabs.x    fp0                 ;get abs(YINT)
736          fcmp.x    fp2,fp0             ;check if abs(YINT) = 10^LEN
737          fbne.w    A14_st              ;if not, skip this
738          fdiv.s    FTEN,fp0  ;divide abs(YINT) by 10
739          addq.l    #1,d6               ;and inc ILOG by 1
740          addq.l    #1,d4               ; and inc LEN
741          fmul.s    FTEN,fp2  ; if LEN++, the get 10^^LEN
742
743
744* A14. Convert the mantissa to bcd.
745*      The binstr routine is used to convert the LEN digit
746*      mantissa to bcd in memory.  The input to binstr is
747*      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
748*      such that the decimal point is to the left of bit 63.
749*      The bcd digits are stored in the correct position in
750*      the final string area in memory.
751*
752*
753* Register usage:
754*         Input/Output
755*         d0: x/LEN call to binstr - final is 0
756*         d1: x/0
757*         d2: x/ms 32-bits of mant of abs(YINT)
758*         d3: x/ls 32-bits of mant of abs(YINT)
759*         d4: LEN/Unchanged
760*         d5: ICTR:LAMBDA/LAMBDA:ICTR
761*         d6: ILOG
762*         d7: k-factor/Unchanged
763*         a0: pointer into memory for packed bcd string formation
764*             /ptr to first mantissa byte in result string
765*         a1: ptr to PTENxx array/Unchanged
766*         a2: ptr to FP_SCR2(a6)/Unchanged
767*         fp0: int portion of Y/abs(YINT) adjusted
768*         fp1: 10^ISCALE/Unchanged
769*         fp2: 10^LEN/Unchanged
770*         F_SCR1:x/Work area for final result
771*         F_SCR2:Y with original exponent/Unchanged
772*         L_SCR1:original USER_FPCR/Unchanged
773*         L_SCR2:first word of X packed/Unchanged
774
775A14_st:
776          fmove.l   #rz_mode,FPCR       ;force rz for conversion
777          fdiv.x    fp2,fp0             ;divide abs(YINT) by 10^LEN
778          lea.l     FP_SCR1(a6),a0
779          fmove.x   fp0,(a0)  ;move abs(YINT)/10^LEN to memory
780          move.l    4(a0),d2  ;move 2nd word of FP_RES to d2
781          move.l    8(a0),d3  ;move 3rd word of FP_RES to d3
782          clr.l     4(a0)               ;zero word 2 of FP_RES
783          clr.l     8(a0)               ;zero word 3 of FP_RES
784          move.l    (a0),d0             ;move exponent to d0
785          swap      d0                  ;put exponent in lower word
786          beq.b     no_sft              ;if zero, don't shift
787          subi.l    #$3ffd,d0 ;sub bias less 2 to make fract
788          tst.l     d0                  ;check if > 1
789          bgt.b     no_sft              ;if so, don't shift
790          neg.l     d0                  ;make exp positive
791m_loop:
792          lsr.l     #1,d2               ;shift d2:d3 right, add 0s
793          roxr.l    #1,d3               ;the number of places
794          dbf.w     d0,m_loop ;given in d0
795no_sft:
796          tst.l     d2                  ;check for mantissa of zero
797          bne.b     no_zr               ;if not, go on
798          tst.l     d3                  ;continue zero check
799          beq.b     zer_m               ;if zero, go directly to binstr
800no_zr:
801          clr.l     d1                  ;put zero in d1 for addx
802          addi.l    #$00000080,d3       ;inc at bit 7
803          addx.l    d1,d2               ;continue inc
804          andi.l    #$ffffff80,d3       ;strip off lsb not used by 882
805zer_m:
806          move.l    d4,d0               ;put LEN in d0 for binstr call
807          addq.l    #3,a0               ;a0 points to M16 byte in result
808          bsr       binstr              ;call binstr to convert mant
809
810
811* A15. Convert the exponent to bcd.
812*      As in A14 above, the exp is converted to bcd and the
813*      digits are stored in the final string.
814*
815*      Digits are stored in L_SCR1(a6) on return from BINDEC as:
816*
817*          32               16 15                0
818*         -----------------------------------------
819*         |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
820*         -----------------------------------------
821*
822* And are moved into their proper places in FP_SCR1.  If digit e4
823* is non-zero, OPERR is signaled.  In all cases, all 4 digits are
824* written as specified in the 881/882 manual for packed decimal.
825*
826* Register usage:
827*         Input/Output
828*         d0: x/LEN call to binstr - final is 0
829*         d1: x/scratch (0);shift count for final exponent packing
830*         d2: x/ms 32-bits of exp fraction/scratch
831*         d3: x/ls 32-bits of exp fraction
832*         d4: LEN/Unchanged
833*         d5: ICTR:LAMBDA/LAMBDA:ICTR
834*         d6: ILOG
835*         d7: k-factor/Unchanged
836*         a0: ptr to result string/ptr to L_SCR1(a6)
837*         a1: ptr to PTENxx array/Unchanged
838*         a2: ptr to FP_SCR2(a6)/Unchanged
839*         fp0: abs(YINT) adjusted/float(ILOG)
840*         fp1: 10^ISCALE/Unchanged
841*         fp2: 10^LEN/Unchanged
842*         F_SCR1:Work area for final result/BCD result
843*         F_SCR2:Y with original exponent/ILOG/10^4
844*         L_SCR1:original USER_FPCR/Exponent digits on return from binstr
845*         L_SCR2:first word of X packed/Unchanged
846
847A15_st:
848          tst.b     BINDEC_FLG(a6)      ;check for denorm
849          beq.b     not_denorm
850          ftst.x    fp0                 ;test for zero
851          fbeq.w    den_zero  ;if zero, use k-factor or 4933
852          fmove.l   d6,fp0              ;float ILOG
853          fabs.x    fp0                 ;get abs of ILOG
854          bra.b     convrt
855den_zero:
856          tst.l     d7                  ;check sign of the k-factor
857          blt.b     use_ilog  ;if negative, use ILOG
858          fmove.s   F4933,fp0 ;force exponent to 4933
859          bra.b     convrt              ;do it
860use_ilog:
861          fmove.l   d6,fp0              ;float ILOG
862          fabs.x    fp0                 ;get abs of ILOG
863          bra.b     convrt
864not_denorm:
865          ftst.x    fp0                 ;test for zero
866          fbne.w    not_zero  ;if zero, force exponent
867          fmove.s   FONE,fp0  ;force exponent to 1
868          bra.b     convrt              ;do it
869not_zero:
870          fmove.l   d6,fp0              ;float ILOG
871          fabs.x    fp0                 ;get abs of ILOG
872convrt:
873          fdiv.x    24(a1),fp0          ;compute ILOG/10^4
874          fmove.x   fp0,FP_SCR2(a6)     ;store fp0 in memory
875          move.l    4(a2),d2  ;move word 2 to d2
876          move.l    8(a2),d3  ;move word 3 to d3
877          move.w    (a2),d0             ;move exp to d0
878          beq.b     x_loop_fin          ;if zero, skip the shift
879          subi.w    #$3ffd,d0 ;subtract off bias
880          neg.w     d0                  ;make exp positive
881x_loop:
882          lsr.l     #1,d2               ;shift d2:d3 right
883          roxr.l    #1,d3               ;the number of places
884          dbf.w     d0,x_loop ;given in d0
885x_loop_fin:
886          clr.l     d1                  ;put zero in d1 for addx
887          addi.l    #$00000080,d3       ;inc at bit 6
888          addx.l    d1,d2               ;continue inc
889          andi.l    #$ffffff80,d3       ;strip off lsb not used by 882
890          move.l    #4,d0               ;put 4 in d0 for binstr call
891          lea.l     L_SCR1(a6),a0       ;a0 is ptr to L_SCR1 for exp digits
892          bsr       binstr              ;call binstr to convert exp
893          move.l    L_SCR1(a6),d0       ;load L_SCR1 lword to d0
894          move.l    #12,d1              ;use d1 for shift count
895          lsr.l     d1,d0               ;shift d0 right by 12
896          bfins     d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
897          lsr.l     d1,d0               ;shift d0 right by 12
898          bfins     d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
899          tst.b     d0                  ;check if e4 is zero
900          beq.b     A16_st              ;if zero, skip rest
901          or.l      #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
902
903
904* A16. Write sign bits to final string.
905*            Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
906*
907* Register usage:
908*         Input/Output
909*         d0: x/scratch - final is x
910*         d2: x/x
911*         d3: x/x
912*         d4: LEN/Unchanged
913*         d5: ICTR:LAMBDA/LAMBDA:ICTR
914*         d6: ILOG/ILOG adjusted
915*         d7: k-factor/Unchanged
916*         a0: ptr to L_SCR1(a6)/Unchanged
917*         a1: ptr to PTENxx array/Unchanged
918*         a2: ptr to FP_SCR2(a6)/Unchanged
919*         fp0: float(ILOG)/Unchanged
920*         fp1: 10^ISCALE/Unchanged
921*         fp2: 10^LEN/Unchanged
922*         F_SCR1:BCD result with correct signs
923*         F_SCR2:ILOG/10^4
924*         L_SCR1:Exponent digits on return from binstr
925*         L_SCR2:first word of X packed/Unchanged
926
927A16_st:
928          clr.l     d0                  ;clr d0 for collection of signs
929          andi.b    #$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
930          tst.l     L_SCR2(a6)          ;check sign of original mantissa
931          bge.b     mant_p              ;if pos, don't set SM
932          moveq.l   #2,d0               ;move 2 in to d0 for SM
933mant_p:
934          tst.l     d6                  ;check sign of ILOG
935          bge.b     wr_sgn              ;if pos, don't set SE
936          addq.l    #1,d0               ;set bit 0 in d0 for SE
937wr_sgn:
938          bfins     d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
939
940* Clean up and restore all registers used.
941
942          fmove.l   #0,FPSR             ;clear possible inex2/ainex bits
943          fmovem.x (a7)+,fp0-fp2
944          movem.l   (a7)+,d2-d7/a2
945          rts
946
947          end
948