bindec.sa - OpenGrok cross reference for /netbsd/src/sys/arch/m68k/fpsp/bindec.sa

*         $NetBSD: bindec.sa,v 1.5 2001/12/09 01:43:13 briggs Exp $

*         MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
*         M68000 Hi-Performance Microprocessor Division
*         M68040 Software Package
*
*         M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
*         All rights reserved.
*
*         THE SOFTWARE is provided on an "AS IS" basis and without warranty.
*         To the maximum extent permitted by applicable law,
*         MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
*         INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
*         PARTICULAR PURPOSE and any warranty against infringement with
*         regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
*         and any accompanying written materials.
*
*         To the maximum extent permitted by applicable law,
*         IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
*         (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
*         PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
*         OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
*         SOFTWARE.  Motorola assumes no responsibility for the maintenance
*         and support of the SOFTWARE.
*
*         You are hereby granted a copyright license to use, modify, and
*         distribute the SOFTWARE so long as this entire notice is retained
*         without alteration in any modified and/or redistributed versions,
*         and that such modified versions are clearly identified as such.
*         No licenses are granted by implication, estoppel or otherwise
*         under any patents or trademarks of Motorola, Inc.

*
*         bindec.sa 3.4 1/3/91
*
*         bindec
*
*         Description:
*                   Converts an input in extended precision format
*                   to bcd format.
*
*         Input:
*                   a0 points to the input extended precision value
*                   value in memory; d0 contains the k-factor sign-extended
*                   to 32-bits.  The input may be either normalized,
*                   unnormalized, or denormalized.
*
*         Output:   result in the FP_SCR1 space on the stack.
*
*         Saves and Modifies: D2-D7,A2,FP2
*
*         Algorithm:
*
*         A1.       Set RM and size ext;  Set SIGMA = sign of input.
*                   The k-factor is saved for use in d7. Clear the
*                   BINDEC_FLG for separating normalized/denormalized
*                   input.  If input is unnormalized or denormalized,
*                   normalize it.
*
*         A2.       Set X = abs(input).
*
*         A3.       Compute ILOG.
*                   ILOG is the log base 10 of the input value.  It is
*                   approximated by adding e + 0.f when the original
*                   value is viewed as 2^^e * 1.f in extended precision.
*                   This value is stored in d6.
*
*         A4.       Clr INEX bit.
*                   The operation in A3 above may have set INEX2.
*
*         A5.       Set ICTR = 0;
*                   ICTR is a flag used in A13.  It must be set before the
*                   loop entry A6.
*
*         A6.       Calculate LEN.
*                   LEN is the number of digits to be displayed.  The
*                   k-factor can dictate either the total number of digits,
*                   if it is a positive number, or the number of digits
*                   after the decimal point which are to be included as
*                   significant.  See the 68882 manual for examples.
*                   If LEN is computed to be greater than 17, set OPERR in
*                   USER_FPSR.  LEN is stored in d4.
*
*         A7.       Calculate SCALE.
*                   SCALE is equal to 10^ISCALE, where ISCALE is the number
*                   of decimal places needed to insure LEN integer digits
*                   in the output before conversion to bcd. LAMBDA is the
*                   sign of ISCALE, used in A9. Fp1 contains
*                   10^^(abs(ISCALE)) using a rounding mode which is a
*                   function of the original rounding mode and the signs
*                   of ISCALE and X.  A table is given in the code.
*
*         A8.       Clr INEX; Force RZ.
*                   The operation in A3 above may have set INEX2.
*                   RZ mode is forced for the scaling operation to insure
*                   only one rounding error.  The grs bits are collected in
*                   the INEX flag for use in A10.
*
*         A9.       Scale X -> Y.
*                   The mantissa is scaled to the desired number of
*                   significant digits.  The excess digits are collected
*                   in INEX2.
*
*         A10.      Or in INEX.
*                   If INEX is set, round error occurred.  This is
*                   compensated for by 'or-ing' in the INEX2 flag to
*                   the lsb of Y.
*
*         A11.      Restore original FPCR; set size ext.
*                   Perform FINT operation in the user's rounding mode.
*                   Keep the size to extended.
*
*         A12.      Calculate YINT = FINT(Y) according to user's rounding
*                   mode.  The FPSP routine sintd0 is used.  The output
*                   is in fp0.
*
*         A13.      Check for LEN digits.
*                   If the int operation results in more than LEN digits,
*                   or less than LEN -1 digits, adjust ILOG and repeat from
*                   A6.  This test occurs only on the first pass.  If the
*                   result is exactly 10^LEN, decrement ILOG and divide
*                   the mantissa by 10.
*
*         A14.      Convert the mantissa to bcd.
*                   The binstr routine is used to convert the LEN digit
*                   mantissa to bcd in memory.  The input to binstr is
*                   to be a fraction; i.e. (mantissa)/10^LEN and adjusted
*                   such that the decimal point is to the left of bit 63.
*                   The bcd digits are stored in the correct position in
*                   the final string area in memory.
*
*         A15.      Convert the exponent to bcd.
*                   As in A14 above, the exp is converted to bcd and the
*                   digits are stored in the final string.
*                   Test the length of the final exponent string.  If the
*                   length is 4, set operr.
*
*         A16.      Write sign bits to final string.
*
*         Implementation Notes:
*
*         The registers are used as follows:
*
*                   d0: scratch; LEN input to binstr
*                   d1: scratch
*                   d2: upper 32-bits of mantissa for binstr
*                   d3: scratch;lower 32-bits of mantissa for binstr
*                   d4: LEN
*                   d5: LAMBDA/ICTR
*                   d6: ILOG
*                   d7: k-factor
*                   a0: ptr for original operand/final result
*                   a1: scratch pointer
*                   a2: pointer to FP_X; abs(original value) in ext
*                   fp0: scratch
*                   fp1: scratch
*                   fp2: scratch
*                   F_SCR1:
*                   F_SCR2:
*                   L_SCR1:
*                   L_SCR2:
*

BINDEC    IDNT    2,1 Motorola 040 Floating Point Software Package

          include   fpsp.h

          section   8

* Constants in extended precision
LOG2      dc.l      $3FFD0000,$9A209A84,$FBCFF798,$00000000
LOG2UP1   dc.l      $3FFD0000,$9A209A84,$FBCFF799,$00000000

* Constants in single precision
FONE      dc.l      $3F800000,$00000000,$00000000,$00000000
FTWO      dc.l      $40000000,$00000000,$00000000,$00000000
FTEN      dc.l      $41200000,$00000000,$00000000,$00000000
F4933     dc.l      $459A2800,$00000000,$00000000,$00000000

RBDTBL    dc.b      0,0,0,0
          dc.b      3,3,2,2
          dc.b      3,2,2,3
          dc.b      2,3,3,2

          xref      binstr
          xref      sintdo
          xref      ptenrn,ptenrm,ptenrp

          xdef      bindec
          xdef      sc_mul
bindec:
          movem.l   d2-d7/a2,-(a7)
          fmovem.x fp0-fp2,-(a7)

* A1. Set RM and size ext. Set SIGMA = sign input;
*     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
*     separating  normalized/denormalized input.  If the input
*     is a denormalized number, set the BINDEC_FLG memory word
*     to signal denorm.  If the input is unnormalized, normalize
*     the input and test for denormalized result.
*
          fmove.l   #rm_mode,FPCR       ;set RM and ext
          move.l    (a0),L_SCR2(a6)     ;save exponent for sign check
          move.l    d0,d7               ;move k-factor to d7
          clr.b     BINDEC_FLG(a6)      ;clr norm/denorm flag
          move.w    STAG(a6),d0         ;get stag
          andi.w    #$e000,d0 ;isolate stag bits
          beq       A2_str              ;if zero, input is norm
*
* Normalize the denorm
*
un_de_norm:
          move.w    (a0),d0
          andi.w    #$7fff,d0 ;strip sign of normalized exp
          move.l    4(a0),d1
          move.l    8(a0),d2
norm_loop:
          sub.w     #1,d0
          add.l     d2,d2
          addx.l    d1,d1
          tst.l     d1
          bge.b     norm_loop
*
* Test if the normalized input is denormalized
*
          tst.w     d0
          bgt.b     pos_exp             ;if greater than zero, it is a norm
          st        BINDEC_FLG(a6)      ;set flag for denorm
pos_exp:
          andi.w    #$7fff,d0 ;strip sign of normalized exp
          move.w    d0,(a0)
          move.l    d1,4(a0)
          move.l    d2,8(a0)

* A2. Set X = abs(input).
*
A2_str:
          move.l    (a0),FP_SCR2(a6) ; move input to work space
          move.l    4(a0),FP_SCR2+4(a6) ; move input to work space
          move.l    8(a0),FP_SCR2+8(a6) ; move input to work space
          andi.l    #$7fffffff,FP_SCR2(a6) ;create abs(X)

* A3. Compute ILOG.
*     ILOG is the log base 10 of the input value.  It is approx-
*     imated by adding e + 0.f when the original value is viewed
*     as 2^^e * 1.f in extended precision.  This value is stored
*     in d6.
*
* Register usage:
*         Input/Output
*         d0: k-factor/exponent
*         d2: x/x
*         d3: x/x
*         d4: x/x
*         d5: x/x
*         d6: x/ILOG
*         d7: k-factor/Unchanged
*         a0: ptr for original operand/final result
*         a1: x/x
*         a2: x/x
*         fp0: x/float(ILOG)
*         fp1: x/x
*         fp2: x/x
*         F_SCR1:x/x
*         F_SCR2:Abs(X)/Abs(X) with $3fff exponent
*         L_SCR1:x/x
*         L_SCR2:first word of X packed/Unchanged

          tst.b     BINDEC_FLG(a6)      ;check for denorm
          beq.b     A3_cont             ;if clr, continue with norm
          move.l    #-4933,d6 ;force ILOG = -4933
          bra.b     A4_str
A3_cont:
          move.w    FP_SCR2(a6),d0      ;move exp to d0
          move.w    #$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
          fmove.x   FP_SCR2(a6),fp0     ;now fp0 has 1.f
          sub.w     #$3fff,d0 ;strip off bias
          fadd.w    d0,fp0              ;add in exp
          fsub.s    FONE,fp0  ;subtract off 1.0
          fbge.w    pos_res             ;if pos, branch
          fmul.x    LOG2UP1,fp0         ;if neg, mul by LOG2UP1
          fmove.l   fp0,d6              ;put ILOG in d6 as a lword
          bra.b     A4_str              ;go move out ILOG
pos_res:
          fmul.x    LOG2,fp0  ;if pos, mul by LOG2
          fmove.l   fp0,d6              ;put ILOG in d6 as a lword


* A4. Clr INEX bit.
*     The operation in A3 above may have set INEX2.

A4_str:
          fmove.l   #0,FPSR             ;zero all of fpsr - nothing needed


* A5. Set ICTR = 0;
*     ICTR is a flag used in A13.  It must be set before the
*     loop entry A6. The lower word of d5 is used for ICTR.

          clr.w     d5                  ;clear ICTR


* A6. Calculate LEN.
*     LEN is the number of digits to be displayed.  The k-factor
*     can dictate either the total number of digits, if it is
*     a positive number, or the number of digits after the
*     original decimal point which are to be included as
*     significant.  See the 68882 manual for examples.
*     If LEN is computed to be greater than 17, set OPERR in
*     USER_FPSR.  LEN is stored in d4.
*
* Register usage:
*         Input/Output
*         d0: exponent/Unchanged
*         d2: x/x/scratch
*         d3: x/x
*         d4: exc picture/LEN
*         d5: ICTR/Unchanged
*         d6: ILOG/Unchanged
*         d7: k-factor/Unchanged
*         a0: ptr for original operand/final result
*         a1: x/x
*         a2: x/x
*         fp0: float(ILOG)/Unchanged
*         fp1: x/x
*         fp2: x/x
*         F_SCR1:x/x
*         F_SCR2:Abs(X) with $3fff exponent/Unchanged
*         L_SCR1:x/x
*         L_SCR2:first word of X packed/Unchanged

A6_str:
          tst.l     d7                  ;branch on sign of k
          ble.b     k_neg               ;if k <= 0, LEN = ILOG + 1 - k
          move.l    d7,d4               ;if k > 0, LEN = k
          bra.b     len_ck              ;skip to LEN check
k_neg:
          move.l    d6,d4               ;first load ILOG to d4
          sub.l     d7,d4               ;subtract off k
          addq.l    #1,d4               ;add in the 1
len_ck:
          tst.l     d4                  ;LEN check: branch on sign of LEN
          ble.b     LEN_ng              ;if neg, set LEN = 1
          cmp.l     #17,d4              ;test if LEN > 17
          ble.b     A7_str              ;if not, forget it
          move.l    #17,d4              ;set max LEN = 17
          tst.l     d7                  ;if negative, never set OPERR
          ble.b     A7_str              ;if positive, continue
          or.l      #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
          bra.b     A7_str              ;finished here
LEN_ng:
          moveq.l   #1,d4               ;min LEN is 1


* A7. Calculate SCALE.
*     SCALE is equal to 10^ISCALE, where ISCALE is the number
*     of decimal places needed to insure LEN integer digits
*     in the output before conversion to bcd. LAMBDA is the sign
*     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
*     the rounding mode as given in the following table (see
*     Coonen, p. 7.23 as ref.; however, the SCALE variable is
*     of opposite sign in bindec.sa from Coonen).
*
*         Initial                                           USE
*         FPCR[6:5] LAMBDA    SIGN(X)             FPCR[6:5]
*         ----------------------------------------------
*          RN       00           0         0                00/0      RN
*          RN       00           0         1                00/0      RN
*          RN       00           1         0                00/0      RN
*          RN       00           1         1                00/0      RN
*          RZ       01           0         0                11/3      RP
*          RZ       01           0         1                11/3      RP
*          RZ       01           1         0                10/2      RM
*          RZ       01           1         1                10/2      RM
*          RM       10           0         0                11/3      RP
*          RM       10           0         1                10/2      RM
*          RM       10           1         0                10/2      RM
*          RM       10           1         1                11/3      RP
*          RP       11           0         0                10/2      RM
*          RP       11           0         1                11/3      RP
*          RP       11           1         0                11/3      RP
*          RP       11           1         1                10/2      RM
*
* Register usage:
*         Input/Output
*         d0: exponent/scratch - final is 0
*         d2: x/0 or 24 for A9
*         d3: x/scratch - offset ptr into PTENRM array
*         d4: LEN/Unchanged
*         d5: 0/ICTR:LAMBDA
*         d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
*         d7: k-factor/Unchanged
*         a0: ptr for original operand/final result
*         a1: x/ptr to PTENRM array
*         a2: x/x
*         fp0: float(ILOG)/Unchanged
*         fp1: x/10^ISCALE
*         fp2: x/x
*         F_SCR1:x/x
*         F_SCR2:Abs(X) with $3fff exponent/Unchanged
*         L_SCR1:x/x
*         L_SCR2:first word of X packed/Unchanged

A7_str:
          tst.l     d7                  ;test sign of k
          bgt.b     k_pos               ;if pos and > 0, skip this
          cmp.l     d6,d7               ;test k - ILOG
          blt.b     k_pos               ;if ILOG >= k, skip this
          move.l    d7,d6               ;if ((k<0) & (ILOG < k)) ILOG = k
k_pos:
          move.l    d6,d0               ;calc ILOG + 1 - LEN in d0
          addq.l    #1,d0               ;add the 1
          sub.l     d4,d0               ;sub off LEN
          swap      d5                  ;use upper word of d5 for LAMBDA
          clr.w     d5                  ;set it zero initially
          clr.w     d2                  ;set up d2 for very small case
          tst.l     d0                  ;test sign of ISCALE
          bge.b     iscale              ;if pos, skip next inst
          addq.w    #1,d5               ;if neg, set LAMBDA true
          cmp.l     #$ffffecd4,d0       ;test iscale <= -4908
          bgt.b     no_inf              ;if false, skip rest
          addi.l    #24,d0              ;add in 24 to iscale
          move.l    #24,d2              ;put 24 in d2 for A9
no_inf:
          neg.l     d0                  ;and take abs of ISCALE
iscale:
          fmove.s   FONE,fp1  ;init fp1 to 1
          bfextu    USER_FPCR(a6){26:2},d1 ;get initial rmode bits
          add.w     d1,d1               ;put them in bits 2:1
          add.w     d5,d1               ;add in LAMBDA
          add.w     d1,d1               ;put them in bits 3:1
          tst.l     L_SCR2(a6)          ;test sign of original x
          bge.b     x_pos               ;if pos, don't set bit 0
          addq.l    #1,d1               ;if neg, set bit 0
x_pos:
          lea.l     RBDTBL,a2 ;load rbdtbl base
          move.b    (a2,d1),d3          ;load d3 with new rmode
          lsl.l     #4,d3               ;put bits in proper position
          fmove.l   d3,fpcr             ;load bits into fpu
          lsr.l     #4,d3               ;put bits in proper position
          tst.b     d3                  ;decode new rmode for pten table
          bne.b     not_rn              ;if zero, it is RN
          lea.l     PTENRN,a1 ;load a1 with RN table base
          bra.b     rmode               ;exit decode
not_rn:
          lsr.b     #1,d3               ;get lsb in carry
          bcc.b     not_rp              ;if carry clear, it is RM
          lea.l     PTENRP,a1 ;load a1 with RP table base
          bra.b     rmode               ;exit decode
not_rp:
          lea.l     PTENRM,a1 ;load a1 with RM table base
rmode:
          clr.l     d3                  ;clr table index
e_loop:
          lsr.l     #1,d0               ;shift next bit into carry
          bcc.b     e_next              ;if zero, skip the mul
          fmul.x    (a1,d3),fp1         ;mul by 10**(d3_bit_no)
e_next:
          add.l     #12,d3              ;inc d3 to next pwrten table entry
          tst.l     d0                  ;test if ISCALE is zero
          bne.b     e_loop              ;if not, loop


* A8. Clr INEX; Force RZ.
*     The operation in A3 above may have set INEX2.
*     RZ mode is forced for the scaling operation to insure
*     only one rounding error.  The grs bits are collected in
*     the INEX flag for use in A10.
*
* Register usage:
*         Input/Output

          fmove.l   #0,FPSR             ;clr INEX
          fmove.l   #rz_mode,FPCR       ;set RZ rounding mode


* A9. Scale X -> Y.
*     The mantissa is scaled to the desired number of significant
*     digits.  The excess digits are collected in INEX2. If mul,
*     Check d2 for excess 10 exponential value.  If not zero,
*     the iscale value would have caused the pwrten calculation
*     to overflow.  Only a negative iscale can cause this, so
*     multiply by 10^(d2), which is now only allowed to be 24,
*     with a multiply by 10^8 and 10^16, which is exact since
*     10^24 is exact.  If the input was denormalized, we must
*     create a busy stack frame with the mul command and the
*     two operands, and allow the fpu to complete the multiply.
*
* Register usage:
*         Input/Output
*         d0: FPCR with RZ mode/Unchanged
*         d2: 0 or 24/unchanged
*         d3: x/x
*         d4: LEN/Unchanged
*         d5: ICTR:LAMBDA
*         d6: ILOG/Unchanged
*         d7: k-factor/Unchanged
*         a0: ptr for original operand/final result
*         a1: ptr to PTENRM array/Unchanged
*         a2: x/x
*         fp0: float(ILOG)/X adjusted for SCALE (Y)
*         fp1: 10^ISCALE/Unchanged
*         fp2: x/x
*         F_SCR1:x/x
*         F_SCR2:Abs(X) with $3fff exponent/Unchanged
*         L_SCR1:x/x
*         L_SCR2:first word of X packed/Unchanged

A9_str:
          fmove.x   (a0),fp0  ;load X from memory
          fabs.x    fp0                 ;use abs(X)
          tst.w     d5                  ;LAMBDA is in lower word of d5
          bne.b     short_sc_mul        ;if neg (LAMBDA = 1), scale by mul
          fdiv.x    fp1,fp0             ;calculate X / SCALE -> Y to fp0
          bra.b     A10_st              ;branch to A10

sc_mul:
short_sc_mul:
          tst.b     BINDEC_FLG(a6)      ;check for denorm
          beq.b     A9_norm             ;if norm, continue with mul
          fmovem.x fp1,-(a7)  ;load ETEMP with 10^ISCALE
          move.l    8(a0),-(a7)         ;load FPTEMP with input arg
          move.l    4(a0),-(a7)
          move.l    (a0),-(a7)
          move.l    #18,d3              ;load count for busy stack
A9_loop:
          clr.l     -(a7)               ;clear lword on stack
          dbf.w     d3,A9_loop
          move.b    VER_TMP(a6),(a7) ;write current version number
          move.b    #BUSY_SIZE-4,1(a7) ;write current busy size
          move.b    #$10,$44(a7)        ;set fcefpte[15] bit
          move.w    #$0023,$40(a7)      ;load cmdreg1b with mul command
          move.b    #$fe,$8(a7)         ;load all 1s to cu savepc
          frestore (a7)+                ;restore frame to fpu for completion
          fmul.x    36(a1),fp0          ;multiply fp0 by 10^8
          fmul.x    48(a1),fp0          ;multiply fp0 by 10^16
          bra.b     A10_st
A9_norm:
          tst.w     d2                  ;test for small exp case
          beq.b     A9_con              ;if zero, continue as normal
          fmul.x    36(a1),fp0          ;multiply fp0 by 10^8
          fmul.x    48(a1),fp0          ;multiply fp0 by 10^16
A9_con:
          fmul.x    fp1,fp0             ;calculate X * SCALE -> Y to fp0


* A10. Or in INEX.
*      If INEX is set, round error occurred.  This is compensated
*      for by 'or-ing' in the INEX2 flag to the lsb of Y.
*
* Register usage:
*         Input/Output
*         d0: FPCR with RZ mode/FPSR with INEX2 isolated
*         d2: x/x
*         d3: x/x
*         d4: LEN/Unchanged
*         d5: ICTR:LAMBDA
*         d6: ILOG/Unchanged
*         d7: k-factor/Unchanged
*         a0: ptr for original operand/final result
*         a1: ptr to PTENxx array/Unchanged
*         a2: x/ptr to FP_SCR2(a6)
*         fp0: Y/Y with lsb adjusted
*         fp1: 10^ISCALE/Unchanged
*         fp2: x/x

A10_st:
          fmove.l   FPSR,d0             ;get FPSR
          fmove.x   fp0,FP_SCR2(a6)     ;move Y to memory
          lea.l     FP_SCR2(a6),a2      ;load a2 with ptr to FP_SCR2
          btst.l    #9,d0               ;check if INEX2 set
          beq.b     A11_st              ;if clear, skip rest
          ori.l     #1,8(a2)  ;or in 1 to lsb of mantissa
          fmove.x   FP_SCR2(a6),fp0     ;write adjusted Y back to fpu


* A11. Restore original FPCR; set size ext.
*      Perform FINT operation in the user's rounding mode.  Keep
*      the size to extended.  The sintdo entry point in the sint
*      routine expects the FPCR value to be in USER_FPCR for
*      mode and precision.  The original FPCR is saved in L_SCR1.

A11_st:
          move.l    USER_FPCR(a6),L_SCR1(a6) ;save it for later
          andi.l    #$00000030,USER_FPCR(a6) ;set size to ext,
*                                                 ;block exceptions


* A12. Calculate YINT = FINT(Y) according to user's rounding mode.
*      The FPSP routine sintd0 is used.  The output is in fp0.
*
* Register usage:
*         Input/Output
*         d0: FPSR with AINEX cleared/FPCR with size set to ext
*         d2: x/x/scratch
*         d3: x/x
*         d4: LEN/Unchanged
*         d5: ICTR:LAMBDA/Unchanged
*         d6: ILOG/Unchanged
*         d7: k-factor/Unchanged
*         a0: ptr for original operand/src ptr for sintdo
*         a1: ptr to PTENxx array/Unchanged
*         a2: ptr to FP_SCR2(a6)/Unchanged
*         a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
*         fp0: Y/YINT
*         fp1: 10^ISCALE/Unchanged
*         fp2: x/x
*         F_SCR1:x/x
*         F_SCR2:Y adjusted for inex/Y with original exponent
*         L_SCR1:x/original USER_FPCR
*         L_SCR2:first word of X packed/Unchanged

A12_st:
          movem.l   d0-d1/a0-a1,-(a7)   ;save regs used by sintd0
          move.l    L_SCR1(a6),-(a7)
          move.l    L_SCR2(a6),-(a7)
          lea.l     FP_SCR2(a6),a0                ;a0 is ptr to F_SCR2(a6)
          fmove.x   fp0,(a0)            ;move Y to memory at FP_SCR2(a6)
          tst.l     L_SCR2(a6)                    ;test sign of original operand
          bge.b     do_fint                       ;if pos, use Y
          or.l      #$80000000,(a0)               ;if neg, use -Y
do_fint:
          move.l    USER_FPSR(a6),-(a7)
          bsr       sintdo                        ;sint routine returns int in fp0
          move.b    (a7),USER_FPSR(a6)
          add.l     #4,a7
          move.l    (a7)+,L_SCR2(a6)
          move.l    (a7)+,L_SCR1(a6)
          movem.l   (a7)+,d0-d1/a0-a1   ;restore regs used by sint
          move.l    L_SCR2(a6),FP_SCR2(a6)        ;restore original exponent
          move.l    L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR


* A13. Check for LEN digits.
*      If the int operation results in more than LEN digits,
*      or less than LEN -1 digits, adjust ILOG and repeat from
*      A6.  This test occurs only on the first pass.  If the
*      result is exactly 10^LEN, decrement ILOG and divide
*      the mantissa by 10.  The calculation of 10^LEN cannot
*      be inexact, since all powers of ten upto 10^27 are exact
*      in extended precision, so the use of a previous power-of-ten
*      table will introduce no error.
*
*
* Register usage:
*         Input/Output
*         d0: FPCR with size set to ext/scratch final = 0
*         d2: x/x
*         d3: x/scratch final = x
*         d4: LEN/LEN adjusted
*         d5: ICTR:LAMBDA/LAMBDA:ICTR
*         d6: ILOG/ILOG adjusted
*         d7: k-factor/Unchanged
*         a0: pointer into memory for packed bcd string formation
*         a1: ptr to PTENxx array/Unchanged
*         a2: ptr to FP_SCR2(a6)/Unchanged
*         fp0: int portion of Y/abs(YINT) adjusted
*         fp1: 10^ISCALE/Unchanged
*         fp2: x/10^LEN
*         F_SCR1:x/x
*         F_SCR2:Y with original exponent/Unchanged
*         L_SCR1:original USER_FPCR/Unchanged
*         L_SCR2:first word of X packed/Unchanged

A13_st:
          swap      d5                  ;put ICTR in lower word of d5
          tst.w     d5                  ;check if ICTR = 0
          bne       not_zr              ;if non-zero, go to second test
*
* Compute 10^(LEN-1)
*
          fmove.s   FONE,fp2  ;init fp2 to 1.0
          move.l    d4,d0               ;put LEN in d0
          subq.l    #1,d0               ;d0 = LEN -1
          clr.l     d3                  ;clr table index
l_loop:
          lsr.l     #1,d0               ;shift next bit into carry
          bcc.b     l_next              ;if zero, skip the mul
          fmul.x    (a1,d3),fp2         ;mul by 10**(d3_bit_no)
l_next:
          add.l     #12,d3              ;inc d3 to next pwrten table entry
          tst.l     d0                  ;test if LEN is zero
          bne.b     l_loop              ;if not, loop
*
* 10^LEN-1 is computed for this test and A14.  If the input was
* denormalized, check only the case in which YINT > 10^LEN.
*
          tst.b     BINDEC_FLG(a6)      ;check if input was norm
          beq.b     A13_con             ;if norm, continue with checking
          fabs.x    fp0                 ;take abs of YINT
          bra       test_2
*
* Compare abs(YINT) to 10^(LEN-1) and 10^LEN
*
A13_con:
          fabs.x    fp0                 ;take abs of YINT
          fcmp.x    fp2,fp0             ;compare abs(YINT) with 10^(LEN-1)
          fbge.w    test_2              ;if greater, do next test
          subq.l    #1,d6               ;subtract 1 from ILOG
          move.w    #1,d5               ;set ICTR
          fmove.l   #rm_mode,FPCR       ;set rmode to RM
          fmul.s    FTEN,fp2  ;compute 10^LEN
          bra.w     A6_str              ;return to A6 and recompute YINT
test_2:
          fmul.s    FTEN,fp2  ;compute 10^LEN
          fcmp.x    fp2,fp0             ;compare abs(YINT) with 10^LEN
          fblt.w    A14_st              ;if less, all is ok, go to A14
          fbgt.w    fix_ex              ;if greater, fix and redo
          fdiv.s    FTEN,fp0  ;if equal, divide by 10
          addq.l    #1,d6               ; and inc ILOG
          bra.b     A14_st              ; and continue elsewhere
fix_ex:
          addq.l    #1,d6               ;increment ILOG by 1
          move.w    #1,d5               ;set ICTR
          fmove.l   #rm_mode,FPCR       ;set rmode to RM
          bra.w     A6_str              ;return to A6 and recompute YINT
*
* Since ICTR <> 0, we have already been through one adjustment,
* and shouldn't have another; this is to check if abs(YINT) = 10^LEN
* 10^LEN is again computed using whatever table is in a1 since the
* value calculated cannot be inexact.
*
not_zr:
          fmove.s   FONE,fp2  ;init fp2 to 1.0
          move.l    d4,d0               ;put LEN in d0
          clr.l     d3                  ;clr table index
z_loop:
          lsr.l     #1,d0               ;shift next bit into carry
          bcc.b     z_next              ;if zero, skip the mul
          fmul.x    (a1,d3),fp2         ;mul by 10**(d3_bit_no)
z_next:
          add.l     #12,d3              ;inc d3 to next pwrten table entry
          tst.l     d0                  ;test if LEN is zero
          bne.b     z_loop              ;if not, loop
          fabs.x    fp0                 ;get abs(YINT)
          fcmp.x    fp2,fp0             ;check if abs(YINT) = 10^LEN
          fbne.w    A14_st              ;if not, skip this
          fdiv.s    FTEN,fp0  ;divide abs(YINT) by 10
          addq.l    #1,d6               ;and inc ILOG by 1
          addq.l    #1,d4               ; and inc LEN
          fmul.s    FTEN,fp2  ; if LEN++, the get 10^^LEN


* A14. Convert the mantissa to bcd.
*      The binstr routine is used to convert the LEN digit
*      mantissa to bcd in memory.  The input to binstr is
*      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
*      such that the decimal point is to the left of bit 63.
*      The bcd digits are stored in the correct position in
*      the final string area in memory.
*
*
* Register usage:
*         Input/Output
*         d0: x/LEN call to binstr - final is 0
*         d1: x/0
*         d2: x/ms 32-bits of mant of abs(YINT)
*         d3: x/ls 32-bits of mant of abs(YINT)
*         d4: LEN/Unchanged
*         d5: ICTR:LAMBDA/LAMBDA:ICTR
*         d6: ILOG
*         d7: k-factor/Unchanged
*         a0: pointer into memory for packed bcd string formation
*             /ptr to first mantissa byte in result string
*         a1: ptr to PTENxx array/Unchanged
*         a2: ptr to FP_SCR2(a6)/Unchanged
*         fp0: int portion of Y/abs(YINT) adjusted
*         fp1: 10^ISCALE/Unchanged
*         fp2: 10^LEN/Unchanged
*         F_SCR1:x/Work area for final result
*         F_SCR2:Y with original exponent/Unchanged
*         L_SCR1:original USER_FPCR/Unchanged
*         L_SCR2:first word of X packed/Unchanged

A14_st:
          fmove.l   #rz_mode,FPCR       ;force rz for conversion
          fdiv.x    fp2,fp0             ;divide abs(YINT) by 10^LEN
          lea.l     FP_SCR1(a6),a0
          fmove.x   fp0,(a0)  ;move abs(YINT)/10^LEN to memory
          move.l    4(a0),d2  ;move 2nd word of FP_RES to d2
          move.l    8(a0),d3  ;move 3rd word of FP_RES to d3
          clr.l     4(a0)               ;zero word 2 of FP_RES
          clr.l     8(a0)               ;zero word 3 of FP_RES
          move.l    (a0),d0             ;move exponent to d0
          swap      d0                  ;put exponent in lower word
          beq.b     no_sft              ;if zero, don't shift
          subi.l    #$3ffd,d0 ;sub bias less 2 to make fract
          tst.l     d0                  ;check if > 1
          bgt.b     no_sft              ;if so, don't shift
          neg.l     d0                  ;make exp positive
m_loop:
          lsr.l     #1,d2               ;shift d2:d3 right, add 0s
          roxr.l    #1,d3               ;the number of places
          dbf.w     d0,m_loop ;given in d0
no_sft:
          tst.l     d2                  ;check for mantissa of zero
          bne.b     no_zr               ;if not, go on
          tst.l     d3                  ;continue zero check
          beq.b     zer_m               ;if zero, go directly to binstr
no_zr:
          clr.l     d1                  ;put zero in d1 for addx
          addi.l    #$00000080,d3       ;inc at bit 7
          addx.l    d1,d2               ;continue inc
          andi.l    #$ffffff80,d3       ;strip off lsb not used by 882
zer_m:
          move.l    d4,d0               ;put LEN in d0 for binstr call
          addq.l    #3,a0               ;a0 points to M16 byte in result
          bsr       binstr              ;call binstr to convert mant


* A15. Convert the exponent to bcd.
*      As in A14 above, the exp is converted to bcd and the
*      digits are stored in the final string.
*
*      Digits are stored in L_SCR1(a6) on return from BINDEC as:
*
*          32               16 15                0
*         -----------------------------------------
*         |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
*         -----------------------------------------
*
* And are moved into their proper places in FP_SCR1.  If digit e4
* is non-zero, OPERR is signaled.  In all cases, all 4 digits are
* written as specified in the 881/882 manual for packed decimal.
*
* Register usage:
*         Input/Output
*         d0: x/LEN call to binstr - final is 0
*         d1: x/scratch (0);shift count for final exponent packing
*         d2: x/ms 32-bits of exp fraction/scratch
*         d3: x/ls 32-bits of exp fraction
*         d4: LEN/Unchanged
*         d5: ICTR:LAMBDA/LAMBDA:ICTR
*         d6: ILOG
*         d7: k-factor/Unchanged
*         a0: ptr to result string/ptr to L_SCR1(a6)
*         a1: ptr to PTENxx array/Unchanged
*         a2: ptr to FP_SCR2(a6)/Unchanged
*         fp0: abs(YINT) adjusted/float(ILOG)
*         fp1: 10^ISCALE/Unchanged
*         fp2: 10^LEN/Unchanged
*         F_SCR1:Work area for final result/BCD result
*         F_SCR2:Y with original exponent/ILOG/10^4
*         L_SCR1:original USER_FPCR/Exponent digits on return from binstr
*         L_SCR2:first word of X packed/Unchanged

A15_st:
          tst.b     BINDEC_FLG(a6)      ;check for denorm
          beq.b     not_denorm
          ftst.x    fp0                 ;test for zero
          fbeq.w    den_zero  ;if zero, use k-factor or 4933
          fmove.l   d6,fp0              ;float ILOG
          fabs.x    fp0                 ;get abs of ILOG
          bra.b     convrt
den_zero:
          tst.l     d7                  ;check sign of the k-factor
          blt.b     use_ilog  ;if negative, use ILOG
          fmove.s   F4933,fp0 ;force exponent to 4933
          bra.b     convrt              ;do it
use_ilog:
          fmove.l   d6,fp0              ;float ILOG
          fabs.x    fp0                 ;get abs of ILOG
          bra.b     convrt
not_denorm:
          ftst.x    fp0                 ;test for zero
          fbne.w    not_zero  ;if zero, force exponent
          fmove.s   FONE,fp0  ;force exponent to 1
          bra.b     convrt              ;do it
not_zero:
          fmove.l   d6,fp0              ;float ILOG
          fabs.x    fp0                 ;get abs of ILOG
convrt:
          fdiv.x    24(a1),fp0          ;compute ILOG/10^4
          fmove.x   fp0,FP_SCR2(a6)     ;store fp0 in memory
          move.l    4(a2),d2  ;move word 2 to d2
          move.l    8(a2),d3  ;move word 3 to d3
          move.w    (a2),d0             ;move exp to d0
          beq.b     x_loop_fin          ;if zero, skip the shift
          subi.w    #$3ffd,d0 ;subtract off bias
          neg.w     d0                  ;make exp positive
x_loop:
          lsr.l     #1,d2               ;shift d2:d3 right
          roxr.l    #1,d3               ;the number of places
          dbf.w     d0,x_loop ;given in d0
x_loop_fin:
          clr.l     d1                  ;put zero in d1 for addx
          addi.l    #$00000080,d3       ;inc at bit 6
          addx.l    d1,d2               ;continue inc
          andi.l    #$ffffff80,d3       ;strip off lsb not used by 882
          move.l    #4,d0               ;put 4 in d0 for binstr call
          lea.l     L_SCR1(a6),a0       ;a0 is ptr to L_SCR1 for exp digits
          bsr       binstr              ;call binstr to convert exp
          move.l    L_SCR1(a6),d0       ;load L_SCR1 lword to d0
          move.l    #12,d1              ;use d1 for shift count
          lsr.l     d1,d0               ;shift d0 right by 12
          bfins     d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
          lsr.l     d1,d0               ;shift d0 right by 12
          bfins     d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
          tst.b     d0                  ;check if e4 is zero
          beq.b     A16_st              ;if zero, skip rest
          or.l      #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR


* A16. Write sign bits to final string.
*            Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
*
* Register usage:
*         Input/Output
*         d0: x/scratch - final is x
*         d2: x/x
*         d3: x/x
*         d4: LEN/Unchanged
*         d5: ICTR:LAMBDA/LAMBDA:ICTR
*         d6: ILOG/ILOG adjusted
*         d7: k-factor/Unchanged
*         a0: ptr to L_SCR1(a6)/Unchanged
*         a1: ptr to PTENxx array/Unchanged
*         a2: ptr to FP_SCR2(a6)/Unchanged
*         fp0: float(ILOG)/Unchanged
*         fp1: 10^ISCALE/Unchanged
*         fp2: 10^LEN/Unchanged
*         F_SCR1:BCD result with correct signs
*         F_SCR2:ILOG/10^4
*         L_SCR1:Exponent digits on return from binstr
*         L_SCR2:first word of X packed/Unchanged

A16_st:
          clr.l     d0                  ;clr d0 for collection of signs
          andi.b    #$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
          tst.l     L_SCR2(a6)          ;check sign of original mantissa
          bge.b     mant_p              ;if pos, don't set SM
          moveq.l   #2,d0               ;move 2 in to d0 for SM
mant_p:
          tst.l     d6                  ;check sign of ILOG
          bge.b     wr_sgn              ;if pos, don't set SE
          addq.l    #1,d0               ;set bit 0 in d0 for SE
wr_sgn:
          bfins     d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1

* Clean up and restore all registers used.

          fmove.l   #0,FPSR             ;clear possible inex2/ainex bits
          fmovem.x (a7)+,fp0-fp2
          movem.l   (a7)+,d2-d7/a2
          rts

          end