1dnl Alpha mpn_mod_34lsub1.
2
3dnl  Copyright 2002 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C      cycles/limb
34C EV4:     4 (?)
35C EV5:     2.67
36C EV6:     1.67
37
38
39dnl  INPUT PARAMETERS
40dnl  up             r16
41dnl  n              r17
42
dnl  REGISTER ALIASES (as used by mpn_mod_34lsub1 in this file)
dnl  l0,l1,l2       limbs most recently loaded from the operand, one destined
dnl                 for each of the three accumulators
dnl  a0,a1,a2       64-bit running sums of the limbs at index 0, 1, 2 (mod 3)
dnl  c0,c1,c2       number of carries out of a0, a1, a2 respectively
dnl  The routine additionally uses r0-r4 and r25 directly as scratch, so the
dnl  aliases must not be moved onto any of those registers, nor onto r16/r17
dnl  which hold the arguments.
dnl  NOTE(review): all registers chosen here are presumably call-clobbered in
dnl  the target ABI, since the routine saves and restores nothing -- confirm
dnl  against the calling standard before changing an assignment.
43define(`l0',`r18')
44define(`l1',`r19')
45define(`l2',`r20')
46define(`a0',`r21')
47define(`a1',`r22')
48define(`a2',`r23')
49define(`c0',`r24')
50define(`c1',`r5')
51define(`c2',`r6')
52
C mpn_mod_34lsub1 (up, n)
C
C Folds the n 64-bit limbs at up (up[0] least significant) into one 64-bit
C value that is congruent to the whole operand modulo 2^48-1.
C
C In:    r16 = up (address of the first limb), r17 = n (limb count, tested
C        as a signed value)
C Out:   r0  = folded sum.  It is only congruent to the operand; no final
C        reduction into the range 0 .. 2^48-2 is done here.
C Uses:  r0-r6 and r18-r25 as scratch and advances r16/r17.  Nothing is
C        saved or restored and no memory is written.
C
C Method: limb i has weight 2^(64*i), and 2^192 = (2^48)^4 is congruent to 1
C mod 2^48-1, so limbs three apart share a weight.  The limbs are therefore
C summed into three 64-bit accumulators (a0, a1, a2), the carries out of each
C are counted separately (c0, c1, c2), and the six values are combined at the
C end with the shifts appropriate to their weights.
53ASM_START()
54PROLOGUE(mpn_mod_34lsub1)
C Clear the three carry counters (r31 is the Alpha always-zero register).
55          bis       r31, r31, c0
56          bis       r31, r31, c1
57          bis       r31, r31, c2
58
C r17 = n-3.  With fewer than 3 limbs there is nothing to preload: zero the
C accumulators and let the tail code pick up the 0, 1 or 2 limbs.
59          lda       r17, -3(r17)
60          bge       r17, $L_3_or_more
61          bis       r31, r31, a0
62          bis       r31, r31, a1
63          bis       r31, r31, a2
64          br        r31, $L_012
65
C At least 3 limbs: the first three initialise a0, a1, a2 directly, so no
C carry can arise yet.  Afterwards r17 = n-6; negative means fewer than 3
C limbs are left and only the tail code is needed.
66$L_3_or_more:
67          ldq       a0, 0(r16)
68          ldq       a1, 8(r16)
69          ldq       a2, 16(r16)
70          lda       r16, 24(r16)
71          lda       r17, -3(r17)
72          blt       r17, $L_012
73
C At least 6 limbs: preload the second group into l0-l2 and start the a0
C addition, which sets up the state the loop expects on entry.  Afterwards
C r17 = n-9; negative means no further full group exists, so skip the loop
C and just finish this group at $L_end.
74$L_6_or_more:
75          ldq       l0, 0(r16)
76          ldq       l1, 8(r16)
77          ldq       l2, 16(r16)
78          addq      l0, a0, a0
79
80          lda       r16, 24(r16)
81          lda       r17, -3(r17)
82          blt       r17, $L_end
83
84          ALIGN(16)
85C Main loop
C State at the top of every iteration:
C   - a0 already includes l0, but the carry of that addition is not counted
C   - l1 and l2 are loaded but not yet added to a1 and a2
C   - r16 points at the next unread limb and, since r17 >= 0, at least three
C     more limbs are available there
C Each cmpult x, y, r0 that follows addq y, x, x gives r0 = 1 exactly when the
C 64-bit sum wrapped around, i.e. the carry out of that addition.  The old
C l-register is reloaded with the next group's limb right after its last use.
86$L_9_or_more:
87$Loop:    cmpult    a0, l0, r0
88          ldq       l0, 0(r16)
89          addq      r0, c0, c0
90          addq      l1, a1, a1
91          cmpult    a1, l1, r0
92          ldq       l1, 8(r16)
93          addq      r0, c1, c1
94          addq      l2, a2, a2
95          cmpult    a2, l2, r0
96          ldq       l2, 16(r16)
97          addq      r0, c2, c2
98          addq      l0, a0, a0
99          lda       r16, 24(r16)
100          lda       r17, -3(r17)
101          bge       r17, $Loop
102
C Finish the group that is still pending in l0-l2: same steps as the loop body
C but without loading another group.
103$L_end:   cmpult    a0, l0, r0
104          addq      r0, c0, c0
105          addq      l1, a1, a1
106          cmpult    a1, l1, r0
107          addq      r0, c1, c1
108          addq      l2, a2, a2
109          cmpult    a2, l2, r0
110          addq      r0, c2, c2
111
112C Handle the last (n mod 3) limbs
C Every path arrives here with r17 in -3 .. -1 (for n >= 0), so r17+2 is
C -1, 0 or 1 for 0, 1 or 2 leftover limbs.  A multiple of 3 limbs has been
C consumed so far, hence the first leftover belongs to a0/c0 and the second
C to a1/c1.
113$L_012:   lda       r17, 2(r17)
114          blt       r17, $L_0
115          ldq       l0, 0(r16)
116          addq      l0, a0, a0
117          cmpult    a0, l0, r0
118          addq      r0, c0, c0
119          beq       r17, $L_0
120          ldq       l1, 8(r16)
121          addq      l1, a1, a1
122          cmpult    a1, l1, r0
123          addq      r0, c1, c1
124
125C Align and sum our 3 main accumulators and 3 carry accumulators
C Weights reduced mod 2^48-1:
C   a0: 2^0      a1: 2^64  -> 2^16     a2: 2^128 -> 2^32
C   c2: 2^192 -> 2^0   c0: 2^64 -> 2^16   c1: 2^128 -> 2^32
C A value x of weight 2^k contributes ((x mod 2^(48-k)) << k) + (x >> (48-k)),
C because bits shifted past position 48 wrap around to position 0.
C The pieces of the six terms are interleaved below, and a1, a2 and c0 are
C reused as temporaries once their own values have been consumed.
C With HAVE_LIMB_LITTLE_ENDIAN the mask-and-shift is a single insll/inswl;
C otherwise the same value is built with zapnot + sll through scratch r25.
C NOTE(review): presumably the ins* forms are avoided because their result
C depends on the endian mode -- confirm against the architecture manual.
126$L_0:     srl       a0, 48, r2                    C a0 >> 48
127          srl       a1, 32, r4                    C a1 >> 32
128ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
129`         insll     a1, 2, r1',                   C (a1 & 0xffffffff) << 16
130`         zapnot    a1, 15, r25
131          sll       r25, 16, r1')
132          zapnot    a0, 63, r0                    C a0 & 0xffffffffffff
133          srl       a2, 16, a1                    C a1 (now free) = a2 >> 16
134ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
135`         inswl     a2, 4, r3',                   C (a2 & 0xffff) << 32
136`         zapnot    a2, 3, r25
137          sll       r25, 32, r3')
138          addq      r1, r4, r1                    C r1 = folded a1 term
139          addq      r0, r2, r0                    C r0 = folded a0 term
140          srl       c0, 32, a2                    C a2 (now free) = c0 >> 32
141ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
142`         insll     c0, 2, r4',                   C (c0 & 0xffffffff) << 16
143`         zapnot    c0, 15, r25
144          sll       r25, 16, r4')
145          addq      r0, r1, r0                    C result += a1 term
146          addq      r3, a1, r3                    C r3 = folded a2 term
147          addq      r0, r3, r0                    C result += a2 term
148          srl       c1, 16, c0                    C c0 (now free) = c1 >> 16
149ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
150`         inswl     c1, 4, r2',                   C (c1 & 0xffff) << 32
151`         zapnot    c1, 3, r25
152          sll       r25, 32, r2')
153          addq      r4, a2, r4                    C r4 = folded c0 term
C c2 grows by at most 1 per group of three limbs, so its bits above bit 47
C can only be set once n reaches roughly 3*2^48 limbs.  The high-part term of
C c2 is therefore omitted; the two instructions it would need are kept here
C as commented-out code.
154C         srl       c2, 48, r3                    C This will be 0 in practice
155          zapnot    c2, 63, r1                    C r1 = c2 & 0xffffffffffff
156          addq      r0, r4, r0                    C result += c0 term
157          addq      r2, c0, r2                    C r2 = folded c1 term
158          addq      r0, r2, r0                    C result += c1 term
159C         addq      r1, r3, r1
160          addq      r0, r1, r0                    C result += c2 term
161
C Return to the address in r26 with the result in r0.
162          ret       r31, (r26), 1
163EPILOGUE(mpn_mod_34lsub1)
164ASM_END()
165