dnl  Alpha mpn_rshift -- Shift a number right.

dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     3.25
C EV6:     1.75

C  mpn_rshift (rp, up, n, cnt)
C
C  Shifts the n-limb number at up right by cnt bits and stores the n-limb
C  result at rp.  The bits shifted out of the least significant limb are
C  returned in r0, left-justified (up[0] shifted left by 64-cnt).
C
C  up[0] is loaded unconditionally, so n must be at least 1.  The left
C  shift count is formed as 0-cnt and relies on the hardware using only the
C  low 6 bits of a shift count, so cnt is expected to be in 1..63.
C
C  INPUT PARAMETERS
C  rp     r16
C  up     r17
C  n      r18
C  cnt    r19
C
C  REGISTER USE
C  r0           return value
C  r1-r4        source limbs currently in flight
C  r5-r8        left-shifted parts, then the combined result limbs
C  r20          0-cnt, used as the left shift count (64-cnt mod 64)
C  r21-r24      right-shifted parts; r24 carries the part that still waits
C               for the next higher limb
C  r28          iteration count of the one-limb-at-a-time loop
C
C  STRUCTURE
C  Every result limb except the top one is
C       (up[i] >> cnt) | (up[i+1] << (64-cnt))
C  and the top one is just up[n-1] >> cnt.  The first (n-1) mod 4 of them
C  are produced by a simple loop, the remaining multiple of 4 by a 4-way
C  unrolled, software pipelined loop, and the top limb is stored last.


ASM_START()
PROLOGUE(mpn_rshift)
          ldq       r4,0(r17)           C load first limb
          subq      r31,r19,r20         C r20 = -cnt, left shift count
          subq      r18,1,r18           C r18 = n-1, limbs needing a neighbour
          and       r18,4-1,r28         C number of limbs in first loop
          sll       r4,r20,r0           C compute function result

          beq       r28,L(L0)           C nothing to peel off
          subq      r18,r28,r18         C r18 = multiple of 4 left for main part

C Simple loop, one result limb per iteration.  r4 holds the current source
C limb on entry to every iteration.
          ALIGN(8)
L(top0):
          ldq       r3,8(r17)           C next higher source limb
          addq      r16,8,r16
          srl       r4,r19,r5           C low part from current limb
          addq      r17,8,r17
          subq      r28,1,r28
          sll       r3,r20,r6           C high part from next limb
          bis       r3,r3,r4            C next limb becomes current limb
          bis       r5,r6,r8
          stq       r8,-8(r16)          C r16 was already advanced
          bne       r28,L(top0)

C r4 is the current source limb, r18 a multiple of 4.  r24 is its right
C shifted part, waiting either for a higher limb or to be stored as the
C top result limb.
L(L0):    srl       r4,r19,r24
          beq       r18,L(end)
C warm up phase 1
          ldq       r1,8(r17)
          subq      r18,4,r18
          ldq       r2,16(r17)
          ldq       r3,24(r17)
          ldq       r4,32(r17)
C warm up phase 2
          sll       r1,r20,r7
          srl       r1,r19,r21
          sll       r2,r20,r8
          beq       r18,L(end1)         C exactly 4 limbs, finish without loop
          ldq       r1,40(r17)
          srl       r2,r19,r22
          ldq       r2,48(r17)
          sll       r3,r20,r5
          bis       r7,r24,r7
          srl       r3,r19,r23
          bis       r8,r21,r8
          sll       r4,r20,r6
          ldq       r3,56(r17)
          srl       r4,r19,r24
          ldq       r4,64(r17)
          subq      r18,4,r18
          beq       r18,L(end2)         C exactly 8 limbs, finish without loop
          ALIGN(16)
C main loop
C State on entry to each iteration, and on entry to end2:
C   r7, r8      two finished result limbs, not yet stored
C   r5, r6      left-shifted parts still to be combined with r22, r23
C   r24         right-shifted part waiting for the next limb
C   r1-r4       the next four source limbs, already loaded
C Each iteration stores four result limbs and loads four more source limbs.
L(top):   stq       r7,0(r16)
          bis       r5,r22,r5
          stq       r8,8(r16)
          bis       r6,r23,r6

          sll       r1,r20,r7
          subq      r18,4,r18
          srl       r1,r19,r21
          unop      C ldq     r31,-96(r17)

          sll       r2,r20,r8
          ldq       r1,72(r17)
          srl       r2,r19,r22
          ldq       r2,80(r17)

          stq       r5,16(r16)
          bis       r7,r24,r7
          stq       r6,24(r16)
          bis       r8,r21,r8

          sll       r3,r20,r5
          unop      C ldq     r31,-96(r17)
          srl       r3,r19,r23
          addq      r16,32,r16

          sll       r4,r20,r6
          ldq       r3,88(r17)
          srl       r4,r19,r24
          ldq       r4,96(r17)

          addq      r17,32,r17
          bne       r18,L(top)
C cool down phase 2/1
C Same work as one loop iteration, but without loading any more limbs.
L(end2):
          stq       r7,0(r16)
          bis       r5,r22,r5
          stq       r8,8(r16)
          bis       r6,r23,r6
          sll       r1,r20,r7
          srl       r1,r19,r21
          sll       r2,r20,r8
          srl       r2,r19,r22
          stq       r5,16(r16)
          bis       r7,r24,r7
          stq       r6,24(r16)
          bis       r8,r21,r8
          sll       r3,r20,r5
          srl       r3,r19,r23
          sll       r4,r20,r6
          srl       r4,r19,r24
C cool down phase 2/2
          stq       r7,32(r16)
          bis       r5,r22,r5
          stq       r8,40(r16)
          bis       r6,r23,r6
          stq       r5,48(r16)
          stq       r6,56(r16)
C cool down phase 2/3
          stq       r24,64(r16)         C top result limb
          ret       r31,(r26),1

C cool down phase 1/1
C Entered from warm up phase 2 with r7, r21, r8 computed, r24 still pending
C and r3, r4 loaded but not yet shifted.
L(end1):
          srl       r2,r19,r22
          sll       r3,r20,r5
          bis       r7,r24,r7
          srl       r3,r19,r23
          bis       r8,r21,r8
          sll       r4,r20,r6
          srl       r4,r19,r24
C cool down phase 1/2
          stq       r7,0(r16)
          bis       r5,r22,r5
          stq       r8,8(r16)
          bis       r6,r23,r6
          stq       r5,16(r16)
          stq       r6,24(r16)
          stq       r24,32(r16)         C top result limb
          ret       r31,(r26),1

C No limbs left for the unrolled part, only the top result limb remains.
L(end):   stq       r24,0(r16)
          ret       r31,(r26),1
EPILOGUE(mpn_rshift)
ASM_END()
181