dnl  PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          1.5
C POWER4/PPC970          2
C POWER5                 2
C POWER6                 2.78
C POWER7               2.15-2.87

C This code is based on powerpc64/aors_n.asm.

C INPUT PARAMETERS
C rp      r3
C up      r4
C vp      r5
C n       r6
C cy      r7      (func_nc entry point only)

C OPERATION MACROS
C
C The two ifdef blocks below select the add or the subtract flavour of the
C same code.  Presumably the build defines exactly one of OPERATION_add_n and
C OPERATION_sub_n (TODO confirm; nothing here checks it).
C
C ADDSUBC   limb operation that consumes and produces the carry bit CA
C ADDSUB    variant that only produces CA; not referenced by the code visible
C           in this file
C func      name of the entry point without carry-in
C func_nc   name of the entry point with carry-in
C GENRVAL   turns r3 = CA - 1 into the 0/1 value returned in r3
C SETCBR    loads CA from the carry-in register passed as argument 1
C CLRCB     loads CA with the initial state for an operation without carry-in
C
C All comments are kept outside the quoted ifdef bodies on purpose, so that
C no comment text takes part in m4 quote matching.

C Addition: CA is a plain carry.
C   SETCBR   x + 0xffffffff carries out exactly when x != 0, so CA = (cy != 0)
C   CLRCB    r0 + 0 never carries out, so CA = 0
C   GENRVAL  (CA - 1) + 1 = CA
ifdef(`OPERATION_add_n',`
  define(ADDSUBC,   adde)
  define(ADDSUB,    addc)
  define(func,      mpn_add_n)
  define(func_nc,   mpn_add_nc)
  define(GENRVAL,   `addi     r3, r3, 1')
  define(SETCBR,    `addic    r0, $1, -1')
  define(CLRCB,     `addic    r0, r0, 0')
')

C Subtraction: CA has inverted sense, CA = 1 means no borrow.
C   SETCBR   0 - x leaves CA = 1 exactly when x == 0, so CA = (cy == 0)
C   CLRCB    r1 + 0xffffffff carries out when r1 != 0; r1 is the stack
C            pointer used for the frame in func and is assumed non-zero,
C            giving CA = 1
C   GENRVAL  -(CA - 1) = 1 - CA = borrow
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,   subfe)
  define(ADDSUB,    subfc)
  define(func,      mpn_sub_n)
  define(func_nc,   mpn_sub_nc)
  define(GENRVAL,   `neg      r3, r3')
  define(SETCBR,    `subfic   r0, $1, 0')
  define(CLRCB,     `addic    r0, r1, -1')
')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()

C func_nc (rp, up, vp, n, cy)
C
C Entry point with carry-in (borrow-in for subtraction).  It only converts
C the carry-in word in r7 into the carry bit CA through SETCBR, clobbering
C r0, and then continues in the common code at L(ent) inside func.  All the
C remaining arguments are passed through untouched in r3-r6.
PROLOGUE(func_nc)
          SETCBR(r7)                    C CA = incoming carry/borrow state
          b         L(ent)              C join func after its own CA setup
EPILOGUE()

C func (rp, up, vp, n)
C
C Writes {up,n} + {vp,n} (add flavour) or {up,n} - {vp,n} (subtract flavour)
C to {rp,n}, one 32-bit limb at a time through ADDSUBC, and returns the final
C carry or borrow (0 or 1) in r3.  func_nc joins at L(ent) with CA already
C loaded from its carry-in argument.
C
C Assumes n >= 1.  For n = 0 the code would still take the 4-limb path
C (n & 3 == 0) with ctr = 0, so that case is not handled here.
C
C Structure:
C   1. ctr = (n + 3) / 4 = number of blocks: an optional lead-in block of
C      n & 3 limbs (L(b01), L(b10), L(b11)) followed by n / 4 groups of 4.
C   2. L(go) preloads one group, L(top) adds the preloaded group while
C      loading the next one, L(end) adds the last preloaded group.
C   3. L(ret) converts CA into the return value and pops the frame.
C
C Register use:
C   r3, r4, r5   rp, up, vp, advanced as limbs are consumed
C   r6           n on entry, then reused for a loaded limb
C   r0, r6-r12   loaded source limbs
C   r28-r31      results; saved in and restored from a 32-byte stack frame
C   ctr          blocks still to process
C
C CA is the only state carried from limb to limb.  Apart from ADDSUBC, every
C instruction between the CA setup and L(ret) is one that leaves CA alone
C (loads, stores, addi, srwi, rlwinm., cmpwi, mtctr, branches), so pointer
C updates must stay addi and must not become addic.
C
C Operand order: ADDSUBC rd, v, u gives u + v + CA for adde and
C u - v - borrow for subfe, i.e. the up limb minus the vp limb.
PROLOGUE(func)
          CLRCB                         C CA = no carry-in / no borrow-in
L(ent):   stwu      r1, -32(r1)         C open a 32-byte frame
          rlwinm.   r0, r6, 0,30,31     C r0 = n & 3, set cr0
          cmpwi     cr6, r0, 2          C cr6 = (n & 3) compared with 2
          stw       r28, 8(r1)          C save r28 (register saves are
          addi      r6, r6, 3           C compute count...
          stw       r29, 12(r1)         C  interleaved with the count setup)
          srwi      r6, r6, 2           C ...for ctr: (n + 3) / 4
          stw       r30, 16(r1)
          mtctr     r6                  C copy count into ctr
          stw       r31, 20(r1)
          beq       cr0, L(b00)         C n & 3 == 0: no lead-in block
          blt       cr6, L(b01)         C n & 3 == 1
          beq       cr6, L(b10)         C n & 3 == 2
                                        C fall through: n & 3 == 3

C Lead-in block of 3 limbs.
L(b11):   lwz       r8, 0(r4)           C load s1 limb
          lwz       r9, 0(r5)           C load s2 limb
          lwz       r10, 4(r4)          C load s1 limb
          lwz       r11, 4(r5)          C load s2 limb
          lwz       r12, 8(r4)          C load s1 limb
          addi      r4, r4, 12          C up += 3 limbs
          lwz       r0, 8(r5)           C load s2 limb
          addi      r5, r5, 12          C vp += 3 limbs
          ADDSUBC   r29, r9, r8
          ADDSUBC   r30, r11, r10
          ADDSUBC   r31, r0, r12
          stw       r29, 0(r3)
          stw       r30, 4(r3)
          stw       r31, 8(r3)
          addi      r3, r3, 12          C rp += 3 limbs
          bdnz      L(go)               C 4-limb groups left: process them
          b         L(ret)              C n was 3: done

C Lead-in block of 1 limb.
L(b01):   lwz       r12, 0(r4)          C load s1 limb
          addi      r4, r4, 4           C up += 1 limb
          lwz       r0, 0(r5)           C load s2 limb
          addi      r5, r5, 4           C vp += 1 limb
          ADDSUBC   r31, r0, r12        C add
          stw       r31, 0(r3)
          addi      r3, r3, 4           C rp += 1 limb
          bdnz      L(go)               C 4-limb groups left: process them
          b         L(ret)              C n was 1: done

C Lead-in block of 2 limbs.
L(b10):   lwz       r10, 0(r4)          C load s1 limb
          lwz       r11, 0(r5)          C load s2 limb
          lwz       r12, 4(r4)          C load s1 limb
          addi      r4, r4, 8           C up += 2 limbs
          lwz       r0, 4(r5)           C load s2 limb
          addi      r5, r5, 8           C vp += 2 limbs
          ADDSUBC   r30, r11, r10       C add
          ADDSUBC   r31, r0, r12        C add
          stw       r30, 0(r3)
          stw       r31, 4(r3)
          addi      r3, r3, 8           C rp += 2 limbs
          bdnz      L(go)               C 4-limb groups left: process them
          b         L(ret)              C n was 2: done

C n & 3 == 0 enters here directly; CA was already initialised at entry, so
C there is nothing to do before the first group.
L(b00):
C Preload the first group of 4 limb pairs.
L(go):    lwz       r6, 0(r4)           C load s1 limb
          lwz       r7, 0(r5)           C load s2 limb
          lwz       r8, 4(r4)           C load s1 limb
          lwz       r9, 4(r5)           C load s2 limb
          lwz       r10, 8(r4)          C load s1 limb
          lwz       r11, 8(r5)          C load s2 limb
          lwz       r12, 12(r4)         C load s1 limb
          lwz       r0, 12(r5)          C load s2 limb
          bdz       L(end)              C that was the last group: drain it

          addi      r4, r4, 16          C step past the preloaded group
          addi      r5, r5, 16

C Main loop: each pass adds the group loaded by the previous pass (or by
C L(go)) and loads the next group, so every source register is reloaded
C right after the ADDSUBC that consumed it.
          ALIGN(16)
L(top):   ADDSUBC   r28, r7, r6
          lwz       r6, 0(r4)           C load s1 limb
          lwz       r7, 0(r5)           C load s2 limb
          ADDSUBC   r29, r9, r8
          lwz       r8, 4(r4)           C load s1 limb
          lwz       r9, 4(r5)           C load s2 limb
          ADDSUBC   r30, r11, r10
          lwz       r10, 8(r4)          C load s1 limb
          lwz       r11, 8(r5)          C load s2 limb
          ADDSUBC   r31, r0, r12
          lwz       r12, 12(r4)         C load s1 limb
          lwz       r0, 12(r5)          C load s2 limb
          stw       r28, 0(r3)
          addi      r4, r4, 16          C up += 4 limbs
          stw       r29, 4(r3)
          addi      r5, r5, 16          C vp += 4 limbs
          stw       r30, 8(r3)
          stw       r31, 12(r3)
          addi      r3, r3, 16          C rp += 4 limbs
          bdnz      L(top)              C decrement ctr and loop back

C Drain: add and store the last preloaded group.  Only registers and rp are
C used, so up and vp need no further update.
L(end):   ADDSUBC   r28, r7, r6
          ADDSUBC   r29, r9, r8
          ADDSUBC   r30, r11, r10
          ADDSUBC   r31, r0, r12
          stw       r28, 0(r3)
          stw       r29, 4(r3)
          stw       r30, 8(r3)
          stw       r31, 12(r3)

C Return value: subfe of a register with itself yields CA - 1 (0 or -1)
C whatever r0 holds; GENRVAL maps that to the carry (add) or the borrow
C (subtract).  The restores of r28-r31 are interleaved and do not touch CA.
L(ret):
          lwz       r28, 8(r1)
          lwz       r29, 12(r1)
          subfe     r3, r0, r0          C r3 = CA - 1
          lwz       r30, 16(r1)
          GENRVAL                       C r3 = carry or borrow, 0 or 1
          lwz       r31, 20(r1)
          addi      r1, r1, 32          C pop the frame
          blr
EPILOGUE()
