dnl  PowerPC-64 mpn_com.

dnl  Copyright 2004, 2005, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                  cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          1.25
C POWER5                 ?
C POWER6                 1.32
C POWER7                 1.13

C INPUT PARAMETERS
C Symbolic names for the three argument registers used by mpn_com below.
define(`rp',        `r3')               C destination pointer, written with std
define(`up',        `r4')               C source pointer, read with ld
define(`n',         `r5')               C number of 8-byte limbs to process

ASM_START()

C mpn_com -- store the bitwise complement of n limbs:
C            rp[i] = NOT up[i]   for 0 <= i < n
C
C In:    rp (r3) = destination, up (r4) = source, n (r5) = limb count.
C        Limbs are 8 bytes wide (ld/std, pointers stepped by 8).
C        n must be at least 1: the short loop runs its body before the
C        count register is tested, so n = 0 is not handled.
C Out:   no result register is set.
C Uses:  r0, r3-r12, cr0, cr6, cr7, ctr.  The stack is not touched.
C
C Strategy
C   n < 4    one limb per iteration in L(t).
C   n >= 4   if n is odd, one limb is handled up front.  The remaining
C            even count goes through an 8-limb unrolled loop in which the
C            loads run ahead of the stores.  The loop is entered in the
C            middle, at the label selected by n & 6, after preloading two
C            limbs and biasing up/rp so that the displacements used at the
C            entry label address the first unprocessed limbs.  ctr is set
C            to (n + 4) >> 3, which counts the partial first pass.  When
C            the loop falls through, r6-r9 still hold four limbs that have
C            not been complemented or stored; L(end) finishes them.
C
C            n & 6   entry    preloaded   up bias   rp bias
C              0     L(lo0)   r6, r7          0       -32
C              2     L(lo2)   r8, r9        +16       -16
C              4     L(lo4)   r6, r7        -32         0
C              6     L(lo6)   r8, r9        -16       -48

PROLOGUE(mpn_com)

C With the 32-bit ABI only the low 32 bits of n are kept (rldicl mask 32..63).
ifdef(`HAVE_ABI_mode32',
`         rldicl    n, n, 0,32')

          cmpdi     cr0, n, 4
          blt       L(sml)                  C fewer than 4 limbs: simple loop

          addi      r10, n, 4
          srdi      r10, r10, 3
          mtctr     r10                     C ctr = (n + 4) >> 3

          andi.     r0, n, 1                C cr0 eq  <=>  n is even
          rlwinm    r11, n, 0,30,30         C r11 = n & 2
          rlwinm    r12, n, 0,29,29         C r12 = n & 4
          cmpdi     cr6, r11, 0             C cr6 ne  <=>  (n & 2) set
          cmpdi     cr7, r12, 0             C cr7 ne  <=>  (n & 4) set

          beq       cr0, L(xx0)
L(xx1):   ld        r6, 0(up)               C n odd: do a single limb first
          addi      up, up, 8
          nor       r6, r6, r6
          std       r6, 0(rp)
          addi      rp, rp, 8

L(xx0):   bne       cr6, L(x10)
L(x00):   ld        r6, 0(up)               C (n & 2) clear: preload into r6, r7
          ld        r7, 8(up)
          bne       cr7, L(100)
L(000):   addi      rp, rp, -32             C so that 32(rp) at L(lo0) is limb 0
          b         L(lo0)
L(100):   addi      up, up, -32             C so that 48(up) at L(lo4) is limb 2
          b         L(lo4)
L(x10):   ld        r8, 0(up)               C (n & 2) set: preload into r8, r9
          ld        r9, 8(up)
          bne       cr7, L(110)
L(010):   addi      up, up, 16              C 0(up) at L(lo2) is limb 2
          addi      rp, rp, -16             C 16(rp) at L(lo2) is limb 0
          b         L(lo2)
L(110):   addi      up, up, -16             C 32(up) at L(lo6) is limb 2
          addi      rp, rp, -48             C 48(rp) at L(lo6) is limb 0
          b         L(lo6)

C Short operands (n < 4): load, complement, store, one limb per pass.
L(sml):   mtctr     n
L(t):     ld        r6, 0(up)
          addi      up, up, 8
          nor       r6, r6, r6
          std       r6, 0(rp)
          addi      rp, rp, 8
          bdnz      L(t)
          blr

C Main loop: 8 limbs per pass in four load/complement/store groups of two.
C Each group stores the pair loaded by the preceding group.
          ALIGN(32)
L(top):   nor       r6, r6, r6
          nor       r7, r7, r7
          std       r6, 0(rp)
          std       r7, 8(rp)
L(lo2):   ld        r6, 0(up)
          ld        r7, 8(up)
          nor       r8, r8, r8
          nor       r9, r9, r9
          std       r8, 16(rp)
          std       r9, 24(rp)
L(lo0):   ld        r8, 16(up)
          ld        r9, 24(up)
          nor       r6, r6, r6
          nor       r7, r7, r7
          std       r6, 32(rp)
          std       r7, 40(rp)
L(lo6):   ld        r6, 32(up)
          ld        r7, 40(up)
          nor       r8, r8, r8
          nor       r9, r9, r9
          std       r8, 48(rp)
          std       r9, 56(rp)
          addi      rp, rp, 64
L(lo4):   ld        r8, 48(up)
          ld        r9, 56(up)
          addi      up, up, 64
          bdnz      L(top)

C Drain: r6-r9 hold the last four limbs, loaded but not yet processed.
L(end):   nor       r6, r6, r6
          nor       r7, r7, r7
          std       r6, 0(rp)
          std       r7, 8(rp)
          nor       r8, r8, r8
          nor       r9, r9, r9
          std       r8, 16(rp)
          std       r9, 24(rp)
          blr
EPILOGUE()