1dnl  PowerPC-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
3
4dnl  Copyright 2003-2005 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32include(`../config.m4')
33
34C                  cycles/limb
35C POWER3/PPC630          1.75
36C POWER4/PPC970          2.10
37C POWER5                 ?
38C POWER6                 ?
39C POWER7                 1.75
40
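C Per-size figures, presumably cycles/limb for operands of n limbs; the
C entries marked # equal the summary figures given above.
41C   n        POWER3/PPC630   POWER4/PPC970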
42C     1          15.00               15.33
43C     2             7.50                7.99
44C     3             5.33                6.00
45C     4             4.50                4.74
46C     5             4.20                4.39
47C     6             3.50                3.99
48C     7             3.14                3.64
49C     8             3.00                3.36
50C     9             3.00                3.36
51C    10             2.70                3.25
52C    11             2.63                3.11
53C    12             2.58                3.00
54C    13             2.61                3.02
55C    14             2.42                2.82
56C    15             2.40                2.79
57C    50             2.08                2.67
58C   100             1.85                2.31
59C   200             1.80                2.18
60C   400             1.77                2.14
61C  1000             1.76                2.10#
62C  2000             1.75#               2.13
63C  4000             2.30                2.57
64C  8000             2.62                2.58
65C 16000             2.52                4.25
66C 32000             2.49             16.25
67C 64000             2.66             18.76
68
C Operation selection.
C
C Each group below tests one OPERATION_xxx symbol and, when it is defined,
C defines two m4 macros used by the code further down:
C   func   the name passed to PROLOGUE
C   logop  the three-register logical instruction applied to each limb pair
C
C   symbol             func          logop
C   OPERATION_and_n    mpn_and_n     and
C   OPERATION_andn_n   mpn_andn_n    andc
C   OPERATION_nand_n   mpn_nand_n    nand
C   OPERATION_ior_n    mpn_ior_n     or
C   OPERATION_iorn_n   mpn_iorn_n    orc
C   OPERATION_nior_n   mpn_nior_n    nor
C   OPERATION_xor_n    mpn_xor_n     xor
C   OPERATION_xnor_n   mpn_xnor_n    eqv
C
C logop is always issued with the u limb as first source and the v limb as
C second source, which matters for the non-commutative andc and orc.
C
C NOTE(review): presumably exactly one OPERATION_xxx symbol is defined per
C build of this file; if several were defined the last matching group would
C win, and if none were defined func and logop would stay undefined.  Confirm
C against the build machinery.
69ifdef(`OPERATION_and_n',
70`         define(`func',`mpn_and_n')
71          define(`logop',               `and')')
72ifdef(`OPERATION_andn_n',
73`         define(`func',`mpn_andn_n')
74          define(`logop',               `andc')')
75ifdef(`OPERATION_nand_n',
76`         define(`func',`mpn_nand_n')
77          define(`logop',               `nand')')
78ifdef(`OPERATION_ior_n',
79`         define(`func',`mpn_ior_n')
80          define(`logop',               `or')')
81ifdef(`OPERATION_iorn_n',
82`         define(`func',`mpn_iorn_n')
83          define(`logop',               `orc')')
84ifdef(`OPERATION_nior_n',
85`         define(`func',`mpn_nior_n')
86          define(`logop',               `nor')')
87ifdef(`OPERATION_xor_n',
88`         define(`func',`mpn_xor_n')
89          define(`logop',               `xor')')
90ifdef(`OPERATION_xnor_n',
91`         define(`func',`mpn_xnor_n')
92          define(`logop',               `eqv')')
93
94C INPUT PARAMETERS
95C rp      r3
96C up      r4
97C vp      r5
98C n       r6
99
100MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
101
102ASM_START()
C func (rp, up, vp, n)
C
C Stores rp[i] = up[i] logop vp[i] for i = 0 .. n-1, one 8-byte limb at a
C time, where logop is the instruction selected by the m4 defines above.
C
C Structure:
C  * The loop at L(oop) handles four limbs per pass and is counted by ctr,
C    which is loaded with (n+3) >> 2.
C  * Loads run one limb ahead of the stores.  The register pairs r6/r7 and
C    r8/r9 alternate as holders of the most recently loaded u/v limbs, and
C    r10 carries each result to memory.
C  * The first pass enters the loop body at L(L01), L(L10), L(L11) or L(L00)
C    depending on (n-1) & 3.  up, vp and rp are first advanced by
C    8*((n-1) & 3) bytes so that the fixed displacements in the body address
C    the correct limbs from whichever label is entered.
C  * When ctr reaches zero one limb pair is still pending in r6/r7; it is
C    combined and stored by the two instructions after the loop.
C
C Registers written: r0, r3-r10, ctr, cr0, cr6.  No stack is used.
C
C NOTE(review): n = 0 is not handled.  Limb 0 of up and vp is loaded
C unconditionally, and ctr would be loaded with 0 so the bdnz below would
C not terminate after one pass.  Callers presumably guarantee n >= 1 -
C confirm against the mpn interface contract.
103PROLOGUE(func)
C Both register pairs are preloaded with limb 0 (here and again below),
C because the entry label chosen later decides which pair is consumed first.
104          ld        r8, 0(r4) C read lowest u limb
105          ld        r9, 0(r5) C read lowest v limb
106          addi      r6, r6, 3 C compute branch count (1)
107          rldic.    r0, r6, 3, 59       C r0 = ((n-1) & 3) << 3; cr0.eq = (n == 4t+1)
108          cmpldi    cr6, r0, 16         C cr6 = (n cmp 4t + 3)
109
C ctr = (n+3) >> 2.  The mode32 form keeps only the low 30 bits of the
C shifted value, so bits of r6 above bit 31 are ignored (presumably because
C n is a 32-bit quantity in that ABI - confirm).
110ifdef(`HAVE_ABI_mode32',
111`         rldicl    r6, r6, 62,34',     C ...branch count
112`         rldicl    r6, r6, 62, 2')     C ...branch count
113          mtctr     r6
114
115          ld        r6, 0(r4) C read lowest u limb (again)
116          ld        r7, 0(r5) C read lowest v limb (again)
117
118          add       r5, r5, r0          C offset vp
119          add       r4, r4, r0          C offset up
120          add       r3, r3, r0          C offset rp
121
C Dispatch on n mod 4 (r0 is 0, 8, 16 or 24):
C   n = 4t+1  r0 = 0    enter at L(L01)
C   n = 4t+2  r0 = 8    enter at L(L10)
C   n = 4t+3  r0 = 16   enter at L(L11)
C   n = 4t+4  r0 = 24   enter at L(L00)
122          beq       cr0, L(L01)
123          blt       cr6, L(L10)
124          beq       cr6, L(L11)
125          b         L(L00)
126
C Main loop.  Each group of four instructions loads the next limb pair into
C one register pair while combining and storing the previously loaded pair
C from the other.
127L(oop):   ld        r8, -24(r4)
128          ld        r9, -24(r5)
129          logop     r10, r6, r7
130          std       r10, -32(r3)
131L(L00):   ld        r6, -16(r4)
132          ld        r7, -16(r5)
133          logop     r10, r8, r9
134          std       r10, -24(r3)
135L(L11):   ld        r8, -8(r4)
136          ld        r9, -8(r5)
137          logop     r10, r6, r7
138          std       r10, -16(r3)
139L(L10):   ld        r6, 0(r4)
140          ld        r7, 0(r5)
141          logop     r10, r8, r9
142          std       r10, -8(r3)
143L(L01):   addi      r5, r5, 32
144          addi      r4, r4, 32
145          addi      r3, r3, 32
146          bdnz      L(oop)
147
C Tail: the last limb pair is still in r6/r7.  The pointers were already
C advanced by 32 at L(L01), hence the -32 displacement.
148          logop     r10, r6, r7
149          std       r10, -32(r3)
150          blr
151EPILOGUE()
152