1dnl  x86 mpn_sec_tabselect.
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C                                 cycles/limb
35C P5                                     ?
36C P6 model 0-8,10-12                     ?
37C P6 model 9  (Banias)                   ?
38C P6 model 13 (Dothan)                   ?
39C P4 model 0  (Willamette)     ?
40C P4 model 1  (?)              ?
41C P4 model 2  (Northwood)      4.5
42C P4 model 3  (Prescott)       ?
43C P4 model 4  (Nocona)                   ?
44C Intel Atom                             ?
45C AMD K6                       ?
46C AMD K7                       3.4
47C AMD K8                       ?
48C AMD K10                      ?
49
50C NOTES
51C  * This has not been tuned for any specific processor.  Its speed should not
52C    be too bad, though.
53C  * Using SSE2 could result in many-fold speedup.
54
C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
C
C The table at tp holds nents entries of n limbs each.  Every entry is visited
C in turn and merged into rp under a pair of complementary masks, so that rp
C ends up holding entry number which.  Neither loop branches on the mask: each
C pass reads a full entry and rewrites all n limbs of rp, whatever which is.
C
C Both loops test their exit condition at the bottom, so each body runs at
C least once; n = 0 or nents = 0 is not catered for.
C
C Arguments are taken from the stack.  After the four register pushes below
C they sit at 20(%esp) through 36(%esp).  The slots of rp and nents are reused
C as scratch for the two masks once those arguments have been copied into
C registers, and the slot of which is rebased in place.

C Values kept in registers.
define(`rp',        `%edi')
define(`tp',        `%esi')
define(`n',         `%ebx')
define(`nents',     `%ecx')
define(`idx',       `%ebp')

C Values kept on the stack.
define(`which',     `36(%esp)')
define(`keep_mask', `32(%esp)')
define(`take_mask', `20(%esp)')

ASM_START()
          TEXT
          ALIGN(16)
PROLOGUE(mpn_sec_tabselect)
          push      %edi
          push      %esi
          push      %ebx
          push      %ebp

          mov       32(%esp), nents
          mov       28(%esp), n
          mov       24(%esp), tp
          mov       20(%esp), rp

          sub       nents, which                  C rebase: the countdown in nents is added back below
          lea       (tp,n,4), tp                  C one past the end of table entry 0
          lea       (rp,n,4), rp                  C one past the end of the result area

L(outer):
          C Here which + nents equals the caller's which minus the index of
          C the entry tp currently covers, i.e. zero only for the wanted entry.
          mov       nents, %eax
          add       which, %eax
          neg       %eax                          C CF = 1 iff this is not the wanted entry
          sbb       %eax, %eax                    C all ones when CF = 1, zero otherwise
          mov       %eax, keep_mask               C all ones: rp limb survives
          not       %eax
          mov       %eax, take_mask               C all ones: table limb replaces it

          mov       n, idx
          neg       idx                           C negative index, counted up towards zero

          ALIGN(16)
L(top):   mov       (rp,idx,4), %edx
          mov       (tp,idx,4), %eax
          and       keep_mask, %edx
          and       take_mask, %eax
          or        %edx, %eax                    C exactly one of the two operands can be non-zero
          mov       %eax, (rp,idx,4)
          inc       idx
          js        L(top)                        C continue while idx is still negative

          mov       n, %eax
          lea       (tp,%eax,4), tp               C advance tp by one table entry
          dec       nents
          jne       L(outer)

          pop       %ebp
          pop       %ebx
          pop       %esi
          pop       %edi
          ret
EPILOGUE()
116