dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  Optimizations:
dnl  * Avoid skip instructions
dnl  * Put carry-generating and carry-consuming insns consecutively
dnl  * Don't allocate any stack, "home" positions for parameters could be used.

include(`../config.m4')

dnl  Symbolic register names used by the multiply routine in this file.
dnl
dnl    p0   low  32x32 partial product, becomes the low  64 bits of the result
dnl    p1   high 32x32 partial product, becomes the high 64 bits of the result
dnl    m0   first  middle partial product
dnl    m1   second middle partial product
dnl    x    m0 + m1, truncated to 64 bits
dnl    t0   lower 32 bits of x moved to the upper half of a register
dnl    t1   upper 32 bits of x moved to the lower half of a register
dnl    t32  the constant 2^32
dnl
dnl  NOTE: m0 and m1 alias %r23 and %r24, which the routine also reads as
dnl  incoming arguments, so every argument must be consumed before m0 and m1
dnl  are loaded.
define(`p0',`%r28')
define(`p1',`%r29')
define(`t32',`%r19')
define(`t0',`%r20')
define(`t1',`%r21')
define(`x',`%r22')
define(`m0',`%r23')
define(`m1',`%r24')

dnl  Pick the assembler level matching the ABI.  HAVE_ABI_2_0w is presumably
dnl  defined by the included config.m4 when building for the 64-bit (wide)
dnl  ABI -- confirm against the configure output.
ifdef(`HAVE_ABI_2_0w',
`         .level    2.0w
',`       .level    2.0
')
C  mpn_umul_ppmm_r -- unsigned 64x64 -> 128 bit multiply.
C
C  Inputs, 2.0w:  %r26 and %r25 hold the two 64-bit operands and %r24 holds
C                 the address that receives the low 64 bits of the product.
C  Inputs, 2.0n:  each operand arrives as two 32-bit halves, %r25:%r26 and
C                 %r23:%r24 (upper:lower), and the store address is read
C                 from the stack.
C  Result:        low 64 bits of the product are stored at 0(%r31).  The
C                 high 64 bits are returned in %r28 for 2.0w.  For 2.0n the
C                 upper 32 bits go to %r28 while %r29 keeps the full high
C                 word (presumably the caller reads only its lower 32 bits
C                 -- confirm against the 2.0n calling convention).
C  Stack:         128 bytes of scratch are allocated on entry and released
C                 in the delay slot of the return branch.
C  Scratch regs:  %r19-%r24, %r28, %r29, %r31, %fr4-%fr9.
C
C  Method: the operands are split into 32-bit halves and the four 32x32
C  products are formed with xmpyu in the floating point unit, then moved to
C  the integer registers through memory and summed:
C
C      product = p1 * 2^64 + (m0 + m1) * 2^32 + p0
PROLOGUE(mpn_umul_ppmm_r)
          ldo                 128(%r30),%r30                C allocate 128 bytes of scratch
ifdef(`HAVE_ABI_2_0w',
`         std                 %r26,-64(%r30)                C spill first operand
          std                 %r25,-56(%r30)                C spill second operand
          copy                %r24,%r31                     C save store address, %r24 is reused as m1
',`
          depd                %r25,31,32,%r26               C merge halves into one 64-bit operand
          std                 %r26,-64(%r30)
          depd                %r23,31,32,%r24               C same for the second operand
          std                 %r24,-56(%r30)
          ldw                 -180(%r30),%r31               C store address, at entry sp - 52
')

C  Reload both operands into floating point registers so that each 32-bit
C  half can be addressed separately (L = upper half, R = lower half).
          fldd                -64(%r30),%fr4
          fldd                -56(%r30),%fr5

C  Four 32x32 -> 64 bit partial products, each written back to the scratch
C  area so the integer unit can load it.
          xmpyu               %fr5R,%fr4R,%fr6
          fstd                %fr6,-128(%r30)
          xmpyu               %fr5R,%fr4L,%fr7
          fstd                %fr7,-120(%r30)
          xmpyu               %fr5L,%fr4R,%fr8
          fstd                %fr8,-112(%r30)
          xmpyu               %fr5L,%fr4L,%fr9
          fstd                %fr9,-104(%r30)

          depdi,z             1,31,1,t32                    C t32 = 2^32

          ldd                 -128(%r30),p0                 C lo = low 64 bit of product
          ldd                 -120(%r30),m0                 C m0 = mid0 64 bit of product
          ldd                 -112(%r30),m1                 C m1 = mid1 64 bit of product
          ldd                 -104(%r30),p1                 C hi = high 64 bit of product

C  The ,*nuv completer nullifies the following instruction when m0+m1 does
C  not overflow 64 bits.  On overflow the lost bit is worth 2^64 within the
C  middle sum, which corresponds to 2^32 in the high word, so t32 is added
C  to p1.  The ,l forms leave the carry bits alone.
          add,l,*nuv          m0,m1,x                       C x = m1+m0
           add,l              t32,p1,p1           C propagate carry to mid of p1
          depd,z              x,31,32,t0                    C lo32(m1+m0)
          add                 t0,p0,p0                      C low word, sets carry for add,dc below
          extrd,u             x,31,32,t1                    C hi32(m1+m0)
          add,dc              t1,p1,p1                      C high word plus carry from low word

          std                 p0,0(%r31)                    C store low half of product
ifdef(`HAVE_ABI_2_0w',
`         copy                p1,%r28                       C return val in %r28
',`       extrd,u             p1,31,32,%r28                 C return val in %r28,%r29
')
          bve                 (%r2)                         C return to caller
          ldo                 -128(%r30),%r30               C delay slot: release the scratch area
EPILOGUE(mpn_umul_ppmm_r)
98