1 /* mulmod_bnm1.c -- multiplication mod B^n-1.
2 
3    Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
4    Marco Bodrato.
5 
6    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 
10 Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 
39 #include "gmp-impl.h"
40 #include "longlong.h"
41 
42 /* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
43    mod B^rn - 1, and values are semi-normalised; zero is represented
44    as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
45    tp==rp is allowed. */
46 void
mpn_bc_mulmod_bnm1(mp_ptr rp,mp_srcptr ap,mp_srcptr bp,mp_size_t rn,mp_ptr tp)47 mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
48                         mp_ptr tp)
49 {
50   mp_limb_t cy;
51 
52   ASSERT (0 < rn);
53 
54   mpn_mul_n (tp, ap, bp, rn);
55   cy = mpn_add_n (rp, tp, tp + rn, rn);
56   /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
57    * be no overflow when adding in the carry. */
58   MPN_INCR_U (rp, rn, cy);
59 }
60 
61 
62 /* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
63    semi-normalised representation, computation is mod B^rn + 1. Needs
64    a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
65    Output is normalised. */
66 static void
mpn_bc_mulmod_bnp1(mp_ptr rp,mp_srcptr ap,mp_srcptr bp,mp_size_t rn,mp_ptr tp)67 mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
68                         mp_ptr tp)
69 {
70   mp_limb_t cy;
71 
72   ASSERT (0 < rn);
73 
74   mpn_mul_n (tp, ap, bp, rn + 1);
75   ASSERT (tp[2*rn+1] == 0);
76   ASSERT (tp[2*rn] < GMP_NUMB_MAX);
77   cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
78   rp[rn] = 0;
79   MPN_INCR_U (rp, rn+1, cy);
80 }
81 
82 
83 /* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
84  *
85  * The result is expected to be ZERO if and only if one of the operand
86  * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
87  * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
88  * combine results and obtain a natural number when one knows in
89  * advance that the final value is less than (B^rn-1).
90  * Moreover it should not be a problem if mulmod_bnm1 is used to
91  * compute the full product with an+bn <= rn, because this condition
92  * implies (B^an-1)(B^bn-1) < (B^rn-1) .
93  *
94  * Requires 0 < bn <= an <= rn and an + bn > rn/2
95  * Scratch need: rn + (need for recursive call OR rn + 4). This gives
96  *
97  * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
98  */
99 void
mpn_mulmod_bnm1(mp_ptr rp,mp_size_t rn,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr tp)100 mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
101 {
102   ASSERT (0 < bn);
103   ASSERT (bn <= an);
104   ASSERT (an <= rn);
105 
106   if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
107     {
108       if (UNLIKELY (bn < rn))
109           {
110             if (UNLIKELY (an + bn <= rn))
111               {
112                 mpn_mul (rp, ap, an, bp, bn);
113               }
114             else
115               {
116                 mp_limb_t cy;
117                 mpn_mul (tp, ap, an, bp, bn);
118                 cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
119                 MPN_INCR_U (rp, rn, cy);
120               }
121           }
122       else
123           mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
124     }
125   else
126     {
127       mp_size_t n;
128       mp_limb_t cy;
129       mp_limb_t hi;
130 
131       n = rn >> 1;
132 
133       /* We need at least an + bn >= n, to be able to fit one of the
134            recursive products at rp. Requiring strict inequality makes
135            the code slightly simpler. If desired, we could avoid this
136            restriction by initially halving rn as long as rn is even and
137            an + bn <= rn/2. */
138 
139       ASSERT (an + bn > n);
140 
141       /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
142            and crt together as
143 
144            x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
145       */
146 
147 #define a0 ap
148 #define a1 (ap + n)
149 #define b0 bp
150 #define b1 (bp + n)
151 
152 #define xp  tp      /* 2n + 2 */
153       /* am1  maybe in {xp, n} */
154       /* bm1  maybe in {xp + n, n} */
155 #define sp1 (tp + 2*n + 2)
156       /* ap1  maybe in {sp1, n + 1} */
157       /* bp1  maybe in {sp1 + n + 1, n + 1} */
158 
159       {
160           mp_srcptr am1, bm1;
161           mp_size_t anm, bnm;
162           mp_ptr so;
163 
164           bm1 = b0;
165           bnm = bn;
166           if (LIKELY (an > n))
167             {
168               am1 = xp;
169               cy = mpn_add (xp, a0, n, a1, an - n);
170               MPN_INCR_U (xp, n, cy);
171               anm = n;
172               so = xp + n;
173               if (LIKELY (bn > n))
174                 {
175                     bm1 = so;
176                     cy = mpn_add (so, b0, n, b1, bn - n);
177                     MPN_INCR_U (so, n, cy);
178                     bnm = n;
179                     so += n;
180                 }
181             }
182           else
183             {
184               so = xp;
185               am1 = a0;
186               anm = an;
187             }
188 
189           mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
190       }
191 
192       {
193           int       k;
194           mp_srcptr ap1, bp1;
195           mp_size_t anp, bnp;
196 
197           bp1 = b0;
198           bnp = bn;
199           if (LIKELY (an > n)) {
200             ap1 = sp1;
201             cy = mpn_sub (sp1, a0, n, a1, an - n);
202             sp1[n] = 0;
203             MPN_INCR_U (sp1, n + 1, cy);
204             anp = n + ap1[n];
205             if (LIKELY (bn > n)) {
206               bp1 = sp1 + n + 1;
207               cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
208               sp1[2*n+1] = 0;
209               MPN_INCR_U (sp1 + n + 1, n + 1, cy);
210               bnp = n + bp1[n];
211             }
212           } else {
213             ap1 = a0;
214             anp = an;
215           }
216 
217           if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
218             k=0;
219           else
220             {
221               int mask;
222               k = mpn_fft_best_k (n, 0);
223               mask = (1<<k) - 1;
224               while (n & mask) {k--; mask >>=1;};
225             }
226           if (k >= FFT_FIRST_K)
227             xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
228           else if (UNLIKELY (bp1 == b0))
229             {
230               ASSERT (anp + bnp <= 2*n+1);
231               ASSERT (anp + bnp > n);
232               ASSERT (anp >= bnp);
233               mpn_mul (xp, ap1, anp, bp1, bnp);
234               anp = anp + bnp - n;
235               ASSERT (anp <= n || xp[2*n]==0);
236               anp-= anp > n;
237               cy = mpn_sub (xp, xp, n, xp + n, anp);
238               xp[n] = 0;
239               MPN_INCR_U (xp, n+1, cy);
240             }
241           else
242             mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
243       }
244 
245       /* Here the CRT recomposition begins.
246 
247            xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
248            Division by 2 is a bitwise rotation.
249 
250            Assumes xp normalised mod (B^n+1).
251 
252            The residue class [0] is represented by [B^n-1]; except when
253            both input are ZERO.
254       */
255 
256 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
257 #if HAVE_NATIVE_mpn_rsh1add_nc
258       cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
259       hi = cy << (GMP_NUMB_BITS - 1);
260       cy = 0;
261       /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
262            overflows, i.e. a further increment will not overflow again. */
263 #else /* ! _nc */
264       cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
265       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
266       cy >>= 1;
267       /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
268            the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
269 #endif
270 #if GMP_NAIL_BITS == 0
271       add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
272 #else
273       cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
274       rp[n-1] ^= hi;
275 #endif
276 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
277 #if HAVE_NATIVE_mpn_add_nc
278       cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
279 #else /* ! _nc */
280       cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
281 #endif
282       cy += (rp[0]&1);
283       mpn_rshift(rp, rp, n, 1);
284       ASSERT (cy <= 2);
285       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
286       cy >>= 1;
287       /* We can have cy != 0 only if hi = 0... */
288       ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
289       rp[n-1] |= hi;
290       /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
291 #endif
292       ASSERT (cy <= 1);
293       /* Next increment can not overflow, read the previous comments about cy. */
294       ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
295       MPN_INCR_U(rp, n, cy);
296 
297       /* Compute the highest half:
298            ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
299        */
300       if (UNLIKELY (an + bn < rn))
301           {
302             /* Note that in this case, the only way the result can equal
303                zero mod B^{rn} - 1 is if one of the inputs is zero, and
304                then the output of both the recursive calls and this CRT
305                reconstruction is zero, not B^{rn} - 1. Which is good,
306                since the latter representation doesn't fit in the output
307                area.*/
308             cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
309 
310             /* FIXME: This subtraction of the high parts is not really
311                necessary, we do it to get the carry out, and for sanity
312                checking. */
313             cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
314                                            xp + an + bn - n, rn - (an + bn), cy);
315             ASSERT (an + bn == rn - 1 ||
316                       mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
317             cy = mpn_sub_1 (rp, rp, an + bn, cy);
318             ASSERT (cy == (xp + an + bn - n)[0]);
319           }
320       else
321           {
322             cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
323             /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
324                DECR will affect _at most_ the lowest n limbs. */
325             MPN_DECR_U (rp, 2*n, cy);
326           }
327 #undef a0
328 #undef a1
329 #undef b0
330 #undef b1
331 #undef xp
332 #undef sp1
333     }
334 }
335 
336 mp_size_t
mpn_mulmod_bnm1_next_size(mp_size_t n)337 mpn_mulmod_bnm1_next_size (mp_size_t n)
338 {
339   mp_size_t nh;
340 
341   if (BELOW_THRESHOLD (n,     MULMOD_BNM1_THRESHOLD))
342     return n;
343   if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
344     return (n + (2-1)) & (-2);
345   if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
346     return (n + (4-1)) & (-4);
347 
348   nh = (n + 1) >> 1;
349 
350   if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
351     return (n + (8-1)) & (-8);
352 
353   return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
354 }
355