1 /* crypto/bn/bn_exp.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 /* ====================================================================
59  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  *
65  * 1. Redistributions of source code must retain the above copyright
66  *    notice, this list of conditions and the following disclaimer.
67  *
68  * 2. Redistributions in binary form must reproduce the above copyright
69  *    notice, this list of conditions and the following disclaimer in
70  *    the documentation and/or other materials provided with the
71  *    distribution.
72  *
73  * 3. All advertising materials mentioning features or use of this
74  *    software must display the following acknowledgment:
75  *    "This product includes software developed by the OpenSSL Project
76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77  *
78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79  *    endorse or promote products derived from this software without
80  *    prior written permission. For written permission, please contact
81  *    openssl-core@openssl.org.
82  *
83  * 5. Products derived from this software may not be called "OpenSSL"
84  *    nor may "OpenSSL" appear in their names without prior written
85  *    permission of the OpenSSL Project.
86  *
87  * 6. Redistributions of any form whatsoever must retain the following
88  *    acknowledgment:
89  *    "This product includes software developed by the OpenSSL Project
90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103  * OF THE POSSIBILITY OF SUCH DAMAGE.
104  * ====================================================================
105  *
106  * This product includes cryptographic software written by Eric Young
107  * (eay@cryptsoft.com).  This product includes software written by Tim
108  * Hudson (tjh@cryptsoft.com).
109  *
110  */
111 
112 
113 #include "cryptlib.h"
114 #include "bn_lcl.h"
115 
116 /* maximum precomputation table size for *variable* sliding windows */
117 #define TABLE_SIZE	32
118 
119 /* this one works - simple but works */
BN_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,BN_CTX * ctx)120 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
121 	{
122 	int i,bits,ret=0;
123 	BIGNUM *v,*rr;
124 
125 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
126 		{
127 		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
128 		BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
129 		return -1;
130 		}
131 
132 	BN_CTX_start(ctx);
133 	if ((r == a) || (r == p))
134 		rr = BN_CTX_get(ctx);
135 	else
136 		rr = r;
137 	if ((v = BN_CTX_get(ctx)) == NULL) goto err;
138 
139 	if (BN_copy(v,a) == NULL) goto err;
140 	bits=BN_num_bits(p);
141 
142 	if (BN_is_odd(p))
143 		{ if (BN_copy(rr,a) == NULL) goto err; }
144 	else	{ if (!BN_one(rr)) goto err; }
145 
146 	for (i=1; i<bits; i++)
147 		{
148 		if (!BN_sqr(v,v,ctx)) goto err;
149 		if (BN_is_bit_set(p,i))
150 			{
151 			if (!BN_mul(rr,rr,v,ctx)) goto err;
152 			}
153 		}
154 	ret=1;
155 err:
156 	if (r != rr) BN_copy(r,rr);
157 	BN_CTX_end(ctx);
158 	return(ret);
159 	}
160 
161 
BN_mod_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)162 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
163 	       BN_CTX *ctx)
164 	{
165 	int ret;
166 
167 	bn_check_top(a);
168 	bn_check_top(p);
169 	bn_check_top(m);
170 
171 	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
172 	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
173 	 * exponentiation for the odd part), using appropriate exponent
174 	 * reductions, and combine the results using the CRT.
175 	 *
176 	 * For now, we use Montgomery only if the modulus is odd; otherwise,
177 	 * exponentiation using the reciprocal-based quick remaindering
178 	 * algorithm is used.
179 	 *
180 	 * (Timing obtained with expspeed.c [computations  a^p mod m
181 	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
182 	 * 4096, 8192 bits], compared to the running time of the
183 	 * standard algorithm:
184 	 *
185 	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
186          *                     55 .. 77 %  [UltraSparc processor, but
187 	 *                                  debug-solaris-sparcv8-gcc conf.]
188 	 *
189 	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
190 	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
191 	 *
192 	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
193 	 * at 2048 and more bits, but at 512 and 1024 bits, it was
194 	 * slower even than the standard algorithm!
195 	 *
196 	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
197 	 * should be obtained when the new Montgomery reduction code
198 	 * has been integrated into OpenSSL.)
199 	 */
200 
201 #define MONT_MUL_MOD
202 #define MONT_EXP_WORD
203 #define RECP_MUL_MOD
204 
205 #ifdef MONT_MUL_MOD
206 	/* I have finally been able to take out this pre-condition of
207 	 * the top bit being set.  It was caused by an error in BN_div
208 	 * with negatives.  There was also another problem when for a^b%m
209 	 * a >= m.  eay 07-May-97 */
210 /*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
211 
212 	if (BN_is_odd(m))
213 		{
214 #  ifdef MONT_EXP_WORD
215 		if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) == 0))
216 			{
217 			BN_ULONG A = a->d[0];
218 			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
219 			}
220 		else
221 #  endif
222 			ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
223 		}
224 	else
225 #endif
226 #ifdef RECP_MUL_MOD
227 		{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
228 #else
229 		{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
230 #endif
231 
232 	return(ret);
233 	}
234 
235 
BN_mod_exp_recp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)236 int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
237 		    const BIGNUM *m, BN_CTX *ctx)
238 	{
239 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
240 	int start=1,ts=0;
241 	BIGNUM *aa;
242 	BIGNUM val[TABLE_SIZE];
243 	BN_RECP_CTX recp;
244 
245 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
246 		{
247 		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
248 		BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
249 		return -1;
250 		}
251 
252 	bits=BN_num_bits(p);
253 
254 	if (bits == 0)
255 		{
256 		ret = BN_one(r);
257 		return ret;
258 		}
259 
260 	BN_CTX_start(ctx);
261 	if ((aa = BN_CTX_get(ctx)) == NULL) goto err;
262 
263 	BN_RECP_CTX_init(&recp);
264 	if (m->neg)
265 		{
266 		/* ignore sign of 'm' */
267 		if (!BN_copy(aa, m)) goto err;
268 		aa->neg = 0;
269 		if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
270 		}
271 	else
272 		{
273 		if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
274 		}
275 
276 	BN_init(&(val[0]));
277 	ts=1;
278 
279 	if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err;		/* 1 */
280 	if (BN_is_zero(&(val[0])))
281 		{
282 		ret = BN_zero(r);
283 		goto err;
284 		}
285 
286 	window = BN_window_bits_for_exponent_size(bits);
287 	if (window > 1)
288 		{
289 		if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
290 			goto err;				/* 2 */
291 		j=1<<(window-1);
292 		for (i=1; i<j; i++)
293 			{
294 			BN_init(&val[i]);
295 			if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx))
296 				goto err;
297 			}
298 		ts=i;
299 		}
300 
301 	start=1;	/* This is used to avoid multiplication etc
302 			 * when there is only the value '1' in the
303 			 * buffer. */
304 	wvalue=0;	/* The 'value' of the window */
305 	wstart=bits-1;	/* The top bit of the window */
306 	wend=0;		/* The bottom bit of the window */
307 
308 	if (!BN_one(r)) goto err;
309 
310 	for (;;)
311 		{
312 		if (BN_is_bit_set(p,wstart) == 0)
313 			{
314 			if (!start)
315 				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
316 				goto err;
317 			if (wstart == 0) break;
318 			wstart--;
319 			continue;
320 			}
321 		/* We now have wstart on a 'set' bit, we now need to work out
322 		 * how bit a window to do.  To do this we need to scan
323 		 * forward until the last set bit before the end of the
324 		 * window */
325 		j=wstart;
326 		wvalue=1;
327 		wend=0;
328 		for (i=1; i<window; i++)
329 			{
330 			if (wstart-i < 0) break;
331 			if (BN_is_bit_set(p,wstart-i))
332 				{
333 				wvalue<<=(i-wend);
334 				wvalue|=1;
335 				wend=i;
336 				}
337 			}
338 
339 		/* wend is the size of the current window */
340 		j=wend+1;
341 		/* add the 'bytes above' */
342 		if (!start)
343 			for (i=0; i<j; i++)
344 				{
345 				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
346 					goto err;
347 				}
348 
349 		/* wvalue will be an odd number < 2^window */
350 		if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx))
351 			goto err;
352 
353 		/* move the 'window' down further */
354 		wstart-=wend+1;
355 		wvalue=0;
356 		start=0;
357 		if (wstart < 0) break;
358 		}
359 	ret=1;
360 err:
361 	BN_CTX_end(ctx);
362 	for (i=0; i<ts; i++)
363 		BN_clear_free(&(val[i]));
364 	BN_RECP_CTX_free(&recp);
365 	return(ret);
366 	}
367 
368 
BN_mod_exp_mont(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)369 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
370 		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
371 	{
372 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
373 	int start=1,ts=0;
374 	BIGNUM *d,*r;
375 	const BIGNUM *aa;
376 	BIGNUM val[TABLE_SIZE];
377 	BN_MONT_CTX *mont=NULL;
378 
379 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
380 		{
381 		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
382 		}
383 
384 	bn_check_top(a);
385 	bn_check_top(p);
386 	bn_check_top(m);
387 
388 	if (!(m->d[0] & 1))
389 		{
390 		BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
391 		return(0);
392 		}
393 	bits=BN_num_bits(p);
394 	if (bits == 0)
395 		{
396 		ret = BN_one(rr);
397 		return ret;
398 		}
399 
400 	BN_CTX_start(ctx);
401 	d = BN_CTX_get(ctx);
402 	r = BN_CTX_get(ctx);
403 	if (d == NULL || r == NULL) goto err;
404 
405 	/* If this is not done, things will break in the montgomery
406 	 * part */
407 
408 	if (in_mont != NULL)
409 		mont=in_mont;
410 	else
411 		{
412 		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
413 		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
414 		}
415 
416 	BN_init(&val[0]);
417 	ts=1;
418 	if (a->neg || BN_ucmp(a,m) >= 0)
419 		{
420 		if (!BN_nnmod(&(val[0]),a,m,ctx))
421 			goto err;
422 		aa= &(val[0]);
423 		}
424 	else
425 		aa=a;
426 	if (BN_is_zero(aa))
427 		{
428 		ret = BN_zero(rr);
429 		goto err;
430 		}
431 	if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
432 
433 	window = BN_window_bits_for_exponent_size(bits);
434 	if (window > 1)
435 		{
436 		if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */
437 		j=1<<(window-1);
438 		for (i=1; i<j; i++)
439 			{
440 			BN_init(&(val[i]));
441 			if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx))
442 				goto err;
443 			}
444 		ts=i;
445 		}
446 
447 	start=1;	/* This is used to avoid multiplication etc
448 			 * when there is only the value '1' in the
449 			 * buffer. */
450 	wvalue=0;	/* The 'value' of the window */
451 	wstart=bits-1;	/* The top bit of the window */
452 	wend=0;		/* The bottom bit of the window */
453 
454 	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
455 	for (;;)
456 		{
457 		if (BN_is_bit_set(p,wstart) == 0)
458 			{
459 			if (!start)
460 				{
461 				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
462 				goto err;
463 				}
464 			if (wstart == 0) break;
465 			wstart--;
466 			continue;
467 			}
468 		/* We now have wstart on a 'set' bit, we now need to work out
469 		 * how bit a window to do.  To do this we need to scan
470 		 * forward until the last set bit before the end of the
471 		 * window */
472 		j=wstart;
473 		wvalue=1;
474 		wend=0;
475 		for (i=1; i<window; i++)
476 			{
477 			if (wstart-i < 0) break;
478 			if (BN_is_bit_set(p,wstart-i))
479 				{
480 				wvalue<<=(i-wend);
481 				wvalue|=1;
482 				wend=i;
483 				}
484 			}
485 
486 		/* wend is the size of the current window */
487 		j=wend+1;
488 		/* add the 'bytes above' */
489 		if (!start)
490 			for (i=0; i<j; i++)
491 				{
492 				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
493 					goto err;
494 				}
495 
496 		/* wvalue will be an odd number < 2^window */
497 		if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx))
498 			goto err;
499 
500 		/* move the 'window' down further */
501 		wstart-=wend+1;
502 		wvalue=0;
503 		start=0;
504 		if (wstart < 0) break;
505 		}
506 	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
507 	ret=1;
508 err:
509 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
510 	BN_CTX_end(ctx);
511 	for (i=0; i<ts; i++)
512 		BN_clear_free(&(val[i]));
513 	return(ret);
514 	}
515 
516 
517 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
518  * so that accessing any of these table values shows the same access pattern as far
519  * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
520  * from/to that table. */
521 
MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM * b,int top,unsigned char * buf,int idx,int width)522 static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
523 	{
524 	size_t i, j;
525 
526 	if (bn_wexpand(b, top) == NULL)
527 		return 0;
528 	while (b->top < top)
529 		{
530 		b->d[b->top++] = 0;
531 		}
532 
533 	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
534 		{
535 		buf[j] = ((unsigned char*)b->d)[i];
536 		}
537 
538 	bn_fix_top(b);
539 	return 1;
540 	}
541 
MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM * b,int top,unsigned char * buf,int idx,int width)542 static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
543 	{
544 	size_t i, j;
545 
546 	if (bn_wexpand(b, top) == NULL)
547 		return 0;
548 
549 	for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
550 		{
551 		((unsigned char*)b->d)[i] = buf[j];
552 		}
553 
554 	b->top = top;
555 	bn_fix_top(b);
556 	return 1;
557 	}
558 
559 /* Given a pointer value, compute the next address that is a cache line multiple. */
560 #define MOD_EXP_CTIME_ALIGN(x_) \
561 	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
562 
563 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
564  * precomputation memory layout to limit data-dependency to a minimum
565  * to protect secret exponents (cf. the hyper-threading timing attacks
566  * pointed out by Colin Percival,
567  * http://www.daemonology.net/hyperthreading-considered-harmful/)
568  */
BN_mod_exp_mont_consttime(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)569 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
570 		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
571 	{
572 	int i,bits,ret=0,idx,window,wvalue;
573 	int top;
574  	BIGNUM *r;
575 	const BIGNUM *aa;
576 	BN_MONT_CTX *mont=NULL;
577 
578 	int numPowers;
579 	unsigned char *powerbufFree=NULL;
580 	int powerbufLen = 0;
581 	unsigned char *powerbuf=NULL;
582 	BIGNUM *computeTemp=NULL, *am=NULL;
583 
584 	bn_check_top(a);
585 	bn_check_top(p);
586 	bn_check_top(m);
587 
588 	top = m->top;
589 
590 	if (!(m->d[0] & 1))
591 		{
592 		BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
593 		return(0);
594 		}
595 	bits=BN_num_bits(p);
596 	if (bits == 0)
597 		{
598 		ret = BN_one(rr);
599 		return ret;
600 		}
601 
602  	/* Initialize BIGNUM context and allocate intermediate result */
603 	BN_CTX_start(ctx);
604 	r = BN_CTX_get(ctx);
605 	if (r == NULL) goto err;
606 
607 	/* Allocate a montgomery context if it was not supplied by the caller.
608 	 * If this is not done, things will break in the montgomery part.
609  	 */
610 	if (in_mont != NULL)
611 		mont=in_mont;
612 	else
613 		{
614 		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
615 		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
616 		}
617 
618 	/* Get the window size to use with size of p. */
619 	window = BN_window_bits_for_ctime_exponent_size(bits);
620 
621 	/* Allocate a buffer large enough to hold all of the pre-computed
622 	 * powers of a.
623 	 */
624 	numPowers = 1 << window;
625 	powerbufLen = sizeof(m->d[0])*top*numPowers;
626 	if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
627 		goto err;
628 
629 	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
630 	memset(powerbuf, 0, powerbufLen);
631 
632  	/* Initialize the intermediate result. Do this early to save double conversion,
633 	 * once each for a^0 and intermediate result.
634 	 */
635  	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
636 	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
637 
638 	/* Initialize computeTemp as a^1 with montgomery precalcs */
639 	computeTemp = BN_CTX_get(ctx);
640 	am = BN_CTX_get(ctx);
641 	if (computeTemp==NULL || am==NULL) goto err;
642 
643 	if (a->neg || BN_ucmp(a,m) >= 0)
644 		{
645 		if (!BN_mod(am,a,m,ctx))
646 			goto err;
647 		aa= am;
648 		}
649 	else
650 		aa=a;
651 	if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
652 	if (!BN_copy(computeTemp, am)) goto err;
653 	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
654 
655 	/* If the window size is greater than 1, then calculate
656 	 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
657 	 * (even powers could instead be computed as (a^(i/2))^2
658 	 * to use the slight performance advantage of sqr over mul).
659 	 */
660 	if (window > 1)
661 		{
662 		for (i=2; i<numPowers; i++)
663 			{
664 			/* Calculate a^i = a^(i-1) * a */
665 			if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
666 				goto err;
667 			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
668 			}
669 		}
670 
671  	/* Adjust the number of bits up to a multiple of the window size.
672  	 * If the exponent length is not a multiple of the window size, then
673  	 * this pads the most significant bits with zeros to normalize the
674  	 * scanning loop to there's no special cases.
675  	 *
676  	 * * NOTE: Making the window size a power of two less than the native
677 	 * * word size ensures that the padded bits won't go past the last
678  	 * * word in the internal BIGNUM structure. Going past the end will
679  	 * * still produce the correct result, but causes a different branch
680  	 * * to be taken in the BN_is_bit_set function.
681  	 */
682  	bits = ((bits+window-1)/window)*window;
683  	idx=bits-1;	/* The top bit of the window */
684 
685  	/* Scan the exponent one window at a time starting from the most
686  	 * significant bits.
687  	 */
688  	while (idx >= 0)
689   		{
690  		wvalue=0; /* The 'value' of the window */
691 
692  		/* Scan the window, squaring the result as we go */
693  		for (i=0; i<window; i++,idx--)
694  			{
695 			if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))	goto err;
696 			wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
697   			}
698 
699 		/* Fetch the appropriate pre-computed value from the pre-buf */
700 		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
701 
702  		/* Multiply the result into the intermediate result */
703  		if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
704   		}
705 
706  	/* Convert the final result from montgomery to standard format */
707 	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
708 	ret=1;
709 err:
710 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
711 	if (powerbuf!=NULL)
712 		{
713 		OPENSSL_cleanse(powerbuf,powerbufLen);
714 		OPENSSL_free(powerbufFree);
715 		}
716  	if (am!=NULL) BN_clear(am);
717  	if (computeTemp!=NULL) BN_clear(computeTemp);
718 	BN_CTX_end(ctx);
719 	return(ret);
720 	}
721 
BN_mod_exp_mont_word(BIGNUM * rr,BN_ULONG a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)722 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
723                          const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
724 	{
725 	BN_MONT_CTX *mont = NULL;
726 	int b, bits, ret=0;
727 	int r_is_one;
728 	BN_ULONG w, next_w;
729 	BIGNUM *d, *r, *t;
730 	BIGNUM *swap_tmp;
731 #define BN_MOD_MUL_WORD(r, w, m) \
732 		(BN_mul_word(r, (w)) && \
733 		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
734 			(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
735 		/* BN_MOD_MUL_WORD is only used with 'w' large,
736 		 * so the BN_ucmp test is probably more overhead
737 		 * than always using BN_mod (which uses BN_copy if
738 		 * a similar test returns true). */
739 		/* We can use BN_mod and do not need BN_nnmod because our
740 		 * accumulator is never negative (the result of BN_mod does
741 		 * not depend on the sign of the modulus).
742 		 */
743 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
744 		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
745 
746 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
747 		{
748 		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
749 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
750 		return -1;
751 		}
752 
753 	bn_check_top(p);
754 	bn_check_top(m);
755 
756 	if (m->top == 0 || !(m->d[0] & 1))
757 		{
758 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
759 		return(0);
760 		}
761 	if (m->top == 1)
762 		a %= m->d[0]; /* make sure that 'a' is reduced */
763 
764 	bits = BN_num_bits(p);
765 	if (bits == 0)
766 		{
767 		ret = BN_one(rr);
768 		return ret;
769 		}
770 	if (a == 0)
771 		{
772 		ret = BN_zero(rr);
773 		return ret;
774 		}
775 
776 	BN_CTX_start(ctx);
777 	d = BN_CTX_get(ctx);
778 	r = BN_CTX_get(ctx);
779 	t = BN_CTX_get(ctx);
780 	if (d == NULL || r == NULL || t == NULL) goto err;
781 
782 	if (in_mont != NULL)
783 		mont=in_mont;
784 	else
785 		{
786 		if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
787 		if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
788 		}
789 
790 	r_is_one = 1; /* except for Montgomery factor */
791 
792 	/* bits-1 >= 0 */
793 
794 	/* The result is accumulated in the product r*w. */
795 	w = a; /* bit 'bits-1' of 'p' is always set */
796 	for (b = bits-2; b >= 0; b--)
797 		{
798 		/* First, square r*w. */
799 		next_w = w*w;
800 		if ((next_w/w) != w) /* overflow */
801 			{
802 			if (r_is_one)
803 				{
804 				if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
805 				r_is_one = 0;
806 				}
807 			else
808 				{
809 				if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
810 				}
811 			next_w = 1;
812 			}
813 		w = next_w;
814 		if (!r_is_one)
815 			{
816 			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
817 			}
818 
819 		/* Second, multiply r*w by 'a' if exponent bit is set. */
820 		if (BN_is_bit_set(p, b))
821 			{
822 			next_w = w*a;
823 			if ((next_w/a) != w) /* overflow */
824 				{
825 				if (r_is_one)
826 					{
827 					if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
828 					r_is_one = 0;
829 					}
830 				else
831 					{
832 					if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
833 					}
834 				next_w = a;
835 				}
836 			w = next_w;
837 			}
838 		}
839 
840 	/* Finally, set r:=r*w. */
841 	if (w != 1)
842 		{
843 		if (r_is_one)
844 			{
845 			if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
846 			r_is_one = 0;
847 			}
848 		else
849 			{
850 			if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
851 			}
852 		}
853 
854 	if (r_is_one) /* can happen only if a == 1*/
855 		{
856 		if (!BN_one(rr)) goto err;
857 		}
858 	else
859 		{
860 		if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
861 		}
862 	ret = 1;
863 err:
864 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
865 	BN_CTX_end(ctx);
866 	return(ret);
867 	}
868 
869 
870 /* The old fallback, simple version :-) */
BN_mod_exp_simple(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)871 int BN_mod_exp_simple(BIGNUM *r,
872 	const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
873 	BN_CTX *ctx)
874 	{
875 	int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0;
876 	int start=1;
877 	BIGNUM *d;
878 	BIGNUM val[TABLE_SIZE];
879 
880 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
881 		{
882 		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
883 		BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
884 		return -1;
885 		}
886 
887 	bits=BN_num_bits(p);
888 
889 	if (bits == 0)
890 		{
891 		ret = BN_one(r);
892 		return ret;
893 		}
894 
895 	BN_CTX_start(ctx);
896 	if ((d = BN_CTX_get(ctx)) == NULL) goto err;
897 
898 	BN_init(&(val[0]));
899 	ts=1;
900 	if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err;		/* 1 */
901 	if (BN_is_zero(&(val[0])))
902 		{
903 		ret = BN_zero(r);
904 		goto err;
905 		}
906 
907 	window = BN_window_bits_for_exponent_size(bits);
908 	if (window > 1)
909 		{
910 		if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx))
911 			goto err;				/* 2 */
912 		j=1<<(window-1);
913 		for (i=1; i<j; i++)
914 			{
915 			BN_init(&(val[i]));
916 			if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx))
917 				goto err;
918 			}
919 		ts=i;
920 		}
921 
922 	start=1;	/* This is used to avoid multiplication etc
923 			 * when there is only the value '1' in the
924 			 * buffer. */
925 	wvalue=0;	/* The 'value' of the window */
926 	wstart=bits-1;	/* The top bit of the window */
927 	wend=0;		/* The bottom bit of the window */
928 
929 	if (!BN_one(r)) goto err;
930 
931 	for (;;)
932 		{
933 		if (BN_is_bit_set(p,wstart) == 0)
934 			{
935 			if (!start)
936 				if (!BN_mod_mul(r,r,r,m,ctx))
937 				goto err;
938 			if (wstart == 0) break;
939 			wstart--;
940 			continue;
941 			}
942 		/* We now have wstart on a 'set' bit, we now need to work out
943 		 * how bit a window to do.  To do this we need to scan
944 		 * forward until the last set bit before the end of the
945 		 * window */
946 		j=wstart;
947 		wvalue=1;
948 		wend=0;
949 		for (i=1; i<window; i++)
950 			{
951 			if (wstart-i < 0) break;
952 			if (BN_is_bit_set(p,wstart-i))
953 				{
954 				wvalue<<=(i-wend);
955 				wvalue|=1;
956 				wend=i;
957 				}
958 			}
959 
960 		/* wend is the size of the current window */
961 		j=wend+1;
962 		/* add the 'bytes above' */
963 		if (!start)
964 			for (i=0; i<j; i++)
965 				{
966 				if (!BN_mod_mul(r,r,r,m,ctx))
967 					goto err;
968 				}
969 
970 		/* wvalue will be an odd number < 2^window */
971 		if (!BN_mod_mul(r,r,&(val[wvalue>>1]),m,ctx))
972 			goto err;
973 
974 		/* move the 'window' down further */
975 		wstart-=wend+1;
976 		wvalue=0;
977 		start=0;
978 		if (wstart < 0) break;
979 		}
980 	ret=1;
981 err:
982 	BN_CTX_end(ctx);
983 	for (i=0; i<ts; i++)
984 		BN_clear_free(&(val[i]));
985 	return(ret);
986 	}
987 
988