xref: /freebsd-11-stable/crypto/openssl/crypto/ec/ecp_nistp224.c (revision a7ce0a90be11992abc2c716cb6bd16c0ea43f6d2)
1 /* crypto/ec/ecp_nistp224.c */
2 /*
3  * Written by Emilia Kasper (Google) for the OpenSSL project.
4  */
5 /* Copyright 2011 Google Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  *
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *  Unless required by applicable law or agreed to in writing, software
15  *  distributed under the License is distributed on an "AS IS" BASIS,
16  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  *  See the License for the specific language governing permissions and
18  *  limitations under the License.
19  */
20 
21 /*
22  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23  *
24  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25  * and Adam Langley's public domain 64-bit C implementation of curve25519
26  */
27 
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30 
31 # ifndef OPENSSL_SYS_VMS
32 #  include <stdint.h>
33 # else
34 #  include <inttypes.h>
35 # endif
36 
37 # include <string.h>
38 # include <openssl/err.h>
39 # include "ec_lcl.h"
40 # include "bn_int.h" /* bn_bn2lebinpad, bn_lebin2bn */
41 
42 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43   /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
45                                  * platforms */
46 # else
47 #  error "Need GCC 3.1 or later to define type uint128_t"
48 # endif
49 
50 typedef uint8_t u8;
51 typedef uint64_t u64;
52 
53 /******************************************************************************/
54 /*-
55  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
56  *
57  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58  * using 64-bit coefficients called 'limbs',
59  * and sometimes (for multiplication results) as
60  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61  * using 128-bit coefficients called 'widelimbs'.
62  * A 4-limb representation is an 'felem';
63  * a 7-widelimb representation is a 'widefelem'.
64  * Even within felems, bits of adjacent limbs overlap, and we don't always
65  * reduce the representations: we ensure that inputs to each felem
66  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67  * and fit into a 128-bit word without overflow. The coefficients are then
68  * again partially reduced to obtain an felem satisfying a_i < 2^57.
69  * We only reduce to the unique minimal representation at the end of the
70  * computation.
71  */
72 
73 typedef uint64_t limb;
74 typedef uint128_t widelimb;
75 
76 typedef limb felem[4];
77 typedef widelimb widefelem[7];
78 
79 /*
80  * Field element represented as a byte arrary. 28*8 = 224 bits is also the
81  * group order size for the elliptic curve, and we also use this type for
82  * scalars for point multiplication.
83  */
84 typedef u8 felem_bytearray[28];
85 
86 static const felem_bytearray nistp224_curve_params[5] = {
87     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
88      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
89      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
90     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
91      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
92      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
93     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
94      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
95      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
96     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
97      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
98      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
99     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
100      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
101      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
102 };
103 
104 /*-
105  * Precomputed multiples of the standard generator
106  * Points are given in coordinates (X, Y, Z) where Z normally is 1
107  * (0 for the point at infinity).
108  * For each field element, slice a_0 is word 0, etc.
109  *
110  * The table has 2 * 16 elements, starting with the following:
111  * index | bits    | point
112  * ------+---------+------------------------------
113  *     0 | 0 0 0 0 | 0G
114  *     1 | 0 0 0 1 | 1G
115  *     2 | 0 0 1 0 | 2^56G
116  *     3 | 0 0 1 1 | (2^56 + 1)G
117  *     4 | 0 1 0 0 | 2^112G
118  *     5 | 0 1 0 1 | (2^112 + 1)G
119  *     6 | 0 1 1 0 | (2^112 + 2^56)G
120  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
121  *     8 | 1 0 0 0 | 2^168G
122  *     9 | 1 0 0 1 | (2^168 + 1)G
123  *    10 | 1 0 1 0 | (2^168 + 2^56)G
124  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
125  *    12 | 1 1 0 0 | (2^168 + 2^112)G
126  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
127  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
128  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
129  * followed by a copy of this with each element multiplied by 2^28.
130  *
131  * The reason for this is so that we can clock bits into four different
132  * locations when doing simple scalar multiplies against the base point,
133  * and then another four locations using the second 16 elements.
134  */
135 static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
136                                         {0, 0, 0, 0},
137                                         {0, 0, 0, 0}},
138                                        {{0x3280d6115c1d21, 0xc1d356c2112234,
139                                          0x7f321390b94a03, 0xb70e0cbd6bb4bf},
140                                         {0xd5819985007e34, 0x75a05a07476444,
141                                          0xfb4c22dfe6cd43, 0xbd376388b5f723},
142                                         {1, 0, 0, 0}},
143                                        {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
144                                          0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145                                         {0x29e0b892dc9c43, 0xece8608436e662,
146                                          0xdc858f185310d0, 0x9812dd4eb8d321},
147                                         {1, 0, 0, 0}},
148                                        {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
149                                          0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
150                                         {0xf19f90ed50266d, 0xabf2b4bf65f9df,
151                                          0x313865468fafec, 0x5cb379ba910a17},
152                                         {1, 0, 0, 0}},
153                                        {{0x0641966cab26e3, 0x91fb2991fab0a0,
154                                          0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
155                                         {0x7510407766af5d, 0x84d929610d5450,
156                                          0x81d77aae82f706, 0x6916f6d4338c5b},
157                                         {1, 0, 0, 0}},
158                                        {{0xea95ac3b1f15c6, 0x086000905e82d4,
159                                          0xdd323ae4d1c8b1, 0x932b56be7685a3},
160                                         {0x9ef93dea25dbbf, 0x41665960f390f0,
161                                          0xfdec76dbe2a8a7, 0x523e80f019062a},
162                                         {1, 0, 0, 0}},
163                                        {{0x822fdd26732c73, 0xa01c83531b5d0f,
164                                          0x363f37347c1ba4, 0xc391b45c84725c},
165                                         {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
166                                          0xc393da7e222a7f, 0x1efb7890ede244},
167                                         {1, 0, 0, 0}},
168                                        {{0x4c9e90ca217da1, 0xd11beca79159bb,
169                                          0xff8d33c2c98b7c, 0x2610b39409f849},
170                                         {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
171                                          0x966c079b753c89, 0xfe67e4e820b112},
172                                         {1, 0, 0, 0}},
173                                        {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
174                                          0x79b7619a3e7c4c, 0x05c73240899b47},
175                                         {0x9f7f6382c73e3a, 0x18615165c56bda,
176                                          0x641fab2116fd56, 0x72855882b08394},
177                                         {1, 0, 0, 0}},
178                                        {{0x0469182f161c09, 0x74a98ca8d00fb5,
179                                          0xb89da93489a3e0, 0x41c98768fb0c1d},
180                                         {0xe5ea05fb32da81, 0x3dce9ffbca6855,
181                                          0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182                                         {1, 0, 0, 0}},
183                                        {{0xdab22b2333e87f, 0x4430137a5dd2f6,
184                                          0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
185                                         {0x764a7df0c8fda5, 0x185ba5c3fa2044,
186                                          0x9281d688bcbe50, 0xc40331df893881},
187                                         {1, 0, 0, 0}},
188                                        {{0xb89530796f0f60, 0xade92bd26909a3,
189                                          0x1a0c83fb4884da, 0x1765bf22a5a984},
190                                         {0x772a9ee75db09e, 0x23bc6c67cec16f,
191                                          0x4c1edba8b14e2f, 0xe2a215d9611369},
192                                         {1, 0, 0, 0}},
193                                        {{0x571e509fb5efb3, 0xade88696410552,
194                                          0xc8ae85fada74fe, 0x6c7e4be83bbde3},
195                                         {0xff9f51160f4652, 0xb47ce2495a6539,
196                                          0xa2946c53b582f4, 0x286d2db3ee9a60},
197                                         {1, 0, 0, 0}},
198                                        {{0x40bbd5081a44af, 0x0995183b13926c,
199                                          0xbcefba6f47f6d0, 0x215619e9cc0057},
200                                         {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
201                                          0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202                                         {1, 0, 0, 0}},
203                                        {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
204                                          0x1c29819435d2c6, 0xc813132f4c07e9},
205                                         {0x2891425503b11f, 0x08781030579fea,
206                                          0xf5426ba5cc9674, 0x1e28ebf18562bc},
207                                         {1, 0, 0, 0}},
208                                        {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
209                                          0xff17036691a973, 0xf1aef351497c58},
210                                         {0xdd1f2d600564ff, 0xdead073b1402db,
211                                          0x74a684435bd693, 0xeea7471f962558},
212                                         {1, 0, 0, 0}}},
213 {{{0, 0, 0, 0},
214   {0, 0, 0, 0},
215   {0, 0, 0, 0}},
216  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
217   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218   {1, 0, 0, 0}},
219  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
220   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221   {1, 0, 0, 0}},
222  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
223   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224   {1, 0, 0, 0}},
225  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
226   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227   {1, 0, 0, 0}},
228  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
229   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230   {1, 0, 0, 0}},
231  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
232   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233   {1, 0, 0, 0}},
234  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
235   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236   {1, 0, 0, 0}},
237  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
238   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239   {1, 0, 0, 0}},
240  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
241   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242   {1, 0, 0, 0}},
243  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
244   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245   {1, 0, 0, 0}},
246  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
247   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248   {1, 0, 0, 0}},
249  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
250   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251   {1, 0, 0, 0}},
252  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
253   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254   {1, 0, 0, 0}},
255  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
256   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257   {1, 0, 0, 0}},
258  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
259   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
260   {1, 0, 0, 0}}}
261 };
262 
263 /* Precomputation for the group generator. */
264 typedef struct {
265     felem g_pre_comp[2][16][3];
266     int references;
267 } NISTP224_PRE_COMP;
268 
EC_GFp_nistp224_method(void)269 const EC_METHOD *EC_GFp_nistp224_method(void)
270 {
271     static const EC_METHOD ret = {
272         EC_FLAGS_DEFAULT_OCT,
273         NID_X9_62_prime_field,
274         ec_GFp_nistp224_group_init,
275         ec_GFp_simple_group_finish,
276         ec_GFp_simple_group_clear_finish,
277         ec_GFp_nist_group_copy,
278         ec_GFp_nistp224_group_set_curve,
279         ec_GFp_simple_group_get_curve,
280         ec_GFp_simple_group_get_degree,
281         ec_GFp_simple_group_check_discriminant,
282         ec_GFp_simple_point_init,
283         ec_GFp_simple_point_finish,
284         ec_GFp_simple_point_clear_finish,
285         ec_GFp_simple_point_copy,
286         ec_GFp_simple_point_set_to_infinity,
287         ec_GFp_simple_set_Jprojective_coordinates_GFp,
288         ec_GFp_simple_get_Jprojective_coordinates_GFp,
289         ec_GFp_simple_point_set_affine_coordinates,
290         ec_GFp_nistp224_point_get_affine_coordinates,
291         0 /* point_set_compressed_coordinates */ ,
292         0 /* point2oct */ ,
293         0 /* oct2point */ ,
294         ec_GFp_simple_add,
295         ec_GFp_simple_dbl,
296         ec_GFp_simple_invert,
297         ec_GFp_simple_is_at_infinity,
298         ec_GFp_simple_is_on_curve,
299         ec_GFp_simple_cmp,
300         ec_GFp_simple_make_affine,
301         ec_GFp_simple_points_make_affine,
302         ec_GFp_nistp224_points_mul,
303         ec_GFp_nistp224_precompute_mult,
304         ec_GFp_nistp224_have_precompute_mult,
305         ec_GFp_nist_field_mul,
306         ec_GFp_nist_field_sqr,
307         0 /* field_div */ ,
308         0 /* field_encode */ ,
309         0 /* field_decode */ ,
310         0                       /* field_set_to_one */
311     };
312 
313     return &ret;
314 }
315 
316 /*
317  * Helper functions to convert field elements to/from internal representation
318  */
bin28_to_felem(felem out,const u8 in[28])319 static void bin28_to_felem(felem out, const u8 in[28])
320 {
321     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
322     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
323     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
324     out[3] = (*((const uint64_t *)(in+20))) >> 8;
325 }
326 
felem_to_bin28(u8 out[28],const felem in)327 static void felem_to_bin28(u8 out[28], const felem in)
328 {
329     unsigned i;
330     for (i = 0; i < 7; ++i) {
331         out[i] = in[0] >> (8 * i);
332         out[i + 7] = in[1] >> (8 * i);
333         out[i + 14] = in[2] >> (8 * i);
334         out[i + 21] = in[3] >> (8 * i);
335     }
336 }
337 
338 /* From OpenSSL BIGNUM to internal representation */
BN_to_felem(felem out,const BIGNUM * bn)339 static int BN_to_felem(felem out, const BIGNUM *bn)
340 {
341     felem_bytearray b_out;
342     int num_bytes;
343 
344     if (BN_is_negative(bn)) {
345         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
346         return 0;
347     }
348     num_bytes = bn_bn2lebinpad(bn, b_out, sizeof(b_out));
349     if (num_bytes < 0) {
350         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
351         return 0;
352     }
353     bin28_to_felem(out, b_out);
354     return 1;
355 }
356 
357 /* From internal representation to OpenSSL BIGNUM */
felem_to_BN(BIGNUM * out,const felem in)358 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
359 {
360     felem_bytearray b_out;
361     felem_to_bin28(b_out, in);
362     return bn_lebin2bn(b_out, sizeof(b_out), out);
363 }
364 
365 /******************************************************************************/
366 /*-
367  *                              FIELD OPERATIONS
368  *
369  * Field operations, using the internal representation of field elements.
370  * NB! These operations are specific to our point multiplication and cannot be
371  * expected to be correct in general - e.g., multiplication with a large scalar
372  * will cause an overflow.
373  *
374  */
375 
felem_one(felem out)376 static void felem_one(felem out)
377 {
378     out[0] = 1;
379     out[1] = 0;
380     out[2] = 0;
381     out[3] = 0;
382 }
383 
felem_assign(felem out,const felem in)384 static void felem_assign(felem out, const felem in)
385 {
386     out[0] = in[0];
387     out[1] = in[1];
388     out[2] = in[2];
389     out[3] = in[3];
390 }
391 
392 /* Sum two field elements: out += in */
felem_sum(felem out,const felem in)393 static void felem_sum(felem out, const felem in)
394 {
395     out[0] += in[0];
396     out[1] += in[1];
397     out[2] += in[2];
398     out[3] += in[3];
399 }
400 
401 /* Get negative value: out = -in */
402 /* Assumes in[i] < 2^57 */
felem_neg(felem out,const felem in)403 static void felem_neg(felem out, const felem in)
404 {
405     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
406     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
407     static const limb two58m42m2 = (((limb) 1) << 58) -
408         (((limb) 1) << 42) - (((limb) 1) << 2);
409 
410     /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
411     out[0] = two58p2 - in[0];
412     out[1] = two58m42m2 - in[1];
413     out[2] = two58m2 - in[2];
414     out[3] = two58m2 - in[3];
415 }
416 
417 /* Subtract field elements: out -= in */
418 /* Assumes in[i] < 2^57 */
felem_diff(felem out,const felem in)419 static void felem_diff(felem out, const felem in)
420 {
421     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
422     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
423     static const limb two58m42m2 = (((limb) 1) << 58) -
424         (((limb) 1) << 42) - (((limb) 1) << 2);
425 
426     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
427     out[0] += two58p2;
428     out[1] += two58m42m2;
429     out[2] += two58m2;
430     out[3] += two58m2;
431 
432     out[0] -= in[0];
433     out[1] -= in[1];
434     out[2] -= in[2];
435     out[3] -= in[3];
436 }
437 
438 /* Subtract in unreduced 128-bit mode: out -= in */
439 /* Assumes in[i] < 2^119 */
widefelem_diff(widefelem out,const widefelem in)440 static void widefelem_diff(widefelem out, const widefelem in)
441 {
442     static const widelimb two120 = ((widelimb) 1) << 120;
443     static const widelimb two120m64 = (((widelimb) 1) << 120) -
444         (((widelimb) 1) << 64);
445     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
446         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
447 
448     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
449     out[0] += two120;
450     out[1] += two120m64;
451     out[2] += two120m64;
452     out[3] += two120;
453     out[4] += two120m104m64;
454     out[5] += two120m64;
455     out[6] += two120m64;
456 
457     out[0] -= in[0];
458     out[1] -= in[1];
459     out[2] -= in[2];
460     out[3] -= in[3];
461     out[4] -= in[4];
462     out[5] -= in[5];
463     out[6] -= in[6];
464 }
465 
466 /* Subtract in mixed mode: out128 -= in64 */
467 /* in[i] < 2^63 */
felem_diff_128_64(widefelem out,const felem in)468 static void felem_diff_128_64(widefelem out, const felem in)
469 {
470     static const widelimb two64p8 = (((widelimb) 1) << 64) +
471         (((widelimb) 1) << 8);
472     static const widelimb two64m8 = (((widelimb) 1) << 64) -
473         (((widelimb) 1) << 8);
474     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
475         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
476 
477     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
478     out[0] += two64p8;
479     out[1] += two64m48m8;
480     out[2] += two64m8;
481     out[3] += two64m8;
482 
483     out[0] -= in[0];
484     out[1] -= in[1];
485     out[2] -= in[2];
486     out[3] -= in[3];
487 }
488 
489 /*
490  * Multiply a field element by a scalar: out = out * scalar The scalars we
491  * actually use are small, so results fit without overflow
492  */
felem_scalar(felem out,const limb scalar)493 static void felem_scalar(felem out, const limb scalar)
494 {
495     out[0] *= scalar;
496     out[1] *= scalar;
497     out[2] *= scalar;
498     out[3] *= scalar;
499 }
500 
501 /*
502  * Multiply an unreduced field element by a scalar: out = out * scalar The
503  * scalars we actually use are small, so results fit without overflow
504  */
widefelem_scalar(widefelem out,const widelimb scalar)505 static void widefelem_scalar(widefelem out, const widelimb scalar)
506 {
507     out[0] *= scalar;
508     out[1] *= scalar;
509     out[2] *= scalar;
510     out[3] *= scalar;
511     out[4] *= scalar;
512     out[5] *= scalar;
513     out[6] *= scalar;
514 }
515 
516 /* Square a field element: out = in^2 */
felem_square(widefelem out,const felem in)517 static void felem_square(widefelem out, const felem in)
518 {
519     limb tmp0, tmp1, tmp2;
520     tmp0 = 2 * in[0];
521     tmp1 = 2 * in[1];
522     tmp2 = 2 * in[2];
523     out[0] = ((widelimb) in[0]) * in[0];
524     out[1] = ((widelimb) in[0]) * tmp1;
525     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
526     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
527     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
528     out[5] = ((widelimb) in[3]) * tmp2;
529     out[6] = ((widelimb) in[3]) * in[3];
530 }
531 
532 /* Multiply two field elements: out = in1 * in2 */
felem_mul(widefelem out,const felem in1,const felem in2)533 static void felem_mul(widefelem out, const felem in1, const felem in2)
534 {
535     out[0] = ((widelimb) in1[0]) * in2[0];
536     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
537     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
538         ((widelimb) in1[2]) * in2[0];
539     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
540         ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
541     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
542         ((widelimb) in1[3]) * in2[1];
543     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
544     out[6] = ((widelimb) in1[3]) * in2[3];
545 }
546 
547 /*-
548  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
549  * Requires in[i] < 2^126,
550  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
felem_reduce(felem out,const widefelem in)551 static void felem_reduce(felem out, const widefelem in)
552 {
553     static const widelimb two127p15 = (((widelimb) 1) << 127) +
554         (((widelimb) 1) << 15);
555     static const widelimb two127m71 = (((widelimb) 1) << 127) -
556         (((widelimb) 1) << 71);
557     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
558         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
559     widelimb output[5];
560 
561     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
562     output[0] = in[0] + two127p15;
563     output[1] = in[1] + two127m71m55;
564     output[2] = in[2] + two127m71;
565     output[3] = in[3];
566     output[4] = in[4];
567 
568     /* Eliminate in[4], in[5], in[6] */
569     output[4] += in[6] >> 16;
570     output[3] += (in[6] & 0xffff) << 40;
571     output[2] -= in[6];
572 
573     output[3] += in[5] >> 16;
574     output[2] += (in[5] & 0xffff) << 40;
575     output[1] -= in[5];
576 
577     output[2] += output[4] >> 16;
578     output[1] += (output[4] & 0xffff) << 40;
579     output[0] -= output[4];
580 
581     /* Carry 2 -> 3 -> 4 */
582     output[3] += output[2] >> 56;
583     output[2] &= 0x00ffffffffffffff;
584 
585     output[4] = output[3] >> 56;
586     output[3] &= 0x00ffffffffffffff;
587 
588     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
589 
590     /* Eliminate output[4] */
591     output[2] += output[4] >> 16;
592     /* output[2] < 2^56 + 2^56 = 2^57 */
593     output[1] += (output[4] & 0xffff) << 40;
594     output[0] -= output[4];
595 
596     /* Carry 0 -> 1 -> 2 -> 3 */
597     output[1] += output[0] >> 56;
598     out[0] = output[0] & 0x00ffffffffffffff;
599 
600     output[2] += output[1] >> 56;
601     /* output[2] < 2^57 + 2^72 */
602     out[1] = output[1] & 0x00ffffffffffffff;
603     output[3] += output[2] >> 56;
604     /* output[3] <= 2^56 + 2^16 */
605     out[2] = output[2] & 0x00ffffffffffffff;
606 
607     /*-
608      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
609      * out[3] <= 2^56 + 2^16 (due to final carry),
610      * so out < 2*p
611      */
612     out[3] = output[3];
613 }
614 
felem_square_reduce(felem out,const felem in)615 static void felem_square_reduce(felem out, const felem in)
616 {
617     widefelem tmp;
618     felem_square(tmp, in);
619     felem_reduce(out, tmp);
620 }
621 
felem_mul_reduce(felem out,const felem in1,const felem in2)622 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
623 {
624     widefelem tmp;
625     felem_mul(tmp, in1, in2);
626     felem_reduce(out, tmp);
627 }
628 
629 /*
630  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
631  * call felem_reduce first)
632  */
felem_contract(felem out,const felem in)633 static void felem_contract(felem out, const felem in)
634 {
635     static const int64_t two56 = ((limb) 1) << 56;
636     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
637     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
638     int64_t tmp[4], a;
639     tmp[0] = in[0];
640     tmp[1] = in[1];
641     tmp[2] = in[2];
642     tmp[3] = in[3];
643     /* Case 1: a = 1 iff in >= 2^224 */
644     a = (in[3] >> 56);
645     tmp[0] -= a;
646     tmp[1] += a << 40;
647     tmp[3] &= 0x00ffffffffffffff;
648     /*
649      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
650      * and the lower part is non-zero
651      */
652     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
653         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
654     a &= 0x00ffffffffffffff;
655     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
656     a = (a - 1) >> 63;
657     /* subtract 2^224 - 2^96 + 1 if a is all-one */
658     tmp[3] &= a ^ 0xffffffffffffffff;
659     tmp[2] &= a ^ 0xffffffffffffffff;
660     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
661     tmp[0] -= 1 & a;
662 
663     /*
664      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
665      * non-zero, so we only need one step
666      */
667     a = tmp[0] >> 63;
668     tmp[0] += two56 & a;
669     tmp[1] -= 1 & a;
670 
671     /* carry 1 -> 2 -> 3 */
672     tmp[2] += tmp[1] >> 56;
673     tmp[1] &= 0x00ffffffffffffff;
674 
675     tmp[3] += tmp[2] >> 56;
676     tmp[2] &= 0x00ffffffffffffff;
677 
678     /* Now 0 <= out < p */
679     out[0] = tmp[0];
680     out[1] = tmp[1];
681     out[2] = tmp[2];
682     out[3] = tmp[3];
683 }
684 
685 /*
686  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
687  * elements are reduced to in < 2^225, so we only need to check three cases:
688  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
689  */
felem_is_zero(const felem in)690 static limb felem_is_zero(const felem in)
691 {
692     limb zero, two224m96p1, two225m97p2;
693 
694     zero = in[0] | in[1] | in[2] | in[3];
695     zero = (((int64_t) (zero) - 1) >> 63) & 1;
696     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
697         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
698     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
699     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
700         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
701     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
702     return (zero | two224m96p1 | two225m97p2);
703 }
704 
felem_is_zero_int(const void * in)705 static int felem_is_zero_int(const void *in)
706 {
707     return (int)(felem_is_zero(in) & ((limb) 1));
708 }
709 
710 /* Invert a field element */
711 /* Computation chain copied from djb's code */
felem_inv(felem out,const felem in)712 static void felem_inv(felem out, const felem in)
713 {
714     felem ftmp, ftmp2, ftmp3, ftmp4;
715     widefelem tmp;
716     unsigned i;
717 
718     felem_square(tmp, in);
719     felem_reduce(ftmp, tmp);    /* 2 */
720     felem_mul(tmp, in, ftmp);
721     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
722     felem_square(tmp, ftmp);
723     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
724     felem_mul(tmp, in, ftmp);
725     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
726     felem_square(tmp, ftmp);
727     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
728     felem_square(tmp, ftmp2);
729     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
730     felem_square(tmp, ftmp2);
731     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
732     felem_mul(tmp, ftmp2, ftmp);
733     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
734     felem_square(tmp, ftmp);
735     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
736     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
737         felem_square(tmp, ftmp2);
738         felem_reduce(ftmp2, tmp);
739     }
740     felem_mul(tmp, ftmp2, ftmp);
741     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
742     felem_square(tmp, ftmp2);
743     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
744     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
745         felem_square(tmp, ftmp3);
746         felem_reduce(ftmp3, tmp);
747     }
748     felem_mul(tmp, ftmp3, ftmp2);
749     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
750     felem_square(tmp, ftmp2);
751     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
752     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
753         felem_square(tmp, ftmp3);
754         felem_reduce(ftmp3, tmp);
755     }
756     felem_mul(tmp, ftmp3, ftmp2);
757     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
758     felem_square(tmp, ftmp3);
759     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
760     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
761         felem_square(tmp, ftmp4);
762         felem_reduce(ftmp4, tmp);
763     }
764     felem_mul(tmp, ftmp3, ftmp4);
765     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
766     felem_square(tmp, ftmp3);
767     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
768     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
769         felem_square(tmp, ftmp4);
770         felem_reduce(ftmp4, tmp);
771     }
772     felem_mul(tmp, ftmp2, ftmp4);
773     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
774     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
775         felem_square(tmp, ftmp2);
776         felem_reduce(ftmp2, tmp);
777     }
778     felem_mul(tmp, ftmp2, ftmp);
779     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
780     felem_square(tmp, ftmp);
781     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
782     felem_mul(tmp, ftmp, in);
783     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
784     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
785         felem_square(tmp, ftmp);
786         felem_reduce(ftmp, tmp);
787     }
788     felem_mul(tmp, ftmp, ftmp3);
789     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
790 }
791 
792 /*
793  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
794  * out to itself.
795  */
copy_conditional(felem out,const felem in,limb icopy)796 static void copy_conditional(felem out, const felem in, limb icopy)
797 {
798     unsigned i;
799     /*
800      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
801      */
802     const limb copy = -icopy;
803     for (i = 0; i < 4; ++i) {
804         const limb tmp = copy & (in[i] ^ out[i]);
805         out[i] ^= tmp;
806     }
807 }
808 
809 /******************************************************************************/
810 /*-
811  *                       ELLIPTIC CURVE POINT OPERATIONS
812  *
813  * Points are represented in Jacobian projective coordinates:
814  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
815  * or to the point at infinity if Z == 0.
816  *
817  */
818 
819 /*-
820  * Double an elliptic curve point:
821  * (X', Y', Z') = 2 * (X, Y, Z), where
822  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
823  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
824  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
825  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
826  * while x_out == y_in is not (maybe this works, but it's not tested).
827  */
828 static void
point_double(felem x_out,felem y_out,felem z_out,const felem x_in,const felem y_in,const felem z_in)829 point_double(felem x_out, felem y_out, felem z_out,
830              const felem x_in, const felem y_in, const felem z_in)
831 {
832     widefelem tmp, tmp2;
833     felem delta, gamma, beta, alpha, ftmp, ftmp2;
834 
835     felem_assign(ftmp, x_in);
836     felem_assign(ftmp2, x_in);
837 
838     /* delta = z^2 */
839     felem_square(tmp, z_in);
840     felem_reduce(delta, tmp);
841 
842     /* gamma = y^2 */
843     felem_square(tmp, y_in);
844     felem_reduce(gamma, tmp);
845 
846     /* beta = x*gamma */
847     felem_mul(tmp, x_in, gamma);
848     felem_reduce(beta, tmp);
849 
850     /* alpha = 3*(x-delta)*(x+delta) */
851     felem_diff(ftmp, delta);
852     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
853     felem_sum(ftmp2, delta);
854     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
855     felem_scalar(ftmp2, 3);
856     /* ftmp2[i] < 3 * 2^58 < 2^60 */
857     felem_mul(tmp, ftmp, ftmp2);
858     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
859     felem_reduce(alpha, tmp);
860 
861     /* x' = alpha^2 - 8*beta */
862     felem_square(tmp, alpha);
863     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
864     felem_assign(ftmp, beta);
865     felem_scalar(ftmp, 8);
866     /* ftmp[i] < 8 * 2^57 = 2^60 */
867     felem_diff_128_64(tmp, ftmp);
868     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
869     felem_reduce(x_out, tmp);
870 
871     /* z' = (y + z)^2 - gamma - delta */
872     felem_sum(delta, gamma);
873     /* delta[i] < 2^57 + 2^57 = 2^58 */
874     felem_assign(ftmp, y_in);
875     felem_sum(ftmp, z_in);
876     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
877     felem_square(tmp, ftmp);
878     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
879     felem_diff_128_64(tmp, delta);
880     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
881     felem_reduce(z_out, tmp);
882 
883     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
884     felem_scalar(beta, 4);
885     /* beta[i] < 4 * 2^57 = 2^59 */
886     felem_diff(beta, x_out);
887     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
888     felem_mul(tmp, alpha, beta);
889     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
890     felem_square(tmp2, gamma);
891     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
892     widefelem_scalar(tmp2, 8);
893     /* tmp2[i] < 8 * 2^116 = 2^119 */
894     widefelem_diff(tmp, tmp2);
895     /* tmp[i] < 2^119 + 2^120 < 2^121 */
896     felem_reduce(y_out, tmp);
897 }
898 
899 /*-
900  * Add two elliptic curve points:
901  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
902  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
903  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
904  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
905  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
906  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
907  *
908  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
909  */
910 
911 /*
912  * This function is not entirely constant-time: it includes a branch for
913  * checking whether the two input points are equal, (while not equal to the
914  * point at infinity). This case never happens during single point
915  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
916  */
point_add(felem x3,felem y3,felem z3,const felem x1,const felem y1,const felem z1,const int mixed,const felem x2,const felem y2,const felem z2)917 static void point_add(felem x3, felem y3, felem z3,
918                       const felem x1, const felem y1, const felem z1,
919                       const int mixed, const felem x2, const felem y2,
920                       const felem z2)
921 {
922     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
923     widefelem tmp, tmp2;
924     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
925 
926     if (!mixed) {
927         /* ftmp2 = z2^2 */
928         felem_square(tmp, z2);
929         felem_reduce(ftmp2, tmp);
930 
931         /* ftmp4 = z2^3 */
932         felem_mul(tmp, ftmp2, z2);
933         felem_reduce(ftmp4, tmp);
934 
935         /* ftmp4 = z2^3*y1 */
936         felem_mul(tmp2, ftmp4, y1);
937         felem_reduce(ftmp4, tmp2);
938 
939         /* ftmp2 = z2^2*x1 */
940         felem_mul(tmp2, ftmp2, x1);
941         felem_reduce(ftmp2, tmp2);
942     } else {
943         /*
944          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
945          */
946 
947         /* ftmp4 = z2^3*y1 */
948         felem_assign(ftmp4, y1);
949 
950         /* ftmp2 = z2^2*x1 */
951         felem_assign(ftmp2, x1);
952     }
953 
954     /* ftmp = z1^2 */
955     felem_square(tmp, z1);
956     felem_reduce(ftmp, tmp);
957 
958     /* ftmp3 = z1^3 */
959     felem_mul(tmp, ftmp, z1);
960     felem_reduce(ftmp3, tmp);
961 
962     /* tmp = z1^3*y2 */
963     felem_mul(tmp, ftmp3, y2);
964     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
965 
966     /* ftmp3 = z1^3*y2 - z2^3*y1 */
967     felem_diff_128_64(tmp, ftmp4);
968     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
969     felem_reduce(ftmp3, tmp);
970 
971     /* tmp = z1^2*x2 */
972     felem_mul(tmp, ftmp, x2);
973     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
974 
975     /* ftmp = z1^2*x2 - z2^2*x1 */
976     felem_diff_128_64(tmp, ftmp2);
977     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
978     felem_reduce(ftmp, tmp);
979 
980     /*
981      * the formulae are incorrect if the points are equal so we check for
982      * this and do doubling if this happens
983      */
984     x_equal = felem_is_zero(ftmp);
985     y_equal = felem_is_zero(ftmp3);
986     z1_is_zero = felem_is_zero(z1);
987     z2_is_zero = felem_is_zero(z2);
988     /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
989     if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
990         point_double(x3, y3, z3, x1, y1, z1);
991         return;
992     }
993 
994     /* ftmp5 = z1*z2 */
995     if (!mixed) {
996         felem_mul(tmp, z1, z2);
997         felem_reduce(ftmp5, tmp);
998     } else {
999         /* special case z2 = 0 is handled later */
1000         felem_assign(ftmp5, z1);
1001     }
1002 
1003     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1004     felem_mul(tmp, ftmp, ftmp5);
1005     felem_reduce(z_out, tmp);
1006 
1007     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1008     felem_assign(ftmp5, ftmp);
1009     felem_square(tmp, ftmp);
1010     felem_reduce(ftmp, tmp);
1011 
1012     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1013     felem_mul(tmp, ftmp, ftmp5);
1014     felem_reduce(ftmp5, tmp);
1015 
1016     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1017     felem_mul(tmp, ftmp2, ftmp);
1018     felem_reduce(ftmp2, tmp);
1019 
1020     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1021     felem_mul(tmp, ftmp4, ftmp5);
1022     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1023 
1024     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1025     felem_square(tmp2, ftmp3);
1026     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1027 
1028     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1029     felem_diff_128_64(tmp2, ftmp5);
1030     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1031 
1032     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1033     felem_assign(ftmp5, ftmp2);
1034     felem_scalar(ftmp5, 2);
1035     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1036 
1037     /*-
1038      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1039      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1040      */
1041     felem_diff_128_64(tmp2, ftmp5);
1042     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1043     felem_reduce(x_out, tmp2);
1044 
1045     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1046     felem_diff(ftmp2, x_out);
1047     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1048 
1049     /*
1050      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1051      */
1052     felem_mul(tmp2, ftmp3, ftmp2);
1053     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1054 
1055     /*-
1056      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1057      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1058      */
1059     widefelem_diff(tmp2, tmp);
1060     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1061     felem_reduce(y_out, tmp2);
1062 
1063     /*
1064      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1065      * the point at infinity, so we need to check for this separately
1066      */
1067 
1068     /*
1069      * if point 1 is at infinity, copy point 2 to output, and vice versa
1070      */
1071     copy_conditional(x_out, x2, z1_is_zero);
1072     copy_conditional(x_out, x1, z2_is_zero);
1073     copy_conditional(y_out, y2, z1_is_zero);
1074     copy_conditional(y_out, y1, z2_is_zero);
1075     copy_conditional(z_out, z2, z1_is_zero);
1076     copy_conditional(z_out, z1, z2_is_zero);
1077     felem_assign(x3, x_out);
1078     felem_assign(y3, y_out);
1079     felem_assign(z3, z_out);
1080 }
1081 
1082 /*
1083  * select_point selects the |idx|th point from a precomputation table and
1084  * copies it to out.
1085  * The pre_comp array argument should be size of |size| argument
1086  */
select_point(const u64 idx,unsigned int size,const felem pre_comp[][3],felem out[3])1087 static void select_point(const u64 idx, unsigned int size,
1088                          const felem pre_comp[][3], felem out[3])
1089 {
1090     unsigned i, j;
1091     limb *outlimbs = &out[0][0];
1092     memset(outlimbs, 0, 3 * sizeof(felem));
1093 
1094     for (i = 0; i < size; i++) {
1095         const limb *inlimbs = &pre_comp[i][0][0];
1096         u64 mask = i ^ idx;
1097         mask |= mask >> 4;
1098         mask |= mask >> 2;
1099         mask |= mask >> 1;
1100         mask &= 1;
1101         mask--;
1102         for (j = 0; j < 4 * 3; j++)
1103             outlimbs[j] |= inlimbs[j] & mask;
1104     }
1105 }
1106 
1107 /* get_bit returns the |i|th bit in |in| */
get_bit(const felem_bytearray in,unsigned i)1108 static char get_bit(const felem_bytearray in, unsigned i)
1109 {
1110     if (i >= 224)
1111         return 0;
1112     return (in[i >> 3] >> (i & 7)) & 1;
1113 }
1114 
1115 /*
1116  * Interleaved point multiplication using precomputed point multiples: The
1117  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1118  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1119  * generator, using certain (large) precomputed multiples in g_pre_comp.
1120  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1121  */
batch_mul(felem x_out,felem y_out,felem z_out,const felem_bytearray scalars[],const unsigned num_points,const u8 * g_scalar,const int mixed,const felem pre_comp[][17][3],const felem g_pre_comp[2][16][3])1122 static void batch_mul(felem x_out, felem y_out, felem z_out,
1123                       const felem_bytearray scalars[],
1124                       const unsigned num_points, const u8 *g_scalar,
1125                       const int mixed, const felem pre_comp[][17][3],
1126                       const felem g_pre_comp[2][16][3])
1127 {
1128     int i, skip;
1129     unsigned num;
1130     unsigned gen_mul = (g_scalar != NULL);
1131     felem nq[3], tmp[4];
1132     u64 bits;
1133     u8 sign, digit;
1134 
1135     /* set nq to the point at infinity */
1136     memset(nq, 0, 3 * sizeof(felem));
1137 
1138     /*
1139      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1140      * of the generator (two in each of the last 28 rounds) and additions of
1141      * other points multiples (every 5th round).
1142      */
1143     skip = 1;                   /* save two point operations in the first
1144                                  * round */
1145     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1146         /* double */
1147         if (!skip)
1148             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1149 
1150         /* add multiples of the generator */
1151         if (gen_mul && (i <= 27)) {
1152             /* first, look 28 bits upwards */
1153             bits = get_bit(g_scalar, i + 196) << 3;
1154             bits |= get_bit(g_scalar, i + 140) << 2;
1155             bits |= get_bit(g_scalar, i + 84) << 1;
1156             bits |= get_bit(g_scalar, i + 28);
1157             /* select the point to add, in constant time */
1158             select_point(bits, 16, g_pre_comp[1], tmp);
1159 
1160             if (!skip) {
1161                 /* value 1 below is argument for "mixed" */
1162                 point_add(nq[0], nq[1], nq[2],
1163                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1164             } else {
1165                 memcpy(nq, tmp, 3 * sizeof(felem));
1166                 skip = 0;
1167             }
1168 
1169             /* second, look at the current position */
1170             bits = get_bit(g_scalar, i + 168) << 3;
1171             bits |= get_bit(g_scalar, i + 112) << 2;
1172             bits |= get_bit(g_scalar, i + 56) << 1;
1173             bits |= get_bit(g_scalar, i);
1174             /* select the point to add, in constant time */
1175             select_point(bits, 16, g_pre_comp[0], tmp);
1176             point_add(nq[0], nq[1], nq[2],
1177                       nq[0], nq[1], nq[2],
1178                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1179         }
1180 
1181         /* do other additions every 5 doublings */
1182         if (num_points && (i % 5 == 0)) {
1183             /* loop over all scalars */
1184             for (num = 0; num < num_points; ++num) {
1185                 bits = get_bit(scalars[num], i + 4) << 5;
1186                 bits |= get_bit(scalars[num], i + 3) << 4;
1187                 bits |= get_bit(scalars[num], i + 2) << 3;
1188                 bits |= get_bit(scalars[num], i + 1) << 2;
1189                 bits |= get_bit(scalars[num], i) << 1;
1190                 bits |= get_bit(scalars[num], i - 1);
1191                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1192 
1193                 /* select the point to add or subtract */
1194                 select_point(digit, 17, pre_comp[num], tmp);
1195                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1196                                             * point */
1197                 copy_conditional(tmp[1], tmp[3], sign);
1198 
1199                 if (!skip) {
1200                     point_add(nq[0], nq[1], nq[2],
1201                               nq[0], nq[1], nq[2],
1202                               mixed, tmp[0], tmp[1], tmp[2]);
1203                 } else {
1204                     memcpy(nq, tmp, 3 * sizeof(felem));
1205                     skip = 0;
1206                 }
1207             }
1208         }
1209     }
1210     felem_assign(x_out, nq[0]);
1211     felem_assign(y_out, nq[1]);
1212     felem_assign(z_out, nq[2]);
1213 }
1214 
1215 /******************************************************************************/
1216 /*
1217  * FUNCTIONS TO MANAGE PRECOMPUTATION
1218  */
1219 
nistp224_pre_comp_new()1220 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1221 {
1222     NISTP224_PRE_COMP *ret = NULL;
1223     ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof(*ret));
1224     if (!ret) {
1225         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1226         return ret;
1227     }
1228     memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1229     ret->references = 1;
1230     return ret;
1231 }
1232 
nistp224_pre_comp_dup(void * src_)1233 static void *nistp224_pre_comp_dup(void *src_)
1234 {
1235     NISTP224_PRE_COMP *src = src_;
1236 
1237     /* no need to actually copy, these objects never change! */
1238     CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1239 
1240     return src_;
1241 }
1242 
nistp224_pre_comp_free(void * pre_)1243 static void nistp224_pre_comp_free(void *pre_)
1244 {
1245     int i;
1246     NISTP224_PRE_COMP *pre = pre_;
1247 
1248     if (!pre)
1249         return;
1250 
1251     i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1252     if (i > 0)
1253         return;
1254 
1255     OPENSSL_free(pre);
1256 }
1257 
nistp224_pre_comp_clear_free(void * pre_)1258 static void nistp224_pre_comp_clear_free(void *pre_)
1259 {
1260     int i;
1261     NISTP224_PRE_COMP *pre = pre_;
1262 
1263     if (!pre)
1264         return;
1265 
1266     i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1267     if (i > 0)
1268         return;
1269 
1270     OPENSSL_cleanse(pre, sizeof(*pre));
1271     OPENSSL_free(pre);
1272 }
1273 
1274 /******************************************************************************/
1275 /*
1276  * OPENSSL EC_METHOD FUNCTIONS
1277  */
1278 
ec_GFp_nistp224_group_init(EC_GROUP * group)1279 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1280 {
1281     int ret;
1282     ret = ec_GFp_simple_group_init(group);
1283     group->a_is_minus3 = 1;
1284     return ret;
1285 }
1286 
ec_GFp_nistp224_group_set_curve(EC_GROUP * group,const BIGNUM * p,const BIGNUM * a,const BIGNUM * b,BN_CTX * ctx)1287 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1288                                     const BIGNUM *a, const BIGNUM *b,
1289                                     BN_CTX *ctx)
1290 {
1291     int ret = 0;
1292     BN_CTX *new_ctx = NULL;
1293     BIGNUM *curve_p, *curve_a, *curve_b;
1294 
1295     if (ctx == NULL)
1296         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1297             return 0;
1298     BN_CTX_start(ctx);
1299     if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1300         ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1301         ((curve_b = BN_CTX_get(ctx)) == NULL))
1302         goto err;
1303     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1304     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1305     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1306     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1307         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1308               EC_R_WRONG_CURVE_PARAMETERS);
1309         goto err;
1310     }
1311     group->field_mod_func = BN_nist_mod_224;
1312     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1313  err:
1314     BN_CTX_end(ctx);
1315     if (new_ctx != NULL)
1316         BN_CTX_free(new_ctx);
1317     return ret;
1318 }
1319 
1320 /*
1321  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1322  * (X/Z^2, Y/Z^3)
1323  */
ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP * group,const EC_POINT * point,BIGNUM * x,BIGNUM * y,BN_CTX * ctx)1324 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1325                                                  const EC_POINT *point,
1326                                                  BIGNUM *x, BIGNUM *y,
1327                                                  BN_CTX *ctx)
1328 {
1329     felem z1, z2, x_in, y_in, x_out, y_out;
1330     widefelem tmp;
1331 
1332     if (EC_POINT_is_at_infinity(group, point)) {
1333         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1334               EC_R_POINT_AT_INFINITY);
1335         return 0;
1336     }
1337     if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1338         (!BN_to_felem(z1, &point->Z)))
1339         return 0;
1340     felem_inv(z2, z1);
1341     felem_square(tmp, z2);
1342     felem_reduce(z1, tmp);
1343     felem_mul(tmp, x_in, z1);
1344     felem_reduce(x_in, tmp);
1345     felem_contract(x_out, x_in);
1346     if (x != NULL) {
1347         if (!felem_to_BN(x, x_out)) {
1348             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1349                   ERR_R_BN_LIB);
1350             return 0;
1351         }
1352     }
1353     felem_mul(tmp, z1, z2);
1354     felem_reduce(z1, tmp);
1355     felem_mul(tmp, y_in, z1);
1356     felem_reduce(y_in, tmp);
1357     felem_contract(y_out, y_in);
1358     if (y != NULL) {
1359         if (!felem_to_BN(y, y_out)) {
1360             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1361                   ERR_R_BN_LIB);
1362             return 0;
1363         }
1364     }
1365     return 1;
1366 }
1367 
make_points_affine(size_t num,felem points[][3],felem tmp_felems[])1368 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1369                                felem tmp_felems[ /* num+1 */ ])
1370 {
1371     /*
1372      * Runs in constant time, unless an input is the point at infinity (which
1373      * normally shouldn't happen).
1374      */
1375     ec_GFp_nistp_points_make_affine_internal(num,
1376                                              points,
1377                                              sizeof(felem),
1378                                              tmp_felems,
1379                                              (void (*)(void *))felem_one,
1380                                              felem_is_zero_int,
1381                                              (void (*)(void *, const void *))
1382                                              felem_assign,
1383                                              (void (*)(void *, const void *))
1384                                              felem_square_reduce, (void (*)
1385                                                                    (void *,
1386                                                                     const void
1387                                                                     *,
1388                                                                     const void
1389                                                                     *))
1390                                              felem_mul_reduce,
1391                                              (void (*)(void *, const void *))
1392                                              felem_inv,
1393                                              (void (*)(void *, const void *))
1394                                              felem_contract);
1395 }
1396 
1397 /*
1398  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1399  * values Result is stored in r (r can equal one of the inputs).
1400  */
ec_GFp_nistp224_points_mul(const EC_GROUP * group,EC_POINT * r,const BIGNUM * scalar,size_t num,const EC_POINT * points[],const BIGNUM * scalars[],BN_CTX * ctx)1401 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1402                                const BIGNUM *scalar, size_t num,
1403                                const EC_POINT *points[],
1404                                const BIGNUM *scalars[], BN_CTX *ctx)
1405 {
1406     int ret = 0;
1407     int j;
1408     unsigned i;
1409     int mixed = 0;
1410     BN_CTX *new_ctx = NULL;
1411     BIGNUM *x, *y, *z, *tmp_scalar;
1412     felem_bytearray g_secret;
1413     felem_bytearray *secrets = NULL;
1414     felem(*pre_comp)[17][3] = NULL;
1415     felem *tmp_felems = NULL;
1416     int num_bytes;
1417     int have_pre_comp = 0;
1418     size_t num_points = num;
1419     felem x_in, y_in, z_in, x_out, y_out, z_out;
1420     NISTP224_PRE_COMP *pre = NULL;
1421     const felem(*g_pre_comp)[16][3] = NULL;
1422     EC_POINT *generator = NULL;
1423     const EC_POINT *p = NULL;
1424     const BIGNUM *p_scalar = NULL;
1425 
1426     if (ctx == NULL)
1427         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1428             return 0;
1429     BN_CTX_start(ctx);
1430     if (((x = BN_CTX_get(ctx)) == NULL) ||
1431         ((y = BN_CTX_get(ctx)) == NULL) ||
1432         ((z = BN_CTX_get(ctx)) == NULL) ||
1433         ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1434         goto err;
1435 
1436     if (scalar != NULL) {
1437         pre = EC_EX_DATA_get_data(group->extra_data,
1438                                   nistp224_pre_comp_dup,
1439                                   nistp224_pre_comp_free,
1440                                   nistp224_pre_comp_clear_free);
1441         if (pre)
1442             /* we have precomputation, try to use it */
1443             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1444         else
1445             /* try to use the standard precomputation */
1446             g_pre_comp = &gmul[0];
1447         generator = EC_POINT_new(group);
1448         if (generator == NULL)
1449             goto err;
1450         /* get the generator from precomputation */
1451         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1452             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1453             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1454             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1455             goto err;
1456         }
1457         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1458                                                       generator, x, y, z,
1459                                                       ctx))
1460             goto err;
1461         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1462             /* precomputation matches generator */
1463             have_pre_comp = 1;
1464         else
1465             /*
1466              * we don't have valid precomputation: treat the generator as a
1467              * random point
1468              */
1469             num_points = num_points + 1;
1470     }
1471 
1472     if (num_points > 0) {
1473         if (num_points >= 3) {
1474             /*
1475              * unless we precompute multiples for just one or two points,
1476              * converting those into affine form is time well spent
1477              */
1478             mixed = 1;
1479         }
1480         secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1481         pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1482         if (mixed)
1483             tmp_felems =
1484                 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1485         if ((secrets == NULL) || (pre_comp == NULL)
1486             || (mixed && (tmp_felems == NULL))) {
1487             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1488             goto err;
1489         }
1490 
1491         /*
1492          * we treat NULL scalars as 0, and NULL points as points at infinity,
1493          * i.e., they contribute nothing to the linear combination
1494          */
1495         memset(secrets, 0, num_points * sizeof(felem_bytearray));
1496         memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1497         for (i = 0; i < num_points; ++i) {
1498             if (i == num) {
1499                 /* the generator */
1500                 p = EC_GROUP_get0_generator(group);
1501                 p_scalar = scalar;
1502             } else {
1503                 /* the i^th point */
1504                 p = points[i];
1505                 p_scalar = scalars[i];
1506             }
1507             if ((p_scalar != NULL) && (p != NULL)) {
1508                 /* reduce scalar to 0 <= scalar < 2^224 */
1509                 if ((BN_num_bits(p_scalar) > 224)
1510                     || (BN_is_negative(p_scalar))) {
1511                     /*
1512                      * this is an unusual input, and we don't guarantee
1513                      * constant-timeness
1514                      */
1515                     if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1516                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1517                         goto err;
1518                     }
1519                     num_bytes = bn_bn2lebinpad(tmp_scalar,
1520                                                secrets[i], sizeof(secrets[i]));
1521                 } else {
1522                     num_bytes = bn_bn2lebinpad(p_scalar,
1523                                                secrets[i], sizeof(secrets[i]));
1524                 }
1525                 if (num_bytes < 0) {
1526                     ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1527                     goto err;
1528                 }
1529                 /* precompute multiples */
1530                 if ((!BN_to_felem(x_out, &p->X)) ||
1531                     (!BN_to_felem(y_out, &p->Y)) ||
1532                     (!BN_to_felem(z_out, &p->Z)))
1533                     goto err;
1534                 felem_assign(pre_comp[i][1][0], x_out);
1535                 felem_assign(pre_comp[i][1][1], y_out);
1536                 felem_assign(pre_comp[i][1][2], z_out);
1537                 for (j = 2; j <= 16; ++j) {
1538                     if (j & 1) {
1539                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1540                                   pre_comp[i][j][2], pre_comp[i][1][0],
1541                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1542                                   pre_comp[i][j - 1][0],
1543                                   pre_comp[i][j - 1][1],
1544                                   pre_comp[i][j - 1][2]);
1545                     } else {
1546                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1547                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1548                                      pre_comp[i][j / 2][1],
1549                                      pre_comp[i][j / 2][2]);
1550                     }
1551                 }
1552             }
1553         }
1554         if (mixed)
1555             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1556     }
1557 
1558     /* the scalar for the generator */
1559     if ((scalar != NULL) && (have_pre_comp)) {
1560         memset(g_secret, 0, sizeof(g_secret));
1561         /* reduce scalar to 0 <= scalar < 2^224 */
1562         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1563             /*
1564              * this is an unusual input, and we don't guarantee
1565              * constant-timeness
1566              */
1567             if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1568                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1569                 goto err;
1570             }
1571             num_bytes = bn_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1572         } else {
1573             num_bytes = bn_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1574         }
1575         /* do the multiplication with generator precomputation */
1576         batch_mul(x_out, y_out, z_out,
1577                   (const felem_bytearray(*))secrets, num_points,
1578                   g_secret,
1579                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1580     } else {
1581         /* do the multiplication without generator precomputation */
1582         batch_mul(x_out, y_out, z_out,
1583                   (const felem_bytearray(*))secrets, num_points,
1584                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1585     }
1586     /* reduce the output to its unique minimal representation */
1587     felem_contract(x_in, x_out);
1588     felem_contract(y_in, y_out);
1589     felem_contract(z_in, z_out);
1590     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1591         (!felem_to_BN(z, z_in))) {
1592         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1593         goto err;
1594     }
1595     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1596 
1597  err:
1598     BN_CTX_end(ctx);
1599     if (generator != NULL)
1600         EC_POINT_free(generator);
1601     if (new_ctx != NULL)
1602         BN_CTX_free(new_ctx);
1603     if (secrets != NULL)
1604         OPENSSL_free(secrets);
1605     if (pre_comp != NULL)
1606         OPENSSL_free(pre_comp);
1607     if (tmp_felems != NULL)
1608         OPENSSL_free(tmp_felems);
1609     return ret;
1610 }
1611 
ec_GFp_nistp224_precompute_mult(EC_GROUP * group,BN_CTX * ctx)1612 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1613 {
1614     int ret = 0;
1615     NISTP224_PRE_COMP *pre = NULL;
1616     int i, j;
1617     BN_CTX *new_ctx = NULL;
1618     BIGNUM *x, *y;
1619     EC_POINT *generator = NULL;
1620     felem tmp_felems[32];
1621 
1622     /* throw away old precomputation */
1623     EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1624                          nistp224_pre_comp_free,
1625                          nistp224_pre_comp_clear_free);
1626     if (ctx == NULL)
1627         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1628             return 0;
1629     BN_CTX_start(ctx);
1630     if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1631         goto err;
1632     /* get the generator */
1633     if (group->generator == NULL)
1634         goto err;
1635     generator = EC_POINT_new(group);
1636     if (generator == NULL)
1637         goto err;
1638     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1639     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1640     if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1641         goto err;
1642     if ((pre = nistp224_pre_comp_new()) == NULL)
1643         goto err;
1644     /*
1645      * if the generator is the standard one, use built-in precomputation
1646      */
1647     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1648         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1649         goto done;
1650     }
1651     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1652         (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1653         (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1654         goto err;
1655     /*
1656      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1657      * 2^140*G, 2^196*G for the second one
1658      */
1659     for (i = 1; i <= 8; i <<= 1) {
1660         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1661                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1662                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1663         for (j = 0; j < 27; ++j) {
1664             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1665                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1666                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1667         }
1668         if (i == 8)
1669             break;
1670         point_double(pre->g_pre_comp[0][2 * i][0],
1671                      pre->g_pre_comp[0][2 * i][1],
1672                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1673                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1674         for (j = 0; j < 27; ++j) {
1675             point_double(pre->g_pre_comp[0][2 * i][0],
1676                          pre->g_pre_comp[0][2 * i][1],
1677                          pre->g_pre_comp[0][2 * i][2],
1678                          pre->g_pre_comp[0][2 * i][0],
1679                          pre->g_pre_comp[0][2 * i][1],
1680                          pre->g_pre_comp[0][2 * i][2]);
1681         }
1682     }
1683     for (i = 0; i < 2; i++) {
1684         /* g_pre_comp[i][0] is the point at infinity */
1685         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1686         /* the remaining multiples */
1687         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1688         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1689                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1690                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1691                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1692                   pre->g_pre_comp[i][2][2]);
1693         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1694         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1695                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1696                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1697                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1698                   pre->g_pre_comp[i][2][2]);
1699         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1700         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1701                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1702                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1703                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1704                   pre->g_pre_comp[i][4][2]);
1705         /*
1706          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1707          */
1708         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1709                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1710                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1711                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1712                   pre->g_pre_comp[i][2][2]);
1713         for (j = 1; j < 8; ++j) {
1714             /* odd multiples: add G resp. 2^28*G */
1715             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1716                       pre->g_pre_comp[i][2 * j + 1][1],
1717                       pre->g_pre_comp[i][2 * j + 1][2],
1718                       pre->g_pre_comp[i][2 * j][0],
1719                       pre->g_pre_comp[i][2 * j][1],
1720                       pre->g_pre_comp[i][2 * j][2], 0,
1721                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1722                       pre->g_pre_comp[i][1][2]);
1723         }
1724     }
1725     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1726 
1727  done:
1728     if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1729                              nistp224_pre_comp_free,
1730                              nistp224_pre_comp_clear_free))
1731         goto err;
1732     ret = 1;
1733     pre = NULL;
1734  err:
1735     BN_CTX_end(ctx);
1736     if (generator != NULL)
1737         EC_POINT_free(generator);
1738     if (new_ctx != NULL)
1739         BN_CTX_free(new_ctx);
1740     if (pre)
1741         nistp224_pre_comp_free(pre);
1742     return ret;
1743 }
1744 
ec_GFp_nistp224_have_precompute_mult(const EC_GROUP * group)1745 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1746 {
1747     if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1748                             nistp224_pre_comp_free,
1749                             nistp224_pre_comp_clear_free)
1750         != NULL)
1751         return 1;
1752     else
1753         return 0;
1754 }
1755 
1756 #else
1757 static void *dummy = &dummy;
1758 #endif
1759