1 /*-
2 * Copyright (c) 2016 The FreeBSD Foundation
3 * Copyright (c) 2020 Ampere Computing
4 * All rights reserved.
5 *
6 * This software was developed by Andrew Turner under
7 * sponsorship from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * This file is derived from aesni_wrap.c:
31 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
32 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
33 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
34 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
35 * Copyright (c) 2014 The FreeBSD Foundation
36 */
37
38 /*
39 * This code is built with floating-point enabled. Make sure to have entered
40 * into floating-point context before calling any of these functions.
41 */
42
43 #include <sys/cdefs.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/malloc.h>
47 #include <sys/queue.h>
48
49 #include <opencrypto/cryptodev.h>
50 #include <opencrypto/gmac.h>
51 #include <crypto/rijndael/rijndael.h>
52 #include <crypto/armv8/armv8_crypto.h>
53
54 #include <arm_neon.h>
55
/*
 * Encrypt a single 16-byte block with the ARMv8 AES instructions.
 *
 * Round keys are consumed two at a time (AESE + AESMC each), followed by
 * one more AESE + AESMC with keysched[rounds - 1], a bare AESE with
 * keysched[rounds], and a final XOR with keysched[rounds + 1].  Callers in
 * this file pass the key's aes_rounds minus one as "rounds".
 */
static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t state;
	int rk;

	state = from;
	rk = 0;
	while (rk < rounds - 1) {
		state = vaesmcq_u8(vaeseq_u8(state, keysched[rk]));
		state = vaesmcq_u8(vaeseq_u8(state, keysched[rk + 1]));
		rk += 2;
	}

	state = vaesmcq_u8(vaeseq_u8(state, keysched[rounds - 1]));
	state = vaeseq_u8(state, keysched[rounds]);

	/* The last round key is applied with a plain XOR. */
	return (veorq_u8(state, keysched[rounds + 1]));
}
77
/*
 * Decrypt a single 16-byte block with the ARMv8 AES instructions.
 *
 * Mirrors armv8_aes_enc(): round keys are consumed two at a time
 * (AESD + AESIMC each), followed by one more AESD + AESIMC with
 * keysched[rounds - 1], a bare AESD with keysched[rounds], and a final XOR
 * with keysched[rounds + 1].  Callers in this file pass the key's
 * aes_rounds minus one as "rounds".
 */
static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t state;
	int rk;

	state = from;
	rk = 0;
	while (rk < rounds - 1) {
		state = vaesimcq_u8(vaesdq_u8(state, keysched[rk]));
		state = vaesimcq_u8(vaesdq_u8(state, keysched[rk + 1]));
		rk += 2;
	}

	state = vaesimcq_u8(vaesdq_u8(state, keysched[rounds - 1]));
	state = vaesdq_u8(state, keysched[rounds]);

	/* The last round key is applied with a plain XOR. */
	return (veorq_u8(state, keysched[rounds + 1]));
}
99
/*
 * AES-CBC encrypt "len" bytes read through "fromc" and written through
 * "toc", chaining from "iv".  "len" must be a multiple of AES_BLOCK_LEN.
 *
 * Whole blocks that are contiguous in both cursors are processed directly
 * in the segment memory.  When fewer than AES_BLOCK_LEN contiguous bytes are
 * available, one block is staged through a stack buffer using
 * crypto_cursor_copydata()/crypto_cursor_copyback() (which presumably
 * advance their cursors, as no explicit advance follows them).
 */
void
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	const uint8x16_t *sched;
	uint8x16_t chain, ct;
	uint8_t bounce[AES_BLOCK_LEN], *src, *dst;
	size_t chunk, dstlen, off, srclen;
	int nrounds;

	KASSERT(len % AES_BLOCK_LEN == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	sched = (const void *)key->aes_key;
	nrounds = key->aes_rounds - 1;

	/* "chain" holds the previous ciphertext block (initially the IV). */
	chain = vld1q_u8(iv);
	while (len > 0) {
		src = crypto_cursor_segment(fromc, &srclen);
		dst = crypto_cursor_segment(toc, &dstlen);

		chunk = ulmin(len, ulmin(srclen, dstlen));
		if (chunk >= AES_BLOCK_LEN) {
			/* Only whole blocks are handled in place. */
			chunk -= chunk % AES_BLOCK_LEN;
			for (off = 0; off < chunk; off += AES_BLOCK_LEN) {
				ct = armv8_aes_enc(nrounds, sched,
				    veorq_u8(vld1q_u8(src + off), chain));
				chain = ct;
				vst1q_u8(dst + off, ct);
			}
			crypto_cursor_advance(fromc, chunk);
			crypto_cursor_advance(toc, chunk);
		} else {
			/* The block straddles segments: use the bounce buffer. */
			crypto_cursor_copydata(fromc, AES_BLOCK_LEN, bounce);
			ct = armv8_aes_enc(nrounds, sched,
			    veorq_u8(vld1q_u8(bounce), chain));
			chain = ct;
			vst1q_u8(bounce, ct);
			crypto_cursor_copyback(toc, AES_BLOCK_LEN, bounce);
			chunk = AES_BLOCK_LEN;
		}
		len -= chunk;
	}

	explicit_bzero(bounce, sizeof(bounce));
}
147
/*
 * AES-CBC decrypt "len" bytes read through "fromc" and written through
 * "toc", chaining from "iv".  "len" must be a multiple of AES_BLOCK_LEN.
 *
 * Whole blocks that are contiguous in both cursors are processed directly
 * in the segment memory.  When fewer than AES_BLOCK_LEN contiguous bytes are
 * available, one block is staged through a stack buffer using
 * crypto_cursor_copydata()/crypto_cursor_copyback() (which presumably
 * advance their cursors, as no explicit advance follows them).
 */
void
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	const uint8x16_t *sched;
	uint8x16_t chain, ct, pt;
	uint8_t bounce[AES_BLOCK_LEN], *src, *dst;
	size_t chunk, dstlen, off, srclen;
	int nrounds;

	KASSERT(len % AES_BLOCK_LEN == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	sched = (const void *)key->aes_key;
	nrounds = key->aes_rounds - 1;

	/* "chain" holds the previous ciphertext block (initially the IV). */
	chain = vld1q_u8(iv);
	while (len > 0) {
		src = crypto_cursor_segment(fromc, &srclen);
		dst = crypto_cursor_segment(toc, &dstlen);

		chunk = ulmin(len, ulmin(srclen, dstlen));
		if (chunk >= AES_BLOCK_LEN) {
			/* Only whole blocks are handled in place. */
			chunk -= chunk % AES_BLOCK_LEN;
			for (off = 0; off < chunk; off += AES_BLOCK_LEN) {
				/*
				 * Load the ciphertext before storing the
				 * plaintext so source and destination may be
				 * the same memory.
				 */
				ct = vld1q_u8(src + off);
				pt = armv8_aes_dec(nrounds, sched, ct);
				vst1q_u8(dst + off, veorq_u8(pt, chain));
				chain = ct;
			}
			crypto_cursor_advance(fromc, chunk);
			crypto_cursor_advance(toc, chunk);
		} else {
			/* The block straddles segments: use the bounce buffer. */
			crypto_cursor_copydata(fromc, AES_BLOCK_LEN, bounce);
			ct = vld1q_u8(bounce);
			pt = armv8_aes_dec(nrounds, sched, ct);
			vst1q_u8(bounce, veorq_u8(pt, chain));
			chain = ct;
			crypto_cursor_copyback(toc, AES_BLOCK_LEN, bounce);
			chunk = AES_BLOCK_LEN;
		}
		len -= chunk;
	}

	explicit_bzero(bounce, sizeof(bounce));
}
194
/* Size in bytes of one XTS data block and of the tweak. */
#define AES_XTS_BLOCKSIZE 16
/* Number of leading IV bytes copied into the tweak; the rest is zeroed. */
#define AES_XTS_IVSIZE 8
/* Feedback constant folded into the low lane when the tweak overflows. */
#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
198
/*
 * Advance the XTS tweak by one step: shift the 128-bit value (held as four
 * 32-bit lanes) left by one bit and fold AES_XTS_ALPHA into lane 0 when the
 * top bit of lane 3 was set.
 */
static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
	const int32x4_t feedback = {AES_XTS_ALPHA, 1, 1, 1};
	int32x4_t carry;

	/*
	 * Rotate the lanes up by one so each lane sees its lower neighbour
	 * (lane 0 sees lane 3), then smear each sign bit across the lane
	 * with an arithmetic shift.  Masking leaves a 1 where a bit must be
	 * carried into the next lane, and AES_XTS_ALPHA in lane 0 on
	 * overflow.
	 */
	carry = vshrq_n_s32(vextq_s32(inp, inp, 3), 31);
	carry = vandq_s32(carry, feedback);

	/* Per-lane shift, then apply the carries/feedback. */
	return (veorq_s32(vshlq_n_s32(inp, 1), carry));
}
216
/*
 * Process one 16-byte XTS block: XOR the input with the tweak, run it
 * through AES (encrypt when do_encrypt is non-zero, decrypt otherwise), XOR
 * with the tweak again, store the result to "to", and finally advance
 * *tweak for the next block.  "from" and "to" may point at the same buffer.
 */
static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
	uint8x16_t data;

	data = veorq_u8(vld1q_u8(from), *tweak);

	if (do_encrypt != 0)
		data = armv8_aes_enc(rounds - 1, key_schedule, data);
	else
		data = armv8_aes_dec(rounds - 1, key_schedule, data);

	vst1q_u8(to, veorq_u8(data, *tweak));

	*tweak = vreinterpretq_u8_s32(
	    xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}
234
/*
 * Common AES-XTS worker for encryption (do_encrypt != 0) and decryption.
 *
 * The initial tweak is the first AES_XTS_IVSIZE bytes of "iv", zero-padded
 * to a full block and encrypted with "tweak_schedule".  Data is then
 * processed block by block with "data_schedule"; "len" must be a multiple
 * of AES_XTS_BLOCKSIZE.  Blocks that are not contiguous in both cursors are
 * staged through a stack buffer.
 */
static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	uint8x16_t tweakreg;
	uint8_t block[AES_XTS_BLOCKSIZE] __aligned(16);
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	uint8_t *src, *dst;
	size_t chunk, dstlen, off, srclen;

	KASSERT(len % AES_XTS_BLOCKSIZE == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	/* Last 64 bits of IV are always zero. */
	memset(tweak, 0, sizeof(tweak));
	memcpy(tweak, iv, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, vld1q_u8(tweak));

	while (len > 0) {
		src = crypto_cursor_segment(fromc, &srclen);
		dst = crypto_cursor_segment(toc, &dstlen);

		chunk = ulmin(len, ulmin(srclen, dstlen));
		if (chunk >= AES_XTS_BLOCKSIZE) {
			/* Only whole blocks are handled in place. */
			chunk -= chunk % AES_XTS_BLOCKSIZE;
			for (off = 0; off < chunk; off += AES_XTS_BLOCKSIZE) {
				armv8_aes_crypt_xts_block(rounds, data_schedule,
				    &tweakreg, src + off, dst + off,
				    do_encrypt);
			}
			crypto_cursor_advance(fromc, chunk);
			crypto_cursor_advance(toc, chunk);
		} else {
			/* The block straddles segments: bounce it. */
			crypto_cursor_copydata(fromc, AES_XTS_BLOCKSIZE, block);
			armv8_aes_crypt_xts_block(rounds, data_schedule,
			    &tweakreg, block, block, do_encrypt);
			crypto_cursor_copyback(toc, AES_XTS_BLOCKSIZE, block);
			chunk = AES_XTS_BLOCKSIZE;
		}
		len -= chunk;
	}

	explicit_bzero(block, sizeof(block));
}
291
/*
 * AES-XTS encrypt "len" bytes from "fromc" into "toc".  Thin wrapper that
 * unpacks the data key and calls armv8_aes_crypt_xts() with do_encrypt = 1.
 */
void
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, struct crypto_buffer_cursor *fromc,
    struct crypto_buffer_cursor *toc, const uint8_t iv[static AES_BLOCK_LEN])
{
	const uint8x16_t *data_keys;

	data_keys = (const void *)&data_schedule->aes_key;
	armv8_aes_crypt_xts(data_schedule->aes_rounds, data_keys,
	    tweak_schedule, len, fromc, toc, iv, 1);
}
301
/*
 * AES-XTS decrypt "len" bytes from "fromc" into "toc".  Thin wrapper that
 * unpacks the data key and calls armv8_aes_crypt_xts() with do_encrypt = 0.
 */
void
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	const uint8x16_t *data_keys;

	data_keys = (const void *)&data_schedule->aes_key;
	armv8_aes_crypt_xts(data_schedule->aes_rounds, data_keys,
	    tweak_schedule, len, fromc, toc, iv, 0);
}
/*
 * Increment a counter block of AES_BLOCK_LEN bytes, treating it as a
 * big-endian integer: bump the last byte and keep carrying towards the
 * first byte for as long as the byte just incremented wrapped to zero.
 */
#define AES_INC_COUNTER(counter)					\
	do {								\
		int idx_ = AES_BLOCK_LEN;				\
		while (idx_-- > 0) {					\
			if (++(counter)[idx_] != 0)			\
				break;					\
		}							\
	} while (0)
320
/* Working state shared by the AES-GCM helpers below. */
struct armv8_gcm_state {
	/* Encrypted initial counter block, XORed into the tag at the end. */
	__uint128_val_t EK0;
	/* Encrypted counter block used as keystream for the current block. */
	__uint128_val_t EKi;
	/* Running GHASH value; holds the tag after armv8_aes_gmac_finish(). */
	__uint128_val_t Xi;
	/* Scratch for the final block carrying the AAD and data lengths. */
	__uint128_val_t lenblock;
	/* Counter block fed to AES; bumped with AES_INC_COUNTER(). */
	uint8_t aes_counter[AES_BLOCK_LEN];
};
328
329 static void
armv8_aes_gmac_setup(struct armv8_gcm_state * s,AES_key_t * aes_key,const uint8_t * authdata,size_t authdatalen,const uint8_t iv[static AES_GCM_IV_LEN],const __uint128_val_t * Htable)330 armv8_aes_gmac_setup(struct armv8_gcm_state *s, AES_key_t *aes_key,
331 const uint8_t *authdata, size_t authdatalen,
332 const uint8_t iv[static AES_GCM_IV_LEN], const __uint128_val_t *Htable)
333 {
334 uint8_t block[AES_BLOCK_LEN];
335 size_t trailer;
336
337 bzero(s->aes_counter, AES_BLOCK_LEN);
338 memcpy(s->aes_counter, iv, AES_GCM_IV_LEN);
339
340 /* Setup the counter */
341 s->aes_counter[AES_BLOCK_LEN - 1] = 1;
342
343 /* EK0 for a final GMAC round */
344 aes_v8_encrypt(s->aes_counter, s->EK0.c, aes_key);
345
346 /* GCM starts with 2 as counter, 1 is used for final xor of tag. */
347 s->aes_counter[AES_BLOCK_LEN - 1] = 2;
348
349 memset(s->Xi.c, 0, sizeof(s->Xi.c));
350 trailer = authdatalen % AES_BLOCK_LEN;
351 if (authdatalen - trailer > 0) {
352 gcm_ghash_v8(s->Xi.u, Htable, authdata, authdatalen - trailer);
353 authdata += authdatalen - trailer;
354 }
355 if (trailer > 0 || authdatalen == 0) {
356 memset(block, 0, sizeof(block));
357 memcpy(block, authdata, trailer);
358 gcm_ghash_v8(s->Xi.u, Htable, block, AES_BLOCK_LEN);
359 }
360 }
361
362 static void
armv8_aes_gmac_finish(struct armv8_gcm_state * s,size_t len,size_t authdatalen,const __uint128_val_t * Htable)363 armv8_aes_gmac_finish(struct armv8_gcm_state *s, size_t len,
364 size_t authdatalen, const __uint128_val_t *Htable)
365 {
366 /* Lengths block */
367 s->lenblock.u[0] = s->lenblock.u[1] = 0;
368 s->lenblock.d[1] = htobe32(authdatalen * 8);
369 s->lenblock.d[3] = htobe32(len * 8);
370 gcm_ghash_v8(s->Xi.u, Htable, s->lenblock.c, AES_BLOCK_LEN);
371
372 s->Xi.u[0] ^= s->EK0.u[0];
373 s->Xi.u[1] ^= s->EK0.u[1];
374 }
375
376 static void
armv8_aes_encrypt_gcm_block(struct armv8_gcm_state * s,AES_key_t * aes_key,const uint64_t * from,uint64_t * to)377 armv8_aes_encrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
378 const uint64_t *from, uint64_t *to)
379 {
380 aes_v8_encrypt(s->aes_counter, s->EKi.c, aes_key);
381 AES_INC_COUNTER(s->aes_counter);
382 to[0] = from[0] ^ s->EKi.u[0];
383 to[1] = from[1] ^ s->EKi.u[1];
384 }
385
386 static void
armv8_aes_decrypt_gcm_block(struct armv8_gcm_state * s,AES_key_t * aes_key,const uint64_t * from,uint64_t * to)387 armv8_aes_decrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
388 const uint64_t *from, uint64_t *to)
389 {
390 armv8_aes_encrypt_gcm_block(s, aes_key, from, to);
391 }
392
393 void
armv8_aes_encrypt_gcm(AES_key_t * aes_key,size_t len,struct crypto_buffer_cursor * fromc,struct crypto_buffer_cursor * toc,size_t authdatalen,const uint8_t * authdata,uint8_t tag[static GMAC_DIGEST_LEN],const uint8_t iv[static AES_GCM_IV_LEN],const __uint128_val_t * Htable)394 armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
395 struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
396 size_t authdatalen, const uint8_t *authdata,
397 uint8_t tag[static GMAC_DIGEST_LEN],
398 const uint8_t iv[static AES_GCM_IV_LEN],
399 const __uint128_val_t *Htable)
400 {
401 struct armv8_gcm_state s;
402 uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN);
403 uint64_t *from64, *to64;
404 size_t fromseglen, i, olen, oseglen, seglen, toseglen;
405
406 armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);
407
408 for (olen = len; len > 0; len -= seglen) {
409 from64 = crypto_cursor_segment(fromc, &fromseglen);
410 to64 = crypto_cursor_segment(toc, &toseglen);
411
412 seglen = ulmin(len, ulmin(fromseglen, toseglen));
413 if (seglen < AES_BLOCK_LEN) {
414 seglen = ulmin(len, AES_BLOCK_LEN);
415
416 memset(block, 0, sizeof(block));
417 crypto_cursor_copydata(fromc, (int)seglen, block);
418
419 if (seglen == AES_BLOCK_LEN) {
420 armv8_aes_encrypt_gcm_block(&s, aes_key,
421 (uint64_t *)block, (uint64_t *)block);
422 } else {
423 aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
424 AES_INC_COUNTER(s.aes_counter);
425 for (i = 0; i < seglen; i++)
426 block[i] ^= s.EKi.c[i];
427 }
428 gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
429
430 crypto_cursor_copyback(toc, (int)seglen, block);
431 } else {
432 for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
433 seglen -= AES_BLOCK_LEN) {
434 armv8_aes_encrypt_gcm_block(&s, aes_key, from64,
435 to64);
436 gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64,
437 AES_BLOCK_LEN);
438
439 from64 += 2;
440 to64 += 2;
441 }
442
443 seglen = oseglen - seglen;
444 crypto_cursor_advance(fromc, seglen);
445 crypto_cursor_advance(toc, seglen);
446 }
447 }
448
449 armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);
450 memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);
451
452 explicit_bzero(block, sizeof(block));
453 explicit_bzero(&s, sizeof(s));
454 }
455
456 int
armv8_aes_decrypt_gcm(AES_key_t * aes_key,size_t len,struct crypto_buffer_cursor * fromc,struct crypto_buffer_cursor * toc,size_t authdatalen,const uint8_t * authdata,const uint8_t tag[static GMAC_DIGEST_LEN],const uint8_t iv[static AES_GCM_IV_LEN],const __uint128_val_t * Htable)457 armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
458 struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
459 size_t authdatalen, const uint8_t *authdata,
460 const uint8_t tag[static GMAC_DIGEST_LEN],
461 const uint8_t iv[static AES_GCM_IV_LEN],
462 const __uint128_val_t *Htable)
463 {
464 struct armv8_gcm_state s;
465 struct crypto_buffer_cursor fromcc;
466 uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN), *from;
467 uint64_t *block64, *from64, *to64;
468 size_t fromseglen, olen, oseglen, seglen, toseglen;
469 int error;
470
471 armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);
472
473 crypto_cursor_copy(fromc, &fromcc);
474 for (olen = len; len > 0; len -= seglen) {
475 from = crypto_cursor_segment(&fromcc, &fromseglen);
476 seglen = ulmin(len, fromseglen);
477 seglen -= seglen % AES_BLOCK_LEN;
478 if (seglen > 0) {
479 gcm_ghash_v8(s.Xi.u, Htable, from, seglen);
480 crypto_cursor_advance(&fromcc, seglen);
481 } else {
482 memset(block, 0, sizeof(block));
483 seglen = ulmin(len, AES_BLOCK_LEN);
484 crypto_cursor_copydata(&fromcc, seglen, block);
485 gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
486 }
487 }
488
489 armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);
490
491 if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
492 error = EBADMSG;
493 goto out;
494 }
495
496 block64 = (uint64_t *)block;
497 for (len = olen; len > 0; len -= seglen) {
498 from64 = crypto_cursor_segment(fromc, &fromseglen);
499 to64 = crypto_cursor_segment(toc, &toseglen);
500
501 seglen = ulmin(len, ulmin(fromseglen, toseglen));
502 if (seglen < AES_BLOCK_LEN) {
503 seglen = ulmin(len, AES_BLOCK_LEN);
504
505 memset(block, 0, sizeof(block));
506 crypto_cursor_copydata(fromc, seglen, block);
507
508 armv8_aes_decrypt_gcm_block(&s, aes_key, block64,
509 block64);
510
511 crypto_cursor_copyback(toc, (int)seglen, block);
512 } else {
513 for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
514 seglen -= AES_BLOCK_LEN) {
515 armv8_aes_decrypt_gcm_block(&s, aes_key, from64,
516 to64);
517
518 from64 += 2;
519 to64 += 2;
520 }
521
522 seglen = oseglen - seglen;
523 crypto_cursor_advance(fromc, seglen);
524 crypto_cursor_advance(toc, seglen);
525 }
526 }
527
528 error = 0;
529 out:
530 explicit_bzero(block, sizeof(block));
531 explicit_bzero(&s, sizeof(s));
532 return (error);
533 }
534