aesni: Add a GCM AEAD based on the AES-NI key schedule
[strongswan.git] / src / libstrongswan / plugins / aesni / aesni_gcm.c
1 /*
2 * Copyright (C) 2015 Martin Willi
3 * Copyright (C) 2015 revosec AG
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16 #include "aesni_gcm.h"
17 #include "aesni_key.h"
18
19 #include <crypto/iv/iv_gen_seq.h>
20
21 #include <tmmintrin.h>
22
/* Size of the GCM nonce in bytes: salt followed by the per-message IV */
#define NONCE_SIZE 12
/* Size of the explicit per-message IV in bytes */
#define IV_SIZE 8
/* Size of the implicit salt in bytes, i.e. the nonce part that is not IV */
#define SALT_SIZE (NONCE_SIZE - IV_SIZE)
26
27 typedef struct private_aesni_gcm_t private_aesni_gcm_t;
28
29 /**
30 * GCM en/decryption method type
31 */
32 typedef void (*aesni_gcm_fn_t)(private_aesni_gcm_t*, size_t, u_char*, u_char*,
33 u_char*, size_t, u_char*, u_char*);
34
35 /**
36 * Private data of an aesni_gcm_t object.
37 */
38 struct private_aesni_gcm_t {
39
40 /**
41 * Public aesni_gcm_t interface.
42 */
43 aesni_gcm_t public;
44
45 /**
46 * Encryption key schedule
47 */
48 aesni_key_t *key;
49
50 /**
51 * IV generator.
52 */
53 iv_gen_t *iv_gen;
54
55 /**
56 * Length of the integrity check value
57 */
58 size_t icv_size;
59
60 /**
61 * Length of the key in bytes
62 */
63 size_t key_size;
64
65 /**
66 * GCM encryption function
67 */
68 aesni_gcm_fn_t encrypt;
69
70 /**
71 * GCM decryption function
72 */
73 aesni_gcm_fn_t decrypt;
74
75 /**
76 * salt to add to nonce
77 */
78 u_char salt[SALT_SIZE];
79
80 /**
81 * GHASH subkey H, big-endian
82 */
83 __m128i h;
84 };
85
/**
 * Reverse the byte order of a 128-bit value
 */
static inline __m128i swap128(__m128i x)
{
	/* shuffle control: destination byte i is taken from source byte 15 - i */
	const __m128i reverse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
										 8, 9, 10, 11, 12, 13, 14, 15);

	return _mm_shuffle_epi8(x, reverse);
}
94
95 /**
96 * Multiply two blocks in GF128
97 */
98 static inline __m128i mult_block(__m128i h, __m128i y)
99 {
100 __m128i t1, t2, t3, t4, t5, t6;
101
102 y = swap128(y);
103
104 t1 = _mm_clmulepi64_si128(h, y, 0x00);
105 t2 = _mm_clmulepi64_si128(h, y, 0x01);
106 t3 = _mm_clmulepi64_si128(h, y, 0x10);
107 t4 = _mm_clmulepi64_si128(h, y, 0x11);
108
109 t2 = _mm_xor_si128(t2, t3);
110 t3 = _mm_slli_si128(t2, 8);
111 t2 = _mm_srli_si128(t2, 8);
112 t1 = _mm_xor_si128(t1, t3);
113 t4 = _mm_xor_si128(t4, t2);
114
115 t5 = _mm_srli_epi32(t1, 31);
116 t1 = _mm_slli_epi32(t1, 1);
117 t6 = _mm_srli_epi32(t4, 31);
118 t4 = _mm_slli_epi32(t4, 1);
119
120 t3 = _mm_srli_si128(t5, 12);
121 t6 = _mm_slli_si128(t6, 4);
122 t5 = _mm_slli_si128(t5, 4);
123 t1 = _mm_or_si128(t1, t5);
124 t4 = _mm_or_si128(t4, t6);
125 t4 = _mm_or_si128(t4, t3);
126
127 t5 = _mm_slli_epi32(t1, 31);
128 t6 = _mm_slli_epi32(t1, 30);
129 t3 = _mm_slli_epi32(t1, 25);
130
131 t5 = _mm_xor_si128(t5, t6);
132 t5 = _mm_xor_si128(t5, t3);
133 t6 = _mm_srli_si128(t5, 4);
134 t4 = _mm_xor_si128(t4, t6);
135 t5 = _mm_slli_si128(t5, 12);
136 t1 = _mm_xor_si128(t1, t5);
137 t4 = _mm_xor_si128(t4, t1);
138
139 t5 = _mm_srli_epi32(t1, 1);
140 t2 = _mm_srli_epi32(t1, 2);
141 t3 = _mm_srli_epi32(t1, 7);
142 t4 = _mm_xor_si128(t4, t2);
143 t4 = _mm_xor_si128(t4, t3);
144 t4 = _mm_xor_si128(t4, t5);
145
146 return swap128(t4);
147 }
148
149 /**
150 * GHASH on a single block
151 */
152 static __m128i ghash(__m128i h, __m128i y, __m128i x)
153 {
154 return mult_block(h, _mm_xor_si128(y, x));
155 }
156
157 /**
158 * Start constructing the ICV for the associated data
159 */
160 static __m128i icv_header(private_aesni_gcm_t *this, void *assoc, size_t alen)
161 {
162 u_int blocks, rem, i;
163 __m128i y, last, *ab;
164
165 y = _mm_setzero_si128();
166 ab = assoc;
167 blocks = alen / AES_BLOCK_SIZE;
168 rem = alen % AES_BLOCK_SIZE;
169 for (i = 0; i < blocks; i++)
170 {
171 y = ghash(this->h, y, _mm_loadu_si128(ab + i));
172 }
173 if (rem)
174 {
175 last = _mm_setzero_si128();
176 memcpy(&last, ab + blocks, rem);
177
178 y = ghash(this->h, y, last);
179 }
180
181 return y;
182 }
183
184 /**
185 * Complete the ICV by hashing a assoc/data length block
186 */
187 static __m128i icv_tailer(private_aesni_gcm_t *this, __m128i y,
188 size_t alen, size_t dlen)
189 {
190 __m128i b;
191
192 htoun64(&b, alen * 8);
193 htoun64((u_char*)&b + sizeof(u_int64_t), dlen * 8);
194
195 return ghash(this->h, y, b);
196 }
197
198 /**
199 * En-/Decrypt the ICV, trim and store it
200 */
201 static void icv_crypt(private_aesni_gcm_t *this, __m128i y, __m128i j,
202 u_char *icv)
203 {
204 __m128i t, b;
205 u_int round;
206
207 t = _mm_xor_si128(j, this->key->schedule[0]);
208 for (round = 1; round < this->key->rounds; round++)
209 {
210 t = _mm_aesenc_si128(t, this->key->schedule[round]);
211 }
212 t = _mm_aesenclast_si128(t, this->key->schedule[this->key->rounds]);
213
214 t = _mm_xor_si128(y, t);
215
216 _mm_storeu_si128(&b, t);
217 memcpy(icv, &b, this->icv_size);
218 }
219
220 /**
221 * Do big-endian increment on x
222 */
223 static inline __m128i increment_be(__m128i x)
224 {
225 x = swap128(x);
226 x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1));
227 x = swap128(x);
228
229 return x;
230 }
231
232 /**
233 * Generate the block J0
234 */
235 static inline __m128i create_j(private_aesni_gcm_t *this, u_char *iv)
236 {
237 u_char j[AES_BLOCK_SIZE];
238
239 memcpy(j, this->salt, SALT_SIZE);
240 memcpy(j + SALT_SIZE, iv, IV_SIZE);
241 htoun32(j + SALT_SIZE + IV_SIZE, 1);
242
243 return _mm_loadu_si128((__m128i*)j);
244 }
245
246 /**
247 * Encrypt a remaining incomplete block, return updated Y
248 */
249 static __m128i encrypt_gcm_rem(private_aesni_gcm_t *this, u_int rem,
250 void *in, void *out, __m128i cb, __m128i y)
251 {
252 __m128i t, b;
253 u_int round;
254
255 memset(&b, 0, sizeof(b));
256 memcpy(&b, in, rem);
257
258 t = _mm_xor_si128(cb, this->key->schedule[0]);
259 for (round = 1; round < this->key->rounds; round++)
260 {
261 t = _mm_aesenc_si128(t, this->key->schedule[round]);
262 }
263 t = _mm_aesenclast_si128(t, this->key->schedule[this->key->rounds]);
264 b = _mm_xor_si128(t, b);
265
266 memcpy(out, &b, rem);
267
268 memset((u_char*)&b + rem, 0, AES_BLOCK_SIZE - rem);
269 return ghash(this->h, y, b);
270 }
271
272 /**
273 * Decrypt a remaining incomplete block, return updated Y
274 */
275 static __m128i decrypt_gcm_rem(private_aesni_gcm_t *this, u_int rem,
276 void *in, void *out, __m128i cb, __m128i y)
277 {
278 __m128i t, b;
279 u_int round;
280
281 memset(&b, 0, sizeof(b));
282 memcpy(&b, in, rem);
283
284 y = ghash(this->h, y, b);
285
286 t = _mm_xor_si128(cb, this->key->schedule[0]);
287 for (round = 1; round < this->key->rounds; round++)
288 {
289 t = _mm_aesenc_si128(t, this->key->schedule[round]);
290 }
291 t = _mm_aesenclast_si128(t, this->key->schedule[this->key->rounds]);
292 b = _mm_xor_si128(t, b);
293
294 memcpy(out, &b, rem);
295
296 return y;
297 }
298
299 /**
300 * Generic GCM encryption/ICV generation
301 */
302 static void encrypt_gcm(private_aesni_gcm_t *this,
303 size_t len, u_char *in, u_char *out, u_char *iv,
304 size_t alen, u_char *assoc, u_char *icv)
305 {
306 __m128i d, t, y, j, cb, *bi, *bo;
307 u_int round, blocks, rem, i;
308
309 j = create_j(this, iv);
310 y = icv_header(this, assoc, alen);
311 blocks = len / AES_BLOCK_SIZE;
312 rem = len % AES_BLOCK_SIZE;
313 bi = (__m128i*)in;
314 bo = (__m128i*)out;
315
316 cb = increment_be(j);
317 for (i = 0; i < blocks; i++)
318 {
319 d = _mm_loadu_si128(bi + i);
320 t = _mm_xor_si128(cb, this->key->schedule[0]);
321 for (round = 1; round < this->key->rounds; round++)
322 {
323 t = _mm_aesenc_si128(t, this->key->schedule[round]);
324 }
325 t = _mm_aesenclast_si128(t, this->key->schedule[this->key->rounds]);
326 t = _mm_xor_si128(t, d);
327 _mm_storeu_si128(bo + i, t);
328
329 y = ghash(this->h, y, t);
330
331 cb = increment_be(cb);
332 }
333
334 if (rem)
335 {
336 y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
337 }
338 y = icv_tailer(this, y, alen, len);
339 icv_crypt(this, y, j, icv);
340 }
341
342 /**
343 * Generic GCM decryption/ICV generation
344 */
345 static void decrypt_gcm(private_aesni_gcm_t *this,
346 size_t len, u_char *in, u_char *out, u_char *iv,
347 size_t alen, u_char *assoc, u_char *icv)
348 {
349 __m128i d, t, y, j, cb, *bi, *bo;
350 u_int round, blocks, rem, i;
351
352 j = create_j(this, iv);
353 y = icv_header(this, assoc, alen);
354 blocks = len / AES_BLOCK_SIZE;
355 rem = len % AES_BLOCK_SIZE;
356 bi = (__m128i*)in;
357 bo = (__m128i*)out;
358
359 cb = increment_be(j);
360 for (i = 0; i < blocks; i++)
361 {
362 d = _mm_loadu_si128(bi + i);
363
364 y = ghash(this->h, y, d);
365
366 t = _mm_xor_si128(cb, this->key->schedule[0]);
367 for (round = 1; round < this->key->rounds; round++)
368 {
369 t = _mm_aesenc_si128(t, this->key->schedule[round]);
370 }
371 t = _mm_aesenclast_si128(t, this->key->schedule[this->key->rounds]);
372 t = _mm_xor_si128(t, d);
373 _mm_storeu_si128(bo + i, t);
374
375 cb = increment_be(cb);
376 }
377
378 if (rem)
379 {
380 y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
381 }
382 y = icv_tailer(this, y, alen, len);
383 icv_crypt(this, y, j, icv);
384 }
385
386 METHOD(aead_t, encrypt, bool,
387 private_aesni_gcm_t *this, chunk_t plain, chunk_t assoc, chunk_t iv,
388 chunk_t *encr)
389 {
390 u_char *out;
391
392 if (!this->key || iv.len != IV_SIZE)
393 {
394 return FALSE;
395 }
396 out = plain.ptr;
397 if (encr)
398 {
399 *encr = chunk_alloc(plain.len + this->icv_size);
400 out = encr->ptr;
401 }
402 this->encrypt(this, plain.len, plain.ptr, out, iv.ptr,
403 assoc.len, assoc.ptr, out + plain.len);
404 return TRUE;
405 }
406
407 METHOD(aead_t, decrypt, bool,
408 private_aesni_gcm_t *this, chunk_t encr, chunk_t assoc, chunk_t iv,
409 chunk_t *plain)
410 {
411 u_char *out, icv[this->icv_size];
412
413 if (!this->key || iv.len != IV_SIZE || encr.len < this->icv_size)
414 {
415 return FALSE;
416 }
417 encr.len -= this->icv_size;
418 out = encr.ptr;
419 if (plain)
420 {
421 *plain = chunk_alloc(encr.len);
422 out = plain->ptr;
423 }
424 this->decrypt(this, encr.len, encr.ptr, out, iv.ptr,
425 assoc.len, assoc.ptr, icv);
426 return memeq_const(icv, encr.ptr + encr.len, this->icv_size);
427 }
428
429 METHOD(aead_t, get_block_size, size_t,
430 private_aesni_gcm_t *this)
431 {
432 return 1;
433 }
434
435 METHOD(aead_t, get_icv_size, size_t,
436 private_aesni_gcm_t *this)
437 {
438 return this->icv_size;
439 }
440
441 METHOD(aead_t, get_iv_size, size_t,
442 private_aesni_gcm_t *this)
443 {
444 return IV_SIZE;
445 }
446
447 METHOD(aead_t, get_iv_gen, iv_gen_t*,
448 private_aesni_gcm_t *this)
449 {
450 return this->iv_gen;
451 }
452
453 METHOD(aead_t, get_key_size, size_t,
454 private_aesni_gcm_t *this)
455 {
456 return this->key_size + SALT_SIZE;
457 }
458
459 METHOD(aead_t, set_key, bool,
460 private_aesni_gcm_t *this, chunk_t key)
461 {
462 u_int round;
463 __m128i h;
464
465 if (key.len != this->key_size + SALT_SIZE)
466 {
467 return FALSE;
468 }
469
470 memcpy(this->salt, key.ptr + key.len - SALT_SIZE, SALT_SIZE);
471 key.len -= SALT_SIZE;
472
473 DESTROY_IF(this->key);
474 this->key = aesni_key_create(TRUE, key);
475
476 h = _mm_xor_si128(_mm_setzero_si128(), this->key->schedule[0]);
477 for (round = 1; round < this->key->rounds; round++)
478 {
479 h = _mm_aesenc_si128(h, this->key->schedule[round]);
480 }
481 h = _mm_aesenclast_si128(h, this->key->schedule[this->key->rounds]);
482
483 this->h = swap128(h);
484
485 return TRUE;
486 }
487
488 METHOD(aead_t, destroy, void,
489 private_aesni_gcm_t *this)
490 {
491 DESTROY_IF(this->key);
492 memwipe(&this->h, sizeof(this->h));
493 this->iv_gen->destroy(this->iv_gen);
494 free(this);
495 }
496
497 /**
498 * See header
499 */
500 aesni_gcm_t *aesni_gcm_create(encryption_algorithm_t algo,
501 size_t key_size, size_t salt_size)
502 {
503 private_aesni_gcm_t *this;
504 size_t icv_size;
505
506 switch (key_size)
507 {
508 case 0:
509 key_size = 16;
510 break;
511 case 16:
512 case 24:
513 case 32:
514 break;
515 default:
516 return NULL;
517 }
518 if (salt_size && salt_size != SALT_SIZE)
519 {
520 /* currently not supported */
521 return NULL;
522 }
523 switch (algo)
524 {
525 case ENCR_AES_GCM_ICV8:
526 algo = ENCR_AES_CBC;
527 icv_size = 8;
528 break;
529 case ENCR_AES_GCM_ICV12:
530 algo = ENCR_AES_CBC;
531 icv_size = 12;
532 break;
533 case ENCR_AES_GCM_ICV16:
534 algo = ENCR_AES_CBC;
535 icv_size = 16;
536 break;
537 default:
538 return NULL;
539 }
540
541 INIT(this,
542 .public = {
543 .aead = {
544 .encrypt = _encrypt,
545 .decrypt = _decrypt,
546 .get_block_size = _get_block_size,
547 .get_icv_size = _get_icv_size,
548 .get_iv_size = _get_iv_size,
549 .get_iv_gen = _get_iv_gen,
550 .get_key_size = _get_key_size,
551 .set_key = _set_key,
552 .destroy = _destroy,
553 },
554 },
555 .key_size = key_size,
556 .iv_gen = iv_gen_seq_create(),
557 .icv_size = icv_size,
558 .encrypt = encrypt_gcm,
559 .decrypt = decrypt_gcm,
560 );
561
562 return &this->public;
563 }