aesni: Use dedicated round count specific encryption functions in CTR mode
[strongswan.git] / src / libstrongswan / plugins / aesni / aesni_ctr.c
1 /*
2 * Copyright (C) 2015 Martin Willi
3 * Copyright (C) 2015 revosec AG
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16 #include "aesni_ctr.h"
17 #include "aesni_key.h"
18
19 #include <tmmintrin.h>
20
21 typedef struct private_aesni_ctr_t private_aesni_ctr_t;
22
23 /**
24 * CTR en/decryption method type
25 */
26 typedef void (*aesni_ctr_fn_t)(private_aesni_ctr_t*, size_t, u_char*, u_char*);
27
28 /**
29 * Private data of an aesni_ctr_t object.
30 */
31 struct private_aesni_ctr_t {
32
33 /**
34 * Public aesni_ctr_t interface.
35 */
36 aesni_ctr_t public;
37
38 /**
39 * Key size
40 */
41 u_int key_size;
42
43 /**
44 * Key schedule
45 */
46 aesni_key_t *key;
47
48 /**
49 * Encryption method
50 */
51 aesni_ctr_fn_t crypt;
52
53 /**
54 * Counter state
55 */
56 struct {
57 char nonce[4];
58 char iv[8];
59 u_int32_t counter;
60 } __attribute__((packed, aligned(sizeof(__m128i)))) state;
61 };
62
/**
 * Add one to x, interpreting its trailing bytes as a big-endian number.
 *
 * All 16 bytes are reversed, the low 64-bit lane is incremented and the
 * bytes are reversed back. A carry therefore propagates through the last
 * eight bytes of x only; the first eight bytes are never modified.
 */
static inline __m128i increment_be(__m128i x)
{
	const __m128i reverse = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
										  7, 6, 5, 4, 3, 2, 1, 0);
	const __m128i one = _mm_set_epi32(0, 0, 0, 1);

	return _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(x, reverse), one),
							reverse);
}
78
79 /**
80 * AES-128 CTR encryption
81 */
82 static void encrypt_ctr128(private_aesni_ctr_t *this,
83 size_t len, u_char *in, u_char *out)
84 {
85 __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10;
86 __m128i state, t, d, b, *bi, *bo;
87 u_int i, blocks, rem;
88
89 state = _mm_load_si128((__m128i*)&this->state);
90 blocks = len / AES_BLOCK_SIZE;
91 rem = len % AES_BLOCK_SIZE;
92 bi = (__m128i*)in;
93 bo = (__m128i*)out;
94
95 k0 = this->key->schedule[0];
96 k1 = this->key->schedule[1];
97 k2 = this->key->schedule[2];
98 k3 = this->key->schedule[3];
99 k4 = this->key->schedule[4];
100 k5 = this->key->schedule[5];
101 k6 = this->key->schedule[6];
102 k7 = this->key->schedule[7];
103 k8 = this->key->schedule[8];
104 k9 = this->key->schedule[9];
105 k10 = this->key->schedule[10];
106
107 for (i = 0; i < blocks; i++)
108 {
109 d = _mm_loadu_si128(bi + i);
110 t = _mm_xor_si128(state, k0);
111
112 t = _mm_aesenc_si128(t, k1);
113 t = _mm_aesenc_si128(t, k2);
114 t = _mm_aesenc_si128(t, k3);
115 t = _mm_aesenc_si128(t, k4);
116 t = _mm_aesenc_si128(t, k5);
117 t = _mm_aesenc_si128(t, k6);
118 t = _mm_aesenc_si128(t, k7);
119 t = _mm_aesenc_si128(t, k8);
120 t = _mm_aesenc_si128(t, k9);
121
122 t = _mm_aesenclast_si128(t, k10);
123 t = _mm_xor_si128(t, d);
124 _mm_storeu_si128(bo + i, t);
125
126 state = increment_be(state);
127 }
128
129 if (rem)
130 {
131 memset(&b, 0, sizeof(b));
132 memcpy(&b, bi + blocks, rem);
133
134 d = _mm_loadu_si128(&b);
135 t = _mm_xor_si128(state, k0);
136
137 t = _mm_aesenc_si128(t, k1);
138 t = _mm_aesenc_si128(t, k2);
139 t = _mm_aesenc_si128(t, k3);
140 t = _mm_aesenc_si128(t, k4);
141 t = _mm_aesenc_si128(t, k5);
142 t = _mm_aesenc_si128(t, k6);
143 t = _mm_aesenc_si128(t, k7);
144 t = _mm_aesenc_si128(t, k8);
145 t = _mm_aesenc_si128(t, k9);
146
147 t = _mm_aesenclast_si128(t, k10);
148 t = _mm_xor_si128(t, d);
149 _mm_storeu_si128(&b, t);
150
151 memcpy(bo + blocks, &b, rem);
152 }
153 }
154
155 /**
156 * AES-192 CTR encryption
157 */
158 static void encrypt_ctr192(private_aesni_ctr_t *this,
159 size_t len, u_char *in, u_char *out)
160 {
161 __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12;
162 __m128i state, t, d, b, *bi, *bo;
163 u_int i, blocks, rem;
164
165 state = _mm_load_si128((__m128i*)&this->state);
166 blocks = len / AES_BLOCK_SIZE;
167 rem = len % AES_BLOCK_SIZE;
168 bi = (__m128i*)in;
169 bo = (__m128i*)out;
170
171 k0 = this->key->schedule[0];
172 k1 = this->key->schedule[1];
173 k2 = this->key->schedule[2];
174 k3 = this->key->schedule[3];
175 k4 = this->key->schedule[4];
176 k5 = this->key->schedule[5];
177 k6 = this->key->schedule[6];
178 k7 = this->key->schedule[7];
179 k8 = this->key->schedule[8];
180 k9 = this->key->schedule[9];
181 k10 = this->key->schedule[10];
182 k11 = this->key->schedule[11];
183 k12 = this->key->schedule[12];
184
185 for (i = 0; i < blocks; i++)
186 {
187 d = _mm_loadu_si128(bi + i);
188 t = _mm_xor_si128(state, k0);
189
190 t = _mm_aesenc_si128(t, k1);
191 t = _mm_aesenc_si128(t, k2);
192 t = _mm_aesenc_si128(t, k3);
193 t = _mm_aesenc_si128(t, k4);
194 t = _mm_aesenc_si128(t, k5);
195 t = _mm_aesenc_si128(t, k6);
196 t = _mm_aesenc_si128(t, k7);
197 t = _mm_aesenc_si128(t, k8);
198 t = _mm_aesenc_si128(t, k9);
199 t = _mm_aesenc_si128(t, k10);
200 t = _mm_aesenc_si128(t, k11);
201
202 t = _mm_aesenclast_si128(t, k12);
203 t = _mm_xor_si128(t, d);
204 _mm_storeu_si128(bo + i, t);
205
206 state = increment_be(state);
207 }
208
209 if (rem)
210 {
211 memset(&b, 0, sizeof(b));
212 memcpy(&b, bi + blocks, rem);
213
214 d = _mm_loadu_si128(&b);
215 t = _mm_xor_si128(state, k0);
216
217 t = _mm_aesenc_si128(t, k1);
218 t = _mm_aesenc_si128(t, k2);
219 t = _mm_aesenc_si128(t, k3);
220 t = _mm_aesenc_si128(t, k4);
221 t = _mm_aesenc_si128(t, k5);
222 t = _mm_aesenc_si128(t, k6);
223 t = _mm_aesenc_si128(t, k7);
224 t = _mm_aesenc_si128(t, k8);
225 t = _mm_aesenc_si128(t, k9);
226 t = _mm_aesenc_si128(t, k10);
227 t = _mm_aesenc_si128(t, k11);
228
229 t = _mm_aesenclast_si128(t, k12);
230 t = _mm_xor_si128(t, d);
231 _mm_storeu_si128(&b, t);
232
233 memcpy(bo + blocks, &b, rem);
234 }
235 }
236
237 /**
238 * AES-256 CTR encryption
239 */
240 static void encrypt_ctr256(private_aesni_ctr_t *this,
241 size_t len, u_char *in, u_char *out)
242 {
243 __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13, k14;
244 __m128i state, t, d, b, *bi, *bo;
245 u_int i, blocks, rem;
246
247 state = _mm_load_si128((__m128i*)&this->state);
248 blocks = len / AES_BLOCK_SIZE;
249 rem = len % AES_BLOCK_SIZE;
250 bi = (__m128i*)in;
251 bo = (__m128i*)out;
252
253 k0 = this->key->schedule[0];
254 k1 = this->key->schedule[1];
255 k2 = this->key->schedule[2];
256 k3 = this->key->schedule[3];
257 k4 = this->key->schedule[4];
258 k5 = this->key->schedule[5];
259 k6 = this->key->schedule[6];
260 k7 = this->key->schedule[7];
261 k8 = this->key->schedule[8];
262 k9 = this->key->schedule[9];
263 k10 = this->key->schedule[10];
264 k11 = this->key->schedule[11];
265 k12 = this->key->schedule[12];
266 k13 = this->key->schedule[13];
267 k14 = this->key->schedule[14];
268
269 for (i = 0; i < blocks; i++)
270 {
271 d = _mm_loadu_si128(bi + i);
272 t = _mm_xor_si128(state, k0);
273
274 t = _mm_aesenc_si128(t, k1);
275 t = _mm_aesenc_si128(t, k2);
276 t = _mm_aesenc_si128(t, k3);
277 t = _mm_aesenc_si128(t, k4);
278 t = _mm_aesenc_si128(t, k5);
279 t = _mm_aesenc_si128(t, k6);
280 t = _mm_aesenc_si128(t, k7);
281 t = _mm_aesenc_si128(t, k8);
282 t = _mm_aesenc_si128(t, k9);
283 t = _mm_aesenc_si128(t, k10);
284 t = _mm_aesenc_si128(t, k11);
285 t = _mm_aesenc_si128(t, k12);
286 t = _mm_aesenc_si128(t, k13);
287
288 t = _mm_aesenclast_si128(t, k14);
289 t = _mm_xor_si128(t, d);
290 _mm_storeu_si128(bo + i, t);
291
292 state = increment_be(state);
293 }
294
295 if (rem)
296 {
297 memset(&b, 0, sizeof(b));
298 memcpy(&b, bi + blocks, rem);
299
300 d = _mm_loadu_si128(&b);
301 t = _mm_xor_si128(state, k0);
302
303 t = _mm_aesenc_si128(t, k1);
304 t = _mm_aesenc_si128(t, k2);
305 t = _mm_aesenc_si128(t, k3);
306 t = _mm_aesenc_si128(t, k4);
307 t = _mm_aesenc_si128(t, k5);
308 t = _mm_aesenc_si128(t, k6);
309 t = _mm_aesenc_si128(t, k7);
310 t = _mm_aesenc_si128(t, k8);
311 t = _mm_aesenc_si128(t, k9);
312 t = _mm_aesenc_si128(t, k10);
313 t = _mm_aesenc_si128(t, k11);
314 t = _mm_aesenc_si128(t, k12);
315 t = _mm_aesenc_si128(t, k13);
316
317 t = _mm_aesenclast_si128(t, k14);
318 t = _mm_xor_si128(t, d);
319 _mm_storeu_si128(&b, t);
320
321 memcpy(bo + blocks, &b, rem);
322 }
323 }
324
325 METHOD(crypter_t, crypt, bool,
326 private_aesni_ctr_t *this, chunk_t in, chunk_t iv, chunk_t *out)
327 {
328 u_char *buf;
329
330 if (!this->key || iv.len != sizeof(this->state.iv))
331 {
332 return FALSE;
333 }
334 memcpy(this->state.iv, iv.ptr, sizeof(this->state.iv));
335 this->state.counter = htonl(1);
336
337 buf = in.ptr;
338 if (out)
339 {
340 *out = chunk_alloc(in.len);
341 buf = out->ptr;
342 }
343 this->crypt(this, in.len, in.ptr, buf);
344 return TRUE;
345 }
346
347 METHOD(crypter_t, get_block_size, size_t,
348 private_aesni_ctr_t *this)
349 {
350 return 1;
351 }
352
353 METHOD(crypter_t, get_iv_size, size_t,
354 private_aesni_ctr_t *this)
355 {
356 return sizeof(this->state.iv);
357 }
358
359 METHOD(crypter_t, get_key_size, size_t,
360 private_aesni_ctr_t *this)
361 {
362 return this->key_size + sizeof(this->state.nonce);
363 }
364
365 METHOD(crypter_t, set_key, bool,
366 private_aesni_ctr_t *this, chunk_t key)
367 {
368 if (key.len != get_key_size(this))
369 {
370 return FALSE;
371 }
372
373 memcpy(this->state.nonce, key.ptr + key.len - sizeof(this->state.nonce),
374 sizeof(this->state.nonce));
375 key.len -= sizeof(this->state.nonce);
376
377 DESTROY_IF(this->key);
378 this->key = aesni_key_create(TRUE, key);
379
380 return this->key;
381 }
382
383 METHOD(crypter_t, destroy, void,
384 private_aesni_ctr_t *this)
385 {
386 DESTROY_IF(this->key);
387 free(this);
388 }
389
390 /**
391 * See header
392 */
393 aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size)
394 {
395 private_aesni_ctr_t *this;
396
397 if (algo != ENCR_AES_CTR)
398 {
399 return NULL;
400 }
401 switch (key_size)
402 {
403 case 0:
404 key_size = 16;
405 break;
406 case 16:
407 case 24:
408 case 32:
409 break;
410 default:
411 return NULL;
412 }
413
414 INIT(this,
415 .public = {
416 .crypter = {
417 .encrypt = _crypt,
418 .decrypt = _crypt,
419 .get_block_size = _get_block_size,
420 .get_iv_size = _get_iv_size,
421 .get_key_size = _get_key_size,
422 .set_key = _set_key,
423 .destroy = _destroy,
424 },
425 },
426 .key_size = key_size,
427 );
428
429 switch (key_size)
430 {
431 case 16:
432 this->crypt = encrypt_ctr128;
433 break;
434 case 24:
435 this->crypt = encrypt_ctr192;
436 break;
437 case 32:
438 this->crypt = encrypt_ctr256;
439 break;
440 }
441
442 return &this->public;
443 }