src/libstrongswan/plugins/aesni/aesni_ctr.c
/*
 * Copyright (C) 2015 Martin Willi
 * Copyright (C) 2015 revosec AG
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 */

#include "aesni_ctr.h"
#include "aesni_key.h"

#include <tmmintrin.h>

/**
 * Pipeline parallelism we use for CTR en/decryption
 */
#define CTR_CRYPT_PARALLELISM 4
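/* Processing four independent counter blocks per loop iteration keeps the
 * pipelined AESENC unit busy, since consecutive rounds of a single block are
 * latency-bound. The value is a tuning choice, not a hard requirement. */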

typedef struct private_aesni_ctr_t private_aesni_ctr_t;

/**
 * CTR en/decryption method type
 */
typedef void (*aesni_ctr_fn_t)(private_aesni_ctr_t*, size_t, u_char*, u_char*);

/**
 * Private data of an aesni_ctr_t object.
 */
struct private_aesni_ctr_t {

	/**
	 * Public aesni_ctr_t interface.
	 */
	aesni_ctr_t public;

	/**
	 * Key size
	 */
	u_int key_size;

	/**
	 * Key schedule
	 */
	aesni_key_t *key;

	/**
	 * Encryption method
	 */
	aesni_ctr_fn_t crypt;

	/**
	 * Counter state
	 */
	struct {
		char nonce[4];
		char iv[8];
		uint32_t counter;
	} __attribute__((packed, aligned(sizeof(__m128i)))) state;
};
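
/* The counter state above is laid out like the AES-CTR counter block of
 * RFC 3686: a 4 byte nonce taken from the end of the key material, the
 * 8 byte per-message IV, and a 4 byte block counter, packed and aligned
 * so the whole block can be loaded as a single __m128i. */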

/**
 * Do big-endian increment on x
 */
static inline __m128i increment_be(__m128i x)
{
	__m128i swap;

	swap = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);

	x = _mm_shuffle_epi8(x, swap);
	x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1));
	x = _mm_shuffle_epi8(x, swap);

	return x;
}
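
#ifdef AESNI_CTR_REFERENCE
/* Scalar equivalent of increment_be() for illustration only; the
 * AESNI_CTR_REFERENCE guard is hypothetical, so this is never compiled in.
 * Like the SSE version above, it increments the big-endian value held in
 * the low 8 bytes without propagating a carry into the upper half, which
 * is fine here as the CTR counter itself is only 32 bits wide. */
static inline __m128i increment_be_ref(__m128i x)
{
	uint8_t b[16];
	int i;

	_mm_storeu_si128((__m128i*)b, x);
	for (i = 15; i >= 8; i--)
	{
		if (++b[i])
		{	/* no carry out of this byte, done */
			break;
		}
	}
	return _mm_loadu_si128((__m128i*)b);
}
#endif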

/**
 * AES-128 CTR encryption
 */
static void encrypt_ctr128(private_aesni_ctr_t *this,
						   size_t len, u_char *in, u_char *out)
{
	__m128i t1, t2, t3, t4;
	__m128i d1, d2, d3, d4;
	__m128i *ks, state, b, *bi, *bo;
	u_int i, blocks, pblocks, rem;

	state = _mm_load_si128((__m128i*)&this->state);
	blocks = len / AES_BLOCK_SIZE;
	pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
	rem = len % AES_BLOCK_SIZE;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	ks = this->key->schedule;

	for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
	{
		d1 = _mm_loadu_si128(bi + i + 0);
		d2 = _mm_loadu_si128(bi + i + 1);
		d3 = _mm_loadu_si128(bi + i + 2);
		d4 = _mm_loadu_si128(bi + i + 3);

		t1 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t2 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t3 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t4 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t2 = _mm_aesenc_si128(t2, ks[1]);
		t3 = _mm_aesenc_si128(t3, ks[1]);
		t4 = _mm_aesenc_si128(t4, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t2 = _mm_aesenc_si128(t2, ks[2]);
		t3 = _mm_aesenc_si128(t3, ks[2]);
		t4 = _mm_aesenc_si128(t4, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t2 = _mm_aesenc_si128(t2, ks[3]);
		t3 = _mm_aesenc_si128(t3, ks[3]);
		t4 = _mm_aesenc_si128(t4, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t2 = _mm_aesenc_si128(t2, ks[4]);
		t3 = _mm_aesenc_si128(t3, ks[4]);
		t4 = _mm_aesenc_si128(t4, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t2 = _mm_aesenc_si128(t2, ks[5]);
		t3 = _mm_aesenc_si128(t3, ks[5]);
		t4 = _mm_aesenc_si128(t4, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t2 = _mm_aesenc_si128(t2, ks[6]);
		t3 = _mm_aesenc_si128(t3, ks[6]);
		t4 = _mm_aesenc_si128(t4, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t2 = _mm_aesenc_si128(t2, ks[7]);
		t3 = _mm_aesenc_si128(t3, ks[7]);
		t4 = _mm_aesenc_si128(t4, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t2 = _mm_aesenc_si128(t2, ks[8]);
		t3 = _mm_aesenc_si128(t3, ks[8]);
		t4 = _mm_aesenc_si128(t4, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t2 = _mm_aesenc_si128(t2, ks[9]);
		t3 = _mm_aesenc_si128(t3, ks[9]);
		t4 = _mm_aesenc_si128(t4, ks[9]);

		t1 = _mm_aesenclast_si128(t1, ks[10]);
		t2 = _mm_aesenclast_si128(t2, ks[10]);
		t3 = _mm_aesenclast_si128(t3, ks[10]);
		t4 = _mm_aesenclast_si128(t4, ks[10]);
		t1 = _mm_xor_si128(t1, d1);
		t2 = _mm_xor_si128(t2, d2);
		t3 = _mm_xor_si128(t3, d3);
		t4 = _mm_xor_si128(t4, d4);
		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
	}

	for (i = pblocks; i < blocks; i++)
	{
		d1 = _mm_loadu_si128(bi + i);

		t1 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);

		t1 = _mm_aesenclast_si128(t1, ks[10]);
		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(bo + i, t1);
	}

	if (rem)
	{
		memset(&b, 0, sizeof(b));
		memcpy(&b, bi + blocks, rem);

		d1 = _mm_loadu_si128(&b);
		t1 = _mm_xor_si128(state, ks[0]);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);

		t1 = _mm_aesenclast_si128(t1, ks[10]);
		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(&b, t1);

		memcpy(bo + blocks, &b, rem);
	}
}
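
/* The AES-192 and AES-256 variants below are structurally identical to
 * encrypt_ctr128() and differ only in round count: AES uses 10, 12 and 14
 * rounds for 128, 192 and 256 bit keys, hence the additional ks[10]/ks[11]
 * (and ks[12]/ks[13]) rounds before the final aesenclast. */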

/**
 * AES-192 CTR encryption
 */
static void encrypt_ctr192(private_aesni_ctr_t *this,
						   size_t len, u_char *in, u_char *out)
{
	__m128i t1, t2, t3, t4;
	__m128i d1, d2, d3, d4;
	__m128i *ks, state, b, *bi, *bo;
	u_int i, blocks, pblocks, rem;

	state = _mm_load_si128((__m128i*)&this->state);
	blocks = len / AES_BLOCK_SIZE;
	pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
	rem = len % AES_BLOCK_SIZE;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	ks = this->key->schedule;

	for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
	{
		d1 = _mm_loadu_si128(bi + i + 0);
		d2 = _mm_loadu_si128(bi + i + 1);
		d3 = _mm_loadu_si128(bi + i + 2);
		d4 = _mm_loadu_si128(bi + i + 3);

		t1 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t2 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t3 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t4 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t2 = _mm_aesenc_si128(t2, ks[1]);
		t3 = _mm_aesenc_si128(t3, ks[1]);
		t4 = _mm_aesenc_si128(t4, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t2 = _mm_aesenc_si128(t2, ks[2]);
		t3 = _mm_aesenc_si128(t3, ks[2]);
		t4 = _mm_aesenc_si128(t4, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t2 = _mm_aesenc_si128(t2, ks[3]);
		t3 = _mm_aesenc_si128(t3, ks[3]);
		t4 = _mm_aesenc_si128(t4, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t2 = _mm_aesenc_si128(t2, ks[4]);
		t3 = _mm_aesenc_si128(t3, ks[4]);
		t4 = _mm_aesenc_si128(t4, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t2 = _mm_aesenc_si128(t2, ks[5]);
		t3 = _mm_aesenc_si128(t3, ks[5]);
		t4 = _mm_aesenc_si128(t4, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t2 = _mm_aesenc_si128(t2, ks[6]);
		t3 = _mm_aesenc_si128(t3, ks[6]);
		t4 = _mm_aesenc_si128(t4, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t2 = _mm_aesenc_si128(t2, ks[7]);
		t3 = _mm_aesenc_si128(t3, ks[7]);
		t4 = _mm_aesenc_si128(t4, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t2 = _mm_aesenc_si128(t2, ks[8]);
		t3 = _mm_aesenc_si128(t3, ks[8]);
		t4 = _mm_aesenc_si128(t4, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t2 = _mm_aesenc_si128(t2, ks[9]);
		t3 = _mm_aesenc_si128(t3, ks[9]);
		t4 = _mm_aesenc_si128(t4, ks[9]);
		t1 = _mm_aesenc_si128(t1, ks[10]);
		t2 = _mm_aesenc_si128(t2, ks[10]);
		t3 = _mm_aesenc_si128(t3, ks[10]);
		t4 = _mm_aesenc_si128(t4, ks[10]);
		t1 = _mm_aesenc_si128(t1, ks[11]);
		t2 = _mm_aesenc_si128(t2, ks[11]);
		t3 = _mm_aesenc_si128(t3, ks[11]);
		t4 = _mm_aesenc_si128(t4, ks[11]);

		t1 = _mm_aesenclast_si128(t1, ks[12]);
		t2 = _mm_aesenclast_si128(t2, ks[12]);
		t3 = _mm_aesenclast_si128(t3, ks[12]);
		t4 = _mm_aesenclast_si128(t4, ks[12]);
		t1 = _mm_xor_si128(t1, d1);
		t2 = _mm_xor_si128(t2, d2);
		t3 = _mm_xor_si128(t3, d3);
		t4 = _mm_xor_si128(t4, d4);
		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
	}

	for (i = pblocks; i < blocks; i++)
	{
		d1 = _mm_loadu_si128(bi + i);

		t1 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t1 = _mm_aesenc_si128(t1, ks[10]);
		t1 = _mm_aesenc_si128(t1, ks[11]);

		t1 = _mm_aesenclast_si128(t1, ks[12]);
		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(bo + i, t1);
	}

	if (rem)
	{
		memset(&b, 0, sizeof(b));
		memcpy(&b, bi + blocks, rem);

		d1 = _mm_loadu_si128(&b);
		t1 = _mm_xor_si128(state, ks[0]);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t1 = _mm_aesenc_si128(t1, ks[10]);
		t1 = _mm_aesenc_si128(t1, ks[11]);

		t1 = _mm_aesenclast_si128(t1, ks[12]);
		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(&b, t1);

		memcpy(bo + blocks, &b, rem);
	}
}

/**
 * AES-256 CTR encryption
 */
static void encrypt_ctr256(private_aesni_ctr_t *this,
						   size_t len, u_char *in, u_char *out)
{
	__m128i t1, t2, t3, t4;
	__m128i d1, d2, d3, d4;
	__m128i *ks, state, b, *bi, *bo;
	u_int i, blocks, pblocks, rem;

	state = _mm_load_si128((__m128i*)&this->state);
	blocks = len / AES_BLOCK_SIZE;
	pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
	rem = len % AES_BLOCK_SIZE;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	ks = this->key->schedule;

	for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
	{
		d1 = _mm_loadu_si128(bi + i + 0);
		d2 = _mm_loadu_si128(bi + i + 1);
		d3 = _mm_loadu_si128(bi + i + 2);
		d4 = _mm_loadu_si128(bi + i + 3);

		t1 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t2 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t3 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);
		t4 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t2 = _mm_aesenc_si128(t2, ks[1]);
		t3 = _mm_aesenc_si128(t3, ks[1]);
		t4 = _mm_aesenc_si128(t4, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t2 = _mm_aesenc_si128(t2, ks[2]);
		t3 = _mm_aesenc_si128(t3, ks[2]);
		t4 = _mm_aesenc_si128(t4, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t2 = _mm_aesenc_si128(t2, ks[3]);
		t3 = _mm_aesenc_si128(t3, ks[3]);
		t4 = _mm_aesenc_si128(t4, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t2 = _mm_aesenc_si128(t2, ks[4]);
		t3 = _mm_aesenc_si128(t3, ks[4]);
		t4 = _mm_aesenc_si128(t4, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t2 = _mm_aesenc_si128(t2, ks[5]);
		t3 = _mm_aesenc_si128(t3, ks[5]);
		t4 = _mm_aesenc_si128(t4, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t2 = _mm_aesenc_si128(t2, ks[6]);
		t3 = _mm_aesenc_si128(t3, ks[6]);
		t4 = _mm_aesenc_si128(t4, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t2 = _mm_aesenc_si128(t2, ks[7]);
		t3 = _mm_aesenc_si128(t3, ks[7]);
		t4 = _mm_aesenc_si128(t4, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t2 = _mm_aesenc_si128(t2, ks[8]);
		t3 = _mm_aesenc_si128(t3, ks[8]);
		t4 = _mm_aesenc_si128(t4, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t2 = _mm_aesenc_si128(t2, ks[9]);
		t3 = _mm_aesenc_si128(t3, ks[9]);
		t4 = _mm_aesenc_si128(t4, ks[9]);
		t1 = _mm_aesenc_si128(t1, ks[10]);
		t2 = _mm_aesenc_si128(t2, ks[10]);
		t3 = _mm_aesenc_si128(t3, ks[10]);
		t4 = _mm_aesenc_si128(t4, ks[10]);
		t1 = _mm_aesenc_si128(t1, ks[11]);
		t2 = _mm_aesenc_si128(t2, ks[11]);
		t3 = _mm_aesenc_si128(t3, ks[11]);
		t4 = _mm_aesenc_si128(t4, ks[11]);
		t1 = _mm_aesenc_si128(t1, ks[12]);
		t2 = _mm_aesenc_si128(t2, ks[12]);
		t3 = _mm_aesenc_si128(t3, ks[12]);
		t4 = _mm_aesenc_si128(t4, ks[12]);
		t1 = _mm_aesenc_si128(t1, ks[13]);
		t2 = _mm_aesenc_si128(t2, ks[13]);
		t3 = _mm_aesenc_si128(t3, ks[13]);
		t4 = _mm_aesenc_si128(t4, ks[13]);

		t1 = _mm_aesenclast_si128(t1, ks[14]);
		t2 = _mm_aesenclast_si128(t2, ks[14]);
		t3 = _mm_aesenclast_si128(t3, ks[14]);
		t4 = _mm_aesenclast_si128(t4, ks[14]);
		t1 = _mm_xor_si128(t1, d1);
		t2 = _mm_xor_si128(t2, d2);
		t3 = _mm_xor_si128(t3, d3);
		t4 = _mm_xor_si128(t4, d4);
		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
	}

	for (i = pblocks; i < blocks; i++)
	{
		d1 = _mm_loadu_si128(bi + i);

		t1 = _mm_xor_si128(state, ks[0]);
		state = increment_be(state);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t1 = _mm_aesenc_si128(t1, ks[10]);
		t1 = _mm_aesenc_si128(t1, ks[11]);
		t1 = _mm_aesenc_si128(t1, ks[12]);
		t1 = _mm_aesenc_si128(t1, ks[13]);

		t1 = _mm_aesenclast_si128(t1, ks[14]);
		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(bo + i, t1);
	}

	if (rem)
	{
		memset(&b, 0, sizeof(b));
		memcpy(&b, bi + blocks, rem);

		d1 = _mm_loadu_si128(&b);
		t1 = _mm_xor_si128(state, ks[0]);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t1 = _mm_aesenc_si128(t1, ks[10]);
		t1 = _mm_aesenc_si128(t1, ks[11]);
		t1 = _mm_aesenc_si128(t1, ks[12]);
		t1 = _mm_aesenc_si128(t1, ks[13]);

		t1 = _mm_aesenclast_si128(t1, ks[14]);
		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(&b, t1);

		memcpy(bo + blocks, &b, rem);
	}
}
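/* crypter_t entry point used for both encrypt() and decrypt(): CTR mode
 * XORs the same keystream in either direction. Per RFC 3686 the block
 * counter restarts at one for every message; if no output chunk is given,
 * the data is crypted in-place. */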
METHOD(crypter_t, crypt, bool,
	private_aesni_ctr_t *this, chunk_t in, chunk_t iv, chunk_t *out)
{
	u_char *buf;

	if (!this->key || iv.len != sizeof(this->state.iv))
	{
		return FALSE;
	}
	memcpy(this->state.iv, iv.ptr, sizeof(this->state.iv));
	this->state.counter = htonl(1);

	buf = in.ptr;
	if (out)
	{
		*out = chunk_alloc(in.len);
		buf = out->ptr;
	}
	this->crypt(this, in.len, in.ptr, buf);
	return TRUE;
}

METHOD(crypter_t, get_block_size, size_t,
	private_aesni_ctr_t *this)
{
	return 1;
}

METHOD(crypter_t, get_iv_size, size_t,
	private_aesni_ctr_t *this)
{
	return sizeof(this->state.iv);
}

METHOD(crypter_t, get_key_size, size_t,
	private_aesni_ctr_t *this)
{
	return this->key_size + sizeof(this->state.nonce);
}

METHOD(crypter_t, set_key, bool,
	private_aesni_ctr_t *this, chunk_t key)
{
	if (key.len != get_key_size(this))
	{
		return FALSE;
	}

	memcpy(this->state.nonce, key.ptr + key.len - sizeof(this->state.nonce),
		   sizeof(this->state.nonce));
	key.len -= sizeof(this->state.nonce);

	DESTROY_IF(this->key);
	this->key = aesni_key_create(TRUE, key);

	return this->key;
}

METHOD(crypter_t, destroy, void,
	private_aesni_ctr_t *this)
{
	DESTROY_IF(this->key);
	free_align(this);
}

/**
 * See header
 */
aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size)
{
	private_aesni_ctr_t *this;

	if (algo != ENCR_AES_CTR)
	{
		return NULL;
	}
	switch (key_size)
	{
		case 0:
			key_size = 16;
			break;
		case 16:
		case 24:
		case 32:
			break;
		default:
			return NULL;
	}

	INIT_ALIGN(this, sizeof(__m128i),
		.public = {
			.crypter = {
				.encrypt = _crypt,
				.decrypt = _crypt,
				.get_block_size = _get_block_size,
				.get_iv_size = _get_iv_size,
				.get_key_size = _get_key_size,
				.set_key = _set_key,
				.destroy = _destroy,
			},
		},
		.key_size = key_size,
	);

	switch (key_size)
	{
		case 16:
			this->crypt = encrypt_ctr128;
			break;
		case 24:
			this->crypt = encrypt_ctr192;
			break;
		case 32:
			this->crypt = encrypt_ctr256;
			break;
	}

	return &this->public;
}
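
#ifdef AESNI_CTR_USAGE_EXAMPLE
/* Minimal usage sketch (the guard macro is hypothetical, this is never
 * built): for AES-128-CTR, set_key() expects 20 bytes, the 16 byte AES key
 * followed by the 4 byte nonce, and each message needs an 8 byte IV.
 * Since CTR decryption is the same keystream XOR, encrypt() and decrypt()
 * are interchangeable here. */
static bool ctr_example(chunk_t key, chunk_t iv, chunk_t data)
{
	aesni_ctr_t *ctr;
	chunk_t enc;
	bool ok = FALSE;

	ctr = aesni_ctr_create(ENCR_AES_CTR, 16);
	if (ctr)
	{
		if (ctr->crypter.set_key(&ctr->crypter, key) &&
			ctr->crypter.encrypt(&ctr->crypter, data, iv, &enc))
		{
			/* enc holds the ciphertext, allocated by encrypt() */
			chunk_free(&enc);
			ok = TRUE;
		}
		ctr->crypter.destroy(&ctr->crypter);
	}
	return ok;
}
#endif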