vici: With start_action=start, terminate IKE_SA without children on unload
[strongswan.git] / src / libstrongswan / plugins / aesni / aesni_cbc.c
1 /*
2 * Copyright (C) 2015 Martin Willi
3 * Copyright (C) 2015 revosec AG
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16 #include "aesni_cbc.h"
17 #include "aesni_key.h"
18
/**
 * Pipeline parallelism we use for CBC decryption
 *
 * Number of blocks the decryption routines process per iteration of their
 * batched loop. The routines in this file hard-code four lanes, so this
 * value can not be changed without adapting them as well.
 */
#define CBC_DECRYPT_PARALLELISM 4

typedef struct private_aesni_cbc_t private_aesni_cbc_t;

/**
 * CBC en/decryption method type
 *
 * Arguments, in order: key schedule, number of blocks to process, input
 * buffer, IV, output buffer.
 */
typedef void (*aesni_cbc_fn_t)(aesni_key_t*, u_int, u_char*, u_char*, u_char*);
30
/**
 * Private data of an aesni_cbc_t object.
 */
struct private_aesni_cbc_t {

	/**
	 * Public aesni_cbc_t interface.
	 */
	aesni_cbc_t public;

	/**
	 * Key size, in bytes (16, 24 or 32); set_key() rejects any other length
	 */
	u_int key_size;

	/**
	 * Encryption key schedule, (re-)created by set_key()
	 */
	aesni_key_t *ekey;

	/**
	 * Decryption key schedule, (re-)created by set_key()
	 */
	aesni_key_t *dkey;

	/**
	 * Encryption method, selected by key size on construction
	 */
	aesni_cbc_fn_t encrypt;

	/**
	 * Decryption method, selected by key size on construction
	 */
	aesni_cbc_fn_t decrypt;
};
66
67 /**
68 * AES-128 CBC encryption
69 */
70 static void encrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
71 u_char *iv, u_char *out)
72 {
73 __m128i *ks, t, fb, *bi, *bo;
74 int i;
75
76 ks = key->schedule;
77 bi = (__m128i*)in;
78 bo = (__m128i*)out;
79
80 fb = _mm_loadu_si128((__m128i*)iv);
81 for (i = 0; i < blocks; i++)
82 {
83 t = _mm_loadu_si128(bi + i);
84 fb = _mm_xor_si128(t, fb);
85 fb = _mm_xor_si128(fb, ks[0]);
86
87 fb = _mm_aesenc_si128(fb, ks[1]);
88 fb = _mm_aesenc_si128(fb, ks[2]);
89 fb = _mm_aesenc_si128(fb, ks[3]);
90 fb = _mm_aesenc_si128(fb, ks[4]);
91 fb = _mm_aesenc_si128(fb, ks[5]);
92 fb = _mm_aesenc_si128(fb, ks[6]);
93 fb = _mm_aesenc_si128(fb, ks[7]);
94 fb = _mm_aesenc_si128(fb, ks[8]);
95 fb = _mm_aesenc_si128(fb, ks[9]);
96
97 fb = _mm_aesenclast_si128(fb, ks[10]);
98 _mm_storeu_si128(bo + i, fb);
99 }
100 }
101
102 /**
103 * AES-128 CBC decryption
104 */
105 static void decrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
106 u_char *iv, u_char *out)
107 {
108 __m128i *ks, last, *bi, *bo;
109 __m128i t1, t2, t3, t4;
110 __m128i f1, f2, f3, f4;
111 u_int i, pblocks;
112
113 ks = key->schedule;
114 bi = (__m128i*)in;
115 bo = (__m128i*)out;
116 pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
117
118 f1 = _mm_loadu_si128((__m128i*)iv);
119
120 for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
121 {
122 t1 = _mm_loadu_si128(bi + i + 0);
123 t2 = _mm_loadu_si128(bi + i + 1);
124 t3 = _mm_loadu_si128(bi + i + 2);
125 t4 = _mm_loadu_si128(bi + i + 3);
126
127 f2 = t1;
128 f3 = t2;
129 f4 = t3;
130 last = t4;
131
132 t1 = _mm_xor_si128(t1, ks[0]);
133 t2 = _mm_xor_si128(t2, ks[0]);
134 t3 = _mm_xor_si128(t3, ks[0]);
135 t4 = _mm_xor_si128(t4, ks[0]);
136
137 t1 = _mm_aesdec_si128(t1, ks[1]);
138 t2 = _mm_aesdec_si128(t2, ks[1]);
139 t3 = _mm_aesdec_si128(t3, ks[1]);
140 t4 = _mm_aesdec_si128(t4, ks[1]);
141 t1 = _mm_aesdec_si128(t1, ks[2]);
142 t2 = _mm_aesdec_si128(t2, ks[2]);
143 t3 = _mm_aesdec_si128(t3, ks[2]);
144 t4 = _mm_aesdec_si128(t4, ks[2]);
145 t1 = _mm_aesdec_si128(t1, ks[3]);
146 t2 = _mm_aesdec_si128(t2, ks[3]);
147 t3 = _mm_aesdec_si128(t3, ks[3]);
148 t4 = _mm_aesdec_si128(t4, ks[3]);
149 t1 = _mm_aesdec_si128(t1, ks[4]);
150 t2 = _mm_aesdec_si128(t2, ks[4]);
151 t3 = _mm_aesdec_si128(t3, ks[4]);
152 t4 = _mm_aesdec_si128(t4, ks[4]);
153 t1 = _mm_aesdec_si128(t1, ks[5]);
154 t2 = _mm_aesdec_si128(t2, ks[5]);
155 t3 = _mm_aesdec_si128(t3, ks[5]);
156 t4 = _mm_aesdec_si128(t4, ks[5]);
157 t1 = _mm_aesdec_si128(t1, ks[6]);
158 t2 = _mm_aesdec_si128(t2, ks[6]);
159 t3 = _mm_aesdec_si128(t3, ks[6]);
160 t4 = _mm_aesdec_si128(t4, ks[6]);
161 t1 = _mm_aesdec_si128(t1, ks[7]);
162 t2 = _mm_aesdec_si128(t2, ks[7]);
163 t3 = _mm_aesdec_si128(t3, ks[7]);
164 t4 = _mm_aesdec_si128(t4, ks[7]);
165 t1 = _mm_aesdec_si128(t1, ks[8]);
166 t2 = _mm_aesdec_si128(t2, ks[8]);
167 t3 = _mm_aesdec_si128(t3, ks[8]);
168 t4 = _mm_aesdec_si128(t4, ks[8]);
169 t1 = _mm_aesdec_si128(t1, ks[9]);
170 t2 = _mm_aesdec_si128(t2, ks[9]);
171 t3 = _mm_aesdec_si128(t3, ks[9]);
172 t4 = _mm_aesdec_si128(t4, ks[9]);
173
174 t1 = _mm_aesdeclast_si128(t1, ks[10]);
175 t2 = _mm_aesdeclast_si128(t2, ks[10]);
176 t3 = _mm_aesdeclast_si128(t3, ks[10]);
177 t4 = _mm_aesdeclast_si128(t4, ks[10]);
178 t1 = _mm_xor_si128(t1, f1);
179 t2 = _mm_xor_si128(t2, f2);
180 t3 = _mm_xor_si128(t3, f3);
181 t4 = _mm_xor_si128(t4, f4);
182 _mm_storeu_si128(bo + i + 0, t1);
183 _mm_storeu_si128(bo + i + 1, t2);
184 _mm_storeu_si128(bo + i + 2, t3);
185 _mm_storeu_si128(bo + i + 3, t4);
186 f1 = last;
187 }
188
189 for (i = pblocks; i < blocks; i++)
190 {
191 last = _mm_loadu_si128(bi + i);
192 t1 = _mm_xor_si128(last, ks[0]);
193
194 t1 = _mm_aesdec_si128(t1, ks[1]);
195 t1 = _mm_aesdec_si128(t1, ks[2]);
196 t1 = _mm_aesdec_si128(t1, ks[3]);
197 t1 = _mm_aesdec_si128(t1, ks[4]);
198 t1 = _mm_aesdec_si128(t1, ks[5]);
199 t1 = _mm_aesdec_si128(t1, ks[6]);
200 t1 = _mm_aesdec_si128(t1, ks[7]);
201 t1 = _mm_aesdec_si128(t1, ks[8]);
202 t1 = _mm_aesdec_si128(t1, ks[9]);
203
204 t1 = _mm_aesdeclast_si128(t1, ks[10]);
205 t1 = _mm_xor_si128(t1, f1);
206 _mm_storeu_si128(bo + i, t1);
207 f1 = last;
208 }
209 }
210
211 /**
212 * AES-192 CBC encryption
213 */
214 static void encrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
215 u_char *iv, u_char *out)
216 {
217 __m128i *ks, t, fb, *bi, *bo;
218 int i;
219
220 ks = key->schedule;
221 bi = (__m128i*)in;
222 bo = (__m128i*)out;
223
224 fb = _mm_loadu_si128((__m128i*)iv);
225 for (i = 0; i < blocks; i++)
226 {
227 t = _mm_loadu_si128(bi + i);
228 fb = _mm_xor_si128(t, fb);
229 fb = _mm_xor_si128(fb, ks[0]);
230
231 fb = _mm_aesenc_si128(fb, ks[1]);
232 fb = _mm_aesenc_si128(fb, ks[2]);
233 fb = _mm_aesenc_si128(fb, ks[3]);
234 fb = _mm_aesenc_si128(fb, ks[4]);
235 fb = _mm_aesenc_si128(fb, ks[5]);
236 fb = _mm_aesenc_si128(fb, ks[6]);
237 fb = _mm_aesenc_si128(fb, ks[7]);
238 fb = _mm_aesenc_si128(fb, ks[8]);
239 fb = _mm_aesenc_si128(fb, ks[9]);
240 fb = _mm_aesenc_si128(fb, ks[10]);
241 fb = _mm_aesenc_si128(fb, ks[11]);
242
243 fb = _mm_aesenclast_si128(fb, ks[12]);
244 _mm_storeu_si128(bo + i, fb);
245 }
246 }
247
248 /**
249 * AES-192 CBC decryption
250 */
251 static void decrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
252 u_char *iv, u_char *out)
253 {
254 __m128i *ks, last, *bi, *bo;
255 __m128i t1, t2, t3, t4;
256 __m128i f1, f2, f3, f4;
257 u_int i, pblocks;
258
259 ks = key->schedule;
260 bi = (__m128i*)in;
261 bo = (__m128i*)out;
262 pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
263
264 f1 = _mm_loadu_si128((__m128i*)iv);
265
266 for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
267 {
268 t1 = _mm_loadu_si128(bi + i + 0);
269 t2 = _mm_loadu_si128(bi + i + 1);
270 t3 = _mm_loadu_si128(bi + i + 2);
271 t4 = _mm_loadu_si128(bi + i + 3);
272
273 f2 = t1;
274 f3 = t2;
275 f4 = t3;
276 last = t4;
277
278 t1 = _mm_xor_si128(t1, ks[0]);
279 t2 = _mm_xor_si128(t2, ks[0]);
280 t3 = _mm_xor_si128(t3, ks[0]);
281 t4 = _mm_xor_si128(t4, ks[0]);
282
283 t1 = _mm_aesdec_si128(t1, ks[1]);
284 t2 = _mm_aesdec_si128(t2, ks[1]);
285 t3 = _mm_aesdec_si128(t3, ks[1]);
286 t4 = _mm_aesdec_si128(t4, ks[1]);
287 t1 = _mm_aesdec_si128(t1, ks[2]);
288 t2 = _mm_aesdec_si128(t2, ks[2]);
289 t3 = _mm_aesdec_si128(t3, ks[2]);
290 t4 = _mm_aesdec_si128(t4, ks[2]);
291 t1 = _mm_aesdec_si128(t1, ks[3]);
292 t2 = _mm_aesdec_si128(t2, ks[3]);
293 t3 = _mm_aesdec_si128(t3, ks[3]);
294 t4 = _mm_aesdec_si128(t4, ks[3]);
295 t1 = _mm_aesdec_si128(t1, ks[4]);
296 t2 = _mm_aesdec_si128(t2, ks[4]);
297 t3 = _mm_aesdec_si128(t3, ks[4]);
298 t4 = _mm_aesdec_si128(t4, ks[4]);
299 t1 = _mm_aesdec_si128(t1, ks[5]);
300 t2 = _mm_aesdec_si128(t2, ks[5]);
301 t3 = _mm_aesdec_si128(t3, ks[5]);
302 t4 = _mm_aesdec_si128(t4, ks[5]);
303 t1 = _mm_aesdec_si128(t1, ks[6]);
304 t2 = _mm_aesdec_si128(t2, ks[6]);
305 t3 = _mm_aesdec_si128(t3, ks[6]);
306 t4 = _mm_aesdec_si128(t4, ks[6]);
307 t1 = _mm_aesdec_si128(t1, ks[7]);
308 t2 = _mm_aesdec_si128(t2, ks[7]);
309 t3 = _mm_aesdec_si128(t3, ks[7]);
310 t4 = _mm_aesdec_si128(t4, ks[7]);
311 t1 = _mm_aesdec_si128(t1, ks[8]);
312 t2 = _mm_aesdec_si128(t2, ks[8]);
313 t3 = _mm_aesdec_si128(t3, ks[8]);
314 t4 = _mm_aesdec_si128(t4, ks[8]);
315 t1 = _mm_aesdec_si128(t1, ks[9]);
316 t2 = _mm_aesdec_si128(t2, ks[9]);
317 t3 = _mm_aesdec_si128(t3, ks[9]);
318 t4 = _mm_aesdec_si128(t4, ks[9]);
319 t1 = _mm_aesdec_si128(t1, ks[10]);
320 t2 = _mm_aesdec_si128(t2, ks[10]);
321 t3 = _mm_aesdec_si128(t3, ks[10]);
322 t4 = _mm_aesdec_si128(t4, ks[10]);
323 t1 = _mm_aesdec_si128(t1, ks[11]);
324 t2 = _mm_aesdec_si128(t2, ks[11]);
325 t3 = _mm_aesdec_si128(t3, ks[11]);
326 t4 = _mm_aesdec_si128(t4, ks[11]);
327
328 t1 = _mm_aesdeclast_si128(t1, ks[12]);
329 t2 = _mm_aesdeclast_si128(t2, ks[12]);
330 t3 = _mm_aesdeclast_si128(t3, ks[12]);
331 t4 = _mm_aesdeclast_si128(t4, ks[12]);
332 t1 = _mm_xor_si128(t1, f1);
333 t2 = _mm_xor_si128(t2, f2);
334 t3 = _mm_xor_si128(t3, f3);
335 t4 = _mm_xor_si128(t4, f4);
336 _mm_storeu_si128(bo + i + 0, t1);
337 _mm_storeu_si128(bo + i + 1, t2);
338 _mm_storeu_si128(bo + i + 2, t3);
339 _mm_storeu_si128(bo + i + 3, t4);
340 f1 = last;
341 }
342
343 for (i = pblocks; i < blocks; i++)
344 {
345 last = _mm_loadu_si128(bi + i);
346 t1 = _mm_xor_si128(last, ks[0]);
347
348 t1 = _mm_aesdec_si128(t1, ks[1]);
349 t1 = _mm_aesdec_si128(t1, ks[2]);
350 t1 = _mm_aesdec_si128(t1, ks[3]);
351 t1 = _mm_aesdec_si128(t1, ks[4]);
352 t1 = _mm_aesdec_si128(t1, ks[5]);
353 t1 = _mm_aesdec_si128(t1, ks[6]);
354 t1 = _mm_aesdec_si128(t1, ks[7]);
355 t1 = _mm_aesdec_si128(t1, ks[8]);
356 t1 = _mm_aesdec_si128(t1, ks[9]);
357 t1 = _mm_aesdec_si128(t1, ks[10]);
358 t1 = _mm_aesdec_si128(t1, ks[11]);
359
360 t1 = _mm_aesdeclast_si128(t1, ks[12]);
361 t1 = _mm_xor_si128(t1, f1);
362 _mm_storeu_si128(bo + i, t1);
363 f1 = last;
364 }
365 }
366
367 /**
368 * AES-256 CBC encryption
369 */
370 static void encrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
371 u_char *iv, u_char *out)
372 {
373 __m128i *ks, t, fb, *bi, *bo;
374 int i;
375
376 ks = key->schedule;
377 bi = (__m128i*)in;
378 bo = (__m128i*)out;
379
380 fb = _mm_loadu_si128((__m128i*)iv);
381 for (i = 0; i < blocks; i++)
382 {
383 t = _mm_loadu_si128(bi + i);
384 fb = _mm_xor_si128(t, fb);
385 fb = _mm_xor_si128(fb, ks[0]);
386
387 fb = _mm_aesenc_si128(fb, ks[1]);
388 fb = _mm_aesenc_si128(fb, ks[2]);
389 fb = _mm_aesenc_si128(fb, ks[3]);
390 fb = _mm_aesenc_si128(fb, ks[4]);
391 fb = _mm_aesenc_si128(fb, ks[5]);
392 fb = _mm_aesenc_si128(fb, ks[6]);
393 fb = _mm_aesenc_si128(fb, ks[7]);
394 fb = _mm_aesenc_si128(fb, ks[8]);
395 fb = _mm_aesenc_si128(fb, ks[9]);
396 fb = _mm_aesenc_si128(fb, ks[10]);
397 fb = _mm_aesenc_si128(fb, ks[11]);
398 fb = _mm_aesenc_si128(fb, ks[12]);
399 fb = _mm_aesenc_si128(fb, ks[13]);
400
401 fb = _mm_aesenclast_si128(fb, ks[14]);
402 _mm_storeu_si128(bo + i, fb);
403 }
404 }
405
406 /**
407 * AES-256 CBC decryption
408 */
409 static void decrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
410 u_char *iv, u_char *out)
411 {
412 __m128i *ks, last, *bi, *bo;
413 __m128i t1, t2, t3, t4;
414 __m128i f1, f2, f3, f4;
415 u_int i, pblocks;
416
417 ks = key->schedule;
418 bi = (__m128i*)in;
419 bo = (__m128i*)out;
420 pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
421
422 f1 = _mm_loadu_si128((__m128i*)iv);
423
424 for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
425 {
426 t1 = _mm_loadu_si128(bi + i + 0);
427 t2 = _mm_loadu_si128(bi + i + 1);
428 t3 = _mm_loadu_si128(bi + i + 2);
429 t4 = _mm_loadu_si128(bi + i + 3);
430
431 f2 = t1;
432 f3 = t2;
433 f4 = t3;
434 last = t4;
435
436 t1 = _mm_xor_si128(t1, ks[0]);
437 t2 = _mm_xor_si128(t2, ks[0]);
438 t3 = _mm_xor_si128(t3, ks[0]);
439 t4 = _mm_xor_si128(t4, ks[0]);
440
441 t1 = _mm_aesdec_si128(t1, ks[1]);
442 t2 = _mm_aesdec_si128(t2, ks[1]);
443 t3 = _mm_aesdec_si128(t3, ks[1]);
444 t4 = _mm_aesdec_si128(t4, ks[1]);
445 t1 = _mm_aesdec_si128(t1, ks[2]);
446 t2 = _mm_aesdec_si128(t2, ks[2]);
447 t3 = _mm_aesdec_si128(t3, ks[2]);
448 t4 = _mm_aesdec_si128(t4, ks[2]);
449 t1 = _mm_aesdec_si128(t1, ks[3]);
450 t2 = _mm_aesdec_si128(t2, ks[3]);
451 t3 = _mm_aesdec_si128(t3, ks[3]);
452 t4 = _mm_aesdec_si128(t4, ks[3]);
453 t1 = _mm_aesdec_si128(t1, ks[4]);
454 t2 = _mm_aesdec_si128(t2, ks[4]);
455 t3 = _mm_aesdec_si128(t3, ks[4]);
456 t4 = _mm_aesdec_si128(t4, ks[4]);
457 t1 = _mm_aesdec_si128(t1, ks[5]);
458 t2 = _mm_aesdec_si128(t2, ks[5]);
459 t3 = _mm_aesdec_si128(t3, ks[5]);
460 t4 = _mm_aesdec_si128(t4, ks[5]);
461 t1 = _mm_aesdec_si128(t1, ks[6]);
462 t2 = _mm_aesdec_si128(t2, ks[6]);
463 t3 = _mm_aesdec_si128(t3, ks[6]);
464 t4 = _mm_aesdec_si128(t4, ks[6]);
465 t1 = _mm_aesdec_si128(t1, ks[7]);
466 t2 = _mm_aesdec_si128(t2, ks[7]);
467 t3 = _mm_aesdec_si128(t3, ks[7]);
468 t4 = _mm_aesdec_si128(t4, ks[7]);
469 t1 = _mm_aesdec_si128(t1, ks[8]);
470 t2 = _mm_aesdec_si128(t2, ks[8]);
471 t3 = _mm_aesdec_si128(t3, ks[8]);
472 t4 = _mm_aesdec_si128(t4, ks[8]);
473 t1 = _mm_aesdec_si128(t1, ks[9]);
474 t2 = _mm_aesdec_si128(t2, ks[9]);
475 t3 = _mm_aesdec_si128(t3, ks[9]);
476 t4 = _mm_aesdec_si128(t4, ks[9]);
477 t1 = _mm_aesdec_si128(t1, ks[10]);
478 t2 = _mm_aesdec_si128(t2, ks[10]);
479 t3 = _mm_aesdec_si128(t3, ks[10]);
480 t4 = _mm_aesdec_si128(t4, ks[10]);
481 t1 = _mm_aesdec_si128(t1, ks[11]);
482 t2 = _mm_aesdec_si128(t2, ks[11]);
483 t3 = _mm_aesdec_si128(t3, ks[11]);
484 t4 = _mm_aesdec_si128(t4, ks[11]);
485 t1 = _mm_aesdec_si128(t1, ks[12]);
486 t2 = _mm_aesdec_si128(t2, ks[12]);
487 t3 = _mm_aesdec_si128(t3, ks[12]);
488 t4 = _mm_aesdec_si128(t4, ks[12]);
489 t1 = _mm_aesdec_si128(t1, ks[13]);
490 t2 = _mm_aesdec_si128(t2, ks[13]);
491 t3 = _mm_aesdec_si128(t3, ks[13]);
492 t4 = _mm_aesdec_si128(t4, ks[13]);
493
494 t1 = _mm_aesdeclast_si128(t1, ks[14]);
495 t2 = _mm_aesdeclast_si128(t2, ks[14]);
496 t3 = _mm_aesdeclast_si128(t3, ks[14]);
497 t4 = _mm_aesdeclast_si128(t4, ks[14]);
498 t1 = _mm_xor_si128(t1, f1);
499 t2 = _mm_xor_si128(t2, f2);
500 t3 = _mm_xor_si128(t3, f3);
501 t4 = _mm_xor_si128(t4, f4);
502 _mm_storeu_si128(bo + i + 0, t1);
503 _mm_storeu_si128(bo + i + 1, t2);
504 _mm_storeu_si128(bo + i + 2, t3);
505 _mm_storeu_si128(bo + i + 3, t4);
506 f1 = last;
507 }
508
509 for (i = pblocks; i < blocks; i++)
510 {
511 last = _mm_loadu_si128(bi + i);
512 t1 = _mm_xor_si128(last, ks[0]);
513
514 t1 = _mm_aesdec_si128(t1, ks[1]);
515 t1 = _mm_aesdec_si128(t1, ks[2]);
516 t1 = _mm_aesdec_si128(t1, ks[3]);
517 t1 = _mm_aesdec_si128(t1, ks[4]);
518 t1 = _mm_aesdec_si128(t1, ks[5]);
519 t1 = _mm_aesdec_si128(t1, ks[6]);
520 t1 = _mm_aesdec_si128(t1, ks[7]);
521 t1 = _mm_aesdec_si128(t1, ks[8]);
522 t1 = _mm_aesdec_si128(t1, ks[9]);
523 t1 = _mm_aesdec_si128(t1, ks[10]);
524 t1 = _mm_aesdec_si128(t1, ks[11]);
525 t1 = _mm_aesdec_si128(t1, ks[12]);
526 t1 = _mm_aesdec_si128(t1, ks[13]);
527
528 t1 = _mm_aesdeclast_si128(t1, ks[14]);
529 t1 = _mm_xor_si128(t1, f1);
530 _mm_storeu_si128(bo + i, t1);
531 f1 = last;
532 }
533 }
534
535 /**
536 * Do inline or allocated de/encryption using key schedule
537 */
538 static bool crypt(aesni_cbc_fn_t fn, aesni_key_t *key,
539 chunk_t data, chunk_t iv, chunk_t *out)
540 {
541 u_char *buf;
542
543 if (!key || iv.len != AES_BLOCK_SIZE || data.len % AES_BLOCK_SIZE)
544 {
545 return FALSE;
546 }
547 if (out)
548 {
549 *out = chunk_alloc(data.len);
550 buf = out->ptr;
551 }
552 else
553 {
554 buf = data.ptr;
555 }
556 fn(key, data.len / AES_BLOCK_SIZE, data.ptr, iv.ptr, buf);
557 return TRUE;
558 }
559
560 METHOD(crypter_t, encrypt, bool,
561 private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted)
562 {
563 return crypt(this->encrypt, this->ekey, data, iv, encrypted);
564 }
565
566 METHOD(crypter_t, decrypt, bool,
567 private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted)
568 {
569 return crypt(this->decrypt, this->dkey, data, iv, decrypted);
570 }
571
572 METHOD(crypter_t, get_block_size, size_t,
573 private_aesni_cbc_t *this)
574 {
575 return AES_BLOCK_SIZE;
576 }
577
578 METHOD(crypter_t, get_iv_size, size_t,
579 private_aesni_cbc_t *this)
580 {
581 return AES_BLOCK_SIZE;
582 }
583
584 METHOD(crypter_t, get_key_size, size_t,
585 private_aesni_cbc_t *this)
586 {
587 return this->key_size;
588 }
589
590 METHOD(crypter_t, set_key, bool,
591 private_aesni_cbc_t *this, chunk_t key)
592 {
593 if (key.len != this->key_size)
594 {
595 return FALSE;
596 }
597
598 DESTROY_IF(this->ekey);
599 DESTROY_IF(this->dkey);
600
601 this->ekey = aesni_key_create(TRUE, key);
602 this->dkey = aesni_key_create(FALSE, key);
603
604 return this->ekey && this->dkey;
605 }
606
607 METHOD(crypter_t, destroy, void,
608 private_aesni_cbc_t *this)
609 {
610 DESTROY_IF(this->ekey);
611 DESTROY_IF(this->dkey);
612 free_align(this);
613 }
614
615 /**
616 * See header
617 */
618 aesni_cbc_t *aesni_cbc_create(encryption_algorithm_t algo, size_t key_size)
619 {
620 private_aesni_cbc_t *this;
621
622 if (algo != ENCR_AES_CBC)
623 {
624 return NULL;
625 }
626 switch (key_size)
627 {
628 case 0:
629 key_size = 16;
630 break;
631 case 16:
632 case 24:
633 case 32:
634 break;
635 default:
636 return NULL;
637 }
638
639 INIT_ALIGN(this, sizeof(__m128i),
640 .public = {
641 .crypter = {
642 .encrypt = _encrypt,
643 .decrypt = _decrypt,
644 .get_block_size = _get_block_size,
645 .get_iv_size = _get_iv_size,
646 .get_key_size = _get_key_size,
647 .set_key = _set_key,
648 .destroy = _destroy,
649 },
650 },
651 .key_size = key_size,
652 );
653
654 switch (key_size)
655 {
656 case 16:
657 this->encrypt = encrypt_cbc128;
658 this->decrypt = decrypt_cbc128;
659 break;
660 case 24:
661 this->encrypt = encrypt_cbc192;
662 this->decrypt = decrypt_cbc192;
663 break;
664 case 32:
665 this->encrypt = encrypt_cbc256;
666 this->decrypt = decrypt_cbc256;
667 break;
668 }
669
670 return &this->public;
671 }