aesni: Align all class instances to 16 byte boundaries
[strongswan.git] / src/libstrongswan/plugins/aesni/aesni_cmac.c
/*
 * Copyright (C) 2012 Tobias Brunner
 * Hochschule fuer Technik Rapperswil
 * Copyright (C) 2015 Martin Willi
 * Copyright (C) 2015 revosec AG
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 */

#include "aesni_cmac.h"
#include "aesni_key.h"

#include <crypto/prfs/mac_prf.h>
#include <crypto/signers/mac_signer.h>

typedef struct private_mac_t private_mac_t;

/**
 * Private data of a mac_t object.
 */
struct private_mac_t {

	/**
	 * Public interface.
	 */
	mac_t public;

	/**
	 * Key schedule for key K
	 */
	aesni_key_t *k;

	/**
	 * K1
	 */
	__m128i k1;

	/**
	 * K2
	 */
	__m128i k2;

	/**
	 * T
	 */
	__m128i t;

	/**
	 * remaining, unprocessed bytes in append mode
	 */
	u_char rem[AES_BLOCK_SIZE];

	/**
	 * number of bytes in rem
	 */
	int rem_size;
};
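
/* Note: instances carry __m128i members (k1, k2, t), which require 16-byte
 * alignment; they are therefore allocated with INIT_ALIGN() in
 * aesni_cmac_create() below and released with free_align() in destroy(). */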

METHOD(mac_t, get_mac, bool,
	private_mac_t *this, chunk_t data, u_int8_t *out)
{
	__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10;
	__m128i t, l, *bi;
	u_int blocks, rem, i;

	if (!this->k)
	{
		return FALSE;
	}

	k0 = this->k->schedule[0];
	k1 = this->k->schedule[1];
	k2 = this->k->schedule[2];
	k3 = this->k->schedule[3];
	k4 = this->k->schedule[4];
	k5 = this->k->schedule[5];
	k6 = this->k->schedule[6];
	k7 = this->k->schedule[7];
	k8 = this->k->schedule[8];
	k9 = this->k->schedule[9];
	k10 = this->k->schedule[10];

	t = this->t;

	if (this->rem_size + data.len > AES_BLOCK_SIZE)
	{
		/* T := 0x00000000000000000000000000000000 (initially)
		 * for each block M_i (except the last)
		 *   X := T XOR M_i;
		 *   T := AES-128(K, X);
		 */

		/* append data to remaining bytes, process block M_1 */
		memcpy(this->rem + this->rem_size, data.ptr,
			   AES_BLOCK_SIZE - this->rem_size);
		data = chunk_skip(data, AES_BLOCK_SIZE - this->rem_size);

		t = _mm_xor_si128(t, _mm_loadu_si128((__m128i*)this->rem));

		t = _mm_xor_si128(t, k0);
		t = _mm_aesenc_si128(t, k1);
		t = _mm_aesenc_si128(t, k2);
		t = _mm_aesenc_si128(t, k3);
		t = _mm_aesenc_si128(t, k4);
		t = _mm_aesenc_si128(t, k5);
		t = _mm_aesenc_si128(t, k6);
		t = _mm_aesenc_si128(t, k7);
		t = _mm_aesenc_si128(t, k8);
		t = _mm_aesenc_si128(t, k9);
		t = _mm_aesenclast_si128(t, k10);

		/* split the remaining data into blocks, withholding the last
		 * (possibly incomplete) block M_n for padding/finalization */
		bi = (__m128i*)data.ptr;
		rem = data.len % AES_BLOCK_SIZE;
		blocks = data.len / AES_BLOCK_SIZE;
		if (!rem && blocks)
		{	/* don't process the last block here, even if it is complete */
			rem = AES_BLOCK_SIZE;
			blocks--;
		}

		/* process blocks M_2 ... M_n-1 */
		for (i = 0; i < blocks; i++)
		{
			t = _mm_xor_si128(t, _mm_loadu_si128(bi + i));

			t = _mm_xor_si128(t, k0);
			t = _mm_aesenc_si128(t, k1);
			t = _mm_aesenc_si128(t, k2);
			t = _mm_aesenc_si128(t, k3);
			t = _mm_aesenc_si128(t, k4);
			t = _mm_aesenc_si128(t, k5);
			t = _mm_aesenc_si128(t, k6);
			t = _mm_aesenc_si128(t, k7);
			t = _mm_aesenc_si128(t, k8);
			t = _mm_aesenc_si128(t, k9);
			t = _mm_aesenclast_si128(t, k10);
		}

		/* store remaining bytes of block M_n */
		memcpy(this->rem, data.ptr + data.len - rem, rem);
		this->rem_size = rem;
	}
	else
	{
		/* no complete block (or last block), just copy into remaining */
		memcpy(this->rem + this->rem_size, data.ptr, data.len);
		this->rem_size += data.len;
	}
	if (out)
	{
		/* if last block is complete
		 *   M_last := M_n XOR K1;
		 * else
		 *   M_last := padding(M_n) XOR K2;
		 */
		if (this->rem_size == AES_BLOCK_SIZE)
		{
			l = _mm_loadu_si128((__m128i*)this->rem);
			l = _mm_xor_si128(l, this->k1);
		}
		else
		{
			/* padding(x) = x || 10^i where i is 128-8*r-1
			 * That is, padding(x) is the concatenation of x and a single '1',
			 * followed by the minimum number of '0's, so that the total length
			 * is equal to 128 bits.
			 */
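			/* For example (illustrative only): a 5-octet final block
			 * 61 62 63 64 65 is padded to
			 * 61 62 63 64 65 80 00 00 00 00 00 00 00 00 00 00
			 * before being XORed with K2. */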
			if (this->rem_size < AES_BLOCK_SIZE)
			{
				memset(this->rem + this->rem_size, 0,
					   AES_BLOCK_SIZE - this->rem_size);
				this->rem[this->rem_size] = 0x80;
			}
			l = _mm_loadu_si128((__m128i*)this->rem);
			l = _mm_xor_si128(l, this->k2);
		}
		/* T := M_last XOR T;
		 * T := AES-128(K,T);
		 */
		t = _mm_xor_si128(l, t);

		t = _mm_xor_si128(t, k0);
		t = _mm_aesenc_si128(t, k1);
		t = _mm_aesenc_si128(t, k2);
		t = _mm_aesenc_si128(t, k3);
		t = _mm_aesenc_si128(t, k4);
		t = _mm_aesenc_si128(t, k5);
		t = _mm_aesenc_si128(t, k6);
		t = _mm_aesenc_si128(t, k7);
		t = _mm_aesenc_si128(t, k8);
		t = _mm_aesenc_si128(t, k9);
		t = _mm_aesenclast_si128(t, k10);

		_mm_storeu_si128((__m128i*)out, t);

		/* reset state */
		t = _mm_setzero_si128();
		this->rem_size = 0;
	}
	this->t = t;
	return TRUE;
}

METHOD(mac_t, get_mac_size, size_t,
	private_mac_t *this)
{
	return AES_BLOCK_SIZE;
}

/**
 * Left-shift the given chunk by one bit.
 */
static void bit_shift(chunk_t chunk)
{
	size_t i;

	for (i = 0; i < chunk.len; i++)
	{
		chunk.ptr[i] <<= 1;
		if (i < chunk.len - 1 && chunk.ptr[i + 1] & 0x80)
		{
			chunk.ptr[i] |= 0x01;
		}
	}
}
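
/* For example (illustrative only): bit_shift() applied to the two-octet chunk
 * { 0x80, 0x01 } yields { 0x00, 0x02 }. The chunk is treated as one big-endian
 * integer: each octet is shifted left and the top bit of the following octet
 * is carried in, while the topmost bit of the whole value is discarded. */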

METHOD(mac_t, set_key, bool,
	private_mac_t *this, chunk_t key)
{
	__m128i rb, msb, l, a;
	u_int round;
	chunk_t k;

	this->t = _mm_setzero_si128();
	this->rem_size = 0;

	/* we support variable keys as defined in RFC 4615 */
	if (key.len == AES_BLOCK_SIZE)
	{
		k = key;
	}
	else
	{	/* use cmac recursively to resize longer or shorter keys */
		k = chunk_alloca(AES_BLOCK_SIZE);
		memset(k.ptr, 0, k.len);
		if (!set_key(this, k) || !get_mac(this, key, k.ptr))
		{
			return FALSE;
		}
	}

	DESTROY_IF(this->k);
	this->k = aesni_key_create(TRUE, k);
	if (!this->k)
	{
		return FALSE;
	}

	/*
	 * Rb = 0x00000000000000000000000000000087
	 * L = 0x00000000000000000000000000000000 encrypted with K
	 * if MSB(L) == 0
	 *   K1 = L << 1
	 * else
	 *   K1 = (L << 1) XOR Rb
	 * if MSB(K1) == 0
	 *   K2 = K1 << 1
	 * else
	 *   K2 = (K1 << 1) XOR Rb
	 */
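	/* Illustrative example (not a test vector): if L has first octet 0x80 and
	 * last octet 0x01 (all other octets zero), then L << 1 has last octet 0x02;
	 * since MSB(L) was set, K1 = (L << 1) XOR Rb, i.e. last octet 0x85. MSB(K1)
	 * is then clear, so K2 = K1 << 1, whose last two octets are 0x01 0x0a. */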

	rb = _mm_set_epi32(0x87000000, 0, 0, 0);
	msb = _mm_set_epi32(0, 0, 0, 0x80);

	l = _mm_setzero_si128();

	l = _mm_xor_si128(l, this->k->schedule[0]);
	for (round = 1; round < this->k->rounds; round++)
	{
		l = _mm_aesenc_si128(l, this->k->schedule[round]);
	}
	l = _mm_aesenclast_si128(l, this->k->schedule[this->k->rounds]);

	this->k1 = l;
	bit_shift(chunk_from_thing(this->k1));
	a = _mm_and_si128(l, msb);
	if (memchr(&a, 0x80, 1))
	{
		this->k1 = _mm_xor_si128(this->k1, rb);
	}
	this->k2 = this->k1;
	bit_shift(chunk_from_thing(this->k2));
	a = _mm_and_si128(this->k1, msb);
	if (memchr(&a, 0x80, 1))
	{
		this->k2 = _mm_xor_si128(this->k2, rb);
	}

	return TRUE;
}

METHOD(mac_t, destroy, void,
	private_mac_t *this)
{
	DESTROY_IF(this->k);
	memwipe(&this->k1, sizeof(this->k1));
	memwipe(&this->k2, sizeof(this->k2));
	free_align(this);
}

/*
 * Described in header
 */
mac_t *aesni_cmac_create(encryption_algorithm_t algo, size_t key_size)
{
	private_mac_t *this;

	INIT_ALIGN(this, sizeof(__m128i),
		.public = {
			.get_mac = _get_mac,
			.get_mac_size = _get_mac_size,
			.set_key = _set_key,
			.destroy = _destroy,
		},
	);

	return &this->public;
}

/*
 * Described in header.
 */
prf_t *aesni_cmac_prf_create(pseudo_random_function_t algo)
{
	mac_t *cmac;

	switch (algo)
	{
		case PRF_AES128_CMAC:
			cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
			break;
		default:
			return NULL;
	}
	if (cmac)
	{
		return mac_prf_create(cmac);
	}
	return NULL;
}

/*
 * Described in header
 */
signer_t *aesni_cmac_signer_create(integrity_algorithm_t algo)
{
	size_t truncation;
	mac_t *cmac;

	switch (algo)
	{
		case AUTH_AES_CMAC_96:
			cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
			truncation = 12;
			break;
		default:
			return NULL;
	}
	if (cmac)
	{
		return mac_signer_create(cmac, truncation);
	}
	return NULL;
}
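
/*
 * Minimal usage sketch (illustrative, not part of this file): computing a full
 * 16-byte AES-CMAC with the mac_t built by aesni_cmac_create() above. It
 * assumes the library is initialized and the CPU provides AES-NI; the helper
 * name cmac_example() is hypothetical. Note that aesni_cmac_create() as shown
 * above does not inspect its algo/key_size arguments.
 */
static bool cmac_example(chunk_t key, chunk_t msg, u_int8_t out[AES_BLOCK_SIZE])
{
	mac_t *mac;
	bool success = FALSE;

	mac = aesni_cmac_create(ENCR_AES_CBC, 16);
	if (mac)
	{
		/* set_key() accepts any key length per RFC 4615; passing a non-NULL
		 * out pointer to get_mac() finalizes the MAC over all appended data
		 * and resets the internal state */
		success = mac->set_key(mac, key) &&
				  mac->get_mac(mac, msg, out);
		mac->destroy(mac);
	}
	return success;
}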