Merge branch 'aesni'
[strongswan.git] / src / libstrongswan / plugins / aesni / aesni_cmac.c
1 /*
2 * Copyright (C) 2012 Tobias Brunner
3 * Hochschule fuer Technik Rapperswil
4 * Copyright (C) 2015 Martin Willi
5 * Copyright (C) 2015 revosec AG
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * for more details.
16 */
17
18 #include "aesni_cmac.h"
19 #include "aesni_key.h"
20
21 #include <crypto/prfs/mac_prf.h>
22 #include <crypto/signers/mac_signer.h>
23
24 typedef struct private_mac_t private_mac_t;
25
26 /**
27 * Private data of a mac_t object.
28 */
29 struct private_mac_t {
30
31 /**
32 * Public interface.
33 */
34 mac_t public;
35
36 /**
37 * Key schedule for key K
38 */
39 aesni_key_t *k;
40
41 /**
42 * K1
43 */
44 __m128i k1;
45
46 /**
47 * K2
48 */
49 __m128i k2;
50
51 /**
52 * T
53 */
54 __m128i t;
55
56 /**
57 * remaining, unprocessed bytes in append mode
58 */
59 u_char rem[AES_BLOCK_SIZE];
60
61 /**
62 * number of bytes in remaining
63 */
64 int rem_size;
65 };
66
67 METHOD(mac_t, get_mac, bool,
68 private_mac_t *this, chunk_t data, u_int8_t *out)
69 {
70 __m128i *ks, t, l, *bi;
71 u_int blocks, rem, i;
72
73 if (!this->k)
74 {
75 return FALSE;
76 }
77
78 ks = this->k->schedule;
79 t = this->t;
80
81 if (this->rem_size + data.len > AES_BLOCK_SIZE)
82 {
83 /* T := 0x00000000000000000000000000000000 (initially)
84 * for each block M_i (except the last)
85 * X := T XOR M_i;
86 * T := AES-128(K, X);
87 */
88
89 /* append data to remaining bytes, process block M_1 */
90 memcpy(this->rem + this->rem_size, data.ptr,
91 AES_BLOCK_SIZE - this->rem_size);
92 data = chunk_skip(data, AES_BLOCK_SIZE - this->rem_size);
93
94 t = _mm_xor_si128(t, _mm_loadu_si128((__m128i*)this->rem));
95
96 t = _mm_xor_si128(t, ks[0]);
97 t = _mm_aesenc_si128(t, ks[1]);
98 t = _mm_aesenc_si128(t, ks[2]);
99 t = _mm_aesenc_si128(t, ks[3]);
100 t = _mm_aesenc_si128(t, ks[4]);
101 t = _mm_aesenc_si128(t, ks[5]);
102 t = _mm_aesenc_si128(t, ks[6]);
103 t = _mm_aesenc_si128(t, ks[7]);
104 t = _mm_aesenc_si128(t, ks[8]);
105 t = _mm_aesenc_si128(t, ks[9]);
106 t = _mm_aesenclast_si128(t, ks[10]);
107
108 /* process blocks M_2 ... M_n-1 */
109 bi = (__m128i*)data.ptr;
110 rem = data.len % AES_BLOCK_SIZE;
111 blocks = data.len / AES_BLOCK_SIZE;
112 if (!rem && blocks)
113 { /* don't do last block */
114 rem = AES_BLOCK_SIZE;
115 blocks--;
116 }
117
118 /* process blocks M[2] ... M[n-1] */
119 for (i = 0; i < blocks; i++)
120 {
121 t = _mm_xor_si128(t, _mm_loadu_si128(bi + i));
122
123 t = _mm_xor_si128(t, ks[0]);
124 t = _mm_aesenc_si128(t, ks[1]);
125 t = _mm_aesenc_si128(t, ks[2]);
126 t = _mm_aesenc_si128(t, ks[3]);
127 t = _mm_aesenc_si128(t, ks[4]);
128 t = _mm_aesenc_si128(t, ks[5]);
129 t = _mm_aesenc_si128(t, ks[6]);
130 t = _mm_aesenc_si128(t, ks[7]);
131 t = _mm_aesenc_si128(t, ks[8]);
132 t = _mm_aesenc_si128(t, ks[9]);
133 t = _mm_aesenclast_si128(t, ks[10]);
134 }
135
136 /* store remaining bytes of block M_n */
137 memcpy(this->rem, data.ptr + data.len - rem, rem);
138 this->rem_size = rem;
139 }
140 else
141 {
142 /* no complete block (or last block), just copy into remaining */
143 memcpy(this->rem + this->rem_size, data.ptr, data.len);
144 this->rem_size += data.len;
145 }
146 if (out)
147 {
148 /* if last block is complete
149 * M_last := M_n XOR K1;
150 * else
151 * M_last := padding(M_n) XOR K2;
152 */
153 if (this->rem_size == AES_BLOCK_SIZE)
154 {
155 l = _mm_loadu_si128((__m128i*)this->rem);
156 l = _mm_xor_si128(l, this->k1);
157 }
158 else
159 {
160 /* padding(x) = x || 10^i where i is 128-8*r-1
161 * That is, padding(x) is the concatenation of x and a single '1',
162 * followed by the minimum number of '0's, so that the total length is
163 * equal to 128 bits.
164 */
165 if (this->rem_size < AES_BLOCK_SIZE)
166 {
167 memset(this->rem + this->rem_size, 0,
168 AES_BLOCK_SIZE - this->rem_size);
169 this->rem[this->rem_size] = 0x80;
170 }
171 l = _mm_loadu_si128((__m128i*)this->rem);
172 l = _mm_xor_si128(l, this->k2);
173 }
174 /* T := M_last XOR T;
175 * T := AES-128(K,T);
176 */
177 t = _mm_xor_si128(l, t);
178
179 t = _mm_xor_si128(t, ks[0]);
180 t = _mm_aesenc_si128(t, ks[1]);
181 t = _mm_aesenc_si128(t, ks[2]);
182 t = _mm_aesenc_si128(t, ks[3]);
183 t = _mm_aesenc_si128(t, ks[4]);
184 t = _mm_aesenc_si128(t, ks[5]);
185 t = _mm_aesenc_si128(t, ks[6]);
186 t = _mm_aesenc_si128(t, ks[7]);
187 t = _mm_aesenc_si128(t, ks[8]);
188 t = _mm_aesenc_si128(t, ks[9]);
189 t = _mm_aesenclast_si128(t, ks[10]);
190
191 _mm_storeu_si128((__m128i*)out, t);
192
193 /* reset state */
194 t = _mm_setzero_si128();
195 this->rem_size = 0;
196 }
197 this->t = t;
198 return TRUE;
199 }
200
201 METHOD(mac_t, get_mac_size, size_t,
202 private_mac_t *this)
203 {
204 return AES_BLOCK_SIZE;
205 }
206
207 /**
208 * Left-shift the given chunk by one bit.
209 */
210 static void bit_shift(chunk_t chunk)
211 {
212 size_t i;
213
214 for (i = 0; i < chunk.len; i++)
215 {
216 chunk.ptr[i] <<= 1;
217 if (i < chunk.len - 1 && chunk.ptr[i + 1] & 0x80)
218 {
219 chunk.ptr[i] |= 0x01;
220 }
221 }
222 }
223
224 METHOD(mac_t, set_key, bool,
225 private_mac_t *this, chunk_t key)
226 {
227 __m128i rb, msb, l, a;
228 u_int round;
229 chunk_t k;
230
231 this->t = _mm_setzero_si128();
232 this->rem_size = 0;
233
234 /* we support variable keys as defined in RFC 4615 */
235 if (key.len == AES_BLOCK_SIZE)
236 {
237 k = key;
238 }
239 else
240 { /* use cmac recursively to resize longer or shorter keys */
241 k = chunk_alloca(AES_BLOCK_SIZE);
242 memset(k.ptr, 0, k.len);
243 if (!set_key(this, k) || !get_mac(this, key, k.ptr))
244 {
245 return FALSE;
246 }
247 }
248
249 DESTROY_IF(this->k);
250 this->k = aesni_key_create(TRUE, k);
251 if (!this->k)
252 {
253 return FALSE;
254 }
255
256 /*
257 * Rb = 0x00000000000000000000000000000087
258 * L = 0x00000000000000000000000000000000 encrypted with K
259 * if MSB(L) == 0
260 * K1 = L << 1
261 * else
262 * K1 = (L << 1) XOR Rb
263 * if MSB(K1) == 0
264 * K2 = K1 << 1
265 * else
266 * K2 = (K1 << 1) XOR Rb
267 */
268
269 rb = _mm_set_epi32(0x87000000, 0, 0, 0);
270 msb = _mm_set_epi32(0, 0, 0, 0x80);
271
272 l = _mm_setzero_si128();
273
274 l = _mm_xor_si128(l, this->k->schedule[0]);
275 for (round = 1; round < this->k->rounds; round++)
276 {
277 l = _mm_aesenc_si128(l, this->k->schedule[round]);
278 }
279 l = _mm_aesenclast_si128(l, this->k->schedule[this->k->rounds]);
280
281 this->k1 = l;
282 bit_shift(chunk_from_thing(this->k1));
283 a = _mm_and_si128(l, msb);
284 if (memchr(&a, 0x80, 1))
285 {
286 this->k1 = _mm_xor_si128(this->k1, rb);
287 }
288 this->k2 = this->k1;
289 bit_shift(chunk_from_thing(this->k2));
290 a = _mm_and_si128(this->k1, msb);
291 if (memchr(&a, 0x80, 1))
292 {
293 this->k2 = _mm_xor_si128(this->k2, rb);
294 }
295
296 return TRUE;
297 }
298
299 METHOD(mac_t, destroy, void,
300 private_mac_t *this)
301 {
302 DESTROY_IF(this->k);
303 memwipe(&this->k1, sizeof(this->k1));
304 memwipe(&this->k2, sizeof(this->k2));
305 free_align(this);
306 }
307
308 /*
309 * Described in header
310 */
311 mac_t *aesni_cmac_create(encryption_algorithm_t algo, size_t key_size)
312 {
313 private_mac_t *this;
314
315 INIT_ALIGN(this, sizeof(__m128i),
316 .public = {
317 .get_mac = _get_mac,
318 .get_mac_size = _get_mac_size,
319 .set_key = _set_key,
320 .destroy = _destroy,
321 },
322 );
323
324 return &this->public;
325 }
326
327 /*
328 * Described in header.
329 */
330 prf_t *aesni_cmac_prf_create(pseudo_random_function_t algo)
331 {
332 mac_t *cmac;
333
334 switch (algo)
335 {
336 case PRF_AES128_CMAC:
337 cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
338 break;
339 default:
340 return NULL;
341 }
342 if (cmac)
343 {
344 return mac_prf_create(cmac);
345 }
346 return NULL;
347 }
348
349 /*
350 * Described in header
351 */
352 signer_t *aesni_cmac_signer_create(integrity_algorithm_t algo)
353 {
354 size_t truncation;
355 mac_t *cmac;
356
357 switch (algo)
358 {
359 case AUTH_AES_CMAC_96:
360 cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
361 truncation = 12;
362 break;
363 default:
364 return NULL;
365 }
366 if (cmac)
367 {
368 return mac_signer_create(cmac, truncation);
369 }
370 return NULL;
371 }