Use word-aligned XOR in sha3_absorb()
[strongswan.git] / src / libstrongswan / plugins / sha3 / sha3_hasher.c
1 /*
2 * Copyright (C) 2015 Andreas Steffen
3 * HSR Hochschule fuer Technik Rapperswil
4 *
5 * Based on the implementation by the Keccak, Keyak and Ketje Teams, namely,
6 * Guido Bertoni, Joan Daemen, Michaël Peeters, Gilles Van Assche and
7 * Ronny Van Keer, hereby denoted as "the implementer".
8 *
9 * To the extent possible under law, the implementer has waived all copyright
10 * and related or neighboring rights to the source code in this file.
11 * http://creativecommons.org/publicdomain/zero/1.0/
12 */
13
14 #include <string.h>
15
16 #include "sha3_hasher.h"
17
18 typedef struct private_sha3_hasher_t private_sha3_hasher_t;
19
20 #define KECCAK_STATE_SIZE 200 /* bytes */
21 #define KECCAK_MAX_RATE 144 /* bytes */
22 #define DELIMITED_SUFFIX 0x06
23
24 static const uint64_t round_constants[] = {
25 0x0000000000000001ULL,
26 0x0000000000008082ULL,
27 0x800000000000808aULL,
28 0x8000000080008000ULL,
29 0x000000000000808bULL,
30 0x0000000080000001ULL,
31 0x8000000080008081ULL,
32 0x8000000000008009ULL,
33 0x000000000000008aULL,
34 0x0000000000000088ULL,
35 0x0000000080008009ULL,
36 0x000000008000000aULL,
37 0x000000008000808bULL,
38 0x800000000000008bULL,
39 0x8000000000008089ULL,
40 0x8000000000008003ULL,
41 0x8000000000008002ULL,
42 0x8000000000000080ULL,
43 0x000000000000800aULL,
44 0x800000008000000aULL,
45 0x8000000080008081ULL,
46 0x8000000000008080ULL,
47 0x0000000080000001ULL,
48 0x8000000080008008ULL
49 };
50
51 /**
52 * Private data structure with hashing context for SHA-3
53 */
54 struct private_sha3_hasher_t {
55
56 /**
57 * Public interface for this hasher.
58 */
59 sha3_hasher_t public;
60
61 /**
62 * SHA-3 algorithm to be used
63 */
64 hash_algorithm_t algorithm;
65
66 /**
67 * Internal state of 1600 bits as defined by FIPS-202
68 */
69 uint8_t state[KECCAK_STATE_SIZE];
70
71 /**
72 * Rate in bytes
73 */
74 u_int rate;
75
76 /**
77 * Rate input buffer
78 */
79 uint8_t rate_buffer[KECCAK_MAX_RATE];
80
81 /**
82 * Index pointing to the current position in the rate buffer
83 */
84 u_int rate_index;
85
86 };
87
88 #if BYTE_ORDER != LITTLE_ENDIAN
89 /**
90 * Function to load a 64-bit value using the little-endian (LE) convention.
91 * On a LE platform, this could be greatly simplified using a cast.
92 */
93 static uint64_t load64(const uint8_t *x)
94 {
95 int i;
96 uint64_t u = 0;
97
98 for (i = 7; i >= 0; --i)
99 {
100 u <<= 8;
101 u |= x[i];
102 }
103 return u;
104 }
105
106 /**
107 * Function to store a 64-bit value using the little-endian (LE) convention.
108 * On a LE platform, this could be greatly simplified using a cast.
109 */
110 static void store64(uint8_t *x, uint64_t u)
111 {
112 u_int i;
113
114 for (i = 0; i < 8; ++i)
115 {
116 x[i] = u;
117 u >>= 8;
118 }
119 }
120
121 /**
122 * Function to XOR into a 64-bit value using the little-endian (LE) convention.
123 * On a LE platform, this could be greatly simplified using a cast.
124 */
125 static void xor64(uint8_t *x, uint64_t u)
126 {
127 u_int i;
128
129 for (i = 0; i < 8; ++i)
130 {
131 x[i] ^= u;
132 u >>= 8;
133 }
134 }
135 #endif
136
137 /**
138 * Some macros used by the Keccak-f[1600] permutation.
139 */
140 #define ROL64(a, offset) ((((uint64_t)a) << offset) ^ (((uint64_t)a) >> (64-offset)))
141
142 #if BYTE_ORDER == LITTLE_ENDIAN
143 #define readLane(i) (((uint64_t*)state)[i])
144 #define writeLane(i, lane) (((uint64_t*)state)[i]) = (lane)
145 #define XORLane(i, lane) (((uint64_t*)state)[i]) ^= (lane)
146 #elif BYTE_ORDER == BIG_ENDIAN
147 #define readLane(i) load64((uint8_t*)state+sizeof(uint64_t)*i))
148 #define writeLane(i, lane) store64((uint8_t*)state+sizeof(uint64_t)*i, lane)
149 #define XORLane(i, lane) xor64((uint8_t*)state+sizeof(uint64_t)*i, lane)
150 #endif
151
152 /**
153 * Function that computes the Keccak-f[1600] permutation on the given state.
154 */
155 static void keccak_f1600_state_permute(void *state)
156 {
157 int round;
158
159 for (round = 0; round < 24; round++)
160 {
161 { /* θ step (see [Keccak Reference, Section 2.3.2]) */
162
163 uint64_t C[5], D;
164
165 /* Compute the parity of the columns */
166 C[0] = readLane(0) ^ readLane( 5) ^ readLane(10)
167 ^ readLane(15) ^ readLane(20);
168 C[1] = readLane(1) ^ readLane( 6) ^ readLane(11)
169 ^ readLane(16) ^ readLane(21);
170 C[2] = readLane(2) ^ readLane( 7) ^ readLane(12)
171 ^ readLane(17) ^ readLane(22);
172 C[3] = readLane(3) ^ readLane( 8) ^ readLane(13)
173 ^ readLane(18) ^ readLane(23);
174 C[4] = readLane(4) ^ readLane( 9) ^ readLane(14)
175 ^ readLane(19) ^ readLane(24);
176
177 /* Compute and add the θ effect to the whole column */
178 D = C[4] ^ ROL64(C[1], 1);
179 XORLane( 0, D);
180 XORLane( 5, D);
181 XORLane(10, D);
182 XORLane(15, D);
183 XORLane(20, D);
184
185 D = C[0] ^ ROL64(C[2], 1);
186 XORLane( 1, D);
187 XORLane( 6, D);
188 XORLane(11, D);
189 XORLane(16, D);
190 XORLane(21, D);
191
192 D = C[1] ^ ROL64(C[3], 1);
193 XORLane( 2, D);
194 XORLane( 7, D);
195 XORLane(12, D);
196 XORLane(17, D);
197 XORLane(22, D);
198
199 D = C[2] ^ ROL64(C[4], 1);
200 XORLane( 3, D);
201 XORLane( 8, D);
202 XORLane(13, D);
203 XORLane(18, D);
204 XORLane(23, D);
205
206 D = C[3] ^ ROL64(C[0], 1);
207 XORLane( 4, D);
208 XORLane( 9, D);
209 XORLane(14, D);
210 XORLane(19, D);
211 XORLane(24, D);
212 }
213
214 { /* ρ and π steps (see [Keccak Reference, Sections 2.3.3 and 2.3.4]) */
215
216 uint64_t t1, t2;
217
218 t1 = readLane( 1);
219
220 t2 = readLane(10);
221 writeLane(10, ROL64(t1, 1));
222
223 t1 = readLane( 7);
224 writeLane( 7, ROL64(t2, 3));
225
226 t2 = readLane(11);
227 writeLane(11, ROL64(t1, 6));
228
229 t1 = readLane(17);
230 writeLane(17, ROL64(t2, 10));
231
232 t2 = readLane(18);
233 writeLane(18, ROL64(t1, 15));
234
235 t1 = readLane( 3);
236 writeLane( 3, ROL64(t2, 21));
237
238 t2 = readLane( 5);
239 writeLane( 5, ROL64(t1, 28));
240
241 t1 = readLane(16);
242 writeLane(16, ROL64(t2, 36));
243
244 t2 = readLane( 8);
245 writeLane( 8, ROL64(t1, 45));
246
247 t1 = readLane(21);
248 writeLane(21, ROL64(t2, 55));
249
250 t2 = readLane(24);
251 writeLane(24, ROL64(t1, 2));
252
253 t1 = readLane( 4);
254 writeLane( 4, ROL64(t2, 14));
255
256 t2 = readLane(15);
257 writeLane(15, ROL64(t1, 27));
258
259 t1 = readLane(23);
260 writeLane(23, ROL64(t2, 41));
261
262 t2 = readLane(19);
263 writeLane(19, ROL64(t1, 56));
264
265 t1 = readLane(13);
266 writeLane(13, ROL64(t2, 8));
267
268 t2 = readLane(12);
269 writeLane(12, ROL64(t1, 25));
270
271 t1 = readLane( 2);
272 writeLane( 2, ROL64(t2, 43));
273
274 t2 = readLane(20);
275 writeLane(20, ROL64(t1, 62));
276
277 t1 = readLane(14);
278 writeLane(14, ROL64(t2, 18));
279
280 t2 = readLane(22);
281 writeLane(22, ROL64(t1, 39));
282
283 t1 = readLane( 9);
284 writeLane( 9, ROL64(t2, 61));
285
286 t2 = readLane( 6);
287 writeLane( 6, ROL64(t1, 20));
288
289 writeLane( 1, ROL64(t2, 44));
290 }
291
292 { /* χ step (see [Keccak Reference, Section 2.3.1]) */
293
294 uint64_t t[5];
295
296 t[0] = readLane(0);
297 t[1] = readLane(1);
298 t[2] = readLane(2);
299 t[3] = readLane(3);
300 t[4] = readLane(4);
301
302 writeLane(0, t[0] ^ ((~t[1]) & t[2]));
303 writeLane(1, t[1] ^ ((~t[2]) & t[3]));
304 writeLane(2, t[2] ^ ((~t[3]) & t[4]));
305 writeLane(3, t[3] ^ ((~t[4]) & t[0]));
306 writeLane(4, t[4] ^ ((~t[0]) & t[1]));
307
308 t[0] = readLane(5);
309 t[1] = readLane(6);
310 t[2] = readLane(7);
311 t[3] = readLane(8);
312 t[4] = readLane(9);
313
314 writeLane(5, t[0] ^ ((~t[1]) & t[2]));
315 writeLane(6, t[1] ^ ((~t[2]) & t[3]));
316 writeLane(7, t[2] ^ ((~t[3]) & t[4]));
317 writeLane(8, t[3] ^ ((~t[4]) & t[0]));
318 writeLane(9, t[4] ^ ((~t[0]) & t[1]));
319
320 t[0] = readLane(10);
321 t[1] = readLane(11);
322 t[2] = readLane(12);
323 t[3] = readLane(13);
324 t[4] = readLane(14);
325
326 writeLane(10, t[0] ^ ((~t[1]) & t[2]));
327 writeLane(11, t[1] ^ ((~t[2]) & t[3]));
328 writeLane(12, t[2] ^ ((~t[3]) & t[4]));
329 writeLane(13, t[3] ^ ((~t[4]) & t[0]));
330 writeLane(14, t[4] ^ ((~t[0]) & t[1]));
331
332 t[0] = readLane(15);
333 t[1] = readLane(16);
334 t[2] = readLane(17);
335 t[3] = readLane(18);
336 t[4] = readLane(19);
337
338 writeLane(15, t[0] ^ ((~t[1]) & t[2]));
339 writeLane(16, t[1] ^ ((~t[2]) & t[3]));
340 writeLane(17, t[2] ^ ((~t[3]) & t[4]));
341 writeLane(18, t[3] ^ ((~t[4]) & t[0]));
342 writeLane(19, t[4] ^ ((~t[0]) & t[1]));
343
344 t[0] = readLane(20);
345 t[1] = readLane(21);
346 t[2] = readLane(22);
347 t[3] = readLane(23);
348 t[4] = readLane(24);
349
350 writeLane(20, t[0] ^ ((~t[1]) & t[2]));
351 writeLane(21, t[1] ^ ((~t[2]) & t[3]));
352 writeLane(22, t[2] ^ ((~t[3]) & t[4]));
353 writeLane(23, t[3] ^ ((~t[4]) & t[0]));
354 writeLane(24, t[4] ^ ((~t[0]) & t[1]));
355 }
356
357 { /* ι step (see [Keccak Reference, Section 2.3.5]) */
358
359 XORLane(0, round_constants[round]);
360 }
361 }
362 }
363
364 METHOD(hasher_t, reset, bool,
365 private_sha3_hasher_t *this)
366 {
367 memset(this->state, 0x00, KECCAK_STATE_SIZE);
368 this->rate_index = 0;
369
370 return TRUE;
371 }
372
373 METHOD(hasher_t, get_hash_size, size_t,
374 private_sha3_hasher_t *this)
375 {
376 switch (this->algorithm)
377 {
378 case HASH_SHA3_224:
379 return HASH_SIZE_SHA224;
380 case HASH_SHA3_256:
381 return HASH_SIZE_SHA256;
382 case HASH_SHA3_384:
383 return HASH_SIZE_SHA384;
384 case HASH_SHA3_512:
385 return HASH_SIZE_SHA512;
386 default:
387 return 0;
388 }
389 }
390
391 static void sha3_absorb(private_sha3_hasher_t *this, chunk_t data)
392 {
393 uint64_t *buffer_lanes, *state_lanes;
394 size_t len, rate_lanes;
395 int i;
396
397 buffer_lanes = (uint64_t*)this->rate_buffer;
398 state_lanes = (uint64_t*)this->state;
399 rate_lanes = this->rate / sizeof(uint64_t);
400
401 while (data.len)
402 {
403 len = min(data.len, this->rate - this->rate_index);
404 memcpy(this->rate_buffer + this->rate_index, data.ptr, len);
405 this->rate_index += len;
406 data.ptr += len;
407 data.len -= len;
408
409 if (this->rate_index == this->rate)
410 {
411 for (i = 0; i < rate_lanes; i++)
412 {
413 state_lanes[i] ^= buffer_lanes[i];
414 }
415 this->rate_index = 0;
416
417 keccak_f1600_state_permute(this->state);
418 }
419 }
420 }
421
422 static void sha3_final(private_sha3_hasher_t *this)
423 {
424 uint64_t *buffer_lanes, *state_lanes;
425 size_t rate_lanes, remainder;
426 int i;
427
428 /* Add the delimitedSuffix as the first bit of padding */
429 this->rate_buffer[this->rate_index++] = DELIMITED_SUFFIX;
430
431 buffer_lanes = (uint64_t*)this->rate_buffer;
432 state_lanes = (uint64_t*)this->state;
433 rate_lanes = this->rate_index / sizeof(uint64_t);
434
435 remainder = this->rate_index - rate_lanes * sizeof(uint64_t);
436 if (remainder)
437 {
438 memset(this->rate_buffer + this->rate_index, 0x00,
439 sizeof(uint64_t) - remainder);
440 rate_lanes++;
441 }
442 for (i = 0; i < rate_lanes; i++)
443 {
444 state_lanes[i] ^= buffer_lanes[i];
445 }
446
447 /* Add the second bit of padding */
448 this->state[this->rate - 1] ^= 0x80;
449
450 /* Switch to the squeezing phase */
451 keccak_f1600_state_permute(this->state);
452 }
453
454 METHOD(hasher_t, get_hash, bool,
455 private_sha3_hasher_t *this, chunk_t chunk, uint8_t *buffer)
456 {
457 sha3_absorb(this, chunk);
458
459 if (buffer != NULL)
460 {
461 sha3_final(this);
462 memcpy(buffer, this->state, get_hash_size(this));
463 reset(this);
464 }
465 return TRUE;
466 }
467
468 METHOD(hasher_t, allocate_hash, bool,
469 private_sha3_hasher_t *this, chunk_t chunk, chunk_t *hash)
470 {
471 chunk_t allocated_hash;
472
473 sha3_absorb(this, chunk);
474
475 if (hash != NULL)
476 {
477 sha3_final(this);
478 allocated_hash = chunk_alloc(get_hash_size(this));
479 memcpy(allocated_hash.ptr, this->state, allocated_hash.len);
480 reset(this);
481 *hash = allocated_hash;
482 }
483 return TRUE;
484 }
485
486 METHOD(hasher_t, destroy, void,
487 sha3_hasher_t *this)
488 {
489 free(this);
490 }
491
492 /*
493 * Described in header.
494 */
495 sha3_hasher_t *sha3_hasher_create(hash_algorithm_t algorithm)
496 {
497 private_sha3_hasher_t *this;
498
499 switch (algorithm)
500 {
501 case HASH_SHA3_224:
502 case HASH_SHA3_256:
503 case HASH_SHA3_384:
504 case HASH_SHA3_512:
505 break;
506 default:
507 return NULL;
508 }
509
510 INIT(this,
511 .public = {
512 .hasher_interface = {
513 .reset = _reset,
514 .get_hash_size = _get_hash_size,
515 .get_hash = _get_hash,
516 .allocate_hash = _allocate_hash,
517 .destroy = _destroy,
518 },
519 },
520 .algorithm = algorithm,
521 );
522
523 this->rate = KECCAK_STATE_SIZE - 2*get_hash_size(this);
524 reset(this);
525
526 return &this->public;
527 }