sha3: Fix readLane() macro on big-endian platforms
[strongswan.git] / src / libstrongswan / plugins / sha3 / sha3_keccak.c
1 /*
2 * Copyright (C) 2015-2016 Andreas Steffen
3 * HSR Hochschule fuer Technik Rapperswil
4 *
5 * Based on the implementation by the Keccak, Keyak and Ketje Teams, namely,
6 * Guido Bertoni, Joan Daemen, Michaël Peeters, Gilles Van Assche and
7 * Ronny Van Keer, hereby denoted as "the implementer".
8 *
9 * To the extent possible under law, the implementer has waived all copyright
10 * and related or neighboring rights to the source code in this file.
11 * http://creativecommons.org/publicdomain/zero/1.0/
12 */
13
14 #include <string.h>
15
16 #include "sha3_keccak.h"
17
typedef struct private_sha3_keccak_t private_sha3_keccak_t;

/** Size of the Keccak state (1600 bits), in bytes */
#define KECCAK_STATE_SIZE	200

/** Largest rate, in bytes, that sha3_keccak_create() accepts */
#define KECCAK_MAX_RATE		168
22
/**
 * Per-round constants of Keccak-f[1600], indexed by round number (0..23).
 * The iota step XORs round_constants[round] into lane 0.
 */
static const uint64_t round_constants[24] = {
	0x0000000000000001ULL, 0x0000000000008082ULL,	/* rounds  0,  1 */
	0x800000000000808aULL, 0x8000000080008000ULL,	/* rounds  2,  3 */
	0x000000000000808bULL, 0x0000000080000001ULL,	/* rounds  4,  5 */
	0x8000000080008081ULL, 0x8000000000008009ULL,	/* rounds  6,  7 */
	0x000000000000008aULL, 0x0000000000000088ULL,	/* rounds  8,  9 */
	0x0000000080008009ULL, 0x000000008000000aULL,	/* rounds 10, 11 */
	0x000000008000808bULL, 0x800000000000008bULL,	/* rounds 12, 13 */
	0x8000000000008089ULL, 0x8000000000008003ULL,	/* rounds 14, 15 */
	0x8000000000008002ULL, 0x8000000000000080ULL,	/* rounds 16, 17 */
	0x000000000000800aULL, 0x800000008000000aULL,	/* rounds 18, 19 */
	0x8000000080008081ULL, 0x8000000000008080ULL,	/* rounds 20, 21 */
	0x0000000080000001ULL, 0x8000000080008008ULL	/* rounds 22, 23 */
};
49
50 /**
51 * Private data structure with hashing context for SHA-3
52 */
53 struct private_sha3_keccak_t {
54
55 /**
56 * Public interface for this hasher.
57 */
58 sha3_keccak_t public;
59
60 /**
61 * Internal state of 1600 bits as defined by FIPS-202
62 */
63 uint8_t state[KECCAK_STATE_SIZE];
64
65 /**
66 * Rate in bytes
67 */
68 u_int rate;
69
70 /**
71 * Rate input buffer
72 */
73 uint8_t rate_buffer[KECCAK_MAX_RATE];
74
75 /**
76 * Index pointing to the current position in the rate buffer
77 */
78 u_int rate_index;
79
80 /**
81 * Suffix delimiting the input message
82 */
83 uint8_t delimited_suffix;
84
85 };
86
87 #if BYTE_ORDER != LITTLE_ENDIAN
/**
 * Assemble a 64-bit value from 8 bytes stored least significant byte first,
 * independent of the byte order of the host.
 *
 * @param x		pointer to the 8 input bytes
 * @return		value with x[0] in bits 0..7 and x[7] in bits 56..63
 */
static uint64_t load64(const uint8_t *x)
{
	uint64_t value = 0;
	unsigned int shift;

	for (shift = 0; shift < 64; shift += 8)
	{
		value |= ((uint64_t)x[shift / 8]) << shift;
	}
	return value;
}
104
/**
 * Write a 64-bit value as 8 bytes, least significant byte first, independent
 * of the byte order of the host.
 *
 * @param x		destination, 8 bytes are written
 * @param u		value to store
 */
static void store64(uint8_t *x, uint64_t u)
{
	uint8_t *end = x + sizeof(uint64_t);

	while (x < end)
	{
		*x++ = (uint8_t)u;
		u >>= 8;
	}
}
119
/**
 * XOR a 64-bit value into 8 bytes that hold a value least significant byte
 * first, independent of the byte order of the host.
 *
 * @param x		8 bytes that are updated in place
 * @param u		value to XOR in
 */
static void xor64(uint8_t *x, uint64_t u)
{
	uint8_t *end = x + sizeof(uint64_t);

	while (x < end)
	{
		*x++ ^= (uint8_t)u;
		u >>= 8;
	}
}
134 #endif
135
/**
 * Rotate the 64-bit value a to the left by offset bits.
 *
 * Both arguments are fully parenthesized, so arbitrary expressions may be
 * passed. Each argument is evaluated twice, so neither may have side effects.
 * The offset must be in the range 1..63: an offset of 0 or 64 would shift a
 * 64-bit value by 64 bits, which is undefined.
 */
#define ROL64(a, offset) \
	((((uint64_t)(a)) << (offset)) ^ (((uint64_t)(a)) >> (64 - (offset))))
140
/*
 * Accessors for the 64-bit lanes of the Keccak state. They operate on a
 * pointer that must be in scope under the name `state`; lane i occupies the
 * 8 bytes starting at byte offset 8*i and holds its value least significant
 * byte first.
 *
 * Little-endian hosts access the lanes directly. Every other byte order goes
 * through the byte-wise load64()/store64()/xor64() helpers, which are compiled
 * under the matching condition (BYTE_ORDER != LITTLE_ENDIAN).
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define readLane(i) (((uint64_t*)state)[(i)])
#define writeLane(i, lane) (((uint64_t*)state)[(i)]) = (lane)
#define XORLane(i, lane) (((uint64_t*)state)[(i)]) ^= (lane)
#else
#define readLane(i) load64((uint8_t*)state + sizeof(uint64_t) * (i))
#define writeLane(i, lane) store64((uint8_t*)state + sizeof(uint64_t) * (i), (lane))
#define XORLane(i, lane) xor64((uint8_t*)state + sizeof(uint64_t) * (i), (lane))
#endif
150
151 /**
152 * Function that computes the Keccak-f[1600] permutation on the given state.
153 */
154 static void keccak_f1600_state_permute(void *state)
155 {
156 int round;
157
158 for (round = 0; round < 24; round++)
159 {
160 { /* θ step (see [Keccak Reference, Section 2.3.2]) */
161
162 uint64_t C[5], D;
163
164 /* Compute the parity of the columns */
165 C[0] = readLane(0) ^ readLane( 5) ^ readLane(10)
166 ^ readLane(15) ^ readLane(20);
167 C[1] = readLane(1) ^ readLane( 6) ^ readLane(11)
168 ^ readLane(16) ^ readLane(21);
169 C[2] = readLane(2) ^ readLane( 7) ^ readLane(12)
170 ^ readLane(17) ^ readLane(22);
171 C[3] = readLane(3) ^ readLane( 8) ^ readLane(13)
172 ^ readLane(18) ^ readLane(23);
173 C[4] = readLane(4) ^ readLane( 9) ^ readLane(14)
174 ^ readLane(19) ^ readLane(24);
175
176 /* Compute and add the θ effect to the whole column */
177 D = C[4] ^ ROL64(C[1], 1);
178 XORLane( 0, D);
179 XORLane( 5, D);
180 XORLane(10, D);
181 XORLane(15, D);
182 XORLane(20, D);
183
184 D = C[0] ^ ROL64(C[2], 1);
185 XORLane( 1, D);
186 XORLane( 6, D);
187 XORLane(11, D);
188 XORLane(16, D);
189 XORLane(21, D);
190
191 D = C[1] ^ ROL64(C[3], 1);
192 XORLane( 2, D);
193 XORLane( 7, D);
194 XORLane(12, D);
195 XORLane(17, D);
196 XORLane(22, D);
197
198 D = C[2] ^ ROL64(C[4], 1);
199 XORLane( 3, D);
200 XORLane( 8, D);
201 XORLane(13, D);
202 XORLane(18, D);
203 XORLane(23, D);
204
205 D = C[3] ^ ROL64(C[0], 1);
206 XORLane( 4, D);
207 XORLane( 9, D);
208 XORLane(14, D);
209 XORLane(19, D);
210 XORLane(24, D);
211 }
212
213 { /* ρ and π steps (see [Keccak Reference, Sections 2.3.3 and 2.3.4]) */
214
215 uint64_t t1, t2;
216
217 t1 = readLane( 1);
218
219 t2 = readLane(10);
220 writeLane(10, ROL64(t1, 1));
221
222 t1 = readLane( 7);
223 writeLane( 7, ROL64(t2, 3));
224
225 t2 = readLane(11);
226 writeLane(11, ROL64(t1, 6));
227
228 t1 = readLane(17);
229 writeLane(17, ROL64(t2, 10));
230
231 t2 = readLane(18);
232 writeLane(18, ROL64(t1, 15));
233
234 t1 = readLane( 3);
235 writeLane( 3, ROL64(t2, 21));
236
237 t2 = readLane( 5);
238 writeLane( 5, ROL64(t1, 28));
239
240 t1 = readLane(16);
241 writeLane(16, ROL64(t2, 36));
242
243 t2 = readLane( 8);
244 writeLane( 8, ROL64(t1, 45));
245
246 t1 = readLane(21);
247 writeLane(21, ROL64(t2, 55));
248
249 t2 = readLane(24);
250 writeLane(24, ROL64(t1, 2));
251
252 t1 = readLane( 4);
253 writeLane( 4, ROL64(t2, 14));
254
255 t2 = readLane(15);
256 writeLane(15, ROL64(t1, 27));
257
258 t1 = readLane(23);
259 writeLane(23, ROL64(t2, 41));
260
261 t2 = readLane(19);
262 writeLane(19, ROL64(t1, 56));
263
264 t1 = readLane(13);
265 writeLane(13, ROL64(t2, 8));
266
267 t2 = readLane(12);
268 writeLane(12, ROL64(t1, 25));
269
270 t1 = readLane( 2);
271 writeLane( 2, ROL64(t2, 43));
272
273 t2 = readLane(20);
274 writeLane(20, ROL64(t1, 62));
275
276 t1 = readLane(14);
277 writeLane(14, ROL64(t2, 18));
278
279 t2 = readLane(22);
280 writeLane(22, ROL64(t1, 39));
281
282 t1 = readLane( 9);
283 writeLane( 9, ROL64(t2, 61));
284
285 t2 = readLane( 6);
286 writeLane( 6, ROL64(t1, 20));
287
288 writeLane( 1, ROL64(t2, 44));
289 }
290
291 { /* χ step (see [Keccak Reference, Section 2.3.1]) */
292
293 uint64_t t[5];
294
295 t[0] = readLane(0);
296 t[1] = readLane(1);
297 t[2] = readLane(2);
298 t[3] = readLane(3);
299 t[4] = readLane(4);
300
301 writeLane(0, t[0] ^ ((~t[1]) & t[2]));
302 writeLane(1, t[1] ^ ((~t[2]) & t[3]));
303 writeLane(2, t[2] ^ ((~t[3]) & t[4]));
304 writeLane(3, t[3] ^ ((~t[4]) & t[0]));
305 writeLane(4, t[4] ^ ((~t[0]) & t[1]));
306
307 t[0] = readLane(5);
308 t[1] = readLane(6);
309 t[2] = readLane(7);
310 t[3] = readLane(8);
311 t[4] = readLane(9);
312
313 writeLane(5, t[0] ^ ((~t[1]) & t[2]));
314 writeLane(6, t[1] ^ ((~t[2]) & t[3]));
315 writeLane(7, t[2] ^ ((~t[3]) & t[4]));
316 writeLane(8, t[3] ^ ((~t[4]) & t[0]));
317 writeLane(9, t[4] ^ ((~t[0]) & t[1]));
318
319 t[0] = readLane(10);
320 t[1] = readLane(11);
321 t[2] = readLane(12);
322 t[3] = readLane(13);
323 t[4] = readLane(14);
324
325 writeLane(10, t[0] ^ ((~t[1]) & t[2]));
326 writeLane(11, t[1] ^ ((~t[2]) & t[3]));
327 writeLane(12, t[2] ^ ((~t[3]) & t[4]));
328 writeLane(13, t[3] ^ ((~t[4]) & t[0]));
329 writeLane(14, t[4] ^ ((~t[0]) & t[1]));
330
331 t[0] = readLane(15);
332 t[1] = readLane(16);
333 t[2] = readLane(17);
334 t[3] = readLane(18);
335 t[4] = readLane(19);
336
337 writeLane(15, t[0] ^ ((~t[1]) & t[2]));
338 writeLane(16, t[1] ^ ((~t[2]) & t[3]));
339 writeLane(17, t[2] ^ ((~t[3]) & t[4]));
340 writeLane(18, t[3] ^ ((~t[4]) & t[0]));
341 writeLane(19, t[4] ^ ((~t[0]) & t[1]));
342
343 t[0] = readLane(20);
344 t[1] = readLane(21);
345 t[2] = readLane(22);
346 t[3] = readLane(23);
347 t[4] = readLane(24);
348
349 writeLane(20, t[0] ^ ((~t[1]) & t[2]));
350 writeLane(21, t[1] ^ ((~t[2]) & t[3]));
351 writeLane(22, t[2] ^ ((~t[3]) & t[4]));
352 writeLane(23, t[3] ^ ((~t[4]) & t[0]));
353 writeLane(24, t[4] ^ ((~t[0]) & t[1]));
354 }
355
356 { /* ι step (see [Keccak Reference, Section 2.3.5]) */
357
358 XORLane(0, round_constants[round]);
359 }
360 }
361 }
362
363 METHOD(sha3_keccak_t, get_rate, u_int,
364 private_sha3_keccak_t *this)
365 {
366 return this->rate;
367 }
368
369 METHOD(sha3_keccak_t, reset, void,
370 private_sha3_keccak_t *this)
371 {
372 memset(this->state, 0x00, KECCAK_STATE_SIZE);
373 this->rate_index = 0;
374 }
375
376
377 METHOD(sha3_keccak_t, absorb, void,
378 private_sha3_keccak_t *this, chunk_t data)
379 {
380 uint64_t *buffer_lanes, *state_lanes;
381 size_t len, rate_lanes;
382 int i;
383
384 buffer_lanes = (uint64_t*)this->rate_buffer;
385 state_lanes = (uint64_t*)this->state;
386 rate_lanes = this->rate / sizeof(uint64_t);
387
388 while (data.len)
389 {
390 len = min(data.len, this->rate - this->rate_index);
391 memcpy(this->rate_buffer + this->rate_index, data.ptr, len);
392 this->rate_index += len;
393 data.ptr += len;
394 data.len -= len;
395
396 if (this->rate_index == this->rate)
397 {
398 for (i = 0; i < rate_lanes; i++)
399 {
400 state_lanes[i] ^= buffer_lanes[i];
401 }
402 this->rate_index = 0;
403
404 keccak_f1600_state_permute(this->state);
405 }
406 }
407 }
408
409 METHOD(sha3_keccak_t, finalize, void,
410 private_sha3_keccak_t *this)
411 {
412 uint64_t *buffer_lanes, *state_lanes;
413 size_t rate_lanes, remainder;
414 int i;
415
416 /* Add the delimitedSuffix as the first bit of padding */
417 this->rate_buffer[this->rate_index++] = this->delimited_suffix;
418
419 buffer_lanes = (uint64_t*)this->rate_buffer;
420 state_lanes = (uint64_t*)this->state;
421 rate_lanes = this->rate_index / sizeof(uint64_t);
422
423 remainder = this->rate_index - rate_lanes * sizeof(uint64_t);
424 if (remainder)
425 {
426 memset(this->rate_buffer + this->rate_index, 0x00,
427 sizeof(uint64_t) - remainder);
428 rate_lanes++;
429 }
430 for (i = 0; i < rate_lanes; i++)
431 {
432 state_lanes[i] ^= buffer_lanes[i];
433 }
434
435 /* Add the second bit of padding */
436 this->state[this->rate - 1] ^= 0x80;
437
438 /* Switch to the squeezing phase */
439 keccak_f1600_state_permute(this->state);
440 this->rate_index = 0;
441 }
442
443 METHOD(sha3_keccak_t, squeeze, void,
444 private_sha3_keccak_t *this, size_t out_len, uint8_t *out)
445 {
446 size_t index = 0, len;
447
448 while (index < out_len)
449 {
450 if (this->rate_index == this->rate)
451 {
452 keccak_f1600_state_permute(this->state);
453 this->rate_index = 0;
454 }
455 len = min(out_len - index, this->rate - this->rate_index);
456 memcpy(out, &this->state[this->rate_index], len);
457 out += len;
458 index += len;
459 this->rate_index += len;
460 }
461 }
462
463 METHOD(sha3_keccak_t, destroy, void,
464 private_sha3_keccak_t *this)
465 {
466 free(this);
467 }
468
469 /*
470 * Described in header.
471 */
472 sha3_keccak_t *sha3_keccak_create(u_int capacity, uint8_t delimited_suffix)
473 {
474 private_sha3_keccak_t *this;
475 int rate;
476
477 rate = KECCAK_STATE_SIZE - capacity;
478
479 if (rate <= 0 || rate > KECCAK_MAX_RATE)
480 {
481 return NULL;
482 }
483
484 INIT(this,
485 .public = {
486 .get_rate = _get_rate,
487 .reset = _reset,
488 .absorb = _absorb,
489 .finalize = _finalize,
490 .squeeze = _squeeze,
491 .destroy = _destroy,
492 },
493 .rate = rate,
494 .delimited_suffix = delimited_suffix,
495 );
496
497 return &this->public;
498 }