fixed aes code, we support now aes128, aes192, aes256 in IKE
[strongswan.git] / src / libstrongswan / crypto / crypters / aes_cbc_crypter.c
1 /**
2 * @file aes_cbc_crypter.c
3 *
4 * @brief Implementation of aes_cbc_crypter_t
5 *
6 */
7
8 /*
9 * Copyright (C) 2001 Dr B. R. Gladman <brg@gladman.uk.net>
10 * Copyright (C) 2005 Jan Hutter, Martin Willi
11 * Hochschule fuer Technik Rapperswil
12 *
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by the
15 * Free Software Foundation; either version 2 of the License, or (at your
16 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
17 *
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 * for more details.
22 */
23
24 #include "aes_cbc_crypter.h"
25
26
27
/*
 * The number of key schedule words for different block and key lengths
 * allowing for method of computation which requires the length to be a
 * multiple of the key length. This version of AES implementation supports
 * all three keylengths 16, 24 and 32 bytes!
 *
 *        Nk =    4    6    8
 *             --------------
 *   Nb = 4 |    60   60   64
 *        6 |    96   90   96
 *        8 |   120  120  120
 */

/* Number of 32-bit words in each key schedule array (largest entry of the table above). */
#define AES_KS_LENGTH 120
/* Number of entries in the dynamically generated round constant table. */
#define AES_RC_LENGTH 29

/* Cipher block size; the sanity check further down accepts 16, 24 or 32. */
#define AES_BLOCK_SIZE 16
44
45 typedef struct private_aes_cbc_crypter_t private_aes_cbc_crypter_t;
46
47 /**
48 * @brief Class implementing the AES symmetric encryption algorithm.
49 *
50 * @ingroup crypters
51 */
52 struct private_aes_cbc_crypter_t {
53
54 /**
55 * Public part of this class.
56 */
57 aes_cbc_crypter_t public;
58
59 /**
60 * Number of words in the key input block.
61 */
62 u_int32_t aes_Nkey;
63
64 /**
65 * The number of cipher rounds.
66 */
67 u_int32_t aes_Nrnd;
68
69 /**
70 * The encryption key schedule.
71 */
72 u_int32_t aes_e_key[AES_KS_LENGTH];
73
74 /**
75 * The decryption key schedule.
76 */
77 u_int32_t aes_d_key[AES_KS_LENGTH];
78
79 /**
80 * Key size of this AES cypher object.
81 */
82 u_int32_t key_size;
83
84 /**
85 * Decrypts a block.
86 *
87 * No memory gets allocated.
88 *
89 * @param this calling object
90 * @param[in] in_blk block to decrypt
91 * @param[out] out_blk decrypted data are written to this location
92 */
93 void (*decrypt_block) (const private_aes_cbc_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[]);
94
95 /**
96 * Encrypts a block.
97 *
98 * No memory gets allocated.
99 *
100 * @param this calling object
101 * @param[in] in_blk block to encrypt
102 * @param[out] out_blk encrypted data are written to this location
103 */
104 void (*encrypt_block) (const private_aes_cbc_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[]);
105 };
106
107
108 /* ugly macro stuff */
109
110 /* 1. Define UNROLL for full loop unrolling in encryption and decryption.
111 * 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
112 * 3. Define FIXED_TABLES for compiled rather than dynamic tables.
113 * 4. Define FF_TABLES to use tables for field multiplies and inverses.
114 * Do not enable this without understanding stack space requirements.
115 * 5. Define ARRAYS to use arrays to hold the local state block. If this
116 * is not defined, individually declared 32-bit words are used.
117 * 6. Define FAST_VARIABLE if a high speed variable block implementation
118 * is needed (essentially three separate fixed block size code sequences)
119 * 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
120 * version using 1 table (2 kbytes of table space) or 4 tables (8
121 * kbytes of table space) for higher speed.
122 * 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
123 * increase by using tables for the last rounds but with more table
124 * space (2 or 8 kbytes extra).
125 * 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
126 * slower version is provided.
127 * 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
128 * or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
129 */
130
/* Option 1/2: full loop unrolling is selected, partial unrolling is off. */
#define UNROLL
//#define PARTIAL_UNROLL

/* Options 3-6: compiled-in tables and the fast variable block code are selected. */
#define FIXED_TABLES
//#define FF_TABLES
//#define ARRAYS
#define FAST_VARIABLE

/* Option 7: four round tables rather than one. */
//#define ONE_TABLE
#define FOUR_TABLES

/* Option 8: four last-round tables rather than one. */
//#define ONE_LR_TABLE
#define FOUR_LR_TABLES

/* Option 10: four tables for fast decryption key scheduling rather than one. */
//#define ONE_IM_TABLE
#define FOUR_IM_TABLES

/* The remaining checks reject mutually exclusive selections at compile time. */
#if defined(UNROLL) && defined (PARTIAL_UNROLL)
#error both UNROLL and PARTIAL_UNROLL are defined
#endif

#if defined(ONE_TABLE) && defined (FOUR_TABLES)
#error both ONE_TABLE and FOUR_TABLES are defined
#endif

#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
#endif

#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
#endif

/* A fixed block size, when given, has to be one of 16, 24 or 32. */
#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
#error an illegal block size has been specified
#endif
167
/**
 * Rotates the bytes of a 32-bit word by n positions towards the higher
 * index positions; bytes leaving the top wrap around into the low
 * positions. n is expected to be 1, 2 or 3.
 */
#define upr(x,n) (((x) << (8 * (n))) | ((x) >> (32 - (8 * (n)))))

/**
 * Moves the bytes of a word by n positions towards the higher index
 * positions without wrap around.
 */
#define ups(x,n) ((x) << (8 * (n)))

/**
 * Extracts byte n from a word, byte 0 being the least significant one.
 */
#define bval(x,n) ((unsigned char)((x) >> (8 * (n))))

/**
 * Assembles a word from four bytes, b0 ending up in the least
 * significant position and b3 in the most significant one.
 */
#define bytes2word(b0, b1, b2, b3) \
	(((u_int32_t)(b3) << 24) | ((u_int32_t)(b2) << 16) | ((u_int32_t)(b1) << 8) | (b0))
185
186
/*
 * AES_LE_OK marks little endian processors without data alignment
 * restrictions: i386 (original code) plus alpha and ia64 (both added and
 * tested by jjo).
 */
#if defined(i386) || defined(_I386) || defined(__i386__) || defined(__i386) || \
	defined(__alpha__) || defined(__alpha) || \
	defined(__ia64__) || defined(__ia64)
#define AES_LE_OK 1
#endif

#ifdef AES_LE_OK
/* little endian processor without data alignment restrictions:
 * whole words are read and written directly */
#define word_in(x)          *(u_int32_t*)(x)
#define const_word_in(x)    *(const u_int32_t*)(x)
#define word_out(x,v)       *(u_int32_t*)(x) = (v)
/* NOTE(review): assigns through a const-qualified lvalue, so any expansion
 * of this macro is rejected by the compiler -- confirm it is unused */
#define const_word_out(x,v) *(const u_int32_t*)(x) = (v)
#else
/* slower but generic: works on big endian processors and with data
 * alignment restrictions by handling one byte at a time, least
 * significant byte first.
 * The additional "const" variants stop "gcc -Wcast-qual" complaints --jjo */
#define word_in(x) \
	((u_int32_t)(((unsigned char *)(x))[0]) | \
	((u_int32_t)(((unsigned char *)(x))[1]) << 8) | \
	((u_int32_t)(((unsigned char *)(x))[2]) << 16) | \
	((u_int32_t)(((unsigned char *)(x))[3]) << 24))
#define const_word_in(x) \
	((const u_int32_t)(((const unsigned char *)(x))[0]) | \
	((const u_int32_t)(((const unsigned char *)(x))[1]) << 8) | \
	((const u_int32_t)(((const unsigned char *)(x))[2]) << 16) | \
	((const u_int32_t)(((const unsigned char *)(x))[3]) << 24))
#define word_out(x,v) \
	((unsigned char *)(x))[0]=(v), \
	((unsigned char *)(x))[1]=((v)>>8), \
	((unsigned char *)(x))[2]=((v)>>16), \
	((unsigned char *)(x))[3]=((v)>>24)
/* NOTE(review): assigns through a const-qualified lvalue, so any expansion
 * of this macro is rejected by the compiler -- confirm it is unused */
#define const_word_out(x,v) \
	((const unsigned char *)(x))[0]=(v), \
	((const unsigned char *)(x))[1]=((v)>>8), \
	((const unsigned char *)(x))[2]=((v)>>16), \
	((const unsigned char *)(x))[3]=((v)>>24)
#endif
213
// Disable at least some poor combinations of options

#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
/* No round tables selected: force the fixed tables and drop unrolling
 * together with the last-round and inverse-mix table options. */
#define FIXED_TABLES
#undef UNROLL
#undef ONE_LR_TABLE
#undef FOUR_LR_TABLES
#undef ONE_IM_TABLE
#undef FOUR_IM_TABLES
#elif !defined(FOUR_TABLES)
/* Only ONE_TABLE selected: downgrade any four-table last-round or
 * inverse-mix request to its one-table counterpart. */
#ifdef FOUR_LR_TABLES
#undef FOUR_LR_TABLES
#define ONE_LR_TABLE
#endif
#ifdef FOUR_IM_TABLES
#undef FOUR_IM_TABLES
#define ONE_IM_TABLE
#endif
#elif !defined(AES_BLOCK_SIZE)
/* FOUR_TABLES without a fixed block size: replace full unrolling by
 * partial unrolling. */
#if defined(UNROLL)
#define PARTIAL_UNROLL
#undef UNROLL
#endif
#endif
238
// the finite field modular polynomial and elements

#define ff_poly 0x011b
#define ff_hi 0x80

// multiply four bytes in GF(2^8) by 'x' {02} in parallel
//
// m1 selects the top bit of every byte, m2 the remaining seven bits and
// m3 is the reduction value folded back into each byte whose top bit
// was set.

#define m1 0x80808080
#define m2 0x7f7f7f7f
#define m3 0x0000001b
#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))

// The following defines provide alternative definitions of FFmulX that might
// give improved performance if a fast 32-bit multiply is not available. Note
// that a temporary variable u needs to be defined where FFmulX is used.

// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
// #define m4 0x1b1b1b1b
// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)

// perform column mix operation on four bytes in parallel
//
// Requires a 32-bit variable named f2 in the calling scope; it receives
// FFmulX(x) as a side effect. The argument is evaluated several times, so
// it must be free of side effects. It is parenthesised before being
// combined with f2 so that compound arguments such as (a | b) are grouped
// correctly.

#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr((x) ^ f2,3) ^ upr(x,2) ^ upr(x,1))
262
263 #if defined(FIXED_TABLES)
264
// the S-Box table
//
// 256 byte values indexed by the input byte, eight entries per row;
// compiled in because FIXED_TABLES is defined.

static const unsigned char s_box[256] =
{
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
    0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
    0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
    0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
    0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
    0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
    0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
    0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
    0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
    0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
    0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
    0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
    0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
    0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
    0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
    0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
    0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
};
302
// the inverse S-Box table
//
// 256 byte values indexed by the substituted byte, eight entries per
// row; entry 0x63 is 0x00, undoing the first entry of the S-Box table.

static const unsigned char inv_s_box[256] =
{
    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
    0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
    0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
    0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
    0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
    0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
    0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
    0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
    0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
    0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
    0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
    0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
    0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
    0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
    0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
    0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
    0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
};
340
// Round constants, each held in the least significant byte of a word.
//
// Number of elements required in this table for different
// block and key lengths is:
//
//          Nk =   4   6   8
//               -----------
//     Nb = 4 |   10   8   7
//          6 |   19  12  11
//          8 |   29  19  14
//
// this table can be a table of bytes if the key schedule
// code is adjusted accordingly

static const u_int32_t rcon_tab[29] =
{
    0x00000001, 0x00000002, 0x00000004, 0x00000008,
    0x00000010, 0x00000020, 0x00000040, 0x00000080,
    0x0000001b, 0x00000036, 0x0000006c, 0x000000d8,
    0x000000ab, 0x0000004d, 0x0000009a, 0x0000002f,
    0x0000005e, 0x000000bc, 0x00000063, 0x000000c6,
    0x00000097, 0x00000035, 0x0000006a, 0x000000d4,
    0x000000b3, 0x0000007d, 0x000000fa, 0x000000ef,
    0x000000c5
};
368
// Token-pasting helpers that turn two-digit hex byte tokens into 32-bit
// hex literals for the table generators below.
//
// r0..r3 paste four byte tokens into one word, each rotating the token
// order by one more position: r0(p,q,r,s) yields 0xpqrs, r1 yields
// 0xqrsp, r2 yields 0xrspq and r3 yields 0xspqr.
//
// w0..w3 place a single byte token at byte position 0..3 (position 0
// being the least significant byte) and leave the other bytes zero.
#define r0(p,q,r,s) 0x##p##q##r##s
#define r1(p,q,r,s) 0x##q##r##s##p
#define r2(p,q,r,s) 0x##r##s##p##q
#define r3(p,q,r,s) 0x##s##p##q##r
#define w0(p) 0x000000##p
#define w1(p) 0x0000##p##00
#define w2(p) 0x00##p##0000
#define w3(p) 0x##p##000000
377
378 #if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
379
// data for forward tables (other than last round)
//
// One r(p,q,r,s) entry per S-box input, in S-box order. Within an entry
// q and r both hold the S-box value, s holds that value multiplied by
// {02} and p holds it multiplied by {03} in GF(2^8); the first entry is
// built from 0x63 (0xc6 = {02}*0x63, 0xa5 = {03}*0x63). The macro named r
// that is active at the point of expansion decides how the four bytes
// are arranged in the generated word.

#define f_table \
    r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
    r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
    r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
    r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
    r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
    r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
    r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
    r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
    r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
    r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
    r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
    r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
    r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
    r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
    r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
    r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
    r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
    r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
    r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
    r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
    r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
    r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
    r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
    r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
    r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
    r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
    r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
    r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
    r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
    r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
    r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
    r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
    r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
    r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
    r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
    r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
    r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
    r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
    r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
    r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
    r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
    r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
    r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
    r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
    r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
    r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
    r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
    r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
    r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
    r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
    r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
    r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
    r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
    r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
    r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
    r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
    r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
    r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
    r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
    r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
    r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
    r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
    r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
    r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
447
// data for inverse tables (other than last round)
//
// One r(p,q,r,s) entry per inverse S-box input, in inverse S-box order.
// With v the inverse S-box value, the entry holds p = {0b}*v, q = {0d}*v,
// r = {09}*v and s = {0e}*v in GF(2^8); the first entry is built from
// v = 0x52. The macro named r that is active at the point of expansion
// decides how the four bytes are arranged in the generated word.

#define i_table \
    r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
    r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
    r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
    r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
    r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
    r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
    r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
    r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
    r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
    r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
    r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
    r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
    r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
    r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
    r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
    r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
    r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
    r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
    r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
    r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
    r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
    r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
    r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
    r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
    r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
    r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
    r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
    r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
    r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
    r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
    r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
    r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
    r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
    r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
    r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
    r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
    r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
    r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
    r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
    r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
    r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
    r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
    r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
    r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
    r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
    r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
    r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
    r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
    r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
    r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
    r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
    r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
    r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
    r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
    r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
    r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
    r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
    r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
    r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
    r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
    r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
    r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
    r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
    r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
515
// generate the required tables in the desired endian format
//
// f_table and i_table are expanded once per table with the macro r
// pointing at a different rN helper each time, so table n holds every
// entry with its bytes rotated by n positions. The #undef/#define pairs
// sit between the initializer rows on purpose: each one takes effect
// for the expansion that follows it.

#undef r
#define r r0

/* forward round table(s): one table for ONE_TABLE, four rotated copies for FOUR_TABLES */
#if defined(ONE_TABLE)
static const u_int32_t ft_tab[256] =
{ f_table };
#elif defined(FOUR_TABLES)
static const u_int32_t ft_tab[4][256] =
{ { f_table },
#undef r
#define r r1
{ f_table },
#undef r
#define r r2
{ f_table },
#undef r
#define r r3
{ f_table }
};
#endif

#undef r
#define r r0
/* inverse round table(s), built from i_table in the same way */
#if defined(ONE_TABLE)
static const u_int32_t it_tab[256] =
{ i_table };
#elif defined(FOUR_TABLES)
static const u_int32_t it_tab[4][256] =
{ { i_table },
#undef r
#define r r1
{ i_table },
#undef r
#define r r2
{ i_table },
#undef r
#define r r3
{ i_table }
};
#endif
558
559 #endif
560
561 #if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
562
// data for inverse tables (last round)
//
// The 256 inverse S-box values, each wrapped in w() so that the macro
// named w that is active at the point of expansion decides which byte
// position of the generated word receives the value.
//
// The list deliberately ends without a trailing comma, like the other
// table macros, so that it can be followed by further initializers.

#define li_table \
    w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
    w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
    w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
    w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
    w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
    w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
    w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
    w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
    w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
    w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
    w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
    w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
    w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
    w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
    w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
    w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
    w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
    w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
    w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
    w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
    w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
    w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
    w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
    w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
    w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
    w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
    w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
    w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
    w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
    w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
    w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
    w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d)
598
// generate the required tables in the desired endian format
//
// The forward last-round tables reuse f_table: r is redefined to keep
// only the second byte q of every entry (the plain S-box value) and to
// place it at byte position n via wn, so table n carries the S-box value
// in byte n with all other bytes zero.

#undef r
#define r(p,q,r,s) w0(q)
#if defined(ONE_LR_TABLE)
static const u_int32_t fl_tab[256] =
{ f_table };
#elif defined(FOUR_LR_TABLES)
static const u_int32_t fl_tab[4][256] =
{ { f_table },
#undef r
#define r(p,q,r,s) w1(q)
{ f_table },
#undef r
#define r(p,q,r,s) w2(q)
{ f_table },
#undef r
#define r(p,q,r,s) w3(q)
{ f_table }
};
#endif

/* inverse last-round table(s): li_table expanded with w pointing at w0..w3 in turn */
#undef w
#define w w0
#if defined(ONE_LR_TABLE)
static const u_int32_t il_tab[256] =
{ li_table };
#elif defined(FOUR_LR_TABLES)
static const u_int32_t il_tab[4][256] =
{ { li_table },
#undef w
#define w w1
{ li_table },
#undef w
#define w w2
{ li_table },
#undef w
#define w w3
{ li_table }
};
#endif
640
641 #endif
642
643 #if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
644
// data for the inverse mix column tables
//
// One r(p,q,r,s) entry per byte value i, in ascending order of i. Entry i
// holds p = {0b}*i, q = {0d}*i, r = {09}*i and s = {0e}*i in GF(2^8), as
// can be read off entry 1. The macro named r that is active at the point
// of expansion decides how the four bytes are arranged in the generated
// word.
#define m_table \
    r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
    r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
    r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
    r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
    r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
    r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
    r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
    r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
    r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
    r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
    r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
    r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
    r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
    r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
    r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
    r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
    r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
    r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
    r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
    r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
    r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
    r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
    r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
    r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
    r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
    r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
    r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
    r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
    r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
    r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
    r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
    r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
    r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
    r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
    r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
    r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
    r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
    r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
    r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
    r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
    r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
    r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
    r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
    r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
    r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
    r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
    r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
    r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
    r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
    r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
    r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
    r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
    r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
    r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
    r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
    r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
    r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
    r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
    r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
    r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
    r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
    r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
    r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
    r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
710
/* Point r at the unrotated helper before expanding m_table. */
#undef r
#define r r0

/*
 * Inverse mix column table(s): one table for ONE_IM_TABLE, four copies
 * with the bytes rotated by 0..3 positions (via r0..r3) for
 * FOUR_IM_TABLES. Each #undef/#define pair takes effect for the
 * expansion that follows it.
 */
#if defined(ONE_IM_TABLE)
static const u_int32_t im_tab[256] =
{ m_table };
#elif defined(FOUR_IM_TABLES)
static const u_int32_t im_tab[4][256] =
{ { m_table },
#undef r
#define r r1
{ m_table },
#undef r
#define r r2
{ m_table },
#undef r
#define r r3
{ m_table }
};
#endif
731
732 #endif
733
734 #else
735
/*
 * NOTE(review): tab_gen looks like a "tables already generated" flag; the
 * code that reads or sets it is not part of this excerpt -- confirm.
 */
static int tab_gen = 0;

/*
 * Writable counterparts of the compiled-in tables, declared when
 * FIXED_TABLES is not defined. They start out zero-initialized.
 */
static unsigned char s_box[256]; // the S box
static unsigned char inv_s_box[256]; // the inverse S box
static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants

/* forward and inverse round tables */
#if defined(ONE_TABLE)
static u_int32_t ft_tab[256];
static u_int32_t it_tab[256];
#elif defined(FOUR_TABLES)
static u_int32_t ft_tab[4][256];
static u_int32_t it_tab[4][256];
#endif

/* forward and inverse last-round tables */
#if defined(ONE_LR_TABLE)
static u_int32_t fl_tab[256];
static u_int32_t il_tab[256];
#elif defined(FOUR_LR_TABLES)
static u_int32_t fl_tab[4][256];
static u_int32_t il_tab[4][256];
#endif

/* inverse mix column tables */
#if defined(ONE_IM_TABLE)
static u_int32_t im_tab[256];
#elif defined(FOUR_IM_TABLES)
static u_int32_t im_tab[4][256];
#endif
763
764 // Generate the tables for the dynamic table option
765
766 #if !defined(FF_TABLES)
767
// It will generally be sensible to use tables to compute finite
// field multiplies and inverses, but where memory is scarce this
// code might sometimes be better.
771
// Returns half of the highest power of two contained in x, i.e.
// 2 ^ (n - 1) where n is the bit number of the highest bit set in x.
// Only meaningful for 1 < x < 0x00000200; 0 and 1 both yield 0. The
// result fits in a byte so that the locals of FFinv can be bytes
// rather than words.

static unsigned char hibit(const u_int32_t x)
{
    // start from x/2 | x/4 and smear the top bit over all lower bits
    unsigned char smear = (unsigned char)((x >> 1) | (x >> 2));

    smear |= (unsigned char)(smear >> 2);
    smear |= (unsigned char)(smear >> 4);

    // smear is now of the form 2^k - 1, so (smear + 1) / 2 isolates
    // the top bit
    return (unsigned char)((smear + 1) >> 1);
}
783
// Returns the multiplicative inverse of the finite field element x.
// 0 and 1 are returned unchanged. Two remainder/coefficient pairs are
// reduced against each other in turn; as soon as the degree marker
// of one side (as reported by hibit) drops to zero, the coefficient of
// that side is the result.

static unsigned char FFinv(const unsigned char x)
{
    unsigned char rem_a = x, rem_b = 0x1b;
    unsigned char top_a = hibit(x), top_b = 0x80;
    unsigned char inv_a = 1, inv_b = 0;

    if (x < 2)
    {
        return x;
    }

    while (top_a)
    {
        // reduce side b by multiples of side a
        while (top_b >= top_a)
        {
            top_b /= top_a;
            rem_b ^= rem_a * top_b;
            inv_b ^= inv_a * top_b;
            top_b = hibit(rem_b);
        }

        if (!top_b)
        {
            return inv_b;
        }

        // reduce side a by multiples of side b
        while (top_a >= top_b)
        {
            top_a /= top_b;
            rem_a ^= rem_b * top_a;
            inv_a ^= inv_b * top_a;
            top_a = hibit(rem_a);
        }
    }

    return inv_a;
}
808
// Finite field multiplications by the constants required for Rijndael,
// all built from FFmul02: a left shift of the low seven bits which is
// reduced with 0x1b whenever bit 7 of the operand was set. The macros
// evaluate their argument more than once.

#define FFmul02(x)  ((((x) & 0x7f) << 1) ^ (((x) & 0x80) ? 0x1b : 0))
#define FFmul03(x)  (FFmul02(x) ^ (x))
#define FFmul09(x)  (FFmul02(FFmul02(FFmul02(x))) ^ (x))
#define FFmul0b(x)  (FFmul02(FFmul02(FFmul02(x)) ^ (x)) ^ (x))
#define FFmul0d(x)  (FFmul02(FFmul02(FFmul02(x) ^ (x))) ^ (x))
#define FFmul0e(x)  FFmul02(FFmul02(FFmul02(x) ^ (x)) ^ (x))
817
818 #else
819
// Table driven finite field inverse and multiplications. They rely on
// arrays named pow and log being in scope at the point of use (gen_tabs
// declares them locally): log[x] is the discrete logarithm of x and pow
// the matching power table, so multiplying by a constant is adding the
// constant's logarithm to the index. Zero has no logarithm and is
// handled explicitly. The argument is fully parenthesised so that any
// expression (e.g. a conditional expression) can be passed safely.

#define FFinv(x)    ((x) ? pow[255 - log[(x)]] : 0)

#define FFmul02(x)  ((x) ? pow[log[(x)] + 0x19] : 0)
#define FFmul03(x)  ((x) ? pow[log[(x)] + 0x01] : 0)
#define FFmul09(x)  ((x) ? pow[log[(x)] + 0xc7] : 0)
#define FFmul0b(x)  ((x) ? pow[log[(x)] + 0x68] : 0)
#define FFmul0d(x)  ((x) ? pow[log[(x)] + 0xee] : 0)
#define FFmul0e(x)  ((x) ? pow[log[(x)] + 0xdf] : 0)
828
829 #endif
830
// The forward and inverse affine transformations used in the S-box.
// Both macros use a u_int32_t variable named w, which must be in scope
// at the point of use, as scratch space: the shifted copies of the
// input byte are xored together in w and the bits that spilled above
// bit 7 are folded back with w>>8 before the constant (0x63 / 0x05) is
// applied. The argument is parenthesised before the cast so that the
// cast applies to the whole argument expression.

#define fwd_affine(x) \
    (w = (u_int32_t)(x), w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8)))

#define inv_affine(x) \
    (w = (u_int32_t)(x), w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8)))
838
// Fill the run time tables: rcon_tab, s_box, inv_s_box and whichever of
// fl_tab/il_tab, ft_tab/it_tab and im_tab are enabled by the build
// options. Called from aes_cbc_crypter_create() while tab_gen is 0.
static void gen_tabs(void)
{   u_int32_t i, w;

#if defined(FF_TABLES)

    // local tables used by the table based FFinv/FFmulXX macros
    unsigned char pow[512], log[256];

    // log and power tables for GF(2^8) finite field with
    // 0x011b as modular polynomial - the simplest primitive
    // root is 0x03, used here to generate the tables

    i = 0; w = 1;
    do
    {
        pow[i] = (unsigned char)w;
        // second copy 255 entries further on, so that an index formed
        // by adding two logarithms needs no reduction
        pow[i + 255] = (unsigned char)w;
        log[w] = (unsigned char)i++;
        // next power of 0x03: w * 3 = w ^ (w * 2), reduced when needed
        w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0);
    }
    while (w != 1);

#endif

    // round constants: successive doublings of 1, placed in the first
    // byte position of bytes2word
    for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
    {
        rcon_tab[i] = bytes2word(w, 0, 0, 0);
        w = (w << 1) ^ (w & ff_hi ? ff_poly : 0);
    }

    for(i = 0; i < 256; ++i)
    {   unsigned char b;

        // forward S-box: field inverse followed by the affine transform
        s_box[i] = b = fwd_affine(FFinv((unsigned char)i));

        // last round forward table: the plain S-box byte in one position
        // NOTE(review): upr is defined outside this section; presumably
        // it rotates the word by the given number of bytes - confirm.
        w = bytes2word(b, 0, 0, 0);
#if defined(ONE_LR_TABLE)
        fl_tab[i] = w;
#elif defined(FOUR_LR_TABLES)
        fl_tab[0][i] = w;
        fl_tab[1][i] = upr(w,1);
        fl_tab[2][i] = upr(w,2);
        fl_tab[3][i] = upr(w,3);
#endif
        // normal round forward table: S-box byte times 02, 01, 01, 03
        w = bytes2word(FFmul02(b), b, b, FFmul03(b));
#if defined(ONE_TABLE)
        ft_tab[i] = w;
#elif defined(FOUR_TABLES)
        ft_tab[0][i] = w;
        ft_tab[1][i] = upr(w,1);
        ft_tab[2][i] = upr(w,2);
        ft_tab[3][i] = upr(w,3);
#endif
        // inverse S-box: inverse affine transform followed by the field
        // inverse; b is reused for the inverse tables below
        inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i));

        // last round inverse table: the plain inverse S-box byte
        w = bytes2word(b, 0, 0, 0);
#if defined(ONE_LR_TABLE)
        il_tab[i] = w;
#elif defined(FOUR_LR_TABLES)
        il_tab[0][i] = w;
        il_tab[1][i] = upr(w,1);
        il_tab[2][i] = upr(w,2);
        il_tab[3][i] = upr(w,3);
#endif
        // normal round inverse table: inverse S-box byte times
        // 0e, 09, 0d, 0b
        w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b));
#if defined(ONE_TABLE)
        it_tab[i] = w;
#elif defined(FOUR_TABLES)
        it_tab[0][i] = w;
        it_tab[1][i] = upr(w,1);
        it_tab[2][i] = upr(w,2);
        it_tab[3][i] = upr(w,3);
#endif
        // the same word stored under index b (not i): im_tab maps a
        // byte value directly to its 0e/09/0d/0b multiples
#if defined(ONE_IM_TABLE)
        im_tab[b] = w;
#elif defined(FOUR_IM_TABLES)
        im_tab[0][b] = w;
        im_tab[1][b] = upr(w,1);
        im_tab[2][b] = upr(w,2);
        im_tab[3][b] = upr(w,3);
#endif

    }
}
922
923 #endif
924
// Generic lookup helpers shared by the key schedule and the round
// macros. In all three, vf(x,r,c) selects the input word for row r of
// column c and rf(r,c) selects which byte of that word is used (via
// bval, defined outside this section).

// no_table: look up each of the four selected bytes in the byte table
// box and pack the four results with bytes2word.
#define no_table(x,box,vf,rf,c) bytes2word( \
    box[bval(vf(x,0,c),rf(0,c))], \
    box[bval(vf(x,1,c),rf(1,c))], \
    box[bval(vf(x,2,c),rf(2,c))], \
    box[bval(vf(x,3,c),rf(3,c))])

// one_table: look up each selected byte in the single word table tab
// and xor the results, applying op(word,n) to rows 1..3.
#define one_table(x,op,tab,vf,rf,c) \
    ( tab[bval(vf(x,0,c),rf(0,c))] \
    ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
    ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
    ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))

// four_tables: as one_table, but row r uses its own table tab[r], so
// no op is needed.
#define four_tables(x,tab,vf,rf,c) \
    ( tab[0][bval(vf(x,0,c),rf(0,c))] \
    ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
    ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
    ^ tab[3][bval(vf(x,3,c),rf(3,c))])

// Selectors for single word operands: vf1 always yields the word
// itself, rf1 uses byte r, rf2 uses byte (r - c) modulo 4.
#define vf1(x,r,c) (x)
#define rf1(r,c) (r)
#define rf2(r,c) ((r-c)&3)
946
// ls_box(x,c): S-box substitution of the four bytes of word x with the
// byte selection offset by c (selector rf2); used by the key schedule.
// It goes through fl_tab when a last round table is built, otherwise
// directly through s_box.
#if defined(FOUR_LR_TABLES)
#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
#elif defined(ONE_LR_TABLE)
#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
#else
#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
#endif

// inv_mcol(x): transform of word x applied to round keys when the
// decryption key schedule is built (see mx/mix). It is table driven
// when im_tab is available; otherwise it is computed with FFmulX
// (defined outside this section) and needs u_int32_t locals named
// f2, f4, f8 and f9 in scope at the point of use.
#if defined(FOUR_IM_TABLES)
#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
#elif defined(ONE_IM_TABLE)
#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
#else
#define inv_mcol(x) \
    (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
    f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
#endif
964
// number of 32 bit columns (words) in one cipher block
#define nc (AES_BLOCK_SIZE/4)

// Initialise the key schedule from the user supplied key. The key
// length is now specified in bytes - 16, 24 or 32 as appropriate.
// This corresponds to bit lengths of 128, 192 and 256 bits, and
// to Nk values of 4, 6 and 8 respectively.

// Word copy helpers for the key schedule; both advance the destination
// pointer t and the source pointer f by one word. mx stores inv_mcol of
// the source word, cp stores the source word unchanged.
#define mx(t,f) (*t++ = inv_mcol(*f),f++)
#define cp(t,f) *t++ = *f++

// cpy(d,s) / mix(d,s): apply cp / mx to one block worth of words (nc of
// them). Both expand to several statements (or a switch), so they must
// only be used where a statement sequence is valid.
#if AES_BLOCK_SIZE == 16
#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s)
#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s)
#elif AES_BLOCK_SIZE == 24
#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
                 cp(d,s); cp(d,s)
#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
                 mx(d,s); mx(d,s)
#elif AES_BLOCK_SIZE == 32
#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
                 cp(d,s); cp(d,s); cp(d,s); cp(d,s)
#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
                 mx(d,s); mx(d,s); mx(d,s); mx(d,s)
#else

// any other block size: the cases fall through on purpose so that
// 8, 6 or 4 words are processed
#define cpy(d,s) \
switch(nc) \
{   case 8: cp(d,s); cp(d,s); \
    case 6: cp(d,s); cp(d,s); \
    case 4: cp(d,s); cp(d,s); \
            cp(d,s); cp(d,s); \
}

#define mix(d,s) \
switch(nc) \
{   case 8: mx(d,s); mx(d,s); \
    case 6: mx(d,s); mx(d,s); \
    case 4: mx(d,s); mx(d,s); \
            mx(d,s); mx(d,s); \
}

#endif
1007
// y = output word, x = input word, r = row, c = column
// for r = 0, 1, 2 and 3 = column accessed for row r

// s(x,c) names column c of state x: an array element x[c] when ARRAYS
// is defined, otherwise the separate variable formed by pasting the
// column number onto the state name (b0 and 3 give b03).
#if defined(ARRAYS)
#define s(x,c) x[c]
#else
#define s(x,c) x##c
#endif
1016
// I am grateful to Frank Yellin for the following constructions
// which, given the column (c) of the output state variable that
// is being computed, return the input state variables which are
// needed for each row (r) of the state

// For the fixed block size options, compilers reduce these two
// expressions to fixed variable references. For variable block
// size code conditional clauses will sometimes be returned

// Replacement value for state variable names that can be generated by
// the macros below but are never declared for the configured block
// size (see the b04.. defines next to locals()).
#define unused 77 // Sunset Strip

// fwd_var(x,r,c): input state column used for row r when output column
// c is computed in the forward (encryption) direction. Row 0 always
// uses column c itself; the other rows use columns further on, wrapping
// around at nc, with the step depending on nc.
#define fwd_var(x,r,c) \
 ( r==0 ? \
    ( c==0 ? s(x,0) \
    : c==1 ? s(x,1) \
    : c==2 ? s(x,2) \
    : c==3 ? s(x,3) \
    : c==4 ? s(x,4) \
    : c==5 ? s(x,5) \
    : c==6 ? s(x,6) \
    : s(x,7)) \
 : r==1 ? \
    ( c==0 ? s(x,1) \
    : c==1 ? s(x,2) \
    : c==2 ? s(x,3) \
    : c==3 ? nc==4 ? s(x,0) : s(x,4) \
    : c==4 ? s(x,5) \
    : c==5 ? nc==8 ? s(x,6) : s(x,0) \
    : c==6 ? s(x,7) \
    : s(x,0)) \
 : r==2 ? \
    ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
    : c==1 ? nc==8 ? s(x,4) : s(x,3) \
    : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
    : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
    : c==4 ? nc==8 ? s(x,7) : s(x,0) \
    : c==5 ? nc==8 ? s(x,0) : s(x,1) \
    : c==6 ? s(x,1) \
    : s(x,2)) \
 : \
    ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
    : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
    : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
    : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
    : c==4 ? nc==8 ? s(x,0) : s(x,1) \
    : c==5 ? nc==8 ? s(x,1) : s(x,2) \
    : c==6 ? s(x,2) \
    : s(x,3)))

// inv_var(x,r,c): the corresponding selection for the inverse
// (decryption) direction; rows 1..3 use columns further back instead.
#define inv_var(x,r,c) \
 ( r==0 ? \
    ( c==0 ? s(x,0) \
    : c==1 ? s(x,1) \
    : c==2 ? s(x,2) \
    : c==3 ? s(x,3) \
    : c==4 ? s(x,4) \
    : c==5 ? s(x,5) \
    : c==6 ? s(x,6) \
    : s(x,7)) \
 : r==1 ? \
    ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \
    : c==1 ? s(x,0) \
    : c==2 ? s(x,1) \
    : c==3 ? s(x,2) \
    : c==4 ? s(x,3) \
    : c==5 ? s(x,4) \
    : c==6 ? s(x,5) \
    : s(x,6)) \
 : r==2 ? \
    ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
    : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
    : c==2 ? nc==8 ? s(x,7) : s(x,0) \
    : c==3 ? nc==8 ? s(x,0) : s(x,1) \
    : c==4 ? nc==8 ? s(x,1) : s(x,2) \
    : c==5 ? nc==8 ? s(x,2) : s(x,3) \
    : c==6 ? s(x,3) \
    : s(x,4)) \
 : \
    ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \
    : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
    : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
    : c==3 ? nc==8 ? s(x,7) : s(x,0) \
    : c==4 ? nc==8 ? s(x,0) : s(x,1) \
    : c==5 ? nc==8 ? s(x,1) : s(x,2) \
    : c==6 ? s(x,2) \
    : s(x,3)))
1103
// si: load column c of state y from the byte buffer x (word at byte
// offset 4 * c, read with const_word_in) and xor it with key word k[c].
// so: store column c of state x to the byte buffer y at byte offset
// 4 * c with word_out.
#define si(y,x,k,c) s(y,c) = const_word_in(x + 4 * c) ^ k[c]
#define so(y,x,c) word_out(y + 4 * c, s(x,c))

// fwd_rnd / inv_rnd: compute column c of output state y for one normal
// round from input state x and round key k, using ft_tab / it_tab when
// built and falling back to s_box / inv_s_box plus fwd_mcol / inv_mcol
// otherwise. Note that in the table-less inverse round the key is
// xored in before inv_mcol is applied.
#if defined(FOUR_TABLES)
#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
#elif defined(ONE_TABLE)
#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
#else
#define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c]
#define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c])
#endif

// fwd_lrnd / inv_lrnd: the same for the last round, which uses the
// fl_tab / il_tab tables (or the plain S-boxes) and no column mixing.
// NOTE(review): the single table variant passes ups where the normal
// round passes upr; ups is defined outside this section - confirm.
#if defined(FOUR_LR_TABLES)
#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
#elif defined(ONE_LR_TABLE)
#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c)
#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
#else
#define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
#define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
#endif
1128
// Per block size definitions of the state handling macros:
//   locals(y,x)      declarator list for the two state variables
//   l_copy(y,x)      copy state x into state y, column by column
//   state_in(y,x,k)  load state y from bytes x, xoring in key k (si)
//   state_out(y,x)   store state x to bytes y (so)
//   round(rm,y,x,k)  apply round macro rm to every column
// All of them expand to statement sequences, not single expressions.

#if AES_BLOCK_SIZE == 16

#if defined(ARRAYS)
#define locals(y,x) x[4],y[4]
#else
#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
// the following defines prevent the compiler requiring the declaration
// of generated but unused variables in the fwd_var and inv_var macros
#define b04 unused
#define b05 unused
#define b06 unused
#define b07 unused
#define b14 unused
#define b15 unused
#define b16 unused
#define b17 unused
#endif
#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
                     s(y,2) = s(x,2); s(y,3) = s(x,3);
#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)

#elif AES_BLOCK_SIZE == 24

#if defined(ARRAYS)
#define locals(y,x) x[6],y[6]
#else
#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \
                    y##0,y##1,y##2,y##3,y##4,y##5
#define b06 unused
#define b07 unused
#define b16 unused
#define b17 unused
#endif
#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
                     s(y,2) = s(x,2); s(y,3) = s(x,3); \
                     s(y,4) = s(x,4); s(y,5) = s(x,5);
#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
                        si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
                       so(y,x,3); so(y,x,4); so(y,x,5)
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
                        rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
#else

// block size 32, or any other value: eight state columns are declared
#if defined(ARRAYS)
#define locals(y,x) x[8],y[8]
#else
#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
                    y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
#endif
#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
                     s(y,2) = s(x,2); s(y,3) = s(x,3); \
                     s(y,4) = s(x,4); s(y,5) = s(x,5); \
                     s(y,6) = s(x,6); s(y,7) = s(x,7);

#if AES_BLOCK_SIZE == 32

#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
                        si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
                       so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
                        rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
#else

// any other block size: switch on nc; the cases fall through on
// purpose so that 8, 6 or 4 columns are handled
#define state_in(y,x,k) \
switch(nc) \
{   case 8: si(y,x,k,7); si(y,x,k,6); \
    case 6: si(y,x,k,5); si(y,x,k,4); \
    case 4: si(y,x,k,3); si(y,x,k,2); \
            si(y,x,k,1); si(y,x,k,0); \
}

#define state_out(y,x) \
switch(nc) \
{   case 8: so(y,x,7); so(y,x,6); \
    case 6: so(y,x,5); so(y,x,4); \
    case 4: so(y,x,3); so(y,x,2); \
            so(y,x,1); so(y,x,0); \
}

#if defined(FAST_VARIABLE)

// every case is written out in full and ends with break
#define round(rm,y,x,k) \
switch(nc) \
{   case 8: rm(y,x,k,7); rm(y,x,k,6); \
            rm(y,x,k,5); rm(y,x,k,4); \
            rm(y,x,k,3); rm(y,x,k,2); \
            rm(y,x,k,1); rm(y,x,k,0); \
            break; \
    case 6: rm(y,x,k,5); rm(y,x,k,4); \
            rm(y,x,k,3); rm(y,x,k,2); \
            rm(y,x,k,1); rm(y,x,k,0); \
            break; \
    case 4: rm(y,x,k,3); rm(y,x,k,2); \
            rm(y,x,k,1); rm(y,x,k,0); \
            break; \
}
#else

// compact form relying on fall through
#define round(rm,y,x,k) \
switch(nc) \
{   case 8: rm(y,x,k,7); rm(y,x,k,6); \
    case 6: rm(y,x,k,5); rm(y,x,k,4); \
    case 4: rm(y,x,k,3); rm(y,x,k,2); \
            rm(y,x,k,1); rm(y,x,k,0); \
}

#endif

#endif
#endif
1243
/**
 * Implementation of private_aes_cbc_crypter_t.encrypt_block.
 *
 * Encrypts one block of AES_BLOCK_SIZE bytes from in_blk to out_blk
 * using the encryption key schedule this->aes_e_key and
 * this->aes_Nrnd rounds. The caller passes the same buffer for in_blk
 * and out_blk in encrypt(); the input is completely loaded into the
 * local state before anything is written.
 *
 * The local state names b0 and b1 must not be changed: the b04..b17
 * defines next to locals() refer to them.
 */
static void encrypt_block(const private_aes_cbc_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[])
{   u_int32_t locals(b0, b1);
    const u_int32_t *kp = this->aes_e_key;

#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
    // scratch variable for the table-less round macros
    u_int32_t f2;
#endif

    // load the input block and xor in the first nc key words
    state_in(b0, in_blk, kp); kp += nc;

#if defined(UNROLL)

    // fully unrolled: enter at the case matching the round count and
    // fall through; the state alternates between b0 and b1
    switch(this->aes_Nrnd)
    {
    case 14:    round(fwd_rnd,  b1, b0, kp         );
                round(fwd_rnd,  b0, b1, kp + nc    ); kp += 2 * nc;
    case 12:    round(fwd_rnd,  b1, b0, kp         );
                round(fwd_rnd,  b0, b1, kp + nc    ); kp += 2 * nc;
    case 10:    round(fwd_rnd,  b1, b0, kp         );
                round(fwd_rnd,  b0, b1, kp +     nc);
                round(fwd_rnd,  b1, b0, kp + 2 * nc);
                round(fwd_rnd,  b0, b1, kp + 3 * nc);
                round(fwd_rnd,  b1, b0, kp + 4 * nc);
                round(fwd_rnd,  b0, b1, kp + 5 * nc);
                round(fwd_rnd,  b1, b0, kp + 6 * nc);
                round(fwd_rnd,  b0, b1, kp + 7 * nc);
                round(fwd_rnd,  b1, b0, kp + 8 * nc);
                round(fwd_lrnd, b0, b1, kp + 9 * nc);
    }

#elif defined(PARTIAL_UNROLL)
    {   u_int32_t rnd;

        // two rounds per iteration, then one normal and the last round
        for(rnd = 0; rnd < (this->aes_Nrnd >> 1) - 1; ++rnd)
        {
            round(fwd_rnd, b1, b0, kp);
            round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc;
        }

        round(fwd_rnd,  b1, b0, kp);
        round(fwd_lrnd, b0, b1, kp + nc);
    }
#else
    {   u_int32_t rnd;

        // one round per iteration, copying the result back into b0
        for(rnd = 0; rnd < this->aes_Nrnd - 1; ++rnd)
        {
            round(fwd_rnd, b1, b0, kp);
            l_copy(b0, b1); kp += nc;
        }

        // the last round uses fwd_lrnd and leaves its result in b0
        round(fwd_lrnd, b0, b1, kp);
    }
#endif

    state_out(out_blk, b0);
}
1304
/**
 * Implementation of private_aes_cbc_crypter_t.decrypt_block.
 *
 * Decrypts one block of AES_BLOCK_SIZE bytes from in_blk to out_blk
 * using the decryption key schedule this->aes_d_key (built by set_key)
 * and this->aes_Nrnd rounds. The structure mirrors encrypt_block with
 * the inverse round macros.
 *
 * The local state names b0 and b1 must not be changed: the b04..b17
 * defines next to locals() refer to them.
 */
static void decrypt_block(const private_aes_cbc_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[])
{   u_int32_t locals(b0, b1);
    const u_int32_t *kp = this->aes_d_key;

#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
    // scratch variables for the computed form of inv_mcol
    u_int32_t f2, f4, f8, f9;
#endif

    // load the input block and xor in the first nc key words
    state_in(b0, in_blk, kp); kp += nc;

#if defined(UNROLL)

    // fully unrolled: enter at the case matching the round count and
    // fall through; the state alternates between b0 and b1
    switch(this->aes_Nrnd)
    {
    case 14:    round(inv_rnd,  b1, b0, kp         );
                round(inv_rnd,  b0, b1, kp + nc    ); kp += 2 * nc;
    case 12:    round(inv_rnd,  b1, b0, kp         );
                round(inv_rnd,  b0, b1, kp + nc    ); kp += 2 * nc;
    case 10:    round(inv_rnd,  b1, b0, kp         );
                round(inv_rnd,  b0, b1, kp +     nc);
                round(inv_rnd,  b1, b0, kp + 2 * nc);
                round(inv_rnd,  b0, b1, kp + 3 * nc);
                round(inv_rnd,  b1, b0, kp + 4 * nc);
                round(inv_rnd,  b0, b1, kp + 5 * nc);
                round(inv_rnd,  b1, b0, kp + 6 * nc);
                round(inv_rnd,  b0, b1, kp + 7 * nc);
                round(inv_rnd,  b1, b0, kp + 8 * nc);
                round(inv_lrnd, b0, b1, kp + 9 * nc);
    }

#elif defined(PARTIAL_UNROLL)
    {   u_int32_t rnd;

        // two rounds per iteration, then one normal and the last round
        for(rnd = 0; rnd < (this->aes_Nrnd >> 1) - 1; ++rnd)
        {
            round(inv_rnd, b1, b0, kp);
            round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc;
        }

        round(inv_rnd,  b1, b0, kp);
        round(inv_lrnd, b0, b1, kp + nc);
    }
#else
    {   u_int32_t rnd;

        // one round per iteration, copying the result back into b0
        for(rnd = 0; rnd < this->aes_Nrnd - 1; ++rnd)
        {
            round(inv_rnd, b1, b0, kp);
            l_copy(b0, b1); kp += nc;
        }

        // the last round uses inv_lrnd and leaves its result in b0
        round(inv_lrnd, b0, b1, kp);
    }
#endif

    state_out(out_blk, b0);
}
1365
1366 /**
1367 * Implementation of crypter_t.decrypt.
1368 */
1369 static status_t decrypt (private_aes_cbc_crypter_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted)
1370 {
1371 int ret, pos;
1372 const u_int32_t *iv_i;
1373 u_int8_t *in, *out;
1374
1375 ret = data.len;
1376 if (((data.len) % 16) != 0)
1377 {
1378 /* data length must be padded to a multiple of blocksize */
1379 return INVALID_ARG;
1380 }
1381
1382 decrypted->ptr = malloc(data.len);
1383 if (decrypted->ptr == NULL)
1384 {
1385 return OUT_OF_RES;
1386 }
1387 decrypted->len = data.len;
1388
1389 in = data.ptr;
1390 out = decrypted->ptr;
1391
1392 pos=data.len-16;
1393 in+=pos;
1394 out+=pos;
1395 while(pos>=0) {
1396 this->decrypt_block(this,in,out);
1397 if (pos==0)
1398 iv_i=(const u_int32_t*) (iv.ptr);
1399 else
1400 iv_i=(const u_int32_t*) (in-16);
1401 *((u_int32_t *)(&out[ 0])) ^= iv_i[0];
1402 *((u_int32_t *)(&out[ 4])) ^= iv_i[1];
1403 *((u_int32_t *)(&out[ 8])) ^= iv_i[2];
1404 *((u_int32_t *)(&out[12])) ^= iv_i[3];
1405 in-=16;
1406 out-=16;
1407 pos-=16;
1408 }
1409
1410 return SUCCESS;
1411 }
1412
1413
1414 /**
1415 * Implementation of crypter_t.decrypt.
1416 */
1417 static status_t encrypt (private_aes_cbc_crypter_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted)
1418 {
1419 int ret, pos;
1420 const u_int32_t *iv_i;
1421 u_int8_t *in, *out;
1422
1423 ret = data.len;
1424 if (((data.len) % 16) != 0)
1425 {
1426 /* data length must be padded to a multiple of blocksize */
1427 return INVALID_ARG;
1428 }
1429
1430 encrypted->ptr = malloc(data.len);
1431 if (encrypted->ptr == NULL)
1432 {
1433 return OUT_OF_RES;
1434 }
1435 encrypted->len = data.len;
1436
1437 in = data.ptr;
1438 out = encrypted->ptr;
1439
1440 pos=0;
1441 while(pos<data.len)
1442 {
1443 if (pos==0)
1444 iv_i=(const u_int32_t*) iv.ptr;
1445 else
1446 iv_i=(const u_int32_t*) (out-16);
1447 *((u_int32_t *)(&out[ 0])) = iv_i[0]^*((const u_int32_t *)(&in[ 0]));
1448 *((u_int32_t *)(&out[ 4])) = iv_i[1]^*((const u_int32_t *)(&in[ 4]));
1449 *((u_int32_t *)(&out[ 8])) = iv_i[2]^*((const u_int32_t *)(&in[ 8]));
1450 *((u_int32_t *)(&out[12])) = iv_i[3]^*((const u_int32_t *)(&in[12]));
1451 this->encrypt_block(this,out,out);
1452 in+=16;
1453 out+=16;
1454 pos+=16;
1455 }
1456 return SUCCESS;
1457 }
1458
1459 /**
1460 * Implementation of crypter_t.get_block_size.
1461 */
1462 static size_t get_block_size (private_aes_cbc_crypter_t *this)
1463 {
1464 return AES_BLOCK_SIZE;
1465 }
1466
1467 /**
1468 * Implementation of crypter_t.get_key_size.
1469 */
1470 static size_t get_key_size (private_aes_cbc_crypter_t *this)
1471 {
1472 return this->key_size;
1473 }
1474
/**
 * Implementation of crypter_t.set_key.
 *
 * Expands the supplied key into the encryption key schedule
 * this->aes_e_key and derives the decryption key schedule
 * this->aes_d_key from it. Also sets this->aes_Nrnd.
 *
 * Returns INVALID_ARG if key.len differs from the key size the crypter
 * was created with, SUCCESS otherwise.
 */
static status_t set_key (private_aes_cbc_crypter_t *this, chunk_t key)
{
	/* f is initialized to 0 and never changed, so the decryption key
	 * schedule below is always built */
	u_int32_t *kf, *kt, rci, f = 0;
	u_int8_t *in_key = key.ptr;

	if (key.len != this->key_size)
	{
		return INVALID_ARG;
	}

	/* number of rounds: the larger of key words and block words, plus 6 */
	this->aes_Nrnd = (this->aes_Nkey > (nc) ? this->aes_Nkey : (nc)) + 6;

	/* the first four key words are common to all key sizes */
	this->aes_e_key[0] = const_word_in(in_key     );
	this->aes_e_key[1] = const_word_in(in_key +  4);
	this->aes_e_key[2] = const_word_in(in_key +  8);
	this->aes_e_key[3] = const_word_in(in_key + 12);

	/* kf walks over the schedule in steps of aes_Nkey words, kt marks
	 * the start of the last step needed to cover nc * (aes_Nrnd + 1) words */
	kf = this->aes_e_key;
	kt = kf + nc * (this->aes_Nrnd + 1) - this->aes_Nkey;
	rci = 0;

	switch(this->aes_Nkey)
	{
		case 4: do
				{	/* first new word: S-box substitution of the previous
					 * word (ls_box) and the next round constant */
					kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++];
					kf[5] = kf[1] ^ kf[4];
					kf[6] = kf[2] ^ kf[5];
					kf[7] = kf[3] ^ kf[6];
					kf += 4;
				}
				while(kf < kt);
				break;

		case 6: this->aes_e_key[4] = const_word_in(in_key + 16);
				this->aes_e_key[5] = const_word_in(in_key + 20);
				do
				{	kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
					kf[ 7] = kf[1] ^ kf[ 6];
					kf[ 8] = kf[2] ^ kf[ 7];
					kf[ 9] = kf[3] ^ kf[ 8];
					kf[10] = kf[4] ^ kf[ 9];
					kf[11] = kf[5] ^ kf[10];
					kf += 6;
				}
				while(kf < kt);
				break;

		case 8: this->aes_e_key[4] = const_word_in(in_key + 16);
				this->aes_e_key[5] = const_word_in(in_key + 20);
				this->aes_e_key[6] = const_word_in(in_key + 24);
				this->aes_e_key[7] = const_word_in(in_key + 28);
				do
				{	kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
					kf[ 9] = kf[1] ^ kf[ 8];
					kf[10] = kf[2] ^ kf[ 9];
					kf[11] = kf[3] ^ kf[10];
					/* eight word keys substitute a second time in the
					 * middle of each step, without a round constant */
					kf[12] = kf[4] ^ ls_box(kf[11],0);
					kf[13] = kf[5] ^ kf[12];
					kf[14] = kf[6] ^ kf[13];
					kf[15] = kf[7] ^ kf[14];
					kf += 8;
				}
				while (kf < kt);
				break;
	}

	if(!f)
	{
		u_int32_t i;

		/* Build the decryption schedule from the encryption schedule in
		 * reverse round order: kf walks forward through aes_e_key while
		 * kt walks backward through aes_d_key. cpy/mix advance both
		 * pointers by nc words, hence the step back by 2 * nc. */
		kt = this->aes_d_key + nc * this->aes_Nrnd;
		kf = this->aes_e_key;

		/* first encryption round key becomes the last decryption one */
		cpy(kt, kf); kt -= 2 * nc;

		for(i = 1; i < this->aes_Nrnd; ++i)
		{
#if defined(ONE_TABLE) || defined(FOUR_TABLES)
#if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
			/* scratch variables for the computed form of inv_mcol */
			u_int32_t f2, f4, f8, f9;
#endif
			/* table driven rounds need inv_mcol applied to the inner
			 * round keys */
			mix(kt, kf);
#else
			cpy(kt, kf);
#endif
			kt -= 2 * nc;
		}
		/* last encryption round key becomes the first decryption one */
		cpy(kt, kf);
	}

	return SUCCESS;
}
1570
1571 /**
1572 * Implementation of crypter_t.destroy and aes_cbc_crypter_t.destroy.
1573 */
1574 static void destroy (private_aes_cbc_crypter_t *this)
1575 {
1576 free(this);
1577 }
1578
1579 /*
1580 * Described in header
1581 */
1582 aes_cbc_crypter_t *aes_cbc_crypter_create(size_t key_size)
1583 {
1584 private_aes_cbc_crypter_t *this = malloc_thing(private_aes_cbc_crypter_t);
1585
1586 #if !defined(FIXED_TABLES)
1587 if(!tab_gen) { gen_tabs(); tab_gen = 1; }
1588 #endif
1589
1590 this->key_size = key_size;
1591 switch(key_size) {
1592 case 32: /* bytes */
1593 this->aes_Nkey = 8;
1594 break;
1595 case 24: /* bytes */
1596 this->aes_Nkey = 6;
1597 break;
1598 case 16: /* bytes */
1599 this->aes_Nkey = 4;
1600 break;
1601 default:
1602 free(this);
1603 return NULL;
1604 }
1605
1606 /* functions of crypter_t interface */
1607 this->public.crypter_interface.encrypt = (status_t (*) (crypter_t *, chunk_t,chunk_t, chunk_t *)) encrypt;
1608 this->public.crypter_interface.decrypt = (status_t (*) (crypter_t *, chunk_t , chunk_t, chunk_t *)) decrypt;
1609 this->public.crypter_interface.get_block_size = (size_t (*) (crypter_t *)) get_block_size;
1610 this->public.crypter_interface.get_key_size = (size_t (*) (crypter_t *)) get_key_size;
1611 this->public.crypter_interface.set_key = (status_t (*) (crypter_t *,chunk_t)) set_key;
1612 this->public.crypter_interface.destroy = (void (*) (crypter_t *)) destroy;
1613
1614 /* private functions */
1615 this->decrypt_block = decrypt_block;
1616 this->encrypt_block = encrypt_block;
1617
1618 return &(this->public);
1619 }