../svn-commit.tmp
[strongswan.git] / Source / lib / crypto / crypters / aes_cbc_crypter.c
1 /**
2 * @file aes_cbc_crypter.c
3 *
4 * @brief Implementation of aes_cbc_crypter_t
5 *
6 */
7
8 /*
9 * Copyright (C) 2001 Dr B. R. Gladman <brg@gladman.uk.net>
10 * Copyright (C) 2005 Jan Hutter, Martin Willi
11 * Hochschule fuer Technik Rapperswil
12 *
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by the
15 * Free Software Foundation; either version 2 of the License, or (at your
16 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
17 *
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 * for more details.
22 */
23
24 #include "aes_cbc_crypter.h"
25
26 #include <utils/allocator.h>
27
28
/*
 * The number of key schedule words needed for each combination of block
 * length (Nb) and key length (Nk). The values allow for the method of
 * computation, which requires the schedule length to be a multiple of the
 * key length. This AES implementation supports all three key lengths:
 * 16, 24 and 32 bytes.
 *
 *         Nk =   4    6    8
 *              -------------
 *    Nb = 4 |   60   60   64
 *         6 |   96   90   96
 *         8 |  120  120  120
 */
41 #define AES_KS_LENGTH 120
42 #define AES_RC_LENGTH 29
43
44 #define AES_BLOCK_SIZE 16
45
46 typedef struct private_aes_cbc_crypter_t private_aes_cbc_crypter_t;
47
48 /**
49 * @brief Class implementing the AES symmetric encryption algorithm.
50 *
51 * @ingroup crypters
52 */
53 struct private_aes_cbc_crypter_t {
54
55 /**
56 * Public part of this class.
57 */
58 aes_cbc_crypter_t public;
59
60 /**
61 * Number of words in the key input block.
62 */
63 u_int32_t aes_Nkey;
64
65 /**
66 * The number of cipher rounds.
67 */
68 u_int32_t aes_Nrnd;
69
70 /**
71 * The encryption key schedule.
72 */
73 u_int32_t aes_e_key[AES_KS_LENGTH];
74
75 /**
76 * The decryption key schedule.
77 */
78 u_int32_t aes_d_key[AES_KS_LENGTH];
79
80 /**
81 * The number of columns in the cipher state.
82 */
83 u_int32_t aes_Ncol;
84
85 /**
86 * Key size of this AES cypher object.
87 */
88 u_int32_t key_size;
89
90 /**
91 * Decrypts a block.
92 *
93 * No memory gets allocated.
94 *
95 * @param this calling object
96 * @param[in] in_blk block to decrypt
97 * @param[out] out_blk decrypted data are written to this location
98 */
99 void (*decrypt_block) (const private_aes_cbc_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[]);
100
101 /**
102 * Encrypts a block.
103 *
104 * No memory gets allocated.
105 *
106 * @param this calling object
107 * @param[in] in_blk block to encrypt
108 * @param[out] out_blk encrypted data are written to this location
109 */
110 void (*encrypt_block) (const private_aes_cbc_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[]);
111 };
112
113
114 /* ugly macro stuff */
115
116 /* 1. Define UNROLL for full loop unrolling in encryption and decryption.
117 * 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
118 * 3. Define FIXED_TABLES for compiled rather than dynamic tables.
119 * 4. Define FF_TABLES to use tables for field multiplies and inverses.
120 * Do not enable this without understanding stack space requirements.
121 * 5. Define ARRAYS to use arrays to hold the local state block. If this
122 * is not defined, individually declared 32-bit words are used.
123 * 6. Define FAST_VARIABLE if a high speed variable block implementation
124 * is needed (essentially three separate fixed block size code sequences)
125 * 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
126 * version using 1 table (2 kbytes of table space) or 4 tables (8
127 * kbytes of table space) for higher speed.
128 * 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
129 * increase by using tables for the last rounds but with more table
130 * space (2 or 8 kbytes extra).
131 * 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
132 * slower version is provided.
133 * 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
134 * or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
135 */
136
137 #define UNROLL
138 //#define PARTIAL_UNROLL
139
140 #define FIXED_TABLES
141 //#define FF_TABLES
142 //#define ARRAYS
143 #define FAST_VARIABLE
144
145 //#define ONE_TABLE
146 #define FOUR_TABLES
147
148 //#define ONE_LR_TABLE
149 #define FOUR_LR_TABLES
150
151 //#define ONE_IM_TABLE
152 #define FOUR_IM_TABLES
153
154 #if defined(UNROLL) && defined (PARTIAL_UNROLL)
155 #error both UNROLL and PARTIAL_UNROLL are defined
156 #endif
157
158 #if defined(ONE_TABLE) && defined (FOUR_TABLES)
159 #error both ONE_TABLE and FOUR_TABLES are defined
160 #endif
161
162 #if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
163 #error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
164 #endif
165
166 #if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
167 #error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
168 #endif
169
170 #if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
171 #error an illegal block size has been specified
172 #endif
173
/**
 * Rotates the bytes of a 32-bit word by n positions towards higher index
 * positions; bytes leaving the top wrap around into the low positions.
 * n must not be 0, as that would shift by the full word width.
 */
#define upr(x,n)	(((x) >> (32 - 8 * (n))) | ((x) << (8 * (n))))

/**
 * Moves the bytes of a word by n positions towards higher index positions,
 * without wrap around.
 */
#define ups(x,n)	((x) << (8 * (n)))

/**
 * Extracts the byte at index n from a word.
 */
#define bval(x,n)	((unsigned char)((x) >> (8 * (n))))

/**
 * Builds a word out of four bytes; b0 ends up in the lowest byte position
 * and b3 in the highest one.
 */
#define bytes2word(b0, b1, b2, b3) \
	((b0) | (u_int32_t)(b1) << 8 | (u_int32_t)(b2) << 16 | (u_int32_t)(b3) << 24)
191
192
/*
 * AES_LE_OK marks a little endian processor without data alignment
 * restrictions. The original code covered i386 only; alpha and ia64 were
 * added (tested) by jjo.
 */
#if defined(i386) || defined(_I386) || defined(__i386__) || defined(__i386) || \
	defined(__alpha__) || defined(__alpha) || \
	defined(__ia64__) || defined(__ia64)
#define AES_LE_OK 1
#endif
204
/*
 * Accessors converting between four bytes in memory and a 32-bit word.
 * Byte 0 always is the least significant byte of the word.
 *
 *  word_in(x)           reads a word from the bytes at x
 *  const_word_in(x)     same, for const-qualified input
 *  word_out(x,v)        stores the word v into the bytes at x
 *  const_word_out(x,v)  same, for a pointer carrying a const qualifier
 *
 * Every macro expands to one fully parenthesised expression, so it can be
 * used inside larger expressions.
 */
#ifdef AES_LE_OK
/*
 * little endian processor without data alignment restrictions
 *
 * NOTE(review): the direct access assumes that x may be dereferenced as a
 * 32-bit word (alignment, aliasing) -- confirm before enabling AES_LE_OK
 * for further targets.
 */
#define word_in(x)			(*(u_int32_t *)(x))
#define const_word_in(x)	(*(const u_int32_t *)(x))
#define word_out(x,v)		(*(u_int32_t *)(x) = (v))
/* an assignment needs a modifiable lvalue, hence the qualifier is cast away */
#define const_word_out(x,v)	(*(u_int32_t *)(x) = (v))
#else
/*
 * slower but generic: big endian or with data alignment restrictions;
 * these variants evaluate their arguments more than once
 */
#define word_in(x) \
	((u_int32_t)(((unsigned char *)(x))[0]) | \
	 ((u_int32_t)(((unsigned char *)(x))[1]) << 8) | \
	 ((u_int32_t)(((unsigned char *)(x))[2]) << 16) | \
	 ((u_int32_t)(((unsigned char *)(x))[3]) << 24))
/* the "const" casts stop "gcc -Wcast-qual" complaints --jjo */
#define const_word_in(x) \
	((u_int32_t)(((const unsigned char *)(x))[0]) | \
	 ((u_int32_t)(((const unsigned char *)(x))[1]) << 8) | \
	 ((u_int32_t)(((const unsigned char *)(x))[2]) << 16) | \
	 ((u_int32_t)(((const unsigned char *)(x))[3]) << 24))
#define word_out(x,v) \
	(((unsigned char *)(x))[0] = (unsigned char)(v), \
	 ((unsigned char *)(x))[1] = (unsigned char)((v) >> 8), \
	 ((unsigned char *)(x))[2] = (unsigned char)((v) >> 16), \
	 ((unsigned char *)(x))[3] = (unsigned char)((v) >> 24))
/* an assignment needs a modifiable lvalue, hence the qualifier is cast away */
#define const_word_out(x,v)	word_out(x,v)
#endif
219
220 // Disable at least some poor combinations of options
221
222 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
223 #define FIXED_TABLES
224 #undef UNROLL
225 #undef ONE_LR_TABLE
226 #undef FOUR_LR_TABLES
227 #undef ONE_IM_TABLE
228 #undef FOUR_IM_TABLES
229 #elif !defined(FOUR_TABLES)
230 #ifdef FOUR_LR_TABLES
231 #undef FOUR_LR_TABLES
232 #define ONE_LR_TABLE
233 #endif
234 #ifdef FOUR_IM_TABLES
235 #undef FOUR_IM_TABLES
236 #define ONE_IM_TABLE
237 #endif
238 #elif !defined(AES_BLOCK_SIZE)
239 #if defined(UNROLL)
240 #define PARTIAL_UNROLL
241 #undef UNROLL
242 #endif
243 #endif
244
245 // the finite field modular polynomial and elements
246
247 #define ff_poly 0x011b
248 #define ff_hi 0x80
249
// multiply four bytes in GF(2^8) by 'x' {02} in parallel
//
// m1 selects the top bit of every byte, m2 the remaining seven bits and
// m3 is the value folded back into a byte whose top bit was set. The
// masks are unsigned so that the shift never operates on a signed value.

#define m1 0x80808080U
#define m2 0x7f7f7f7fU
#define m3 0x0000001bU
#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))

// The following defines provide alternative definitions of FFmulX that might
// give improved performance if a fast 32-bit multiply is not available. Note
// that a temporary variable u needs to be defined where FFmulX is used.

// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
// #define m4 0x1b1b1b1b
// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)

// perform column mix operation on four bytes in parallel
//
// A 32-bit variable f2 has to be in scope where fwd_mcol is used, it
// receives the doubled column. The argument is evaluated several times and
// therefore must not have side effects; it is parenthesised at every use so
// that an expression such as (a | b) can be passed safely.

#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr((x) ^ f2,3) ^ upr((x),2) ^ upr((x),1))
268
269 #if defined(FIXED_TABLES)
270
271 // the S-Box table
272
273 static const unsigned char s_box[256] =
274 {
275 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
276 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
277 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
278 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
279 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
280 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
281 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
282 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
283 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
284 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
285 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
286 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
287 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
288 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
289 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
290 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
291 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
292 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
293 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
294 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
295 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
296 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
297 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
298 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
299 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
300 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
301 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
302 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
303 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
304 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
305 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
306 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
307 };
308
309 // the inverse S-Box table
310
311 static const unsigned char inv_s_box[256] =
312 {
313 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
314 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
315 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
316 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
317 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
318 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
319 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
320 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
321 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
322 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
323 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
324 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
325 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
326 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
327 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
328 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
329 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
330 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
331 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
332 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
333 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
334 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
335 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
336 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
337 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
338 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
339 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
340 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
341 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
342 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
343 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
344 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
345 };
346
/*
 * Table of round constants, one per word with the constant held in the
 * lowest byte.
 *
 * Number of elements required in this table for different block and key
 * lengths:
 *
 *         Nk =   4    6    8
 *              -------------
 *    Nb = 4 |   10    8    7
 *         6 |   19   12   11
 *         8 |   29   19   14
 *
 * This table can be a table of bytes if the key schedule code is adjusted
 * accordingly.
 */
static const u_int32_t rcon_tab[29] =
{
	0x00000001, 0x00000002, 0x00000004, 0x00000008,
	0x00000010, 0x00000020, 0x00000040, 0x00000080,
	0x0000001b, 0x00000036, 0x0000006c, 0x000000d8,
	0x000000ab, 0x0000004d, 0x0000009a, 0x0000002f,
	0x0000005e, 0x000000bc, 0x00000063, 0x000000c6,
	0x00000097, 0x00000035, 0x0000006a, 0x000000d4,
	0x000000b3, 0x0000007d, 0x000000fa, 0x000000ef,
	0x000000c5
};
374
375 #define r0(p,q,r,s) 0x##p##q##r##s
376 #define r1(p,q,r,s) 0x##q##r##s##p
377 #define r2(p,q,r,s) 0x##r##s##p##q
378 #define r3(p,q,r,s) 0x##s##p##q##r
379 #define w0(p) 0x000000##p
380 #define w1(p) 0x0000##p##00
381 #define w2(p) 0x00##p##0000
382 #define w3(p) 0x##p##000000
383
384 #if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
385
386 // data for forward tables (other than last round)
387
388 #define f_table \
389 r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
390 r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
391 r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
392 r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
393 r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
394 r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
395 r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
396 r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
397 r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
398 r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
399 r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
400 r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
401 r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
402 r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
403 r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
404 r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
405 r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
406 r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
407 r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
408 r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
409 r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
410 r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
411 r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
412 r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
413 r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
414 r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
415 r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
416 r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
417 r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
418 r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
419 r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
420 r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
421 r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
422 r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
423 r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
424 r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
425 r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
426 r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
427 r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
428 r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
429 r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
430 r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
431 r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
432 r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
433 r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
434 r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
435 r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
436 r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
437 r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
438 r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
439 r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
440 r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
441 r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
442 r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
443 r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
444 r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
445 r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
446 r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
447 r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
448 r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
449 r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
450 r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
451 r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
452 r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
453
454 // data for inverse tables (other than last round)
455
456 #define i_table \
457 r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
458 r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
459 r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
460 r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
461 r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
462 r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
463 r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
464 r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
465 r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
466 r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
467 r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
468 r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
469 r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
470 r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
471 r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
472 r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
473 r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
474 r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
475 r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
476 r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
477 r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
478 r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
479 r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
480 r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
481 r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
482 r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
483 r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
484 r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
485 r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
486 r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
487 r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
488 r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
489 r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
490 r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
491 r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
492 r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
493 r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
494 r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
495 r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
496 r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
497 r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
498 r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
499 r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
500 r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
501 r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
502 r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
503 r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
504 r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
505 r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
506 r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
507 r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
508 r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
509 r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
510 r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
511 r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
512 r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
513 r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
514 r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
515 r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
516 r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
517 r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
518 r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
519 r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
520 r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
521
522 // generate the required tables in the desired endian format
523
524 #undef r
525 #define r r0
526
527 #if defined(ONE_TABLE)
528 static const u_int32_t ft_tab[256] =
529 { f_table };
530 #elif defined(FOUR_TABLES)
531 static const u_int32_t ft_tab[4][256] =
532 { { f_table },
533 #undef r
534 #define r r1
535 { f_table },
536 #undef r
537 #define r r2
538 { f_table },
539 #undef r
540 #define r r3
541 { f_table }
542 };
543 #endif
544
545 #undef r
546 #define r r0
547 #if defined(ONE_TABLE)
548 static const u_int32_t it_tab[256] =
549 { i_table };
550 #elif defined(FOUR_TABLES)
551 static const u_int32_t it_tab[4][256] =
552 { { i_table },
553 #undef r
554 #define r r1
555 { i_table },
556 #undef r
557 #define r r2
558 { i_table },
559 #undef r
560 #define r r3
561 { i_table }
562 };
563 #endif
564
565 #endif
566
567 #if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
568
569 // data for inverse tables (last round)
570
571 #define li_table \
572 w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
573 w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
574 w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
575 w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
576 w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
577 w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
578 w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
579 w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
580 w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
581 w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
582 w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
583 w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
584 w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
585 w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
586 w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
587 w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
588 w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
589 w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
590 w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
591 w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
592 w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
593 w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
594 w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
595 w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
596 w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
597 w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
598 w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
599 w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
600 w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
601 w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
602 w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
603 w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
604
605 // generate the required tables in the desired endian format
606
607 #undef r
608 #define r(p,q,r,s) w0(q)
609 #if defined(ONE_LR_TABLE)
610 static const u_int32_t fl_tab[256] =
611 { f_table };
612 #elif defined(FOUR_LR_TABLES)
613 static const u_int32_t fl_tab[4][256] =
614 { { f_table },
615 #undef r
616 #define r(p,q,r,s) w1(q)
617 { f_table },
618 #undef r
619 #define r(p,q,r,s) w2(q)
620 { f_table },
621 #undef r
622 #define r(p,q,r,s) w3(q)
623 { f_table }
624 };
625 #endif
626
627 #undef w
628 #define w w0
629 #if defined(ONE_LR_TABLE)
630 static const u_int32_t il_tab[256] =
631 { li_table };
632 #elif defined(FOUR_LR_TABLES)
633 static const u_int32_t il_tab[4][256] =
634 { { li_table },
635 #undef w
636 #define w w1
637 { li_table },
638 #undef w
639 #define w w2
640 { li_table },
641 #undef w
642 #define w w3
643 { li_table }
644 };
645 #endif
646
647 #endif
648
649 #if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
650
651 #define m_table \
652 r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
653 r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
654 r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
655 r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
656 r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
657 r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
658 r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
659 r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
660 r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
661 r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
662 r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
663 r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
664 r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
665 r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
666 r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
667 r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
668 r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
669 r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
670 r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
671 r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
672 r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
673 r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
674 r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
675 r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
676 r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
677 r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
678 r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
679 r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
680 r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
681 r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
682 r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
683 r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
684 r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
685 r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
686 r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
687 r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
688 r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
689 r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
690 r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
691 r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
692 r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
693 r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
694 r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
695 r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
696 r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
697 r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
698 r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
699 r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
700 r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
701 r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
702 r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
703 r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
704 r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
705 r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
706 r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
707 r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
708 r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
709 r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
710 r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
711 r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
712 r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
713 r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
714 r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
715 r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
716
717 #undef r
718 #define r r0
719
720 #if defined(ONE_IM_TABLE)
721 static const u_int32_t im_tab[256] =
722 { m_table };
723 #elif defined(FOUR_IM_TABLES)
724 static const u_int32_t im_tab[4][256] =
725 { { m_table },
726 #undef r
727 #define r r1
728 { m_table },
729 #undef r
730 #define r r2
731 { m_table },
732 #undef r
733 #define r r3
734 { m_table }
735 };
736 #endif
737
738 #endif
739
740 #else
741
// Storage for the run-time generated ("dynamic") tables. Everything
// declared here is filled in by gen_tabs(); tab_gen is the flag that
// aes_cbc_crypter_create() tests and sets so that gen_tabs() runs once.
742 static int tab_gen = 0;
743
744 static unsigned char s_box[256]; // the S box
745 static unsigned char inv_s_box[256]; // the inverse S box
746 static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
747
// Normal-round tables: ft_tab for encryption, it_tab for decryption.
// ONE_TABLE keeps a single 256-entry table, FOUR_TABLES keeps four
// variants of it (gen_tabs fills variants 1..3 via upr(w,1..3)).
748 #if defined(ONE_TABLE)
749 static u_int32_t ft_tab[256];
750 static u_int32_t it_tab[256];
751 #elif defined(FOUR_TABLES)
752 static u_int32_t ft_tab[4][256];
753 static u_int32_t it_tab[4][256];
754 #endif
755
// Last-round tables (S-box output only, no column mixing): fl_tab for
// encryption, il_tab for decryption.
756 #if defined(ONE_LR_TABLE)
757 static u_int32_t fl_tab[256];
758 static u_int32_t il_tab[256];
759 #elif defined(FOUR_LR_TABLES)
760 static u_int32_t fl_tab[4][256];
761 static u_int32_t il_tab[4][256];
762 #endif
763
// Inverse-mix-column table used by inv_mcol() when building the
// decryption key schedule.
764 #if defined(ONE_IM_TABLE)
765 static u_int32_t im_tab[256];
766 #elif defined(FOUR_IM_TABLES)
767 static u_int32_t im_tab[4][256];
768 #endif
769
770 // Generate the tables for the dynamic table option
771
772 #if !defined(FF_TABLES)
773
// Tables are normally the sensible way to compute finite field
// multiplies and inverses, but where memory is scarce the table-free
// code below can be the better choice.

// hibit: return half of the largest power of two that does not exceed
// x, i.e. 2 ^ (n - 1) where n is the bit number of the highest bit set
// in x. Intended for 1 < x < 0x00000200; x = 0 and x = 1 both yield 0.
// The result is kept in a byte so that the locals of FFinv can be
// bytes rather than words.

static unsigned char hibit(const u_int32_t x)
{
	// seed with (x/2 | x/4) truncated to a byte, then smear the top set
	// bit downwards so every lower bit becomes 1
	unsigned char smear = (unsigned char)((x >> 1) | (x >> 2));

	smear = smear | (smear >> 2);
	smear = smear | (smear >> 4);

	// smear is now 2^k - 1; (smear + 1) / 2 isolates the top bit of x / 2
	return (smear + 1) >> 1;
}
789
790 // return the inverse of the finite field element x
// (table-free variant, compiled only when FF_TABLES is not defined).
// 0 and 1 are returned unchanged. For other x the loop alternately
// reduces the pair (p2,n2) by (p1,n1) and (p1,n1) by (p2,n2), where
// n1/n2 are the hibit() values of p1/p2, and applies the same steps to
// v1/v2; it returns v1 or v2 as soon as n1 or n2 reaches zero.
// NOTE(review): this looks like an extended Euclid over GF(2)
// polynomials, with p2 = 0x1b / n2 = 0x80 standing for the field
// polynomial 0x11b whose top bit does not fit in a byte - confirm.
// All arithmetic is deliberately truncated to unsigned char on
// assignment; the products p1 * n2 etc. are multiplications by a power
// of two, i.e. shifts.
791
792 static unsigned char FFinv(const unsigned char x)
793 { unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
794
795 if(x < 2) return x;
796
797 for(;;)
798 {
799 if(!n1) return v1;
800
// cancel the leading term of p2 with a shifted copy of p1
801 while(n2 >= n1)
802 {
803 n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
804 }
805
806 if(!n2) return v2;
807
// same step with the roles of (p1,v1) and (p2,v2) swapped
808 while(n1 >= n2)
809 {
810 n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
811 }
812 }
813 }
814
815 // define the finite field multiplies required for Rijndael
// (shift-and-xor variant, used when FF_TABLES is not defined).
// FFmul02 doubles x: the low seven bits are shifted left and 0x1b is
// xored in when bit 7 of x was set. The other multipliers are composed
// from FFmul02 and xor. Every macro evaluates its argument more than
// once, so arguments must be free of side effects.
816
817 #define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0))
818 #define FFmul03(x) ((x) ^ FFmul02(x))
819 #define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x))))
820 #define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x))))
821 #define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x))))
822 #define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x)))
823
824 #else
825
// Table-driven field arithmetic (FF_TABLES build).
//
// These macros expect two arrays named `pow` and `log` to be in scope
// where they are expanded (gen_tabs declares them locally): log[x] is
// the discrete logarithm of x to the generator 0x03 and pow[] is the
// matching power table, stored twice back to back so that sums of two
// logarithms need no reduction modulo 255.
//
// FFinv(x)   - multiplicative inverse of x (0 maps to 0)
// FFmulNN(x) - x times the constant NN; the offset added to log[x] is
//              the logarithm of NN (0x19 for 02, 0x01 for 03, ...)
//
// The argument is parenthesised at every use so that an expression
// argument (for example a conditional expression) is evaluated as one
// unit. It is still evaluated twice, so it must be free of side effects.

#define FFinv(x)   ((x) ? pow[255 - log[(x)]] : 0)

#define FFmul02(x) ((x) ? pow[log[(x)] + 0x19] : 0)
#define FFmul03(x) ((x) ? pow[log[(x)] + 0x01] : 0)
#define FFmul09(x) ((x) ? pow[log[(x)] + 0xc7] : 0)
#define FFmul0b(x) ((x) ? pow[log[(x)] + 0x68] : 0)
#define FFmul0d(x) ((x) ? pow[log[(x)] + 0xee] : 0)
#define FFmul0e(x) ((x) ? pow[log[(x)] + 0xdf] : 0)
834
835 #endif
836
837 // The forward and inverse affine transformations used in the S-box
// Both macros are comma expressions that use a variable named `w` from
// the expansion site as scratch (gen_tabs declares it as u_int32_t) and
// therefore overwrite it. The shifted copies of x are xored together in
// the 32-bit w; bits that moved past bit 7 are folded back with
// w ^ (w >> 8), the result is truncated to a byte and the constant
// (0x63 forward, 0x05 inverse) is xored in.
838
839 #define fwd_affine(x) \
840 (w = (u_int32_t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8)))
841
842 #define inv_affine(x) \
843 (w = (u_int32_t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8)))
844
// Fill every run-time table declared above: rcon_tab, s_box, inv_s_box
// and whichever of ft_tab / it_tab / fl_tab / il_tab / im_tab the build
// options enable. Takes no arguments and returns nothing; the caller
// (aes_cbc_crypter_create) guards it with tab_gen so it runs once.
// bytes2word, upr, ff_hi and ff_poly are defined outside this excerpt.
845 static void gen_tabs(void)
846 { u_int32_t i, w;
847
848 #if defined(FF_TABLES)
849
// pow/log are the arrays the table-based FFinv/FFmulXX macros refer to
850 unsigned char pow[512], log[256];
851
852 // log and power tables for GF(2^8) finite field with
853 // 0x011b as modular polynomial - the simplest primitive
854 // root is 0x03, used here to generate the tables
855
// pow[] is stored twice (offset 0 and offset 255) so that the sum of
// two logarithms can index it without a modulo-255 reduction
856 i = 0; w = 1;
857 do
858 {
859 pow[i] = (unsigned char)w;
860 pow[i + 255] = (unsigned char)w;
861 log[w] = (unsigned char)i++;
862 w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0);
863 }
864 while (w != 1);
865
866 #endif
867
// round constants: start at 1 and double in the field for each entry
868 for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
869 {
870 rcon_tab[i] = bytes2word(w, 0, 0, 0);
871 w = (w << 1) ^ (w & ff_hi ? ff_poly : 0);
872 }
873
874 for(i = 0; i < 256; ++i)
875 { unsigned char b;
876
// forward S-box: field inverse followed by the forward affine map
877 s_box[i] = b = fwd_affine(FFinv((unsigned char)i));
878
// last-round encryption table: the S-box byte alone in a word
879 w = bytes2word(b, 0, 0, 0);
880 #if defined(ONE_LR_TABLE)
881 fl_tab[i] = w;
882 #elif defined(FOUR_LR_TABLES)
883 fl_tab[0][i] = w;
884 fl_tab[1][i] = upr(w,1);
885 fl_tab[2][i] = upr(w,2);
886 fl_tab[3][i] = upr(w,3);
887 #endif
// normal-round encryption table: S-box byte times (02, 01, 01, 03)
888 w = bytes2word(FFmul02(b), b, b, FFmul03(b));
889 #if defined(ONE_TABLE)
890 ft_tab[i] = w;
891 #elif defined(FOUR_TABLES)
892 ft_tab[0][i] = w;
893 ft_tab[1][i] = upr(w,1);
894 ft_tab[2][i] = upr(w,2);
895 ft_tab[3][i] = upr(w,3);
896 #endif
// inverse S-box: inverse affine map followed by the field inverse
897 inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i));
898
// last-round decryption table
899 w = bytes2word(b, 0, 0, 0);
900 #if defined(ONE_LR_TABLE)
901 il_tab[i] = w;
902 #elif defined(FOUR_LR_TABLES)
903 il_tab[0][i] = w;
904 il_tab[1][i] = upr(w,1);
905 il_tab[2][i] = upr(w,2);
906 il_tab[3][i] = upr(w,3);
907 #endif
// normal-round decryption table: inverse S-box byte times (0e, 09, 0d, 0b)
908 w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b));
909 #if defined(ONE_TABLE)
910 it_tab[i] = w;
911 #elif defined(FOUR_TABLES)
912 it_tab[0][i] = w;
913 it_tab[1][i] = upr(w,1);
914 it_tab[2][i] = upr(w,2);
915 it_tab[3][i] = upr(w,3);
916 #endif
// im_tab is indexed by b (the inverse S-box output), not by i, so entry
// b holds the (0e, 09, 0d, 0b) multiples of b itself.
// NOTE(review): every im_tab slot is written only if inv_s_box takes
// each byte value exactly once - expected for an S-box, confirm.
917 #if defined(ONE_IM_TABLE)
918 im_tab[b] = w;
919 #elif defined(FOUR_IM_TABLES)
920 im_tab[0][b] = w;
921 im_tab[1][b] = upr(w,1);
922 im_tab[2][b] = upr(w,2);
923 im_tab[3][b] = upr(w,3);
924 #endif
925
926 }
927 }
928
929 #endif
930
// Generic per-column lookups. In all three macros vf(x,r,c) yields the
// state word that supplies row r for output column c, rf(r,c) yields
// the byte position to extract from it with bval(), and the four
// results (r = 0..3) are combined into one word. bval, bytes2word and
// the rotate passed as `op` are defined outside this excerpt.
//
// no_table:    look each byte up in a byte table `box` and pack the
//              four bytes with bytes2word
// one_table:   look each byte up in a single word table and apply
//              op(.., r) to the result for rows 1..3 before xoring
// four_tables: use a separate table tab[r] per row and xor the results
931 #define no_table(x,box,vf,rf,c) bytes2word( \
932 box[bval(vf(x,0,c),rf(0,c))], \
933 box[bval(vf(x,1,c),rf(1,c))], \
934 box[bval(vf(x,2,c),rf(2,c))], \
935 box[bval(vf(x,3,c),rf(3,c))])
936
937 #define one_table(x,op,tab,vf,rf,c) \
938 ( tab[bval(vf(x,0,c),rf(0,c))] \
939 ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
940 ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
941 ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
942
943 #define four_tables(x,tab,vf,rf,c) \
944 ( tab[0][bval(vf(x,0,c),rf(0,c))] \
945 ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
946 ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
947 ^ tab[3][bval(vf(x,3,c),rf(3,c))])
948
// Simple selector functions for the macros above:
// vf1 - always use the word x itself, whatever the row and column
// rf1 - take byte r for row r
// rf2 - take byte (r - c) mod 4 for row r, i.e. rotate the byte order
//       by c. The arguments are not parenthesised, so pass plain
//       identifiers or literals only.
949 #define vf1(x,r,c) (x)
950 #define rf1(r,c) (r)
951 #define rf2(r,c) ((r-c)&3)
952
// ls_box(x,c): substitute every byte of the word x through the forward
// S-box with the byte order rotated by c (rf2). Uses the last-round
// table(s) fl_tab when the build provides them and the plain s_box
// otherwise. set_key uses it with c = 3 and c = 0.
953 #if defined(FOUR_LR_TABLES)
954 #define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
955 #elif defined(ONE_LR_TABLE)
956 #define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
957 #else
958 #define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
959 #endif
960
// inv_mcol(x): inverse mix-column transform of one word, via im_tab if
// available. The table-free form is a comma expression that needs
// u_int32_t locals f2, f4, f8 and f9 declared at the expansion site and
// leaves the result in f2; FFmulX is defined outside this excerpt.
961 #if defined(FOUR_IM_TABLES)
962 #define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
963 #elif defined(ONE_IM_TABLE)
964 #define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
965 #else
966 #define inv_mcol(x) \
967 (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
968 f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
969 #endif
970
// nc: number of 32-bit columns in the cipher state. It is read from the
// crypter instance at run time, so it only works inside functions that
// have a variable named `this`.
// NOTE(review): with AES_BLOCK_SIZE fixed at 16 the cpy/mix/state/round
// macros always handle exactly four words; they agree with nc only
// while this->aes_Ncol is 4.
971 #define nc (this->aes_Ncol)
972
973 // Initialise the key schedule from the user supplied key. The key
974 // length is now specified in bytes - 16, 24 or 32 as appropriate.
975 // This corresponds to bit lengths of 128, 192 and 256 bits, and
976 // to Nk values of 4, 6 and 8 respectively.
977
// Helpers for building the decryption key schedule in set_key.
// cp(t,f): copy one word from *f to *t and advance both pointers
// mx(t,f): store inv_mcol(*f) at *t and advance both pointers
// cpy/mix: apply cp/mx to one block's worth of words (4, 6 or 8).
// cpy and mix expand to several statements (or to a bare switch), so
// they must not be used as the unbraced body of an if/for/while.
978 #define mx(t,f) (*t++ = inv_mcol(*f),f++)
979 #define cp(t,f) *t++ = *f++
980
981 #if AES_BLOCK_SIZE == 16
982 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s)
983 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s)
984 #elif AES_BLOCK_SIZE == 24
985 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
986 cp(d,s); cp(d,s)
987 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
988 mx(d,s); mx(d,s)
989 #elif AES_BLOCK_SIZE == 32
990 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
991 cp(d,s); cp(d,s); cp(d,s); cp(d,s)
992 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
993 mx(d,s); mx(d,s); mx(d,s); mx(d,s)
994 #else
995
// variable block size: the cases fall through on purpose, so nc = 8
// copies 8 words, nc = 6 copies 6 and nc = 4 copies 4
996 #define cpy(d,s) \
997 switch(nc) \
998 { case 8: cp(d,s); cp(d,s); \
999 case 6: cp(d,s); cp(d,s); \
1000 case 4: cp(d,s); cp(d,s); \
1001 cp(d,s); cp(d,s); \
1002 }
1003
1004 #define mix(d,s) \
1005 switch(nc) \
1006 { case 8: mx(d,s); mx(d,s); \
1007 case 6: mx(d,s); mx(d,s); \
1008 case 4: mx(d,s); mx(d,s); \
1009 mx(d,s); mx(d,s); \
1010 }
1011
1012 #endif
1013
1014 // y = output word, x = input word, r = row, c = column
1015 // for r = 0, 1, 2 and 3 = column accessed for row r
1016
// s(x,c) names column c of state x: an array element x[c] when ARRAYS
// is defined, otherwise the separate variable formed by pasting the
// column number onto the name (s(b0,3) becomes b03).
1017 #if defined(ARRAYS)
1018 #define s(x,c) x[c]
1019 #else
1020 #define s(x,c) x##c
1021 #endif
1022
1023 // I am grateful to Frank Yellin for the following constructions
1024 // which, given the column (c) of the output state variable that
1025 // is being computed, return the input state variables which are
1026 // needed for each row (r) of the state
1027
1028 // For the fixed block size options, compilers reduce these two
1029 // expressions to fixed variable references. For variable block
1030 // size code conditional clauses will sometimes be returned
1031
// Placeholder value: the fixed-block-size sections below #define the
// state variable names that are never declared (b04, b05, ...) to
// `unused`, so fwd_var/inv_var branches that mention them still compile.
1032 #define unused 77 // Sunset Strip
1033
// fwd_var(x,r,c): the column of input state x that supplies row r of
// output column c in an encryption round. Row 0 comes from column c
// itself. For rows 1, 2 and 3 the source column is c + 1, c + 2 and
// c + 3 when nc is 4 or 6, and c + 1, c + 3 and c + 4 when nc is 8,
// always taken modulo nc. r and c are expected to be literals; nc is
// the run-time column count, so branches on nc are resolved at run
// time unless the compiler can prove its value.
1034 #define fwd_var(x,r,c) \
1035 ( r==0 ? \
1036 ( c==0 ? s(x,0) \
1037 : c==1 ? s(x,1) \
1038 : c==2 ? s(x,2) \
1039 : c==3 ? s(x,3) \
1040 : c==4 ? s(x,4) \
1041 : c==5 ? s(x,5) \
1042 : c==6 ? s(x,6) \
1043 : s(x,7)) \
1044 : r==1 ? \
1045 ( c==0 ? s(x,1) \
1046 : c==1 ? s(x,2) \
1047 : c==2 ? s(x,3) \
1048 : c==3 ? nc==4 ? s(x,0) : s(x,4) \
1049 : c==4 ? s(x,5) \
1050 : c==5 ? nc==8 ? s(x,6) : s(x,0) \
1051 : c==6 ? s(x,7) \
1052 : s(x,0)) \
1053 : r==2 ? \
1054 ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
1055 : c==1 ? nc==8 ? s(x,4) : s(x,3) \
1056 : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
1057 : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
1058 : c==4 ? nc==8 ? s(x,7) : s(x,0) \
1059 : c==5 ? nc==8 ? s(x,0) : s(x,1) \
1060 : c==6 ? s(x,1) \
1061 : s(x,2)) \
1062 : \
1063 ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
1064 : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
1065 : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
1066 : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
1067 : c==4 ? nc==8 ? s(x,0) : s(x,1) \
1068 : c==5 ? nc==8 ? s(x,1) : s(x,2) \
1069 : c==6 ? s(x,2) \
1070 : s(x,3)))
1071
// inv_var(x,r,c): counterpart of fwd_var for decryption rounds. Row 0
// comes from column c itself. For rows 1, 2 and 3 the source column is
// c - 1, c - 2 and c - 3 when nc is 4 or 6, and c - 1, c - 3 and c - 4
// when nc is 8, always taken modulo nc. r and c are expected to be
// literals; nc is the run-time column count.
1072 #define inv_var(x,r,c) \
1073 ( r==0 ? \
1074 ( c==0 ? s(x,0) \
1075 : c==1 ? s(x,1) \
1076 : c==2 ? s(x,2) \
1077 : c==3 ? s(x,3) \
1078 : c==4 ? s(x,4) \
1079 : c==5 ? s(x,5) \
1080 : c==6 ? s(x,6) \
1081 : s(x,7)) \
1082 : r==1 ? \
1083 ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \
1084 : c==1 ? s(x,0) \
1085 : c==2 ? s(x,1) \
1086 : c==3 ? s(x,2) \
1087 : c==4 ? s(x,3) \
1088 : c==5 ? s(x,4) \
1089 : c==6 ? s(x,5) \
1090 : s(x,6)) \
1091 : r==2 ? \
1092 ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
1093 : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
1094 : c==2 ? nc==8 ? s(x,7) : s(x,0) \
1095 : c==3 ? nc==8 ? s(x,0) : s(x,1) \
1096 : c==4 ? nc==8 ? s(x,1) : s(x,2) \
1097 : c==5 ? nc==8 ? s(x,2) : s(x,3) \
1098 : c==6 ? s(x,3) \
1099 : s(x,4)) \
1100 : \
1101 ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \
1102 : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
1103 : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
1104 : c==3 ? nc==8 ? s(x,7) : s(x,0) \
1105 : c==4 ? nc==8 ? s(x,0) : s(x,1) \
1106 : c==5 ? nc==8 ? s(x,1) : s(x,2) \
1107 : c==6 ? s(x,2) \
1108 : s(x,3)))
1109
// si: load column c of state y from the byte buffer x (4 bytes at
//     offset 4 * c) and xor it with round-key word k[c]
// so: store column c of state x into the byte buffer y at offset 4 * c
// const_word_in and word_out are defined outside this excerpt.
1110 #define si(y,x,k,c) s(y,c) = const_word_in(x + 4 * c) ^ k[c]
1111 #define so(y,x,c) word_out(y + 4 * c, s(x,c))
1112
// One column of a normal round: compute column c of the new state y
// from the old state x and xor in round-key word k[c]. The table
// variants take substitution and column mixing from ft_tab / it_tab.
// The table-free variants use the byte S-boxes plus fwd_mcol (defined
// outside this excerpt) or inv_mcol; note that the table-free inv_rnd
// xors the key BEFORE inv_mcol while fwd_rnd xors it after fwd_mcol.
1113 #if defined(FOUR_TABLES)
1114 #define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
1115 #define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
1116 #elif defined(ONE_TABLE)
1117 #define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
1118 #define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
1119 #else
1120 #define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c]
1121 #define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c])
1122 #endif
1123
// One column of the last round, which has no column mixing: uses the
// last-round tables fl_tab / il_tab or the byte S-boxes directly.
// The ONE_LR_TABLE variant passes `ups` (not `upr`) as the per-row
// operation; ups is defined outside this excerpt.
1124 #if defined(FOUR_LR_TABLES)
1125 #define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
1126 #define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
1127 #elif defined(ONE_LR_TABLE)
1128 #define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c)
1129 #define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
1130 #else
1131 #define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
1132 #define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
1133 #endif
1134
// Whole-state helpers, selected by AES_BLOCK_SIZE (16, 24, 32 or
// anything else for a run-time block size):
//   locals(y,x)     - declarator list for the two working states
//   l_copy(y,x)     - copy state x into state y
//   state_in(y,x,k) - load state y from byte buffer x, xoring key k
//   state_out(y,x)  - store state x into byte buffer y
//   round(rm,y,x,k) - apply the per-column round macro rm to every
//                     column
// All of them expand to several statements (or a bare switch), so they
// must not be used as the unbraced body of an if/for/while.
1135 #if AES_BLOCK_SIZE == 16
1136
1137 #if defined(ARRAYS)
1138 #define locals(y,x) x[4],y[4]
1139 #else
1140 #define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
1141 // the following defines prevent the compiler requiring the declaration
1142 // of generated but unused variables in the fwd_var and inv_var macros
// (they are tied to the state names b0 and b1 used by encrypt_block
// and decrypt_block)
1143 #define b04 unused
1144 #define b05 unused
1145 #define b06 unused
1146 #define b07 unused
1147 #define b14 unused
1148 #define b15 unused
1149 #define b16 unused
1150 #define b17 unused
1151 #endif
1152 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1153 s(y,2) = s(x,2); s(y,3) = s(x,3);
1154 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
1155 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
1156 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
1157
1158 #elif AES_BLOCK_SIZE == 24
1159
1160 #if defined(ARRAYS)
1161 #define locals(y,x) x[6],y[6]
1162 #else
1163 #define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \
1164 y##0,y##1,y##2,y##3,y##4,y##5
1165 #define b06 unused
1166 #define b07 unused
1167 #define b16 unused
1168 #define b17 unused
1169 #endif
1170 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1171 s(y,2) = s(x,2); s(y,3) = s(x,3); \
1172 s(y,4) = s(x,4); s(y,5) = s(x,5);
1173 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
1174 si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
1175 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
1176 so(y,x,3); so(y,x,4); so(y,x,5)
1177 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
1178 rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
1179 #else
1180
// 32-byte or run-time block size: always reserve eight columns
1181 #if defined(ARRAYS)
1182 #define locals(y,x) x[8],y[8]
1183 #else
1184 #define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
1185 y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
1186 #endif
1187 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1188 s(y,2) = s(x,2); s(y,3) = s(x,3); \
1189 s(y,4) = s(x,4); s(y,5) = s(x,5); \
1190 s(y,6) = s(x,6); s(y,7) = s(x,7);
1191
1192 #if AES_BLOCK_SIZE == 32
1193
1194 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
1195 si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
1196 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
1197 so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
1198 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
1199 rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
1200 #else
1201
// run-time block size: the cases fall through on purpose so that
// nc = 8 handles columns 7..0, nc = 6 columns 5..0, nc = 4 columns 3..0
1202 #define state_in(y,x,k) \
1203 switch(nc) \
1204 { case 8: si(y,x,k,7); si(y,x,k,6); \
1205 case 6: si(y,x,k,5); si(y,x,k,4); \
1206 case 4: si(y,x,k,3); si(y,x,k,2); \
1207 si(y,x,k,1); si(y,x,k,0); \
1208 }
1209
1210 #define state_out(y,x) \
1211 switch(nc) \
1212 { case 8: so(y,x,7); so(y,x,6); \
1213 case 6: so(y,x,5); so(y,x,4); \
1214 case 4: so(y,x,3); so(y,x,2); \
1215 so(y,x,1); so(y,x,0); \
1216 }
1217
// FAST_VARIABLE spells out each case separately with a break instead
// of relying on fall-through
1218 #if defined(FAST_VARIABLE)
1219
1220 #define round(rm,y,x,k) \
1221 switch(nc) \
1222 { case 8: rm(y,x,k,7); rm(y,x,k,6); \
1223 rm(y,x,k,5); rm(y,x,k,4); \
1224 rm(y,x,k,3); rm(y,x,k,2); \
1225 rm(y,x,k,1); rm(y,x,k,0); \
1226 break; \
1227 case 6: rm(y,x,k,5); rm(y,x,k,4); \
1228 rm(y,x,k,3); rm(y,x,k,2); \
1229 rm(y,x,k,1); rm(y,x,k,0); \
1230 break; \
1231 case 4: rm(y,x,k,3); rm(y,x,k,2); \
1232 rm(y,x,k,1); rm(y,x,k,0); \
1233 break; \
1234 }
1235 #else
1236
1237 #define round(rm,y,x,k) \
1238 switch(nc) \
1239 { case 8: rm(y,x,k,7); rm(y,x,k,6); \
1240 case 6: rm(y,x,k,5); rm(y,x,k,4); \
1241 case 4: rm(y,x,k,3); rm(y,x,k,2); \
1242 rm(y,x,k,1); rm(y,x,k,0); \
1243 }
1244
1245 #endif
1246
1247 #endif
1248 #endif
1249
1250 /**
1251 * Implementation of private_aes_cbc_crypter_t.encrypt_block.
 *
 * Encrypts one cipher block from in_blk to out_blk using the
 * encryption key schedule this->aes_e_key and this->aes_Nrnd rounds.
 * The whole input is loaded into local state before anything is
 * written, so in_blk and out_blk may be the same buffer (encrypt()
 * calls it that way).
 *
 * @param this		crypter whose key schedule is used (set_key must
 *					have filled aes_e_key and aes_Nrnd)
 * @param in_blk	input block
 * @param out_blk	output block
1252 */
1253 static void encrypt_block(const private_aes_cbc_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[])
1254 { u_int32_t locals(b0, b1);
1255 const u_int32_t *kp = this->aes_e_key;
1256
// scratch word for the table-free round macros
// NOTE(review): presumably consumed by fwd_mcol, which is defined
// outside this excerpt - confirm
1257 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
1258 u_int32_t f2;
1259 #endif
1260
// load the block and xor in the first round key, then step kp to the
// next round key (nc words further on)
1261 state_in(b0, in_blk, kp); kp += nc;
1262
1263 #if defined(UNROLL)
1264
// fully unrolled: 14- and 12-round keys fall through into the 10-round
// tail; the state ping-pongs between b0 and b1
1265 switch(this->aes_Nrnd)
1266 {
1267 case 14: round(fwd_rnd, b1, b0, kp );
1268 round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1269 case 12: round(fwd_rnd, b1, b0, kp );
1270 round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1271 case 10: round(fwd_rnd, b1, b0, kp );
1272 round(fwd_rnd, b0, b1, kp + nc);
1273 round(fwd_rnd, b1, b0, kp + 2 * nc);
1274 round(fwd_rnd, b0, b1, kp + 3 * nc);
1275 round(fwd_rnd, b1, b0, kp + 4 * nc);
1276 round(fwd_rnd, b0, b1, kp + 5 * nc);
1277 round(fwd_rnd, b1, b0, kp + 6 * nc);
1278 round(fwd_rnd, b0, b1, kp + 7 * nc);
1279 round(fwd_rnd, b1, b0, kp + 8 * nc);
1280 round(fwd_lrnd, b0, b1, kp + 9 * nc);
1281 }
1282
1283 #elif defined(PARTIAL_UNROLL)
1284 { u_int32_t rnd;
1285
// two rounds per iteration, then one normal and the last round
1286 for(rnd = 0; rnd < (this->aes_Nrnd >> 1) - 1; ++rnd)
1287 {
1288 round(fwd_rnd, b1, b0, kp);
1289 round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc;
1290 }
1291
1292 round(fwd_rnd, b1, b0, kp);
1293 round(fwd_lrnd, b0, b1, kp + nc);
1294 }
1295 #else
1296 { u_int32_t rnd;
1297
// one round per iteration: compute into b1, copy back to b0; the
// last round uses fwd_lrnd and leaves its result in b0
1298 for(rnd = 0; rnd < this->aes_Nrnd - 1; ++rnd)
1299 {
1300 round(fwd_rnd, b1, b0, kp);
1301 l_copy(b0, b1); kp += nc;
1302 }
1303
1304 round(fwd_lrnd, b0, b1, kp);
1305 }
1306 #endif
1307
1308 state_out(out_blk, b0);
1309 }
1310
1311 /**
1312 * Implementation of private_aes_cbc_crypter_t.decrypt_block.
 *
 * Decrypts one cipher block from in_blk to out_blk using the
 * decryption key schedule this->aes_d_key and this->aes_Nrnd rounds.
 * Same structure as encrypt_block with the inverse round macros.
 *
 * @param this		crypter whose key schedule is used (set_key must
 *					have filled aes_d_key and aes_Nrnd)
 * @param in_blk	input block
 * @param out_blk	output block
1313 */
1314 static void decrypt_block(const private_aes_cbc_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[])
1315 { u_int32_t locals(b0, b1);
1316 const u_int32_t *kp = this->aes_d_key;
1317
// scratch words required by the table-free form of inv_mcol
1318 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
1319 u_int32_t f2, f4, f8, f9;
1320 #endif
1321
// load the block and xor in the first round key of the decryption
// schedule, then step kp to the next round key
1322 state_in(b0, in_blk, kp); kp += nc;
1323
1324 #if defined(UNROLL)
1325
// fully unrolled: 14- and 12-round keys fall through into the 10-round
// tail; the state ping-pongs between b0 and b1
1326 switch(this->aes_Nrnd)
1327 {
1328 case 14: round(inv_rnd, b1, b0, kp );
1329 round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1330 case 12: round(inv_rnd, b1, b0, kp );
1331 round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1332 case 10: round(inv_rnd, b1, b0, kp );
1333 round(inv_rnd, b0, b1, kp + nc);
1334 round(inv_rnd, b1, b0, kp + 2 * nc);
1335 round(inv_rnd, b0, b1, kp + 3 * nc);
1336 round(inv_rnd, b1, b0, kp + 4 * nc);
1337 round(inv_rnd, b0, b1, kp + 5 * nc);
1338 round(inv_rnd, b1, b0, kp + 6 * nc);
1339 round(inv_rnd, b0, b1, kp + 7 * nc);
1340 round(inv_rnd, b1, b0, kp + 8 * nc);
1341 round(inv_lrnd, b0, b1, kp + 9 * nc);
1342 }
1343
1344 #elif defined(PARTIAL_UNROLL)
1345 { u_int32_t rnd;
1346
// two rounds per iteration, then one normal and the last round
1347 for(rnd = 0; rnd < (this->aes_Nrnd >> 1) - 1; ++rnd)
1348 {
1349 round(inv_rnd, b1, b0, kp);
1350 round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc;
1351 }
1352
1353 round(inv_rnd, b1, b0, kp);
1354 round(inv_lrnd, b0, b1, kp + nc);
1355 }
1356 #else
1357 { u_int32_t rnd;
1358
// one round per iteration: compute into b1, copy back to b0; the
// last round uses inv_lrnd and leaves its result in b0
1359 for(rnd = 0; rnd < this->aes_Nrnd - 1; ++rnd)
1360 {
1361 round(inv_rnd, b1, b0, kp);
1362 l_copy(b0, b1); kp += nc;
1363 }
1364
1365 round(inv_lrnd, b0, b1, kp);
1366 }
1367 #endif
1368
1369 state_out(out_blk, b0);
1370 }
1371
1372 /**
1373 * Implementation of crypter_t.decrypt.
1374 */
1375 static status_t decrypt (private_aes_cbc_crypter_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted)
1376 {
1377 int ret, pos;
1378 const u_int32_t *iv_i;
1379 u_int8_t *in, *out;
1380
1381 ret = data.len;
1382 if (((data.len) % 16) != 0)
1383 {
1384 /* data length must be padded to a multiple of blocksize */
1385 return INVALID_ARG;
1386 }
1387
1388 decrypted->ptr = allocator_alloc(data.len);
1389 if (decrypted->ptr == NULL)
1390 {
1391 return OUT_OF_RES;
1392 }
1393 decrypted->len = data.len;
1394
1395 in = data.ptr;
1396 out = decrypted->ptr;
1397
1398 pos=data.len-16;
1399 in+=pos;
1400 out+=pos;
1401 while(pos>=0) {
1402 this->decrypt_block(this,in,out);
1403 if (pos==0)
1404 iv_i=(const u_int32_t*) (iv.ptr);
1405 else
1406 iv_i=(const u_int32_t*) (in-16);
1407 *((u_int32_t *)(&out[ 0])) ^= iv_i[0];
1408 *((u_int32_t *)(&out[ 4])) ^= iv_i[1];
1409 *((u_int32_t *)(&out[ 8])) ^= iv_i[2];
1410 *((u_int32_t *)(&out[12])) ^= iv_i[3];
1411 in-=16;
1412 out-=16;
1413 pos-=16;
1414 }
1415
1416 return SUCCESS;
1417 }
1418
1419
1420 /**
1421 * Implementation of crypter_t.decrypt.
1422 */
1423 static status_t encrypt (private_aes_cbc_crypter_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted)
1424 {
1425 int ret, pos;
1426 const u_int32_t *iv_i;
1427 u_int8_t *in, *out;
1428
1429 ret = data.len;
1430 if (((data.len) % 16) != 0)
1431 {
1432 /* data length must be padded to a multiple of blocksize */
1433 return INVALID_ARG;
1434 }
1435
1436 encrypted->ptr = allocator_alloc(data.len);
1437 if (encrypted->ptr == NULL)
1438 {
1439 return OUT_OF_RES;
1440 }
1441 encrypted->len = data.len;
1442
1443 in = data.ptr;
1444 out = encrypted->ptr;
1445
1446 pos=0;
1447 while(pos<data.len)
1448 {
1449 if (pos==0)
1450 iv_i=(const u_int32_t*) iv.ptr;
1451 else
1452 iv_i=(const u_int32_t*) (out-16);
1453 *((u_int32_t *)(&out[ 0])) = iv_i[0]^*((const u_int32_t *)(&in[ 0]));
1454 *((u_int32_t *)(&out[ 4])) = iv_i[1]^*((const u_int32_t *)(&in[ 4]));
1455 *((u_int32_t *)(&out[ 8])) = iv_i[2]^*((const u_int32_t *)(&in[ 8]));
1456 *((u_int32_t *)(&out[12])) = iv_i[3]^*((const u_int32_t *)(&in[12]));
1457 this->encrypt_block(this,out,out);
1458 in+=16;
1459 out+=16;
1460 pos+=16;
1461 }
1462 return SUCCESS;
1463 }
1464
1465 /**
1466 * Implementation of crypter_t.get_block_size.
1467 */
1468 static size_t get_block_size (private_aes_cbc_crypter_t *this)
1469 {
1470 return AES_BLOCK_SIZE;
1471 }
1472
1473 /**
1474 * Implementation of crypter_t.get_key_size.
1475 */
1476 static size_t get_key_size (private_aes_cbc_crypter_t *this)
1477 {
1478 return this->key_size;
1479 }
1480
1481 /**
1482 * Implementation of crypter_t.set_key.
 *
 * Expands the supplied key into the encryption key schedule
 * (aes_e_key) and derives the decryption key schedule (aes_d_key)
 * from it. Also sets aes_Nrnd to max(aes_Nkey, aes_Ncol) + 6.
 *
 * @param this	crypter to install the key on
 * @param key	key bytes; key.len must equal the key size the crypter
 *				was created with
 * @return		SUCCESS, or INVALID_ARG if key.len does not match
1483 */
1484 static status_t set_key (private_aes_cbc_crypter_t *this, chunk_t key)
1485 {
/* f is never changed from 0 in this function, so the decryption
 * schedule below is always built */
1486 u_int32_t *kf, *kt, rci, f = 0;
1487 u_int8_t *in_key = key.ptr;
1488
1489 if (key.len != this->key_size)
1490 {
1491 return INVALID_ARG;
1492 }
1493
1494 this->aes_Nrnd = (this->aes_Nkey > (this->aes_Ncol) ? this->aes_Nkey : (this->aes_Ncol)) + 6;
1495
/* the first four key words are common to all key lengths; the 24- and
 * 32-byte cases load the remaining words below */
1496 this->aes_e_key[0] = const_word_in(in_key );
1497 this->aes_e_key[1] = const_word_in(in_key + 4);
1498 this->aes_e_key[2] = const_word_in(in_key + 8);
1499 this->aes_e_key[3] = const_word_in(in_key + 12);
1500
/* kf walks forward through the schedule one key length at a time; kt
 * is the position at which expansion stops: nc * (Nrnd + 1) words are
 * needed in total and each step writes the aes_Nkey words after kf */
1501 kf = this->aes_e_key;
1502 kt = kf + nc * (this->aes_Nrnd + 1) - this->aes_Nkey;
1503 rci = 0;
1504
/* aes_Nkey values other than 4, 6 and 8 match no case and leave the
 * schedule unexpanded */
1505 switch(this->aes_Nkey)
1506 {
1507 case 4: do
1508 { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++];
1509 kf[5] = kf[1] ^ kf[4];
1510 kf[6] = kf[2] ^ kf[5];
1511 kf[7] = kf[3] ^ kf[6];
1512 kf += 4;
1513 }
1514 while(kf < kt);
1515 break;
1516
1517 case 6: this->aes_e_key[4] = const_word_in(in_key + 16);
1518 this->aes_e_key[5] = const_word_in(in_key + 20);
1519 do
1520 { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
1521 kf[ 7] = kf[1] ^ kf[ 6];
1522 kf[ 8] = kf[2] ^ kf[ 7];
1523 kf[ 9] = kf[3] ^ kf[ 8];
1524 kf[10] = kf[4] ^ kf[ 9];
1525 kf[11] = kf[5] ^ kf[10];
1526 kf += 6;
1527 }
1528 while(kf < kt);
1529 break;
1530
/* the 8-word case applies an extra ls_box (without rotation or round
 * constant) half way through each step */
1531 case 8: this->aes_e_key[4] = const_word_in(in_key + 16);
1532 this->aes_e_key[5] = const_word_in(in_key + 20);
1533 this->aes_e_key[6] = const_word_in(in_key + 24);
1534 this->aes_e_key[7] = const_word_in(in_key + 28);
1535 do
1536 { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
1537 kf[ 9] = kf[1] ^ kf[ 8];
1538 kf[10] = kf[2] ^ kf[ 9];
1539 kf[11] = kf[3] ^ kf[10];
1540 kf[12] = kf[4] ^ ls_box(kf[11],0);
1541 kf[13] = kf[5] ^ kf[12];
1542 kf[14] = kf[6] ^ kf[13];
1543 kf[15] = kf[7] ^ kf[14];
1544 kf += 8;
1545 }
1546 while (kf < kt);
1547 break;
1548 }
1549
/* Decryption schedule: the encryption round keys in reverse round
 * order. kf reads aes_e_key front to back while kt starts at the last
 * round-key slot of aes_d_key and moves towards the front. The first
 * and last round keys are copied; with ONE_TABLE or FOUR_TABLES the
 * ones in between go through inv_mcol (mix), otherwise they are
 * copied as well. */
1550 if(!f)
1551 {
1552 u_int32_t i;
1553
1554 kt = this->aes_d_key + nc * this->aes_Nrnd;
1555 kf = this->aes_e_key;
1556
/* cpy/mix advance kt by one block of words; stepping back 2 * nc then
 * lands on the previous round-key slot.
 * NOTE(review): that only holds if cpy/mix move exactly nc words */
1557 cpy(kt, kf); kt -= 2 * nc;
1558
1559 for(i = 1; i < this->aes_Nrnd; ++i)
1560 {
1561 #if defined(ONE_TABLE) || defined(FOUR_TABLES)
/* scratch words required by the table-free form of inv_mcol */
1562 #if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
1563 u_int32_t f2, f4, f8, f9;
1564 #endif
1565 mix(kt, kf);
1566 #else
1567 cpy(kt, kf);
1568 #endif
1569 kt -= 2 * nc;
1570 }
1571 cpy(kt, kf);
1572 }
1573
1574 return SUCCESS;
1575 }
1576
1577 /**
1578 * Implementation of crypter_t.destroy and aes_cbc_crypter_t.destroy.
1579 */
1580 static void destroy (private_aes_cbc_crypter_t *this)
1581 {
1582 allocator_free(this);
1583 }
1584
1585 /*
1586 * Described in header
1587 */
1588 aes_cbc_crypter_t *aes_cbc_crypter_create(size_t key_size)
1589 {
1590 private_aes_cbc_crypter_t *this = allocator_alloc_thing(private_aes_cbc_crypter_t);
1591
1592 #if !defined(FIXED_TABLES)
1593 if(!tab_gen) { gen_tabs(); tab_gen = 1; }
1594 #endif
1595
1596 this->key_size = key_size;
1597 switch(key_size) {
1598 case 32: /* bytes */
1599 this->aes_Ncol = 8;
1600 this->aes_Nkey = 8;
1601 break;
1602 case 24: /* bytes */
1603 this->aes_Ncol = 6;
1604 this->aes_Nkey = 6;
1605 break;
1606 case 16: /* bytes */
1607 this->aes_Ncol = 4;
1608 this->aes_Nkey = 4;
1609 break;
1610 default:
1611 allocator_free(this);
1612 return NULL;
1613 }
1614
1615 /* functions of crypter_t interface */
1616 this->public.crypter_interface.encrypt = (status_t (*) (crypter_t *, chunk_t,chunk_t, chunk_t *)) encrypt;
1617 this->public.crypter_interface.decrypt = (status_t (*) (crypter_t *, chunk_t , chunk_t, chunk_t *)) decrypt;
1618 this->public.crypter_interface.get_block_size = (size_t (*) (crypter_t *)) get_block_size;
1619 this->public.crypter_interface.get_key_size = (size_t (*) (crypter_t *)) get_key_size;
1620 this->public.crypter_interface.set_key = (status_t (*) (crypter_t *,chunk_t)) set_key;
1621 this->public.crypter_interface.destroy = (void (*) (crypter_t *)) destroy;
1622
1623 /* private functions */
1624 this->decrypt_block = decrypt_block;
1625 this->encrypt_block = encrypt_block;
1626
1627 return &(this->public);
1628 }