crypter_t api supports in-place encryption using NULL as output parameter
[strongswan.git] / src / libstrongswan / plugins / aes / aes_crypter.c
1 /*
2 * Copyright (C) 2001 Dr B. R. Gladman <brg@gladman.uk.net>
3 * Copyright (C) 2005-2006 Martin Willi
4 * Copyright (C) 2005 Jan Hutter
5 * Hochschule fuer Technik Rapperswil
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * for more details.
16 *
17 * $Id$
18 */
19
20 #include "aes_crypter.h"
21
22 /*
23 * The number of key schedule words for different block and key lengths
24 * allowing for method of computation which requires the length to be a
25 * multiple of the key length. This version of AES implementation supports
26 * all three keylengths 16, 24 and 32 bytes!
27 *
28 * Nk = 4 6 8
29 * -------------
30 * Nb = 4 | 60 60 64
31 * 6 | 96 90 96
32 * 8 | 120 120 120
33 */
34 #define AES_KS_LENGTH 120
35 #define AES_RC_LENGTH 29
36
37 #define AES_BLOCK_SIZE 16
38
39 typedef struct private_aes_crypter_t private_aes_crypter_t;
40
41 /**
42 * Class implementing the AES symmetric encryption algorithm.
43 *
44 * @ingroup crypters
45 */
46 struct private_aes_crypter_t {
47
48 /**
49 * Public part of this class.
50 */
51 aes_crypter_t public;
52
53 /**
54 * Number of words in the key input block.
55 */
56 u_int32_t aes_Nkey;
57
58 /**
59 * The number of cipher rounds.
60 */
61 u_int32_t aes_Nrnd;
62
63 /**
64 * The encryption key schedule.
65 */
66 u_int32_t aes_e_key[AES_KS_LENGTH];
67
68 /**
69 * The decryption key schedule.
70 */
71 u_int32_t aes_d_key[AES_KS_LENGTH];
72
73 /**
74 * Key size of this AES cypher object.
75 */
76 u_int32_t key_size;
77 };
78
79
80 /* ugly macro stuff */
81
82 /* 1. Define UNROLL for full loop unrolling in encryption and decryption.
83 * 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
84 * 3. Define FIXED_TABLES for compiled rather than dynamic tables.
85 * 4. Define FF_TABLES to use tables for field multiplies and inverses.
86 * Do not enable this without understanding stack space requirements.
87 * 5. Define ARRAYS to use arrays to hold the local state block. If this
88 * is not defined, individually declared 32-bit words are used.
89 * 6. Define FAST_VARIABLE if a high speed variable block implementation
90 * is needed (essentially three separate fixed block size code sequences)
91 * 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
92 * version using 1 table (2 kbytes of table space) or 4 tables (8
93 * kbytes of table space) for higher speed.
94 * 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
95 * increase by using tables for the last rounds but with more table
96 * space (2 or 8 kbytes extra).
97 * 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
98 * slower version is provided.
99 * 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
100 * or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
101 */
102
103 #define UNROLL
104 //#define PARTIAL_UNROLL
105
106 #define FIXED_TABLES
107 //#define FF_TABLES
108 //#define ARRAYS
109 #define FAST_VARIABLE
110
111 //#define ONE_TABLE
112 #define FOUR_TABLES
113
114 //#define ONE_LR_TABLE
115 #define FOUR_LR_TABLES
116
117 //#define ONE_IM_TABLE
118 #define FOUR_IM_TABLES
119
120 #if defined(UNROLL) && defined (PARTIAL_UNROLL)
121 #error both UNROLL and PARTIAL_UNROLL are defined
122 #endif
123
124 #if defined(ONE_TABLE) && defined (FOUR_TABLES)
125 #error both ONE_TABLE and FOUR_TABLES are defined
126 #endif
127
128 #if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
129 #error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
130 #endif
131
132 #if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
133 #error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
134 #endif
135
136 #if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
137 #error an illegal block size has been specified
138 #endif
139
/**
 * Rotates the bytes of the word x by n positions towards higher index
 * positions; bytes leaving the top wrap around into the low positions.
 * The wrap-around part shifts right by (32 - 8 * n) bits, i.e. x is
 * handled as a 32-bit word and n is expected to be 1, 2 or 3.
 */
#define upr(x,n) (((x) << (8 * (n))) | ((x) >> (32 - (8 * (n)))))

/**
 * Shifts the bytes of the word x by n positions towards higher index
 * positions. In contrast to upr() nothing wraps around.
 */
#define ups(x,n) ((x) << (8 * (n)))

/**
 * Extracts byte n from the word x, byte 0 being the least significant one.
 */
#define bval(x,n) ((unsigned char)((x) >> (8 * (n))))

/**
 * Builds a word out of four bytes: b0 becomes the least significant
 * byte, b3 the most significant one.
 */
#define bytes2word(b0, b1, b2, b3) \
	(((u_int32_t)(b3) << 24) | ((u_int32_t)(b2) << 16) | \
	 ((u_int32_t)(b1) << 8) | (b0))
157
158
/*
 * Word access to byte buffers.
 *
 * AES_LE_OK is defined for little endian processors without data
 * alignment restrictions; on those the buffer is read and written
 * directly as a u_int32_t. On every other platform the word is assembled
 * from, or split into, single bytes, the byte at the lowest address being
 * the least significant one.
 *
 * Every expansion is fully parenthesised so that the macros can be used
 * as operands of a larger expression. The argument v of the *_out macros
 * is evaluated more than once on the generic path and must therefore be
 * free of side effects.
 */

/* original code: i386 */
#if defined(i386) || defined(_I386) || defined(__i386__) || defined(__i386)
#define AES_LE_OK 1
/* added (tested): alpha --jjo */
#elif defined(__alpha__) || defined(__alpha)
#define AES_LE_OK 1
/* added (tested): ia64 --jjo */
#elif defined(__ia64__) || defined(__ia64)
#define AES_LE_OK 1
#endif

#ifdef AES_LE_OK
/* little endian processor without data alignment restrictions */
#define word_in(x)		(*(u_int32_t*)(x))
#define const_word_in(x)	(*(const u_int32_t*)(x))
#define word_out(x,v)		(*(u_int32_t*)(x) = (v))
#else
/* slower but generic big endian or with data alignment restrictions */
/* some additional "const" touches to stop "gcc -Wcast-qual" complains --jjo */
#define word_in(x) \
	((u_int32_t)(((unsigned char *)(x))[0]) | \
	 ((u_int32_t)(((unsigned char *)(x))[1]) << 8) | \
	 ((u_int32_t)(((unsigned char *)(x))[2]) << 16) | \
	 ((u_int32_t)(((unsigned char *)(x))[3]) << 24))
#define const_word_in(x) \
	((u_int32_t)(((const unsigned char *)(x))[0]) | \
	 ((u_int32_t)(((const unsigned char *)(x))[1]) << 8) | \
	 ((u_int32_t)(((const unsigned char *)(x))[2]) << 16) | \
	 ((u_int32_t)(((const unsigned char *)(x))[3]) << 24))
#define word_out(x,v) \
	(((unsigned char *)(x))[0] = (unsigned char)(v), \
	 ((unsigned char *)(x))[1] = (unsigned char)((v) >> 8), \
	 ((unsigned char *)(x))[2] = (unsigned char)((v) >> 16), \
	 ((unsigned char *)(x))[3] = (unsigned char)((v) >> 24))
#endif

/*
 * A store can not be done through a const-qualified lvalue, so the former
 * definitions of const_word_out() could not be compiled wherever they were
 * expanded. The name is kept and simply forwards to word_out().
 */
#define const_word_out(x,v)	word_out(x,v)
185
186 // Disable at least some poor combinations of options
187
188 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
189 #define FIXED_TABLES
190 #undef UNROLL
191 #undef ONE_LR_TABLE
192 #undef FOUR_LR_TABLES
193 #undef ONE_IM_TABLE
194 #undef FOUR_IM_TABLES
195 #elif !defined(FOUR_TABLES)
196 #ifdef FOUR_LR_TABLES
197 #undef FOUR_LR_TABLES
198 #define ONE_LR_TABLE
199 #endif
200 #ifdef FOUR_IM_TABLES
201 #undef FOUR_IM_TABLES
202 #define ONE_IM_TABLE
203 #endif
204 #elif !defined(AES_BLOCK_SIZE)
205 #if defined(UNROLL)
206 #define PARTIAL_UNROLL
207 #undef UNROLL
208 #endif
209 #endif
210
// the finite field modular polynomial and elements

#define ff_poly 0x011b
#define ff_hi 0x80

// multiply four bytes in GF(2^8) by 'x' {02} in parallel:
// m2 keeps the low seven bits of every byte before they are shifted up,
// m1 picks the top bit of every byte, which, moved down to bit 0 and
// multiplied by m3 (0x1b), yields the reduction term for exactly those
// bytes whose top bit was set.

#define m1 0x80808080
#define m2 0x7f7f7f7f
#define m3 0x0000001b
#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))

// The following defines provide alternative definitions of FFmulX that might
// give improved performance if a fast 32-bit multiply is not available. Note
// that a temporary variable u needs to be defined where FFmulX is used.

// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
// #define m4 0x1b1b1b1b
// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)

// perform column mix operation on four bytes in parallel
//
// A variable named f2 must be in scope where fwd_mcol is expanded; it
// receives FFmulX(x). The argument is evaluated several times and is
// parenthesised at every use, so that an argument like (a | b) is not
// regrouped by the surrounding operators.

#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr((x) ^ f2,3) ^ upr((x),2) ^ upr((x),1))
234
235 #if defined(FIXED_TABLES)
236
237 // the S-Box table
238
239 static const unsigned char s_box[256] =
240 {
241 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
242 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
243 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
244 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
245 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
246 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
247 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
248 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
249 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
250 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
251 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
252 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
253 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
254 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
255 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
256 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
257 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
258 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
259 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
260 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
261 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
262 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
263 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
264 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
265 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
266 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
267 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
268 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
269 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
270 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
271 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
272 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
273 };
274
275 // the inverse S-Box table
276
277 static const unsigned char inv_s_box[256] =
278 {
279 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
280 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
281 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
282 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
283 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
284 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
285 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
286 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
287 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
288 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
289 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
290 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
291 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
292 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
293 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
294 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
295 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
296 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
297 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
298 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
299 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
300 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
301 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
302 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
303 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
304 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
305 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
306 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
307 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
308 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
309 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
310 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
311 };
312
313 #define w0(p) 0x000000##p
314
315 // Number of elements required in this table for different
316 // block and key lengths is:
317 //
318 // Nk = 4 6 8
319 // ----------
320 // Nb = 4 | 10 8 7
321 // 6 | 19 12 11
322 // 8 | 29 19 14
323 //
324 // this table can be a table of bytes if the key schedule
325 // code is adjusted accordingly
326
327 static const u_int32_t rcon_tab[29] =
328 {
329 w0(01), w0(02), w0(04), w0(08),
330 w0(10), w0(20), w0(40), w0(80),
331 w0(1b), w0(36), w0(6c), w0(d8),
332 w0(ab), w0(4d), w0(9a), w0(2f),
333 w0(5e), w0(bc), w0(63), w0(c6),
334 w0(97), w0(35), w0(6a), w0(d4),
335 w0(b3), w0(7d), w0(fa), w0(ef),
336 w0(c5)
337 };
338
339 #undef w0
340
341 #define r0(p,q,r,s) 0x##p##q##r##s
342 #define r1(p,q,r,s) 0x##q##r##s##p
343 #define r2(p,q,r,s) 0x##r##s##p##q
344 #define r3(p,q,r,s) 0x##s##p##q##r
345 #define w0(p) 0x000000##p
346 #define w1(p) 0x0000##p##00
347 #define w2(p) 0x00##p##0000
348 #define w3(p) 0x##p##000000
349
350 #if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
351
352 // data for forward tables (other than last round)
353
354 #define f_table \
355 r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
356 r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
357 r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
358 r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
359 r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
360 r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
361 r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
362 r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
363 r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
364 r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
365 r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
366 r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
367 r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
368 r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
369 r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
370 r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
371 r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
372 r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
373 r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
374 r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
375 r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
376 r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
377 r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
378 r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
379 r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
380 r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
381 r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
382 r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
383 r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
384 r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
385 r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
386 r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
387 r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
388 r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
389 r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
390 r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
391 r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
392 r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
393 r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
394 r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
395 r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
396 r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
397 r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
398 r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
399 r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
400 r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
401 r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
402 r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
403 r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
404 r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
405 r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
406 r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
407 r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
408 r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
409 r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
410 r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
411 r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
412 r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
413 r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
414 r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
415 r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
416 r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
417 r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
418 r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
419
420 // data for inverse tables (other than last round)
421
422 #define i_table \
423 r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
424 r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
425 r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
426 r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
427 r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
428 r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
429 r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
430 r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
431 r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
432 r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
433 r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
434 r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
435 r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
436 r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
437 r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
438 r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
439 r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
440 r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
441 r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
442 r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
443 r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
444 r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
445 r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
446 r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
447 r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
448 r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
449 r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
450 r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
451 r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
452 r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
453 r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
454 r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
455 r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
456 r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
457 r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
458 r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
459 r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
460 r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
461 r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
462 r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
463 r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
464 r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
465 r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
466 r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
467 r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
468 r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
469 r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
470 r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
471 r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
472 r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
473 r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
474 r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
475 r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
476 r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
477 r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
478 r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
479 r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
480 r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
481 r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
482 r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
483 r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
484 r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
485 r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
486 r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
487
488 // generate the required tables in the desired endian format
489
490 #undef r
491 #define r r0
492
493 #if defined(ONE_TABLE)
494 static const u_int32_t ft_tab[256] =
495 { f_table };
496 #elif defined(FOUR_TABLES)
497 static const u_int32_t ft_tab[4][256] =
498 { { f_table },
499 #undef r
500 #define r r1
501 { f_table },
502 #undef r
503 #define r r2
504 { f_table },
505 #undef r
506 #define r r3
507 { f_table }
508 };
509 #endif
510
511 #undef r
512 #define r r0
513 #if defined(ONE_TABLE)
514 static const u_int32_t it_tab[256] =
515 { i_table };
516 #elif defined(FOUR_TABLES)
517 static const u_int32_t it_tab[4][256] =
518 { { i_table },
519 #undef r
520 #define r r1
521 { i_table },
522 #undef r
523 #define r r2
524 { i_table },
525 #undef r
526 #define r r3
527 { i_table }
528 };
529 #endif
530
531 #endif
532
533 #if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
534
535 // data for inverse tables (last round)
536
537 #define li_table \
538 w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
539 w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
540 w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
541 w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
542 w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
543 w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
544 w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
545 w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
546 w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
547 w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
548 w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
549 w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
550 w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
551 w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
552 w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
553 w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
554 w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
555 w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
556 w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
557 w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
558 w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
559 w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
560 w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
561 w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
562 w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
563 w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
564 w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
565 w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
566 w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
567 w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
568 w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
569 w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
570
571 // generate the required tables in the desired endian format
572
573 #undef r
574 #define r(p,q,r,s) w0(q)
575 #if defined(ONE_LR_TABLE)
576 static const u_int32_t fl_tab[256] =
577 { f_table };
578 #elif defined(FOUR_LR_TABLES)
579 static const u_int32_t fl_tab[4][256] =
580 { { f_table },
581 #undef r
582 #define r(p,q,r,s) w1(q)
583 { f_table },
584 #undef r
585 #define r(p,q,r,s) w2(q)
586 { f_table },
587 #undef r
588 #define r(p,q,r,s) w3(q)
589 { f_table }
590 };
591 #endif
592
593 #undef w
594 #define w w0
595 #if defined(ONE_LR_TABLE)
596 static const u_int32_t il_tab[256] =
597 { li_table };
598 #elif defined(FOUR_LR_TABLES)
599 static const u_int32_t il_tab[4][256] =
600 { { li_table },
601 #undef w
602 #define w w1
603 { li_table },
604 #undef w
605 #define w w2
606 { li_table },
607 #undef w
608 #define w w3
609 { li_table }
610 };
611 #endif
612
613 #endif
614
615 #if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
616
617 #define m_table \
618 r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
619 r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
620 r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
621 r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
622 r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
623 r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
624 r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
625 r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
626 r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
627 r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
628 r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
629 r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
630 r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
631 r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
632 r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
633 r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
634 r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
635 r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
636 r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
637 r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
638 r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
639 r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
640 r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
641 r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
642 r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
643 r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
644 r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
645 r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
646 r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
647 r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
648 r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
649 r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
650 r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
651 r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
652 r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
653 r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
654 r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
655 r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
656 r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
657 r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
658 r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
659 r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
660 r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
661 r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
662 r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
663 r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
664 r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
665 r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
666 r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
667 r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
668 r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
669 r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
670 r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
671 r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
672 r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
673 r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
674 r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
675 r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
676 r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
677 r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
678 r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
679 r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
680 r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
681 r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
682
683 #undef r
684 #define r r0
685
686 #if defined(ONE_IM_TABLE)
687 static const u_int32_t im_tab[256] =
688 { m_table };
689 #elif defined(FOUR_IM_TABLES)
690 static const u_int32_t im_tab[4][256] =
691 { { m_table },
692 #undef r
693 #define r r1
694 { m_table },
695 #undef r
696 #define r r2
697 { m_table },
698 #undef r
699 #define r r3
700 { m_table }
701 };
702 #endif
703
704 #endif
705
706 #else
707
708 static int tab_gen = 0;
709
710 static unsigned char s_box[256]; // the S box
711 static unsigned char inv_s_box[256]; // the inverse S box
712 static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
713
714 #if defined(ONE_TABLE)
715 static u_int32_t ft_tab[256];
716 static u_int32_t it_tab[256];
717 #elif defined(FOUR_TABLES)
718 static u_int32_t ft_tab[4][256];
719 static u_int32_t it_tab[4][256];
720 #endif
721
722 #if defined(ONE_LR_TABLE)
723 static u_int32_t fl_tab[256];
724 static u_int32_t il_tab[256];
725 #elif defined(FOUR_LR_TABLES)
726 static u_int32_t fl_tab[4][256];
727 static u_int32_t il_tab[4][256];
728 #endif
729
730 #if defined(ONE_IM_TABLE)
731 static u_int32_t im_tab[256];
732 #elif defined(FOUR_IM_TABLES)
733 static u_int32_t im_tab[4][256];
734 #endif
735
736 // Generate the tables for the dynamic table option
737
738 #if !defined(FF_TABLES)
739
// It will generally be sensible to use tables to compute finite
// field multiplies and inverses, but where memory is scarce this
// code might sometimes be better.
743
// return 2 ^ (n - 1) where n is the bit number of the highest bit set
// in x, for x in the range 1 < x < 0x00000200 (x < 2 yields 0). Halving
// the result keeps it within a byte, so the locals in FFinv can be bytes
// rather than words

static unsigned char hibit(const u_int32_t x)
{
	// smear the top bit of (x >> 1) downwards; only the low byte is kept
	unsigned int smear = ((x >> 1) | (x >> 2)) & 0xffu;

	smear |= smear >> 2;
	smear |= smear >> 4;

	// smear is now 2^k - 1, so adding one and halving leaves 2^(k - 1)
	return (unsigned char)((smear + 1u) >> 1);
}
755
// return the inverse of the finite field element x (0 and 1 are returned
// unchanged). Two remainder/coefficient pairs are reduced against each
// other until one remainder vanishes; hibit() supplies the (halved) top
// bit of each remainder so the quotient of two top bits is a power of two

static unsigned char FFinv(const unsigned char x)
{
	unsigned char rem_a = x, rem_b = 0x1b;
	unsigned char top_a = hibit(x), top_b = 0x80;
	unsigned char coef_a = 1, coef_b = 0;

	if (x < 2)
	{
		return x;
	}

	while (top_a)
	{
		// reduce the b pair by shifted copies of the a pair
		while (top_b >= top_a)
		{
			top_b /= top_a;
			rem_b ^= rem_a * top_b;
			coef_b ^= coef_a * top_b;
			top_b = hibit(rem_b);
		}

		if (!top_b)
		{
			return coef_b;
		}

		// and the a pair by shifted copies of the b pair
		while (top_a >= top_b)
		{
			top_a /= top_b;
			rem_a ^= rem_b * top_a;
			coef_a ^= coef_b * top_a;
			top_a = hibit(rem_a);
		}
	}
	return coef_a;
}
780
// finite field multiplies by the constants required for Rijndael.
// FFmul02 doubles x: the low seven bits are shifted up and 0x1b is folded
// in when bit 7 was set. The other multipliers are composed from doublings
// and additions (xor) of x. Note that x is evaluated more than once.

#define FFmul02(x) ((((x) & 0x80) ? 0x1b : 0) ^ (((x) & 0x7f) << 1))
#define FFmul03(x) (FFmul02(x) ^ (x))
#define FFmul09(x) (FFmul02(FFmul02(FFmul02(x))) ^ (x))
#define FFmul0b(x) (FFmul02(FFmul02(FFmul02(x)) ^ (x)) ^ (x))
#define FFmul0d(x) (FFmul02(FFmul02(FFmul02(x) ^ (x))) ^ (x))
#define FFmul0e(x) FFmul02(FFmul02(FFmul02(x) ^ (x)) ^ (x))
789
790 #else
791
// table driven versions of the finite field inverse and multiplies. They
// expect the pow[] and log[] arrays that gen_tabs() builds to be in scope:
// a product is pow[log[x] + log[constant]], and because pow[] holds a
// second copy of its entries at offset 255 the index sum needs no
// reduction. Zero has no logarithm and is handled separately.
// The argument is parenthesised in the condition so that any expression
// (including a conditional expression) can be passed safely.

#define FFinv(x) ((x) ? pow[255 - log[x]] : 0)

#define FFmul02(x) ((x) ? pow[log[x] + 0x19] : 0)
#define FFmul03(x) ((x) ? pow[log[x] + 0x01] : 0)
#define FFmul09(x) ((x) ? pow[log[x] + 0xc7] : 0)
#define FFmul0b(x) ((x) ? pow[log[x] + 0x68] : 0)
#define FFmul0d(x) ((x) ? pow[log[x] + 0xee] : 0)
#define FFmul0e(x) ((x) ? pow[log[x] + 0xdf] : 0)
800
801 #endif
802
// The forward and inverse affine transformations used in the S-box.
// Both macros use a u_int32_t variable named w from the calling scope as
// scratch: the shifted copies of the input are combined in w, the bits
// that moved above bit 7 are folded back into the low byte, and the
// constant (0x63 forward, 0x05 inverse) is added last.

#define fwd_affine(x) \
	(w = (u_int32_t)x, \
	 w = w ^ (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4), \
	 0x63 ^ (unsigned char)(w ^ (w >> 8)))

#define inv_affine(x) \
	(w = (u_int32_t)x, \
	 w = (w << 1) ^ (w << 3) ^ (w << 6), \
	 0x05 ^ (unsigned char)(w ^ (w >> 8)))
810
// Fill all run-time tables that are enabled at compile time: the round
// constants, the S-box and its inverse, and the word tables derived from
// them (fl/il, ft/it and im). Takes no arguments and returns nothing; it
// only writes the file-scope tables. aes_crypter_create() calls it while
// tab_gen is still 0.
811 static void gen_tabs(void)
812 { u_int32_t i, w;
813
814 #if defined(FF_TABLES)
815
// local tables read by the table driven FFinv / FFmulXX macros
816 unsigned char pow[512], log[256];
817
818 // log and power tables for GF(2^8) finite field with
819 // 0x011b as modular polynomial - the simplest primitive
820 // root is 0x03, used here to generate the tables
821
822 i = 0; w = 1;
823 do
824 {
825 pow[i] = (unsigned char)w;
// second copy, so that pow[log[a] + k] can be indexed without reduction
826 pow[i + 255] = (unsigned char)w;
827 log[w] = (unsigned char)i++;
// next power: w = w ^ 2*w, folding in ff_poly when the ff_hi bit is set
828 w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0);
829 }
830 while (w != 1);
831
832 #endif
833
// round constants: start at 1 and double w for each entry
834 for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
835 {
836 rcon_tab[i] = bytes2word(w, 0, 0, 0);
837 w = (w << 1) ^ (w & ff_hi ? ff_poly : 0);
838 }
839
840 for(i = 0; i < 256; ++i)
841 { unsigned char b;
842
// forward S-box entry: field inverse followed by the affine transform
843 s_box[i] = b = fwd_affine(FFinv((unsigned char)i));
844
// fl_tab: the S-box value alone; upr() derives the other three tables
// from the first (upr is defined outside this function)
845 w = bytes2word(b, 0, 0, 0);
846 #if defined(ONE_LR_TABLE)
847 fl_tab[i] = w;
848 #elif defined(FOUR_LR_TABLES)
849 fl_tab[0][i] = w;
850 fl_tab[1][i] = upr(w,1);
851 fl_tab[2][i] = upr(w,2);
852 fl_tab[3][i] = upr(w,3);
853 #endif
// ft_tab: the S-box value multiplied by 02, 01, 01 and 03
854 w = bytes2word(FFmul02(b), b, b, FFmul03(b));
855 #if defined(ONE_TABLE)
856 ft_tab[i] = w;
857 #elif defined(FOUR_TABLES)
858 ft_tab[0][i] = w;
859 ft_tab[1][i] = upr(w,1);
860 ft_tab[2][i] = upr(w,2);
861 ft_tab[3][i] = upr(w,3);
862 #endif
// inverse S-box entry: inverse affine transform, then field inverse;
// b is reused and from here on holds the inverse S-box value
863 inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i));
864
// il_tab: the inverse S-box value alone
865 w = bytes2word(b, 0, 0, 0);
866 #if defined(ONE_LR_TABLE)
867 il_tab[i] = w;
868 #elif defined(FOUR_LR_TABLES)
869 il_tab[0][i] = w;
870 il_tab[1][i] = upr(w,1);
871 il_tab[2][i] = upr(w,2);
872 il_tab[3][i] = upr(w,3);
873 #endif
// it_tab: the inverse S-box value multiplied by 0e, 09, 0d and 0b
874 w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b));
875 #if defined(ONE_TABLE)
876 it_tab[i] = w;
877 #elif defined(FOUR_TABLES)
878 it_tab[0][i] = w;
879 it_tab[1][i] = upr(w,1);
880 it_tab[2][i] = upr(w,2);
881 it_tab[3][i] = upr(w,3);
882 #endif
// im_tab holds the same word but is indexed by b rather than by i
883 #if defined(ONE_IM_TABLE)
884 im_tab[b] = w;
885 #elif defined(FOUR_IM_TABLES)
886 im_tab[0][b] = w;
887 im_tab[1][b] = upr(w,1);
888 im_tab[2][b] = upr(w,2);
889 im_tab[3][b] = upr(w,3);
890 #endif
891
892 }
893 }
894
895 #endif
896
// Generic word builders shared by the key schedule and the round macros.
// x is the state (or a word), vf(x,r,c) selects the word supplying row r
// of output column c and rf(r,c) selects the byte position that is taken
// from it with bval (bval and bytes2word are defined outside this excerpt).

// no_table: four byte lookups in box, packed into one word
897 #define no_table(x,box,vf,rf,c) bytes2word( \
898 box[bval(vf(x,0,c),rf(0,c))], \
899 box[bval(vf(x,1,c),rf(1,c))], \
900 box[bval(vf(x,2,c),rf(2,c))], \
901 box[bval(vf(x,3,c),rf(3,c))])
902
// one_table: four lookups in a single word table; op(...,n) adjusts the
// looked-up word for rows 1 to 3 before the xor
903 #define one_table(x,op,tab,vf,rf,c) \
904 ( tab[bval(vf(x,0,c),rf(0,c))] \
905 ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
906 ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
907 ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
908
// four_tables: one lookup in each of four word tables, no adjustment needed
909 #define four_tables(x,tab,vf,rf,c) \
910 ( tab[0][bval(vf(x,0,c),rf(0,c))] \
911 ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
912 ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
913 ^ tab[3][bval(vf(x,3,c),rf(3,c))])
914
// selectors for the single-word case: vf1 always yields x itself, rf1
// takes byte r, rf2 takes byte (r - c) modulo 4.
// NOTE(review): r and c are not parenthesised in rf2; the uses visible in
// this file pass plain constants.
915 #define vf1(x,r,c) (x)
916 #define rf1(r,c) (r)
917 #define rf2(r,c) ((r-c)&3)
918
// ls_box(x,c): S-box substitution of the four bytes of word x, with the
// byte positions offset by c through rf2. Uses fl_tab when it exists and
// the plain s_box otherwise. Used by the key expansion in set_key().
919 #if defined(FOUR_LR_TABLES)
920 #define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
921 #elif defined(ONE_LR_TABLE)
922 #define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
923 #else
924 #define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
925 #endif
926
// inv_mcol(x): value of the inverse mix-column step for one word. It is
// table driven when im_tab exists. The fallback computes it arithmetically
// and needs u_int32_t variables f2, f4, f8 and f9 in the calling scope;
// FFmulX is defined outside this excerpt.
927 #if defined(FOUR_IM_TABLES)
928 #define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
929 #elif defined(ONE_IM_TABLE)
930 #define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
931 #else
932 #define inv_mcol(x) \
933 (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
934 f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
935 #endif
936
// number of 32-bit words (columns) in one cipher block
937 #define nc (AES_BLOCK_SIZE/4)
938
939 // Initialise the key schedule from the user supplied key. The key
940 // length is now specified in bytes - 16, 24 or 32 as appropriate.
941 // This corresponds to bit lengths of 128, 192 and 256 bits, and
942 // to Nk values of 4, 6 and 8 respectively.
943
// helpers for building the decryption key schedule; t and f must be plain
// pointer variables because both are post-incremented:
// mx stores inv_mcol(*f) at *t, cp copies *f to *t unchanged
944 #define mx(t,f) (*t++ = inv_mcol(*f),f++)
945 #define cp(t,f) *t++ = *f++
946
// cpy(d,s) / mix(d,s) apply cp / mx to one whole round key (nc words) and
// leave d and s advanced by nc.
// NOTE(review): these expand to several statements without a do/while
// wrapper, so they must not be used as the unbraced body of an if or loop.
947 #if AES_BLOCK_SIZE == 16
948 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s)
949 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s)
950 #elif AES_BLOCK_SIZE == 24
951 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
952 cp(d,s); cp(d,s)
953 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
954 mx(d,s); mx(d,s)
955 #elif AES_BLOCK_SIZE == 32
956 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
957 cp(d,s); cp(d,s); cp(d,s); cp(d,s)
958 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
959 mx(d,s); mx(d,s); mx(d,s); mx(d,s)
960 #else
961
// any other block size setting: choose the word count through nc, using
// deliberate case fall-through (8 -> 6 -> 4)
962 #define cpy(d,s) \
963 switch(nc) \
964 { case 8: cp(d,s); cp(d,s); \
965 case 6: cp(d,s); cp(d,s); \
966 case 4: cp(d,s); cp(d,s); \
967 cp(d,s); cp(d,s); \
968 }
969
970 #define mix(d,s) \
971 switch(nc) \
972 { case 8: mx(d,s); mx(d,s); \
973 case 6: mx(d,s); mx(d,s); \
974 case 4: mx(d,s); mx(d,s); \
975 mx(d,s); mx(d,s); \
976 }
977
978 #endif
979
980 // y = output word, x = input word, r = row, c = column
981 // for r = 0, 1, 2 and 3 = column accessed for row r
982
// s(x,c) names state word c of state x: an array element when ARRAYS is
// defined, otherwise a separate variable whose name is x with the digit c
// pasted on (for example b0 and 1 give b01)
983 #if defined(ARRAYS)
984 #define s(x,c) x[c]
985 #else
986 #define s(x,c) x##c
987 #endif
988
989 // I am grateful to Frank Yellin for the following constructions
990 // which, given the column (c) of the output state variable that
991 // is being computed, return the input state variables which are
992 // needed for each row (r) of the state
993
994 // For the fixed block size options, compilers reduce these two
995 // expressions to fixed variable references. For variable block
996 // size code conditional clauses will sometimes be returned
997
// placeholder value: the state variable names that do not exist for the
// configured block size (b04, b05, ...) are #defined to this constant
998 #define unused 77 // Sunset Strip
999
// fwd_var(x,r,c): the word of input state x that supplies row r of output
// column c in the forward (encryption) direction. Row 0 always takes
// column c itself; rows 1 to 3 take a column further along, wrapping
// around at nc, with the distance depending on nc (4, 6 or 8).
// For nc == 4 this reduces to column (c + r) modulo 4.
1000 #define fwd_var(x,r,c) \
1001 ( r==0 ? \
1002 ( c==0 ? s(x,0) \
1003 : c==1 ? s(x,1) \
1004 : c==2 ? s(x,2) \
1005 : c==3 ? s(x,3) \
1006 : c==4 ? s(x,4) \
1007 : c==5 ? s(x,5) \
1008 : c==6 ? s(x,6) \
1009 : s(x,7)) \
1010 : r==1 ? \
1011 ( c==0 ? s(x,1) \
1012 : c==1 ? s(x,2) \
1013 : c==2 ? s(x,3) \
1014 : c==3 ? nc==4 ? s(x,0) : s(x,4) \
1015 : c==4 ? s(x,5) \
1016 : c==5 ? nc==8 ? s(x,6) : s(x,0) \
1017 : c==6 ? s(x,7) \
1018 : s(x,0)) \
1019 : r==2 ? \
1020 ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
1021 : c==1 ? nc==8 ? s(x,4) : s(x,3) \
1022 : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
1023 : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
1024 : c==4 ? nc==8 ? s(x,7) : s(x,0) \
1025 : c==5 ? nc==8 ? s(x,0) : s(x,1) \
1026 : c==6 ? s(x,1) \
1027 : s(x,2)) \
1028 : \
1029 ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
1030 : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
1031 : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
1032 : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
1033 : c==4 ? nc==8 ? s(x,0) : s(x,1) \
1034 : c==5 ? nc==8 ? s(x,1) : s(x,2) \
1035 : c==6 ? s(x,2) \
1036 : s(x,3)))
1037
// inv_var(x,r,c): counterpart of fwd_var for the inverse (decryption)
// direction. Row 0 again takes column c itself; rows 1 to 3 take an
// earlier column, wrapping around at nc, with the distance depending on
// nc (4, 6 or 8). For nc == 4 this reduces to column (c - r) modulo 4.
1038 #define inv_var(x,r,c) \
1039 ( r==0 ? \
1040 ( c==0 ? s(x,0) \
1041 : c==1 ? s(x,1) \
1042 : c==2 ? s(x,2) \
1043 : c==3 ? s(x,3) \
1044 : c==4 ? s(x,4) \
1045 : c==5 ? s(x,5) \
1046 : c==6 ? s(x,6) \
1047 : s(x,7)) \
1048 : r==1 ? \
1049 ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \
1050 : c==1 ? s(x,0) \
1051 : c==2 ? s(x,1) \
1052 : c==3 ? s(x,2) \
1053 : c==4 ? s(x,3) \
1054 : c==5 ? s(x,4) \
1055 : c==6 ? s(x,5) \
1056 : s(x,6)) \
1057 : r==2 ? \
1058 ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
1059 : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
1060 : c==2 ? nc==8 ? s(x,7) : s(x,0) \
1061 : c==3 ? nc==8 ? s(x,0) : s(x,1) \
1062 : c==4 ? nc==8 ? s(x,1) : s(x,2) \
1063 : c==5 ? nc==8 ? s(x,2) : s(x,3) \
1064 : c==6 ? s(x,3) \
1065 : s(x,4)) \
1066 : \
1067 ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \
1068 : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
1069 : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
1070 : c==3 ? nc==8 ? s(x,7) : s(x,0) \
1071 : c==4 ? nc==8 ? s(x,0) : s(x,1) \
1072 : c==5 ? nc==8 ? s(x,1) : s(x,2) \
1073 : c==6 ? s(x,2) \
1074 : s(x,3)))
1075
// si: load word c of byte buffer x into state y and xor it with key word
//     k[c]; so: store word c of state x into byte buffer y.
// const_word_in and word_out are defined outside this excerpt.
1076 #define si(y,x,k,c) s(y,c) = const_word_in(x + 4 * c) ^ k[c]
1077 #define so(y,x,c) word_out(y + 4 * c, s(x,c))
1078
// One column of a normal round: y[c] is computed from state x and round key
// word k[c]. The table variants xor the key after the lookup. In the
// table-free variant the forward round xors the key after fwd_mcol, while
// the inverse round xors the key first and applies inv_mcol to the result.
// fwd_mcol is defined outside this excerpt.
1079 #if defined(FOUR_TABLES)
1080 #define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
1081 #define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
1082 #elif defined(ONE_TABLE)
1083 #define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
1084 #define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
1085 #else
1086 #define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c]
1087 #define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c])
1088 #endif
1089
// One column of the final round, which has no mix-column step: it uses the
// fl/il tables, or the plain S-boxes when those tables do not exist.
// NOTE(review): the one-table variant passes ups where the normal round
// passes upr; ups is defined outside this excerpt.
1090 #if defined(FOUR_LR_TABLES)
1091 #define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
1092 #define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
1093 #elif defined(ONE_LR_TABLE)
1094 #define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c)
1095 #define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
1096 #else
1097 #define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
1098 #define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
1099 #endif
1100
// Per block size helpers used by encrypt_block() and decrypt_block():
//   locals(y,x)      declarator list for the two working states
//   l_copy(y,x)      copy state x to state y
//   state_in(y,x,k)  load a block from bytes x into state y, xoring key k
//   state_out(y,x)   store state x to bytes y
//   round(rm,y,x,k)  apply the column macro rm to every column
// All of them expand to several statements or declarators.
1101 #if AES_BLOCK_SIZE == 16
1102
1103 #if defined(ARRAYS)
1104 #define locals(y,x) x[4],y[4]
1105 #else
1106 #define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
1107 // the following defines prevent the compiler requiring the declaration
1108 // of generated but unused variables in the fwd_var and inv_var macros
// (they assume the two states are named b0 and b1, as in encrypt_block()
// and decrypt_block(): words 4 to 7 of each state become the constant)
1109 #define b04 unused
1110 #define b05 unused
1111 #define b06 unused
1112 #define b07 unused
1113 #define b14 unused
1114 #define b15 unused
1115 #define b16 unused
1116 #define b17 unused
1117 #endif
1118 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1119 s(y,2) = s(x,2); s(y,3) = s(x,3);
1120 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
1121 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
1122 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
1123
1124 #elif AES_BLOCK_SIZE == 24
1125
// six-word state: only words 6 and 7 are mapped to the constant
1126 #if defined(ARRAYS)
1127 #define locals(y,x) x[6],y[6]
1128 #else
1129 #define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \
1130 y##0,y##1,y##2,y##3,y##4,y##5
1131 #define b06 unused
1132 #define b07 unused
1133 #define b16 unused
1134 #define b17 unused
1135 #endif
1136 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1137 s(y,2) = s(x,2); s(y,3) = s(x,3); \
1138 s(y,4) = s(x,4); s(y,5) = s(x,5);
1139 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
1140 si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
1141 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
1142 so(y,x,3); so(y,x,4); so(y,x,5)
1143 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
1144 rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
1145 #else
1146
// every other setting: all eight state words are declared and copied
1147 #if defined(ARRAYS)
1148 #define locals(y,x) x[8],y[8]
1149 #else
1150 #define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
1151 y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
1152 #endif
1153 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1154 s(y,2) = s(x,2); s(y,3) = s(x,3); \
1155 s(y,4) = s(x,4); s(y,5) = s(x,5); \
1156 s(y,6) = s(x,6); s(y,7) = s(x,7);
1157
1158 #if AES_BLOCK_SIZE == 32
1159
1160 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
1161 si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
1162 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
1163 so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
1164 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
1165 rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
1166 #else
1167
// block size not fixed to 16, 24 or 32: select the column count through
// nc, using deliberate case fall-through (8 -> 6 -> 4)
1168 #define state_in(y,x,k) \
1169 switch(nc) \
1170 { case 8: si(y,x,k,7); si(y,x,k,6); \
1171 case 6: si(y,x,k,5); si(y,x,k,4); \
1172 case 4: si(y,x,k,3); si(y,x,k,2); \
1173 si(y,x,k,1); si(y,x,k,0); \
1174 }
1175
1176 #define state_out(y,x) \
1177 switch(nc) \
1178 { case 8: so(y,x,7); so(y,x,6); \
1179 case 6: so(y,x,5); so(y,x,4); \
1180 case 4: so(y,x,3); so(y,x,2); \
1181 so(y,x,1); so(y,x,0); \
1182 }
1183
1184 #if defined(FAST_VARIABLE)
1185
// FAST_VARIABLE: each case lists all of its columns and ends with break
1186 #define round(rm,y,x,k) \
1187 switch(nc) \
1188 { case 8: rm(y,x,k,7); rm(y,x,k,6); \
1189 rm(y,x,k,5); rm(y,x,k,4); \
1190 rm(y,x,k,3); rm(y,x,k,2); \
1191 rm(y,x,k,1); rm(y,x,k,0); \
1192 break; \
1193 case 6: rm(y,x,k,5); rm(y,x,k,4); \
1194 rm(y,x,k,3); rm(y,x,k,2); \
1195 rm(y,x,k,1); rm(y,x,k,0); \
1196 break; \
1197 case 4: rm(y,x,k,3); rm(y,x,k,2); \
1198 rm(y,x,k,1); rm(y,x,k,0); \
1199 break; \
1200 }
1201 #else
1202
// otherwise the cases fall through, as in state_in and state_out
1203 #define round(rm,y,x,k) \
1204 switch(nc) \
1205 { case 8: rm(y,x,k,7); rm(y,x,k,6); \
1206 case 6: rm(y,x,k,5); rm(y,x,k,4); \
1207 case 4: rm(y,x,k,3); rm(y,x,k,2); \
1208 rm(y,x,k,1); rm(y,x,k,0); \
1209 }
1210
1211 #endif
1212
1213 #endif
1214 #endif
1215
1216 /**
1217 * Encrypt a single block of data.
 *
 * Reads nc words from in_blk, runs this->aes_Nrnd rounds with the
 * encryption key schedule this->aes_e_key and writes nc words to out_blk.
 * The input is loaded completely into local state before anything is
 * written, and encrypt() relies on this by passing the same buffer for
 * both arguments.
 *
 * @param this		crypter whose encryption key schedule is used
 * @param in_blk	block to encrypt
 * @param out_blk	receives the encrypted block
1218 */
1219 static void encrypt_block(const private_aes_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[])
// the two working states must be named b0 and b1: without ARRAYS the
// macros paste a digit onto these names (b00, b01, ...) and the
// "#define b04 unused" style defines refer to exactly those names
1220 { u_int32_t locals(b0, b1);
1221 const u_int32_t *kp = this->aes_e_key;
1222
// presumably scratch for the fwd_mcol macro of the table-free build;
// fwd_mcol is defined outside this excerpt - TODO confirm
1223 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
1224 u_int32_t f2;
1225 #endif
1226
// load the block and xor it with the first round key
1227 state_in(b0, in_blk, kp); kp += nc;
1228
1229 #if defined(UNROLL)
1230
// fully unrolled: enter at the case matching the round count and fall
// through, so 14 rounds run 2 + 2 + 10 and 12 rounds run 2 + 10.
// There is no default case; other round counts run no rounds at all.
1231 switch(this->aes_Nrnd)
1232 {
1233 case 14: round(fwd_rnd, b1, b0, kp );
1234 round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1235 case 12: round(fwd_rnd, b1, b0, kp );
1236 round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1237 case 10: round(fwd_rnd, b1, b0, kp );
1238 round(fwd_rnd, b0, b1, kp + nc);
1239 round(fwd_rnd, b1, b0, kp + 2 * nc);
1240 round(fwd_rnd, b0, b1, kp + 3 * nc);
1241 round(fwd_rnd, b1, b0, kp + 4 * nc);
1242 round(fwd_rnd, b0, b1, kp + 5 * nc);
1243 round(fwd_rnd, b1, b0, kp + 6 * nc);
1244 round(fwd_rnd, b0, b1, kp + 7 * nc);
1245 round(fwd_rnd, b1, b0, kp + 8 * nc);
1246 round(fwd_lrnd, b0, b1, kp + 9 * nc);
1247 }
1248
1249 #elif defined(PARTIAL_UNROLL)
// two rounds per iteration, ping-ponging between b0 and b1; the last
// pair (one normal round plus the final round) follows the loop
1250 { u_int32_t rnd;
1251
1252 for(rnd = 0; rnd < (this->aes_Nrnd >> 1) - 1; ++rnd)
1253 {
1254 round(fwd_rnd, b1, b0, kp);
1255 round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc;
1256 }
1257
1258 round(fwd_rnd, b1, b0, kp);
1259 round(fwd_lrnd, b0, b1, kp + nc);
1260 }
1261 #else
// one round per iteration: compute into b1, copy back to b0; the final
// round after the loop reads b1 and leaves its result in b0
1262 { u_int32_t rnd;
1263
1264 for(rnd = 0; rnd < this->aes_Nrnd - 1; ++rnd)
1265 {
1266 round(fwd_rnd, b1, b0, kp);
1267 l_copy(b0, b1); kp += nc;
1268 }
1269
1270 round(fwd_lrnd, b0, b1, kp);
1271 }
1272 #endif
1273
// in all three variants the result ends up in b0
1274 state_out(out_blk, b0);
1275 }
1276
1277 /**
1278 * Decrypt a single block of data.
 *
 * Reads nc words from in_blk, runs this->aes_Nrnd inverse rounds with the
 * decryption key schedule this->aes_d_key and writes nc words to out_blk.
 * The input is loaded completely into local state before anything is
 * written.
 *
 * @param this		crypter whose decryption key schedule is used
 * @param in_blk	block to decrypt
 * @param out_blk	receives the decrypted block
1279 */
1280 static void decrypt_block(const private_aes_crypter_t *this, const unsigned char in_blk[], unsigned char out_blk[])
// the two working states must be named b0 and b1: without ARRAYS the
// macros paste a digit onto these names (b00, b01, ...) and the
// "#define b04 unused" style defines refer to exactly those names
1281 { u_int32_t locals(b0, b1);
1282 const u_int32_t *kp = this->aes_d_key;
1283
// scratch variables required by the arithmetic fallback of inv_mcol,
// which the table-free inv_rnd expands to
1284 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
1285 u_int32_t f2, f4, f8, f9;
1286 #endif
1287
// load the block and xor it with the first word group of the schedule
1288 state_in(b0, in_blk, kp); kp += nc;
1289
1290 #if defined(UNROLL)
1291
// fully unrolled: enter at the case matching the round count and fall
// through, so 14 rounds run 2 + 2 + 10 and 12 rounds run 2 + 10.
// There is no default case; other round counts run no rounds at all.
1292 switch(this->aes_Nrnd)
1293 {
1294 case 14: round(inv_rnd, b1, b0, kp );
1295 round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1296 case 12: round(inv_rnd, b1, b0, kp );
1297 round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1298 case 10: round(inv_rnd, b1, b0, kp );
1299 round(inv_rnd, b0, b1, kp + nc);
1300 round(inv_rnd, b1, b0, kp + 2 * nc);
1301 round(inv_rnd, b0, b1, kp + 3 * nc);
1302 round(inv_rnd, b1, b0, kp + 4 * nc);
1303 round(inv_rnd, b0, b1, kp + 5 * nc);
1304 round(inv_rnd, b1, b0, kp + 6 * nc);
1305 round(inv_rnd, b0, b1, kp + 7 * nc);
1306 round(inv_rnd, b1, b0, kp + 8 * nc);
1307 round(inv_lrnd, b0, b1, kp + 9 * nc);
1308 }
1309
1310 #elif defined(PARTIAL_UNROLL)
// two rounds per iteration, ping-ponging between b0 and b1; the last
// pair (one normal round plus the final round) follows the loop
1311 { u_int32_t rnd;
1312
1313 for(rnd = 0; rnd < (this->aes_Nrnd >> 1) - 1; ++rnd)
1314 {
1315 round(inv_rnd, b1, b0, kp);
1316 round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc;
1317 }
1318
1319 round(inv_rnd, b1, b0, kp);
1320 round(inv_lrnd, b0, b1, kp + nc);
1321 }
1322 #else
// one round per iteration: compute into b1, copy back to b0; the final
// round after the loop reads b1 and leaves its result in b0
1323 { u_int32_t rnd;
1324
1325 for(rnd = 0; rnd < this->aes_Nrnd - 1; ++rnd)
1326 {
1327 round(inv_rnd, b1, b0, kp);
1328 l_copy(b0, b1); kp += nc;
1329 }
1330
1331 round(inv_lrnd, b0, b1, kp);
1332 }
1333 #endif
1334
// in all three variants the result ends up in b0
1335 state_out(out_blk, b0);
1336 }
1337
1338 /**
1339 * Implementation of crypter_t.decrypt.
1340 */
1341 static void decrypt(private_aes_crypter_t *this, chunk_t data, chunk_t iv,
1342 chunk_t *decrypted)
1343 {
1344 int pos;
1345 const u_int32_t *iv_i;
1346 u_int8_t *in, *out;
1347
1348 if (decrypted)
1349 {
1350 *decrypted = chunk_alloc(data.len);
1351 out = decrypted->ptr;
1352 }
1353 else
1354 {
1355 out = data.ptr;
1356 }
1357 in = data.ptr;
1358
1359 pos = data.len-16;
1360 in += pos;
1361 out += pos;
1362 while (pos >= 0)
1363 {
1364 decrypt_block(this, in, out);
1365 if (pos==0)
1366 {
1367 iv_i=(const u_int32_t*) (iv.ptr);
1368 }
1369 else
1370 {
1371 iv_i=(const u_int32_t*) (in-16);
1372 }
1373 *((u_int32_t *)(&out[ 0])) ^= iv_i[0];
1374 *((u_int32_t *)(&out[ 4])) ^= iv_i[1];
1375 *((u_int32_t *)(&out[ 8])) ^= iv_i[2];
1376 *((u_int32_t *)(&out[12])) ^= iv_i[3];
1377 in-=16;
1378 out-=16;
1379 pos-=16;
1380 }
1381 }
1382
1383
1384 /**
1385 * Implementation of crypter_t.decrypt.
1386 */
1387 static void encrypt (private_aes_crypter_t *this, chunk_t data, chunk_t iv,
1388 chunk_t *encrypted)
1389 {
1390 int pos;
1391 const u_int32_t *iv_i;
1392 u_int8_t *in, *out;
1393
1394 in = data.ptr;
1395 out = data.ptr;
1396 if (encrypted)
1397 {
1398 *encrypted = chunk_alloc(data.len);
1399 out = encrypted->ptr;
1400 }
1401
1402 pos=0;
1403 while(pos<data.len)
1404 {
1405 if (pos==0)
1406 {
1407 iv_i=(const u_int32_t*) iv.ptr;
1408 }
1409 else
1410 {
1411 iv_i=(const u_int32_t*) (out-16);
1412 }
1413 *((u_int32_t *)(&out[ 0])) = iv_i[0]^*((const u_int32_t *)(&in[ 0]));
1414 *((u_int32_t *)(&out[ 4])) = iv_i[1]^*((const u_int32_t *)(&in[ 4]));
1415 *((u_int32_t *)(&out[ 8])) = iv_i[2]^*((const u_int32_t *)(&in[ 8]));
1416 *((u_int32_t *)(&out[12])) = iv_i[3]^*((const u_int32_t *)(&in[12]));
1417 encrypt_block(this, out, out);
1418 in+=16;
1419 out+=16;
1420 pos+=16;
1421 }
1422 }
1423
1424 /**
1425 * Implementation of crypter_t.get_block_size.
1426 */
1427 static size_t get_block_size (private_aes_crypter_t *this)
1428 {
1429 return AES_BLOCK_SIZE;
1430 }
1431
1432 /**
1433 * Implementation of crypter_t.get_key_size.
1434 */
1435 static size_t get_key_size (private_aes_crypter_t *this)
1436 {
1437 return this->key_size;
1438 }
1439
1440 /**
1441 * Implementation of crypter_t.set_key.
 *
 * Expands key into the encryption key schedule aes_e_key and derives the
 * decryption key schedule aes_d_key from it. The number of key words is
 * taken from this->aes_Nkey, which aes_crypter_create() has set.
 *
 * NOTE(review): key.len is never checked; aes_Nkey * 4 bytes are read from
 * key.ptr regardless of the chunk's length.
 *
 * @param this		crypter whose key schedules are written
 * @param key		key material
1442 */
1443 static void set_key (private_aes_crypter_t *this, chunk_t key)
1444 {
// NOTE(review): f is never changed from 0, so the if(!f) further down
// always executes
1445 u_int32_t *kf, *kt, rci, f = 0;
1446 u_int8_t *in_key = key.ptr;
1447
// rounds = max(Nkey, nc) + 6, i.e. 10, 12 or 14 for 4, 6 or 8 key words
// with a 16 byte block
1448 this->aes_Nrnd = (this->aes_Nkey > (nc) ? this->aes_Nkey : (nc)) + 6;
1449
// the first four key words are common to all key lengths
1450 this->aes_e_key[0] = const_word_in(in_key );
1451 this->aes_e_key[1] = const_word_in(in_key + 4);
1452 this->aes_e_key[2] = const_word_in(in_key + 8);
1453 this->aes_e_key[3] = const_word_in(in_key + 12);
1454
// kf walks through the schedule in steps of Nkey words and each pass
// writes the Nkey words following the current group; kt is the position
// from which no further pass is needed to reach nc * (Nrnd + 1) words
1455 kf = this->aes_e_key;
1456 kt = kf + nc * (this->aes_Nrnd + 1) - this->aes_Nkey;
1457 rci = 0;
1458
// the first word of each new group is the first word of the previous
// group xored with ls_box(...,3) of the previous group's last word and
// the next round constant; every further word is the word Nkey places
// back xored with the word just produced
1459 switch(this->aes_Nkey)
1460 {
1461 case 4: do
1462 { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++];
1463 kf[5] = kf[1] ^ kf[4];
1464 kf[6] = kf[2] ^ kf[5];
1465 kf[7] = kf[3] ^ kf[6];
1466 kf += 4;
1467 }
1468 while(kf < kt);
1469 break;
1470
// 24 byte key: load the two remaining key words first
1471 case 6: this->aes_e_key[4] = const_word_in(in_key + 16);
1472 this->aes_e_key[5] = const_word_in(in_key + 20);
1473 do
1474 { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
1475 kf[ 7] = kf[1] ^ kf[ 6];
1476 kf[ 8] = kf[2] ^ kf[ 7];
1477 kf[ 9] = kf[3] ^ kf[ 8];
1478 kf[10] = kf[4] ^ kf[ 9];
1479 kf[11] = kf[5] ^ kf[10];
1480 kf += 6;
1481 }
1482 while(kf < kt);
1483 break;
1484
// 32 byte key: load the four remaining key words first; the fifth word
// of each group gets an extra ls_box(...,0), i.e. a substitution with
// byte offset 0 and no round constant
1485 case 8: this->aes_e_key[4] = const_word_in(in_key + 16);
1486 this->aes_e_key[5] = const_word_in(in_key + 20);
1487 this->aes_e_key[6] = const_word_in(in_key + 24);
1488 this->aes_e_key[7] = const_word_in(in_key + 28);
1489 do
1490 { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
1491 kf[ 9] = kf[1] ^ kf[ 8];
1492 kf[10] = kf[2] ^ kf[ 9];
1493 kf[11] = kf[3] ^ kf[10];
1494 kf[12] = kf[4] ^ ls_box(kf[11],0);
1495 kf[13] = kf[5] ^ kf[12];
1496 kf[14] = kf[6] ^ kf[13];
1497 kf[15] = kf[7] ^ kf[14];
1498 kf += 8;
1499 }
1500 while (kf < kt);
1501 break;
1502 }
1503
// decryption schedule: the encryption round keys in reverse order
1504 if(!f)
1505 {
1506 u_int32_t i;
1507
// kt starts at the last round key slot of aes_d_key, kf at the first
// encryption round key. cpy/mix advance both pointers by nc, so the
// "kt -= 2 * nc" that follows moves kt back by one round key overall.
1508 kt = this->aes_d_key + nc * this->aes_Nrnd;
1509 kf = this->aes_e_key;
1510
1511 cpy(kt, kf); kt -= 2 * nc;
1512
// the round keys in between are passed through inv_mcol (mix) when the
// table driven rounds are used, otherwise they are copied unchanged
1513 for(i = 1; i < this->aes_Nrnd; ++i)
1514 {
1515 #if defined(ONE_TABLE) || defined(FOUR_TABLES)
// scratch variables required by the arithmetic fallback of inv_mcol
1516 #if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
1517 u_int32_t f2, f4, f8, f9;
1518 #endif
1519 mix(kt, kf);
1520 #else
1521 cpy(kt, kf);
1522 #endif
1523 kt -= 2 * nc;
1524 }
// the last encryption round key becomes the first decryption round key
1525 cpy(kt, kf);
1526 }
1527 }
1528
1529 /**
1530 * Implementation of crypter_t.destroy and aes_crypter_t.destroy.
1531 */
1532 static void destroy (private_aes_crypter_t *this)
1533 {
1534 free(this);
1535 }
1536
1537 /*
1538 * Described in header
1539 */
1540 aes_crypter_t *aes_crypter_create(encryption_algorithm_t algo, size_t key_size)
1541 {
1542 private_aes_crypter_t *this;
1543
1544 if (algo != ENCR_AES_CBC)
1545 {
1546 return NULL;
1547 }
1548
1549 this = malloc_thing(private_aes_crypter_t);
1550
1551 #if !defined(FIXED_TABLES)
1552 if(!tab_gen) { gen_tabs(); tab_gen = 1; }
1553 #endif
1554
1555 this->key_size = key_size;
1556 switch(key_size)
1557 {
1558 case 32: /* bytes */
1559 this->aes_Nkey = 8;
1560 break;
1561 case 24: /* bytes */
1562 this->aes_Nkey = 6;
1563 break;
1564 case 16: /* bytes */
1565 this->aes_Nkey = 4;
1566 break;
1567 default:
1568 free(this);
1569 return NULL;
1570 }
1571
1572 this->public.crypter_interface.encrypt = (void (*) (crypter_t *, chunk_t,chunk_t, chunk_t *)) encrypt;
1573 this->public.crypter_interface.decrypt = (void (*) (crypter_t *, chunk_t , chunk_t, chunk_t *)) decrypt;
1574 this->public.crypter_interface.get_block_size = (size_t (*) (crypter_t *)) get_block_size;
1575 this->public.crypter_interface.get_key_size = (size_t (*) (crypter_t *)) get_key_size;
1576 this->public.crypter_interface.set_key = (void (*) (crypter_t *,chunk_t)) set_key;
1577 this->public.crypter_interface.destroy = (void (*) (crypter_t *)) destroy;
1578
1579 return &(this->public);
1580 }