2 /* Optimized implementation of the Serpent AES candidate algorithm
3 * Designed by Anderson, Biham and Knudsen and Implemented by
4 * Gisle Sælensminde 2000.
6 * The implementation is based on the Pentium-optimised sboxes of
7 * Dag Arne Osvik. Although these sboxes are designed to be optimal for x86
8 * processors, they are efficient on other processors as well, but the speedup
9 * isn't as impressive compared to other implementations.
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Library General Public License
13 * as published by the Free Software Foundation; either version 2 of
14 * the License, or (at your option) any later version.
18 #include <linux/init.h>
19 #include <linux/types.h>
21 #include <asm/byteorder.h>
23 #include <sys/types.h>
24 #include <asm/byteorder.h>
/*
 * rotl / rotr - rotate the 32-bit value `reg` left / right by `val` bits.
 *
 * Every argument and the whole expansion are parenthesised so that
 * expression arguments such as rotl(a ^ b, n + 1) expand with the intended
 * precedence.
 *
 * `reg` must be an unsigned 32-bit value.  `val` must be in the range 1..31:
 * a count of 0 (or 32) makes one of the two shifts a full-width shift, which
 * is undefined behaviour in C.  Both arguments are evaluated twice, so do
 * not pass expressions with side effects.
 */
#define rotl(reg, val) (((reg) << (val)) | ((reg) >> (32 - (val))))
#define rotr(reg, val) (((reg) >> (val)) | ((reg) << (32 - (val))))
/* io_swap(x) - alias for __cpu_to_be32(), applied to the 32-bit words read
 * from the key and input buffers and written to the output buffer.
 * NOTE(review): presumably converts between CPU and big-endian byte order;
 * confirm against <asm/byteorder.h>. */
34 #define io_swap(x) __cpu_to_be32(x)
39 /* The sbox functions. The first four parameters are the input bits, and
40 * the last is a temporary. These parameters are also used for output, but
41 * the bit order is permuted. The output bit order from S0 is
42 * (1 4 2 0 3), where 3 is the (now useless) temporary.
45 #define S0(r0,r1,r2,r3,r4) \
65 #define S1(r0,r1,r2,r3,r4) \
90 #define S2(r0,r1,r2,r3,r4) \
108 #define S3(r0,r1,r2,r3,r4) \
129 #define S4(r0,r1,r2,r3,r4) \
151 #define S5(r0,r1,r2,r3,r4) \
172 #define S6(r0,r1,r2,r3,r4) \
192 #define S7(r0,r1,r2,r3,r4) \
214 /* The inverse sboxes */
216 #define I0(r0,r1,r2,r3,r4) \
237 #define I1(r0,r1,r2,r3,r4) \
258 #define I2(r0,r1,r2,r3,r4) \
279 #define I3(r0,r1,r2,r3,r4) \
299 #define I4(r0,r1,r2,r3,r4) \
321 #define I5(r0,r1,r2,r3,r4) \
343 #define I6(r0,r1,r2,r3,r4) \
362 #define I7(r0,r1,r2,r3,r4) \
383 /* forward and inverse linear transformations */
385 #define LINTRANS(r0,r1,r2,r3,r4) \
403 #define ILINTRANS(r0,r1,r2,r3,r4) \
/*
 * KEYMIX - XOR four consecutive key-schedule words into the state words.
 *
 *   r0..r3  state words, updated in place (must be lvalues)
 *   r4      unused; accepted so the argument list matches the sbox and
 *           linear-transformation macros
 *   IN      index of the round-key group; the words used are
 *           l_key[IN + 8] .. l_key[IN + 11]
 *
 * `l_key` is not a parameter: it must be in scope at the point of expansion.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe under an unbraced if/else) and is used as `KEYMIX(...);`.
 * `IN` is parenthesised so that expression arguments index correctly.
 */
#define KEYMIX(r0,r1,r2,r3,r4,IN) \
	do { \
		(r0) ^= l_key[(IN) + 8]; \
		(r1) ^= l_key[(IN) + 9]; \
		(r2) ^= l_key[(IN) + 10]; \
		(r3) ^= l_key[(IN) + 11]; \
	} while (0)
428 #define GETKEY(r0, r1, r2, r3, IN) \
434 #define SETKEY(r0, r1, r2, r3, IN) \
440 /* initialise the key schedule from the user supplied key */
442 int serpent_set_key(serpent_context
*cx
, const unsigned char *key
, int key_len
)
443 { const u32
*in_key
= (const u32
*)key
;
444 /* l_key - storage for the key schedule */
445 u32
*l_key
= cx
->keyinfo
;
446 u32 i
,lk
,r0
,r1
,r2
,r3
,r4
;
448 if (key_len
!= 16 && key_len
!= 24 && key_len
!= 32)
449 return -1; /* unsupported key length */
453 i
= 0; lk
= (key_len
+ 31) / 32;
458 l_key
[i
] = io_swap(in_key
[lk
- i
- 1]);
460 l_key
[i
] = in_key
[i
];
471 i
= key_len
/ 32; lk
= 1 << key_len
% 32;
477 for(i
= 0; i
< 132; ++i
)
479 lk
= l_key
[i
] ^ l_key
[i
+ 3] ^ l_key
[i
+ 5]
480 ^ l_key
[i
+ 7] ^ 0x9e3779b9 ^ i
;
482 l_key
[i
+ 8] = (lk
<< 11) | (lk
>> 21);
485 GETKEY(r0
, r1
, r2
, r3
, 0);
487 SETKEY(r1
, r2
, r3
, r4
, 0)
489 GETKEY(r0
, r1
, r2
, r3
, 4);
491 SETKEY(r2
, r3
, r1
, r4
, 4)
493 GETKEY(r0
, r1
, r2
, r3
, 8);
495 SETKEY(r3
, r1
, r2
, r0
, 8)
497 GETKEY(r0
, r1
, r2
, r3
, 12);
499 SETKEY(r1
, r4
, r2
, r0
, 12)
501 GETKEY(r0
, r1
, r2
, r3
, 16);
503 SETKEY(r2
, r4
, r3
, r0
, 16)
505 GETKEY(r0
, r1
, r2
, r3
, 20);
507 SETKEY(r0
, r1
, r4
, r2
, 20)
509 GETKEY(r0
, r1
, r2
, r3
, 24);
511 SETKEY(r1
, r3
, r0
, r2
, 24)
513 GETKEY(r0
, r1
, r2
, r3
, 28);
515 SETKEY(r1
, r4
, r0
, r3
, 28)
517 GETKEY(r0
, r1
, r2
, r3
, 32);
519 SETKEY(r1
, r2
, r3
, r4
, 32)
521 GETKEY(r0
, r1
, r2
, r3
, 36);
523 SETKEY(r2
, r3
, r1
, r4
, 36)
525 GETKEY(r0
, r1
, r2
, r3
, 40);
527 SETKEY(r3
, r1
, r2
, r0
, 40)
529 GETKEY(r0
, r1
, r2
, r3
, 44);
531 SETKEY(r1
, r4
, r2
, r0
, 44)
533 GETKEY(r0
, r1
, r2
, r3
, 48);
535 SETKEY(r2
, r4
, r3
, r0
, 48)
537 GETKEY(r0
, r1
, r2
, r3
, 52);
539 SETKEY(r0
, r1
, r4
, r2
, 52)
541 GETKEY(r0
, r1
, r2
, r3
, 56);
543 SETKEY(r1
, r3
, r0
, r2
, 56)
545 GETKEY(r0
, r1
, r2
, r3
, 60);
547 SETKEY(r1
, r4
, r0
, r3
, 60)
549 GETKEY(r0
, r1
, r2
, r3
, 64);
551 SETKEY(r1
, r2
, r3
, r4
, 64)
553 GETKEY(r0
, r1
, r2
, r3
, 68);
555 SETKEY(r2
, r3
, r1
, r4
, 68)
557 GETKEY(r0
, r1
, r2
, r3
, 72);
559 SETKEY(r3
, r1
, r2
, r0
, 72)
561 GETKEY(r0
, r1
, r2
, r3
, 76);
563 SETKEY(r1
, r4
, r2
, r0
, 76)
565 GETKEY(r0
, r1
, r2
, r3
, 80);
567 SETKEY(r2
, r4
, r3
, r0
, 80)
569 GETKEY(r0
, r1
, r2
, r3
, 84);
571 SETKEY(r0
, r1
, r4
, r2
, 84)
573 GETKEY(r0
, r1
, r2
, r3
, 88);
575 SETKEY(r1
, r3
, r0
, r2
, 88)
577 GETKEY(r0
, r1
, r2
, r3
, 92);
579 SETKEY(r1
, r4
, r0
, r3
, 92)
581 GETKEY(r0
, r1
, r2
, r3
, 96);
583 SETKEY(r1
, r2
, r3
, r4
, 96)
585 GETKEY(r0
, r1
, r2
, r3
, 100);
587 SETKEY(r2
, r3
, r1
, r4
, 100)
589 GETKEY(r0
, r1
, r2
, r3
, 104);
591 SETKEY(r3
, r1
, r2
, r0
, 104)
593 GETKEY(r0
, r1
, r2
, r3
, 108);
595 SETKEY(r1
, r4
, r2
, r0
, 108)
597 GETKEY(r0
, r1
, r2
, r3
, 112);
599 SETKEY(r2
, r4
, r3
, r0
, 112)
601 GETKEY(r0
, r1
, r2
, r3
, 116);
603 SETKEY(r0
, r1
, r4
, r2
, 116)
605 GETKEY(r0
, r1
, r2
, r3
, 120);
607 SETKEY(r1
, r3
, r0
, r2
, 120)
609 GETKEY(r0
, r1
, r2
, r3
, 124);
611 SETKEY(r1
, r4
, r0
, r3
, 124)
613 GETKEY(r0
, r1
, r2
, r3
, 128);
615 SETKEY(r1
, r2
, r3
, r4
, 128)
620 /* Encryption and decryption functions. The rounds are fully inlined.
621 * The sboxes alter the bit order of the output, and the altered
622 * bit order is used progressively. */
624 /* encrypt a block of text */
626 int serpent_encrypt(serpent_context
*cx
, const u8
*in
,
628 { u32
*l_key
= cx
->keyinfo
;
629 const u32
*in_blk
= (const u32
*) in
;
630 u32
*out_blk
= (u32
*) out
;
634 r0
= io_swap(in_blk
[3]); r1
= io_swap(in_blk
[2]);
635 r2
= io_swap(in_blk
[1]); r3
= io_swap(in_blk
[0]);
637 r0
= in_blk
[0]; r1
= in_blk
[1]; r2
= in_blk
[2]; r3
= in_blk
[3];
641 KEYMIX(r0
,r1
,r2
,r3
,r4
,0);
643 LINTRANS(r1
,r4
,r2
,r0
,r3
);
646 KEYMIX(r1
,r4
,r2
,r0
,r3
,4);
648 LINTRANS(r0
,r4
,r2
,r1
,r3
);
651 KEYMIX(r0
,r4
,r2
,r1
,r3
,8);
653 LINTRANS(r2
,r1
,r4
,r3
,r0
);
656 KEYMIX(r2
,r1
,r4
,r3
,r0
,12);
658 LINTRANS(r1
,r4
,r3
,r0
,r2
);
661 KEYMIX(r1
,r4
,r3
,r0
,r2
,16);
663 LINTRANS(r4
,r2
,r1
,r0
,r3
);
666 KEYMIX(r4
,r2
,r1
,r0
,r3
,20);
668 LINTRANS(r2
,r0
,r4
,r1
,r3
);
671 KEYMIX(r2
,r0
,r4
,r1
,r3
,24);
673 LINTRANS(r2
,r0
,r3
,r4
,r1
);
676 KEYMIX(r2
,r0
,r3
,r4
,r1
,28);
678 LINTRANS(r3
,r1
,r4
,r2
,r0
);
681 KEYMIX(r3
,r1
,r4
,r2
,r0
,32);
683 LINTRANS(r1
,r0
,r4
,r3
,r2
);
686 KEYMIX(r1
,r0
,r4
,r3
,r2
,36);
688 LINTRANS(r3
,r0
,r4
,r1
,r2
);
691 KEYMIX(r3
,r0
,r4
,r1
,r2
,40);
693 LINTRANS(r4
,r1
,r0
,r2
,r3
);
696 KEYMIX(r4
,r1
,r0
,r2
,r3
,44);
698 LINTRANS(r1
,r0
,r2
,r3
,r4
);
701 KEYMIX(r1
,r0
,r2
,r3
,r4
,48);
703 LINTRANS(r0
,r4
,r1
,r3
,r2
);
706 KEYMIX(r0
,r4
,r1
,r3
,r2
,52);
708 LINTRANS(r4
,r3
,r0
,r1
,r2
);
711 KEYMIX(r4
,r3
,r0
,r1
,r2
,56);
713 LINTRANS(r4
,r3
,r2
,r0
,r1
);
716 KEYMIX(r4
,r3
,r2
,r0
,r1
,60);
718 LINTRANS(r2
,r1
,r0
,r4
,r3
);
721 KEYMIX(r2
,r1
,r0
,r4
,r3
,64);
723 LINTRANS(r1
,r3
,r0
,r2
,r4
);
726 KEYMIX(r1
,r3
,r0
,r2
,r4
,68);
728 LINTRANS(r2
,r3
,r0
,r1
,r4
);
731 KEYMIX(r2
,r3
,r0
,r1
,r4
,72);
733 LINTRANS(r0
,r1
,r3
,r4
,r2
);
736 KEYMIX(r0
,r1
,r3
,r4
,r2
,76);
738 LINTRANS(r1
,r3
,r4
,r2
,r0
);
741 KEYMIX(r1
,r3
,r4
,r2
,r0
,80);
743 LINTRANS(r3
,r0
,r1
,r2
,r4
);
746 KEYMIX(r3
,r0
,r1
,r2
,r4
,84);
748 LINTRANS(r0
,r2
,r3
,r1
,r4
);
751 KEYMIX(r0
,r2
,r3
,r1
,r4
,88);
753 LINTRANS(r0
,r2
,r4
,r3
,r1
);
756 KEYMIX(r0
,r2
,r4
,r3
,r1
,92);
758 LINTRANS(r4
,r1
,r3
,r0
,r2
);
761 KEYMIX(r4
,r1
,r3
,r0
,r2
,96);
763 LINTRANS(r1
,r2
,r3
,r4
,r0
);
766 KEYMIX(r1
,r2
,r3
,r4
,r0
,100);
768 LINTRANS(r4
,r2
,r3
,r1
,r0
);
771 KEYMIX(r4
,r2
,r3
,r1
,r0
,104);
773 LINTRANS(r3
,r1
,r2
,r0
,r4
);
776 KEYMIX(r3
,r1
,r2
,r0
,r4
,108);
778 LINTRANS(r1
,r2
,r0
,r4
,r3
);
781 KEYMIX(r1
,r2
,r0
,r4
,r3
,112);
783 LINTRANS(r2
,r3
,r1
,r4
,r0
);
786 KEYMIX(r2
,r3
,r1
,r4
,r0
,116);
788 LINTRANS(r3
,r4
,r2
,r1
,r0
);
791 KEYMIX(r3
,r4
,r2
,r1
,r0
,120);
793 LINTRANS(r3
,r4
,r0
,r2
,r1
);
796 KEYMIX(r3
,r4
,r0
,r2
,r1
,124);
798 KEYMIX(r0
,r1
,r2
,r3
,r4
,128);
802 out_blk
[3] = io_swap(r0
); out_blk
[2] = io_swap(r1
);
803 out_blk
[1] = io_swap(r2
); out_blk
[0] = io_swap(r3
);
805 out_blk
[0] = r0
; out_blk
[1] = r1
; out_blk
[2] = r2
; out_blk
[3] = r3
;
810 /* decrypt a block of text */
812 int serpent_decrypt(serpent_context
*cx
, const u8
*in
,
814 { u32
*l_key
= cx
->keyinfo
;
815 const u32
*in_blk
= (const u32
*)in
;
816 u32
*out_blk
= (u32
*)out
;
820 r0
= io_swap(in_blk
[3]); r1
= io_swap(in_blk
[2]);
821 r2
= io_swap(in_blk
[1]); r3
= io_swap(in_blk
[0]);
823 r0
= in_blk
[0]; r1
= in_blk
[1]; r2
= in_blk
[2]; r3
= in_blk
[3];
827 KEYMIX(r0
,r1
,r2
,r3
,r4
,128);
829 KEYMIX(r3
,r0
,r1
,r4
,r2
,124);
832 ILINTRANS(r3
,r0
,r1
,r4
,r2
);
834 KEYMIX(r0
,r1
,r2
,r4
,r3
,120);
837 ILINTRANS(r0
,r1
,r2
,r4
,r3
);
839 KEYMIX(r1
,r3
,r4
,r2
,r0
,116);
842 ILINTRANS(r1
,r3
,r4
,r2
,r0
);
844 KEYMIX(r1
,r2
,r4
,r0
,r3
,112);
847 ILINTRANS(r1
,r2
,r4
,r0
,r3
);
849 KEYMIX(r4
,r2
,r0
,r1
,r3
,108);
852 ILINTRANS(r4
,r2
,r0
,r1
,r3
);
854 KEYMIX(r2
,r3
,r0
,r1
,r4
,104);
857 ILINTRANS(r2
,r3
,r0
,r1
,r4
);
859 KEYMIX(r4
,r2
,r1
,r0
,r3
,100);
862 ILINTRANS(r4
,r2
,r1
,r0
,r3
);
864 KEYMIX(r4
,r3
,r2
,r0
,r1
,96);
867 ILINTRANS(r4
,r3
,r2
,r0
,r1
);
869 KEYMIX(r0
,r4
,r3
,r1
,r2
,92);
872 ILINTRANS(r0
,r4
,r3
,r1
,r2
);
874 KEYMIX(r4
,r3
,r2
,r1
,r0
,88);
877 ILINTRANS(r4
,r3
,r2
,r1
,r0
);
879 KEYMIX(r3
,r0
,r1
,r2
,r4
,84);
882 ILINTRANS(r3
,r0
,r1
,r2
,r4
);
884 KEYMIX(r3
,r2
,r1
,r4
,r0
,80);
887 ILINTRANS(r3
,r2
,r1
,r4
,r0
);
889 KEYMIX(r1
,r2
,r4
,r3
,r0
,76);
892 ILINTRANS(r1
,r2
,r4
,r3
,r0
);
894 KEYMIX(r2
,r0
,r4
,r3
,r1
,72);
897 ILINTRANS(r2
,r0
,r4
,r3
,r1
);
899 KEYMIX(r1
,r2
,r3
,r4
,r0
,68);
902 ILINTRANS(r1
,r2
,r3
,r4
,r0
);
904 KEYMIX(r1
,r0
,r2
,r4
,r3
,64);
907 ILINTRANS(r1
,r0
,r2
,r4
,r3
);
909 KEYMIX(r4
,r1
,r0
,r3
,r2
,60);
912 ILINTRANS(r4
,r1
,r0
,r3
,r2
);
914 KEYMIX(r1
,r0
,r2
,r3
,r4
,56);
917 ILINTRANS(r1
,r0
,r2
,r3
,r4
);
919 KEYMIX(r0
,r4
,r3
,r2
,r1
,52);
922 ILINTRANS(r0
,r4
,r3
,r2
,r1
);
924 KEYMIX(r0
,r2
,r3
,r1
,r4
,48);
927 ILINTRANS(r0
,r2
,r3
,r1
,r4
);
929 KEYMIX(r3
,r2
,r1
,r0
,r4
,44);
932 ILINTRANS(r3
,r2
,r1
,r0
,r4
);
934 KEYMIX(r2
,r4
,r1
,r0
,r3
,40);
937 ILINTRANS(r2
,r4
,r1
,r0
,r3
);
939 KEYMIX(r3
,r2
,r0
,r1
,r4
,36);
942 ILINTRANS(r3
,r2
,r0
,r1
,r4
);
944 KEYMIX(r3
,r4
,r2
,r1
,r0
,32);
947 ILINTRANS(r3
,r4
,r2
,r1
,r0
);
949 KEYMIX(r1
,r3
,r4
,r0
,r2
,28);
952 ILINTRANS(r1
,r3
,r4
,r0
,r2
);
954 KEYMIX(r3
,r4
,r2
,r0
,r1
,24);
957 ILINTRANS(r3
,r4
,r2
,r0
,r1
);
959 KEYMIX(r4
,r1
,r0
,r2
,r3
,20);
962 ILINTRANS(r4
,r1
,r0
,r2
,r3
);
964 KEYMIX(r4
,r2
,r0
,r3
,r1
,16);
967 ILINTRANS(r4
,r2
,r0
,r3
,r1
);
969 KEYMIX(r0
,r2
,r3
,r4
,r1
,12);
972 ILINTRANS(r0
,r2
,r3
,r4
,r1
);
974 KEYMIX(r2
,r1
,r3
,r4
,r0
,8);
977 ILINTRANS(r2
,r1
,r3
,r4
,r0
);
979 KEYMIX(r0
,r2
,r4
,r3
,r1
,4);
982 ILINTRANS(r0
,r2
,r4
,r3
,r1
);
984 KEYMIX(r0
,r1
,r2
,r3
,r4
,0);
987 out_blk
[3] = io_swap(r0
); out_blk
[2] = io_swap(r1
);
988 out_blk
[1] = io_swap(r2
); out_blk
[0] = io_swap(r3
);
990 out_blk
[0] = r0
; out_blk
[1] = r1
; out_blk
[2] = r2
; out_blk
[3] = r3
;