aesni: Implement CMAC mode to provide a signer/prf
authorMartin Willi <martin@revosec.ch>
Fri, 27 Mar 2015 16:36:12 +0000 (17:36 +0100)
committerMartin Willi <martin@revosec.ch>
Wed, 15 Apr 2015 09:35:28 +0000 (11:35 +0200)
Compared to the cmac plugin using AESNI-CBC as backend, this improves
performance of AES-CMAC by ~45%.

src/libstrongswan/plugins/aesni/Makefile.am
src/libstrongswan/plugins/aesni/aesni_cmac.c [new file with mode: 0644]
src/libstrongswan/plugins/aesni/aesni_cmac.h [new file with mode: 0644]
src/libstrongswan/plugins/aesni/aesni_plugin.c

index e4f827b..307a5d1 100644 (file)
@@ -18,6 +18,7 @@ libstrongswan_aesni_la_SOURCES = \
        aesni_ctr.h aesni_ctr.c \
        aesni_ccm.h aesni_ccm.c \
        aesni_xcbc.h aesni_xcbc.c \
+       aesni_cmac.h aesni_cmac.c \
        aesni_plugin.h aesni_plugin.c
 
 libstrongswan_aesni_la_LDFLAGS = -module -avoid-version
diff --git a/src/libstrongswan/plugins/aesni/aesni_cmac.c b/src/libstrongswan/plugins/aesni/aesni_cmac.c
new file mode 100644 (file)
index 0000000..f94960d
--- /dev/null
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2012 Tobias Brunner
+ * Hochschule fuer Technik Rapperswil
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include "aesni_cmac.h"
+#include "aesni_key.h"
+
+#include <crypto/prfs/mac_prf.h>
+#include <crypto/signers/mac_signer.h>
+
+typedef struct private_mac_t private_mac_t;
+
+/**
+ * Private data of a mac_t object.
+ */
+struct private_mac_t {
+
+       /**
+        * Public interface.
+        */
+       mac_t public;
+
+       /**
+        * Key schedule for key K
+        */
+       aesni_key_t *k;
+
+       /**
+        * K1
+        */
+       __m128i k1;
+
+       /**
+        * K2
+        */
+       __m128i k2;
+
+       /**
+        * T
+        */
+       __m128i t;
+
+       /**
+        * remaining, unprocessed bytes in append mode
+        */
+       u_char rem[AES_BLOCK_SIZE];
+
+       /**
+        * number of bytes in remaining
+        */
+       int rem_size;
+};
+
+METHOD(mac_t, get_mac, bool,
+       private_mac_t *this, chunk_t data, u_int8_t *out)
+{
+       __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10;
+       __m128i t, l, *bi;
+       u_int blocks, rem, i;
+
+       if (!this->k)
+       {
+               return FALSE;
+       }
+
+       k0 = this->k->schedule[0];
+       k1 = this->k->schedule[1];
+       k2 = this->k->schedule[2];
+       k3 = this->k->schedule[3];
+       k4 = this->k->schedule[4];
+       k5 = this->k->schedule[5];
+       k6 = this->k->schedule[6];
+       k7 = this->k->schedule[7];
+       k8 = this->k->schedule[8];
+       k9 = this->k->schedule[9];
+       k10 = this->k->schedule[10];
+
+       t = this->t;
+
+       if (this->rem_size + data.len > AES_BLOCK_SIZE)
+       {
+               /* T := 0x00000000000000000000000000000000 (initially)
+                * for each block M_i (except the last)
+                *   X := T XOR M_i;
+                *   T := AES-128(K, X);
+                */
+
+               /* append data to remaining bytes, process block M_1 */
+               memcpy(this->rem + this->rem_size, data.ptr,
+                          AES_BLOCK_SIZE - this->rem_size);
+               data = chunk_skip(data, AES_BLOCK_SIZE - this->rem_size);
+
+               t = _mm_xor_si128(t, _mm_loadu_si128((__m128i*)this->rem));
+
+               t = _mm_xor_si128(t, k0);
+               t = _mm_aesenc_si128(t, k1);
+               t = _mm_aesenc_si128(t, k2);
+               t = _mm_aesenc_si128(t, k3);
+               t = _mm_aesenc_si128(t, k4);
+               t = _mm_aesenc_si128(t, k5);
+               t = _mm_aesenc_si128(t, k6);
+               t = _mm_aesenc_si128(t, k7);
+               t = _mm_aesenc_si128(t, k8);
+               t = _mm_aesenc_si128(t, k9);
+               t = _mm_aesenclast_si128(t, k10);
+
+               /* process blocks M_2 ... M_n-1 */
+               bi = (__m128i*)data.ptr;
+               rem = data.len % AES_BLOCK_SIZE;
+               blocks = data.len / AES_BLOCK_SIZE;
+               if (!rem && blocks)
+               {       /* don't do last block */
+                       rem = AES_BLOCK_SIZE;
+                       blocks--;
+               }
+
+               /* process blocks M[2] ... M[n-1] */
+               for (i = 0; i < blocks; i++)
+               {
+                       t = _mm_xor_si128(t, _mm_loadu_si128(bi + i));
+
+                       t = _mm_xor_si128(t, k0);
+                       t = _mm_aesenc_si128(t, k1);
+                       t = _mm_aesenc_si128(t, k2);
+                       t = _mm_aesenc_si128(t, k3);
+                       t = _mm_aesenc_si128(t, k4);
+                       t = _mm_aesenc_si128(t, k5);
+                       t = _mm_aesenc_si128(t, k6);
+                       t = _mm_aesenc_si128(t, k7);
+                       t = _mm_aesenc_si128(t, k8);
+                       t = _mm_aesenc_si128(t, k9);
+                       t = _mm_aesenclast_si128(t, k10);
+               }
+
+               /* store remaining bytes of block M_n */
+               memcpy(this->rem, data.ptr + data.len - rem, rem);
+               this->rem_size = rem;
+       }
+       else
+       {
+               /* no complete block (or last block), just copy into remaining */
+               memcpy(this->rem + this->rem_size, data.ptr, data.len);
+               this->rem_size += data.len;
+       }
+       if (out)
+       {
+               /* if last block is complete
+                *   M_last := M_n XOR K1;
+                * else
+                *   M_last := padding(M_n) XOR K2;
+                */
+               if (this->rem_size == AES_BLOCK_SIZE)
+               {
+                       l = _mm_loadu_si128((__m128i*)this->rem);
+                       l = _mm_xor_si128(l, this->k1);
+               }
+               else
+               {
+                       /* padding(x) = x || 10^i  where i is 128-8*r-1
+                        * That is, padding(x) is the concatenation of x and a single '1',
+                        * followed by the minimum number of '0's, so that the total length is
+                        * equal to 128 bits.
+                        */
+                       if (this->rem_size < AES_BLOCK_SIZE)
+                       {
+                               memset(this->rem + this->rem_size, 0,
+                                          AES_BLOCK_SIZE - this->rem_size);
+                               this->rem[this->rem_size] = 0x80;
+                       }
+                       l = _mm_loadu_si128((__m128i*)this->rem);
+                       l = _mm_xor_si128(l, this->k2);
+               }
+               /* T := M_last XOR T;
+                * T := AES-128(K,T);
+                */
+               t = _mm_xor_si128(l, t);
+
+               t = _mm_xor_si128(t, k0);
+               t = _mm_aesenc_si128(t, k1);
+               t = _mm_aesenc_si128(t, k2);
+               t = _mm_aesenc_si128(t, k3);
+               t = _mm_aesenc_si128(t, k4);
+               t = _mm_aesenc_si128(t, k5);
+               t = _mm_aesenc_si128(t, k6);
+               t = _mm_aesenc_si128(t, k7);
+               t = _mm_aesenc_si128(t, k8);
+               t = _mm_aesenc_si128(t, k9);
+               t = _mm_aesenclast_si128(t, k10);
+
+               _mm_storeu_si128((__m128i*)out, t);
+
+               /* reset state */
+               t = _mm_setzero_si128();
+               this->rem_size = 0;
+       }
+       this->t = t;
+       return TRUE;
+}
+
+METHOD(mac_t, get_mac_size, size_t,
+       private_mac_t *this)
+{
+       return AES_BLOCK_SIZE;
+}
+
+/**
+ * Left-shift the given chunk by one bit.
+ */
+static void bit_shift(chunk_t chunk)
+{
+       size_t i;
+
+       for (i = 0; i < chunk.len; i++)
+       {
+               chunk.ptr[i] <<= 1;
+               if (i < chunk.len - 1 && chunk.ptr[i + 1] & 0x80)
+               {
+                       chunk.ptr[i] |= 0x01;
+               }
+       }
+}
+
+METHOD(mac_t, set_key, bool,
+       private_mac_t *this, chunk_t key)
+{
+       __m128i rb, msb, l, a;
+       u_int round;
+       chunk_t k;
+
+       this->t = _mm_setzero_si128();
+       this->rem_size = 0;
+
+       /* we support variable keys as defined in RFC 4615 */
+       if (key.len == AES_BLOCK_SIZE)
+       {
+               k = key;
+       }
+       else
+       {       /* use cmac recursively to resize longer or shorter keys */
+               k = chunk_alloca(AES_BLOCK_SIZE);
+               memset(k.ptr, 0, k.len);
+               if (!set_key(this, k) || !get_mac(this, key, k.ptr))
+               {
+                       return FALSE;
+               }
+       }
+
+       DESTROY_IF(this->k);
+       this->k = aesni_key_create(TRUE, k);
+       if (!this->k)
+       {
+               return FALSE;
+       }
+
+       /*
+        * Rb = 0x00000000000000000000000000000087
+        * L = 0x00000000000000000000000000000000 encrypted with K
+        * if MSB(L) == 0
+        *   K1 = L << 1
+        * else
+        *   K1 = (L << 1) XOR Rb
+        * if MSB(K1) == 0
+        *   K2 = K1 << 1
+        * else
+        *   K2 = (K1 << 1) XOR Rb
+        */
+
+       rb = _mm_set_epi32(0x87000000, 0, 0, 0);
+       msb = _mm_set_epi32(0, 0, 0, 0x80);
+
+       l = _mm_setzero_si128();
+
+       l = _mm_xor_si128(l, this->k->schedule[0]);
+       for (round = 1; round < this->k->rounds; round++)
+       {
+               l = _mm_aesenc_si128(l, this->k->schedule[round]);
+       }
+       l = _mm_aesenclast_si128(l, this->k->schedule[this->k->rounds]);
+
+       this->k1 = l;
+       bit_shift(chunk_from_thing(this->k1));
+       a = _mm_and_si128(l, msb);
+       if (memchr(&a, 0x80, 1))
+       {
+               this->k1 = _mm_xor_si128(this->k1, rb);
+       }
+       this->k2 = this->k1;
+       bit_shift(chunk_from_thing(this->k2));
+       a = _mm_and_si128(this->k1, msb);
+       if (memchr(&a, 0x80, 1))
+       {
+               this->k2 = _mm_xor_si128(this->k2, rb);
+       }
+
+       return TRUE;
+}
+
+METHOD(mac_t, destroy, void,
+       private_mac_t *this)
+{
+       DESTROY_IF(this->k);
+       memwipe(&this->k1, sizeof(this->k1));
+       memwipe(&this->k2, sizeof(this->k2));
+       free(this);
+}
+
+/*
+ * Described in header
+ */
+mac_t *aesni_cmac_create(encryption_algorithm_t algo, size_t key_size)
+{
+       private_mac_t *this;
+
+       INIT(this,
+               .public = {
+                       .get_mac = _get_mac,
+                       .get_mac_size = _get_mac_size,
+                       .set_key = _set_key,
+                       .destroy = _destroy,
+               },
+       );
+
+       return &this->public;
+}
+
+/*
+ * Described in header.
+ */
+prf_t *aesni_cmac_prf_create(pseudo_random_function_t algo)
+{
+       mac_t *cmac;
+
+       switch (algo)
+       {
+               case PRF_AES128_CMAC:
+                       cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
+                       break;
+               default:
+                       return NULL;
+       }
+       if (cmac)
+       {
+               return mac_prf_create(cmac);
+       }
+       return NULL;
+}
+
+/*
+ * Described in header
+ */
+signer_t *aesni_cmac_signer_create(integrity_algorithm_t algo)
+{
+       size_t truncation;
+       mac_t *cmac;
+
+       switch (algo)
+       {
+               case AUTH_AES_CMAC_96:
+                       cmac = aesni_cmac_create(ENCR_AES_CBC, 16);
+                       truncation = 12;
+                       break;
+               default:
+                       return NULL;
+       }
+       if (cmac)
+       {
+               return mac_signer_create(cmac, truncation);
+       }
+       return NULL;
+}
diff --git a/src/libstrongswan/plugins/aesni/aesni_cmac.h b/src/libstrongswan/plugins/aesni/aesni_cmac.h
new file mode 100644 (file)
index 0000000..5f0af73
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2015 revosec AG
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+/**
+ * @defgroup aesni_xcbc aesni_xcbc
+ * @{ @ingroup aesni
+ */
+
+#ifndef CMAC_H_
+#define CMAC_H_
+
+#include <crypto/mac.h>
+#include <crypto/prfs/prf.h>
+#include <crypto/signers/signer.h>
+
+/**
+ * Create a generic mac_t object using AESNI CMAC.
+ *
+ * @param algo         underlying encryption algorithm
+ * @param key_size     size of encryption key, in bytes
+ */
+mac_t *aesni_cmac_create(encryption_algorithm_t algo, size_t key_size);
+
+/**
+ * Creates a new prf_t object based AESNI CMAC.
+ *
+ * @param algo         algorithm to implement
+ * @return                     prf_t object, NULL if not supported
+ */
+prf_t *aesni_cmac_prf_create(pseudo_random_function_t algo);
+
+/**
+ * Creates a new signer_t object based on AESNI CMAC.
+ *
+ * @param algo         algorithm to implement
+ * @return                     signer_t, NULL if  not supported
+ */
+signer_t *aesni_cmac_signer_create(integrity_algorithm_t algo);
+
+#endif /** CMAC_H_ @}*/
index 334db9c..229361d 100644 (file)
@@ -18,6 +18,7 @@
 #include "aesni_ctr.h"
 #include "aesni_ccm.h"
 #include "aesni_xcbc.h"
+#include "aesni_cmac.h"
 
 #include <stdio.h>
 
@@ -71,6 +72,10 @@ METHOD(plugin_t, get_features, int,
                        PLUGIN_PROVIDE(PRF, PRF_AES128_XCBC),
                PLUGIN_REGISTER(SIGNER, aesni_xcbc_signer_create),
                        PLUGIN_PROVIDE(SIGNER, AUTH_AES_XCBC_96),
+               PLUGIN_REGISTER(PRF, aesni_cmac_prf_create),
+                       PLUGIN_PROVIDE(PRF, PRF_AES128_CMAC),
+               PLUGIN_REGISTER(SIGNER, aesni_cmac_signer_create),
+                       PLUGIN_PROVIDE(SIGNER, AUTH_AES_CMAC_96),
        };
 
        *features = f;