kernel-netlink: Support configuring XFRM policy hashing thresholds
authorTobias Brunner <tobias@strongswan.org>
Wed, 21 Sep 2016 08:16:00 +0000 (10:16 +0200)
committerTobias Brunner <tobias@strongswan.org>
Fri, 30 Sep 2016 12:54:52 +0000 (14:54 +0200)
If the number of flows over a gateway exceeds the flow cache size of the Linux
kernel, policy lookup gets very expensive. Policies covering more than a single
address don't get hash-indexed by default, which results in wasting most of
the cycles in xfrm_policy_lookup_bytype() and its xfrm_policy_match() use.
Starting with several hundred policies the overhead gets inacceptable.

Starting with Linux 3.18, Linux can hash the first n-bit of a policy subnet
to perform indexed lookup. With correctly chosen netbits, this can completely
eliminate the performance impact of policy lookups, freeing the resources
for ESP crypto.

WARNING: Due to a bug in kernels 3.19 through 4.7, the kernel crashes with a
NULL pointer dereference if a socket policy is installed while hash thresholds
are changed.  And because the hashtable rebuild triggered by the threshold
change that causes this is scheduled it might also happen if the socket
policies are seemingly installed after setting the thresholds.
The fix for this bug - 6916fb3b10b3 ("xfrm: Ignore socket policies when
rebuilding hash tables") - is included since 4.8 (and might get backported).
As a workaround `charon.plugins.kernel-netlink.port_bypass` may be enabled
to replace the socket policies that allow IKE traffic with port specific
bypass policies.

conf/plugins/kernel-netlink.opt
src/libcharon/plugins/kernel_netlink/kernel_netlink_ipsec.c

index 0d465f6..77ba6ea 100644 (file)
@@ -51,6 +51,35 @@ charon.plugins.kernel-netlink.set_proto_port_transport_sa = no
        traffic, it also prevents the use of a single IPsec SA by more than one
        traffic selector.
 
+charon.plugins.kernel-netlink.spdh_thresh {}
+       XFRM policy hashing threshold configuration for IPv4 and IPv6.
+
+       XFRM policy hashing threshold configuration for IPv4 and IPv6.
+
+       The section defines hashing thresholds to configure in the kernel during
+       daemon startup. Each address family takes a threshold for the local subnet
+       of an IPsec policy (src in out-policies, dst in in- and forward-policies)
+       and the remote subnet (dst in out-policies, src in in- and
+       forward-policies).
+
+       If the subnet has more or equal net bits than the threshold, the first
+       threshold bits are used to calculate a hash to lookup the policy.
+
+       Policy hashing thresholds are not supported before Linux 3.18 and might
+       conflict with socket policies before Linux 4.8.
+
+charon.plugins.kernel-netlink.spdh_thresh.ipv4.lbits = 32
+       Local subnet XFRM policy hashing threshold for IPv4.
+
+charon.plugins.kernel-netlink.spdh_thresh.ipv4.rbits = 32
+       Remote subnet XFRM policy hashing threshold for IPv4.
+
+charon.plugins.kernel-netlink.spdh_thresh.ipv6.lbits = 128
+       Local subnet XFRM policy hashing threshold for IPv6.
+
+charon.plugins.kernel-netlink.spdh_thresh.ipv6.rbits = 128
+       Remote subnet XFRM policy hashing threshold for IPv6.
+
 charon.plugins.kernel-netlink.retries = 0
        Number of Netlink message retransmissions to send on timeout.
 
index c680342..6b06c26 100644 (file)
@@ -3046,6 +3046,110 @@ METHOD(kernel_ipsec_t, destroy, void,
        free(this);
 }
 
+/**
+ * Get the currently configured SPD hashing thresholds for an address family
+ */
+static bool get_spd_hash_thresh(private_kernel_netlink_ipsec_t *this,
+                                                               int type, uint8_t *lbits, uint8_t *rbits)
+{
+       netlink_buf_t request;
+       struct nlmsghdr *hdr, *out;
+       struct xfrmu_spdhthresh *thresh;
+       struct rtattr *rta;
+       size_t len, rtasize;
+       bool success = FALSE;
+
+       memset(&request, 0, sizeof(request));
+
+       hdr = &request.hdr;
+       hdr->nlmsg_flags = NLM_F_REQUEST;
+       hdr->nlmsg_type = XFRM_MSG_GETSPDINFO;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t));
+
+       if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
+       {
+               hdr = out;
+               while (NLMSG_OK(hdr, len))
+               {
+                       switch (hdr->nlmsg_type)
+                       {
+                               case XFRM_MSG_NEWSPDINFO:
+                               {
+                                       rta = XFRM_RTA(hdr, uint32_t);
+                                       rtasize = XFRM_PAYLOAD(hdr, uint32_t);
+                                       while (RTA_OK(rta, rtasize))
+                                       {
+                                               if (rta->rta_type == type &&
+                                                       RTA_PAYLOAD(rta) == sizeof(*thresh))
+                                               {
+                                                       thresh = RTA_DATA(rta);
+                                                       *lbits = thresh->lbits;
+                                                       *rbits = thresh->rbits;
+                                                       success = TRUE;
+                                                       break;
+                                               }
+                                               rta = RTA_NEXT(rta, rtasize);
+                                       }
+                                       break;
+                               }
+                               case NLMSG_ERROR:
+                               {
+                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
+                                       DBG1(DBG_KNL, "getting SPD hash threshold failed: %s (%d)",
+                                                strerror(-err->error), -err->error);
+                                       break;
+                               }
+                               default:
+                                       hdr = NLMSG_NEXT(hdr, len);
+                                       continue;
+                               case NLMSG_DONE:
+                                       break;
+                       }
+                       break;
+               }
+               free(out);
+       }
+       return success;
+}
+
+/**
+ * Configure SPD hashing threshold for an address family
+ */
+static void setup_spd_hash_thresh(private_kernel_netlink_ipsec_t *this,
+                                                                 char *key, int type, uint8_t def)
+{
+       struct xfrmu_spdhthresh *thresh;
+       struct nlmsghdr *hdr;
+       netlink_buf_t request;
+       uint8_t lbits, rbits;
+
+       if (!get_spd_hash_thresh(this, type, &lbits, &rbits))
+       {
+               return;
+       }
+       memset(&request, 0, sizeof(request));
+
+       hdr = &request.hdr;
+       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       hdr->nlmsg_type = XFRM_MSG_NEWSPDINFO;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t));
+
+       thresh = netlink_reserve(hdr, sizeof(request), type, sizeof(*thresh));
+       thresh->lbits = lib->settings->get_int(lib->settings,
+                                                       "%s.plugins.kernel-netlink.spdh_thresh.%s.lbits",
+                                                       def, lib->ns, key);
+       thresh->rbits = lib->settings->get_int(lib->settings,
+                                                       "%s.plugins.kernel-netlink.spdh_thresh.%s.rbits",
+                                                       def, lib->ns, key);
+       if (thresh->lbits != lbits || thresh->rbits != rbits)
+       {
+               if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
+               {
+                       DBG1(DBG_KNL, "setting SPD hash threshold failed");
+               }
+       }
+}
+
 /*
  * Described in header.
  */
@@ -3116,6 +3220,9 @@ kernel_netlink_ipsec_t *kernel_netlink_ipsec_create()
                return NULL;
        }
 
+       setup_spd_hash_thresh(this, "ipv4", XFRMA_SPD_IPV4_HTHRESH, 32);
+       setup_spd_hash_thresh(this, "ipv6", XFRMA_SPD_IPV6_HTHRESH, 128);
+
        if (register_for_events)
        {
                struct sockaddr_nl addr;