merging modularized kernel interface back to trunk
author Tobias Brunner <tobias@strongswan.org>
Thu, 25 Sep 2008 07:56:58 +0000 (07:56 -0000)
committer Tobias Brunner <tobias@strongswan.org>
Thu, 25 Sep 2008 07:56:58 +0000 (07:56 -0000)
21 files changed:
configure.in
src/charon/Makefile.am
src/charon/daemon.c
src/charon/kernel/kernel_interface.c
src/charon/kernel/kernel_interface.h
src/charon/kernel/kernel_ipsec.h [new file with mode: 0644]
src/charon/kernel/kernel_net.h [new file with mode: 0644]
src/charon/network/socket.h
src/charon/plugins/kernel_netlink/Makefile.am [new file with mode: 0644]
src/charon/plugins/kernel_netlink/kernel_netlink_ipsec.c [new file with mode: 0644]
src/charon/plugins/kernel_netlink/kernel_netlink_ipsec.h [new file with mode: 0644]
src/charon/plugins/kernel_netlink/kernel_netlink_net.c [new file with mode: 0644]
src/charon/plugins/kernel_netlink/kernel_netlink_net.h [new file with mode: 0644]
src/charon/plugins/kernel_netlink/kernel_netlink_plugin.c [new file with mode: 0644]
src/charon/plugins/kernel_netlink/kernel_netlink_plugin.h [new file with mode: 0644]
src/charon/plugins/kernel_netlink/kernel_netlink_shared.c [new file with mode: 0644]
src/charon/plugins/kernel_netlink/kernel_netlink_shared.h [new file with mode: 0644]
src/charon/plugins/stroke/stroke_list.c
src/charon/sa/tasks/ike_me.c
src/charon/sa/tasks/ike_mobike.c
src/charon/sa/tasks/ike_natd.c

index 076d983..f7cc62c 100644 (file)
@@ -98,7 +98,7 @@ AC_ARG_WITH(
 AC_ARG_WITH(
        [linux-headers],
        AS_HELP_STRING([--with-linux-headers=dir],[use the linux header files in dir instead of the supplied ones in "src/include"]),
-       [AC_SUBST(linuxdir, "$withval")], [AC_SUBST(linuxdir, "../include")]
+       [AC_SUBST(linuxdir, "$withval")], [AC_SUBST(linuxdir, "\${top_srcdir}/src/include")]
 )
 AC_SUBST(LINUX_HEADERS)
 
@@ -926,6 +926,7 @@ AC_OUTPUT(
        src/charon/plugins/eap_md5/Makefile
        src/charon/plugins/eap_gtc/Makefile
        src/charon/plugins/eap_sim/Makefile
+       src/charon/plugins/kernel_netlink/Makefile
        src/charon/plugins/smp/Makefile
        src/charon/plugins/sql/Makefile
        src/charon/plugins/medsrv/Makefile
index c7fb7ff..5c5c618 100644 (file)
@@ -41,6 +41,7 @@ encoding/payloads/ts_payload.c encoding/payloads/ts_payload.h \
 encoding/payloads/unknown_payload.c encoding/payloads/unknown_payload.h \
 encoding/payloads/vendor_id_payload.c encoding/payloads/vendor_id_payload.h \
 kernel/kernel_interface.c kernel/kernel_interface.h \
+kernel/kernel_interface_ipsec.h kernel/kernel_interface_net.h \
 network/packet.c network/packet.h \
 network/receiver.c network/receiver.h \
 network/sender.c network/sender.h \
@@ -140,6 +141,11 @@ endif
 SUBDIRS = . 
 PLUGINS = ${libstrongswan_plugins}
 
+#if USE_KERNEL_NETLINK
+  SUBDIRS += plugins/kernel_netlink
+  PLUGINS += kernel-netlink
+#endif
+
 if USE_STROKE
   SUBDIRS += plugins/stroke
   PLUGINS += stroke
index 127a158..05da3c6 100644 (file)
@@ -366,6 +366,9 @@ static bool initialize(private_daemon_t *this, bool syslog, level_t levels[])
        lib->plugins->load(lib->plugins, IPSEC_PLUGINDIR, 
                lib->settings->get_str(lib->settings, "charon.load", PLUGINS));
        
+       /* create the kernel interfaces */
+       this->public.kernel_interface->create_interfaces(this->public.kernel_interface);
+       
 #ifdef INTEGRITY_TEST
        DBG1(DBG_DMN, "integrity test of libstrongswan code");
        if (fips_verify_hmac_signature(hmac_key, hmac_signature))
index b5a6fa5..ef4fc11 100644 (file)
@@ -1,9 +1,5 @@
 /*
- * Copyright (C) 2006-2008 Tobias Brunner
- * Copyright (C) 2005-2007 Martin Willi
- * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
- * Copyright (C) 2006 Daniel Roethlisberger
- * Copyright (C) 2005 Jan Hutter
+ * Copyright (C) 2008 Tobias Brunner
  * Hochschule fuer Technik Rapperswil
  *
  * This program is free software; you can redistribute it and/or modify it
  * $Id$
  */
 
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/xfrm.h>
-#include <linux/udp.h>
-#include <netinet/in.h>
-#include <pthread.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <string.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-
 #include "kernel_interface.h"
 
+#include <pthread.h>
+
 #include <daemon.h>
 #include <utils/linked_list.h>
-#include <processing/jobs/delete_child_sa_job.h>
-#include <processing/jobs/rekey_child_sa_job.h>
-#include <processing/jobs/acquire_job.h>
-#include <processing/jobs/callback_job.h>
-#include <processing/jobs/roam_job.h>
-
-/** required for Linux 2.6.26 kernel and later */
-#ifndef XFRM_STATE_AF_UNSPEC
-#define XFRM_STATE_AF_UNSPEC   32
-#endif
-
-/** routing table for routes installed by us */
-#ifndef IPSEC_ROUTING_TABLE
-#define IPSEC_ROUTING_TABLE 100
-#endif
-#ifndef IPSEC_ROUTING_TABLE_PRIO
-#define IPSEC_ROUTING_TABLE_PRIO 100
-#endif
-
-/** default priority of installed policies */
-#define PRIO_LOW 3000
-#define PRIO_HIGH 2000
-
-/** delay before firing roam jobs (ms) */
-#define ROAM_DELAY 100
+#include <utils/mutex.h>
 
-#define BUFFER_SIZE 1024
+typedef struct private_kernel_interface_t private_kernel_interface_t;
 
 /**
- * returns a pointer to the first rtattr following the nlmsghdr *nlh and the 
- * 'usual' netlink data x like 'struct xfrm_usersa_info' 
- */
-#define XFRM_RTA(nlh, x) ((struct rtattr*)(NLMSG_DATA(nlh) + NLMSG_ALIGN(sizeof(x))))
-/**
- * returns a pointer to the next rtattr following rta.
- * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
- */
-#define XFRM_RTA_NEXT(rta) ((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
-/**
- * returns the total size of attached rta data 
- * (after 'usual' netlink data x like 'struct xfrm_usersa_info') 
+ * Private data of a kernel_interface_t object.
  */
-#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
-
-typedef struct kernel_algorithm_t kernel_algorithm_t;
+struct private_kernel_interface_t {
 
-/**
- * Mapping from the algorithms defined in IKEv2 to
- * kernel level algorithm names and their key length
- */
-struct kernel_algorithm_t {
        /**
-        * Identifier specified in IKEv2
+        * Public part of kernel_interface_t object.
         */
-       int ikev2_id;
+       kernel_interface_t public;
        
        /**
-        * Name of the algorithm, as used as kernel identifier
+        * list of registered ipsec kernel interfaces
         */
-       char *name;
+       linked_list_t *ipsec_interfaces;
        
        /**
-        * Key length in bits, if fixed size
+        * list of registered network kernel interfaces
         */
-       u_int key_size;
-};
-
-ENUM(policy_dir_names, POLICY_IN, POLICY_FWD,
-       "in",
-       "out",
-       "fwd"
-);
-
-#define END_OF_LIST -1
-
-/**
- * Algorithms for encryption
- */
-static kernel_algorithm_t encryption_algs[] = {
-/*     {ENCR_DES_IV64,                 "***",                                  0}, */
-       {ENCR_DES,                              "des",                                  64},
-       {ENCR_3DES,                     "des3_ede",                             192},
-/*     {ENCR_RC5,                              "***",                                  0}, */
-/*     {ENCR_IDEA,                     "***",                                  0}, */
-       {ENCR_CAST,                     "cast128",                              0},
-       {ENCR_BLOWFISH,                 "blowfish",                             0},
-/*     {ENCR_3IDEA,                    "***",                                  0}, */
-/*     {ENCR_DES_IV32,                 "***",                                  0}, */
-       {ENCR_NULL,                     "cipher_null",                  0},
-       {ENCR_AES_CBC,                  "aes",                                  0},
-/*     {ENCR_AES_CTR,                  "***",                                  0}, */
-       {ENCR_AES_CCM_ICV8,             "rfc4309(ccm(aes))",    64},    /* key_size = ICV size */
-       {ENCR_AES_CCM_ICV12,    "rfc4309(ccm(aes))",    96},    /* key_size = ICV size */
-       {ENCR_AES_CCM_ICV16,    "rfc4309(ccm(aes))",    128},   /* key_size = ICV size */
-       {ENCR_AES_GCM_ICV8,             "rfc4106(gcm(aes))",    64},    /* key_size = ICV size */
-       {ENCR_AES_GCM_ICV12,    "rfc4106(gcm(aes))",    96},    /* key_size = ICV size */
-       {ENCR_AES_GCM_ICV16,    "rfc4106(gcm(aes))",    128},   /* key_size = ICV size */
-       {END_OF_LIST,           NULL,                   0},
-};
-
-/**
- * Algorithms for integrity protection
- */
-static kernel_algorithm_t integrity_algs[] = {
-       {AUTH_HMAC_MD5_96,                      "md5",                  128},
-       {AUTH_HMAC_SHA1_96,                     "sha1",                 160},
-       {AUTH_HMAC_SHA2_256_128,        "sha256",               256},
-       {AUTH_HMAC_SHA2_384_192,        "sha384",               384},
-       {AUTH_HMAC_SHA2_512_256,        "sha512",               512},
-/*     {AUTH_DES_MAC,                          "***",                  0}, */
-/*     {AUTH_KPDK_MD5,                         "***",                  0}, */
-       {AUTH_AES_XCBC_96,                      "xcbc(aes)",    128},
-       {END_OF_LIST,                           NULL,                   0},
-};
-
-/**
- * Algorithms for IPComp
- */
-static kernel_algorithm_t compression_algs[] = {
-/*     {IPCOMP_OUI,                    "***",                  0}, */
-       {IPCOMP_DEFLATE,                "deflate",              0},
-       {IPCOMP_LZS,                    "lzs",                  0},
-       {IPCOMP_LZJH,                   "lzjh",                 0},
-       {END_OF_LIST,                   NULL,                   0},
+       linked_list_t *net_interfaces;
+       
+       /**
+        * ipsec interface
+        */
+       kernel_ipsec_t *ipsec;
+       
+       /**
+        * network interface
+        */
+       kernel_net_t *net;
+       
+       /**
+        * locking mutex
+        */
+       mutex_t *mutex;
 };
 
 /**
- * Look up a kernel algorithm name and its key size
+ * Implementation of kernel_interface_t.get_spi
  */
-static char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
-                                          u_int16_t ikev2_algo, u_int16_t *key_size)
+static status_t get_spi(private_kernel_interface_t *this, host_t *src, host_t *dst,
+                                protocol_id_t protocol, u_int32_t reqid, u_int32_t *spi)
 {
-       while (kernel_algo->ikev2_id != END_OF_LIST)
-       {
-               if (ikev2_algo == kernel_algo->ikev2_id)
-               {
-                       /* match, evaluate key length */
-                       if (key_size && *key_size == 0)
-                       {       /* update key size if not set */
-                               *key_size = kernel_algo->key_size;
-                       }
-                       return kernel_algo->name;
-               }
-               kernel_algo++;
-       }
-       return NULL;
+       return this->ipsec->get_spi(this->ipsec, src, dst, protocol, reqid, spi);
 }
 
-typedef struct route_entry_t route_entry_t;
-
-/**
- * installed routing entry
- */
-struct route_entry_t {
-
-       /** Index of the interface the route is bound to */
-       int if_index;
-
-       /** Source ip of the route */
-       host_t *src_ip;
-       
-       /** gateway for this route */
-       host_t *gateway;
-
-       /** Destination net */
-       chunk_t dst_net;
-
-       /** Destination net prefixlen */
-       u_int8_t prefixlen;
-};
-
 /**
- * destroy an route_entry_t object
+ * Implementation of kernel_interface_t.get_cpi
  */
-static void route_entry_destroy(route_entry_t *this)
+static status_t get_cpi(private_kernel_interface_t *this, host_t *src, host_t *dst, 
+                                       u_int32_t reqid, u_int16_t *cpi)
 {
-       this->src_ip->destroy(this->src_ip);
-       this->gateway->destroy(this->gateway);
-       chunk_free(&this->dst_net);
-       free(this);
+       return this->ipsec->get_cpi(this->ipsec, src, dst, reqid, cpi);
 }
 
-typedef struct policy_entry_t policy_entry_t;
-
-/**
- * installed kernel policy.
- */
-struct policy_entry_t {
-       
-       /** direction of this policy: in, out, forward */
-       u_int8_t direction;
-       
-       /** reqid of the policy */
-       u_int32_t reqid;
-       
-       /** parameters of installed policy */
-       struct xfrm_selector sel;
-       
-       /** associated route installed for this policy */
-       route_entry_t *route;
-       
-       /** by how many CHILD_SA's this policy is used */
-       u_int refcount;
-};
-
-typedef struct addr_entry_t addr_entry_t;
-
 /**
- * IP address in an inface_entry_t
+ * Implementation of kernel_interface_t.add_sa
  */
-struct addr_entry_t {
-       
-       /** The ip address */
-       host_t *ip;
-       
-       /** virtual IP managed by us */
-       bool virtual;
-       
-       /** scope of the address */
-       u_char scope;
-       
-       /** Number of times this IP is used, if virtual */
-       u_int refcount;
-};
+static status_t add_sa(private_kernel_interface_t *this, host_t *src, host_t *dst,
+                               u_int32_t spi, protocol_id_t protocol, u_int32_t reqid,
+                               u_int64_t expire_soft, u_int64_t expire_hard,
+                               u_int16_t enc_alg, u_int16_t enc_size,
+                               u_int16_t int_alg, u_int16_t int_size,
+                               prf_plus_t *prf_plus, mode_t mode, u_int16_t ipcomp, bool encap,
+                               bool update)
+{
+       return this->ipsec->add_sa(this->ipsec, src, dst, spi, protocol, reqid,
+                       expire_soft, expire_hard, enc_alg, enc_size, int_alg, int_size,
+                       prf_plus, mode, ipcomp, encap, update);
+}
 
 /**
- * destroy a addr_entry_t object
+ * Implementation of kernel_interface_t.update_sa
  */
-static void addr_entry_destroy(addr_entry_t *this)
+static status_t update_sa(private_kernel_interface_t *this, u_int32_t spi,
+                                  protocol_id_t protocol, host_t *src, host_t *dst, 
+                                  host_t *new_src, host_t *new_dst, bool encap)
 {
-       this->ip->destroy(this->ip);
-       free(this);
+       return this->ipsec->update_sa(this->ipsec, spi, protocol, src, dst, new_src,
+                       new_dst, encap);
 }
 
-typedef struct iface_entry_t iface_entry_t;
-
 /**
- * A network interface on this system, containing addr_entry_t's
+ * Implementation of kernel_interface_t.query_sa
  */
-struct iface_entry_t {
-       
-       /** interface index */
-       int ifindex;
-       
-       /** name of the interface */
-       char ifname[IFNAMSIZ];
-       
-       /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
-       u_int flags;
-       
-       /** list of addresses as host_t */
-       linked_list_t *addrs;
-};
+static status_t query_sa(private_kernel_interface_t *this, host_t *dst, u_int32_t spi, 
+                                 protocol_id_t protocol, u_int32_t *use_time)
+{
+       return this->ipsec->query_sa(this->ipsec, dst, spi, protocol, use_time);
+}
 
 /**
- * destroy an interface entry
+ * Implementation of kernel_interface_t.del_sa
  */
-static void iface_entry_destroy(iface_entry_t *this)
+static status_t del_sa(private_kernel_interface_t *this, host_t *dst, u_int32_t spi,
+                               protocol_id_t protocol)
 {
-       this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
-       free(this);
+       return this->ipsec->del_sa(this->ipsec, dst, spi, protocol);
 }
 
-typedef struct private_kernel_interface_t private_kernel_interface_t;
-
 /**
- * Private variables and functions of kernel_interface class.
+ * Implementation of kernel_interface_t.add_policy
  */
-struct private_kernel_interface_t {
-       /**
-        * Public part of the kernel_interface_t object.
-        */
-       kernel_interface_t public;
-       
-       /**
-        * mutex to lock access to netlink socket
-        */
-       pthread_mutex_t nl_mutex;
-       
-       /**
-        * mutex to lock access to various lists
-        */
-       pthread_mutex_t mutex;
-       
-       /**
-        * condition variable to signal virtual IP add/removal
-        */
-       pthread_cond_t cond;
-       
-       /**
-        * List of installed policies (policy_entry_t)
-        */
-       linked_list_t *policies;
-       
-       /**
-        * Cached list of interfaces and its addresses (iface_entry_t)
-        */
-       linked_list_t *ifaces;
-       
-       /**
-        * iterator used in hook()
-        */
-       iterator_t *hiter;
-        
-       /**
-        * job receiving netlink events
-        */
-       callback_job_t *job;
-       
-       /**
-        * current sequence number for netlink request
-        */
-       int seq;
-       
-       /**
-        * Netlink xfrm socket (IPsec)
-        */
-       int socket_xfrm;
-       
-       /**
-        * netlink xfrm socket to receive acquire and expire events
-        */
-       int socket_xfrm_events;
-       
-       /**
-        * Netlink rt socket (routing)
-        */
-       int socket_rt;
-       
-       /**
-        * Netlink rt socket to receive address change events
-        */
-       int socket_rt_events;
-       
-       /**
-        * time of the last roam_job
-        */
-       struct timeval last_roam;
-       
-       /**
-        * whether to install routes along policies
-        */
-       bool install_routes;
-       
-       /**
-        * routing table to install routes
-        */
-       int routing_table;
-       
-       /**
-        * priority of used routing table
-        */
-       int routing_table_prio;
-
-       /**
-        * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
-        */
-       bool process_route;
-
-};
+static status_t add_policy(private_kernel_interface_t *this, host_t *src, host_t *dst,
+                                       traffic_selector_t *src_ts, traffic_selector_t *dst_ts,
+                                       policy_dir_t direction, protocol_id_t protocol,
+                                       u_int32_t reqid, bool high_prio, mode_t mode,
+                                       u_int16_t ipcomp)
+{
+       return this->ipsec->add_policy(this->ipsec, src, dst, src_ts, dst_ts,
+                       direction, protocol, reqid, high_prio, mode, ipcomp);
+}
 
 /**
- * convert a IKEv2 specific protocol identifier to the kernel one
+ * Implementation of kernel_interface_t.query_policy
  */
-static u_int8_t proto_ike2kernel(protocol_id_t proto)
+static status_t query_policy(private_kernel_interface_t *this,
+                                         traffic_selector_t *src_ts, traffic_selector_t *dst_ts,
+                                         policy_dir_t direction, u_int32_t *use_time)
 {
-       switch (proto)
-       {
-               case PROTO_ESP:
-                       return IPPROTO_ESP;
-               case PROTO_AH:
-                       return IPPROTO_AH;
-               default:
-                       return proto;
-       }
+       return this->ipsec->query_policy(this->ipsec, src_ts, dst_ts, direction, use_time);
 }
 
 /**
- * reverse of ike2kernel
+ * Implementation of kernel_interface_t.del_policy
  */
-static protocol_id_t proto_kernel2ike(u_int8_t proto)
+static status_t del_policy(private_kernel_interface_t *this,
+                                       traffic_selector_t *src_ts, traffic_selector_t *dst_ts,
+                                       policy_dir_t direction)
 {
-       switch (proto)
-       {
-               case IPPROTO_ESP:
-                       return PROTO_ESP;
-               case IPPROTO_AH:
-                       return PROTO_AH;
-               default:
-                       return proto;
-       }
+       return this->ipsec->del_policy(this->ipsec, src_ts, dst_ts, direction);
 }
 
 /**
- * convert a host_t to a struct xfrm_address
+ * Implementation of kernel_interface_t.get_source_addr
  */
-static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
+static host_t *get_source_addr(private_kernel_interface_t *this, host_t *dest)
 {
-       chunk_t chunk = host->get_address(host);
-       memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));        
+       return this->net->get_source_addr(this->net, dest);
 }
 
 /**
- * convert a traffic selector address range to subnet and its mask.
+ * Implementation of kernel_interface_t.get_nexthop
  */
-static void ts2subnet(traffic_selector_t* ts, 
-                                         xfrm_address_t *net, u_int8_t *mask)
+static host_t *get_nexthop(private_kernel_interface_t *this, host_t *dest)
 {
-       /* there is no way to do this cleanly, as the address range may
-        * be anything else but a subnet. We use from_addr as subnet 
-        * and try to calculate a usable subnet mask.
-        */
-       int byte, bit;
-       bool found = FALSE;
-       chunk_t from, to;
-       size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
-       
-       from = ts->get_from_address(ts);
-       to = ts->get_to_address(ts);
-       
-       *mask = (size * 8);
-       /* go trough all bits of the addresses, beginning in the front.
-        * as long as they are equal, the subnet gets larger
-        */
-       for (byte = 0; byte < size; byte++)
-       {
-               for (bit = 7; bit >= 0; bit--)
-               {
-                       if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
-                       {
-                               *mask = ((7 - bit) + (byte * 8));
-                               found = TRUE;
-                               break;
-                       }
-               }
-               if (found)
-               {
-                       break;
-               }
-       }
-       memcpy(net, from.ptr, from.len);
-       chunk_free(&from);
-       chunk_free(&to);
+       return this->net->get_nexthop(this->net, dest);
 }
 
 /**
- * convert a traffic selector port range to port/portmask
+ * Implementation of kernel_interface_t.get_interface
  */
-static void ts2ports(traffic_selector_t* ts, 
-                                        u_int16_t *port, u_int16_t *mask)
+static char* get_interface(private_kernel_interface_t *this, host_t *host)
 {
-       /* linux does not seem to accept complex portmasks. Only
-        * any or a specific port is allowed. We set to any, if we have
-        * a port range, or to a specific, if we have one port only.
-        */
-       u_int16_t from, to;
-       
-       from = ts->get_from_port(ts);
-       to = ts->get_to_port(ts);
-       
-       if (from == to)
-       {
-               *port = htons(from);
-               *mask = ~0;
-       }
-       else
-       {
-               *port = 0;
-               *mask = 0;
-       }
+       return this->net->get_interface(this->net, host);
 }
 
 /**
- * convert a pair of traffic_selectors to a xfrm_selector
+ * Implementation of kernel_interface_t.create_address_enumerator
  */
-static struct xfrm_selector ts2selector(traffic_selector_t *src, 
-                                                                               traffic_selector_t *dst)
+static enumerator_t *create_address_enumerator(private_kernel_interface_t *this,
+               bool include_down_ifaces, bool include_virtual_ips)
 {
-       struct xfrm_selector sel;
-
-       memset(&sel, 0, sizeof(sel));
-       sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
-       /* src or dest proto may be "any" (0), use more restrictive one */
-       sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
-       ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
-       ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
-       ts2ports(dst, &sel.dport, &sel.dport_mask);
-       ts2ports(src, &sel.sport, &sel.sport_mask);
-       sel.ifindex = 0;
-       sel.user = 0;
-       
-       return sel;
+       return this->net->create_address_enumerator(this->net, include_down_ifaces,
+                       include_virtual_ips);
 }
 
 /**
- * Creates an rtattr and adds it to the netlink message
+ * Implementation of kernel_interface_t.add_ip
  */
-static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
-                                                 size_t buflen)
+static status_t add_ip(private_kernel_interface_t *this, host_t *virtual_ip,
+                               host_t *iface_ip)
 {
-       struct rtattr *rta;
-       
-       if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
-       {
-               DBG1(DBG_KNL, "unable to add attribute, buffer too small");
-               return;
-       }
-       
-       rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
-       rta->rta_type = rta_type;
-       rta->rta_len = RTA_LENGTH(data.len);
-       memcpy(RTA_DATA(rta), data.ptr, data.len);
-       hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
+       return this->net->add_ip(this->net, virtual_ip, iface_ip);
 }
 
 /**
- * process a XFRM_MSG_ACQUIRE from kernel
+ * Implementation of kernel_interface_t.del_ip
  */
-static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
+static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
 {
-       u_int32_t reqid = 0;
-       int proto = 0;
-       job_t *job;
-       struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
-       size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
-       
-       if (RTA_OK(rtattr, rtsize))
-       {
-               if (rtattr->rta_type == XFRMA_TMPL)
-               {
-                       struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
-                       reqid = tmpl->reqid;
-                       proto = tmpl->id.proto;
-               }
-       }
-       switch (proto)
-       {
-               case 0:
-               case IPPROTO_ESP:
-               case IPPROTO_AH:
-                       break;
-               default:
-                       /* acquire for AH/ESP only, not for IPCOMP */
-                       return;
-       }
-       if (reqid == 0)
-       {
-               DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
-               return;
-       }
-       DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
-       DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid {%d}", reqid);
-       job = (job_t*)acquire_job_create(reqid);
-       charon->processor->queue_job(charon->processor, job);
+       return this->net->del_ip(this->net, virtual_ip);
 }
 
 /**
- * process a XFRM_MSG_EXPIRE from kernel
+ * Implementation of kernel_interface_t.add_route
  */
-static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
+static status_t add_route(private_kernel_interface_t *this, chunk_t dst_net,
+               u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
 {
-       job_t *job;
-       protocol_id_t protocol;
-       u_int32_t spi, reqid;
-       struct xfrm_user_expire *expire;
-       
-       expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
-       protocol = proto_kernel2ike(expire->state.id.proto);
-       spi = expire->state.id.spi;
-       reqid = expire->state.reqid;
-       
-       DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
-       
-       if (protocol != PROTO_ESP && protocol != PROTO_AH)
-       {
-               DBG2(DBG_KNL, "ignoring XFRM_MSG_EXPIRE for SA with SPI %.8x and reqid {%d} "
-                                         "which is not a CHILD_SA", ntohl(spi), reqid);
-               return;
-       }
-       
-       DBG1(DBG_KNL, "creating %s job for %N CHILD_SA with SPI %.8x and reqid {%d}",
-                expire->hard ? "delete" : "rekey",  protocol_id_names,
-                protocol, ntohl(spi), reqid);
-       if (expire->hard)
-       {
-               job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
-       }
-       else
-       {
-               job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
-       }
-       charon->processor->queue_job(charon->processor, job);
+       return this->net->add_route(this->net, dst_net, prefixlen, gateway, src_ip,
+                       if_name);
 }
 
 /**
- * start a roaming job. We delay it for a second and fire only one job
- * for multiple events. Otherwise we would create two many jobs.
+ * Implementation of kernel_interface_t.del_route
  */
-static void fire_roam_job(private_kernel_interface_t *this, bool address)
+static status_t del_route(private_kernel_interface_t *this, chunk_t dst_net,
+               u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
 {
-       struct timeval now;
-               
-       if (gettimeofday(&now, NULL) == 0)
-       {
-               if (timercmp(&now, &this->last_roam, >))
-               {
-                       now.tv_usec += ROAM_DELAY * 1000;
-                       while (now.tv_usec > 1000000)
-                       {
-                               now.tv_sec++;
-                               now.tv_usec -= 1000000;
-                       }
-                       this->last_roam = now;
-                       charon->scheduler->schedule_job(charon->scheduler,
-                                       (job_t*)roam_job_create(address), ROAM_DELAY);
-               }
-       }
+       return this->net->del_route(this->net, dst_net, prefixlen, gateway, src_ip,
+                       if_name);
 }
 
+
 /**
- * get the refcount of a virtual ip
+ * Implementation of kernel_interface_t.add_ipsec_interface.
  */
-static int get_vip_refcount(private_kernel_interface_t *this, host_t* ip)
+static void add_ipsec_interface(private_kernel_interface_t *this,
+               kernel_ipsec_constructor_t *create)
 {
-       iterator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       int refcount = 0;
-       
-       ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
-       while (ifaces->iterate(ifaces, (void**)&iface))
-       {
-               addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
-               while (addrs->iterate(addrs, (void**)&addr))
-               {
-                       if (addr->virtual && (iface->flags & IFF_UP) &&
-                               ip->ip_equals(ip, addr->ip))
-                       {
-                               refcount = addr->refcount;
-                               break;
-                       }
-               }
-               addrs->destroy(addrs);
-               if (refcount)
-               {
-                       break;
-               }
-       }
-       ifaces->destroy(ifaces);
-       
-       return refcount;
+       this->mutex->lock(this->mutex);
+       this->ipsec_interfaces->insert_last(this->ipsec_interfaces, create);
+       this->mutex->unlock(this->mutex);
 }
 
 /**
- * process RTM_NEWLINK/RTM_DELLINK from kernel
+ * Implementation of kernel_interface_t.remove_ipsec_interface.
  */
-static void process_link(private_kernel_interface_t *this,
-                                                struct nlmsghdr *hdr, bool event)
+static void remove_ipsec_interface(private_kernel_interface_t *this,
+               kernel_ipsec_constructor_t *create)
 {
-       struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
-       struct rtattr *rta = IFLA_RTA(msg);
-       size_t rtasize = IFLA_PAYLOAD (hdr);
-       iterator_t *iterator;
-       iface_entry_t *current, *entry = NULL;
-       char *name = NULL;
-       bool update = FALSE;
-       
-       while(RTA_OK(rta, rtasize))
-       {
-               switch (rta->rta_type)
-               {
-                       case IFLA_IFNAME:
-                               name = RTA_DATA(rta);
-                               break;
-               }
-               rta = RTA_NEXT(rta, rtasize);
-       }
-       if (!name)
-       {
-               name = "(unknown)";
-       }
-       
-       switch (hdr->nlmsg_type)
-       {
-               case RTM_NEWLINK:
-               {
-                       if (msg->ifi_flags & IFF_LOOPBACK)
-                       {       /* ignore loopback interfaces */
-                               break;
-                       }
-                       iterator = this->ifaces->create_iterator_locked(this->ifaces,
-                                                                                                                       &this->mutex);
-                       while (iterator->iterate(iterator, (void**)&current))
-                       {
-                               if (current->ifindex == msg->ifi_index)
-                               {
-                                       entry = current;
-                                       break;
-                               }
-                       }
-                       if (!entry)
-                       {
-                               entry = malloc_thing(iface_entry_t);
-                               entry->ifindex = msg->ifi_index;
-                               entry->flags = 0;
-                               entry->addrs = linked_list_create();
-                               this->ifaces->insert_last(this->ifaces, entry);
-                       }
-                       memcpy(entry->ifname, name, IFNAMSIZ);
-                       entry->ifname[IFNAMSIZ-1] = '\0';
-                       if (event)
-                       {
-                               if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
-                               {
-                                       update = TRUE;
-                                       DBG1(DBG_KNL, "interface %s activated", name);
-                               }
-                               if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
-                               {
-                                       update = TRUE;
-                                       DBG1(DBG_KNL, "interface %s deactivated", name);
-                               }
-                       }
-                       entry->flags = msg->ifi_flags;
-                       iterator->destroy(iterator);
-                       break;
-               }
-               case RTM_DELLINK:
-               {
-                       iterator = this->ifaces->create_iterator_locked(this->ifaces,
-                                                                                                                       &this->mutex);
-                       while (iterator->iterate(iterator, (void**)&current))
-                       {
-                               if (current->ifindex == msg->ifi_index)
-                               {
-                                       /* we do not remove it, as an address may be added to a 
-                                        * "down" interface and we wan't to know that. */
-                                       current->flags = msg->ifi_flags;
-                                       break;
-                               }
-                       }
-                       iterator->destroy(iterator);
-                       break;
-               }
-       }
-       
-       /* send an update to all IKE_SAs */
-       if (update && event)
-       {
-               fire_roam_job(this, TRUE);
-       }
+       this->mutex->lock(this->mutex);
+       this->ipsec_interfaces->remove(this->ipsec_interfaces, create, NULL);
+       this->mutex->unlock(this->mutex);
 }
 
 /**
- * process RTM_NEWADDR/RTM_DELADDR from kernel
+ * Implementation of kernel_interface_t.add_net_interface.
  */
-static void process_addr(private_kernel_interface_t *this,
-                                                struct nlmsghdr *hdr, bool event)
+static void add_net_interface(private_kernel_interface_t *this,
+               kernel_net_constructor_t *create)
 {
-       struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
-       struct rtattr *rta = IFA_RTA(msg);
-       size_t rtasize = IFA_PAYLOAD (hdr);
-       host_t *host = NULL;
-       iterator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       chunk_t local = chunk_empty, address = chunk_empty;
-       bool update = FALSE, found = FALSE, changed = FALSE;
-       
-       while(RTA_OK(rta, rtasize))
-       {
-               switch (rta->rta_type)
-               {
-                       case IFA_LOCAL:
-                               local.ptr = RTA_DATA(rta);
-                               local.len = RTA_PAYLOAD(rta);
-                               break;
-                       case IFA_ADDRESS:
-                               address.ptr = RTA_DATA(rta);
-                               address.len = RTA_PAYLOAD(rta);
-                               break;
-               }
-               rta = RTA_NEXT(rta, rtasize);
-       }
-       
-       /* For PPP interfaces, we need the IFA_LOCAL address,
-        * IFA_ADDRESS is the peers address. But IFA_LOCAL is
-        * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
-       if (local.ptr)
-       {
-               host = host_create_from_chunk(msg->ifa_family, local, 0);
-       }
-       else if (address.ptr)
-       {
-               host = host_create_from_chunk(msg->ifa_family, address, 0);
-       }
-       
-       if (host == NULL)
-       {       /* bad family? */
-               return;
-       }
-       
-       ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
-       while (ifaces->iterate(ifaces, (void**)&iface))
-       {
-               if (iface->ifindex == msg->ifa_index)
-               {
-                       addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
-                       while (addrs->iterate(addrs, (void**)&addr))
-                       {
-                               if (host->ip_equals(host, addr->ip))
-                               {
-                                       found = TRUE;
-                                       if (hdr->nlmsg_type == RTM_DELADDR)
-                                       {
-                                               addrs->remove(addrs);
-                                               if (!addr->virtual)
-                                               {
-                                                       changed = TRUE;
-                                                       DBG1(DBG_KNL, "%H disappeared from %s",
-                                                                host, iface->ifname);
-                                               }
-                                               addr_entry_destroy(addr);
-                                       }
-                                       else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
-                                       {
-                                               addr->refcount = 1;
-                                       }
-                               }
-                       }
-                       addrs->destroy(addrs);
-               
-                       if (hdr->nlmsg_type == RTM_NEWADDR)
-                       {
-                               if (!found)
-                               {
-                                       found = TRUE;
-                                       changed = TRUE;
-                                       addr = malloc_thing(addr_entry_t);
-                                       addr->ip = host->clone(host);
-                                       addr->virtual = FALSE;
-                                       addr->refcount = 1;
-                                       addr->scope = msg->ifa_scope;
-                                       
-                                       iface->addrs->insert_last(iface->addrs, addr);
-                                       if (event)
-                                       {
-                                               DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
-                                       }
-                               }
-                       }
-                       if (found && (iface->flags & IFF_UP))
-                       {
-                               update = TRUE;
-                       }
-                       break;
-               }
-       }
-       ifaces->destroy(ifaces);
-       host->destroy(host);
-       
-       /* send an update to all IKE_SAs */
-       if (update && event && changed)
-       {
-               fire_roam_job(this, TRUE);
-       }
+       this->mutex->lock(this->mutex);
+       this->net_interfaces->insert_last(this->net_interfaces, create);
+       this->mutex->unlock(this->mutex);
 }
 
 /**
- * process RTM_NEWROUTE and RTM_DELROUTE from kernel
+ * Implementation of kernel_interface_t.remove_net_interface.
  */
-static void process_route(private_kernel_interface_t *this, struct nlmsghdr *hdr)
+static void remove_net_interface(private_kernel_interface_t *this,
+               kernel_net_constructor_t *create)
 {
-       struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
-       struct rtattr *rta = RTM_RTA(msg);
-       size_t rtasize = RTM_PAYLOAD(hdr);
-       host_t *host = NULL;
-       
-       while (RTA_OK(rta, rtasize))
-       {
-               switch (rta->rta_type)
-               {
-                       case RTA_PREFSRC:
-                               host = host_create_from_chunk(msg->rtm_family,
-                                                       chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
-                               break;
-               }
-               rta = RTA_NEXT(rta, rtasize);
-       }
-       if (host)
-       {
-               if (!get_vip_refcount(this, host))
-               {       /* ignore routes added for virtual IPs */
-                       fire_roam_job(this, FALSE);
-               }
-               host->destroy(host);
-       }
+       this->mutex->lock(this->mutex);
+       this->net_interfaces->remove(this->net_interfaces, create, NULL);
+       this->mutex->unlock(this->mutex);
 }
 
 /**
- * Receives events from kernel
+ * Implementation of kernel_interface_t.create_interfaces.
  */
-static job_requeue_t receive_events(private_kernel_interface_t *this)
+static void create_interfaces(private_kernel_interface_t *this)
 {
-       char response[1024];
-       struct nlmsghdr *hdr = (struct nlmsghdr*)response;
-       struct sockaddr_nl addr;
-       socklen_t addr_len = sizeof(addr);
-       int len, oldstate, maxfd, selected;
-       fd_set rfds;
-
-       FD_ZERO(&rfds);
-       FD_SET(this->socket_xfrm_events, &rfds);
-       FD_SET(this->socket_rt_events, &rfds);
-       maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
+       kernel_ipsec_constructor_t create_ipsec;
+       kernel_net_constructor_t create_net;
        
-       pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
-       selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
-       pthread_setcancelstate(oldstate, NULL);
-       if (selected <= 0)
-       {
-               DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
-               return JOB_REQUEUE_FAIR;
-       }
-       if (FD_ISSET(this->socket_xfrm_events, &rfds))
-       {
-               selected = this->socket_xfrm_events;
-       }
-       else if (FD_ISSET(this->socket_rt_events, &rfds))
-       {
-               selected = this->socket_rt_events;
-       }
-       else
+       this->mutex->lock(this->mutex);
+       if (this->ipsec_interfaces->get_first(this->ipsec_interfaces, (void**)&create_ipsec) != SUCCESS)
        {
-               return JOB_REQUEUE_DIRECT;
+               this->mutex->unlock(this->mutex);
+               charon->kill(charon, "no ipsec kernel interface loaded");
        }
        
-       len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
-                                  (struct sockaddr*)&addr, &addr_len);
-       if (len < 0)
+       if (this->net_interfaces->get_first(this->net_interfaces, (void**)&create_net) != SUCCESS)
        {
-               switch (errno)
-               {
-                       case EINTR:
-                               /* interrupted, try again */
-                               return JOB_REQUEUE_DIRECT;
-                       case EAGAIN:
-                               /* no data ready, select again */
-                               return JOB_REQUEUE_DIRECT;
-                       default:
-                               DBG1(DBG_KNL, "unable to receive from xfrm event socket");
-                               sleep(1);
-                               return JOB_REQUEUE_FAIR;
-               }
-       }
-       if (addr.nl_pid != 0)
-       {       /* not from kernel. not interested, try another one */
-               return JOB_REQUEUE_DIRECT;
+               this->mutex->unlock(this->mutex);
+               charon->kill(charon, "no network kernel interface loaded");
        }
+       this->mutex->unlock(this->mutex);
        
-       while (NLMSG_OK(hdr, len))
-       {
-               /* looks good so far, dispatch netlink message */
-               if (selected == this->socket_xfrm_events)
-               {
-                       switch (hdr->nlmsg_type)
-                       {
-                               case XFRM_MSG_ACQUIRE:
-                                       process_acquire(this, hdr);
-                                       break;
-                               case XFRM_MSG_EXPIRE:
-                                       process_expire(this, hdr);
-                                       break;
-                               default:
-                                       break;
-                       }
-               }
-               else if (selected == this->socket_rt_events)
-               {
-                       switch (hdr->nlmsg_type)
-                       {
-                               case RTM_NEWADDR:
-                               case RTM_DELADDR:
-                                       process_addr(this, hdr, TRUE);
-                                       pthread_cond_broadcast(&this->cond);
-                                       break;
-                               case RTM_NEWLINK:
-                               case RTM_DELLINK:
-                                       process_link(this, hdr, TRUE);
-                                       pthread_cond_broadcast(&this->cond);
-                                       break;
-                               case RTM_NEWROUTE:
-                               case RTM_DELROUTE:
-                                       if (this->process_route)
-                                       {
-                                               process_route(this, hdr);
-                                       }
-                                       break;
-                               default:
-                                       break;
-                       }
-               }
-               hdr = NLMSG_NEXT(hdr, len);
-       }
-       return JOB_REQUEUE_DIRECT;
+       this->ipsec = create_ipsec();
+       this->net = create_net();
 }
 
 /**
- * send a netlink message and wait for a reply
+ * Implementation of kernel_interface_t.destroy.
  */
-static status_t netlink_send(private_kernel_interface_t *this,
-                                                        int socket, struct nlmsghdr *in,
-                                                        struct nlmsghdr **out, size_t *out_len)
+static void destroy(private_kernel_interface_t *this)
 {
-       int len, addr_len;
-       struct sockaddr_nl addr;
-       chunk_t result = chunk_empty, tmp;
-       struct nlmsghdr *msg, peek;
-       
-       pthread_mutex_lock(&this->nl_mutex);
-       
-       in->nlmsg_seq = ++this->seq;
-       in->nlmsg_pid = getpid();
-       
-       memset(&addr, 0, sizeof(addr));
-       addr.nl_family = AF_NETLINK;
-       addr.nl_pid = 0;
-       addr.nl_groups = 0;
-
-       while (TRUE)
-       {
-               len = sendto(socket, in, in->nlmsg_len, 0, 
-                                        (struct sockaddr*)&addr, sizeof(addr));
-               
-               if (len != in->nlmsg_len)
-               {       
-                       if (errno == EINTR)
-                       {
-                               /* interrupted, try again */
-                               continue;
-                       }
-                       pthread_mutex_unlock(&this->nl_mutex);
-                       DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
-                       return FAILED;
-               }
-               break;
-       }
-       
-       while (TRUE)
-       {       
-               char buf[4096];
-               tmp.len = sizeof(buf);
-               tmp.ptr = buf;
-               msg = (struct nlmsghdr*)tmp.ptr;
-               
-               memset(&addr, 0, sizeof(addr));
-               addr.nl_family = AF_NETLINK;
-               addr.nl_pid = getpid();
-               addr.nl_groups = 0;
-               addr_len = sizeof(addr);
-               
-               len = recvfrom(socket, tmp.ptr, tmp.len, 0,
-                                          (struct sockaddr*)&addr, &addr_len);
-               
-               if (len < 0)
-               {
-                       if (errno == EINTR)
-                       {
-                               DBG1(DBG_KNL, "got interrupted");
-                               /* interrupted, try again */
-                               continue;
-                       }
-                       DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
-                       pthread_mutex_unlock(&this->nl_mutex);
-                       free(result.ptr);
-                       return FAILED;
-               }
-               if (!NLMSG_OK(msg, len))
-               {
-                       DBG1(DBG_KNL, "received corrupted netlink message");
-                       pthread_mutex_unlock(&this->nl_mutex);
-                       free(result.ptr);
-                       return FAILED;
-               }
-               if (msg->nlmsg_seq != this->seq)
-               {
-                       DBG1(DBG_KNL, "received invalid netlink sequence number");
-                       if (msg->nlmsg_seq < this->seq)
-                       {
-                               continue;
-                       }
-                       pthread_mutex_unlock(&this->nl_mutex);
-                       free(result.ptr);
-                       return FAILED;
-               }
-               
-               tmp.len = len;
-               result.ptr = realloc(result.ptr, result.len + tmp.len);
-               memcpy(result.ptr + result.len, tmp.ptr, tmp.len);
-               result.len += tmp.len;
-               
-               /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
-                * numbers to detect multi header messages */
-               len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
-                                          (struct sockaddr*)&addr, &addr_len);
-               
-               if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
-               {
-                       /* seems to be multipart */
-                       continue;
-               }
-               break;
-       }
-       
-       *out_len = result.len;
-       *out = (struct nlmsghdr*)result.ptr;
-       
-       pthread_mutex_unlock(&this->nl_mutex);
-       
-       return SUCCESS;
+       DESTROY_IF(this->ipsec);
+       DESTROY_IF(this->net);
+       this->ipsec_interfaces->destroy(this->ipsec_interfaces);
+       this->net_interfaces->destroy(this->net_interfaces);
+       this->mutex->destroy(this->mutex);
+       free(this);
 }
 
-/**
- * send a netlink message and wait for its acknowledge
+/*
+ * Described in header-file
  */
-static status_t netlink_send_ack(private_kernel_interface_t *this,
-                                                                int socket, struct nlmsghdr *in)
-{
-       struct nlmsghdr *out, *hdr;
-       size_t len;
-
-       if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
-       {
-               return FAILED;
-       }
-       hdr = out;
-       while (NLMSG_OK(hdr, len))
-       {
-               switch (hdr->nlmsg_type)
-               {
-                       case NLMSG_ERROR:
-                       {
-                               struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
-                               
-                               if (err->error)
-                               {
-                                       if (-err->error == EEXIST)
-                                       {       /* do not report existing routes */
-                                               free(out);
-                                               return ALREADY_DONE;
-                                       }
-                                       DBG1(DBG_KNL, "received netlink error: %s (%d)",
-                                                strerror(-err->error), -err->error);
-                                       free(out);
-                                       return FAILED;
-                               }
-                               free(out);
-                               return SUCCESS;
-                       }
-                       default:
-                               hdr = NLMSG_NEXT(hdr, len);
-                               continue;
-                       case NLMSG_DONE:
-                               break;
-               }
-               break;
-       }
-       DBG1(DBG_KNL, "netlink request not acknowledged");
-       free(out);
-       return FAILED;
-}
-       
-/**
- * Initialize a list of local addresses.
- */
-static status_t init_address_list(private_kernel_interface_t *this)
-{
-       char request[BUFFER_SIZE];
-       struct nlmsghdr *out, *current, *in;
-       struct rtgenmsg *msg;
-       size_t len;
-       iterator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       
-       DBG1(DBG_KNL, "listening on interfaces:");
-       
-       memset(&request, 0, sizeof(request));
-
-       in = (struct nlmsghdr*)&request;
-       in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
-       in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
-       msg = (struct rtgenmsg*)NLMSG_DATA(in);
-       msg->rtgen_family = AF_UNSPEC;
-       
-       /* get all links */
-       in->nlmsg_type = RTM_GETLINK;
-       if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
-       {
-               return FAILED;
-       }
-       current = out;
-       while (NLMSG_OK(current, len))
-       {
-               switch (current->nlmsg_type)
-               {
-                       case NLMSG_DONE:
-                               break;
-                       case RTM_NEWLINK:
-                               process_link(this, current, FALSE);
-                               /* fall through */
-                       default:
-                               current = NLMSG_NEXT(current, len);
-                               continue;
-               }
-               break;
-       }
-       free(out);
-       
-       /* get all interface addresses */
-       in->nlmsg_type = RTM_GETADDR;
-       if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
-       {
-               return FAILED;
-       }
-       current = out;
-       while (NLMSG_OK(current, len))
-       {
-               switch (current->nlmsg_type)
-               {
-                       case NLMSG_DONE:
-                               break;
-                       case RTM_NEWADDR:
-                               process_addr(this, current, FALSE);
-                               /* fall through */
-                       default:
-                               current = NLMSG_NEXT(current, len);
-                               continue;
-               }
-               break;
-       }
-       free(out);
-       
-       ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
-       while (ifaces->iterate(ifaces, (void**)&iface))
-       {
-               if (iface->flags & IFF_UP)
-               {
-                       DBG1(DBG_KNL, "  %s", iface->ifname);
-                       addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
-                       while (addrs->iterate(addrs, (void**)&addr))
-                       {
-                               DBG1(DBG_KNL, "    %H", addr->ip);
-                       }
-                       addrs->destroy(addrs);
-               }
-       }
-       ifaces->destroy(ifaces);
-       return SUCCESS;
-}
-
-/**
- * iterator hook to iterate over addrs
- */
-static hook_result_t addr_hook(private_kernel_interface_t *this,
-                                                          addr_entry_t *in, host_t **out)
-{
-       if (in->virtual)
-       {       /* skip virtual interfaces added by us */
-               return HOOK_SKIP;
-       }
-       if (in->scope >= RT_SCOPE_LINK)
-       {       /* skip addresses with a unusable scope */
-               return HOOK_SKIP;
-       }
-       *out = in->ip;
-       return HOOK_NEXT;
-}
-                                                               
-/**
- * iterator hook to iterate over ifaces
- */
-static hook_result_t iface_hook(private_kernel_interface_t *this,
-                                                               iface_entry_t *in, host_t **out)
-{
-       if (!(in->flags & IFF_UP))
-       {       /* skip interfaces not up */
-               return HOOK_SKIP;
-       }
-
-       if (this->hiter == NULL)
-       {
-               this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
-               this->hiter->set_iterator_hook(this->hiter,
-                                                                          (iterator_hook_t*)addr_hook, this);
-       }
-       while (this->hiter->iterate(this->hiter, (void**)out))
-       {
-               return HOOK_AGAIN;
-       }
-       this->hiter->destroy(this->hiter);
-       this->hiter = NULL;
-       return HOOK_SKIP;
-}
-
-/**
- * Implements kernel_interface_t.create_address_iterator.
- */
-static iterator_t *create_address_iterator(private_kernel_interface_t *this)
-{
-       iterator_t *iterator;
-       
-       /* This iterator is not only hooked, it is double-hooked. As we have stored
-        * our addresses in iface_entry->addr_entry->ip, we need to iterate the
-        * entries in each interface we iterate. This does the iface_hook. The
-        * addr_hook returns the ip instead of the addr_entry. */
-       
-       iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
-       iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
-       return iterator;
-}
-
-/**
- * implementation of kernel_interface_t.get_interface_name
- */
-static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
-{
-       iterator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       char *name = NULL;
-       
-       DBG2(DBG_KNL, "getting interface name for %H", ip);
-       
-       ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
-       while (ifaces->iterate(ifaces, (void**)&iface))
-       {
-               addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
-               while (addrs->iterate(addrs, (void**)&addr))
-               {
-                       if (ip->ip_equals(ip, addr->ip))
-                       {
-                               name = strdup(iface->ifname);
-                               break;
-                       }
-               }
-               addrs->destroy(addrs);
-               if (name)
-               {
-                       break;
-               }
-       }
-       ifaces->destroy(ifaces);
-       
-       if (name)
-       {
-               DBG2(DBG_KNL, "%H is on interface %s", ip, name);
-       }
-       else
-       {
-               DBG2(DBG_KNL, "%H is not a local address", ip);
-       }
-       return name;
-}
-
-/**
- * Tries to find an ip address of a local interface that is included in the
- * supplied traffic selector.
- */
-static status_t get_address_by_ts(private_kernel_interface_t *this,
-                                                                 traffic_selector_t *ts, host_t **ip)
-{
-       iterator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       host_t *host;
-       int family;
-       bool found = FALSE;
-       
-       DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
-       
-       /* if we have a family which includes localhost, we do not
-        * search for an IP, we use the default */
-       family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
-       
-       if (family == AF_INET)
-       {
-               host = host_create_from_string("127.0.0.1", 0);
-       }
-       else
-       {
-               host = host_create_from_string("::1", 0);
-       }
-       
-       if (ts->includes(ts, host))
-       {
-               *ip = host_create_any(family);
-               host->destroy(host);
-               DBG2(DBG_KNL, "using host %H", *ip);
-               return SUCCESS;
-       }
-       host->destroy(host);
-       
-       ifaces = this->ifaces->create_iterator_locked(this->ifaces,     &this->mutex);
-       while (ifaces->iterate(ifaces, (void**)&iface))
-       {
-               addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
-               while (addrs->iterate(addrs, (void**)&addr))
-               {
-                       if (ts->includes(ts, addr->ip))
-                       {
-                               found = TRUE;
-                               *ip = addr->ip->clone(addr->ip);
-                               break;
-                       }
-               }
-               addrs->destroy(addrs);
-               if (found)
-               {
-                       break;
-               }
-       }
-       ifaces->destroy(ifaces);
-       
-       if (!found)
-       {
-               DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
-               return FAILED;
-       }
-       DBG2(DBG_KNL, "using host %H", *ip);
-       return SUCCESS;
-}
-
-/**
- * Look up the index of the interface that carries a given local address.
- *
- * Every cached interface is visited through a locked iterator bound to
- * this->mutex, and each of its addresses is compared with ip. The first
- * address that equals ip determines the result.
- *
- * Returns the ifindex of the owning interface, or 0 (after logging) if no
- * cached address matched.
- */
-static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
-{
-       iterator_t *iface_iter, *addr_iter;
-       iface_entry_t *entry;
-       addr_entry_t *candidate;
-       int match = 0;
-
-       DBG2(DBG_KNL, "getting iface for %H", ip);
-
-       iface_iter = this->ifaces->create_iterator_locked(this->ifaces,
-                                                                                                         &this->mutex);
-       /* stop asking for further interfaces as soon as an index was found */
-       while (!match && iface_iter->iterate(iface_iter, (void**)&entry))
-       {
-               addr_iter = entry->addrs->create_iterator(entry->addrs, TRUE);
-               while (addr_iter->iterate(addr_iter, (void**)&candidate))
-               {
-                       if (ip->ip_equals(ip, candidate->ip))
-                       {
-                               match = entry->ifindex;
-                               break;
-                       }
-               }
-               addr_iter->destroy(addr_iter);
-       }
-       iface_iter->destroy(iface_iter);
-
-       if (!match)
-       {
-               DBG1(DBG_KNL, "unable to get interface for %H", ip);
-       }
-       return match;
-}
-
-/**
- * Add or remove an IP address on an interface through rtnetlink.
- *
- * The caller picks the operation with nlmsg_type and may OR additional
- * netlink flags into the request. The address is always announced with a
- * full-length prefix (8 bits per address byte) and universe scope, and is
- * attached to the message as IFA_LOCAL attribute.
- *
- * Returns whatever netlink_send_ack() reports for the request.
- */
-static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
-                                                         int flags, int if_index, host_t *ip)
-{
-       unsigned char buf[BUFFER_SIZE];
-       struct nlmsghdr *nlh = (struct nlmsghdr*)buf;
-       struct ifaddrmsg *ifa;
-       chunk_t address = ip->get_address(ip);
-
-       memset(buf, 0, sizeof(buf));
-
-       /* netlink header */
-       nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
-       nlh->nlmsg_type = nlmsg_type;
-       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
-
-       /* address message directly behind the header */
-       ifa = (struct ifaddrmsg*)NLMSG_DATA(nlh);
-       ifa->ifa_index = if_index;
-       ifa->ifa_scope = RT_SCOPE_UNIVERSE;
-       ifa->ifa_prefixlen = 8 * address.len;
-       ifa->ifa_flags = 0;
-       ifa->ifa_family = ip->get_family(ip);
-
-       add_attribute(nlh, IFA_LOCAL, address, sizeof(buf));
-
-       return netlink_send_ack(this, this->socket_rt, nlh);
-}
-
-/**
- * Manages source routes in the routing table.
- * By setting the appropriate nlmsg_type, the route is added or removed.
- *
- * If this->routing_table is 0 and the route has a prefix length of 0, the
- * route is not sent as such but split into its two /1 halves. Both halves
- * are always attempted; the call reports SUCCESS only if both succeeded,
- * otherwise the status of the first failing half is returned.
- *
- * In all other cases a single rtnetlink message carrying RTA_DST,
- * RTA_PREFSRC, RTA_GATEWAY and RTA_OIF is sent and the result of
- * netlink_send_ack() is returned.
- */
-static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
-                                                               int flags, route_entry_t *route)
-{
-       unsigned char request[BUFFER_SIZE];
-       struct nlmsghdr *hdr;
-       struct rtmsg *msg;
-       chunk_t chunk;
-
-       /* if route is 0.0.0.0/0, we can't install it, as it would
-        * overwrite the default route. Instead, we add two routes:
-        * 0.0.0.0/1 and 128.0.0.0/1 */
-       if (this->routing_table == 0 && route->prefixlen == 0)
-       {
-               route_entry_t half;
-               status_t lower, upper;
-               
-               half.dst_net = chunk_alloca(route->dst_net.len);
-               memset(half.dst_net.ptr, 0, half.dst_net.len);
-               half.src_ip = route->src_ip;
-               half.gateway = route->gateway;
-               half.if_index = route->if_index;
-               half.prefixlen = 1;
-               
-               lower = manage_srcroute(this, nlmsg_type, flags, &half);
-               /* setting the most significant bit selects the upper half */
-               half.dst_net.ptr[0] |= 0x80;
-               upper = manage_srcroute(this, nlmsg_type, flags, &half);
-               /* a success of the second half must not hide a failure of the first */
-               return (lower != SUCCESS) ? lower : upper;
-       }
-       
-       memset(&request, 0, sizeof(request));
-
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
-       hdr->nlmsg_type = nlmsg_type;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
-
-       msg = (struct rtmsg*)NLMSG_DATA(hdr);
-       msg->rtm_family = route->src_ip->get_family(route->src_ip);
-       msg->rtm_dst_len = route->prefixlen;
-       msg->rtm_table = this->routing_table;
-       msg->rtm_protocol = RTPROT_STATIC;
-       msg->rtm_type = RTN_UNICAST;
-       msg->rtm_scope = RT_SCOPE_UNIVERSE;
-       
-       add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
-       chunk = route->src_ip->get_address(route->src_ip);
-       add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
-       chunk = route->gateway->get_address(route->gateway);
-       add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
-       /* the interface index is passed as raw bytes of the struct member */
-       chunk.ptr = (char*)&route->if_index;
-       chunk.len = sizeof(route->if_index);
-       add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
-
-       return netlink_send_ack(this, this->socket_rt, hdr);
-}
-
-/**
- * Create or delete a routing rule that points to a routing table.
- *
- * The rule is an AF_INET unicast rule referring to table, with prio sent as
- * RTA_PRIORITY attribute. For RTM_NEWRULE the request additionally carries
- * NLM_F_CREATE | NLM_F_EXCL.
- *
- * Returns whatever netlink_send_ack() reports for the request.
- */
-static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
-                                                       u_int32_t table, u_int32_t prio)
-{
-       unsigned char buf[BUFFER_SIZE];
-       struct nlmsghdr *nlh = (struct nlmsghdr*)buf;
-       struct rtmsg *rule;
-       chunk_t priority;
-
-       memset(buf, 0, sizeof(buf));
-
-       nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
-       nlh->nlmsg_type = nlmsg_type;
-       /* only the creation of a rule gets the create/exclusive flags */
-       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK |
-               ((nlmsg_type == RTM_NEWRULE) ? (NLM_F_CREATE | NLM_F_EXCL) : 0);
-
-       rule = (struct rtmsg*)NLMSG_DATA(nlh);
-       rule->rtm_family = AF_INET;
-       rule->rtm_table = table;
-       rule->rtm_type = RTN_UNICAST;
-       rule->rtm_scope = RT_SCOPE_UNIVERSE;
-       rule->rtm_protocol = RTPROT_BOOT;
-
-       priority = chunk_from_thing(prio);
-       add_attribute(nlh, RTA_PRIORITY, priority, sizeof(buf));
-
-       return netlink_send_ack(this, this->socket_rt, nlh);
-}
-
-/**
- * check if an address (chunk) addr is in subnet (net with net_len net bits)
- *
- * Both chunks are compared bit by bit, starting with the most significant
- * bit of the first byte, until net_len bits have been checked.
- *
- * Returns FALSE if the chunks differ in length or if one of the first
- * net_len bits differs, TRUE otherwise. A net_len of 0 (or less) matches
- * any address of the same length; a net_len larger than the address is
- * treated like a full-length comparison.
- */
-static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
-{
-       int bit, byte;
-
-       if (addr.len != net.len)
-       {
-               return FALSE;
-       }
-       /* scan through all bits, beginning in the front */
-       for (byte = 0; byte < addr.len; byte++)
-       {
-               for (bit = 7; bit >= 0; bit--)
-               {
-                       /* bit counts down from the MSB, so (7 - bit) is the position of
-                        * the current bit within the byte when counted from the front.
-                        * Once net_len bits have been compared, the address matches. */
-                       if (byte * 8 + (7 - bit) >= net_len)
-                       {
-                               return TRUE;
-                       }
-                       if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
-                       {
-                               return FALSE;
-                       }
-               }
-       }
-       return TRUE;
-}
-
-/**
- * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
- *
- * Sends an RTM_GETROUTE dump request for dest's address family on
- * this->socket_rt and scans the RTM_NEWROUTE messages of the reply. A route
- * is considered if it is not in this->routing_table (when that is non-zero),
- * has a longer prefix than the best one seen so far and is either a default
- * route or contains dest according to addr_in_subnet().
- *
- * Return value as established by the code below:
- * - NULL if the netlink request fails
- * - nexthop: the gateway host of the selected route if one was created,
- *   otherwise a clone of dest
- * - !nexthop: the selected source address, which may be NULL
- * The returned host is a newly created/cloned object; presumably the caller
- * has to destroy it (TODO confirm against callers).
- */
-static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
-                                                bool nexthop)
-{
-       unsigned char request[BUFFER_SIZE];
-       struct nlmsghdr *hdr, *out, *current;
-       struct rtmsg *msg;
-       chunk_t chunk;
-       size_t len;
-       int best = -1; /* prefix length of the best route so far, -1: none yet */
-       host_t *src = NULL, *gtw = NULL;
-       
-       DBG2(DBG_KNL, "getting address to reach %H", dest);
-       
-       memset(&request, 0, sizeof(request));
-
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
-       hdr->nlmsg_type = RTM_GETROUTE;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
-
-       msg = (struct rtmsg*)NLMSG_DATA(hdr);
-       msg->rtm_family = dest->get_family(dest);
-       
-       chunk = dest->get_address(dest); /* also used below for the subnet check */
-       add_attribute(hdr, RTA_DST, chunk, sizeof(request));
-                       
-       if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
-       {
-               DBG1(DBG_KNL, "getting address to %H failed", dest);
-               return NULL;
-       }
-       current = out;
-       while (NLMSG_OK(current, len))
-       {
-               switch (current->nlmsg_type)
-               {
-                       case NLMSG_DONE:
-                               break; /* leaves the switch, the break behind it ends the loop */
-                       case RTM_NEWROUTE:
-                       {
-                               struct rtattr *rta;
-                               size_t rtasize;
-                               chunk_t rta_gtw, rta_src, rta_dst;
-                               u_int32_t rta_oif = 0;
-                               
-                               rta_gtw = rta_src = rta_dst = chunk_empty;
-                               msg = (struct rtmsg*)(NLMSG_DATA(current));
-                               rta = RTM_RTA(msg);
-                               rtasize = RTM_PAYLOAD(current);
-                               while (RTA_OK(rta, rtasize))
-                               {
-                                       switch (rta->rta_type)
-                                       {
-                                               case RTA_PREFSRC:
-                                                       rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_GATEWAY:
-                                                       rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_DST:
-                                                       rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_OIF:
-                                                       if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
-                                                       {
-                                                               rta_oif = *(u_int32_t*)RTA_DATA(rta);
-                                                       }
-                                                       break;
-                                       }
-                                       rta = RTA_NEXT(rta, rtasize);
-                               }
-                               
-                               /* apply the route if:
-                                * - it is not from our own ipsec routing table
-                                * - is better than a previous one
-                                * - is the default route or
-                                * - its destination net contains our destination
-                                *
-                                * "better" means a longer prefix (rtm_dst_len > best); chunk
-                                * still holds the address of dest here.
-                                */
-                               if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
-                                       &&  msg->rtm_dst_len > best
-                                       && (msg->rtm_dst_len == 0 || /* default route */
-                                       (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
-                               {
-                                       iterator_t *ifaces, *addrs;
-                                       iface_entry_t *iface;
-                                       addr_entry_t *addr;
-                                       
-                                       best = msg->rtm_dst_len;
-                                       if (nexthop)
-                                       {
-                                               DESTROY_IF(gtw);
-                                               gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
-                                       }
-                                       else if (rta_src.ptr)
-                                       {
-                                               DESTROY_IF(src);
-                                               src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
-                                               if (get_vip_refcount(this, src))
-                                               {       /* skip source address if it is installed by us
-                                                        * NOTE(review): best was already raised above, so
-                                                        * the skipped route still excludes later routes
-                                                        * with the same or a shorter prefix - confirm
-                                                        * whether that is intended */
-                                                       DESTROY_IF(src);
-                                                       src = NULL;
-                                                       current = NLMSG_NEXT(current, len);
-                                                       continue;
-                                               }
-                                       }
-                                       else
-                                       {
-                                               /* no source addr, get one from the interfaces:
-                                                * look at the interface whose index equals RTA_OIF
-                                                * and take its first address that either has the
-                                                * family of dest (default route) or lies within the
-                                                * destination net of the route */
-                                               ifaces = this->ifaces->create_iterator_locked(
-                                                                                                       this->ifaces, &this->mutex);
-                                               while (ifaces->iterate(ifaces, (void**)&iface))
-                                               {
-                                                       if (iface->ifindex == rta_oif)
-                                                       {
-                                                               addrs = iface->addrs->create_iterator(
-                                                                                                                       iface->addrs, TRUE);
-                                                               while (addrs->iterate(addrs, (void**)&addr))
-                                                               {
-                                                                       chunk_t ip = addr->ip->get_address(addr->ip);
-                                                                       if ((msg->rtm_dst_len == 0 && 
-                                                                                addr->ip->get_family(addr->ip) ==
-                                                                                       dest->get_family(dest)) ||
-                                                                               addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
-                                                                       {
-                                                                               DESTROY_IF(src);
-                                                                               src = addr->ip->clone(addr->ip);
-                                                                               break;
-                                                                       }
-                                                               }
-                                                               addrs->destroy(addrs);
-                                                       }
-                                               }
-                                               ifaces->destroy(ifaces);
-                                       }
-                               }
-                               /* FALL through */
-                       }
-                       default:
-                               current = NLMSG_NEXT(current, len); /* advance to the next message */
-                               continue;
-               }
-               break;
-       }
-       free(out); /* reply buffer handed out by netlink_send() */
-       
-       if (nexthop)
-       {
-               if (gtw)
-               {
-                       return gtw;
-               }
-               return dest->clone(dest); /* no gateway found: dest itself is the next hop */
-       }
-       return src;
-}
-
-/**
- * Implementation of kernel_interface_t.get_source_addr.
- *
- * Delegates to get_route() with nexthop disabled, i.e. asks for the source
- * address to use towards dest instead of the next hop.
- */
-static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
-{
-       host_t *source = get_route(this, dest, FALSE);
-
-       return source;
-}
-
-/**
- * Implementation of kernel_interface_t.add_ip.
- *
- * Installs virtual_ip on the interface that carries iface_ip. The cached
- * interfaces are walked through a locked iterator bound to this->mutex:
- * - if virtual_ip is found in the cache before the interface of iface_ip
- *   has been processed, only its refcount is incremented
- * - otherwise a cache entry with refcount 0 is appended to the interface
- *   holding iface_ip and the address is installed with RTM_NEWADDR; the
- *   call then waits on this->cond until get_vip_refcount() reports a
- *   non-zero count for virtual_ip
- *
- * Returns SUCCESS if the address was already cached or has been installed,
- * FAILED if the netlink request failed or no interface carries iface_ip.
- */
-static status_t add_ip(private_kernel_interface_t *this, 
-                                               host_t *virtual_ip, host_t *iface_ip)
-{
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       iterator_t *addrs, *ifaces;
-       int ifindex;
-
-       DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
-       
-       ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
-       while (ifaces->iterate(ifaces, (void**)&iface))
-       {
-               bool iface_found = FALSE;
-       
-               addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
-               while (addrs->iterate(addrs, (void**)&addr))
-               {
-                       if (iface_ip->ip_equals(iface_ip, addr->ip))
-                       {
-                               iface_found = TRUE;
-                       }
-                       else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
-                       {
-                               addr->refcount++;
-                               DBG2(DBG_KNL, "virtual IP %H already installed on %s",
-                                        virtual_ip, iface->ifname);
-                               addrs->destroy(addrs);
-                               ifaces->destroy(ifaces);
-                               return SUCCESS;
-                       }
-               }
-               addrs->destroy(addrs);
-               
-               if (iface_found)
-               {
-                       ifindex = iface->ifindex;
-                       /* cache the address with a refcount of 0 before asking the
-                        * kernel to install it.
-                        * NOTE(review): the entry is not removed again if the netlink
-                        * request below fails - confirm whether that is intended */
-                       addr = malloc_thing(addr_entry_t);
-                       addr->ip = virtual_ip->clone(virtual_ip);
-                       addr->refcount = 0;
-                       addr->virtual = TRUE;
-                       addr->scope = RT_SCOPE_UNIVERSE;
-                       iface->addrs->insert_last(iface->addrs, addr);
-                       
-                       if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
-                                                         ifindex, virtual_ip) == SUCCESS)
-                       {
-                               while (get_vip_refcount(this, virtual_ip) == 0)
-                               {       /* wait until address appears */
-                                       pthread_cond_wait(&this->cond, &this->mutex);
-                               }
-                               ifaces->destroy(ifaces);
-                               return SUCCESS;
-                       }
-                       ifaces->destroy(ifaces);
-                       DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
-                       return FAILED;
-               }
-       }
-       ifaces->destroy(ifaces);
-       
-       /* the two literals are concatenated, hence the space in front of
-        * "virtual" */
-       DBG1(DBG_KNL, "interface address %H not found, unable to install"
-                " virtual IP %H", iface_ip, virtual_ip);
-       return FAILED;
-}
-
-/**
- * Implementation of kernel_interface_t.del_ip.
- *
- * Searches the cached interfaces (locked iterator bound to this->mutex) for
- * virtual_ip. If its refcount is exactly 1, the address is removed with
- * RTM_DELADDR and, on success, the call waits on this->cond until
- * get_vip_refcount() no longer reports a positive count. For any other
- * refcount the counter is only decremented.
- *
- * Returns the status of the netlink request when the address is removed,
- * SUCCESS if only the refcount was decremented and FAILED if virtual_ip is
- * not cached.
- */
-static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
-{
-       iterator_t *iface_iter, *addr_iter;
-       iface_entry_t *entry;
-       addr_entry_t *cached;
-       status_t result;
-
-       DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
-
-       iface_iter = this->ifaces->create_iterator_locked(this->ifaces,
-                                                                                                         &this->mutex);
-       while (iface_iter->iterate(iface_iter, (void**)&entry))
-       {
-               addr_iter = entry->addrs->create_iterator(entry->addrs, TRUE);
-               while (addr_iter->iterate(addr_iter, (void**)&cached))
-               {
-                       if (!virtual_ip->ip_equals(virtual_ip, cached->ip))
-                       {
-                               continue;
-                       }
-                       if (cached->refcount != 1)
-                       {
-                               /* not the last reference, keep the address installed */
-                               cached->refcount--;
-                               DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
-                                        virtual_ip);
-                               addr_iter->destroy(addr_iter);
-                               iface_iter->destroy(iface_iter);
-                               return SUCCESS;
-                       }
-                       /* last reference, remove the address from the interface */
-                       result = manage_ipaddr(this, RTM_DELADDR, 0,
-                                                                  entry->ifindex, virtual_ip);
-                       if (result == SUCCESS)
-                       {
-                               /* wait until the address is really gone */
-                               while (get_vip_refcount(this, virtual_ip) > 0)
-                               {
-                                       pthread_cond_wait(&this->cond, &this->mutex);
-                               }
-                       }
-                       addr_iter->destroy(addr_iter);
-                       iface_iter->destroy(iface_iter);
-                       return result;
-               }
-               addr_iter->destroy(addr_iter);
-       }
-       iface_iter->destroy(iface_iter);
-
-       DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
-       return FAILED;
-}
-
-/**
- * Get an SPI for a specific protocol from the kernel.
- *
- * Sends an XFRM_MSG_ALLOCSPI request for the given addresses, protocol and
- * reqid, asking for a value in the range [min, max]. The reply is scanned up
- * to the first XFRM_MSG_NEWSA, NLMSG_ERROR or NLMSG_DONE message; any other
- * message type is skipped.
- *
- * Returns SUCCESS and stores the SPI in *spi if a non-zero SPI was received.
- * Returns FAILED otherwise, leaving *spi untouched.
- */
-static status_t get_spi_internal(private_kernel_interface_t *this,
-               host_t *src, host_t *dst, u_int8_t proto, u_int32_t min, u_int32_t max,
-               u_int32_t reqid, u_int32_t *spi)
-{
-       unsigned char buf[BUFFER_SIZE];
-       struct nlmsghdr *request, *reply, *msg;
-       struct xfrm_userspi_info *info;
-       u_int32_t allocated = 0;
-       size_t reply_len;
-
-       memset(buf, 0, sizeof(buf));
-
-       request = (struct nlmsghdr*)buf;
-       request->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
-       request->nlmsg_type = XFRM_MSG_ALLOCSPI;
-       request->nlmsg_flags = NLM_F_REQUEST;
-
-       info = (struct xfrm_userspi_info*)NLMSG_DATA(request);
-       host2xfrm(src, &info->info.saddr);
-       host2xfrm(dst, &info->info.id.daddr);
-       info->info.id.proto = proto;
-       info->info.mode = TRUE; /* tunnel mode */
-       info->info.reqid = reqid;
-       info->info.family = src->get_family(src);
-       info->min = min;
-       info->max = max;
-
-       if (netlink_send(this, this->socket_xfrm, request, &reply,
-                                        &reply_len) == SUCCESS)
-       {
-               /* walk the reply until a message decides about the outcome */
-               for (msg = reply; NLMSG_OK(msg, reply_len);
-                        msg = NLMSG_NEXT(msg, reply_len))
-               {
-                       if (msg->nlmsg_type == XFRM_MSG_NEWSA)
-                       {
-                               struct xfrm_usersa_info *usersa = NLMSG_DATA(msg);
-
-                               allocated = usersa->id.spi;
-                               break;
-                       }
-                       if (msg->nlmsg_type == NLMSG_ERROR)
-                       {
-                               struct nlmsgerr *err = NLMSG_DATA(msg);
-
-                               DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
-                                        strerror(-err->error), -err->error);
-                               break;
-                       }
-                       if (msg->nlmsg_type == NLMSG_DONE)
-                       {
-                               break;
-                       }
-               }
-               free(reply);
-       }
-
-       if (allocated == 0)
-       {
-               return FAILED;
-       }
-
-       *spi = allocated;
-       return SUCCESS;
-}
-
-/**
- * Implementation of kernel_interface_t.get_spi.
- *
- * Maps the IKE protocol identifier with proto_ike2kernel() and requests an
- * SPI in the range 0xc0000000 - 0xcFFFFFFF from get_spi_internal().
- *
- * Returns SUCCESS with the SPI stored in *spi, FAILED otherwise.
- */
-static status_t get_spi(private_kernel_interface_t *this, 
-                                               host_t *src, host_t *dst, 
-                                               protocol_id_t protocol, u_int32_t reqid,
-                                               u_int32_t *spi)
-{
-       status_t status;
-
-       DBG2(DBG_KNL, "getting SPI for reqid {%d}", reqid);
-
-       status = get_spi_internal(this, src, dst, proto_ike2kernel(protocol),
-                                                         0xc0000000, 0xcFFFFFFF, reqid, spi);
-       if (status == SUCCESS)
-       {
-               DBG2(DBG_KNL, "got SPI %.8x for reqid {%d}", ntohl(*spi), reqid);
-               return SUCCESS;
-       }
-
-       DBG1(DBG_KNL, "unable to get SPI for reqid {%d}", reqid);
-       return FAILED;
-}
-
-/**
- * Implementation of kernel_interface_t.get_cpi.
- *
- * Requests a value in the range 0x100 - 0xEFFF for IPPROTO_COMP from
- * get_spi_internal() and stores its lower 16 bits in *cpi, converted with
- * htons() after the received value went through ntohl().
- *
- * Returns SUCCESS with the CPI stored in *cpi, FAILED otherwise.
- */
-static status_t get_cpi(private_kernel_interface_t *this, 
-                                               host_t *src, host_t *dst, 
-                                               u_int32_t reqid, u_int16_t *cpi)
-{
-       u_int32_t allocated = 0;
-       status_t status;
-
-       DBG2(DBG_KNL, "getting CPI for reqid {%d}", reqid);
-
-       status = get_spi_internal(this, src, dst, IPPROTO_COMP,
-                                                         0x100, 0xEFFF, reqid, &allocated);
-       if (status == SUCCESS)
-       {
-               *cpi = htons((u_int16_t)ntohl(allocated));
-               DBG2(DBG_KNL, "got CPI %.4x for reqid {%d}", ntohs(*cpi), reqid);
-               return SUCCESS;
-       }
-
-       DBG1(DBG_KNL, "unable to get CPI for reqid {%d}", reqid);
-       return FAILED;
-}
-
-/**
- * Implementation of kernel_interface_t.add_sa.
- */
-static status_t add_sa(private_kernel_interface_t *this,
-                                          host_t *src, host_t *dst, u_int32_t spi,
-                                          protocol_id_t protocol, u_int32_t reqid,
-                                          u_int64_t expire_soft, u_int64_t expire_hard,
-                                          u_int16_t enc_alg, u_int16_t enc_size,
-                                          u_int16_t int_alg, u_int16_t int_size,
-                                          prf_plus_t *prf_plus, mode_t mode,
-                                          u_int16_t ipcomp, bool encap,
-                                          bool replace)
-{
-       unsigned char request[BUFFER_SIZE];
-       char *alg_name;
-       /* additional 4 octets KEYMAT required for AES-GCM as of RFC4106 8.1. */
-       u_int16_t add_keymat = 32; 
-       struct nlmsghdr *hdr;
-       struct xfrm_usersa_info *sa;
-       
-       memset(&request, 0, sizeof(request));
-       
-       DBG2(DBG_KNL, "adding SAD entry with SPI %.8x and reqid {%d}", ntohl(spi), reqid);
-
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-       hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
-       
-       sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
-       host2xfrm(src, &sa->saddr);
-       host2xfrm(dst, &sa->id.daddr);
-       sa->id.spi = spi;
-       sa->id.proto = proto_ike2kernel(protocol);
-       sa->family = src->get_family(src);
-       sa->mode = mode;
-       if (mode == MODE_TUNNEL)
-       {
-               sa->flags |= XFRM_STATE_AF_UNSPEC;
-       }
-       sa->replay_window = (protocol == IPPROTO_COMP) ? 0 : 32;
-       sa->reqid = reqid;
-       /* we currently do not expire SAs by volume/packet count */
-       sa->lft.soft_byte_limit = XFRM_INF;
-       sa->lft.hard_byte_limit = XFRM_INF;
-       sa->lft.soft_packet_limit = XFRM_INF;
-       sa->lft.hard_packet_limit = XFRM_INF;
-       /* we use lifetimes since added, not since used */
-       sa->lft.soft_add_expires_seconds = expire_soft;
-       sa->lft.hard_add_expires_seconds = expire_hard;
-       sa->lft.soft_use_expires_seconds = 0;
-       sa->lft.hard_use_expires_seconds = 0;
-       
-       struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
-       
-       switch (enc_alg)
-       {
-               case ENCR_UNDEFINED:
-                       /* no encryption */
-                       break;
-               case ENCR_AES_CCM_ICV8:
-               case ENCR_AES_CCM_ICV12:
-               case ENCR_AES_CCM_ICV16:
-                       /* AES-CCM needs only 3 additional octets KEYMAT as of RFC 4309 7.1. */
-                       add_keymat = 24;
-                       /* fall-through */
-               case ENCR_AES_GCM_ICV8:
-               case ENCR_AES_GCM_ICV12:
-               case ENCR_AES_GCM_ICV16:
-               {
-                       u_int16_t icv_size = 0;
-                       rthdr->rta_type = XFRMA_ALG_AEAD;
-                       alg_name = lookup_algorithm(encryption_algs, enc_alg, &icv_size);
-                       if (alg_name == NULL)
-                       {
-                               DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
-                                        encryption_algorithm_names, enc_alg);
-                               return FAILED;
-                       }
-                       DBG2(DBG_KNL, "  using encryption algorithm %N with key size %d",
-                                encryption_algorithm_names, enc_alg, enc_size);
-                       
-                       /* additional KEYMAT required */
-                       enc_size += add_keymat;
-                       
-                       rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo_aead) + enc_size / 8);
-                       hdr->nlmsg_len += rthdr->rta_len;
-                       if (hdr->nlmsg_len > sizeof(request))
-                       {
-                               return FAILED;
-                       }
-                       
-                       struct xfrm_algo_aead* algo = (struct xfrm_algo_aead*)RTA_DATA(rthdr);
-                       algo->alg_key_len = enc_size;
-                       algo->alg_icv_len = icv_size;
-                       strcpy(algo->alg_name, alg_name);
-                       prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
-                       
-                       rthdr = XFRM_RTA_NEXT(rthdr);
-                       break;
-               }
-               default:
-               {
-                       rthdr->rta_type = XFRMA_ALG_CRYPT;
-                       alg_name = lookup_algorithm(encryption_algs, enc_alg, &enc_size);
-                       if (alg_name == NULL)
-                       {
-                               DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
-                                        encryption_algorithm_names, enc_alg);
-                               return FAILED;
-                       }
-                       DBG2(DBG_KNL, "  using encryption algorithm %N with key size %d",
-                                encryption_algorithm_names, enc_alg, enc_size);
-                       
-                       rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + enc_size / 8);
-                       hdr->nlmsg_len += rthdr->rta_len;
-                       if (hdr->nlmsg_len > sizeof(request))
-                       {
-                               return FAILED;
-                       }
-                       
-                       struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
-                       algo->alg_key_len = enc_size;
-                       strcpy(algo->alg_name, alg_name);
-                       prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
-                       
-                       rthdr = XFRM_RTA_NEXT(rthdr);
-                       break;
-               }
-       }
-               
-       if (int_alg  != AUTH_UNDEFINED)
-       {
-               rthdr->rta_type = XFRMA_ALG_AUTH;
-               alg_name = lookup_algorithm(integrity_algs, int_alg, &int_size);
-               if (alg_name == NULL)
-               {
-                       DBG1(DBG_KNL, "algorithm %N not supported by kernel!", 
-                                integrity_algorithm_names, int_alg);
-                       return FAILED;
-               }
-               DBG2(DBG_KNL, "  using integrity algorithm %N with key size %d",
-                        integrity_algorithm_names, int_alg, int_size);
-               
-               rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + int_size / 8);
-               hdr->nlmsg_len += rthdr->rta_len;
-               if (hdr->nlmsg_len > sizeof(request))
-               {
-                       return FAILED;
-               }
-               
-               struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
-               algo->alg_key_len = int_size;
-               strcpy(algo->alg_name, alg_name);
-               prf_plus->get_bytes(prf_plus, int_size / 8, algo->alg_key);
-               
-               rthdr = XFRM_RTA_NEXT(rthdr);
-       }
-       
-       if (ipcomp != IPCOMP_NONE)
-       {
-               rthdr->rta_type = XFRMA_ALG_COMP;
-               alg_name = lookup_algorithm(compression_algs, ipcomp, NULL);
-               if (alg_name == NULL)
-               {
-                       DBG1(DBG_KNL, "algorithm %N not supported by kernel!", 
-                                ipcomp_transform_names, ipcomp);
-                       return FAILED;
-               }
-               DBG2(DBG_KNL, "  using compression algorithm %N",
-                        ipcomp_transform_names, ipcomp);
-               
-               rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo));
-               hdr->nlmsg_len += rthdr->rta_len;
-               if (hdr->nlmsg_len > sizeof(request))
-               {
-                       return FAILED;
-               }
-               
-               struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
-               algo->alg_key_len = 0;
-               strcpy(algo->alg_name, alg_name);
-               
-               rthdr = XFRM_RTA_NEXT(rthdr);
-       }
-       
-       if (encap)
-       {
-               rthdr->rta_type = XFRMA_ENCAP;
-               rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
-
-               hdr->nlmsg_len += rthdr->rta_len;
-               if (hdr->nlmsg_len > sizeof(request))
-               {
-                       return FAILED;
-               }
-
-               struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
-               tmpl->encap_type = UDP_ENCAP_ESPINUDP;
-               tmpl->encap_sport = htons(src->get_port(src));
-               tmpl->encap_dport = htons(dst->get_port(dst));
-               memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
-               /* encap_oa could probably be derived from the 
-                * traffic selectors [rfc4306, p39]. In the netlink kernel implementation 
-                * pluto does the same as we do here but it uses encap_oa in the 
-                * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates 
-                * the kernel ignores it anyway
-                *   -> does that mean that NAT-T encap doesn't work in transport mode?
-                * No. The reason the kernel ignores NAT-OA is that it recomputes 
-                * (or, rather, just ignores) the checksum. If packets pass
-                * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
-               rthdr = XFRM_RTA_NEXT(rthdr);
-       }
-       
-       if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
-       {
-               DBG1(DBG_KNL, "unable to add SAD entry with SPI %.8x", ntohl(spi));
-               return FAILED;
-       }
-       return SUCCESS;
-}
-
-/**
- * Get the replay state (i.e. sequence numbers) of an SA.
- */
-static status_t get_replay_state(private_kernel_interface_t *this,
-                                                 u_int32_t spi, protocol_id_t protocol, host_t *dst,
-                                                 struct xfrm_replay_state *replay)
-{
-       unsigned char request[BUFFER_SIZE];
-       struct nlmsghdr *hdr, *out = NULL;
-       struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
-       size_t len;
-       struct rtattr *rta;
-       size_t rtasize;
-       
-       memset(&request, 0, sizeof(request));
-       
-       DBG2(DBG_KNL, "querying replay state from SAD entry with SPI %.8x", ntohl(spi));
-
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST;
-       hdr->nlmsg_type = XFRM_MSG_GETAE;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
-
-       aevent_id = (struct xfrm_aevent_id*)NLMSG_DATA(hdr);
-       aevent_id->flags = XFRM_AE_RVAL;
-       
-       host2xfrm(dst, &aevent_id->sa_id.daddr);
-       aevent_id->sa_id.spi = spi;
-       aevent_id->sa_id.proto = proto_ike2kernel(protocol);
-       aevent_id->sa_id.family = dst->get_family(dst);
-       
-       if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
-       {
-               hdr = out;
-               while (NLMSG_OK(hdr, len))
-               {
-                       switch (hdr->nlmsg_type)
-                       {
-                               case XFRM_MSG_NEWAE:
-                               {
-                                       out_aevent = NLMSG_DATA(hdr);
-                                       break;
-                               }
-                               case NLMSG_ERROR:
-                               {
-                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
-                                       DBG1(DBG_KNL, "querying replay state from SAD entry failed: %s (%d)",
-                                                strerror(-err->error), -err->error);
-                                       break;
-                               }
-                               default:
-                                       hdr = NLMSG_NEXT(hdr, len);
-                                       continue;
-                               case NLMSG_DONE:
-                                       break;
-                       }
-                       break;
-               }
-       }
-       
-       if (out_aevent == NULL)
-       {
-               DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI %.8x",
-                                         ntohl(spi));
-               free(out);
-               return FAILED;
-       }
-       
-       rta = XFRM_RTA(out, struct xfrm_aevent_id);
-       rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
-       while(RTA_OK(rta, rtasize))
-       {
-               if (rta->rta_type == XFRMA_REPLAY_VAL)
-               {
-                       memcpy(replay, RTA_DATA(rta), rta->rta_len);
-                       free(out);
-                       return SUCCESS;
-               }
-               rta = RTA_NEXT(rta, rtasize);
-       }
-       
-       DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI %.8x",
-                                 ntohl(spi));
-       free(out);
-       return FAILED;
-}
-
-/**
- * Implementation of kernel_interface_t.update_sa.
- */
-static status_t update_sa(private_kernel_interface_t *this,
-                                                 u_int32_t spi, protocol_id_t protocol,
-                                                 host_t *src, host_t *dst,
-                                                 host_t *new_src, host_t *new_dst, bool encap)
-{
-       unsigned char request[BUFFER_SIZE], *pos;
-       struct nlmsghdr *hdr, *out = NULL;
-       struct xfrm_usersa_id *sa_id;
-       struct xfrm_usersa_info *out_sa = NULL, *sa;
-       size_t len;
-       struct rtattr *rta;
-       size_t rtasize;
-       struct xfrm_encap_tmpl* tmpl = NULL;
-       bool got_replay_state;
-       struct xfrm_replay_state replay;
-       
-       memset(&request, 0, sizeof(request));
-       
-       DBG2(DBG_KNL, "querying SAD entry with SPI %.8x for update", ntohl(spi));
-
-       /* query the existing SA first */
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST;
-       hdr->nlmsg_type = XFRM_MSG_GETSA;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
-
-       sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
-       host2xfrm(dst, &sa_id->daddr);
-       sa_id->spi = spi;
-       sa_id->proto = proto_ike2kernel(protocol);
-       sa_id->family = dst->get_family(dst);
-       
-       if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
-       {
-               hdr = out;
-               while (NLMSG_OK(hdr, len))
-               {
-                       switch (hdr->nlmsg_type)
-                       {
-                               case XFRM_MSG_NEWSA:
-                               {
-                                       out_sa = NLMSG_DATA(hdr);
-                                       break;
-                               }
-                               case NLMSG_ERROR:
-                               {
-                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
-                                       DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
-                                                strerror(-err->error), -err->error);
-                                       break;
-                               }
-                               default:
-                                       hdr = NLMSG_NEXT(hdr, len);
-                                       continue;
-                               case NLMSG_DONE:
-                                       break;
-                       }
-                       break;
-               }
-       }
-       if (out_sa == NULL)
-       {
-               DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x", ntohl(spi));
-               free(out);
-               return FAILED;
-       }
-       
-       /* try to get the replay state */
-       got_replay_state = (get_replay_state(
-                                               this, spi, protocol, dst, &replay) == SUCCESS);
-       
-       /* delete the old SA */
-       if (this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
-       {
-               DBG1(DBG_KNL, "unable to delete old SAD entry with SPI %.8x", ntohl(spi));
-               free(out);
-               return FAILED;
-       }
-       
-       DBG2(DBG_KNL, "updating SAD entry with SPI %.8x from %#H..%#H to %#H..%#H",
-                ntohl(spi), src, dst, new_src, new_dst);
-       
-       /* copy over the SA from out to request */
-       hdr = (struct nlmsghdr*)request;
-       memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
-       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;   
-       hdr->nlmsg_type = XFRM_MSG_NEWSA;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
-       sa = NLMSG_DATA(hdr);
-       sa->family = new_dst->get_family(new_dst);
-       
-       if (!src->ip_equals(src, new_src))
-       {
-               host2xfrm(new_src, &sa->saddr);
-       }
-       if (!dst->ip_equals(dst, new_dst))
-       {
-               host2xfrm(new_dst, &sa->id.daddr);
-       }
-       
-       rta = XFRM_RTA(out, struct xfrm_usersa_info);
-       rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
-       pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
-       while(RTA_OK(rta, rtasize))
-       {
-               /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
-               if (rta->rta_type != XFRMA_ENCAP || encap)
-               {
-                       if (rta->rta_type == XFRMA_ENCAP)
-                       {       /* update encap tmpl */
-                               tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
-                               tmpl->encap_sport = ntohs(new_src->get_port(new_src));
-                               tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
-                       }       
-                       memcpy(pos, rta, rta->rta_len);
-                       pos += RTA_ALIGN(rta->rta_len);
-                       hdr->nlmsg_len += RTA_ALIGN(rta->rta_len);
-               }
-               rta = RTA_NEXT(rta, rtasize);
-       }
-       
-       rta = (struct rtattr*)pos;
-       if (tmpl == NULL && encap)
-       {       /* add tmpl if we are enabling it */
-               rta->rta_type = XFRMA_ENCAP;
-               rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
-               
-               hdr->nlmsg_len += rta->rta_len;
-               if (hdr->nlmsg_len > sizeof(request))
-               {
-                       return FAILED;
-               }
-               
-               tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
-               tmpl->encap_type = UDP_ENCAP_ESPINUDP;
-               tmpl->encap_sport = ntohs(new_src->get_port(new_src));
-               tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
-               memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
-               
-               rta = XFRM_RTA_NEXT(rta);
-       }
-       
-       if (got_replay_state)
-       {       /* copy the replay data if available */
-               rta->rta_type = XFRMA_REPLAY_VAL;
-               rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_replay_state));
-               
-               hdr->nlmsg_len += rta->rta_len;
-               if (hdr->nlmsg_len > sizeof(request))
-               {
-                       return FAILED;
-               }
-               memcpy(RTA_DATA(rta), &replay, sizeof(replay));
-               
-               rta = XFRM_RTA_NEXT(rta);
-       }
-       
-       if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
-       {
-               DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x", ntohl(spi));
-               free(out);
-               return FAILED;
-       }
-       free(out);
-       
-       return SUCCESS;
-}
-
-/**
- * Implementation of kernel_interface_t.query_sa.
- */
-static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
-                                                u_int32_t spi, protocol_id_t protocol,
-                                                u_int32_t *use_time)
-{
-       unsigned char request[BUFFER_SIZE];
-       struct nlmsghdr *out = NULL, *hdr;
-       struct xfrm_usersa_id *sa_id;
-       struct xfrm_usersa_info *sa = NULL;
-       size_t len;
-       
-       DBG2(DBG_KNL, "querying SAD entry with SPI %.8x", ntohl(spi));
-       memset(&request, 0, sizeof(request));
-       
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST;
-       hdr->nlmsg_type = XFRM_MSG_GETSA;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
-
-       sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
-       host2xfrm(dst, &sa_id->daddr);
-       sa_id->spi = spi;
-       sa_id->proto = proto_ike2kernel(protocol);
-       sa_id->family = dst->get_family(dst);
-       
-       if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
-       {
-               hdr = out;
-               while (NLMSG_OK(hdr, len))
-               {
-                       switch (hdr->nlmsg_type)
-                       {
-                               case XFRM_MSG_NEWSA:
-                               {
-                                       sa = NLMSG_DATA(hdr);
-                                       break;
-                               }
-                               case NLMSG_ERROR:
-                               {
-                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
-                                       DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
-                                                strerror(-err->error), -err->error);
-                                       break;
-                               }
-                               default:
-                                       hdr = NLMSG_NEXT(hdr, len);
-                                       continue;
-                               case NLMSG_DONE:
-                                       break;
-                       }
-                       break;
-               }
-       }
-       
-       if (sa == NULL)
-       {
-               DBG1(DBG_KNL, "unable to query SAD entry with SPI %.8x", ntohl(spi));
-               free(out);
-               return FAILED;
-       }
-       
-       *use_time = sa->curlft.use_time;
-       free (out);
-       return SUCCESS;
-}
-
-/**
- * Implementation of kernel_interface_t.del_sa.
- */
-static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
-                                          u_int32_t spi, protocol_id_t protocol)
-{
-       unsigned char request[BUFFER_SIZE];
-       struct nlmsghdr *hdr;
-       struct xfrm_usersa_id *sa_id;
-       
-       memset(&request, 0, sizeof(request));
-       
-       DBG2(DBG_KNL, "deleting SAD entry with SPI %.8x", ntohl(spi));
-       
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-       hdr->nlmsg_type = XFRM_MSG_DELSA;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
-       
-       sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
-       host2xfrm(dst, &sa_id->daddr);
-       sa_id->spi = spi;
-       sa_id->proto = proto_ike2kernel(protocol);
-       sa_id->family = dst->get_family(dst);
-       
-       if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
-       {
-               DBG1(DBG_KNL, "unable to delete SAD entry with SPI %.8x", ntohl(spi));
-               return FAILED;
-       }
-       DBG2(DBG_KNL, "deleted SAD entry with SPI %.8x", ntohl(spi));
-       return SUCCESS;
-}
-
-/**
- * Implementation of kernel_interface_t.add_policy.
- */
-static status_t add_policy(private_kernel_interface_t *this, 
-                                                  host_t *src, host_t *dst,
-                                                  traffic_selector_t *src_ts,
-                                                  traffic_selector_t *dst_ts,
-                                                  policy_dir_t direction, protocol_id_t protocol,
-                                                  u_int32_t reqid, bool high_prio, mode_t mode,
-                                                  u_int16_t ipcomp)
-{
-       iterator_t *iterator;
-       policy_entry_t *current, *policy;
-       bool found = FALSE;
-       unsigned char request[BUFFER_SIZE];
-       struct xfrm_userpolicy_info *policy_info;
-       struct nlmsghdr *hdr;
-       
-       /* create a policy */
-       policy = malloc_thing(policy_entry_t);
-       memset(policy, 0, sizeof(policy_entry_t));
-       policy->sel = ts2selector(src_ts, dst_ts);
-       policy->direction = direction;
-       
-       /* find the policy, which matches EXACTLY */
-       pthread_mutex_lock(&this->mutex);
-       iterator = this->policies->create_iterator(this->policies, TRUE);
-       while (iterator->iterate(iterator, (void**)&current))
-       {
-               if (memeq(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) &&
-                       policy->direction == current->direction)
-               {
-                       /* use existing policy */
-                       current->refcount++;
-                       DBG2(DBG_KNL, "policy %R === %R %N already exists, increasing "
-                                                 "refcount", src_ts, dst_ts,
-                                                  policy_dir_names, direction);
-                       free(policy);
-                       policy = current;
-                       found = TRUE;
-                       break;
-               }
-       }
-       iterator->destroy(iterator);
-       if (!found)
-       {       /* apply the new one, if we have no such policy */
-               this->policies->insert_last(this->policies, policy);
-               policy->refcount = 1;
-       }
-       
-       DBG2(DBG_KNL, "adding policy %R === %R %N", src_ts, dst_ts,
-                                  policy_dir_names, direction);
-       
-       memset(&request, 0, sizeof(request));
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-       hdr->nlmsg_type = found ? XFRM_MSG_UPDPOLICY : XFRM_MSG_NEWPOLICY;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
-
-       policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
-       policy_info->sel = policy->sel;
-       policy_info->dir = policy->direction;
-       /* calculate priority based on source selector size, small size = high prio */
-       policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
-       policy_info->priority -= policy->sel.prefixlen_s * 10;
-       policy_info->priority -= policy->sel.proto ? 2 : 0;
-       policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
-       policy_info->action = XFRM_POLICY_ALLOW;
-       policy_info->share = XFRM_SHARE_ANY;
-       pthread_mutex_unlock(&this->mutex);
-       
-       /* policies don't expire */
-       policy_info->lft.soft_byte_limit = XFRM_INF;
-       policy_info->lft.soft_packet_limit = XFRM_INF;
-       policy_info->lft.hard_byte_limit = XFRM_INF;
-       policy_info->lft.hard_packet_limit = XFRM_INF;
-       policy_info->lft.soft_add_expires_seconds = 0;
-       policy_info->lft.hard_add_expires_seconds = 0;
-       policy_info->lft.soft_use_expires_seconds = 0;
-       policy_info->lft.hard_use_expires_seconds = 0;
-       
-       struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
-       rthdr->rta_type = XFRMA_TMPL;
-       rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
-       
-       hdr->nlmsg_len += rthdr->rta_len;
-       if (hdr->nlmsg_len > sizeof(request))
-       {
-               return FAILED;
-       }
-       
-       struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
-       
-       if (ipcomp != IPCOMP_NONE)
-       {
-               tmpl->reqid = reqid;
-               tmpl->id.proto = IPPROTO_COMP;
-               tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
-               tmpl->mode = mode;
-               tmpl->optional = direction != POLICY_OUT;
-               tmpl->family = src->get_family(src);
-               
-               host2xfrm(src, &tmpl->saddr);
-               host2xfrm(dst, &tmpl->id.daddr);
-               
-               /* add an additional xfrm_user_tmpl */
-               rthdr->rta_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
-               hdr->nlmsg_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
-               if (hdr->nlmsg_len > sizeof(request))
-               {
-                       return FAILED;
-               }
-               
-               tmpl++;
-       }
-       
-       tmpl->reqid = reqid;
-       tmpl->id.proto = proto_ike2kernel(protocol);
-       tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
-       tmpl->mode = mode;
-       tmpl->family = src->get_family(src);
-       
-       host2xfrm(src, &tmpl->saddr);
-       host2xfrm(dst, &tmpl->id.daddr);
-       
-       if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
-       {
-               DBG1(DBG_KNL, "unable to add policy %R === %R %N", src_ts, dst_ts,
-                                          policy_dir_names, direction);
-               return FAILED;
-       }
-       
-       /* install a route, if:
-        * - we are NOT updating a policy
-        * - this is a forward policy (to just get one for each child)
-        * - we are in tunnel mode
-        * - we are not using IPv6 (does not work correctly yet!)
-        * - routing is not disabled via strongswan.conf
-        */
-       if (policy->route == NULL && direction == POLICY_FWD &&
-               mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6 &&
-               this->install_routes)
-       {
-               policy->route = malloc_thing(route_entry_t);
-               if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
-               {
-                       /* get the nexthop to src (src as we are in POLICY_FWD).*/
-                       policy->route->gateway = get_route(this, src, TRUE);
-                       policy->route->if_index = get_interface_index(this, dst);
-                       policy->route->dst_net = chunk_alloc(
-                                                                               policy->sel.family == AF_INET ? 4 : 16);
-                       memcpy(policy->route->dst_net.ptr, &policy->sel.saddr,
-                                  policy->route->dst_net.len);
-                       policy->route->prefixlen = policy->sel.prefixlen_s;
-                       
-                       switch (manage_srcroute(this, RTM_NEWROUTE,
-                                                                       NLM_F_CREATE | NLM_F_EXCL, policy->route))
-                       {
-                               default:
-                                       DBG1(DBG_KNL, "unable to install source route for %H",
-                                                policy->route->src_ip);
-                                       /* FALL */
-                               case ALREADY_DONE:
-                                       /* route exists, do not uninstall */
-                                       route_entry_destroy(policy->route);
-                                       policy->route = NULL;
-                                       break;
-                               case SUCCESS:
-                                       break;
-                       }
-               }
-               else
-               {
-                       free(policy->route);
-                       policy->route = NULL;
-               }
-       }
-
-       return SUCCESS;
-}
-
-/**
- * Implementation of kernel_interface_t.query_policy.
- */
-static status_t query_policy(private_kernel_interface_t *this,
-                                                        traffic_selector_t *src_ts, 
-                                                        traffic_selector_t *dst_ts,
-                                                        policy_dir_t direction, u_int32_t *use_time)
-{
-       unsigned char request[BUFFER_SIZE];
-       struct nlmsghdr *out = NULL, *hdr;
-       struct xfrm_userpolicy_id *policy_id;
-       struct xfrm_userpolicy_info *policy = NULL;
-       size_t len;
-       
-       memset(&request, 0, sizeof(request));
-       
-       DBG2(DBG_KNL, "querying policy %R === %R %N", src_ts, dst_ts,
-                                  policy_dir_names, direction);
-
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST;
-       hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
-
-       policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
-       policy_id->sel = ts2selector(src_ts, dst_ts);
-       policy_id->dir = direction;
-       
-       if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
-       {
-               hdr = out;
-               while (NLMSG_OK(hdr, len))
-               {
-                       switch (hdr->nlmsg_type)
-                       {
-                               case XFRM_MSG_NEWPOLICY:
-                               {
-                                       policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
-                                       break;
-                               }
-                               case NLMSG_ERROR:
-                               {
-                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
-                                       DBG1(DBG_KNL, "querying policy failed: %s (%d)",
-                                                strerror(-err->error), -err->error);
-                                       break;
-                               }
-                               default:
-                                       hdr = NLMSG_NEXT(hdr, len);
-                                       continue;
-                               case NLMSG_DONE:
-                                       break;
-                       }
-                       break;
-               }
-       }
-       
-       if (policy == NULL)
-       {
-               DBG2(DBG_KNL, "unable to query policy %R === %R %N", src_ts, dst_ts,
-                                          policy_dir_names, direction);
-               free(out);
-               return FAILED;
-       }
-       *use_time = (time_t)policy->curlft.use_time;
-       
-       free(out);
-       return SUCCESS;
-}
-
-/**
- * Implementation of kernel_interface_t.del_policy.
- */
-static status_t del_policy(private_kernel_interface_t *this,
-                                                  traffic_selector_t *src_ts, 
-                                                  traffic_selector_t *dst_ts,
-                                                  policy_dir_t direction)
-{
-       policy_entry_t *current, policy, *to_delete = NULL;
-       route_entry_t *route;
-       unsigned char request[BUFFER_SIZE];
-       struct nlmsghdr *hdr;
-       struct xfrm_userpolicy_id *policy_id;
-       iterator_t *iterator;
-       
-       DBG2(DBG_KNL, "deleting policy %R === %R %N", src_ts, dst_ts,
-                                  policy_dir_names, direction);
-       
-       /* create a policy */
-       memset(&policy, 0, sizeof(policy_entry_t));
-       policy.sel = ts2selector(src_ts, dst_ts);
-       policy.direction = direction;
-       
-       /* find the policy */
-       iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
-       while (iterator->iterate(iterator, (void**)&current))
-       {
-               if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
-                       policy.direction == current->direction)
-               {
-                       to_delete = current;
-                       if (--to_delete->refcount > 0)
-                       {
-                               /* is used by more SAs, keep in kernel */
-                               DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
-                               iterator->destroy(iterator);
-                               return SUCCESS;
-                       }
-                       /* remove if last reference */
-                       iterator->remove(iterator);
-                       break;
-               }
-       }
-       iterator->destroy(iterator);
-       if (!to_delete)
-       {
-               DBG1(DBG_KNL, "deleting policy %R === %R %N failed, not found", src_ts,
-                                          dst_ts, policy_dir_names, direction);
-               return NOT_FOUND;
-       }
-       
-       memset(&request, 0, sizeof(request));
-       
-       hdr = (struct nlmsghdr*)request;
-       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-       hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
-
-       policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
-       policy_id->sel = to_delete->sel;
-       policy_id->dir = direction;
-       
-       route = to_delete->route;
-       free(to_delete);
-       
-       if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
-       {
-               DBG1(DBG_KNL, "unable to delete policy %R === %R %N", src_ts, dst_ts,
-                                          policy_dir_names, direction);
-               return FAILED;
-       }
-
-       if (route)
-       {
-               if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
-               {
-                       DBG1(DBG_KNL, "error uninstalling route installed with "
-                                                 "policy %R === %R %N", src_ts, dst_ts,
-                                                  policy_dir_names, direction);
-               }               
-               route_entry_destroy(route);
-       }
-       return SUCCESS;
-}
-
-/**
- * Implementation of kernel_interface_t.destroy.
- */
-static void destroy(private_kernel_interface_t *this)
-{
-       if (this->routing_table)
-       {
-               manage_rule(this, RTM_DELRULE, this->routing_table,
-                                       this->routing_table_prio);
-       }
-
-       this->job->cancel(this->job);
-       close(this->socket_xfrm_events);
-       close(this->socket_xfrm);
-       close(this->socket_rt_events);
-       close(this->socket_rt);
-       this->policies->destroy(this->policies);
-       this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
-       free(this);
-}
-
-/*
- * Described in header.
- */
-kernel_interface_t *kernel_interface_create()
+kernel_interface_t *kernel_interface_create()
 {
        private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
-       struct sockaddr_nl addr;
        
-       /* public functions */
        this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
        this->public.get_cpi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,u_int32_t,u_int16_t*))get_cpi;
        this->public.add_sa  = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,u_int16_t,u_int16_t,u_int16_t,u_int16_t,prf_plus_t*,mode_t,u_int16_t,bool,bool))add_sa;
@@ -3035,99 +328,29 @@ kernel_interface_t *kernel_interface_create()
        this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t,u_int16_t))add_policy;
        this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
        this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
-       this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
-       this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
+       
        this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
+       this->public.get_nexthop = (host_t*(*)(kernel_interface_t*, host_t *dest))get_nexthop;
+       this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface;
+       this->public.create_address_enumerator = (enumerator_t*(*)(kernel_interface_t*,bool,bool))create_address_enumerator;
        this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
        this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
-       this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
-
-       /* private members */
-       this->policies = linked_list_create();
-       this->ifaces = linked_list_create();
-       this->hiter = NULL;
-       this->seq = 200;
-       pthread_mutex_init(&this->mutex, NULL);
-       pthread_mutex_init(&this->nl_mutex, NULL);
-       pthread_cond_init(&this->cond, NULL);
-       timerclear(&this->last_roam);
-       this->install_routes = lib->settings->get_bool(lib->settings,
-                                       "charon.install_routes", TRUE);
-       this->routing_table = lib->settings->get_int(lib->settings,
-                                       "charon.routing_table", IPSEC_ROUTING_TABLE);
-       this->routing_table_prio = lib->settings->get_int(lib->settings,
-                                       "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
-       this->process_route = lib->settings->get_bool(lib->settings,
-                                       "charon.process_route", TRUE);
-       memset(&addr, 0, sizeof(addr));
-       addr.nl_family = AF_NETLINK;
-       
-       /* create and bind RT socket */
-       this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
-       if (this->socket_rt <= 0)
-       {
-               charon->kill(charon, "unable to create RT netlink socket");
-       }
-       addr.nl_groups = 0;
-       if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
-       {
-               charon->kill(charon, "unable to bind RT netlink socket");
-       }
-       
-       /* create and bind RT socket for events (address/interface/route changes) */
-       this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
-       if (this->socket_rt_events <= 0)
-       {
-               charon->kill(charon, "unable to create RT event socket");
-       }
-       addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR | 
-                                        RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
-       if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
-       {
-               charon->kill(charon, "unable to bind RT event socket");
-       }
-       
-       /* create and bind XFRM socket */ 
-       this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
-       if (this->socket_xfrm <= 0)
-       {
-               charon->kill(charon, "unable to create XFRM netlink socket");
-       }
-       addr.nl_groups = 0;
-       if (bind(this->socket_xfrm, (struct sockaddr*)&addr, sizeof(addr)))
-       {
-               charon->kill(charon, "unable to bind XFRM netlink socket");
-       }
-       
-       /* create and bind XFRM socket for ACQUIRE & EXPIRE */
-       this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
-       if (this->socket_xfrm_events <= 0)
-       {
-               charon->kill(charon, "unable to create XFRM event socket");
-       }
-       addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
-       if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
-       {
-               charon->kill(charon, "unable to bind XFRM event socket");
-       }
-       
-       this->job = callback_job_create((callback_job_cb_t)receive_events,
-                                                                       this, NULL, NULL);
-       charon->processor->queue_job(charon->processor, (job_t*)this->job);
-       
-       if (init_address_list(this) != SUCCESS)
-       {
-               charon->kill(charon, "unable to get interface list");
-       }
-       
-       if (this->routing_table)
-       {
-               if (manage_rule(this, RTM_NEWRULE, this->routing_table,
-                                               this->routing_table_prio) != SUCCESS)
-               {
-                       DBG1(DBG_KNL, "unable to create routing table rule");
-               }
-       }
+       this->public.add_route = (status_t(*)(kernel_interface_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
+       this->public.del_route = (status_t(*)(kernel_interface_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
+       
+       this->public.add_ipsec_interface = (void(*)(kernel_interface_t*, kernel_ipsec_constructor_t))add_ipsec_interface;
+       this->public.remove_ipsec_interface = (void(*)(kernel_interface_t*, kernel_ipsec_constructor_t))remove_ipsec_interface;
+       this->public.add_net_interface = (void(*)(kernel_interface_t*, kernel_net_constructor_t))add_net_interface;
+       this->public.remove_net_interface = (void(*)(kernel_interface_t*, kernel_net_constructor_t))remove_net_interface;
+       
+       this->public.create_interfaces = (void (*)(kernel_interface_t*))create_interfaces;
+       this->public.destroy = (void (*)(kernel_interface_t*))destroy;
+       
+       this->ipsec_interfaces = linked_list_create();
+       this->net_interfaces = linked_list_create();
+       this->mutex = mutex_create(MUTEX_RECURSIVE);
+       this->ipsec = NULL;
+       this->net = NULL;
        
        return &this->public;
 }
index 0978b05..757459f 100644 (file)
 #ifndef KERNEL_INTERFACE_H_
 #define KERNEL_INTERFACE_H_
 
-typedef enum policy_dir_t policy_dir_t;
 typedef struct kernel_interface_t kernel_interface_t;
 
 #include <utils/host.h>
 #include <crypto/prf_plus.h>
 #include <encoding/payloads/proposal_substructure.h>
 
+#include <kernel/kernel_ipsec.h>
+#include <kernel/kernel_net.h>
 
 /**
- * Direction of a policy. These are equal to those
- * defined in xfrm.h, but we want to stay implementation
- * neutral here.
+ * Constructor function for ipsec kernel interface
  */
-enum policy_dir_t {
-       /** Policy for inbound traffic */
-       POLICY_IN = 0,
-       /** Policy for outbound traffic */
-       POLICY_OUT = 1,
-       /** Policy for forwarded traffic */
-       POLICY_FWD = 2,
-};
+typedef kernel_ipsec_t* (*kernel_ipsec_constructor_t)(void);
+
+/**
+ * Constructor function for network kernel interface
+ */
+typedef kernel_net_t* (*kernel_net_constructor_t)(void);
 
 /**
- * Interface to the kernel.
+ * Manager and wrapper for different kernel interfaces.
  * 
  * The kernel interface handles the communication with the kernel
- * for SA and policy management. It allows setup of these, and provides 
- * further the handling of kernel events.
- * Policy information are cached in the interface. This is necessary to do
- * reference counting. The Linux kernel does not allow the same policy
- * installed twice, but we need this as CHILD_SA exist multiple times
- * when rekeying. Thats why we do reference counting of policies.
+ * for SA and policy management and interface and IP address management.
  */
 struct kernel_interface_t {
 
        /**
         * Get a SPI from the kernel.
         *
-        * @warning get_spi() implicitely creates an SA with
+        * @warning get_spi() implicitly creates an SA with
         * the allocated SPI, therefore the replace flag
         * in add_sa() must be set when installing this SA.
         * 
@@ -107,7 +99,7 @@ struct kernel_interface_t {
         * @param protocol              protocol for this SA (ESP/AH)
         * @param reqid                 unique ID for this SA
         * @param expire_soft   lifetime in seconds before rekeying
-        * @param expire_hard   lieftime in seconds before delete
+        * @param expire_hard   lifetime in seconds before delete
         * @param enc_alg               Algorithm to use for encryption (ESP only)
         * @param enc_size              key length of encryption algorithm, if dynamic
         * @param int_alg               Algorithm to use for integrity protection
@@ -167,7 +159,7 @@ struct kernel_interface_t {
                                                  protocol_id_t protocol, u_int32_t *use_time);
        
        /**
-        * Delete a previusly installed SA from the SAD.
+        * Delete a previously installed SA from the SAD.
         * 
         * @param dst                   destination address for this SA
         * @param spi                   SPI allocated by us or remote peer
@@ -189,7 +181,7 @@ struct kernel_interface_t {
         * @param dst_ts                traffic selector to match traffic dest
         * @param direction             direction of traffic, POLICY_IN, POLICY_OUT, POLICY_FWD
         * @param protocol              protocol to use to protect traffic (AH/ESP)
-        * @param reqid                 uniqe ID of an SA to use to enforce policy
+        * @param reqid                 unique ID of an SA to use to enforce policy
         * @param high_prio             if TRUE, uses a higher priority than any with FALSE
         * @param mode                  mode of SA (tunnel, transport)
         * @param ipcomp                the IPComp transform used
@@ -250,6 +242,17 @@ struct kernel_interface_t {
        host_t* (*get_source_addr)(kernel_interface_t *this, host_t *dest);
        
        /**
+        * Get the next hop for a destination.
+        *
+        * Does a route lookup to get the next hop used to reach dest.
+        * The returned host is allocated and must be destroyed.
+        *
+        * @param dest                  target destination address
+        * @return                              next hop address, NULL if unreachable
+        */
+       host_t* (*get_nexthop)(kernel_interface_t *this, host_t *dest);
+       
+       /**
         * Get the interface name of a local address.
         *
         * @param host                  address to get interface name from
@@ -258,15 +261,18 @@ struct kernel_interface_t {
        char* (*get_interface) (kernel_interface_t *this, host_t *host);
        
        /**
-        * Creates an iterator over all local addresses.
-        *
+        * Creates an enumerator over all local addresses.
+        * 
         * This function blocks an internal cached address list until the
-        * iterator gets destroyed.
-        * These hosts are read-only, do not modify or free.
-        *
-        * @return                              iterator over host_t's
+        * enumerator gets destroyed.
+        * The hosts are read-only, do not modify or free.
+        * 
+        * @param include_down_ifaces   TRUE to enumerate addresses from down interfaces
+        * @param include_virtual_ips   TRUE to enumerate virtual ip addresses
+        * @return                                              enumerator over host_t's
         */
-       iterator_t *(*create_address_iterator) (kernel_interface_t *this);
+       enumerator_t *(*create_address_enumerator) (kernel_interface_t *this,
+                                               bool include_down_ifaces, bool include_virtual_ips);
        
        /**
         * Add a virtual IP to an interface.
@@ -294,9 +300,73 @@ struct kernel_interface_t {
        status_t (*del_ip) (kernel_interface_t *this, host_t *virtual_ip);
        
        /**
-        * Destroys a kernel_interface object.
+        * Add a route.
+        * 
+        * @param dst_net               destination net
+        * @param prefixlen             destination net prefix length
+        * @param gateway               gateway for this route
+        * @param src_ip                source ip of the route
+        * @param if_name               name of the interface the route is bound to
+        * @return                              SUCCESS if operation completed
+        *                                              ALREADY_DONE if the route already exists
+        */
+       status_t (*add_route) (kernel_interface_t *this, chunk_t dst_net, u_int8_t prefixlen,
+                                                               host_t *gateway, host_t *src_ip, char *if_name);
+       
+       /**
+        * Delete a route.
+        * 
+        * @param dst_net               destination net
+        * @param prefixlen             destination net prefix length
+        * @param gateway               gateway for this route
+        * @param src_ip                source ip of the route
+        * @param if_name               name of the interface the route is bound to
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*del_route) (kernel_interface_t *this, chunk_t dst_net, u_int8_t prefixlen,
+                                                               host_t *gateway, host_t *src_ip, char *if_name);
+       
+       /**
+        * manager methods
+        */
+       
+       /**
+        * Register an ipsec kernel interface constructor on the manager.
+        *
+        * @param create                        constructor to register
+        */
+       void (*add_ipsec_interface)(kernel_interface_t *this, kernel_ipsec_constructor_t create);
+       
+       /**
+        * Unregister an ipsec kernel interface constructor.
+        *
+        * @param create                        constructor to unregister
+        */
+       void (*remove_ipsec_interface)(kernel_interface_t *this, kernel_ipsec_constructor_t create);
+       
+       /**
+        * Register a network kernel interface constructor on the manager.
+        *
+        * @param create                        constructor to register
+        */
+       void (*add_net_interface)(kernel_interface_t *this, kernel_net_constructor_t create);
+       
+       /**
+        * Unregister a network kernel interface constructor.
+        *
+        * @param create                        constructor to unregister
+        */
+       void (*remove_net_interface)(kernel_interface_t *this, kernel_net_constructor_t create);
+       
+       /**
+        * Create the kernel interfaces classes.
+        */
+       void (*create_interfaces)(kernel_interface_t *this);
+       
+       /**
+        * Destroys a kernel_interface_t object.
         */
-       void (*destroy) (kernel_interface_t *kernel_interface);
+       void (*destroy) (kernel_interface_t *this);
 };
 
 /**
@@ -304,4 +374,4 @@ struct kernel_interface_t {
  */
 kernel_interface_t *kernel_interface_create(void);
 
-#endif /*KERNEL_INTERFACE_H_ @} */
+#endif /* KERNEL_INTERFACE_H_ @} */
diff --git a/src/charon/kernel/kernel_ipsec.h b/src/charon/kernel/kernel_ipsec.h
new file mode 100644 (file)
index 0000000..b8ca3c9
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2006-2008 Tobias Brunner
+ * Copyright (C) 2006 Daniel Roethlisberger
+ * Copyright (C) 2005-2006 Martin Willi
+ * Copyright (C) 2005 Jan Hutter
+ * Hochschule fuer Technik Rapperswil
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * $Id$
+ */
+
+/**
+ * @defgroup kernel_ipsec kernel_ipsec
+ * @{ @ingroup kernel
+ */
+
+#ifndef KERNEL_IPSEC_H_
+#define KERNEL_IPSEC_H_
+
+typedef enum policy_dir_t policy_dir_t;
+typedef struct kernel_ipsec_t kernel_ipsec_t;
+
+#include <utils/host.h>
+#include <crypto/prf_plus.h>
+#include <encoding/payloads/proposal_substructure.h>
+
+/**
+ * Direction of a policy. These are equal to those
+ * defined in xfrm.h, but we want to stay implementation
+ * neutral here.
+ */
+enum policy_dir_t {
+       /** Policy for inbound traffic */
+       POLICY_IN = 0,
+       /** Policy for outbound traffic */
+       POLICY_OUT = 1,
+       /** Policy for forwarded traffic */
+       POLICY_FWD = 2,
+};
+
+/**
+ * Interface to the ipsec subsystem of the kernel.
+ * 
+ * The kernel ipsec interface handles the communication with the kernel
+ * for SA and policy management. It allows setup of these, and further
+ * provides the handling of kernel events.
+ * Policy information is cached in the interface. This is necessary to do
+ * reference counting. The Linux kernel does not allow the same policy to be
+ * installed twice, but we need this as CHILD_SAs exist multiple times
+ * when rekeying. That's why we do reference counting of policies.
+ */
+struct kernel_ipsec_t {
+       
+       /**
+        * Get a SPI from the kernel.
+        *
+        * @warning get_spi() implicitly creates an SA with
+        * the allocated SPI, therefore the replace flag
+        * in add_sa() must be set when installing this SA.
+        * 
+        * @param src           source address of SA
+        * @param dst           destination address of SA
+        * @param protocol      protocol for SA (ESP/AH)
+        * @param reqid         unique ID for this SA
+        * @param spi           allocated spi
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*get_spi)(kernel_ipsec_t *this, host_t *src, host_t *dst, 
+                                               protocol_id_t protocol, u_int32_t reqid, u_int32_t *spi);
+       
+       /**
+        * Get a Compression Parameter Index (CPI) from the kernel.
+        * 
+        * @param src           source address of SA
+        * @param dst           destination address of SA
+        * @param reqid         unique ID for the corresponding SA
+        * @param cpi           allocated cpi
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*get_cpi)(kernel_ipsec_t *this, host_t *src, host_t *dst, 
+                                               u_int32_t reqid, u_int16_t *cpi);
+       
+       /**
+        * Add an SA to the SAD.
+        * 
+        * add_sa() may update an already allocated
+        * SPI (via get_spi). In this case, the replace
+        * flag must be set.
+        * This function does install a single SA for a
+        * single protocol in one direction. The kernel-interface
+        * gets the keys itself from the PRF, as we don't know
+        * its algorithms and key sizes.
+        * 
+        * @param src                   source address for this SA
+        * @param dst                   destination address for this SA
+        * @param spi                   SPI allocated by us or remote peer
+        * @param protocol              protocol for this SA (ESP/AH)
+        * @param reqid                 unique ID for this SA
+        * @param expire_soft   lifetime in seconds before rekeying
+        * @param expire_hard   lifetime in seconds before delete
+        * @param enc_alg               Algorithm to use for encryption (ESP only)
+        * @param enc_size              key length of encryption algorithm, if dynamic
+        * @param int_alg               Algorithm to use for integrity protection
+        * @param int_size              key length of integrity algorithm, if dynamic
+        * @param prf_plus              PRF to derive keys from
+        * @param mode                  mode of the SA (tunnel, transport)
+        * @param ipcomp                IPComp transform to use
+        * @param encap                 enable UDP encapsulation for NAT traversal
+        * @param update                replace flag: should an already installed SA be updated?
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*add_sa) (kernel_ipsec_t *this,
+                                               host_t *src, host_t *dst, u_int32_t spi,
+                                               protocol_id_t protocol, u_int32_t reqid,
+                                               u_int64_t expire_soft, u_int64_t expire_hard,
+                                           u_int16_t enc_alg, u_int16_t enc_size,
+                                           u_int16_t int_alg, u_int16_t int_size,
+                                               prf_plus_t *prf_plus, mode_t mode,
+                                               u_int16_t ipcomp, bool encap,
+                                               bool update);
+       
+       /**
+        * Update the hosts on an installed SA.
+        *
+        * We cannot directly update the destination address as the kernel
+        * requires the spi, the protocol AND the destination address (and family)
+        * to identify SAs. Therefore if the destination address changed we
+        * create a new SA and delete the old one.
+        *
+        * @param spi                   SPI of the SA
+        * @param protocol              protocol for this SA (ESP/AH)
+        * @param src                   current source address
+        * @param dst                   current destination address
+        * @param new_src               new source address
+        * @param new_dst               new destination address
+        * @param encap                 use UDP encapsulation
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*update_sa)(kernel_ipsec_t *this,
+                                                 u_int32_t spi, protocol_id_t protocol,
+                                                 host_t *src, host_t *dst, 
+                                                 host_t *new_src, host_t *new_dst, bool encap);
+       
+       /**
+        * Query the use time of an SA.
+        *
+        * The use time of an SA is not the time of the last usage, but 
+        * the time of the first usage of the SA.
+        * 
+        * @param dst                   destination address for this SA
+        * @param spi                   SPI allocated by us or remote peer
+        * @param protocol              protocol for this SA (ESP/AH)
+        * @param use_time              pointer receives the time of this SA's first use
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*query_sa) (kernel_ipsec_t *this, host_t *dst, u_int32_t spi, 
+                                                 protocol_id_t protocol, u_int32_t *use_time);
+       
+       /**
+        * Delete a previously installed SA from the SAD.
+        * 
+        * @param dst                   destination address for this SA
+        * @param spi                   SPI allocated by us or remote peer
+        * @param protocol              protocol for this SA (ESP/AH)
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*del_sa) (kernel_ipsec_t *this, host_t *dst, u_int32_t spi,
+                                               protocol_id_t protocol);
+       
+       /**
+        * Add a policy to the SPD.
+        * 
+        * A policy is always associated to an SA. Traffic which matches a
+        * policy is handled by the SA with the same reqid.
+        * 
+        * @param src                   source address of SA
+        * @param dst                   dest address of SA
+        * @param src_ts                traffic selector to match traffic source
+        * @param dst_ts                traffic selector to match traffic dest
+        * @param direction             direction of traffic, POLICY_IN, POLICY_OUT, POLICY_FWD
+        * @param protocol              protocol to use to protect traffic (AH/ESP)
+        * @param reqid                 unique ID of an SA to use to enforce policy
+        * @param high_prio             if TRUE, uses a higher priority than any with FALSE
+        * @param mode                  mode of SA (tunnel, transport)
+        * @param ipcomp                the IPComp transform used
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*add_policy) (kernel_ipsec_t *this,
+                                                       host_t *src, host_t *dst,
+                                                       traffic_selector_t *src_ts,
+                                                       traffic_selector_t *dst_ts,
+                                                       policy_dir_t direction, protocol_id_t protocol,
+                                                       u_int32_t reqid, bool high_prio, mode_t mode,
+                                                       u_int16_t ipcomp);
+       
+       /**
+        * Query the use time of a policy.
+        *
+        * The use time of a policy is the time the policy was used
+        * for the last time.
+        * 
+        * @param src_ts                traffic selector to match traffic source
+        * @param dst_ts                traffic selector to match traffic dest
+        * @param direction             direction of traffic, POLICY_IN, POLICY_OUT, POLICY_FWD
+        * @param[out] use_time the time of this policy's last use
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*query_policy) (kernel_ipsec_t *this,
+                                                         traffic_selector_t *src_ts, 
+                                                         traffic_selector_t *dst_ts,
+                                                         policy_dir_t direction, u_int32_t *use_time);
+       
+       /**
+        * Remove a policy from the SPD.
+        *
+        * The kernel interface implements reference counting for policies.
+        * If the same policy is installed multiple times (in the case of rekeying),
+        * the reference counter is increased. del_policy() decreases the ref counter
+        * and removes the policy only when no more references are available.
+        *
+        * @param src_ts                traffic selector to match traffic source
+        * @param dst_ts                traffic selector to match traffic dest
+        * @param direction             direction of traffic, POLICY_IN, POLICY_OUT, POLICY_FWD
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*del_policy) (kernel_ipsec_t *this,
+                                                       traffic_selector_t *src_ts, 
+                                                       traffic_selector_t *dst_ts,
+                                                       policy_dir_t direction);
+       
+       /**
+        * Destroy the implementation.
+        */
+       void (*destroy) (kernel_ipsec_t *this);
+};
+
+#endif /* KERNEL_IPSEC_H_ @} */
diff --git a/src/charon/kernel/kernel_net.h b/src/charon/kernel/kernel_net.h
new file mode 100644 (file)
index 0000000..998fef0
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2008 Tobias Brunner
+ * Copyright (C) 2007 Martin Willi
+ * Hochschule fuer Technik Rapperswil
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * $Id$
+ */
+
+/**
+ * @defgroup kernel_net kernel_net
+ * @{ @ingroup kernel
+ */
+
+#ifndef KERNEL_NET_H_
+#define KERNEL_NET_H_
+
+typedef struct kernel_net_t kernel_net_t;
+
+#include <utils/enumerator.h>
+#include <utils/host.h>
+
+/**
+ * Interface to the network subsystem of the kernel.
+ * 
+ * The kernel network interface handles the communication with the kernel
+ * for interface and IP address management.
+ */
+struct kernel_net_t {
+
+       /**
+        * Get our outgoing source address for a destination.
+        *
+        * Does a route lookup to get the source address used to reach dest.
+        * The returned host is allocated and must be destroyed.
+        *
+        * @param dest                  target destination address
+        * @return                              outgoing source address, NULL if unreachable
+        */
+       host_t* (*get_source_addr)(kernel_net_t *this, host_t *dest);
+       
+       /**
+        * Get the next hop for a destination.
+        *
+        * Does a route lookup to get the next hop used to reach dest.
+        * The returned host is allocated and must be destroyed.
+        *
+        * @param dest                  target destination address
+        * @return                              next hop address, NULL if unreachable
+        */
+       host_t* (*get_nexthop)(kernel_net_t *this, host_t *dest);
+       
+       /**
+        * Get the interface name of a local address.
+        *
+        * @param host                  address to get interface name from
+        * @return                              allocated interface name, or NULL if not found
+        */
+       char* (*get_interface) (kernel_net_t *this, host_t *host);
+       
+       /**
+        * Creates an enumerator over all local addresses.
+        * 
+        * This function blocks an internal cached address list until the
+        * enumerator gets destroyed.
+        * The hosts are read-only, do not modify or free.
+        * 
+        * @param include_down_ifaces   TRUE to enumerate addresses from down interfaces
+        * @param include_virtual_ips   TRUE to enumerate virtual ip addresses
+        * @return                                              enumerator over host_t's
+        */
+       enumerator_t *(*create_address_enumerator) (kernel_net_t *this,
+                                               bool include_down_ifaces, bool include_virtual_ips);
+       
+       /**
+        * Add a virtual IP to an interface.
+        *
+        * Virtual IPs are attached to an interface. If an IP is added multiple
+        * times, the IP is refcounted and not removed until del_ip() was called
+        * as many times as add_ip().
+        * The virtual IP is attached to the interface where the iface_ip is found.
+        *
+        * @param virtual_ip    virtual ip address to assign
+        * @param iface_ip              IP of an interface to attach virtual IP
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*add_ip) (kernel_net_t *this, host_t *virtual_ip,
+                                               host_t *iface_ip);
+       
+       /**
+        * Remove a virtual IP from an interface.
+        *
+        * The kernel interface uses refcounting, see add_ip().
+        *
+        * @param virtual_ip    virtual ip address to remove
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*del_ip) (kernel_net_t *this, host_t *virtual_ip);
+       
+       /**
+        * Add a route.
+        * 
+        * @param dst_net               destination net
+        * @param prefixlen             destination net prefix length
+        * @param gateway               gateway for this route
+        * @param src_ip                source ip of the route
+        * @param if_name               name of the interface the route is bound to
+        * @return                              SUCCESS if operation completed
+        *                                              ALREADY_DONE if the route already exists
+        */
+       status_t (*add_route) (kernel_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
+                                                               host_t *gateway, host_t *src_ip, char *if_name);
+       
+       /**
+        * Delete a route.
+        * 
+        * @param dst_net               destination net
+        * @param prefixlen             destination net prefix length
+        * @param gateway               gateway for this route
+        * @param src_ip                source ip of the route
+        * @param if_name               name of the interface the route is bound to
+        * @return                              SUCCESS if operation completed
+        */
+       status_t (*del_route) (kernel_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
+                                                               host_t *gateway, host_t *src_ip, char *if_name);
+       
+       /**
+        * Destroy the implementation.
+        */
+       void (*destroy) (kernel_net_t *this);
+};
+
+#endif /* KERNEL_NET_H_ @} */
index 7ddde61..7e6623f 100644 (file)
@@ -50,7 +50,7 @@ typedef struct socket_t socket_t;
  * The first uses raw sockets to allow binding of other daemons (pluto) to
  * UDP/500. An installed "Linux socket filter" filters out all non-IKEv2 
  * traffic and handles just IKEv2 messages. An other daemon (pluto) must 
- * handle all traffic seperatly, e.g. ignore IKEv2 traffic, since charon 
+ * handle all traffic separately, e.g. ignore IKEv2 traffic, since charon 
  * handles that.
  * The other implementation uses normal sockets and is built if
  * --disable-pluto is given to the configure script.
@@ -91,7 +91,7 @@ struct socket_t {
 };
 
 /**
- * Create a socket_t, wich binds multiple sockets.
+ * Create a socket_t, which binds multiple sockets.
  *
  * @return                             socket_t object
  */
diff --git a/src/charon/plugins/kernel_netlink/Makefile.am b/src/charon/plugins/kernel_netlink/Makefile.am
new file mode 100644 (file)
index 0000000..e0efe57
--- /dev/null
@@ -0,0 +1,11 @@
+
+INCLUDES = -I${linuxdir} -I$(top_srcdir)/src/libstrongswan -I$(top_srcdir)/src/charon
+
+AM_CFLAGS = -rdynamic
+
+plugin_LTLIBRARIES = libstrongswan-kernel-netlink.la
+
+libstrongswan_kernel_netlink_la_SOURCES = kernel_netlink_plugin.h kernel_netlink_plugin.c \
+       kernel_netlink_ipsec.h kernel_netlink_ipsec.c kernel_netlink_net.h kernel_netlink_net.c \
+       kernel_netlink_shared.h kernel_netlink_shared.c
+libstrongswan_kernel_netlink_la_LDFLAGS = -module
diff --git a/src/charon/plugins/kernel_netlink/kernel_netlink_ipsec.c b/src/charon/plugins/kernel_netlink/kernel_netlink_ipsec.c
new file mode 100644 (file)
index 0000000..ccf0d0d
--- /dev/null
@@ -0,0 +1,1700 @@
+/*
+ * Copyright (C) 2006-2008 Tobias Brunner
+ * Copyright (C) 2005-2007 Martin Willi
+ * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
+ * Copyright (C) 2006 Daniel Roethlisberger
+ * Copyright (C) 2005 Jan Hutter
+ * Hochschule fuer Technik Rapperswil
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * $Id$
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/xfrm.h>
+#include <linux/udp.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#include "kernel_netlink_ipsec.h"
+#include "kernel_netlink_shared.h"
+
+#include <daemon.h>
+#include <utils/linked_list.h>
+#include <processing/jobs/callback_job.h>
+#include <processing/jobs/acquire_job.h>
+#include <processing/jobs/rekey_child_sa_job.h>
+#include <processing/jobs/delete_child_sa_job.h>
+
+/** required for Linux 2.6.26 kernel and later */
+#ifndef XFRM_STATE_AF_UNSPEC
+#define XFRM_STATE_AF_UNSPEC   32
+#endif
+
+/** default priority of installed policies */
+#define PRIO_LOW 3000
+#define PRIO_HIGH 2000
+
+/**
+ * returns a pointer to the first rtattr following the nlmsghdr *nlh and the
+ * 'usual' netlink data x like 'struct xfrm_usersa_info' (offset in bytes)
+ */
+#define XFRM_RTA(nlh, x) ((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + NLMSG_ALIGN(sizeof(x))))
+/**
+ * returns a pointer to the next rtattr following rta.
+ * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
+ */
+#define XFRM_RTA_NEXT(rta) ((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
+/**
+ * returns the total size of attached rta data 
+ * (after 'usual' netlink data x like 'struct xfrm_usersa_info') 
+ */
+#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
+
+typedef struct kernel_algorithm_t kernel_algorithm_t;
+
+/**
+ * Mapping from the algorithms defined in IKEv2 to
+ * kernel level algorithm names and their key length
+ */
+struct kernel_algorithm_t {
+       /**
+        * Identifier specified in IKEv2 (END_OF_LIST in the entry closing a table)
+        */
+       int ikev2_id;
+       
+       /**
+        * Name of the algorithm as the kernel identifies it (NULL in the closing entry)
+        */
+       char *name;
+       
+       /**
+        * Key length in bits if fixed, 0 otherwise; AEAD entries keep the ICV size here
+        */
+       u_int key_size;
+};
+
+ENUM(policy_dir_names, POLICY_IN, POLICY_FWD,
+       "in",
+       "out",
+       "fwd"
+);
+
+#define END_OF_LIST -1
+
+/**
+ * Algorithms for encryption
+ */
+static kernel_algorithm_t encryption_algs[] = {
+/*     {ENCR_DES_IV64,                 "***",                                  0}, */
+       {ENCR_DES,                              "des",                                  64},
+       {ENCR_3DES,                     "des3_ede",                             192},
+/*     {ENCR_RC5,                              "***",                                  0}, */
+/*     {ENCR_IDEA,                     "***",                                  0}, */
+       {ENCR_CAST,                     "cast128",                              0},
+       {ENCR_BLOWFISH,                 "blowfish",                             0},
+/*     {ENCR_3IDEA,                    "***",                                  0}, */
+/*     {ENCR_DES_IV32,                 "***",                                  0}, */
+       {ENCR_NULL,                     "cipher_null",                  0},
+       {ENCR_AES_CBC,                  "aes",                                  0},
+/*     {ENCR_AES_CTR,                  "***",                                  0}, */
+       {ENCR_AES_CCM_ICV8,             "rfc4309(ccm(aes))",    64},    /* key_size = ICV size */
+       {ENCR_AES_CCM_ICV12,    "rfc4309(ccm(aes))",    96},    /* key_size = ICV size */
+       {ENCR_AES_CCM_ICV16,    "rfc4309(ccm(aes))",    128},   /* key_size = ICV size */
+       {ENCR_AES_GCM_ICV8,             "rfc4106(gcm(aes))",    64},    /* key_size = ICV size */
+       {ENCR_AES_GCM_ICV12,    "rfc4106(gcm(aes))",    96},    /* key_size = ICV size */
+       {ENCR_AES_GCM_ICV16,    "rfc4106(gcm(aes))",    128},   /* key_size = ICV size */
+       {END_OF_LIST,           NULL,                   0},
+};
+
+/**
+ * Algorithms for integrity protection
+ */
+static kernel_algorithm_t integrity_algs[] = {
+       {AUTH_HMAC_MD5_96,                      "md5",                  128},
+       {AUTH_HMAC_SHA1_96,                     "sha1",                 160},
+       {AUTH_HMAC_SHA2_256_128,        "sha256",               256},
+       {AUTH_HMAC_SHA2_384_192,        "sha384",               384},
+       {AUTH_HMAC_SHA2_512_256,        "sha512",               512},
+/*     {AUTH_DES_MAC,                          "***",                  0}, */
+/*     {AUTH_KPDK_MD5,                         "***",                  0}, */
+       {AUTH_AES_XCBC_96,                      "xcbc(aes)",    128},
+       {END_OF_LIST,                           NULL,                   0},
+};
+
+/**
+ * Algorithms for IPComp
+ */
+static kernel_algorithm_t compression_algs[] = {
+/*     {IPCOMP_OUI,                    "***",                  0}, */
+       {IPCOMP_DEFLATE,                "deflate",              0},
+       {IPCOMP_LZS,                    "lzs",                  0},
+       {IPCOMP_LZJH,                   "lzjh",                 0},
+       {END_OF_LIST,                   NULL,                   0},
+};
+
+/**
+ * Map an IKEv2 algorithm id to its kernel name, filling in an unset key size
+ */
+static char* lookup_algorithm(kernel_algorithm_t *kernel_algo, 
+                                          u_int16_t ikev2_algo, u_int16_t *key_size)
+{
+       kernel_algorithm_t *entry;
+       for (entry = kernel_algo; entry->ikev2_id != END_OF_LIST; entry++)
+       {
+               if (ikev2_algo != entry->ikev2_id)
+               {       /* not the requested algorithm, try the next entry */
+                       continue;
+               }
+               if (key_size && *key_size == 0)
+               {       /* caller left the size open, take the table's value */
+                       *key_size = entry->key_size;
+               }
+               return entry->name;
+       }
+       return NULL;
+}
+
+typedef struct route_entry_t route_entry_t;
+
+/**
+ * installed routing entry; route_entry_destroy() releases the members below
+ */
+struct route_entry_t {
+       /** Name of the interface the route is bound to (free()d on destroy) */
+       char *if_name;
+       
+       /** Source ip of the route (destroyed on destroy) */
+       host_t *src_ip;
+       
+       /** gateway for this route (destroyed on destroy) */
+       host_t *gateway;
+
+       /** Destination net (chunk is freed on destroy) */
+       chunk_t dst_net;
+
+       /** Destination net prefixlen */
+       u_int8_t prefixlen;
+};
+
+/**
+ * destroy a route_entry_t object including the members it holds
+ */
+static void route_entry_destroy(route_entry_t *this)
+{
+       chunk_free(&this->dst_net);
+       this->gateway->destroy(this->gateway);
+       this->src_ip->destroy(this->src_ip);
+       free(this->if_name);
+       free(this);
+}
+
+typedef struct policy_entry_t policy_entry_t;
+
+/**
+ * installed kernel policy.
+ */
+struct policy_entry_t {
+       
+       /** direction of this policy: in, out, forward */
+       u_int8_t direction;
+       
+       /** reqid of the policy */
+       u_int32_t reqid;
+       
+       /** selector (the parameters) of the installed policy */
+       struct xfrm_selector sel;
+       
+       /** associated route installed for this policy */
+       route_entry_t *route;
+       
+       /** number of CHILD_SAs using this policy */
+       u_int refcount;
+};
+
+typedef struct private_kernel_netlink_ipsec_t private_kernel_netlink_ipsec_t;
+
+/**
+ * Private variables and functions of kernel_netlink_ipsec class.
+ */
+struct private_kernel_netlink_ipsec_t {
+       /**
+        * Public part of the kernel_netlink_ipsec_t object.
+        */
+       kernel_netlink_ipsec_t public;
+       
+       /**
+        * mutex to lock access to various lists
+        */
+       pthread_mutex_t mutex;
+       
+       /**
+        * List of installed policies (policy_entry_t)
+        */
+       linked_list_t *policies;
+                
+       /**
+        * job receiving netlink events
+        */
+       callback_job_t *job;
+       
+       /**
+        * Netlink xfrm socket (IPsec), used to send requests and get the replies
+        */
+       netlink_socket_t *socket_xfrm;
+       
+       /**
+        * netlink xfrm socket descriptor to receive acquire and expire events
+        */
+       int socket_xfrm_events;
+       
+       /**
+        * whether to install routes along policies
+        */
+       bool install_routes;
+};
+
+/**
+ * translate an IKEv2 protocol identifier to the kernel one (others pass through)
+ */
+static u_int8_t proto_ike2kernel(protocol_id_t proto)
+{
+       if (proto == PROTO_ESP)
+       {
+               return IPPROTO_ESP;
+       }
+       if (proto == PROTO_AH)
+       {
+               return IPPROTO_AH;
+       }
+       return proto;
+}
+
+/**
+ * reverse of proto_ike2kernel: kernel protocol number to IKEv2 identifier
+ */
+static protocol_id_t proto_kernel2ike(u_int8_t proto)
+{
+       if (proto == IPPROTO_ESP)
+       {
+               return PROTO_ESP;
+       }
+       if (proto == IPPROTO_AH)
+       {
+               return PROTO_AH;
+       }
+       return proto;
+}
+
+/**
+ * copy the address bytes of a host_t into a struct xfrm_address
+ */
+static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
+{
+       chunk_t address = host->get_address(host);
+       memcpy(xfrm, address.ptr, min(sizeof(xfrm_address_t), address.len));
+}
+
+/**
+ * convert a traffic selector address range to a subnet and its prefix length.
+ */
+static void ts2subnet(traffic_selector_t* ts, 
+                                         xfrm_address_t *net, u_int8_t *mask)
+{
+       /* there is no way to do this cleanly, as the address range may
+        * be anything else but a subnet. We use from_addr as subnet 
+        * and try to calculate a usable subnet mask.
+        */
+       int byte, bit;
+       bool found = FALSE;
+       chunk_t from, to;
+       size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
+       
+       from = ts->get_from_address(ts);
+       to = ts->get_to_address(ts);
+       
+       *mask = (size * 8); /* kept if from and to do not differ in any bit */
+       /* go through all bits of the addresses, beginning in the front.
+        * the index of the first bit that differs becomes the prefix length
+        */
+       for (byte = 0; byte < size; byte++)
+       {
+               for (bit = 7; bit >= 0; bit--)
+               {
+                       if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
+                       {
+                               *mask = ((7 - bit) + (byte * 8));
+                               found = TRUE;
+                               break;
+                       }
+               }
+               if (found)
+               {
+                       break;
+               }
+       }
+       memcpy(net, from.ptr, from.len);
+       chunk_free(&from);
+       chunk_free(&to);
+}
+
+/**
+ * convert a traffic selector port range to port/portmask
+ */
+static void ts2ports(traffic_selector_t* ts, 
+                                        u_int16_t *port, u_int16_t *mask)
+{
+       /* linux does not seem to take complex portmasks, so only two
+        * results are produced: the wildcard (port 0, mask 0) for a real
+        * range, and an exact match (full mask) for a single port.
+        */
+       u_int16_t first, last;
+       
+       first = ts->get_from_port(ts);
+       last = ts->get_to_port(ts);
+       
+       if (first != last)
+       {       /* more than one port: match any */
+               *port = 0;
+               *mask = 0;
+               return;
+       }
+       
+       /* exactly one port, stored in network byte order */
+       *port = htons(first);
+       *mask = ~0;
+}
+
+/**
+ * build the xfrm_selector for a source/destination traffic selector pair
+ */
+static struct xfrm_selector ts2selector(traffic_selector_t *src, 
+                                                                               traffic_selector_t *dst)
+{
+       struct xfrm_selector selector;
+       bool ipv4 = (src->get_type(src) == TS_IPV4_ADDR_RANGE);
+
+       /* zeroing the struct also leaves ifindex and user at 0 */
+       memset(&selector, 0, sizeof(selector));
+       selector.family = ipv4 ? AF_INET : AF_INET6;
+       /* a protocol of 0 stands for "any", the larger value is more restrictive */
+       selector.proto = max(src->get_protocol(src), dst->get_protocol(dst));
+       ts2subnet(dst, &selector.daddr, &selector.prefixlen_d);
+       ts2subnet(src, &selector.saddr, &selector.prefixlen_s);
+       ts2ports(dst, &selector.dport, &selector.dport_mask);
+       ts2ports(src, &selector.sport, &selector.sport_mask);
+       
+       return selector;
+}
+
+
+/**
+ * process a XFRM_MSG_ACQUIRE from kernel: queue an acquire job for its reqid
+ */
+static void process_acquire(private_kernel_netlink_ipsec_t *this, struct nlmsghdr *hdr)
+{
+       u_int32_t reqid = 0;
+       int proto = 0;
+       job_t *job;
+       /* the attributes start behind the xfrm_user_acquire, which bounds their size too */
+       struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
+       size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_acquire);
+       if (RTA_OK(rtattr, rtsize))
+       {       /* only the first attribute is inspected */
+               if (rtattr->rta_type == XFRMA_TMPL)
+               {
+                       struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
+                       reqid = tmpl->reqid;
+                       proto = tmpl->id.proto;
+               }
+       }
+       switch (proto)
+       {
+               case 0: /* no template was read, the reqid check below reports it */
+               case IPPROTO_ESP:
+               case IPPROTO_AH:
+                       break;
+               default:
+                       /* acquire for AH/ESP only, not for IPCOMP */
+                       return;
+       }
+       if (reqid == 0)
+       {
+               DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
+               return;
+       }
+       DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
+       DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid {%d}", reqid);
+       job = (job_t*)acquire_job_create(reqid);
+       charon->processor->queue_job(charon->processor, job);
+}
+
+/**
+ * process a XFRM_MSG_EXPIRE from kernel: soft expires rekey, hard ones delete
+ */
+static void process_expire(private_kernel_netlink_ipsec_t *this, struct nlmsghdr *hdr)
+{
+       struct xfrm_user_expire *expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
+       protocol_id_t protocol = proto_kernel2ike(expire->state.id.proto);
+       u_int32_t reqid = expire->state.reqid;
+       u_int32_t spi = expire->state.id.spi;
+       job_t *job;
+       
+       DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
+       
+       switch (protocol)
+       {
+               case PROTO_ESP:
+               case PROTO_AH:
+                       break;
+               default:
+                       DBG2(DBG_KNL, "ignoring XFRM_MSG_EXPIRE for SA with SPI %.8x and reqid {%d} "
+                                                 "which is not a CHILD_SA", ntohl(spi), reqid);
+                       return;
+       }
+       
+       DBG1(DBG_KNL, "creating %s job for %N CHILD_SA with SPI %.8x and reqid {%d}",
+                expire->hard ? "delete" : "rekey",  protocol_id_names,
+                protocol, ntohl(spi), reqid);
+       if (!expire->hard)
+       {
+               job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
+       }
+       else
+       {
+               job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
+       }
+       charon->processor->queue_job(charon->processor, job);
+}
+
+/**
+ * Receives one datagram from the xfrm event socket and dispatches its messages
+ */
+static job_requeue_t receive_events(private_kernel_netlink_ipsec_t *this)
+{
+       char response[1024];
+       struct nlmsghdr *hdr = (struct nlmsghdr*)response;
+       struct sockaddr_nl addr;
+       socklen_t addr_len = sizeof(addr);
+       int len, oldstate;
+       /* enable cancellation just for recvfrom(), then restore the previous state */
+       pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
+       len = recvfrom(this->socket_xfrm_events, response, sizeof(response), 0,
+                                  (struct sockaddr*)&addr, &addr_len);
+       pthread_setcancelstate(oldstate, NULL);
+       
+       if (len < 0)
+       {
+               switch (errno)
+               {
+                       case EINTR:
+                               /* interrupted, try again */
+                               return JOB_REQUEUE_DIRECT;
+                       case EAGAIN:
+                               /* no data ready, try again */
+                               return JOB_REQUEUE_DIRECT;
+                       default:
+                               DBG1(DBG_KNL, "unable to receive from xfrm event socket");
+                               sleep(1);
+                               return JOB_REQUEUE_FAIR;
+               }
+       }
+       
+       if (addr.nl_pid != 0)
+       {       /* sender pid is not 0, so not from kernel. not interested, try another one */
+               return JOB_REQUEUE_DIRECT;
+       }
+       
+       while (NLMSG_OK(hdr, len))
+       {
+               switch (hdr->nlmsg_type)
+               {
+                       case XFRM_MSG_ACQUIRE:
+                               process_acquire(this, hdr);
+                               break;
+                       case XFRM_MSG_EXPIRE:
+                               process_expire(this, hdr);
+                               break;
+                       default: /* all other message types are ignored */
+                               break;
+               }
+               hdr = NLMSG_NEXT(hdr, len);
+       }
+       return JOB_REQUEUE_DIRECT;
+}
+
+/**
+ * Pick a local address lying inside the supplied traffic selector. A selector
+ * that includes the loopback address yields the "any" address of its family.
+ *
+ * @param ts   traffic selector to find an address for
+ * @param ip   receives the host_t created or cloned here on SUCCESS
+ * @return     SUCCESS, or FAILED if no enumerated address is included
+ */
+static status_t get_address_by_ts(private_kernel_netlink_ipsec_t *this,
+                                                                 traffic_selector_t *ts, host_t **ip)
+{
+       enumerator_t *candidates;
+       host_t *loopback, *candidate, *match = NULL;
+       bool covers_loopback;
+       int family;
+       
+       DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
+       
+       family = AF_INET6;
+       if (ts->get_type(ts) == TS_IPV4_ADDR_RANGE)
+       {
+               family = AF_INET;
+       }
+       
+       /* a selector which includes localhost is not searched for a
+        * specific IP, the default (any) address of the family is used */
+       loopback = host_create_from_string(family == AF_INET ? "127.0.0.1" : "::1", 0);
+       covers_loopback = ts->includes(ts, loopback);
+       loopback->destroy(loopback);
+       if (covers_loopback)
+       {
+               *ip = host_create_any(family);
+               DBG2(DBG_KNL, "using host %H", *ip);
+               return SUCCESS;
+       }
+       
+       /* otherwise take the first enumerated address the selector includes */
+       candidates = charon->kernel_interface->create_address_enumerator(
+                               charon->kernel_interface, TRUE, TRUE);
+       while (match == NULL && candidates->enumerate(candidates, (void**)&candidate))
+       {
+               if (ts->includes(ts, candidate))
+               {
+                       match = candidate->clone(candidate);
+               }
+       }
+       candidates->destroy(candidates);
+       
+       if (match == NULL)
+       {
+               DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
+               return FAILED;
+       }
+       *ip = match;
+       DBG2(DBG_KNL, "using host %H", *ip);
+       return SUCCESS;
+}
+
+/**
+ * Get an SPI between min and max for a specific protocol from the kernel.
+ */
+static status_t get_spi_internal(private_kernel_netlink_ipsec_t *this,
+               host_t *src, host_t *dst, u_int8_t proto, u_int32_t min, u_int32_t max,
+               u_int32_t reqid, u_int32_t *spi)
+{
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       struct nlmsghdr *hdr, *out;
+       struct xfrm_userspi_info *userspi;
+       u_int32_t received_spi = 0;
+       size_t len;
+       
+       memset(&request, 0, sizeof(request));
+       
+       hdr = (struct nlmsghdr*)request;
+       hdr->nlmsg_flags = NLM_F_REQUEST;
+       hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
+       /* describe the SA the SPI is requested for and the allowed SPI range */
+       userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
+       host2xfrm(src, &userspi->info.saddr);
+       host2xfrm(dst, &userspi->info.id.daddr);
+       userspi->info.id.proto = proto;
+       userspi->info.mode = TRUE; /* tunnel mode */
+       userspi->info.reqid = reqid;
+       userspi->info.family = src->get_family(src);
+       userspi->min = min;
+       userspi->max = max;
+       
+       if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
+       {
+               hdr = out;
+               while (NLMSG_OK(hdr, len))
+               {
+                       switch (hdr->nlmsg_type)
+                       {
+                               case XFRM_MSG_NEWSA:
+                               {
+                                       struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
+                                       received_spi = usersa->id.spi;
+                                       break;
+                               }
+                               case NLMSG_ERROR:
+                               {
+                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
+                                       
+                                       DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
+                                                strerror(-err->error), -err->error);
+                                       break;
+                               }
+                               default: /* skip any other message and look at the next one */
+                                       hdr = NLMSG_NEXT(hdr, len);
+                                       continue;
+                               case NLMSG_DONE:
+                                       break;
+                       }
+                       break; /* NEWSA, ERROR and DONE all end the search */
+               }
+               free(out);
+       }
+       /* still 0 if no XFRM_MSG_NEWSA reply has been processed above */
+       if (received_spi == 0)
+       {
+               return FAILED;
+       }
+       
+       *spi = received_spi;
+       return SUCCESS;
+}
+
+/**
+ * Get an SPI in the range 0xc0000000-0xcFFFFFFF for the given protocol.
+ */
+static status_t get_spi(private_kernel_netlink_ipsec_t *this, 
+                                               host_t *src, host_t *dst, 
+                                               protocol_id_t protocol, u_int32_t reqid,
+                                               u_int32_t *spi)
+{
+       status_t status;
+       
+       DBG2(DBG_KNL, "getting SPI for reqid {%d}", reqid);
+       status = get_spi_internal(this, src, dst, proto_ike2kernel(protocol),
+                                                         0xc0000000, 0xcFFFFFFF, reqid, spi);
+       if (status != SUCCESS)
+       {
+               DBG1(DBG_KNL, "unable to get SPI for reqid {%d}", reqid);
+               return FAILED;
+       }
+       DBG2(DBG_KNL, "got SPI %.8x for reqid {%d}", ntohl(*spi), reqid);
+       return SUCCESS;
+}
+
+/**
+ * Get a CPI for IPComp, allocated as an SPI in the range 0x100-0xEFFF.
+ */
+static status_t get_cpi(private_kernel_netlink_ipsec_t *this, 
+                                               host_t *src, host_t *dst, 
+                                               u_int32_t reqid, u_int16_t *cpi)
+{
+       u_int32_t allocated = 0;
+       status_t status;
+       
+       DBG2(DBG_KNL, "getting CPI for reqid {%d}", reqid);
+       status = get_spi_internal(this, src, dst, IPPROTO_COMP,
+                                                         0x100, 0xEFFF, reqid, &allocated);
+       if (status != SUCCESS)
+       {
+               DBG1(DBG_KNL, "unable to get CPI for reqid {%d}", reqid);
+               return FAILED;
+       }
+       /* the CPI is the low 16 bits of the SPI, again stored in network order */
+       *cpi = htons((u_int16_t)ntohl(allocated));
+       
+       DBG2(DBG_KNL, "got CPI %.4x for reqid {%d}", ntohs(*cpi), reqid);
+       return SUCCESS;
+}
+
+/**
+ * Implementation of kernel_interface_t.add_sa.
+ */
+static status_t add_sa(private_kernel_netlink_ipsec_t *this,
+                                          host_t *src, host_t *dst, u_int32_t spi,
+                                          protocol_id_t protocol, u_int32_t reqid,
+                                          u_int64_t expire_soft, u_int64_t expire_hard,
+                                          u_int16_t enc_alg, u_int16_t enc_size,
+                                          u_int16_t int_alg, u_int16_t int_size,
+                                          prf_plus_t *prf_plus, mode_t mode,
+                                          u_int16_t ipcomp, bool encap,
+                                          bool replace)
+{
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       char *alg_name;
+       /* additional 4 octets KEYMAT required for AES-GCM as of RFC4106 8.1. */
+       u_int16_t add_keymat = 32; 
+       struct nlmsghdr *hdr;
+       struct xfrm_usersa_info *sa;
+       
+       memset(&request, 0, sizeof(request));
+       
+       DBG2(DBG_KNL, "adding SAD entry with SPI %.8x and reqid {%d}", ntohl(spi), reqid);
+
+       hdr = (struct nlmsghdr*)request;
+       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
+       
+       sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
+       host2xfrm(src, &sa->saddr);
+       host2xfrm(dst, &sa->id.daddr);
+       sa->id.spi = spi;
+       sa->id.proto = proto_ike2kernel(protocol);
+       sa->family = src->get_family(src);
+       sa->mode = mode;
+       if (mode == MODE_TUNNEL)
+       {
+               sa->flags |= XFRM_STATE_AF_UNSPEC;
+       }
+       sa->replay_window = (protocol == IPPROTO_COMP) ? 0 : 32;
+       sa->reqid = reqid;
+       /* we currently do not expire SAs by volume/packet count */
+       sa->lft.soft_byte_limit = XFRM_INF;
+       sa->lft.hard_byte_limit = XFRM_INF;
+       sa->lft.soft_packet_limit = XFRM_INF;
+       sa->lft.hard_packet_limit = XFRM_INF;
+       /* we use lifetimes since added, not since used */
+       sa->lft.soft_add_expires_seconds = expire_soft;
+       sa->lft.hard_add_expires_seconds = expire_hard;
+       sa->lft.soft_use_expires_seconds = 0;
+       sa->lft.hard_use_expires_seconds = 0;
+       
+       struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
+       
+       switch (enc_alg)
+       {
+               case ENCR_UNDEFINED:
+                       /* no encryption */
+                       break;
+               case ENCR_AES_CCM_ICV8:
+               case ENCR_AES_CCM_ICV12:
+               case ENCR_AES_CCM_ICV16:
+                       /* AES-CCM needs only 3 additional octets KEYMAT as of RFC 4309 7.1. */
+                       add_keymat = 24;
+                       /* fall-through */
+               case ENCR_AES_GCM_ICV8:
+               case ENCR_AES_GCM_ICV12:
+               case ENCR_AES_GCM_ICV16:
+               {
+                       u_int16_t icv_size = 0;
+                       rthdr->rta_type = XFRMA_ALG_AEAD;
+                       alg_name = lookup_algorithm(encryption_algs, enc_alg, &icv_size);
+                       if (alg_name == NULL)
+                       {
+                               DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
+                                        encryption_algorithm_names, enc_alg);
+                               return FAILED;
+                       }
+                       DBG2(DBG_KNL, "  using encryption algorithm %N with key size %d",
+                                encryption_algorithm_names, enc_alg, enc_size);
+                       
+                       /* additional KEYMAT required */
+                       enc_size += add_keymat;
+                       
+                       rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo_aead) + enc_size / 8);
+                       hdr->nlmsg_len += rthdr->rta_len;
+                       if (hdr->nlmsg_len > sizeof(request))
+                       {
+                               return FAILED;
+                       }
+                       
+                       struct xfrm_algo_aead* algo = (struct xfrm_algo_aead*)RTA_DATA(rthdr);
+                       algo->alg_key_len = enc_size;
+                       algo->alg_icv_len = icv_size;
+                       strcpy(algo->alg_name, alg_name);
+                       prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
+                       
+                       rthdr = XFRM_RTA_NEXT(rthdr);
+                       break;
+               }
+               default:
+               {
+                       rthdr->rta_type = XFRMA_ALG_CRYPT;
+                       alg_name = lookup_algorithm(encryption_algs, enc_alg, &enc_size);
+                       if (alg_name == NULL)
+                       {
+                               DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
+                                        encryption_algorithm_names, enc_alg);
+                               return FAILED;
+                       }
+                       DBG2(DBG_KNL, "  using encryption algorithm %N with key size %d",
+                                encryption_algorithm_names, enc_alg, enc_size);
+                       
+                       rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + enc_size / 8);
+                       hdr->nlmsg_len += rthdr->rta_len;
+                       if (hdr->nlmsg_len > sizeof(request))
+                       {
+                               return FAILED;
+                       }
+                       
+                       struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
+                       algo->alg_key_len = enc_size;
+                       strcpy(algo->alg_name, alg_name);
+                       prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
+                       
+                       rthdr = XFRM_RTA_NEXT(rthdr);
+                       break;
+               }
+       }
+               
+       if (int_alg  != AUTH_UNDEFINED)
+       {
+               rthdr->rta_type = XFRMA_ALG_AUTH;
+               alg_name = lookup_algorithm(integrity_algs, int_alg, &int_size);
+               if (alg_name == NULL)
+               {
+                       DBG1(DBG_KNL, "algorithm %N not supported by kernel!", 
+                                integrity_algorithm_names, int_alg);
+                       return FAILED;
+               }
+               DBG2(DBG_KNL, "  using integrity algorithm %N with key size %d",
+                        integrity_algorithm_names, int_alg, int_size);
+               
+               rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + int_size / 8);
+               hdr->nlmsg_len += rthdr->rta_len;
+               if (hdr->nlmsg_len > sizeof(request))
+               {
+                       return FAILED;
+               }
+               
+               struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
+               algo->alg_key_len = int_size;
+               strcpy(algo->alg_name, alg_name);
+               prf_plus->get_bytes(prf_plus, int_size / 8, algo->alg_key);
+               
+               rthdr = XFRM_RTA_NEXT(rthdr);
+       }
+       
+       if (ipcomp != IPCOMP_NONE)
+       {
+               rthdr->rta_type = XFRMA_ALG_COMP;
+               alg_name = lookup_algorithm(compression_algs, ipcomp, NULL);
+               if (alg_name == NULL)
+               {
+                       DBG1(DBG_KNL, "algorithm %N not supported by kernel!", 
+                                ipcomp_transform_names, ipcomp);
+                       return FAILED;
+               }
+               DBG2(DBG_KNL, "  using compression algorithm %N",
+                        ipcomp_transform_names, ipcomp);
+               
+               rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo));
+               hdr->nlmsg_len += rthdr->rta_len;
+               if (hdr->nlmsg_len > sizeof(request))
+               {
+                       return FAILED;
+               }
+               
+               struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
+               algo->alg_key_len = 0;
+               strcpy(algo->alg_name, alg_name);
+               
+               rthdr = XFRM_RTA_NEXT(rthdr);
+       }
+       
+       if (encap)
+       {
+               rthdr->rta_type = XFRMA_ENCAP;
+               rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
+
+               hdr->nlmsg_len += rthdr->rta_len;
+               if (hdr->nlmsg_len > sizeof(request))
+               {
+                       return FAILED;
+               }
+
+               struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
+               tmpl->encap_type = UDP_ENCAP_ESPINUDP;
+               tmpl->encap_sport = htons(src->get_port(src));
+               tmpl->encap_dport = htons(dst->get_port(dst));
+               memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
+               /* encap_oa could probably be derived from the 
+                * traffic selectors [rfc4306, p39]. In the netlink kernel implementation 
+                * pluto does the same as we do here but it uses encap_oa in the 
+                * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates 
+                * the kernel ignores it anyway
+                *   -> does that mean that NAT-T encap doesn't work in transport mode?
+                * No. The reason the kernel ignores NAT-OA is that it recomputes 
+                * (or, rather, just ignores) the checksum. If packets pass
+                * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
+               rthdr = XFRM_RTA_NEXT(rthdr);
+       }
+       
+       if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
+       {
+               DBG1(DBG_KNL, "unable to add SAD entry with SPI %.8x", ntohl(spi));
+               return FAILED;
+       }
+       return SUCCESS;
+}
+
+/**
+ * Get the replay state (i.e. sequence numbers) of an SA.
+ *
+ * Sends an XFRM_MSG_GETAE request (flag XFRM_AE_RVAL) for the SA identified
+ * by dst, spi and protocol and copies the XFRMA_REPLAY_VAL attribute of the
+ * answer into replay.
+ *
+ * Returns SUCCESS if a complete replay state was copied, FAILED if the query
+ * failed or the answer carried no usable XFRMA_REPLAY_VAL attribute.
+ */
+static status_t get_replay_state(private_kernel_netlink_ipsec_t *this,
+                                                 u_int32_t spi, protocol_id_t protocol, host_t *dst,
+                                                 struct xfrm_replay_state *replay)
+{
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       struct nlmsghdr *hdr, *out = NULL;
+       struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
+       size_t len;
+       struct rtattr *rta;
+       size_t rtasize;
+
+       memset(&request, 0, sizeof(request));
+
+       DBG2(DBG_KNL, "querying replay state from SAD entry with SPI %.8x", ntohl(spi));
+
+       hdr = (struct nlmsghdr*)request;
+       hdr->nlmsg_flags = NLM_F_REQUEST;
+       hdr->nlmsg_type = XFRM_MSG_GETAE;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
+
+       aevent_id = (struct xfrm_aevent_id*)NLMSG_DATA(hdr);
+       aevent_id->flags = XFRM_AE_RVAL;
+
+       host2xfrm(dst, &aevent_id->sa_id.daddr);
+       aevent_id->sa_id.spi = spi;
+       aevent_id->sa_id.proto = proto_ike2kernel(protocol);
+       aevent_id->sa_id.family = dst->get_family(dst);
+
+       if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
+       {
+               hdr = out;
+               /* skip unrelated messages, stop at the first answer, error or end */
+               while (NLMSG_OK(hdr, len))
+               {
+                       switch (hdr->nlmsg_type)
+                       {
+                               case XFRM_MSG_NEWAE:
+                               {
+                                       out_aevent = NLMSG_DATA(hdr);
+                                       break;
+                               }
+                               case NLMSG_ERROR:
+                               {
+                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
+                                       DBG1(DBG_KNL, "querying replay state from SAD entry failed: %s (%d)",
+                                                strerror(-err->error), -err->error);
+                                       break;
+                               }
+                               default:
+                                       hdr = NLMSG_NEXT(hdr, len);
+                                       continue;
+                               case NLMSG_DONE:
+                                       break;
+                       }
+                       break;
+               }
+       }
+
+       if (out_aevent == NULL)
+       {
+               DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI %.8x",
+                                         ntohl(spi));
+               free(out);
+               return FAILED;
+       }
+
+       rta = XFRM_RTA(out, struct xfrm_aevent_id);
+       rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
+       while(RTA_OK(rta, rtasize))
+       {
+               /* rta_len also counts the attribute header, so it must not be used
+                * as copy length. Accept the attribute only if it carries a full
+                * xfrm_replay_state and copy exactly that much, so nothing is
+                * written beyond the caller's struct. */
+               if (rta->rta_type == XFRMA_REPLAY_VAL &&
+                       RTA_PAYLOAD(rta) >= sizeof(*replay))
+               {
+                       memcpy(replay, RTA_DATA(rta), sizeof(*replay));
+                       free(out);
+                       return SUCCESS;
+               }
+               rta = RTA_NEXT(rta, rtasize);
+       }
+
+       DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI %.8x",
+                                 ntohl(spi));
+       free(out);
+       return FAILED;
+}
+
+/**
+ * Implementation of kernel_interface_t.update_sa.
+ *
+ * The kernel SA is not modified in place: the existing SA is queried, its
+ * replay state fetched (best effort), the old SA deleted and a copy with the
+ * new addresses and UDP encapsulation settings installed.
+ *
+ * Returns SUCCESS if the new SA was installed, FAILED otherwise. If a step
+ * after the deletion fails, the old SA is already gone.
+ */
+static status_t update_sa(private_kernel_netlink_ipsec_t *this,
+                                                 u_int32_t spi, protocol_id_t protocol,
+                                                 host_t *src, host_t *dst,
+                                                 host_t *new_src, host_t *new_dst, bool encap)
+{
+       unsigned char request[NETLINK_BUFFER_SIZE], *pos;
+       struct nlmsghdr *hdr, *out = NULL;
+       struct xfrm_usersa_id *sa_id;
+       struct xfrm_usersa_info *out_sa = NULL, *sa;
+       size_t len;
+       struct rtattr *rta;
+       size_t rtasize;
+       struct xfrm_encap_tmpl* tmpl = NULL;
+       bool got_replay_state;
+       struct xfrm_replay_state replay;
+
+       memset(&request, 0, sizeof(request));
+
+       DBG2(DBG_KNL, "querying SAD entry with SPI %.8x for update", ntohl(spi));
+
+       /* query the existing SA first */
+       hdr = (struct nlmsghdr*)request;
+       hdr->nlmsg_flags = NLM_F_REQUEST;
+       hdr->nlmsg_type = XFRM_MSG_GETSA;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
+
+       sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
+       host2xfrm(dst, &sa_id->daddr);
+       sa_id->spi = spi;
+       sa_id->proto = proto_ike2kernel(protocol);
+       sa_id->family = dst->get_family(dst);
+
+       if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
+       {
+               hdr = out;
+               /* skip unrelated messages, stop at the first answer, error or end */
+               while (NLMSG_OK(hdr, len))
+               {
+                       switch (hdr->nlmsg_type)
+                       {
+                               case XFRM_MSG_NEWSA:
+                               {
+                                       out_sa = NLMSG_DATA(hdr);
+                                       break;
+                               }
+                               case NLMSG_ERROR:
+                               {
+                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
+                                       DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
+                                                strerror(-err->error), -err->error);
+                                       break;
+                               }
+                               default:
+                                       hdr = NLMSG_NEXT(hdr, len);
+                                       continue;
+                               case NLMSG_DONE:
+                                       break;
+                       }
+                       break;
+               }
+       }
+       if (out_sa == NULL)
+       {
+               DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x", ntohl(spi));
+               free(out);
+               return FAILED;
+       }
+
+       /* try to get the replay state, the update continues without it */
+       got_replay_state = (get_replay_state(
+                                               this, spi, protocol, dst, &replay) == SUCCESS);
+
+       /* delete the old SA */
+       if (this->public.interface.del_sa(&this->public.interface, dst, spi, protocol) != SUCCESS)
+       {
+               DBG1(DBG_KNL, "unable to delete old SAD entry with SPI %.8x", ntohl(spi));
+               free(out);
+               return FAILED;
+       }
+
+       DBG2(DBG_KNL, "updating SAD entry with SPI %.8x from %#H..%#H to %#H..%#H",
+                ntohl(spi), src, dst, new_src, new_dst);
+
+       /* copy over the SA from out to request; the length is reset to the
+        * fixed part only, attributes are appended again one by one below */
+       hdr = (struct nlmsghdr*)request;
+       memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
+       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       hdr->nlmsg_type = XFRM_MSG_NEWSA;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
+       sa = NLMSG_DATA(hdr);
+       sa->family = new_dst->get_family(new_dst);
+
+       if (!src->ip_equals(src, new_src))
+       {
+               host2xfrm(new_src, &sa->saddr);
+       }
+       if (!dst->ip_equals(dst, new_dst))
+       {
+               host2xfrm(new_dst, &sa->id.daddr);
+       }
+
+       rta = XFRM_RTA(out, struct xfrm_usersa_info);
+       rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
+       pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
+       while(RTA_OK(rta, rtasize))
+       {
+               /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
+               if (rta->rta_type != XFRMA_ENCAP || encap)
+               {
+                       /* the answer may be larger than our request buffer */
+                       if (hdr->nlmsg_len + RTA_ALIGN(rta->rta_len) > sizeof(request))
+                       {
+                               free(out);
+                               return FAILED;
+                       }
+                       if (rta->rta_type == XFRMA_ENCAP)
+                       {       /* update encap tmpl, ports in network order as in add_sa */
+                               tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
+                               tmpl->encap_sport = htons(new_src->get_port(new_src));
+                               tmpl->encap_dport = htons(new_dst->get_port(new_dst));
+                       }
+                       memcpy(pos, rta, rta->rta_len);
+                       pos += RTA_ALIGN(rta->rta_len);
+                       hdr->nlmsg_len += RTA_ALIGN(rta->rta_len);
+               }
+               rta = RTA_NEXT(rta, rtasize);
+       }
+
+       rta = (struct rtattr*)pos;
+       if (tmpl == NULL && encap)
+       {       /* add tmpl if we are enabling it */
+               rta->rta_type = XFRMA_ENCAP;
+               rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
+
+               hdr->nlmsg_len += rta->rta_len;
+               if (hdr->nlmsg_len > sizeof(request))
+               {
+                       free(out);
+                       return FAILED;
+               }
+
+               tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
+               tmpl->encap_type = UDP_ENCAP_ESPINUDP;
+               tmpl->encap_sport = htons(new_src->get_port(new_src));
+               tmpl->encap_dport = htons(new_dst->get_port(new_dst));
+               memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
+
+               rta = XFRM_RTA_NEXT(rta);
+       }
+
+       if (got_replay_state)
+       {       /* copy the replay data if available */
+               rta->rta_type = XFRMA_REPLAY_VAL;
+               rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_replay_state));
+
+               hdr->nlmsg_len += rta->rta_len;
+               if (hdr->nlmsg_len > sizeof(request))
+               {
+                       free(out);
+                       return FAILED;
+               }
+               memcpy(RTA_DATA(rta), &replay, sizeof(replay));
+
+               rta = XFRM_RTA_NEXT(rta);
+       }
+
+       if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
+       {
+               DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x", ntohl(spi));
+               free(out);
+               return FAILED;
+       }
+       free(out);
+
+       return SUCCESS;
+}
+
+/**
+ * Implementation of kernel_interface_t.query_sa.
+ *
+ * Queries the SAD entry identified by dst, spi and protocol with
+ * XFRM_MSG_GETSA and hands out the use time of its current lifetime.
+ *
+ * Returns SUCCESS with *use_time set, FAILED if no SA was returned.
+ */
+static status_t query_sa(private_kernel_netlink_ipsec_t *this, host_t *dst,
+                                                u_int32_t spi, protocol_id_t protocol,
+                                                u_int32_t *use_time)
+{
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       struct nlmsghdr *out = NULL, *hdr;
+       struct xfrm_usersa_id *sa_id;
+       struct xfrm_usersa_info *sa = NULL;
+       size_t len;
+
+       DBG2(DBG_KNL, "querying SAD entry with SPI %.8x", ntohl(spi));
+       memset(&request, 0, sizeof(request));
+
+       hdr = (struct nlmsghdr*)request;
+       hdr->nlmsg_flags = NLM_F_REQUEST;
+       hdr->nlmsg_type = XFRM_MSG_GETSA;
+       /* the request carries an xfrm_usersa_id, as in update_sa and del_sa;
+        * the xfrm_usersa_info is only what the kernel answers with */
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
+
+       sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
+       host2xfrm(dst, &sa_id->daddr);
+       sa_id->spi = spi;
+       sa_id->proto = proto_ike2kernel(protocol);
+       sa_id->family = dst->get_family(dst);
+
+       if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
+       {
+               hdr = out;
+               /* skip unrelated messages, stop at the first answer, error or end */
+               while (NLMSG_OK(hdr, len))
+               {
+                       switch (hdr->nlmsg_type)
+                       {
+                               case XFRM_MSG_NEWSA:
+                               {
+                                       sa = NLMSG_DATA(hdr);
+                                       break;
+                               }
+                               case NLMSG_ERROR:
+                               {
+                                       struct nlmsgerr *err = NLMSG_DATA(hdr);
+                                       DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
+                                                strerror(-err->error), -err->error);
+                                       break;
+                               }
+                               default:
+                                       hdr = NLMSG_NEXT(hdr, len);
+                                       continue;
+                               case NLMSG_DONE:
+                                       break;
+                       }
+                       break;
+               }
+       }
+
+       if (sa == NULL)
+       {
+               DBG1(DBG_KNL, "unable to query SAD entry with SPI %.8x", ntohl(spi));
+               free(out);
+               return FAILED;
+       }
+
+       *use_time = sa->curlft.use_time;
+       free (out);
+       return SUCCESS;
+}
+
+/**
+ * Implementation of kernel_interface_t.del_sa.
+ *
+ * Asks the kernel to remove the SAD entry identified by dst, spi and
+ * protocol and waits for the acknowledgement.
+ */
+static status_t del_sa(private_kernel_netlink_ipsec_t *this, host_t *dst,
+                                          u_int32_t spi, protocol_id_t protocol)
+{
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       struct nlmsghdr *msg = (struct nlmsghdr*)request;
+       struct xfrm_usersa_id *id;
+
+       memset(request, 0, sizeof(request));
+
+       DBG2(DBG_KNL, "deleting SAD entry with SPI %.8x", ntohl(spi));
+
+       /* netlink header: acknowledged XFRM_MSG_DELSA request */
+       msg->nlmsg_type = XFRM_MSG_DELSA;
+       msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
+
+       /* payload: the SA is identified by destination, SPI and protocol */
+       id = (struct xfrm_usersa_id*)NLMSG_DATA(msg);
+       host2xfrm(dst, &id->daddr);
+       id->spi = spi;
+       id->proto = proto_ike2kernel(protocol);
+       id->family = dst->get_family(dst);
+
+       if (this->socket_xfrm->send_ack(this->socket_xfrm, msg) == SUCCESS)
+       {
+               DBG2(DBG_KNL, "deleted SAD entry with SPI %.8x", ntohl(spi));
+               return SUCCESS;
+       }
+       DBG1(DBG_KNL, "unable to delete SAD entry with SPI %.8x", ntohl(spi));
+       return FAILED;
+}
+
+/**
+ * Implementation of kernel_interface_t.add_policy.
+ *
+ * Looks up the policy (selector + direction) in the local policy list. A
+ * known policy gets its refcount increased and is replaced in the kernel
+ * (XFRM_MSG_UPDPOLICY), an unknown one is appended to the list and added to
+ * the kernel (XFRM_MSG_NEWPOLICY). The request carries one xfrm_user_tmpl for
+ * the given protocol, preceded by an IPComp template if ipcomp is set.
+ * For forward policies a route may be installed afterwards, see below.
+ *
+ * Returns SUCCESS if the kernel acknowledged the policy, FAILED otherwise.
+ *
+ * NOTE(review): none of the FAILED paths below undo the list insertion or
+ * the refcount increase done at the top -- confirm callers cope with that.
+ */
+static status_t add_policy(private_kernel_netlink_ipsec_t *this, 
+                                                  host_t *src, host_t *dst,
+                                                  traffic_selector_t *src_ts,
+                                                  traffic_selector_t *dst_ts,
+                                                  policy_dir_t direction, protocol_id_t protocol,
+                                                  u_int32_t reqid, bool high_prio, mode_t mode,
+                                                  u_int16_t ipcomp)
+{
+       iterator_t *iterator;
+       policy_entry_t *current, *policy;
+       bool found = FALSE;
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       struct xfrm_userpolicy_info *policy_info;
+       struct nlmsghdr *hdr;
+       
+       /* create a policy */
+       policy = malloc_thing(policy_entry_t);
+       memset(policy, 0, sizeof(policy_entry_t));
+       policy->sel = ts2selector(src_ts, dst_ts);
+       policy->direction = direction;
+       
+       /* find the policy, which matches EXACTLY */
+       pthread_mutex_lock(&this->mutex);
+       iterator = this->policies->create_iterator(this->policies, TRUE);
+       while (iterator->iterate(iterator, (void**)&current))
+       {
+               if (memeq(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) &&
+                       policy->direction == current->direction)
+               {
+                       /* use existing policy, the freshly allocated one is dropped */
+                       current->refcount++;
+                       DBG2(DBG_KNL, "policy %R === %R %N already exists, increasing "
+                                                 "refcount", src_ts, dst_ts,
+                                                  policy_dir_names, direction);
+                       free(policy);
+                       policy = current;
+                       found = TRUE;
+                       break;
+               }
+       }
+       iterator->destroy(iterator);
+       if (!found)
+       {       /* apply the new one, if we have no such policy */
+               this->policies->insert_last(this->policies, policy);
+               policy->refcount = 1;
+       }
+       
+       DBG2(DBG_KNL, "adding policy %R === %R %N", src_ts, dst_ts,
+                                  policy_dir_names, direction);
+       
+       memset(&request, 0, sizeof(request));
+       hdr = (struct nlmsghdr*)request;
+       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       /* a policy we already track is updated, an unknown one newly added */
+       hdr->nlmsg_type = found ? XFRM_MSG_UPDPOLICY : XFRM_MSG_NEWPOLICY;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
+
+       policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
+       policy_info->sel = policy->sel;
+       policy_info->dir = policy->direction;
+       /* calculate priority based on source selector size, small size = high prio */
+       policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
+       policy_info->priority -= policy->sel.prefixlen_s * 10;
+       policy_info->priority -= policy->sel.proto ? 2 : 0;
+       policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
+       policy_info->action = XFRM_POLICY_ALLOW;
+       policy_info->share = XFRM_SHARE_ANY;
+       /* NOTE(review): the list entry is no longer protected from here on, but
+        * policy->sel and policy->route are still accessed further down --
+        * confirm this cannot race with del_policy() */
+       pthread_mutex_unlock(&this->mutex);
+       
+       /* policies don't expire */
+       policy_info->lft.soft_byte_limit = XFRM_INF;
+       policy_info->lft.soft_packet_limit = XFRM_INF;
+       policy_info->lft.hard_byte_limit = XFRM_INF;
+       policy_info->lft.hard_packet_limit = XFRM_INF;
+       policy_info->lft.soft_add_expires_seconds = 0;
+       policy_info->lft.hard_add_expires_seconds = 0;
+       policy_info->lft.soft_use_expires_seconds = 0;
+       policy_info->lft.hard_use_expires_seconds = 0;
+       
+       /* the XFRMA_TMPL attribute follows the policy info */
+       struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
+       rthdr->rta_type = XFRMA_TMPL;
+       rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
+       
+       hdr->nlmsg_len += rthdr->rta_len;
+       if (hdr->nlmsg_len > sizeof(request))
+       {
+               return FAILED;
+       }
+       
+       struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
+       
+       if (ipcomp != IPCOMP_NONE)
+       {
+               /* first template describes the IPComp transform; it is marked
+                * optional for every direction but POLICY_OUT */
+               tmpl->reqid = reqid;
+               tmpl->id.proto = IPPROTO_COMP;
+               tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
+               tmpl->mode = mode;
+               tmpl->optional = direction != POLICY_OUT;
+               tmpl->family = src->get_family(src);
+               
+               host2xfrm(src, &tmpl->saddr);
+               host2xfrm(dst, &tmpl->id.daddr);
+               
+               /* add an additional xfrm_user_tmpl */
+               rthdr->rta_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
+               hdr->nlmsg_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
+               if (hdr->nlmsg_len > sizeof(request))
+               {
+                       return FAILED;
+               }
+               
+               /* the second template is filled in directly behind the first */
+               tmpl++;
+       }
+       
+       /* template for the actual protocol of the SA */
+       tmpl->reqid = reqid;
+       tmpl->id.proto = proto_ike2kernel(protocol);
+       tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
+       tmpl->mode = mode;
+       tmpl->family = src->get_family(src);
+       
+       host2xfrm(src, &tmpl->saddr);
+       host2xfrm(dst, &tmpl->id.daddr);
+       
+       if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
+       {
+               DBG1(DBG_KNL, "unable to add policy %R === %R %N", src_ts, dst_ts,
+                                          policy_dir_names, direction);
+               return FAILED;
+       }
+       
+       /* install a route, if:
+        * - we are NOT updating a policy
+        * - this is a forward policy (to just get one for each child)
+        * - we are in tunnel mode
+        * - we are not using IPv6 (does not work correctly yet!)
+        * - routing is not disabled via strongswan.conf
+        */
+       if (policy->route == NULL && direction == POLICY_FWD &&
+               mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6 &&
+               this->install_routes)
+       {
+               route_entry_t *route = malloc_thing(route_entry_t);
+               
+               if (get_address_by_ts(this, dst_ts, &route->src_ip) == SUCCESS)
+               {
+                       /* get the nexthop to src (src as we are in POLICY_FWD).*/
+                       route->gateway = charon->kernel_interface->get_nexthop(
+                                                                       charon->kernel_interface, src);
+                       route->if_name = charon->kernel_interface->get_interface(
+                                                                       charon->kernel_interface, dst);
+                       /* the routed network is the source selector of the policy */
+                       route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
+                       memcpy(route->dst_net.ptr, &policy->sel.saddr, route->dst_net.len);
+                       route->prefixlen = policy->sel.prefixlen_s;
+                       
+                       switch (charon->kernel_interface->add_route(charon->kernel_interface,
+                                       route->dst_net, route->prefixlen, route->gateway,
+                                       route->src_ip, route->if_name))
+                       {
+                               default:
+                                       DBG1(DBG_KNL, "unable to install source route for %H",
+                                                route->src_ip);
+                                       /* FALL */
+                               case ALREADY_DONE:
+                                       /* route exists, do not uninstall */
+                                       route_entry_destroy(route);
+                                       break;
+                               case SUCCESS:
+                                       /* cache the installed route */
+                                       policy->route = route;
+                                       break;
+                       }
+               }
+               else
+               {
+                       /* no member besides src_ip was touched yet, a plain free is enough */
+                       free(route);
+               }
+       }
+
+       /* a failed route installation does not fail the policy installation */
+       return SUCCESS;
+}
+
+/**
+ * Implementation of kernel_interface_t.query_policy.
+ *
+ * Asks the kernel for the policy matching the given selectors and direction
+ * and hands out the use time of its current lifetime in *use_time.
+ */
+static status_t query_policy(private_kernel_netlink_ipsec_t *this,
+                                                        traffic_selector_t *src_ts, 
+                                                        traffic_selector_t *dst_ts,
+                                                        policy_dir_t direction, u_int32_t *use_time)
+{
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       struct nlmsghdr *msg = (struct nlmsghdr*)request, *out = NULL;
+       struct xfrm_userpolicy_info *info = NULL;
+       struct xfrm_userpolicy_id *id;
+       size_t len;
+
+       memset(request, 0, sizeof(request));
+
+       DBG2(DBG_KNL, "querying policy %R === %R %N", src_ts, dst_ts,
+                                  policy_dir_names, direction);
+
+       /* netlink header of the XFRM_MSG_GETPOLICY request */
+       msg->nlmsg_type = XFRM_MSG_GETPOLICY;
+       msg->nlmsg_flags = NLM_F_REQUEST;
+       msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
+
+       /* the policy is identified by its selector and direction */
+       id = (struct xfrm_userpolicy_id*)NLMSG_DATA(msg);
+       id->sel = ts2selector(src_ts, dst_ts);
+       id->dir = direction;
+
+       if (this->socket_xfrm->send(this->socket_xfrm, msg, &out, &len) == SUCCESS)
+       {
+               /* walk the answer until a policy, an error or the end shows up,
+                * anything else is skipped */
+               for (msg = out; NLMSG_OK(msg, len); msg = NLMSG_NEXT(msg, len))
+               {
+                       if (msg->nlmsg_type == XFRM_MSG_NEWPOLICY)
+                       {
+                               info = (struct xfrm_userpolicy_info*)NLMSG_DATA(msg);
+                               break;
+                       }
+                       if (msg->nlmsg_type == NLMSG_ERROR)
+                       {
+                               struct nlmsgerr *err = NLMSG_DATA(msg);
+                               DBG1(DBG_KNL, "querying policy failed: %s (%d)",
+                                        strerror(-err->error), -err->error);
+                               break;
+                       }
+                       if (msg->nlmsg_type == NLMSG_DONE)
+                       {
+                               break;
+                       }
+               }
+       }
+
+       if (info != NULL)
+       {
+               *use_time = (time_t)info->curlft.use_time;
+               free(out);
+               return SUCCESS;
+       }
+
+       DBG2(DBG_KNL, "unable to query policy %R === %R %N", src_ts, dst_ts,
+                                  policy_dir_names, direction);
+       free(out);
+       return FAILED;
+}
+
+/**
+ * Implementation of kernel_interface_t.del_policy.
+ *
+ * Drops one reference to the cached policy matching the selectors and
+ * direction. Only when the last reference is gone, the policy is removed
+ * from the list and the kernel, together with the route cached for it.
+ *
+ * Returns SUCCESS if the policy is still referenced or was removed,
+ * NOT_FOUND if no such policy is cached, FAILED if the kernel did not
+ * acknowledge the removal.
+ */
+static status_t del_policy(private_kernel_netlink_ipsec_t *this,
+                                                  traffic_selector_t *src_ts, 
+                                                  traffic_selector_t *dst_ts,
+                                                  policy_dir_t direction)
+{
+       policy_entry_t *current, policy, *to_delete = NULL;
+       route_entry_t *route;
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       struct nlmsghdr *hdr;
+       struct xfrm_userpolicy_id *policy_id;
+       iterator_t *iterator;
+       status_t status = SUCCESS;
+
+       DBG2(DBG_KNL, "deleting policy %R === %R %N", src_ts, dst_ts,
+                                  policy_dir_names, direction);
+
+       /* create a policy */
+       memset(&policy, 0, sizeof(policy_entry_t));
+       policy.sel = ts2selector(src_ts, dst_ts);
+       policy.direction = direction;
+
+       /* find the policy */
+       iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
+       while (iterator->iterate(iterator, (void**)&current))
+       {
+               if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
+                       policy.direction == current->direction)
+               {
+                       to_delete = current;
+                       if (--to_delete->refcount > 0)
+                       {
+                               /* is used by more SAs, keep in kernel */
+                               DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
+                               iterator->destroy(iterator);
+                               return SUCCESS;
+                       }
+                       /* remove if last reference */
+                       iterator->remove(iterator);
+                       break;
+               }
+       }
+       iterator->destroy(iterator);
+       if (!to_delete)
+       {
+               DBG1(DBG_KNL, "deleting policy %R === %R %N failed, not found", src_ts,
+                                          dst_ts, policy_dir_names, direction);
+               return NOT_FOUND;
+       }
+
+       memset(&request, 0, sizeof(request));
+
+       hdr = (struct nlmsghdr*)request;
+       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
+
+       policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
+       policy_id->sel = to_delete->sel;
+       policy_id->dir = direction;
+
+       /* the entry is off the list, keep its route and release the entry */
+       route = to_delete->route;
+       free(to_delete);
+
+       if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
+       {
+               DBG1(DBG_KNL, "unable to delete policy %R === %R %N", src_ts, dst_ts,
+                                          policy_dir_names, direction);
+               status = FAILED;
+       }
+
+       /* The route is released even if the kernel refused to delete the
+        * policy: its owning entry is gone, so nobody could uninstall or free
+        * it later. */
+       if (route)
+       {
+               if (charon->kernel_interface->del_route(charon->kernel_interface,
+                               route->dst_net, route->prefixlen, route->gateway,
+                               route->src_ip, route->if_name) != SUCCESS)
+               {
+                       DBG1(DBG_KNL, "error uninstalling route installed with "
+                                                 "policy %R === %R %N", src_ts, dst_ts,
+                                                  policy_dir_names, direction);
+               }
+               route_entry_destroy(route);
+       }
+       return status;
+}
+
+/**
+ * Implementation of kernel_ipsec_t.destroy.
+ *
+ * Cancels the receive_events callback job, closes the XFRM event socket,
+ * destroys the XFRM request socket and the policy list and finally frees
+ * the object itself.
+ */
+static void destroy(private_kernel_netlink_ipsec_t *this)
+{
+       /* stop the event job before the socket it was created for goes away */
+       this->job->cancel(this->job);
+       close(this->socket_xfrm_events);
+       this->socket_xfrm->destroy(this->socket_xfrm);
+       /* NOTE(review): only the list is destroyed here; whether entries still
+        * in it get freed depends on linked_list_t.destroy - confirm */
+       this->policies->destroy(this->policies);
+       free(this);
+}
+
+/*
+ * Described in header.
+ *
+ * Wires up the kernel_ipsec_t method table, creates the policy list and the
+ * XFRM request socket, opens and binds a second raw netlink socket for
+ * ACQUIRE/EXPIRE notifications and queues the receive_events callback job.
+ * charon->kill() is invoked if the event socket can not be created or bound.
+ */
+kernel_netlink_ipsec_t *kernel_netlink_ipsec_create()
+{
+       private_kernel_netlink_ipsec_t *this = malloc_thing(private_kernel_netlink_ipsec_t);
+       struct sockaddr_nl addr;
+       
+       /* public functions */
+       this->public.interface.get_spi = (status_t(*)(kernel_ipsec_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
+       this->public.interface.get_cpi = (status_t(*)(kernel_ipsec_t*,host_t*,host_t*,u_int32_t,u_int16_t*))get_cpi;
+       this->public.interface.add_sa  = (status_t(*)(kernel_ipsec_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,u_int16_t,u_int16_t,u_int16_t,u_int16_t,prf_plus_t*,mode_t,u_int16_t,bool,bool))add_sa;
+       this->public.interface.update_sa = (status_t(*)(kernel_ipsec_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
+       this->public.interface.query_sa = (status_t(*)(kernel_ipsec_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
+       this->public.interface.del_sa = (status_t(*)(kernel_ipsec_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
+       this->public.interface.add_policy = (status_t(*)(kernel_ipsec_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t,u_int16_t))add_policy;
+       this->public.interface.query_policy = (status_t(*)(kernel_ipsec_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
+       this->public.interface.del_policy = (status_t(*)(kernel_ipsec_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
+       this->public.interface.destroy = (void(*)(kernel_ipsec_t*)) destroy;
+
+       /* private members */
+       this->policies = linked_list_create();
+       pthread_mutex_init(&this->mutex, NULL);
+       this->install_routes = lib->settings->get_bool(lib->settings,
+                                       "charon.install_routes", TRUE);
+       
+       this->socket_xfrm = netlink_socket_create(NETLINK_XFRM);
+       
+       memset(&addr, 0, sizeof(addr));
+       addr.nl_family = AF_NETLINK;
+       
+       /* create and bind XFRM socket for ACQUIRE & EXPIRE */
+       this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
+       /* socket() reports failure with -1; descriptor 0 is perfectly valid
+        * (e.g. if stdin has been closed) and must not be treated as an error */
+       if (this->socket_xfrm_events < 0)
+       {
+               charon->kill(charon, "unable to create XFRM event socket");
+       }
+       addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
+       if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
+       {
+               charon->kill(charon, "unable to bind XFRM event socket");
+       }
+       
+       /* the job keeps requeueing itself to read events from the socket */
+       this->job = callback_job_create((callback_job_cb_t)receive_events,
+                                                                       this, NULL, NULL);
+       charon->processor->queue_job(charon->processor, (job_t*)this->job);
+       
+       return &this->public;
+}
diff --git a/src/charon/plugins/kernel_netlink/kernel_netlink_ipsec.h b/src/charon/plugins/kernel_netlink/kernel_netlink_ipsec.h
new file mode 100644 (file)
index 0000000..a915881
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2008 Tobias Brunner
+ * Hochschule fuer Technik Rapperswil
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * $Id$
+ */
+
+/**
+ * @defgroup kernel_netlink_ipsec_i kernel_netlink_ipsec
+ * @{ @ingroup kernel_netlink
+ */
+
+#ifndef KERNEL_NETLINK_IPSEC_H_
+#define KERNEL_NETLINK_IPSEC_H_
+
+#include <kernel/kernel_ipsec.h>
+
+typedef struct kernel_netlink_ipsec_t kernel_netlink_ipsec_t;
+
+/**
+ * Implementation of the kernel ipsec interface using Netlink.
+ *
+ * Instances are obtained from kernel_netlink_ipsec_create().
+ */
+struct kernel_netlink_ipsec_t {
+
+       /**
+        * Implements kernel_ipsec_t interface
+        */
+       kernel_ipsec_t interface;
+};
+
+/**
+ * Create a netlink kernel ipsec interface instance.
+ *
+ * The instance is released through the destroy method of its
+ * kernel_ipsec_t interface.
+ *
+ * @return                     kernel_netlink_ipsec_t instance
+ */
+kernel_netlink_ipsec_t *kernel_netlink_ipsec_create();
+
+#endif /* KERNEL_NETLINK_IPSEC_H_ */
diff --git a/src/charon/plugins/kernel_netlink/kernel_netlink_net.c b/src/charon/plugins/kernel_netlink/kernel_netlink_net.c
new file mode 100644 (file)
index 0000000..f147762
--- /dev/null
@@ -0,0 +1,1342 @@
+/*
+ * Copyright (C) 2008 Tobias Brunner
+ * Hochschule fuer Technik Rapperswil
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * $Id$
+ */
+
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <errno.h>
+#include <net/if.h>
+
+#include "kernel_netlink_net.h"
+#include "kernel_netlink_shared.h"
+
+#include <daemon.h>
+#include <utils/linked_list.h>
+#include <processing/jobs/callback_job.h>
+#include <processing/jobs/roam_job.h>
+
+/** delay before firing roam jobs (ms); also the minimum interval between two
+ *  roam jobs, see fire_roam_job() */
+#define ROAM_DELAY 100
+
+/** routing table for routes installed by us (can be predefined at build time) */
+#ifndef IPSEC_ROUTING_TABLE
+#define IPSEC_ROUTING_TABLE 100
+#endif
+/** priority of the routing table above (can be predefined at build time) */
+#ifndef IPSEC_ROUTING_TABLE_PRIO
+#define IPSEC_ROUTING_TABLE_PRIO 100
+#endif
+
+typedef struct addr_entry_t addr_entry_t;
+
+/**
+ * IP address in an iface_entry_t
+ */
+struct addr_entry_t {
+       
+       /** The ip address (owned by the entry, destroyed in addr_entry_destroy) */
+       host_t *ip;
+       
+       /** virtual IP managed by us */
+       bool virtual;
+       
+       /** scope of the address (ifa_scope as reported by the kernel) */
+       u_char scope;
+       
+       /** Number of times this IP is used, if virtual */
+       u_int refcount;
+};
+
+/**
+ * Release an addr_entry_t: destroys the contained host_t, then the entry.
+ */
+static void addr_entry_destroy(addr_entry_t *this)
+{
+       host_t *ip = this->ip;
+       
+       ip->destroy(ip);
+       free(this);
+}
+
+typedef struct iface_entry_t iface_entry_t;
+
+/**
+ * A network interface on this system, containing addr_entry_t's
+ */
+struct iface_entry_t {
+       
+       /** interface index */
+       int ifindex;
+       
+       /** name of the interface (always NUL terminated) */
+       char ifname[IFNAMSIZ];
+       
+       /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
+       u_int flags;
+       
+       /** list of addresses on this interface, as addr_entry_t */
+       linked_list_t *addrs;
+};
+
+/**
+ * Release an iface_entry_t together with all addr_entry_t's in its list.
+ */
+static void iface_entry_destroy(iface_entry_t *this)
+{
+       linked_list_t *addrs = this->addrs;
+       
+       /* every address entry is handed to addr_entry_destroy */
+       addrs->destroy_function(addrs, (void*)addr_entry_destroy);
+       free(this);
+}
+
+typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
+
+/**
+ * Private variables and functions of kernel_netlink_net class.
+ */
+struct private_kernel_netlink_net_t {
+       /**
+        * Public part of the kernel_netlink_net_t object.
+        */
+       kernel_netlink_net_t public;
+       
+       /**
+        * mutex to lock access to various lists (held while iterating ifaces
+        * through create_iterator_locked and by the address enumerator)
+        */
+       pthread_mutex_t mutex;
+       
+       /**
+        * condition variable to signal virtual IP add/removal; broadcast by
+        * receive_events after every address or link message
+        */
+       pthread_cond_t cond;
+       
+       /**
+        * Cached list of interfaces and its addresses (iface_entry_t)
+        */
+       linked_list_t *ifaces;
+        
+       /**
+        * job receiving netlink events
+        */
+       callback_job_t *job;
+       
+       /**
+        * netlink rt socket (routing)
+        */
+       netlink_socket_t *socket;
+       
+       /**
+        * Netlink rt socket to receive address change events
+        */
+       int socket_events;
+       
+       /**
+        * time of the last roam_job; fire_roam_job stores the time the job was
+        * scheduled for (now + ROAM_DELAY) and ignores requests before that
+        */
+       struct timeval last_roam;
+       
+       /**
+        * routing table to install routes
+        */
+       int routing_table;
+       
+       /**
+        * priority of used routing table
+        */
+       int routing_table_prio;
+
+       /**
+        * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
+        */
+       bool process_route;
+
+};
+
+/**
+ * Look up the reference count of a virtual IP.
+ *
+ * Only addresses flagged as virtual on interfaces that are up are considered.
+ * The interface list is iterated without taking this->mutex here.
+ *
+ * @param ip           address to look for
+ * @return                     refcount of the matching entry, 0 if there is none
+ */
+static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
+{
+       iterator_t *iface_iter, *addr_iter;
+       iface_entry_t *iface;
+       addr_entry_t *entry;
+       int count = 0;
+       
+       iface_iter = this->ifaces->create_iterator(this->ifaces, TRUE);
+       /* stop as soon as a non-zero refcount has been found */
+       while (count == 0 && iface_iter->iterate(iface_iter, (void**)&iface))
+       {
+               addr_iter = iface->addrs->create_iterator(iface->addrs, TRUE);
+               while (addr_iter->iterate(addr_iter, (void**)&entry))
+               {
+                       if (!entry->virtual || !(iface->flags & IFF_UP))
+                       {
+                               continue;
+                       }
+                       if (ip->ip_equals(ip, entry->ip))
+                       {
+                               count = entry->refcount;
+                               break;
+                       }
+               }
+               addr_iter->destroy(addr_iter);
+       }
+       iface_iter->destroy(iface_iter);
+       
+       return count;
+}
+
+/**
+ * start a roaming job. We delay it by ROAM_DELAY milliseconds and fire only
+ * one job for multiple events within that period. Otherwise we would create
+ * too many jobs.
+ *
+ * @param address      passed on to roam_job_create()
+ */
+static void fire_roam_job(private_kernel_netlink_net_t *this, bool address)
+{
+       struct timeval now;
+       
+       if (gettimeofday(&now, NULL) != 0)
+       {       /* no usable timestamp, silently skip this event */
+               return;
+       }
+       if (!timercmp(&now, &this->last_roam, >))
+       {       /* a job covering this period has already been scheduled */
+               return;
+       }
+       now.tv_usec += ROAM_DELAY * 1000;
+       /* normalize; tv_usec must end up in the range [0, 999999], so an exact
+        * value of 1000000 has to carry over into tv_sec as well */
+       while (now.tv_usec >= 1000000)
+       {
+               now.tv_sec++;
+               now.tv_usec -= 1000000;
+       }
+       /* remember when the job fires, earlier events are covered by it */
+       this->last_roam = now;
+       charon->scheduler->schedule_job(charon->scheduler,
+                       (job_t*)roam_job_create(address), ROAM_DELAY);
+}
+
+/**
+ * process RTM_NEWLINK/RTM_DELLINK from kernel
+ *
+ * RTM_NEWLINK creates or updates the cached iface_entry_t for the interface
+ * index (loopback interfaces are ignored). RTM_DELLINK only updates the flags
+ * of a known interface, the entry itself is kept.
+ *
+ * @param hdr          netlink message to process
+ * @param event                TRUE if the message is a notification (receive_events
+ *                                     passes TRUE); activation changes are then logged
+ *                                     and trigger a roam job
+ */
+static void process_link(private_kernel_netlink_net_t *this,
+                                                struct nlmsghdr *hdr, bool event)
+{
+       struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
+       struct rtattr *rta = IFLA_RTA(msg);
+       size_t rtasize = IFLA_PAYLOAD (hdr);
+       iterator_t *iterator;
+       iface_entry_t *current, *entry = NULL;
+       char *name = NULL;
+       bool update = FALSE;
+       
+       /* the interface name is the only attribute we are interested in */
+       while(RTA_OK(rta, rtasize))
+       {
+               switch (rta->rta_type)
+               {
+                       case IFLA_IFNAME:
+                               name = RTA_DATA(rta);
+                               break;
+               }
+               rta = RTA_NEXT(rta, rtasize);
+       }
+       if (!name)
+       {
+               name = "(unknown)";
+       }
+       
+       switch (hdr->nlmsg_type)
+       {
+               case RTM_NEWLINK:
+               {
+                       if (msg->ifi_flags & IFF_LOOPBACK)
+                       {       /* ignore loopback interfaces */
+                               break;
+                       }
+                       iterator = this->ifaces->create_iterator_locked(this->ifaces,
+                                                                                                                       &this->mutex);
+                       while (iterator->iterate(iterator, (void**)&current))
+                       {
+                               if (current->ifindex == msg->ifi_index)
+                               {
+                                       entry = current;
+                                       break;
+                               }
+                       }
+                       if (!entry)
+                       {       /* first time we see this interface, add it to the cache */
+                               entry = malloc_thing(iface_entry_t);
+                               entry->ifindex = msg->ifi_index;
+                               entry->flags = 0;
+                               entry->addrs = linked_list_create();
+                               this->ifaces->insert_last(this->ifaces, entry);
+                       }
+                       /* name may be shorter than IFNAMSIZ (e.g. the "(unknown)"
+                        * literal), so copy as a string and never read beyond its
+                        * terminator; truncate and terminate if it is too long */
+                       strncpy(entry->ifname, name, IFNAMSIZ);
+                       entry->ifname[IFNAMSIZ-1] = '\0';
+                       if (event)
+                       {
+                               if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
+                               {
+                                       update = TRUE;
+                                       DBG1(DBG_KNL, "interface %s activated", name);
+                               }
+                               if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
+                               {
+                                       update = TRUE;
+                                       DBG1(DBG_KNL, "interface %s deactivated", name);
+                               }
+                       }
+                       entry->flags = msg->ifi_flags;
+                       iterator->destroy(iterator);
+                       break;
+               }
+               case RTM_DELLINK:
+               {
+                       iterator = this->ifaces->create_iterator_locked(this->ifaces,
+                                                                                                                       &this->mutex);
+                       while (iterator->iterate(iterator, (void**)&current))
+                       {
+                               if (current->ifindex == msg->ifi_index)
+                               {
+                                       /* we do not remove it, as an address may be added to a 
+                                        * "down" interface and we want to know that. */
+                                       current->flags = msg->ifi_flags;
+                                       break;
+                               }
+                       }
+                       iterator->destroy(iterator);
+                       break;
+               }
+       }
+       
+       /* send an update to all IKE_SAs */
+       if (update && event)
+       {
+               fire_roam_job(this, TRUE);
+       }
+}
+
+/**
+ * process RTM_NEWADDR/RTM_DELADDR from kernel
+ *
+ * Keeps the address list of the affected cached interface in sync: a deleted
+ * address is removed from the list, a new one is appended as non-virtual
+ * entry. Messages for interfaces that are not in the cache are ignored.
+ *
+ * @param hdr          netlink message to process
+ * @param event                TRUE if the message is a notification (receive_events
+ *                                     passes TRUE); only then a roam job may be fired
+ */
+static void process_addr(private_kernel_netlink_net_t *this,
+                                                struct nlmsghdr *hdr, bool event)
+{
+       struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
+       struct rtattr *rta = IFA_RTA(msg);
+       size_t rtasize = IFA_PAYLOAD (hdr);
+       host_t *host = NULL;
+       iterator_t *ifaces, *addrs;
+       iface_entry_t *iface;
+       addr_entry_t *addr;
+       chunk_t local = chunk_empty, address = chunk_empty;
+       bool update = FALSE, found = FALSE, changed = FALSE;
+       
+       /* collect the local and the peer/interface address attributes */
+       while(RTA_OK(rta, rtasize))
+       {
+               switch (rta->rta_type)
+               {
+                       case IFA_LOCAL:
+                               local.ptr = RTA_DATA(rta);
+                               local.len = RTA_PAYLOAD(rta);
+                               break;
+                       case IFA_ADDRESS:
+                               address.ptr = RTA_DATA(rta);
+                               address.len = RTA_PAYLOAD(rta);
+                               break;
+               }
+               rta = RTA_NEXT(rta, rtasize);
+       }
+       
+       /* For PPP interfaces, we need the IFA_LOCAL address,
+        * IFA_ADDRESS is the peers address. But IFA_LOCAL is
+        * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
+       if (local.ptr)
+       {
+               host = host_create_from_chunk(msg->ifa_family, local, 0);
+       }
+       else if (address.ptr)
+       {
+               host = host_create_from_chunk(msg->ifa_family, address, 0);
+       }
+       
+       if (host == NULL)
+       {       /* bad family? */
+               return;
+       }
+       
+       ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
+       while (ifaces->iterate(ifaces, (void**)&iface))
+       {
+               if (iface->ifindex == msg->ifa_index)
+               {
+                       /* look for the address among those already known */
+                       addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
+                       while (addrs->iterate(addrs, (void**)&addr))
+                       {
+                               if (host->ip_equals(host, addr->ip))
+                               {
+                                       found = TRUE;
+                                       if (hdr->nlmsg_type == RTM_DELADDR)
+                                       {
+                                               addrs->remove(addrs);
+                                               /* removal of a virtual IP is neither logged nor
+                                                * counted as a change */
+                                               if (!addr->virtual)
+                                               {
+                                                       changed = TRUE;
+                                                       DBG1(DBG_KNL, "%H disappeared from %s",
+                                                                host, iface->ifname);
+                                               }
+                                               addr_entry_destroy(addr);
+                                       }
+                                       else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
+                                       {
+                                               /* a known virtual IP got (re-)announced */
+                                               addr->refcount = 1;
+                                       }
+                               }
+                       }
+                       addrs->destroy(addrs);
+               
+                       if (hdr->nlmsg_type == RTM_NEWADDR)
+                       {
+                               if (!found)
+                               {       /* unknown address, cache it as non-virtual entry */
+                                       found = TRUE;
+                                       changed = TRUE;
+                                       addr = malloc_thing(addr_entry_t);
+                                       addr->ip = host->clone(host);
+                                       addr->virtual = FALSE;
+                                       addr->refcount = 1;
+                                       addr->scope = msg->ifa_scope;
+                                       
+                                       iface->addrs->insert_last(iface->addrs, addr);
+                                       if (event)
+                                       {
+                                               DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
+                                       }
+                               }
+                       }
+                       /* changes on interfaces that are down do not trigger roaming */
+                       if (found && (iface->flags & IFF_UP))
+                       {
+                               update = TRUE;
+                       }
+                       break;
+               }
+       }
+       ifaces->destroy(ifaces);
+       host->destroy(host);
+       
+       /* send an update to all IKE_SAs */
+       if (update && event && changed)
+       {
+               fire_roam_job(this, TRUE);
+       }
+}
+
+/**
+ * Handle an RTM_NEWROUTE or RTM_DELROUTE message from the kernel.
+ *
+ * A roam job (without address change) is fired if the route carries a
+ * preferred source address that is not one of our virtual IPs. Routes
+ * without RTA_PREFSRC attribute are ignored.
+ *
+ * @param hdr          netlink message to process
+ */
+static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
+{
+       struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
+       struct rtattr *attr;
+       size_t remaining = RTM_PAYLOAD(hdr);
+       host_t *prefsrc = NULL;
+       
+       for (attr = RTM_RTA(msg); RTA_OK(attr, remaining);
+                attr = RTA_NEXT(attr, remaining))
+       {
+               if (attr->rta_type == RTA_PREFSRC)
+               {
+                       prefsrc = host_create_from_chunk(msg->rtm_family,
+                                               chunk_create(RTA_DATA(attr), RTA_PAYLOAD(attr)), 0);
+               }
+       }
+       if (!prefsrc)
+       {
+               return;
+       }
+       /* routes added for virtual IPs are ignored */
+       if (get_vip_refcount(this, prefsrc) == 0)
+       {
+               fire_roam_job(this, FALSE);
+       }
+       prefsrc->destroy(prefsrc);
+}
+
+/**
+ * Receives events from kernel
+ *
+ * Callback of the event job: does one blocking read on the event socket and
+ * dispatches all netlink messages contained in the received datagram.
+ *
+ * @return                     JOB_REQUEUE_DIRECT to read the next datagram,
+ *                                     JOB_REQUEUE_FAIR after an unexpected receive error
+ */
+static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
+{
+       char response[1024];
+       struct nlmsghdr *hdr = (struct nlmsghdr*)response;
+       struct sockaddr_nl addr;
+       socklen_t addr_len = sizeof(addr);
+       int len, oldstate;
+
+       /* allow thread cancellation only while blocking in recvfrom() */
+       pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);       
+       len = recvfrom(this->socket_events, response, sizeof(response), 0,
+                                  (struct sockaddr*)&addr, &addr_len);
+       pthread_setcancelstate(oldstate, NULL);
+       
+       if (len < 0)
+       {
+               switch (errno)
+               {
+                       case EINTR:
+                               /* interrupted, try again */
+                               return JOB_REQUEUE_DIRECT;
+                       case EAGAIN:
+                               /* no data ready, select again */
+                               return JOB_REQUEUE_DIRECT;
+                       default:
+                               DBG1(DBG_KNL, "unable to receive from rt event socket");
+                               /* back off for a second before retrying */
+                               sleep(1);
+                               return JOB_REQUEUE_FAIR;
+               }
+       }
+       
+       if (addr.nl_pid != 0)
+       {       /* not from kernel. not interested, try another one */
+               return JOB_REQUEUE_DIRECT;
+       }
+       
+       /* a single datagram may carry several netlink messages */
+       while (NLMSG_OK(hdr, len))
+       {
+               /* looks good so far, dispatch netlink message */
+               switch (hdr->nlmsg_type)
+               {
+                       case RTM_NEWADDR:
+                       case RTM_DELADDR:
+                               process_addr(this, hdr, TRUE);
+                               /* wake up threads waiting on the condvar */
+                               pthread_cond_broadcast(&this->cond);
+                               break;
+                       case RTM_NEWLINK:
+                       case RTM_DELLINK:
+                               process_link(this, hdr, TRUE);
+                               pthread_cond_broadcast(&this->cond);
+                               break;
+                       case RTM_NEWROUTE:
+                       case RTM_DELROUTE:
+                               if (this->process_route)
+                               {
+                                       process_route(this, hdr);
+                               }
+                               break;
+                       default:
+                               break;
+               }
+               hdr = NLMSG_NEXT(hdr, len);
+       }
+       return JOB_REQUEUE_DIRECT;
+}
+
+/** enumerator over addresses, context shared by the filter callbacks */
+typedef struct {
+       /** instance whose mutex is unlocked when the enumerator is destroyed */
+       private_kernel_netlink_net_t* this;
+       /** whether to enumerate down interfaces */
+       bool include_down_ifaces;
+       /** whether to enumerate virtual ip addresses */ 
+       bool include_virtual_ips;
+} address_enumerator_t;
+
+/**
+ * Cleanup for the address enumerator: releases the mutex taken in
+ * create_address_enumerator and frees the enumerator context.
+ */
+static void address_enumerator_destroy(address_enumerator_t *data)
+{
+       private_kernel_netlink_net_t *owner = data->this;
+       
+       pthread_mutex_unlock(&owner->mutex);
+       free(data);
+}
+
+/**
+ * Address filter: maps an addr_entry_t to its host_t.
+ *
+ * @param data         enumerator context with the include_virtual_ips option
+ * @param in           address entry to check
+ * @param out          receives the ip of the entry if it passes
+ * @return                     FALSE for unwanted virtual IPs and for addresses with a
+ *                                     scope of RT_SCOPE_LINK or above, TRUE otherwise
+ */
+static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
+{
+       addr_entry_t *entry = *in;
+       
+       if (entry->virtual && !data->include_virtual_ips)
+       {       /* virtual IPs added by us were not asked for */
+               return FALSE;
+       }
+       if (entry->scope >= RT_SCOPE_LINK)
+       {       /* scope of this address is not usable */
+               return FALSE;
+       }
+       *out = entry->ip;
+       return TRUE;
+}
+
+/**
+ * Inner enumerator constructor: enumerates the addresses of one interface,
+ * filtered through filter_addresses with the shared context.
+ */
+static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
+{
+       enumerator_t *addrs;
+       
+       addrs = iface->addrs->create_enumerator(iface->addrs);
+       /* no cleanup function, the context belongs to the outer enumerator */
+       return enumerator_create_filter(addrs, (void*)filter_addresses, data, NULL);
+}
+
+/**
+ * Interface filter: passes interface entries through unchanged.
+ *
+ * @param data         enumerator context with the include_down_ifaces option
+ * @param in           interface entry to check
+ * @param out          receives the same entry if it passes
+ * @return                     FALSE for interfaces that are down unless those were
+ *                                     requested, TRUE otherwise
+ */
+static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
+{
+       iface_entry_t *iface = *in;
+       
+       if (!(iface->flags & IFF_UP) && !data->include_down_ifaces)
+       {       /* interface is down and those were not asked for */
+               return FALSE;
+       }
+       *out = iface;
+       return TRUE;
+}
+
+/**
+ * implementation of kernel_net_t.create_address_enumerator
+ *
+ * Locks this->mutex; it stays locked until the returned enumerator is
+ * destroyed (see address_enumerator_destroy).
+ *
+ * @param include_down_ifaces  also enumerate addresses of down interfaces
+ * @param include_virtual_ips  also enumerate virtual IPs
+ * @return                                             enumerator over host_t addresses
+ */
+static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
+               bool include_down_ifaces, bool include_virtual_ips)
+{
+       address_enumerator_t *ctx;
+       enumerator_t *ifaces;
+       
+       ctx = malloc_thing(address_enumerator_t);
+       ctx->include_virtual_ips = include_virtual_ips;
+       ctx->include_down_ifaces = include_down_ifaces;
+       ctx->this = this;
+       
+       pthread_mutex_lock(&this->mutex);
+       /* outer level: interfaces, inner level: addresses of each interface */
+       ifaces = enumerator_create_filter(
+                               this->ifaces->create_enumerator(this->ifaces),
+                               (void*)filter_interfaces, ctx, NULL);
+       return enumerator_create_nested(ifaces, (void*)create_iface_enumerator,
+                               ctx, (void*)address_enumerator_destroy);
+}
+
+/**
+ * implementation of kernel_net_t.get_interface_name
+ *
+ * Searches the cached interfaces for one that has the given address.
+ *
+ * @param ip           address to look up
+ * @return                     copy of the interface name made with strdup(), NULL if
+ *                                     the address was not found
+ */
+static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
+{
+       iterator_t *iface_iter, *addr_iter;
+       iface_entry_t *iface;
+       addr_entry_t *entry;
+       char *result = NULL;
+       
+       DBG2(DBG_KNL, "getting interface name for %H", ip);
+       
+       iface_iter = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
+       /* done as soon as a name has been found */
+       while (!result && iface_iter->iterate(iface_iter, (void**)&iface))
+       {
+               addr_iter = iface->addrs->create_iterator(iface->addrs, TRUE);
+               while (addr_iter->iterate(addr_iter, (void**)&entry))
+               {
+                       if (ip->ip_equals(ip, entry->ip))
+                       {
+                               result = strdup(iface->ifname);
+                               break;
+                       }
+               }
+               addr_iter->destroy(addr_iter);
+       }
+       iface_iter->destroy(iface_iter);
+       
+       if (!result)
+       {
+               DBG2(DBG_KNL, "%H is not a local address", ip);
+               return NULL;
+       }
+       DBG2(DBG_KNL, "%H is on interface %s", ip, result);
+       return result;
+}
+
+/**
+ * Look up the index of a cached interface by its name.
+ *
+ * @param name         interface name to look for
+ * @return                     interface index, 0 (after logging) if it is not known
+ */
+static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
+{
+       iterator_t *iter;
+       iface_entry_t *entry;
+       int index = 0;
+       
+       DBG2(DBG_KNL, "getting iface index for %s", name);
+       
+       iter = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
+       while (iter->iterate(iter, (void**)&entry))
+       {
+               if (!streq(name, entry->ifname))
+               {
+                       continue;
+               }
+               index = entry->ifindex;
+               break;
+       }
+       iter->destroy(iter);
+
+       if (!index)
+       {
+               DBG1(DBG_KNL, "unable to get interface index for %s", name);
+       }
+       return index;
+}
+
+/**
+ * check if an address (chunk) addr is in subnet (net with net_len net bits)
+ *
+ * Both chunks hold addresses in network byte order, so the prefix consists of
+ * the first net_len / 8 bytes plus the most significant net_len % 8 bits of
+ * the byte following them.
+ *
+ * @param addr         address to check
+ * @param net          network address
+ * @param net_len      prefix length in bits; values larger than the address
+ *                                     length compare the complete address
+ * @return                     TRUE if the first net_len bits of addr and net are equal,
+ *                                     FALSE if not or if the chunks differ in length
+ */
+static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
+{
+       int byte, full_bytes, rest_bits;
+       u_char mask;
+
+       if (addr.len != net.len)
+       {
+               return FALSE;
+       }
+       if (net_len <= 0)
+       {       /* an empty prefix contains all addresses */
+               return TRUE;
+       }
+       if (net_len > 8 * (int)net.len)
+       {       /* never look beyond the end of the address */
+               net_len = 8 * (int)net.len;
+       }
+       full_bytes = net_len / 8;
+       rest_bits = net_len % 8;
+
+       /* bytes covered completely by the prefix have to be identical */
+       for (byte = 0; byte < full_bytes; byte++)
+       {
+               if (addr.ptr[byte] != net.ptr[byte])
+               {
+                       return FALSE;
+               }
+       }
+       if (rest_bits)
+       {       /* compare the leading bits of the partially covered byte only */
+               mask = 0xff << (8 - rest_bits);
+               return (addr.ptr[byte] & mask) == (net.ptr[byte] & mask);
+       }
+       return TRUE;
+}
+
+/**
+ * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
+ *
+ * Requests a dump of the kernel routes (RTM_GETROUTE) and walks the answer,
+ * selecting the route with the longest prefix that either is the default
+ * route or whose destination net contains dest. Routes from our own routing
+ * table (if one is configured) are ignored.
+ *
+ * @param this      private kernel interface
+ * @param dest      destination to look up a route for
+ * @param nexthop   TRUE to return the gateway, FALSE to return a source address
+ * @return          newly created host_t:
+ *                  - nexthop: gateway of the selected route, or a clone of
+ *                    dest if no gateway was found
+ *                  - otherwise: source address, NULL if none was found
+ *                  NULL if the netlink request failed
+ */
+static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
+                                                bool nexthop)
+{
+       unsigned char request[NETLINK_BUFFER_SIZE];
+       struct nlmsghdr *hdr, *out, *current;
+       struct rtmsg *msg;
+       chunk_t chunk;
+       size_t len;
+       int best = -1;
+       host_t *src = NULL, *gtw = NULL;
+
+       DBG2(DBG_KNL, "getting address to reach %H", dest);
+
+       memset(&request, 0, sizeof(request));
+
+       hdr = (struct nlmsghdr*)request;
+       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
+       hdr->nlmsg_type = RTM_GETROUTE;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+
+       msg = (struct rtmsg*)NLMSG_DATA(hdr);
+       msg->rtm_family = dest->get_family(dest);
+
+       chunk = dest->get_address(dest);
+       netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
+
+       if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
+       {
+               DBG1(DBG_KNL, "getting address to %H failed", dest);
+               return NULL;
+       }
+       current = out;
+       while (NLMSG_OK(current, len))
+       {
+               switch (current->nlmsg_type)
+               {
+                       case NLMSG_DONE:
+                               break;
+                       case RTM_NEWROUTE:
+                       {
+                               struct rtattr *rta;
+                               size_t rtasize;
+                               chunk_t rta_gtw, rta_src, rta_dst;
+                               u_int32_t rta_oif = 0;
+
+                               /* collect the attributes of this route we are interested in */
+                               rta_gtw = rta_src = rta_dst = chunk_empty;
+                               msg = (struct rtmsg*)(NLMSG_DATA(current));
+                               rta = RTM_RTA(msg);
+                               rtasize = RTM_PAYLOAD(current);
+                               while (RTA_OK(rta, rtasize))
+                               {
+                                       switch (rta->rta_type)
+                                       {
+                                               case RTA_PREFSRC:
+                                                       rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                                                       break;
+                                               case RTA_GATEWAY:
+                                                       rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                                                       break;
+                                               case RTA_DST:
+                                                       rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                                                       break;
+                                               case RTA_OIF:
+                                                       if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
+                                                       {
+                                                               rta_oif = *(u_int32_t*)RTA_DATA(rta);
+                                                       }
+                                                       break;
+                                       }
+                                       rta = RTA_NEXT(rta, rtasize);
+                               }
+
+                               /* apply the route if:
+                                * - it is not from our own ipsec routing table
+                                * - is better than a previous one
+                                * - is the default route or
+                                * - its destination net contains our destination
+                                */
+                               if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
+                                       &&  msg->rtm_dst_len > best
+                                       && (msg->rtm_dst_len == 0 || /* default route */
+                                       (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
+                               {
+                                       iterator_t *ifaces, *addrs;
+                                       iface_entry_t *iface;
+                                       addr_entry_t *addr;
+
+                                       if (nexthop)
+                                       {
+                                               best = msg->rtm_dst_len;
+                                               DESTROY_IF(gtw);
+                                               gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
+                                       }
+                                       else if (rta_src.ptr)
+                                       {
+                                               host_t *candidate;
+
+                                               candidate = host_create_from_chunk(msg->rtm_family,
+                                                                                                                  rta_src, 0);
+                                               if (candidate && get_vip_refcount(this, candidate) == 0)
+                                               {
+                                                       best = msg->rtm_dst_len;
+                                                       DESTROY_IF(src);
+                                                       src = candidate;
+                                               }
+                                               else
+                                               {       /* skip source address if it is installed by us.
+                                                        * Neither "best" nor a previously selected source
+                                                        * is touched, so a skipped route can not shadow
+                                                        * other usable routes in the dump. */
+                                                       DESTROY_IF(candidate);
+                                               }
+                                       }
+                                       else
+                                       {
+                                               best = msg->rtm_dst_len;
+                                               /* no source addr, get one from the interfaces */
+                                               ifaces = this->ifaces->create_iterator_locked(
+                                                                                                       this->ifaces, &this->mutex);
+                                               while (ifaces->iterate(ifaces, (void**)&iface))
+                                               {
+                                                       if (iface->ifindex == rta_oif)
+                                                       {
+                                                               addrs = iface->addrs->create_iterator(
+                                                                                                                       iface->addrs, TRUE);
+                                                               while (addrs->iterate(addrs, (void**)&addr))
+                                                               {
+                                                                       chunk_t ip = addr->ip->get_address(addr->ip);
+                                                                       /* for the default route any address of the
+                                                                        * same family will do, otherwise it has to
+                                                                        * be part of the route's destination net */
+                                                                       if ((msg->rtm_dst_len == 0 &&
+                                                                                addr->ip->get_family(addr->ip) ==
+                                                                                       dest->get_family(dest)) ||
+                                                                               addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
+                                                                       {
+                                                                               DESTROY_IF(src);
+                                                                               src = addr->ip->clone(addr->ip);
+                                                                               break;
+                                                                       }
+                                                               }
+                                                               addrs->destroy(addrs);
+                                                       }
+                                               }
+                                               ifaces->destroy(ifaces);
+                                       }
+                               }
+                               /* FALL through */
+                       }
+                       default:
+                               current = NLMSG_NEXT(current, len);
+                               continue;
+               }
+               break;
+       }
+       free(out);
+
+       if (nexthop)
+       {
+               if (gtw)
+               {
+                       return gtw;
+               }
+               /* no gateway found, use the destination itself as nexthop */
+               return dest->clone(dest);
+       }
+       return src;
+}
+
+/**
+ * Implementation of kernel_net_t.get_source_addr.
+ *
+ * Looks up the route to dest via get_route() with the nexthop flag cleared,
+ * so the source address is returned instead of the nexthop.
+ */
+static host_t* get_source_addr(private_kernel_netlink_net_t *this, host_t *dest)
+{
+       const bool want_nexthop = FALSE;
+
+       return get_route(this, dest, want_nexthop);
+}
+
+/**
+ * Implementation of kernel_net_t.get_nexthop.
+ *
+ * Looks up the route to dest via get_route() with the nexthop flag set,
+ * so the nexthop is returned instead of the source address.
+ */
+static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
+{
+       const bool want_nexthop = TRUE;
+
+       return get_route(this, dest, want_nexthop);
+}
+
+/**
+ * Manages the creation and deletion of ip addresses on an interface.
+ * By setting the appropriate nlmsg_type, the ip will be set or unset.
+ *
+ * The address is passed as IFA_LOCAL attribute, with a prefix length that
+ * covers all bits of the address and universe scope. An ACK is always
+ * requested for the message.
+ *
+ * @param this        private kernel interface
+ * @param nlmsg_type  netlink message type to send
+ * @param flags       netlink flags, OR-ed to NLM_F_REQUEST | NLM_F_ACK
+ * @param if_index    index of the interface to operate on
+ * @param ip          address to manage
+ * @return            status returned by send_ack() of the netlink socket
+ */
+static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
+                                                         int flags, int if_index, host_t *ip)
+{
+       unsigned char buffer[NETLINK_BUFFER_SIZE];
+       struct nlmsghdr *nlh = (struct nlmsghdr*)buffer;
+       struct ifaddrmsg *ifa;
+       chunk_t address = ip->get_address(ip);
+
+       memset(buffer, 0, sizeof(buffer));
+
+       /* netlink message header */
+       nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
+       nlh->nlmsg_type = nlmsg_type;
+       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
+
+       /* interface address message */
+       ifa = (struct ifaddrmsg*)NLMSG_DATA(nlh);
+       ifa->ifa_index = if_index;
+       ifa->ifa_scope = RT_SCOPE_UNIVERSE;
+       ifa->ifa_prefixlen = 8 * address.len;
+       ifa->ifa_flags = 0;
+       ifa->ifa_family = ip->get_family(ip);
+
+       netlink_add_attribute(nlh, IFA_LOCAL, address, sizeof(buffer));
+
+       return this->socket->send_ack(this->socket, nlh);
+}
+
+/**
+ * Implementation of kernel_net_t.add_ip.
+ *
+ * Searches the interface that has iface_ip installed and adds virtual_ip to
+ * it. If virtual_ip is already known on an interface, only its refcount is
+ * increased.
+ *
+ * @param this        private kernel interface
+ * @param virtual_ip  virtual IP to install
+ * @param iface_ip    address identifying the interface to install the IP on
+ * @return            SUCCESS if the IP is installed (or was already),
+ *                    FAILED if the interface was not found or the netlink
+ *                    request failed
+ */
+static status_t add_ip(private_kernel_netlink_net_t *this,
+                                               host_t *virtual_ip, host_t *iface_ip)
+{
+       iface_entry_t *iface;
+       addr_entry_t *addr;
+       iterator_t *addrs, *ifaces;
+       int ifindex;
+
+       DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
+
+       /* this->mutex stays locked until the ifaces iterator is destroyed */
+       ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
+       while (ifaces->iterate(ifaces, (void**)&iface))
+       {
+               bool iface_found = FALSE;
+
+               addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
+               while (addrs->iterate(addrs, (void**)&addr))
+               {
+                       if (iface_ip->ip_equals(iface_ip, addr->ip))
+                       {
+                               iface_found = TRUE;
+                       }
+                       else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
+                       {
+                               addr->refcount++;
+                               DBG2(DBG_KNL, "virtual IP %H already installed on %s",
+                                        virtual_ip, iface->ifname);
+                               addrs->destroy(addrs);
+                               ifaces->destroy(ifaces);
+                               return SUCCESS;
+                       }
+               }
+               addrs->destroy(addrs);
+
+               if (iface_found)
+               {
+                       ifindex = iface->ifindex;
+
+                       if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
+                                                         ifindex, virtual_ip) != SUCCESS)
+                       {
+                               ifaces->destroy(ifaces);
+                               DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
+                               return FAILED;
+                       }
+                       /* register the address only after the kernel accepted it, so
+                        * a failed request does not leave a stale entry behind that a
+                        * later call would report as already installed. The mutex is
+                        * held since the iterator was created, hence the entry is in
+                        * place before we start waiting below. */
+                       addr = malloc_thing(addr_entry_t);
+                       addr->ip = virtual_ip->clone(virtual_ip);
+                       addr->refcount = 0;
+                       addr->virtual = TRUE;
+                       addr->scope = RT_SCOPE_UNIVERSE;
+                       iface->addrs->insert_last(iface->addrs, addr);
+
+                       while (get_vip_refcount(this, virtual_ip) == 0)
+                       {       /* wait until address appears */
+                               pthread_cond_wait(&this->cond, &this->mutex);
+                       }
+                       ifaces->destroy(ifaces);
+                       return SUCCESS;
+               }
+       }
+       ifaces->destroy(ifaces);
+
+       DBG1(DBG_KNL, "interface address %H not found, unable to install "
+                "virtual IP %H", iface_ip, virtual_ip);
+       return FAILED;
+}
+
+/**
+ * Implementation of kernel_net_t.del_ip.
+ */
+static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
+{
+       iface_entry_t *iface;
+       addr_entry_t *addr;
+       iterator_t *addrs, *ifaces;
+       status_t status;
+       int ifindex;
+
+       DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
+       
+       ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
+       while (ifaces->iterate(ifaces, (void**)&iface))
+       {
+               addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
+               while (addrs->iterate(addrs, (void**)&addr))
+               {
+                       if (virtual_ip->ip_equals(virtual_ip, addr->ip))
+                       {
+                               ifindex = iface->ifindex;
+                               if (addr->refcount == 1)
+                               {
+                                       status = manage_ipaddr(this, RTM_DELADDR, 0,
+                                                                                  ifindex, virtual_ip);
+                                       if (status == SUCCESS)
+                                       {       /* wait until the address is really gone */
+                                               while (get_vip_refcount(this, virtual_ip) > 0)
+                                               {
+                                                       pthread_cond_wait(&this->cond, &this->mutex);
+                                               }
+                                       }
+                                       addrs->destroy(addrs);
+                                       ifaces->destroy(ifaces);
+                                       return status;
+                               }
+                               else
+&nb