ikev1: Send and verify IPv6 addresses correctly
[strongswan.git] / src / libhydra / plugins / kernel_netlink / kernel_netlink_net.c
index 050b753..4e5e02d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Tobias Brunner
+ * Copyright (C) 2008-2014 Tobias Brunner
  * Copyright (C) 2005-2008 Martin Willi
  * Hochschule fuer Technik Rapperswil
  *
  */
 
 #include <sys/socket.h>
+#include <sys/utsname.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <unistd.h>
 #include <errno.h>
 #include <net/if.h>
+#ifdef HAVE_LINUX_FIB_RULES_H
+#include <linux/fib_rules.h>
+#endif
 
 #include "kernel_netlink_net.h"
 #include "kernel_netlink_shared.h"
 
 #include <hydra.h>
-#include <debug.h>
-#include <threading/thread.h>
-#include <threading/condvar.h>
+#include <utils/debug.h>
 #include <threading/mutex.h>
-#include <utils/linked_list.h>
+#include <threading/rwlock.h>
+#include <threading/rwlock_condvar.h>
+#include <threading/spinlock.h>
+#include <collections/hashtable.h>
+#include <collections/linked_list.h>
 #include <processing/jobs/callback_job.h>
 
 /** delay before firing roam events (ms) */
 #define ROAM_DELAY 100
 
+/** delay before reinstalling routes after a queued network change (ms) */
+#define ROUTE_DELAY 100
+
+/** maximum recursion when searching for addresses in get_route() */
+#define MAX_ROUTE_RECURSION 2
+
+#ifndef ROUTING_TABLE
+#define ROUTING_TABLE 0 /* fallback used if the macro was not defined earlier */
+#endif
+
+#ifndef ROUTING_TABLE_PRIO
+#define ROUTING_TABLE_PRIO 0 /* fallback used if the macro was not defined earlier */
+#endif
+
+/**
+ * Names of the rtnetlink message types from RTM_NEWLINK (16) up to
+ * RTM_GETRULE (34).
+ *
+ * The list needs exactly one string per value in that range.  The values 23,
+ * 27 and 31 are not assigned to any of the message types listed here, so they
+ * get numeric placeholders to keep every following name aligned with its
+ * value.
+ */
+ENUM(rt_msg_names, RTM_NEWLINK, RTM_GETRULE,
+       "RTM_NEWLINK",
+       "RTM_DELLINK",
+       "RTM_GETLINK",
+       "RTM_SETLINK",
+       "RTM_NEWADDR",
+       "RTM_DELADDR",
+       "RTM_GETADDR",
+       "23",
+       "RTM_NEWROUTE",
+       "RTM_DELROUTE",
+       "RTM_GETROUTE",
+       "27",
+       "RTM_NEWNEIGH",
+       "RTM_DELNEIGH",
+       "RTM_GETNEIGH",
+       "31",
+       "RTM_NEWRULE",
+       "RTM_DELRULE",
+       "RTM_GETRULE",
+);
+
 typedef struct addr_entry_t addr_entry_t;
 
 /**
- * IP address in an inface_entry_t
+ * IP address in an iface_entry_t (element of its addrs list)
  */
 struct addr_entry_t {
 
-       /** The ip address */
+       /** the ip address */
        host_t *ip;
 
-       /** virtual IP managed by us */
-       bool virtual;
+       /** address flags, tested against IFA_F_DEPRECATED and IFA_F_TEMPORARY */
+       u_char flags;
 
        /** scope of the address */
        u_char scope;
 
-       /** Number of times this IP is used, if virtual */
+       /** number of times this IP is used, if virtual (i.e. managed by us); a
+        * non-zero count is what marks an address as virtual in this file */
        u_int refcount;
+
+       /** TRUE once it is installed, if virtual */
+       bool installed;
 };
 
 /**
@@ -105,6 +149,9 @@ struct iface_entry_t {
 
        /** list of addresses as host_t */
        linked_list_t *addrs;
+
+       /** TRUE if usable by config */
+       bool usable;
 };
 
 /**
@@ -116,6 +163,212 @@ static void iface_entry_destroy(iface_entry_t *this)
        free(this);
 }
 
+/**
+ * Predicate for find_first(): TRUE if the interface entry has the given index
+ */
+static bool iface_entry_by_index(iface_entry_t *this, int *ifindex)
+{
+       int wanted = *ifindex;
+
+       return wanted == this->ifindex;
+}
+
+/**
+ * Predicate that matches an interface entry against an interface name
+ */
+static bool iface_entry_by_name(iface_entry_t *this, char *ifname)
+{
+       char *current = this->ifname;
+
+       return streq(current, ifname);
+}
+
+/**
+ * TRUE if the IFF_UP flag is set on the interface
+ */
+static inline bool iface_entry_up(iface_entry_t *iface)
+{
+       if ((iface->flags & IFF_UP) != IFF_UP)
+       {
+               return FALSE;
+       }
+       return TRUE;
+}
+
+/**
+ * TRUE if the interface is marked usable and is currently up
+ */
+static inline bool iface_entry_up_and_usable(iface_entry_t *iface)
+{
+       if (!iface->usable)
+       {
+               return FALSE;
+       }
+       return iface_entry_up(iface);
+}
+
+typedef struct addr_map_entry_t addr_map_entry_t;
+
+/**
+ * Entry that maps an IP address to an interface entry
+ *
+ * Heap-allocated entries are created by addr_map_entry_add(); lookups in this
+ * file also build temporary entries on the stack that only set some fields.
+ */
+struct addr_map_entry_t {
+       /** The IP address (addr_map_entry_add() stores addr->ip here, no clone) */
+       host_t *ip;
+
+       /** The address entry for this IP address */
+       addr_entry_t *addr;
+
+       /** The interface this address is installed on */
+       iface_entry_t *iface;
+};
+
+/**
+ * Hash an addr_map_entry_t object.  Only the IP address is hashed, so all
+ * entries with the same IP address are stored in the same bucket.
+ */
+static u_int addr_map_entry_hash(addr_map_entry_t *this)
+{
+       host_t *ip = this->ip;
+       chunk_t address = ip->get_address(ip);
+
+       return chunk_hash(address);
+}
+
+/**
+ * Compare two addr_map_entry_t objects, two entries are equal if they refer
+ * to the same IP address installed on the same interface (by index)
+ */
+static bool addr_map_entry_equals(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       if (a->iface->ifindex != b->iface->ifindex)
+       {
+               return FALSE;
+       }
+       return a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Match function for get_match(): accepts entry b if its interface is up and
+ * usable and its IP address equals the one in the lookup entry a
+ */
+static bool addr_map_entry_match_up_and_usable(addr_map_entry_t *a,
+                                                                                          addr_map_entry_t *b)
+{
+       if (!iface_entry_up_and_usable(b->iface))
+       {
+               return FALSE;
+       }
+       return a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Match function for get_match(): accepts entry b if its interface is up
+ * (usable or not) and its IP address equals the one in the lookup entry a
+ */
+static bool addr_map_entry_match_up(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       if (!iface_entry_up(b->iface))
+       {
+               return FALSE;
+       }
+       return a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Match function for get_match(): accepts entry b if its IP address equals
+ * the one in the lookup entry a, regardless of the interface state
+ */
+static bool addr_map_entry_match(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       host_t *wanted = a->ip;
+
+       return wanted->ip_equals(wanted, b->ip);
+}
+
+typedef struct route_entry_t route_entry_t;
+
+/**
+ * Installed routing entry
+ *
+ * route_entry_destroy() frees all members, so an entry owns its strings,
+ * hosts and the destination chunk.
+ */
+struct route_entry_t {
+       /** Name of the interface the route is bound to */
+       char *if_name;
+
+       /** Source ip of the route */
+       host_t *src_ip;
+
+       /** Gateway for this route, NULL if there is none */
+       host_t *gateway;
+
+       /** Destination net */
+       chunk_t dst_net;
+
+       /** Destination net prefixlen */
+       u_int8_t prefixlen;
+};
+
+/**
+ * Clone a route_entry_t object.
+ *
+ * All members are deep-copied, the result has to be released with
+ * route_entry_destroy().  The interface name, source IP and gateway are
+ * optional: a NULL member stays NULL in the clone, mirroring the NULL
+ * handling in route_entry_destroy() and route_entry_equals().
+ */
+static route_entry_t *route_entry_clone(route_entry_t *this)
+{
+       route_entry_t *route;
+
+       INIT(route,
+               .if_name = this->if_name ? strdup(this->if_name) : NULL,
+               .src_ip = this->src_ip ? this->src_ip->clone(this->src_ip) : NULL,
+               .gateway = this->gateway ? this->gateway->clone(this->gateway) : NULL,
+               .dst_net = chunk_clone(this->dst_net),
+               .prefixlen = this->prefixlen,
+       );
+       return route;
+}
+
+/**
+ * Destroy a route_entry_t object together with everything it owns
+ */
+static void route_entry_destroy(route_entry_t *this)
+{
+       /* the hosts are optional, DESTROY_IF skips unset ones */
+       DESTROY_IF(this->gateway);
+       DESTROY_IF(this->src_ip);
+       chunk_free(&this->dst_net);
+       free(this->if_name);
+       free(this);
+}
+
+/**
+ * Hash a route_entry_t object, based on destination net and prefix length
+ */
+static u_int route_entry_hash(route_entry_t *this)
+{
+       u_int net_hash = chunk_hash(this->dst_net);
+
+       return chunk_hash_inc(chunk_from_thing(this->prefixlen), net_hash);
+}
+
+/**
+ * Compare two route_entry_t objects
+ *
+ * Entries are equal if interface name, destination net, prefix length, source
+ * IP and gateway all match.  Entries without an interface name never compare
+ * equal.  Source IP and gateway are treated alike: they match if both are
+ * unset or both are set to the same address, so an unset source IP does not
+ * get dereferenced.
+ */
+static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
+{
+       if (!a->if_name || !b->if_name || !streq(a->if_name, b->if_name))
+       {
+               return FALSE;
+       }
+       if (a->src_ip && b->src_ip)
+       {
+               if (!a->src_ip->ip_equals(a->src_ip, b->src_ip))
+               {
+                       return FALSE;
+               }
+       }
+       else if (a->src_ip || b->src_ip)
+       {       /* only one of the two has a source IP */
+               return FALSE;
+       }
+       if (!chunk_equals(a->dst_net, b->dst_net) || a->prefixlen != b->prefixlen)
+       {
+               return FALSE;
+       }
+       if (a->gateway && b->gateway)
+       {
+               return a->gateway->ip_equals(a->gateway, b->gateway);
+       }
+       return !a->gateway && !b->gateway;
+}
+
+typedef struct net_change_t net_change_t;
+
+/**
+ * Queued network changes
+ *
+ * Hashed and compared by interface name only, see net_change_hash() and
+ * net_change_equals().
+ */
+struct net_change_t {
+       /** Name of the interface that got activated (or an IP appeared on),
+        * freed together with the object in net_change_destroy() */
+       char *if_name;
+};
+
+/**
+ * Destroy a net_change_t object including its interface name
+ */
+static void net_change_destroy(net_change_t *this)
+{
+       char *name = this->if_name;
+
+       free(this);
+       free(name);
+}
+
+/**
+ * Hash a net_change_t object, the hash covers the interface name only
+ */
+static u_int net_change_hash(net_change_t *this)
+{
+       char *name = this->if_name;
+       chunk_t bytes = chunk_create(name, strlen(name));
+
+       return chunk_hash(bytes);
+}
+
+/**
+ * Compare two net_change_t objects, they are equal if the interface names are
+ */
+static bool net_change_equals(net_change_t *a, net_change_t *b)
+{
+       char *left = a->if_name, *right = b->if_name;
+
+       return streq(left, right);
+}
+
 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
 
 /**
@@ -128,14 +381,14 @@ struct private_kernel_netlink_net_t {
        kernel_netlink_net_t public;
 
        /**
-        * mutex to lock access to various lists
+        * lock to access various lists and maps
         */
-       mutex_t *mutex;
+       rwlock_t *lock;
 
        /**
         * condition variable to signal virtual IP add/removal
         */
-       condvar_t *condvar;
+       rwlock_condvar_t *condvar;
 
        /**
         * Cached list of interfaces and its addresses (iface_entry_t)
@@ -143,9 +396,14 @@ struct private_kernel_netlink_net_t {
        linked_list_t *ifaces;
 
        /**
-        * job receiving netlink events
+        * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
         */
-       callback_job_t *job;
+       hashtable_t *addrs;
+
+       /**
+        * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
+        */
+       hashtable_t *vips;
 
        /**
         * netlink rt socket (routing)
@@ -158,9 +416,19 @@ struct private_kernel_netlink_net_t {
        int socket_events;
 
        /**
-        * time of the last roam event
+        * earliest time of the next roam event
         */
-       timeval_t last_roam;
+       timeval_t next_roam;
+
+       /**
+        * roam event due to address change
+        */
+       bool roam_address;
+
+       /**
+        * lock to check and update roam event time
+        */
+       spinlock_t *roam_lock;
 
        /**
         * routing table to install routes
@@ -173,97 +441,459 @@ struct private_kernel_netlink_net_t {
        int routing_table_prio;
 
        /**
+        * installed routes
+        */
+       hashtable_t *routes;
+
+       /**
+        * mutex for routes
+        */
+       mutex_t *routes_lock;
+
+       /**
+        * interface changes which may trigger route reinstallation
+        */
+       hashtable_t *net_changes;
+
+       /**
+        * mutex for route reinstallation triggers
+        */
+       mutex_t *net_changes_lock;
+
+       /**
+        * time of last route reinstallation
+        */
+       timeval_t last_route_reinstall;
+
+       /**
         * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
         */
        bool process_route;
 
        /**
+        * whether to trigger roam events
+        */
+       bool roam_events;
+
+       /**
         * whether to actually install virtual IPs
         */
        bool install_virtual_ip;
 
        /**
+        * the name of the interface virtual IP addresses are installed on
+        */
+       char *install_virtual_ip_on;
+
+       /**
+        * whether preferred source addresses can be specified for IPv6 routes
+        */
+       bool rta_prefsrc_for_ipv6;
+
+       /**
+        * whether marks can be used in route lookups
+        */
+       bool rta_mark;
+
+       /**
+        * the mark excluded from the routing rule used for virtual IPs
+        */
+       mark_t routing_mark;
+
+       /**
+        * whether to prefer temporary IPv6 addresses over public ones
+        */
+       bool prefer_temporary_addrs;
+
+       /**
         * list with routing tables to be excluded from route lookup
         */
        linked_list_t *rt_exclude;
+
+       /**
+        * MTU to set on installed routes
+        */
+       u_int32_t mtu;
+
+       /**
+        * MSS to set on installed routes
+        */
+       u_int32_t mss;
 };
 
 /**
- * get the refcount of a virtual ip
+ * Forward declaration, manage_srcroute() is called by reinstall_routes()
  */
-static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
+static status_t manage_srcroute(private_kernel_netlink_net_t *this,
+                                                               int nlmsg_type, int flags, chunk_t dst_net,
+                                                               u_int8_t prefixlen, host_t *gateway,
+                                                               host_t *src_ip, char *if_name);
+
+/**
+ * Clear the queued network changes.
+ *
+ * Every entry is removed from this->net_changes and destroyed.  No locking is
+ * done here; reinstall_routes() calls this while holding net_changes_lock.
+ */
+static void net_changes_clear(private_kernel_netlink_net_t *this)
 {
-       iterator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       int refcount = 0;
+       enumerator_t *enumerator;
+       net_change_t *change;
 
-       ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
-       while (ifaces->iterate(ifaces, (void**)&iface))
+       enumerator = this->net_changes->create_enumerator(this->net_changes);
+       while (enumerator->enumerate(enumerator, NULL, (void**)&change))
        {
-               addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
-               while (addrs->iterate(addrs, (void**)&addr))
-               {
-                       if (addr->virtual && (iface->flags & IFF_UP) &&
-                               ip->ip_equals(ip, addr->ip))
+               this->net_changes->remove_at(this->net_changes, enumerator);
+               net_change_destroy(change);
+       }
+       enumerator->destroy(enumerator);
+}
+
+/**
+ * Act upon queued network changes.
+ *
+ * Job callback scheduled by queue_route_reinstall().  Every route in
+ * this->routes is installed again via manage_srcroute() if a change is queued
+ * for its interface, or for the interface get_interface() reports for its
+ * source IP.  Afterwards the queue of network changes is cleared.
+ * net_changes_lock is taken before routes_lock and held until the queue has
+ * been cleared.  Always returns JOB_REQUEUE_NONE.
+ */
+static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
+{
+       enumerator_t *enumerator;
+       route_entry_t *route;
+
+       this->net_changes_lock->lock(this->net_changes_lock);
+       this->routes_lock->lock(this->routes_lock);
+
+       enumerator = this->routes->create_enumerator(this->routes);
+       while (enumerator->enumerate(enumerator, NULL, (void**)&route))
+       {
+               net_change_t *change, lookup = {
+                       .if_name = route->if_name,
+               };
+               /* check if a change for the outgoing interface is queued */
+               change = this->net_changes->get(this->net_changes, &lookup);
+               if (!change)
+               {       /* in case src_ip is not on the outgoing interface */
+                       if (this->public.interface.get_interface(&this->public.interface,
+                                                                                               route->src_ip, &lookup.if_name))
                        {
-                               refcount = addr->refcount;
-                               break;
+                               if (!streq(lookup.if_name, route->if_name))
+                               {
+                                       change = this->net_changes->get(this->net_changes, &lookup);
+                               }
+                               free(lookup.if_name); /* name returned by get_interface() */
                        }
                }
-               addrs->destroy(addrs);
-               if (refcount)
+               if (change)
+               {
+                       manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
+                                                       route->dst_net, route->prefixlen, route->gateway,
+                                                       route->src_ip, route->if_name);
+               }
+       }
+       enumerator->destroy(enumerator);
+       this->routes_lock->unlock(this->routes_lock);
+
+       net_changes_clear(this);
+       this->net_changes_lock->unlock(this->net_changes_lock);
+       return JOB_REQUEUE_NONE;
+}
+
+/**
+ * Remember a network change on the given interface and make sure a job that
+ * reinstalls the affected routes gets scheduled.
+ *
+ * Calls arriving within ROUTE_DELAY milliseconds of a scheduled job do not
+ * schedule another one, which keeps the reinstallation from running too often.
+ * The interface name is adopted by the queued entry and freed with it.
+ */
+static void queue_route_reinstall(private_kernel_netlink_net_t *this,
+                                                                 char *if_name)
+{
+       net_change_t *change, *previous;
+       timeval_t current;
+       bool schedule;
+
+       INIT(change,
+               .if_name = if_name
+       );
+
+       this->net_changes_lock->lock(this->net_changes_lock);
+       /* an entry already queued for the same interface gets replaced */
+       previous = this->net_changes->put(this->net_changes, change, change);
+       if (previous)
+       {
+               net_change_destroy(previous);
+       }
+       time_monotonic(&current);
+       schedule = timercmp(&current, &this->last_route_reinstall, >);
+       if (schedule)
+       {       /* past the delay window of the previously scheduled job */
+               timeval_add_ms(&current, ROUTE_DELAY);
+               this->last_route_reinstall = current;
+               lib->scheduler->schedule_job_ms(lib->scheduler,
+                               (job_t*)callback_job_create((callback_job_cb_t)reinstall_routes,
+                                                                                       this, NULL, NULL),
+                               ROUTE_DELAY);
+       }
+       this->net_changes_lock->unlock(this->net_changes_lock);
+}
+
+/**
+ * check if the given IP is known as virtual IP and currently installed
+ *
+ * TRUE is also returned if there is no entry for the virtual IP (anymore), in
+ * which case NULL is stored in *entry.  otherwise *entry receives the entry
+ * that was found and its installed flag is returned.
+ *
+ * this->lock must be held when calling this function
+ */
+static bool is_vip_installed_or_gone(private_kernel_netlink_net_t *this,
+                                                                        host_t *ip, addr_map_entry_t **entry)
+{
+       addr_map_entry_t *found, key = {
+               .ip = ip,
+       };
+
+       found = this->vips->get_match(this->vips, &key,
+                                                                 (void*)addr_map_entry_match);
+       *entry = found;
+       if (found)
+       {
+               return found->addr->installed;
+       }
+       /* the virtual IP disappeared */
+       return TRUE;
+}
+
+/**
+ * check if the given IP is known as virtual IP, i.e. has an entry in
+ * this->vips
+ *
+ * this->lock must be held when calling this function
+ */
+static bool is_known_vip(private_kernel_netlink_net_t *this, host_t *ip)
+{
+       addr_map_entry_t key = {
+               .ip = ip,
+       };
+       void *match;
+
+       match = this->vips->get_match(this->vips, &key,
+                                                                 (void*)addr_map_entry_match);
+       return match != NULL;
+}
+
+/**
+ * Add an address map entry for the given address/interface pair to the map.
+ * Whatever put() returns for the key (a replaced entry, if any) is freed.
+ */
+static void addr_map_entry_add(hashtable_t *map, addr_entry_t *addr,
+                                                          iface_entry_t *iface)
+{
+       addr_map_entry_t *created, *replaced;
+
+       INIT(created,
+               .ip = addr->ip,
+               .addr = addr,
+               .iface = iface,
+       );
+       replaced = map->put(map, created, created);
+       free(replaced);
+}
+
+/**
+ * Remove the address map entry for the given address/interface pair from the
+ * map and free it
+ */
+static void addr_map_entry_remove(hashtable_t *map, addr_entry_t *addr,
+                                                                 iface_entry_t *iface)
+{
+       addr_map_entry_t key = {
+               .ip = addr->ip,
+               .addr = addr,
+               .iface = iface,
+       };
+
+       free(map->remove(map, &key));
+}
+
+/**
+ * Determine the type or scope of the given unicast IP address.  This is not
+ * the same thing returned in rtm_scope/ifa_scope.
+ *
+ * The return values are those defined in RFC 6724 (referring to RFC 4291):
+ * 2 for link-local (including loopback), 5 for site-local and 14 for global,
+ * which is also returned for addresses of unexpected length.
+ */
+static u_char get_scope(host_t *ip)
+{
+       chunk_t raw = ip->get_address(ip);
+       struct in6_addr *v6;
+
+       if (raw.len == 4)
+       {       /* IPv4 is mapped as defined in RFC 6724, 3.2 */
+               if (raw.ptr[0] == 127 ||
+                       (raw.ptr[0] == 169 && raw.ptr[1] == 254))
+               {       /* loopback (treated like the IPv6 loopback) or link-local */
+                       return 2;
+               }
+       }
+       else if (raw.len == 16)
+       {
+               v6 = (struct in6_addr*)raw.ptr;
+               if (IN6_IS_ADDR_LOOPBACK(v6) || IN6_IS_ADDR_LINKLOCAL(v6))
+               {       /* loopback is link-local according to RFC 4291, 2.5.3 */
+                       return 2;
+               }
+               if (IN6_IS_ADDR_SITELOCAL(v6))
+               {       /* deprecated, according to RFC 4291, 2.5.7 */
+                       return 5;
+               }
+       }
+       /* global */
+       return 14;
+}
+
+/**
+ * Returns the length of the common prefix in bits up to the length of a's
+ * prefix, defined by RFC 6724 as the portion of the address not including the
+ * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
+ *
+ * At most the first 8 bytes are compared, and never more bytes than the
+ * shorter of the two addresses provides, so 4-byte IPv4 addresses are not
+ * read out of bounds.
+ */
+static u_char common_prefix(host_t *a, host_t *b)
+{
+       chunk_t aa, ba;
+       u_char byte, bits = 0, match;
+       size_t limit = 8;
+
+       aa = a->get_address(a);
+       ba = b->get_address(b);
+       if (aa.len < limit)
+       {
+               limit = aa.len;
+       }
+       if (ba.len < limit)
+       {
+               limit = ba.len;
+       }
+       for (byte = 0; byte < limit; byte++)
+       {
+               if (aa.ptr[byte] != ba.ptr[byte])
                {
+                       /* count the leading bits the two differing bytes share */
+                       match = aa.ptr[byte] ^ ba.ptr[byte];
+                       for (bits = 8; match; match >>= 1)
+                       {
+                               bits--;
+                       }
                        break;
                }
        }
-       ifaces->destroy(ifaces);
+       return byte * 8 + bits;
+}
 
-       return refcount;
+/**
+ * Decide whether address b is a better choice than address a to reach the
+ * destination d (which may be NULL).  TRUE means b should be preferred, on a
+ * tie the address found earlier (a) is kept.
+ * For IPv6 we approximately follow RFC 6724.
+ */
+static bool is_address_better(private_kernel_netlink_net_t *this,
+                                                         addr_entry_t *a, addr_entry_t *b, host_t *d)
+{
+       u_char scope_a, scope_b, scope_d, prefix_a, prefix_b;
+       u_char deprecated_a, deprecated_b, temporary_a, temporary_b;
+
+       if (d)
+       {       /* rule 2: prefer appropriate scope */
+               scope_a = get_scope(a->ip);
+               scope_b = get_scope(b->ip);
+               scope_d = get_scope(d);
+               if (scope_a < scope_b)
+               {
+                       return scope_a < scope_d;
+               }
+               if (scope_b < scope_a)
+               {
+                       return scope_b >= scope_d;
+               }
+       }
+       if (a->ip->get_family(a->ip) == AF_INET)
+       {       /* stop here for IPv4, default to addresses found earlier */
+               return FALSE;
+       }
+       /* rule 3: avoid deprecated addresses (RFC 4862) */
+       deprecated_a = a->flags & IFA_F_DEPRECATED;
+       deprecated_b = b->flags & IFA_F_DEPRECATED;
+       if (deprecated_a != deprecated_b)
+       {
+               return deprecated_a;
+       }
+       /* rule 4 is not applicable as we don't know if an address is a home or
+        * care-of address.
+        * rule 5 does not apply as we only compare addresses from one interface
+        * rule 6 requires a policy table (optionally configurable) to match
+        * configurable labels
+        */
+       /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
+       temporary_a = a->flags & IFA_F_TEMPORARY;
+       temporary_b = b->flags & IFA_F_TEMPORARY;
+       if (temporary_a != temporary_b)
+       {
+               return this->prefer_temporary_addrs ? temporary_b : temporary_a;
+       }
+       if (d)
+       {       /* rule 8: use longest matching prefix */
+               prefix_a = common_prefix(a->ip, d);
+               prefix_b = common_prefix(b->ip, d);
+               if (prefix_a != prefix_b)
+               {
+                       return prefix_b > prefix_a;
+               }
+       }
+       /* default to addresses found earlier */
+       return FALSE;
 }
 
 /**
- * get the first non-virtual ip address on the given interface.
- * returned host is a clone, has to be freed by caller.
+ * Get a non-virtual IP address on the given interface.
+ *
+ * Only interfaces marked usable and addresses of the requested family are
+ * considered.  If a candidate address is given and found on the interface it
+ * is returned, otherwise the address is_address_better() rates best for the
+ * given destination (which may be NULL) is selected.
+ * Returned host is a clone, has to be freed by caller.  NULL is returned if
+ * the interface is unknown, not usable or has no matching address.
+ *
+ * this->lock must be held when calling this function.
  */
 static host_t *get_interface_address(private_kernel_netlink_net_t *this,
-                                                                        int ifindex, int family)
+                                                                        int ifindex, int family, host_t *dest,
+                                                                        host_t *candidate)
 {
-       enumerator_t *ifaces, *addrs;
        iface_entry_t *iface;
-       addr_entry_t *addr;
-       host_t *ip = NULL;
+       enumerator_t *addrs;
+       addr_entry_t *addr, *best = NULL;
 
-       this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &ifindex) == SUCCESS)
        {
-               if (iface->ifindex == ifindex)
-               {
+               if (iface->usable)
+               {       /* only use interfaces not excluded by config */
                        addrs = iface->addrs->create_enumerator(iface->addrs);
                        while (addrs->enumerate(addrs, &addr))
                        {
-                               if (!addr->virtual && addr->ip->get_family(addr->ip) == family)
-                               {
-                                       ip = addr->ip->clone(addr->ip);
+                               if (addr->refcount ||
+                                       addr->ip->get_family(addr->ip) != family)
+                               {       /* ignore virtual IP addresses and ensure family matches */
+                                       continue;
+                               }
+                               if (candidate && candidate->ip_equals(candidate, addr->ip))
+                               {       /* stop if we find the candidate */
+                                       best = addr;
                                        break;
                                }
+                               else if (!best || is_address_better(this, best, addr, dest))
+                               {       /* first match, or better than the best one so far */
+                                       best = addr;
+                               }
                        }
                        addrs->destroy(addrs);
-                       break;
                }
        }
-       ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
-       return ip;
+       return best ? best->ip->clone(best->ip) : NULL;
 }
 
 /**
  * callback function that raises the delayed roam event
+ *
+ * The address flag collected in this->roam_address by fire_roam_event() is
+ * read and reset under roam_lock, then passed to the kernel interface's
+ * roam() after the lock has been released.
  */
-static job_requeue_t roam_event(uintptr_t address)
+static job_requeue_t roam_event(private_kernel_netlink_net_t *this)
 {
-       hydra->kernel_interface->roam(hydra->kernel_interface, address != 0);
+       bool address;
+
+       this->roam_lock->lock(this->roam_lock);
+       address = this->roam_address;
+       this->roam_address = FALSE;
+       this->roam_lock->unlock(this->roam_lock);
+       hydra->kernel_interface->roam(hydra->kernel_interface, address);
        return JOB_REQUEUE_NONE;
 }
 
@@ -276,22 +906,61 @@ static void fire_roam_event(private_kernel_netlink_net_t *this, bool address)
        timeval_t now;
        job_t *job;
 
+       if (!this->roam_events)
+       {
+               return;
+       }
+
        time_monotonic(&now);
-       if (timercmp(&now, &this->last_roam, >))
+       this->roam_lock->lock(this->roam_lock);
+       this->roam_address |= address;
+       if (!timercmp(&now, &this->next_roam, >))
        {
-               now.tv_usec += ROAM_DELAY * 1000;
-               while (now.tv_usec > 1000000)
-               {
-                       now.tv_sec++;
-                       now.tv_usec -= 1000000;
-               }
-               this->last_roam = now;
+               this->roam_lock->unlock(this->roam_lock);
+               return;
+       }
+       timeval_add_ms(&now, ROAM_DELAY);
+       this->next_roam = now;
+       this->roam_lock->unlock(this->roam_lock);
+
+       job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
+                                                                         this, NULL, NULL);
+       lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
+}
+
+/**
+ * check if an interface with a given index is up and usable
+ *
+ * FALSE is also returned if no interface with that index is found in
+ * this->ifaces.
+ *
+ * this->lock must be locked when calling this function
+ */
+static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
+                                                                          int index)
+{
+       iface_entry_t *iface;
 
-               job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
-                                                                                 (void*)(uintptr_t)(address ? 1 : 0),
-                                                                                 NULL, NULL);
-               lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &index) == SUCCESS)
+       {
+               return iface_entry_up_and_usable(iface);
        }
+       return FALSE;
+}
+
+/**
+ * unregister the given addr_entry_t from the hashtable it is stored in
+ *
+ * entries with a non-zero refcount are removed from this->vips, followed by a
+ * broadcast on the condvar; all other entries are removed from this->addrs.
+ *
+ * this->lock must be locked when calling this function
+ */
+static void addr_entry_unregister(addr_entry_t *addr, iface_entry_t *iface,
+                                                                 private_kernel_netlink_net_t *this)
+{
+       if (!addr->refcount)
+       {
+               addr_map_entry_remove(this->addrs, addr, iface);
+               return;
+       }
+       addr_map_entry_remove(this->vips, addr, iface);
+       this->condvar->broadcast(this->condvar);
 }
 
 /**
@@ -300,15 +969,15 @@ static void fire_roam_event(private_kernel_netlink_net_t *this, bool address)
 static void process_link(private_kernel_netlink_net_t *this,
                                                 struct nlmsghdr *hdr, bool event)
 {
-       struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
+       struct ifinfomsg* msg = NLMSG_DATA(hdr);
        struct rtattr *rta = IFLA_RTA(msg);
        size_t rtasize = IFLA_PAYLOAD (hdr);
        enumerator_t *enumerator;
        iface_entry_t *current, *entry = NULL;
        char *name = NULL;
-       bool update = FALSE;
+       bool update = FALSE, update_routes = FALSE;
 
-       while(RTA_OK(rta, rtasize))
+       while (RTA_OK(rta, rtasize))
        {
                switch (rta->rta_type)
                {
@@ -323,40 +992,30 @@ static void process_link(private_kernel_netlink_net_t *this,
                name = "(unknown)";
        }
 
-       this->mutex->lock(this->mutex);
+       this->lock->write_lock(this->lock);
        switch (hdr->nlmsg_type)
        {
                case RTM_NEWLINK:
                {
-                       if (msg->ifi_flags & IFF_LOOPBACK)
-                       {       /* ignore loopback interfaces */
-                               break;
-                       }
-                       enumerator = this->ifaces->create_enumerator(this->ifaces);
-                       while (enumerator->enumerate(enumerator, &current))
-                       {
-                               if (current->ifindex == msg->ifi_index)
-                               {
-                                       entry = current;
-                                       break;
-                               }
-                       }
-                       enumerator->destroy(enumerator);
-                       if (!entry)
+                       if (this->ifaces->find_first(this->ifaces,
+                                                                       (void*)iface_entry_by_index, (void**)&entry,
+                                                                       &msg->ifi_index) != SUCCESS)
                        {
-                               entry = malloc_thing(iface_entry_t);
-                               entry->ifindex = msg->ifi_index;
-                               entry->flags = 0;
-                               entry->addrs = linked_list_create();
+                               INIT(entry,
+                                       .ifindex = msg->ifi_index,
+                                       .addrs = linked_list_create(),
+                                       .usable = hydra->kernel_interface->is_interface_usable(
+                                                                                               hydra->kernel_interface, name),
+                               );
                                this->ifaces->insert_last(this->ifaces, entry);
                        }
                        strncpy(entry->ifname, name, IFNAMSIZ);
                        entry->ifname[IFNAMSIZ-1] = '\0';
-                       if (event)
+                       if (event && entry->usable)
                        {
                                if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
                                {
-                                       update = TRUE;
+                                       update = update_routes = TRUE;
                                        DBG1(DBG_KNL, "interface %s activated", name);
                                }
                                if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
@@ -375,9 +1034,17 @@ static void process_link(private_kernel_netlink_net_t *this,
                        {
                                if (current->ifindex == msg->ifi_index)
                                {
-                                       /* we do not remove it, as an address may be added to a
-                                        * "down" interface and we wan't to know that. */
-                                       current->flags = msg->ifi_flags;
+                                       if (event && current->usable)
+                                       {
+                                               update = TRUE;
+                                               DBG1(DBG_KNL, "interface %s deleted", current->ifname);
+                                       }
+                                       /* TODO: move virtual IPs installed on this interface to
+                                        * another interface? */
+                                       this->ifaces->remove_at(this->ifaces, enumerator);
+                                       current->addrs->invoke_function(current->addrs,
+                                                               (void*)addr_entry_unregister, current, this);
+                                       iface_entry_destroy(current);
                                        break;
                                }
                        }
@@ -385,9 +1052,13 @@ static void process_link(private_kernel_netlink_net_t *this,
                        break;
                }
        }
-       this->mutex->unlock(this->mutex);
+       this->lock->unlock(this->lock);
+
+       if (update_routes && event)
+       {
+               queue_route_reinstall(this, strdup(name));
+       }
 
-       /* send an update to all IKE_SAs */
        if (update && event)
        {
                fire_roam_event(this, TRUE);
@@ -400,17 +1071,16 @@ static void process_link(private_kernel_netlink_net_t *this,
 static void process_addr(private_kernel_netlink_net_t *this,
                                                 struct nlmsghdr *hdr, bool event)
 {
-       struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
+       struct ifaddrmsg* msg = NLMSG_DATA(hdr);
        struct rtattr *rta = IFA_RTA(msg);
        size_t rtasize = IFA_PAYLOAD (hdr);
        host_t *host = NULL;
-       enumerator_t *ifaces, *addrs;
        iface_entry_t *iface;
-       addr_entry_t *addr;
        chunk_t local = chunk_empty, address = chunk_empty;
+       char *route_ifname = NULL;
        bool update = FALSE, found = FALSE, changed = FALSE;
 
-       while(RTA_OK(rta, rtasize))
+       while (RTA_OK(rta, rtasize))
        {
                switch (rta->rta_type)
                {
@@ -443,65 +1113,93 @@ static void process_addr(private_kernel_netlink_net_t *this,
                return;
        }
 
-       this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       this->lock->write_lock(this->lock);
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &msg->ifa_index) == SUCCESS)
        {
-               if (iface->ifindex == msg->ifa_index)
+               addr_map_entry_t *entry, lookup = {
+                       .ip = host,
+                       .iface = iface,
+               };
+               addr_entry_t *addr;
+
+               entry = this->vips->get(this->vips, &lookup);
+               if (entry)
                {
-                       addrs = iface->addrs->create_enumerator(iface->addrs);
-                       while (addrs->enumerate(addrs, &addr))
+                       if (hdr->nlmsg_type == RTM_NEWADDR)
+                       {       /* mark as installed and signal waiting threads */
+                               entry->addr->installed = TRUE;
+                       }
+                       else
+                       {       /* the address was already marked as uninstalled */
+                               addr = entry->addr;
+                               iface->addrs->remove(iface->addrs, addr, NULL);
+                               addr_map_entry_remove(this->vips, addr, iface);
+                               addr_entry_destroy(addr);
+                       }
+                       /* no roam events etc. for virtual IPs */
+                       this->condvar->broadcast(this->condvar);
+                       this->lock->unlock(this->lock);
+                       host->destroy(host);
+                       return;
+               }
+               entry = this->addrs->get(this->addrs, &lookup);
+               if (entry)
+               {
+                       if (hdr->nlmsg_type == RTM_DELADDR)
                        {
-                               if (host->ip_equals(host, addr->ip))
+                               found = TRUE;
+                               addr = entry->addr;
+                               iface->addrs->remove(iface->addrs, addr, NULL);
+                               if (iface->usable)
                                {
-                                       found = TRUE;
-                                       if (hdr->nlmsg_type == RTM_DELADDR)
-                                       {
-                                               iface->addrs->remove_at(iface->addrs, addrs);
-                                               if (!addr->virtual)
-                                               {
-                                                       changed = TRUE;
-                                                       DBG1(DBG_KNL, "%H disappeared from %s",
-                                                                host, iface->ifname);
-                                               }
-                                               addr_entry_destroy(addr);
-                                       }
-                                       else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
-                                       {
-                                               addr->refcount = 1;
-                                       }
+                                       changed = TRUE;
+                                       DBG1(DBG_KNL, "%H disappeared from %s", host,
+                                                iface->ifname);
                                }
+                               addr_map_entry_remove(this->addrs, addr, iface);
+                               addr_entry_destroy(addr);
                        }
-                       addrs->destroy(addrs);
-
+               }
+               else
+               {
                        if (hdr->nlmsg_type == RTM_NEWADDR)
                        {
-                               if (!found)
+                               found = TRUE;
+                               changed = TRUE;
+                               route_ifname = strdup(iface->ifname);
+                               INIT(addr,
+                                       .ip = host->clone(host),
+                                       .flags = msg->ifa_flags,
+                                       .scope = msg->ifa_scope,
+                               );
+                               iface->addrs->insert_last(iface->addrs, addr);
+                               addr_map_entry_add(this->addrs, addr, iface);
+                               if (event && iface->usable)
                                {
-                                       found = TRUE;
-                                       changed = TRUE;
-                                       addr = malloc_thing(addr_entry_t);
-                                       addr->ip = host->clone(host);
-                                       addr->virtual = FALSE;
-                                       addr->refcount = 1;
-                                       addr->scope = msg->ifa_scope;
-
-                                       iface->addrs->insert_last(iface->addrs, addr);
-                                       if (event)
-                                       {
-                                               DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
-                                       }
+                                       DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
                                }
                        }
-                       if (found && (iface->flags & IFF_UP))
-                       {
-                               update = TRUE;
-                       }
-                       break;
+               }
+               if (found && (iface->flags & IFF_UP))
+               {
+                       update = TRUE;
+               }
+               if (!iface->usable)
+               {       /* ignore events for interfaces excluded by config */
+                       update = changed = FALSE;
                }
        }
-       ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
+       this->lock->unlock(this->lock);
+
+       if (update && event && route_ifname)
+       {
+               queue_route_reinstall(this, route_ifname);
+       }
+       else
+       {
+               free(route_ifname);
+       }
        host->destroy(host);
 
        /* send an update to all IKE_SAs */
@@ -516,7 +1214,7 @@ static void process_addr(private_kernel_netlink_net_t *this,
  */
 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
 {
-       struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
+       struct rtmsg* msg = NLMSG_DATA(hdr);
        struct rtattr *rta = RTM_RTA(msg);
        size_t rtasize = RTM_PAYLOAD(hdr);
        u_int32_t rta_oif = 0;
@@ -528,6 +1226,10 @@ static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *h
        {
                return;
        }
+       else if (msg->rtm_flags & RTM_F_CLONED)
+       {       /* ignore cached routes, seem to be created a lot for IPv6 */
+               return;
+       }
 
        while (RTA_OK(rta, rtasize))
        {
@@ -547,59 +1249,63 @@ static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *h
                }
                rta = RTA_NEXT(rta, rtasize);
        }
+       this->lock->read_lock(this->lock);
+       if (rta_oif && !is_interface_up_and_usable(this, rta_oif))
+       {       /* ignore route changes for interfaces that are ignored or down */
+               this->lock->unlock(this->lock);
+               DESTROY_IF(host);
+               return;
+       }
        if (!host && rta_oif)
        {
-               host = get_interface_address(this, rta_oif, msg->rtm_family);
+               host = get_interface_address(this, rta_oif, msg->rtm_family,
+                                                                        NULL, NULL);
        }
-       if (host)
-       {
-               this->mutex->lock(this->mutex);
-               if (!get_vip_refcount(this, host))
-               {       /* ignore routes added for virtual IPs */
-                       fire_roam_event(this, FALSE);
-               }
-               this->mutex->unlock(this->mutex);
-               host->destroy(host);
+       if (!host || is_known_vip(this, host))
+       {       /* ignore routes added for virtual IPs */
+               this->lock->unlock(this->lock);
+               DESTROY_IF(host);
+               return;
        }
+       this->lock->unlock(this->lock);
+       fire_roam_event(this, FALSE);
+       host->destroy(host);
 }
 
 /**
  * Receives events from kernel
  */
-static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
+static bool receive_events(private_kernel_netlink_net_t *this, int fd,
+                                                  watcher_event_t event)
 {
-       char response[1024];
+       char response[1536];
        struct nlmsghdr *hdr = (struct nlmsghdr*)response;
        struct sockaddr_nl addr;
        socklen_t addr_len = sizeof(addr);
        int len;
-       bool oldstate;
-
-       oldstate = thread_cancelability(TRUE);
-       len = recvfrom(this->socket_events, response, sizeof(response), 0,
-                                  (struct sockaddr*)&addr, &addr_len);
-       thread_cancelability(oldstate);
 
+       len = recvfrom(this->socket_events, response, sizeof(response),
+                                  MSG_DONTWAIT, (struct sockaddr*)&addr, &addr_len);
        if (len < 0)
        {
                switch (errno)
                {
                        case EINTR:
                                /* interrupted, try again */
-                               return JOB_REQUEUE_DIRECT;
+                               return TRUE;
                        case EAGAIN:
                                /* no data ready, select again */
-                               return JOB_REQUEUE_DIRECT;
+                               return TRUE;
                        default:
                                DBG1(DBG_KNL, "unable to receive from rt event socket");
                                sleep(1);
-                               return JOB_REQUEUE_FAIR;
+                               return TRUE;
                }
        }
 
        if (addr.nl_pid != 0)
        {       /* not from kernel. not interested, try another one */
-               return JOB_REQUEUE_DIRECT;
+               return TRUE;
        }
 
        while (NLMSG_OK(hdr, len))
@@ -610,12 +1316,10 @@ static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
                        case RTM_NEWADDR:
                        case RTM_DELADDR:
                                process_addr(this, hdr, TRUE);
-                               this->condvar->broadcast(this->condvar);
                                break;
                        case RTM_NEWLINK:
                        case RTM_DELLINK:
                                process_link(this, hdr, TRUE);
-                               this->condvar->broadcast(this->condvar);
                                break;
                        case RTM_NEWROUTE:
                        case RTM_DELROUTE:
@@ -629,16 +1333,14 @@ static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
                }
                hdr = NLMSG_NEXT(hdr, len);
        }
-       return JOB_REQUEUE_DIRECT;
+       return TRUE;
 }
 
 /** enumerator over addresses */
 typedef struct {
        private_kernel_netlink_net_t* this;
-       /** whether to enumerate down interfaces */
-       bool include_down_ifaces;
-       /** whether to enumerate virtual ip addresses */
-       bool include_virtual_ips;
+       /** which addresses to enumerate */
+       kernel_address_type_t which;
 } address_enumerator_t;
 
 /**
@@ -646,7 +1348,7 @@ typedef struct {
  */
 static void address_enumerator_destroy(address_enumerator_t *data)
 {
-       data->this->mutex->unlock(data->this->mutex);
+       data->this->lock->unlock(data->this->lock);
        free(data);
 }
 
@@ -656,10 +1358,14 @@ static void address_enumerator_destroy(address_enumerator_t *data)
 static bool filter_addresses(address_enumerator_t *data,
                                                         addr_entry_t** in, host_t** out)
 {
-       if (!data->include_virtual_ips && (*in)->virtual)
+       if (!(data->which & ADDR_TYPE_VIRTUAL) && (*in)->refcount)
        {       /* skip virtual interfaces added by us */
                return FALSE;
        }
+       if (!(data->which & ADDR_TYPE_REGULAR) && !(*in)->refcount)
+       {       /* address is regular, but not requested */
+               return FALSE;
+       }
        if ((*in)->scope >= RT_SCOPE_LINK)
        {       /* skip addresses with a unusable scope */
                return FALSE;
@@ -685,7 +1391,15 @@ static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
                                                          iface_entry_t** out)
 {
-       if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
+       if (!(data->which & ADDR_TYPE_IGNORED) && !(*in)->usable)
+       {       /* skip interfaces excluded by config */
+               return FALSE;
+       }
+       if (!(data->which & ADDR_TYPE_LOOPBACK) && ((*in)->flags & IFF_LOOPBACK))
+       {       /* ignore loopback devices */
+               return FALSE;
+       }
+       if (!(data->which & ADDR_TYPE_DOWN) && !((*in)->flags & IFF_UP))
        {       /* skip interfaces not up */
                return FALSE;
        }
@@ -694,15 +1408,16 @@ static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
 }
 
 METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
-       private_kernel_netlink_net_t *this,
-       bool include_down_ifaces, bool include_virtual_ips)
+       private_kernel_netlink_net_t *this, kernel_address_type_t which)
 {
-       address_enumerator_t *data = malloc_thing(address_enumerator_t);
-       data->this = this;
-       data->include_down_ifaces = include_down_ifaces;
-       data->include_virtual_ips = include_virtual_ips;
+       address_enumerator_t *data;
 
-       this->mutex->lock(this->mutex);
+       INIT(data,
+               .this = this,
+               .which = which,
+       );
+
+       this->lock->read_lock(this->lock);
        return enumerator_create_nested(
                                enumerator_create_filter(
                                        this->ifaces->create_enumerator(this->ifaces),
@@ -711,47 +1426,53 @@ METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
                                (void*)address_enumerator_destroy);
 }
 
-METHOD(kernel_net_t, get_interface_name, char*,
-       private_kernel_netlink_net_t *this, host_t* ip)
+METHOD(kernel_net_t, get_interface_name, bool,
+       private_kernel_netlink_net_t *this, host_t* ip, char **name)
 {
-       enumerator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       char *name = NULL;
+       addr_map_entry_t *entry, lookup = {
+               .ip = ip,
+       };
 
-       DBG2(DBG_KNL, "getting interface name for %H", ip);
-
-       this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       if (ip->is_anyaddr(ip))
+       {
+               return FALSE;
+       }
+       this->lock->read_lock(this->lock);
+       /* first try to find it on an up and usable interface */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match_up_and_usable);
+       if (entry)
        {
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
-               {
-                       if (ip->ip_equals(ip, addr->ip))
-                       {
-                               name = strdup(iface->ifname);
-                               break;
-                       }
-               }
-               addrs->destroy(addrs);
                if (name)
                {
-                       break;
+                       *name = strdup(entry->iface->ifname);
+                       DBG2(DBG_KNL, "%H is on interface %s", ip, *name);
                }
+               this->lock->unlock(this->lock);
+               return TRUE;
        }
-       ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
-
-       if (name)
+       /* in a second step, consider virtual IPs installed by us */
+       entry = this->vips->get_match(this->vips, &lookup,
+                                                                 (void*)addr_map_entry_match_up_and_usable);
+       if (entry)
        {
-               DBG2(DBG_KNL, "%H is on interface %s", ip, name);
+               if (name)
+               {
+                       *name = strdup(entry->iface->ifname);
+                       DBG2(DBG_KNL, "virtual IP %H is on interface %s", ip, *name);
+               }
+               this->lock->unlock(this->lock);
+               return TRUE;
        }
-       else
+       /* maybe it is installed on an ignored interface */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match_up);
+       if (!entry)
        {
-               DBG2(DBG_KNL, "%H is not a local address", ip);
+               DBG2(DBG_KNL, "%H is not a local address or the interface is down", ip);
        }
-       return name;
+       this->lock->unlock(this->lock);
+       return FALSE;
 }
 
 /**
@@ -759,24 +1480,18 @@ METHOD(kernel_net_t, get_interface_name, char*,
  */
 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
 {
-       enumerator_t *ifaces;
        iface_entry_t *iface;
        int ifindex = 0;
 
        DBG2(DBG_KNL, "getting iface index for %s", name);
 
-       this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       this->lock->read_lock(this->lock);
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
+                                                               (void**)&iface, name) == SUCCESS)
        {
-               if (streq(name, iface->ifname))
-               {
-                       ifindex = iface->ifindex;
-                       break;
-               }
+               ifindex = iface->ifindex;
        }
-       ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
+       this->lock->unlock(this->lock);
 
        if (ifindex == 0)
        {
@@ -786,111 +1501,229 @@ static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
 }
 
 /**
- * Check if an interface with a given index is up
+ * check if an address or net (addr with prefix net bits) is in
+ * subnet (net with net_len net bits)
  */
-static bool is_interface_up(private_kernel_netlink_net_t *this, int index)
+static bool addr_in_subnet(chunk_t addr, int prefix, chunk_t net, int net_len)
 {
-       enumerator_t *ifaces;
-       iface_entry_t *iface;
-       /* default to TRUE for interface we do not monitor (e.g. lo) */
-       bool up = TRUE;
+       static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
+       int byte = 0;
+
+       if (net_len == 0)
+       {       /* any address matches a /0 network */
+               return TRUE;
+       }
+       if (addr.len != net.len || net_len > 8 * net.len || prefix < net_len)
+       {
+               return FALSE;
+       }
+       /* scan through all bytes in network order */
+       while (net_len > 0)
+       {
+               if (net_len < 8)
+               {
+                       return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
+               }
+               else
+               {
+                       if (addr.ptr[byte] != net.ptr[byte])
+                       {
+                               return FALSE;
+                       }
+                       byte++;
+                       net_len -= 8;
+               }
+       }
+       return TRUE;
+}
+
+/**
+ * Store information about a route retrieved via RTNETLINK
+ */
+typedef struct {
+       chunk_t gtw;
+       chunk_t src;
+       chunk_t dst;
+       host_t *src_host;
+       u_int8_t dst_len;
+       u_int32_t table;
+       u_int32_t oif;
+       u_int32_t priority;
+} rt_entry_t;
+
+/**
+ * Free a route entry
+ */
+static void rt_entry_destroy(rt_entry_t *this)
+{
+       DESTROY_IF(this->src_host);
+       free(this);
+}
+
+/**
+ * Check if the route received with RTM_NEWROUTE is usable based on its type.
+ */
+static bool route_usable(struct nlmsghdr *hdr)
+{
+       struct rtmsg *msg;
 
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       msg = NLMSG_DATA(hdr);
+       switch (msg->rtm_type)
        {
-               if (iface->ifindex == index)
-               {
-                       up = iface->flags & IFF_UP;
-                       break;
-               }
+               case RTN_BLACKHOLE:
+               case RTN_UNREACHABLE:
+               case RTN_PROHIBIT:
+               case RTN_THROW:
+                       return FALSE;
+               default:
+                       return TRUE;
        }
-       ifaces->destroy(ifaces);
-       return up;
 }
 
 /**
- * check if an address (chunk) addr is in subnet (net with net_len net bits)
+ * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
+ * reused if not NULL.
+ *
+ * Returned chunks point to internal data of the Netlink message.
  */
-static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
+static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
 {
-       static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
-       int byte = 0;
+       struct rtattr *rta;
+       struct rtmsg *msg;
+       size_t rtasize;
 
-       if (net_len == 0)
-       {       /* any address matches a /0 network */
-               return TRUE;
+       msg = NLMSG_DATA(hdr);
+       rta = RTM_RTA(msg);
+       rtasize = RTM_PAYLOAD(hdr);
+
+       if (route)
+       {
+               route->gtw = chunk_empty;
+               route->src = chunk_empty;
+               route->dst = chunk_empty;
+               route->dst_len = msg->rtm_dst_len;
+               route->table = msg->rtm_table;
+               route->oif = 0;
+               route->priority = 0;
        }
-       if (addr.len != net.len || net_len > 8 * net.len )
+       else
        {
-               return FALSE;
+               INIT(route,
+                       .dst_len = msg->rtm_dst_len,
+                       .table = msg->rtm_table,
+               );
        }
-       /* scan through all bytes in network order */
-       while (net_len > 0)
+
+       while (RTA_OK(rta, rtasize))
        {
-               if (net_len < 8)
-               {
-                       return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
-               }
-               else
+               switch (rta->rta_type)
                {
-                       if (addr.ptr[byte] != net.ptr[byte])
-                       {
-                               return FALSE;
-                       }
-                       byte++;
-                       net_len -= 8;
+                       case RTA_PREFSRC:
+                               route->src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_GATEWAY:
+                               route->gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_DST:
+                               route->dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_OIF:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->oif))
+                               {
+                                       route->oif = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
+                       case RTA_PRIORITY:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->priority))
+                               {
+                                       route->priority = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
+#ifdef HAVE_RTA_TABLE
+                       case RTA_TABLE:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->table))
+                               {
+                                       route->table = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
+#endif /* HAVE_RTA_TABLE*/
                }
+               rta = RTA_NEXT(rta, rtasize);
        }
-       return TRUE;
+       return route;
 }
 
 /**
  * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
  */
 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
-                                                bool nexthop, host_t *candidate)
+                                                int prefix, bool nexthop, host_t *candidate,
+                                                u_int recursion)
 {
        netlink_buf_t request;
        struct nlmsghdr *hdr, *out, *current;
        struct rtmsg *msg;
        chunk_t chunk;
        size_t len;
-       int best = -1;
+       linked_list_t *routes;
+       rt_entry_t *route = NULL, *best = NULL;
        enumerator_t *enumerator;
-       host_t *src = NULL, *gtw = NULL;
+       host_t *addr = NULL;
+       bool match_net;
+       int family;
 
-       DBG2(DBG_KNL, "getting address to reach %H", dest);
+       if (recursion > MAX_ROUTE_RECURSION)
+       {
+               return NULL;
+       }
+       chunk = dest->get_address(dest);
+       len = chunk.len * 8;
+       prefix = prefix < 0 ? len : min(prefix, len);
+       match_net = prefix != len;
 
        memset(&request, 0, sizeof(request));
 
-       hdr = (struct nlmsghdr*)request;
+       family = dest->get_family(dest);
+       hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST;
-       if (dest->get_family(dest) == AF_INET)
-       {
-               /* We dump all addresses for IPv4, as we want to ignore IPsec specific
-                * routes installed by us. But the kernel does not return source
-                * addresses in a IPv6 dump, so fall back to get() for v6 routes. */
-               hdr->nlmsg_flags |= NLM_F_ROOT | NLM_F_DUMP;
-       }
        hdr->nlmsg_type = RTM_GETROUTE;
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 
-       msg = (struct rtmsg*)NLMSG_DATA(hdr);
-       msg->rtm_family = dest->get_family(dest);
+       msg = NLMSG_DATA(hdr);
+       msg->rtm_family = family;
+       if (!match_net && this->rta_mark && this->routing_mark.value)
+       {
+               /* if our routing rule excludes packets with a certain mark we can
+                * get the preferred route without having to dump all routes */
+               chunk = chunk_from_thing(this->routing_mark.value);
+               netlink_add_attribute(hdr, RTA_MARK, chunk, sizeof(request));
+       }
+       else if (family == AF_INET || this->rta_prefsrc_for_ipv6 ||
+                        this->routing_table || match_net)
+       {       /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
+                * as we want to ignore routes with virtual IPs we cannot use DUMP
+                * if these routes are not installed in a separate table */
+               hdr->nlmsg_flags |= NLM_F_DUMP;
+       }
        if (candidate)
        {
                chunk = candidate->get_address(candidate);
                netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
        }
-       chunk = dest->get_address(dest);
-       netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
+       if (!match_net)
+       {
+               chunk = dest->get_address(dest);
+               netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
+       }
 
        if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
        {
-               DBG1(DBG_KNL, "getting address to %H failed", dest);
+               DBG2(DBG_KNL, "getting %s to reach %H/%d failed",
+                        nexthop ? "nexthop" : "address", dest, prefix);
                return NULL;
        }
-       this->mutex->lock(this->mutex);
+       routes = linked_list_create();
+       this->lock->read_lock(this->lock);
 
        for (current = out; NLMSG_OK(current, len);
                 current = NLMSG_NEXT(current, len))
@@ -901,123 +1734,62 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                                break;
                        case RTM_NEWROUTE:
                        {
-                               struct rtattr *rta;
-                               size_t rtasize;
-                               chunk_t rta_gtw, rta_src, rta_dst;
-                               u_int32_t rta_oif = 0;
-                               host_t *new_src, *new_gtw;
-                               bool cont = FALSE;
+                               rt_entry_t *other;
                                uintptr_t table;
 
-                               rta_gtw = rta_src = rta_dst = chunk_empty;
-                               msg = (struct rtmsg*)(NLMSG_DATA(current));
-                               rta = RTM_RTA(msg);
-                               rtasize = RTM_PAYLOAD(current);
-                               while (RTA_OK(rta, rtasize))
+                               if (!route_usable(current))
                                {
-                                       switch (rta->rta_type)
-                                       {
-                                               case RTA_PREFSRC:
-                                                       rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_GATEWAY:
-                                                       rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_DST:
-                                                       rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_OIF:
-                                                       if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
-                                                       {
-                                                               rta_oif = *(u_int32_t*)RTA_DATA(rta);
-                                                       }
-                                                       break;
-                                       }
-                                       rta = RTA_NEXT(rta, rtasize);
-                               }
-                               if (msg->rtm_dst_len <= best)
-                               {       /* not better than a previous one */
                                        continue;
                                }
-                               enumerator = this->rt_exclude->create_enumerator(this->rt_exclude);
-                               while (enumerator->enumerate(enumerator, &table))
-                               {
-                                       if (table == msg->rtm_table)
-                                       {
-                                               cont = TRUE;
-                                               break;
-                                       }
-                               }
-                               enumerator->destroy(enumerator);
-                               if (cont)
-                               {
+                               route = parse_route(current, route);
+
+                               table = (uintptr_t)route->table;
+                               if (this->rt_exclude->find_first(this->rt_exclude, NULL,
+                                                                                                (void**)&table) == SUCCESS)
+                               {       /* route is from an excluded routing table */
                                        continue;
                                }
                                if (this->routing_table != 0 &&
-                                       msg->rtm_table == this->routing_table)
+                                       route->table == this->routing_table)
                                {       /* route is from our own ipsec routing table */
                                        continue;
                                }
-                               if (rta_oif && !is_interface_up(this, rta_oif))
+                               if (route->oif && !is_interface_up_and_usable(this, route->oif))
                                {       /* interface is down */
                                        continue;
                                }
-                               if (!addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))
+                               if (!addr_in_subnet(chunk, prefix, route->dst, route->dst_len))
                                {       /* route destination does not contain dest */
                                        continue;
                                }
-
-                               if (nexthop)
-                               {
-                                       /* nexthop lookup, return gateway if any */
-                                       DESTROY_IF(gtw);
-                                       gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
-                                       best = msg->rtm_dst_len;
-                                       continue;
-                               }
-                               if (rta_src.ptr)
-                               {       /* got a source address */
-                                       new_src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
-                                       if (new_src)
-                                       {
-                                               if (get_vip_refcount(this, new_src))
-                                               {       /* skip source address if it is installed by us */
-                                                       new_src->destroy(new_src);
-                                               }
-                                               else
-                                               {
-                                                       DESTROY_IF(src);
-                                                       src = new_src;
-                                                       best = msg->rtm_dst_len;
-                                               }
+                               if (route->src.ptr)
+                               {       /* verify source address, if any */
+                                       host_t *src = host_create_from_chunk(msg->rtm_family,
+                                                                                                                route->src, 0);
+                                       if (src && is_known_vip(this, src))
+                                       {       /* ignore routes installed by us */
+                                               src->destroy(src);
+                                               continue;
                                        }
-                                       continue;
+                                       route->src_host = src;
                                }
-                               if (rta_oif)
-                               {       /* no src or gtw, but an interface. Get address from it. */
-                                       new_src = get_interface_address(this, rta_oif,
-                                                                                                       msg->rtm_family);
-                                       if (new_src)
+                               /* insert route, sorted by priority and network prefix */
+                               enumerator = routes->create_enumerator(routes);
+                               while (enumerator->enumerate(enumerator, &other))
+                               {
+                                       if (route->priority < other->priority)
                                        {
-                                               DESTROY_IF(src);
-                                               src = new_src;
-                                               best = msg->rtm_dst_len;
+                                               break;
                                        }
-                                       continue;
-                               }
-                               if (rta_gtw.ptr)
-                               {       /* no source, but a gateway. Lookup source to reach gtw. */
-                                       new_gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
-                                       new_src = get_route(this, new_gtw, FALSE, candidate);
-                                       new_gtw->destroy(new_gtw);
-                                       if (new_src)
+                                       if (route->priority == other->priority &&
+                                               route->dst_len > other->dst_len)
                                        {
-                                               DESTROY_IF(src);
-                                               src = new_src;
-                                               best = msg->rtm_dst_len;
+                                               break;
                                        }
-                                       continue;
                                }
+                               routes->insert_before(routes, enumerator, route);
+                               enumerator->destroy(enumerator);
+                               route = NULL;
                                continue;
                        }
                        default:
@@ -1025,30 +1797,130 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                }
                break;
        }
-       free(out);
-       this->mutex->unlock(this->mutex);
+       if (route)
+       {
+               rt_entry_destroy(route);
+       }
+
+       /* now we have a list of routes matching dest, sorted by priority and
+        * net prefix. we will look for source addresses for these routes and
+        * select the one with the preferred source address, if possible */
+       enumerator = routes->create_enumerator(routes);
+       while (enumerator->enumerate(enumerator, &route))
+       {
+               if (route->src_host)
+               {       /* got a source address with the route, if no preferred source
+                        * is given or it matches we are done, as this is the best route */
+                       if (!candidate || candidate->ip_equals(candidate, route->src_host))
+                       {
+                               best = route;
+                               break;
+                       }
+                       else if (route->oif)
+                       {       /* no match yet, maybe it is assigned to the same interface */
+                               host_t *src = get_interface_address(this, route->oif,
+                                                                                       msg->rtm_family, dest, candidate);
+                               if (src && src->ip_equals(src, candidate))
+                               {
+                                       route->src_host->destroy(route->src_host);
+                                       route->src_host = src;
+                                       best = route;
+                                       break;
+                               }
+                               DESTROY_IF(src);
+                       }
+                       /* no luck yet with the source address. if this is the best (first)
+                        * route we store it as fallback in case we don't find a route with
+                        * the preferred source */
+                       best = best ?: route;
+                       continue;
+               }
+               if (route->oif)
+               {       /* no src, but an interface - get address from it */
+                       route->src_host = get_interface_address(this, route->oif,
+                                                                                       msg->rtm_family, dest, candidate);
+                       if (route->src_host)
+                       {       /* we handle this address the same as the one above */
+                               if (!candidate ||
+                                        candidate->ip_equals(candidate, route->src_host))
+                               {
+                                       best = route;
+                                       break;
+                               }
+                               best = best ?: route;
+                               continue;
+                       }
+               }
+               if (route->gtw.ptr)
+               {       /* no src, no iface, but a gateway - lookup src to reach gtw */
+                       host_t *gtw;
+
+                       gtw = host_create_from_chunk(msg->rtm_family, route->gtw, 0);
+                       if (gtw && !gtw->ip_equals(gtw, dest))
+                       {
+                               route->src_host = get_route(this, gtw, -1, FALSE, candidate,
+                                                                                       recursion + 1);
+                       }
+                       DESTROY_IF(gtw);
+                       if (route->src_host)
+                       {       /* more of the same */
+                               if (!candidate ||
+                                        candidate->ip_equals(candidate, route->src_host))
+                               {
+                                       best = route;
+                                       break;
+                               }
+                               best = best ?: route;
+                       }
+               }
+       }
+       enumerator->destroy(enumerator);
 
        if (nexthop)
+       {       /* nexthop lookup, return gateway if any */
+               if (best || routes->get_first(routes, (void**)&best) == SUCCESS)
+               {
+                       addr = host_create_from_chunk(msg->rtm_family, best->gtw, 0);
+               }
+               if (!addr && !match_net)
+               {       /* fallback to destination address */
+                       addr = dest->clone(dest);
+               }
+       }
+       else
        {
-               if (gtw)
+               if (best)
                {
-                       return gtw;
+                       addr = best->src_host->clone(best->src_host);
                }
-               return dest->clone(dest);
        }
-       return src;
+       this->lock->unlock(this->lock);
+       routes->destroy_function(routes, (void*)rt_entry_destroy);
+       free(out);
+
+       if (addr)
+       {
+               DBG2(DBG_KNL, "using %H as %s to reach %H/%d", addr,
+                        nexthop ? "nexthop" : "address", dest, prefix);
+       }
+       else if (!recursion)
+       {
+               DBG2(DBG_KNL, "no %s found to reach %H/%d",
+                        nexthop ? "nexthop" : "address", dest, prefix);
+       }
+       return addr;
 }
 
 METHOD(kernel_net_t, get_source_addr, host_t*,
        private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
 {
-       return get_route(this, dest, FALSE, src);
+       return get_route(this, dest, -1, FALSE, src, 0);
 }
 
 METHOD(kernel_net_t, get_nexthop, host_t*,
-       private_kernel_netlink_net_t *this, host_t *dest)
+       private_kernel_netlink_net_t *this, host_t *dest, int prefix, host_t *src)
 {
-       return get_route(this, dest, TRUE, NULL);
+       return get_route(this, dest, prefix, TRUE, src, 0);
 }
 
 /**
@@ -1056,7 +1928,7 @@ METHOD(kernel_net_t, get_nexthop, host_t*,
  * By setting the appropriate nlmsg_type, the ip will be set or unset.
  */
 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
-                                                         int flags, int if_index, host_t *ip)
+                                                         int flags, int if_index, host_t *ip, int prefix)
 {
        netlink_buf_t request;
        struct nlmsghdr *hdr;
@@ -1067,107 +1939,146 @@ static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type
 
        chunk = ip->get_address(ip);
 
-       hdr = (struct nlmsghdr*)request;
+       hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
        hdr->nlmsg_type = nlmsg_type;
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
 
-       msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
+       msg = NLMSG_DATA(hdr);
        msg->ifa_family = ip->get_family(ip);
        msg->ifa_flags = 0;
-       msg->ifa_prefixlen = 8 * chunk.len;
+       msg->ifa_prefixlen = prefix < 0 ? chunk.len * 8 : prefix;
        msg->ifa_scope = RT_SCOPE_UNIVERSE;
        msg->ifa_index = if_index;
 
        netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
 
+       if (ip->get_family(ip) == AF_INET6 && this->rta_prefsrc_for_ipv6)
+       {       /* if source routes are possible we let the virtual IP get deprecated
+                * immediately (but mark it as valid forever) so it gets only used if
+                * forced by our route, and not by the default IPv6 address selection */
+               struct ifa_cacheinfo cache = {
+                       .ifa_valid = 0xFFFFFFFF,
+                       .ifa_prefered = 0,
+               };
+               netlink_add_attribute(hdr, IFA_CACHEINFO, chunk_from_thing(cache),
+                                                         sizeof(request));
+       }
        return this->socket->send_ack(this->socket, hdr);
 }
 
 METHOD(kernel_net_t, add_ip, status_t,
-       private_kernel_netlink_net_t *this, host_t *virtual_ip, host_t *iface_ip)
+       private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
+       char *iface_name)
 {
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       enumerator_t *addrs, *ifaces;
-       int ifindex;
+       addr_map_entry_t *entry, lookup = {
+               .ip = virtual_ip,
+       };
+       iface_entry_t *iface = NULL;
 
        if (!this->install_virtual_ip)
        {       /* disabled by config */
                return SUCCESS;
        }
 
-       DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
-
-       this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
-       {
-               bool iface_found = FALSE;
-
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
-               {
-                       if (iface_ip->ip_equals(iface_ip, addr->ip))
+       this->lock->write_lock(this->lock);
+       /* the virtual IP might actually be installed as regular IP, in which case
+        * we don't track it as virtual IP */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match);
+       if (!entry)
+       {       /* otherwise it might already be installed as virtual IP */
+               entry = this->vips->get_match(this->vips, &lookup,
+                                                                        (void*)addr_map_entry_match);
+               if (entry)
+               {       /* the vip we found can be in one of three states: 1) installed and
+                        * ready, 2) just added by another thread, but not yet confirmed to
+                        * be installed by the kernel, 3) just deleted, but not yet gone.
+                        * Then while we wait below, several things could happen (as we
+                        * release the lock).  For instance, the interface could disappear,
+                        * or the IP is finally deleted, and it reappears on a different
+                        * interface. All these cases are handled by the call below. */
+                       while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
                        {
-                               iface_found = TRUE;
+                               this->condvar->wait(this->condvar, this->lock);
                        }
-                       else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
+                       if (entry)
                        {
-                               addr->refcount++;
-                               DBG2(DBG_KNL, "virtual IP %H already installed on %s",
-                                        virtual_ip, iface->ifname);
-                               addrs->destroy(addrs);
-                               ifaces->destroy(ifaces);
-                               this->mutex->unlock(this->mutex);
-                               return SUCCESS;
+                               entry->addr->refcount++;
                        }
                }
-               addrs->destroy(addrs);
-
-               if (iface_found)
+       }
+       if (entry)
+       {
+               DBG2(DBG_KNL, "virtual IP %H is already installed on %s", virtual_ip,
+                        entry->iface->ifname);
+               this->lock->unlock(this->lock);
+               return SUCCESS;
+       }
+       /* try to find the target interface, either by config or via src ip */
+       if (!this->install_virtual_ip_on ||
+                this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
+                                               (void**)&iface, this->install_virtual_ip_on) != SUCCESS)
+       {
+               if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
+                                                                        (void**)&iface, iface_name) != SUCCESS)
+               {       /* if we don't find the requested interface we just use the first */
+                       this->ifaces->get_first(this->ifaces, (void**)&iface);
+               }
+       }
+       if (iface)
+       {
+               addr_entry_t *addr;
+               char *ifname;
+               int ifi;
+
+               INIT(addr,
+                       .ip = virtual_ip->clone(virtual_ip),
+                       .refcount = 1,
+                       .scope = RT_SCOPE_UNIVERSE,
+               );
+               iface->addrs->insert_last(iface->addrs, addr);
+               addr_map_entry_add(this->vips, addr, iface);
+               ifi = iface->ifindex;
+               this->lock->unlock(this->lock);
+               if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
+                                                 ifi, virtual_ip, prefix) == SUCCESS)
                {
-                       ifindex = iface->ifindex;
-                       addr = malloc_thing(addr_entry_t);
-                       addr->ip = virtual_ip->clone(virtual_ip);
-                       addr->refcount = 0;
-                       addr->virtual = TRUE;
-                       addr->scope = RT_SCOPE_UNIVERSE;
-                       iface->addrs->insert_last(iface->addrs, addr);
-
-                       if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
-                                                         ifindex, virtual_ip) == SUCCESS)
-                       {
-                               while (get_vip_refcount(this, virtual_ip) == 0)
-                               {       /* wait until address appears */
-                                       this->condvar->wait(this->condvar, this->mutex);
-                               }
-                               ifaces->destroy(ifaces);
-                               this->mutex->unlock(this->mutex);
+                       this->lock->write_lock(this->lock);
+                       while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
+                       {       /* wait until address appears */
+                               this->condvar->wait(this->condvar, this->lock);
+                       }
+                       if (entry)
+                       {       /* we fail if the interface got deleted in the meantime */
+                               ifname = strdup(entry->iface->ifname);
+                               this->lock->unlock(this->lock);
+                               DBG2(DBG_KNL, "virtual IP %H installed on %s",
+                                        virtual_ip, ifname);
+                               /* during IKEv1 reauthentication, children get moved from
+                                * the old to the new SA before the virtual IP is available.
+                                * This kills the route for our virtual IP, so reinstall it. */
+                               queue_route_reinstall(this, ifname);
                                return SUCCESS;
                        }
-                       ifaces->destroy(ifaces);
-                       this->mutex->unlock(this->mutex);
-                       DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
-                       return FAILED;
+                       this->lock->unlock(this->lock);
                }
+               DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
+               return FAILED;
        }
-       ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
-
-       DBG1(DBG_KNL, "interface address %H not found, unable to install"
-                "virtual IP %H", iface_ip, virtual_ip);
+       this->lock->unlock(this->lock);
+       DBG1(DBG_KNL, "no interface available, unable to install virtual IP %H",
+                virtual_ip);
        return FAILED;
 }
 
 METHOD(kernel_net_t, del_ip, status_t,
-       private_kernel_netlink_net_t *this, host_t *virtual_ip)
+       private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
+       bool wait)
 {
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       enumerator_t *addrs, *ifaces;
-       status_t status;
-       int ifindex;
+       addr_map_entry_t *entry, lookup = {
+               .ip = virtual_ip,
+       };
 
        if (!this->install_virtual_ip)
        {       /* disabled by config */
@@ -1176,64 +2087,69 @@ METHOD(kernel_net_t, del_ip, status_t,
 
        DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
 
-       this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
-       {
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
+       this->lock->write_lock(this->lock);
+       entry = this->vips->get_match(this->vips, &lookup,
+                                                                (void*)addr_map_entry_match);
+       if (!entry)
+       {       /* we didn't install this IP as virtual IP */
+               entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                         (void*)addr_map_entry_match);
+               if (entry)
                {
-                       if (virtual_ip->ip_equals(virtual_ip, addr->ip))
+                       DBG2(DBG_KNL, "not deleting existing IP %H on %s", virtual_ip,
+                                entry->iface->ifname);
+                       this->lock->unlock(this->lock);
+                       return SUCCESS;
+               }
+               DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
+               this->lock->unlock(this->lock);
+               return FAILED;
+       }
+       if (entry->addr->refcount == 1)
+       {
+               status_t status;
+               int ifi;
+
+               /* we set this flag so that threads calling add_ip will block and wait
+                * until the entry is gone, also so we can wait below */
+               entry->addr->installed = FALSE;
+               ifi = entry->iface->ifindex;
+               this->lock->unlock(this->lock);
+               status = manage_ipaddr(this, RTM_DELADDR, 0, ifi, virtual_ip, prefix);
+               if (status == SUCCESS && wait)
+               {       /* wait until the address is really gone */
+                       this->lock->write_lock(this->lock);
+                       while (is_known_vip(this, virtual_ip))
                        {
-                               ifindex = iface->ifindex;
-                               if (addr->refcount == 1)
-                               {
-                                       status = manage_ipaddr(this, RTM_DELADDR, 0,
-                                                                                  ifindex, virtual_ip);
-                                       if (status == SUCCESS)
-                                       {       /* wait until the address is really gone */
-                                               while (get_vip_refcount(this, virtual_ip) > 0)
-                                               {
-                                                       this->condvar->wait(this->condvar, this->mutex);
-                                               }
-                                       }
-                                       addrs->destroy(addrs);
-                                       ifaces->destroy(ifaces);
-                                       this->mutex->unlock(this->mutex);
-                                       return status;
-                               }
-                               else
-                               {
-                                       addr->refcount--;
-                               }
-                               DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
-                                        virtual_ip);
-                               addrs->destroy(addrs);
-                               ifaces->destroy(ifaces);
-                               this->mutex->unlock(this->mutex);
-                               return SUCCESS;
+                               this->condvar->wait(this->condvar, this->lock);
                        }
+                       this->lock->unlock(this->lock);
                }
-               addrs->destroy(addrs);
+               return status;
        }
-       ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
-
-       DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
-       return FAILED;
+       else
+       {
+               entry->addr->refcount--;
+       }
+       DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
+                virtual_ip);
+       this->lock->unlock(this->lock);
+       return SUCCESS;
 }
 
 /**
  * Manages source routes in the routing table.
  * By setting the appropriate nlmsg_type, the route gets added or removed.
  */
-static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
-                                                               int flags, chunk_t dst_net, u_int8_t prefixlen,
-                                                               host_t *gateway, host_t *src_ip, char *if_name)
+static status_t manage_srcroute(private_kernel_netlink_net_t *this,
+                                                               int nlmsg_type, int flags, chunk_t dst_net,
+                                                               u_int8_t prefixlen, host_t *gateway,
+                                                               host_t *src_ip, char *if_name)
 {
        netlink_buf_t request;
        struct nlmsghdr *hdr;
        struct rtmsg *msg;
+       struct rtattr *rta;
        int ifindex;
        chunk_t chunk;
 
@@ -1260,12 +2176,12 @@ static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_ty
 
        memset(&request, 0, sizeof(request));
 
-       hdr = (struct nlmsghdr*)request;
+       hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
        hdr->nlmsg_type = nlmsg_type;
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 
-       msg = (struct rtmsg*)NLMSG_DATA(hdr);
+       msg = NLMSG_DATA(hdr);
        msg->rtm_family = src_ip->get_family(src_ip);
        msg->rtm_dst_len = prefixlen;
        msg->rtm_table = this->routing_table;
@@ -1286,6 +2202,30 @@ static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_ty
        chunk.len = sizeof(ifindex);
        netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
 
+       if (this->mtu || this->mss)
+       {
+               chunk = chunk_alloca(RTA_LENGTH((sizeof(struct rtattr) +
+                                                                                sizeof(u_int32_t)) * 2));
+               chunk.len = 0;
+               rta = (struct rtattr*)chunk.ptr;
+               if (this->mtu)
+               {
+                       rta->rta_type = RTAX_MTU;
+                       rta->rta_len = RTA_LENGTH(sizeof(u_int32_t));
+                       memcpy(RTA_DATA(rta), &this->mtu, sizeof(u_int32_t));
+                       chunk.len = rta->rta_len;
+               }
+               if (this->mss)
+               {
+                       rta = (struct rtattr*)(chunk.ptr + RTA_ALIGN(chunk.len));
+                       rta->rta_type = RTAX_ADVMSS;
+                       rta->rta_len = RTA_LENGTH(sizeof(u_int32_t));
+                       memcpy(RTA_DATA(rta), &this->mss, sizeof(u_int32_t));
+                       chunk.len = RTA_ALIGN(chunk.len) + rta->rta_len;
+               }
+               netlink_add_attribute(hdr, RTA_METRICS, chunk, sizeof(request));
+       }
+
        return this->socket->send_ack(this->socket, hdr);
 }
 
@@ -1293,16 +2233,59 @@ METHOD(kernel_net_t, add_route, status_t,
        private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
        host_t *gateway, host_t *src_ip, char *if_name)
 {
-       return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
-                               dst_net, prefixlen, gateway, src_ip, if_name);
+       status_t status;
+       route_entry_t *found, route = {
+               .dst_net = dst_net,
+               .prefixlen = prefixlen,
+               .gateway = gateway,
+               .src_ip = src_ip,
+               .if_name = if_name,
+       };
+
+       this->routes_lock->lock(this->routes_lock);
+       found = this->routes->get(this->routes, &route);
+       if (found)
+       {
+               this->routes_lock->unlock(this->routes_lock);
+               return ALREADY_DONE;
+       }
+       status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
+                                                        dst_net, prefixlen, gateway, src_ip, if_name);
+       if (status == SUCCESS)
+       {
+               found = route_entry_clone(&route);
+               this->routes->put(this->routes, found, found);
+       }
+       this->routes_lock->unlock(this->routes_lock);
+       return status;
 }
 
 METHOD(kernel_net_t, del_route, status_t,
        private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
        host_t *gateway, host_t *src_ip, char *if_name)
 {
-       return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
-                               gateway, src_ip, if_name);
+       status_t status;
+       route_entry_t *found, route = {
+               .dst_net = dst_net,
+               .prefixlen = prefixlen,
+               .gateway = gateway,
+               .src_ip = src_ip,
+               .if_name = if_name,
+       };
+
+       this->routes_lock->lock(this->routes_lock);
+       found = this->routes->get(this->routes, &route);
+       if (!found)
+       {
+               this->routes_lock->unlock(this->routes_lock);
+               return NOT_FOUND;
+       }
+       this->routes->remove(this->routes, found);
+       route_entry_destroy(found);
+       status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
+                                                        gateway, src_ip, if_name);
+       this->routes_lock->unlock(this->routes_lock);
+       return status;
 }
 
 /**
@@ -1318,14 +2301,14 @@ static status_t init_address_list(private_kernel_netlink_net_t *this)
        iface_entry_t *iface;
        addr_entry_t *addr;
 
-       DBG1(DBG_KNL, "listening on interfaces:");
+       DBG2(DBG_KNL, "known interfaces and IP addresses:");
 
        memset(&request, 0, sizeof(request));
 
-       in = (struct nlmsghdr*)&request;
+       in = &request.hdr;
        in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
        in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
-       msg = (struct rtgenmsg*)NLMSG_DATA(in);
+       msg = NLMSG_DATA(in);
        msg->rtgen_family = AF_UNSPEC;
 
        /* get all links */
@@ -1376,23 +2359,23 @@ static status_t init_address_list(private_kernel_netlink_net_t *this)
        }
        free(out);
 
-       this->mutex->lock(this->mutex);
+       this->lock->read_lock(this->lock);
        ifaces = this->ifaces->create_enumerator(this->ifaces);
        while (ifaces->enumerate(ifaces, &iface))
        {
-               if (iface->flags & IFF_UP)
+               if (iface_entry_up_and_usable(iface))
                {
-                       DBG1(DBG_KNL, "  %s", iface->ifname);
+                       DBG2(DBG_KNL, "  %s", iface->ifname);
                        addrs = iface->addrs->create_enumerator(iface->addrs);
                        while (addrs->enumerate(addrs, (void**)&addr))
                        {
-                               DBG1(DBG_KNL, "    %H", addr->ip);
+                               DBG2(DBG_KNL, "    %H", addr->ip);
                        }
                        addrs->destroy(addrs);
                }
        }
        ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
+       this->lock->unlock(this->lock);
        return SUCCESS;
 }
 
@@ -1406,9 +2389,10 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
        struct nlmsghdr *hdr;
        struct rtmsg *msg;
        chunk_t chunk;
+       char *fwmark;
 
        memset(&request, 0, sizeof(request));
-       hdr = (struct nlmsghdr*)request;
+       hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        hdr->nlmsg_type = nlmsg_type;
        if (nlmsg_type == RTM_NEWRULE)
@@ -1417,7 +2401,7 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
        }
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 
-       msg = (struct rtmsg*)NLMSG_DATA(hdr);
+       msg = NLMSG_DATA(hdr);
        msg->rtm_table = table;
        msg->rtm_family = family;
        msg->rtm_protocol = RTPROT_BOOT;
@@ -1427,12 +2411,94 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
        chunk = chunk_from_thing(prio);
        netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
 
+       fwmark = lib->settings->get_str(lib->settings,
+                                                       "%s.plugins.kernel-netlink.fwmark", NULL, lib->ns);
+       if (fwmark)
+       {
+#ifdef HAVE_LINUX_FIB_RULES_H
+               mark_t mark;
+
+               if (fwmark[0] == '!')
+               {
+                       msg->rtm_flags |= FIB_RULE_INVERT;
+                       fwmark++;
+               }
+               if (mark_from_string(fwmark, &mark))
+               {
+                       chunk = chunk_from_thing(mark.value);
+                       netlink_add_attribute(hdr, FRA_FWMARK, chunk, sizeof(request));
+                       chunk = chunk_from_thing(mark.mask);
+                       netlink_add_attribute(hdr, FRA_FWMASK, chunk, sizeof(request));
+                       if (msg->rtm_flags & FIB_RULE_INVERT)
+                       {
+                               this->routing_mark = mark;
+                       }
+               }
+#else
+               DBG1(DBG_KNL, "setting firewall mark on routing rule is not supported");
+#endif
+       }
        return this->socket->send_ack(this->socket, hdr);
 }
 
+/**
+ * check for kernel features (currently only via version number)
+ */
+static void check_kernel_features(private_kernel_netlink_net_t *this)
+{
+       struct utsname utsname;
+       int a, b, c;
+
+       if (uname(&utsname) == 0)
+       {
+               switch(sscanf(utsname.release, "%d.%d.%d", &a, &b, &c))
+               {
+                       case 3:
+                               if (a == 2)
+                               {
+                                       if (b == 6 && c >= 36)
+                                       {
+                                               this->rta_mark = TRUE;
+                                       }
+                                       DBG2(DBG_KNL, "detected Linux %d.%d.%d, no support for "
+                                                "RTA_PREFSRC for IPv6 routes", a, b, c);
+                                       break;
+                               }
+                               /* fall-through */
+                       case 2:
+                               /* only 3.x+ uses two part version numbers */
+                               this->rta_prefsrc_for_ipv6 = TRUE;
+                               this->rta_mark = TRUE;
+                               break;
+                       default:
+                               break;
+               }
+       }
+}
+
+/**
+ * Destroy an address to iface map
+ */
+static void addr_map_destroy(hashtable_t *map)
+{
+       enumerator_t *enumerator;
+       addr_map_entry_t *addr;
+
+       enumerator = map->create_enumerator(map);
+       while (enumerator->enumerate(enumerator, NULL, (void**)&addr))
+       {
+               free(addr);
+       }
+       enumerator->destroy(enumerator);
+       map->destroy(map);
+}
+
 METHOD(kernel_net_t, destroy, void,
        private_kernel_netlink_net_t *this)
 {
+       enumerator_t *enumerator;
+       route_entry_t *route;
+
        if (this->routing_table)
        {
                manage_rule(this, RTM_DELRULE, AF_INET, this->routing_table,
@@ -1440,19 +2506,35 @@ METHOD(kernel_net_t, destroy, void,
                manage_rule(this, RTM_DELRULE, AF_INET6, this->routing_table,
                                        this->routing_table_prio);
        }
-       if (this->job)
-       {
-               this->job->cancel(this->job);
-       }
        if (this->socket_events > 0)
        {
+               lib->watcher->remove(lib->watcher, this->socket_events);
                close(this->socket_events);
        }
+       enumerator = this->routes->create_enumerator(this->routes);
+       while (enumerator->enumerate(enumerator, NULL, (void**)&route))
+       {
+               manage_srcroute(this, RTM_DELROUTE, 0, route->dst_net, route->prefixlen,
+                                               route->gateway, route->src_ip, route->if_name);
+               route_entry_destroy(route);
+       }
+       enumerator->destroy(enumerator);
+       this->routes->destroy(this->routes);
+       this->routes_lock->destroy(this->routes_lock);
        DESTROY_IF(this->socket);
+
+       net_changes_clear(this);
+       this->net_changes->destroy(this->net_changes);
+       this->net_changes_lock->destroy(this->net_changes_lock);
+
+       addr_map_destroy(this->addrs);
+       addr_map_destroy(this->vips);
+
        this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
        this->rt_exclude->destroy(this->rt_exclude);
+       this->roam_lock->destroy(this->roam_lock);
        this->condvar->destroy(this->condvar);
-       this->mutex->destroy(this->mutex);
+       this->lock->destroy(this->lock);
        free(this);
 }
 
@@ -1462,8 +2544,8 @@ METHOD(kernel_net_t, destroy, void,
 kernel_netlink_net_t *kernel_netlink_net_create()
 {
        private_kernel_netlink_net_t *this;
-       struct sockaddr_nl addr;
        enumerator_t *enumerator;
+       bool register_for_events = TRUE;
        char *exclude;
 
        INIT(this,
@@ -1480,24 +2562,57 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                                .destroy = _destroy,
                        },
                },
-               .socket = netlink_socket_create(NETLINK_ROUTE),
+               .socket = netlink_socket_create(NETLINK_ROUTE, rt_msg_names,
+                       lib->settings->get_bool(lib->settings,
+                               "%s.plugins.kernel-netlink.parallel_route", FALSE, lib->ns)),
                .rt_exclude = linked_list_create(),
+               .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
+                                                                  (hashtable_equals_t)route_entry_equals, 16),
+               .net_changes = hashtable_create(
+                                                                  (hashtable_hash_t)net_change_hash,
+                                                                  (hashtable_equals_t)net_change_equals, 16),
+               .addrs = hashtable_create(
+                                                               (hashtable_hash_t)addr_map_entry_hash,
+                                                               (hashtable_equals_t)addr_map_entry_equals, 16),
+               .vips = hashtable_create((hashtable_hash_t)addr_map_entry_hash,
+                                                                (hashtable_equals_t)addr_map_entry_equals, 16),
+               .routes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
+               .net_changes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
                .ifaces = linked_list_create(),
-               .mutex = mutex_create(MUTEX_TYPE_RECURSIVE),
-               .condvar = condvar_create(CONDVAR_TYPE_DEFAULT),
+               .lock = rwlock_create(RWLOCK_TYPE_DEFAULT),
+               .condvar = rwlock_condvar_create(),
+               .roam_lock = spinlock_create(),
                .routing_table = lib->settings->get_int(lib->settings,
-                               "%s.routing_table", ROUTING_TABLE, hydra->daemon),
+                                               "%s.routing_table", ROUTING_TABLE, lib->ns),
                .routing_table_prio = lib->settings->get_int(lib->settings,
-                               "%s.routing_table_prio", ROUTING_TABLE_PRIO, hydra->daemon),
+                                               "%s.routing_table_prio", ROUTING_TABLE_PRIO, lib->ns),
                .process_route = lib->settings->get_bool(lib->settings,
-                               "%s.process_route", TRUE, hydra->daemon),
+                                               "%s.process_route", TRUE, lib->ns),
                .install_virtual_ip = lib->settings->get_bool(lib->settings,
-                               "%s.install_virtual_ip", TRUE, hydra->daemon),
+                                               "%s.install_virtual_ip", TRUE, lib->ns),
+               .install_virtual_ip_on = lib->settings->get_str(lib->settings,
+                                               "%s.install_virtual_ip_on", NULL, lib->ns),
+               .prefer_temporary_addrs = lib->settings->get_bool(lib->settings,
+                                               "%s.prefer_temporary_addrs", FALSE, lib->ns),
+               .roam_events = lib->settings->get_bool(lib->settings,
+                                               "%s.plugins.kernel-netlink.roam_events", TRUE, lib->ns),
+               .mtu = lib->settings->get_int(lib->settings,
+                                               "%s.plugins.kernel-netlink.mtu", 0, lib->ns),
+               .mss = lib->settings->get_int(lib->settings,
+                                               "%s.plugins.kernel-netlink.mss", 0, lib->ns),
        );
-       timerclear(&this->last_roam);
+       timerclear(&this->last_route_reinstall);
+       timerclear(&this->next_roam);
+
+       check_kernel_features(this);
+
+       if (streq(lib->ns, "starter"))
+       {       /* starter has no threads, so we do not register for kernel events */
+               register_for_events = FALSE;
+       }
 
        exclude = lib->settings->get_str(lib->settings,
-                                       "%s.ignore_routing_tables", NULL, hydra->daemon);
+                                                                        "%s.ignore_routing_tables", NULL, lib->ns);
        if (exclude)
        {
                char *token;
@@ -1517,29 +2632,33 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                enumerator->destroy(enumerator);
        }
 
-       memset(&addr, 0, sizeof(addr));
-       addr.nl_family = AF_NETLINK;
-
-       /* create and bind RT socket for events (address/interface/route changes) */
-       this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
-       if (this->socket_events < 0)
-       {
-               DBG1(DBG_KNL, "unable to create RT event socket");
-               destroy(this);
-               return NULL;
-       }
-       addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
-                                        RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
-       if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
+       if (register_for_events)
        {
-               DBG1(DBG_KNL, "unable to bind RT event socket");
-               destroy(this);
-               return NULL;
-       }
+               struct sockaddr_nl addr;
+
+               memset(&addr, 0, sizeof(addr));
+               addr.nl_family = AF_NETLINK;
+
+               /* create and bind RT socket for events (address/interface/route changes) */
+               this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+               if (this->socket_events < 0)
+               {
+                       DBG1(DBG_KNL, "unable to create RT event socket");
+                       destroy(this);
+                       return NULL;
+               }
+               addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
+                                                RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_LINK;
+               if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
+               {
+                       DBG1(DBG_KNL, "unable to bind RT event socket");
+                       destroy(this);
+                       return NULL;
+               }
 
-       this->job = callback_job_create_with_prio((callback_job_cb_t)receive_events,
-                                                                               this, NULL, NULL, JOB_PRIO_CRITICAL);
-       lib->processor->queue_job(lib->processor, (job_t*)this->job);
+               lib->watcher->add(lib->watcher, this->socket_events, WATCHER_READ,
+                                                 (watcher_cb_t)receive_events, this);
+       }
 
        if (init_address_list(this) != SUCCESS)
        {