Use a separate mutex for cached routes in kernel-netlink plugin
[strongswan.git] / src / libhydra / plugins / kernel_netlink / kernel_netlink_net.c
index 314c1ac..4b64a8d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Tobias Brunner
+ * Copyright (C) 2008-2012 Tobias Brunner
  * Copyright (C) 2005-2008 Martin Willi
  * Hochschule fuer Technik Rapperswil
  *
@@ -38,6 +38,7 @@
  */
 
 #include <sys/socket.h>
+#include <sys/utsname.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <unistd.h>
 #include <threading/thread.h>
 #include <threading/condvar.h>
 #include <threading/mutex.h>
+#include <threading/spinlock.h>
+#include <utils/hashtable.h>
 #include <utils/linked_list.h>
 #include <processing/jobs/callback_job.h>
 
 /** delay before firing roam events (ms) */
 #define ROAM_DELAY 100
 
+/** delay before reinstalling routes (ms) */
+#define ROUTE_DELAY 100
+
 typedef struct addr_entry_t addr_entry_t;
 
 /**
- * IP address in an inface_entry_t
+ * IP address in an iface_entry_t
  */
 struct addr_entry_t {
 
-       /** The ip address */
+       /** the ip address */
        host_t *ip;
 
-       /** virtual IP managed by us */
-       bool virtual;
-
        /** scope of the address */
        u_char scope;
 
-       /** Number of times this IP is used, if virtual */
+       /** number of times this IP is used, if virtual (i.e. managed by us) */
        u_int refcount;
+
+       /** TRUE once it is installed, if virtual */
+       bool installed;
 };
 
 /**
@@ -105,6 +111,9 @@ struct iface_entry_t {
 
        /** list of addresses as host_t */
        linked_list_t *addrs;
+
+       /** TRUE if usable by config */
+       bool usable;
 };
 
 /**
@@ -116,6 +125,208 @@ static void iface_entry_destroy(iface_entry_t *this)
        free(this);
 }
 
+/**
+ * match callback for find_first(): TRUE if the entry's ifindex equals *ifindex
+ */
+static bool iface_entry_by_index(iface_entry_t *this, int *ifindex)
+{
+       return this->ifindex == *ifindex;
+}
+
+/**
+ * match predicate: TRUE if the entry's ifname equals ifname (via streq)
+ */
+static bool iface_entry_by_name(iface_entry_t *this, char *ifname)
+{
+       return streq(this->ifname, ifname);
+}
+
+/**
+ * check if an interface is up, i.e. IFF_UP is set in its flags
+ */
+static inline bool iface_entry_up(iface_entry_t *iface)
+{
+       return (iface->flags & IFF_UP) == IFF_UP;
+}
+
+/**
+ * check if an interface is up and usable (usable flag set and IFF_UP set)
+ */
+static inline bool iface_entry_up_and_usable(iface_entry_t *iface)
+{
+       return iface->usable && iface_entry_up(iface);
+}
+
+typedef struct addr_map_entry_t addr_map_entry_t;
+
+/**
+ * Hashtable entry that maps an IP address to its address and interface entry
+ */
+struct addr_map_entry_t {
+       /** The IP address; addr_map_entry_add sets this to addr->ip (not a clone) */
+       host_t *ip;
+
+       /** The address entry for this IP address */
+       addr_entry_t *addr;
+
+       /** The interface this address is installed on */
+       iface_entry_t *iface;
+};
+
+/**
+ * Hash an addr_map_entry_t object by its IP address only, so all entries with
+ * the same IP address are stored in the same bucket
+ */
+static u_int addr_map_entry_hash(addr_map_entry_t *this)
+{
+       return chunk_hash(this->ip->get_address(this->ip));
+}
+
+/**
+ * Compare two addr_map_entry_t objects, two entries are equal if they have
+ * the same IP address and are installed on the same interface (by ifindex)
+ */
+static bool addr_map_entry_equals(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       return a->iface->ifindex == b->iface->ifindex &&
+                  a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Used with get_match: matches if b has the same IP as a and b's interface
+ * is up and usable (only a->ip is read, so a may be a bare lookup key)
+ */
+static bool addr_map_entry_match_up_and_usable(addr_map_entry_t *a,
+                                                                                          addr_map_entry_t *b)
+{
+       return iface_entry_up_and_usable(b->iface) &&
+                  a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Used with get_match: matches if b has the same IP as a and b's interface
+ * is up, usable or not (only a->ip is read, so a may be a bare lookup key)
+ */
+static bool addr_map_entry_match_up(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       return iface_entry_up(b->iface) && a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Used with get_match: matches if b has the same IP as a, regardless of the
+ * interface it is installed on or that interface's state
+ */
+static bool addr_map_entry_match(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       return a->ip->ip_equals(a->ip, b->ip);
+}
+
+typedef struct route_entry_t route_entry_t;
+
+/**
+ * Installed routing entry; pointer members are released by route_entry_destroy
+ */
+struct route_entry_t {
+       /** Name of the interface the route is bound to (heap string) */
+       char *if_name;
+
+       /** Source ip of the route */
+       host_t *src_ip;
+
+       /** Gateway for this route */
+       host_t *gateway;
+
+       /** Destination net (hashed together with prefixlen) */
+       chunk_t dst_net;
+
+       /** Destination net prefixlen */
+       u_int8_t prefixlen;
+};
+
+/**
+ * Deep-copy a route_entry_t (free with route_entry_destroy); NULL members stay NULL.
+ */
+static route_entry_t *route_entry_clone(route_entry_t *this)
+{
+       route_entry_t *route;
+
+       INIT(route,
+               .if_name = this->if_name ? strdup(this->if_name) : NULL,
+               .src_ip = this->src_ip ? this->src_ip->clone(this->src_ip) : NULL,
+               .gateway = this->gateway ? this->gateway->clone(this->gateway) : NULL,
+               .dst_net = chunk_clone(this->dst_net),
+               .prefixlen = this->prefixlen,
+       );
+       return route;
+}
+
+/**
+ * Destroy a route_entry_t object together with the members it holds
+ */
+static void route_entry_destroy(route_entry_t *this)
+{
+       free(this->if_name);
+       DESTROY_IF(this->src_ip);
+       DESTROY_IF(this->gateway);
+       chunk_free(&this->dst_net);
+       free(this);
+}
+
+/**
+ * Hash a route_entry_t object over its destination net and prefix length only
+ */
+static u_int route_entry_hash(route_entry_t *this)
+{
+       return chunk_hash_inc(chunk_from_thing(this->prefixlen),
+                                                 chunk_hash(this->dst_net));
+}
+
+/** Compare two route_entry_t objects member-wise; a NULL gateway only equals NULL */
+static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
+{
+       return a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
+                  a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
+                  chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen &&
+                  (a->gateway && b->gateway ?
+                       a->gateway->ip_equals(a->gateway, b->gateway) :
+                       a->gateway == b->gateway);
+}
+
+typedef struct net_change_t net_change_t;
+
+/**
+ * Queued network change, hashed and compared by its interface name
+ */
+struct net_change_t {
+       /** Name of the interface that got activated (or an IP appeared on) */
+       char *if_name;
+};
+
+/**
+ * Destroy a net_change_t object, including its interface name string
+ */
+static void net_change_destroy(net_change_t *this)
+{
+       free(this->if_name);
+       free(this);
+}
+
+/**
+ * Hash a net_change_t object by its interface name (if_name must not be NULL)
+ */
+static u_int net_change_hash(net_change_t *this)
+{
+       return chunk_hash(chunk_create(this->if_name, strlen(this->if_name)));
+}
+
+/**
+ * Compare two net_change_t objects by interface name
+ */
+static bool net_change_equals(net_change_t *a, net_change_t *b)
+{
+       return streq(a->if_name, b->if_name);
+}
+
 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
 
 /**
@@ -143,9 +354,14 @@ struct private_kernel_netlink_net_t {
        linked_list_t *ifaces;
 
        /**
-        * job receiving netlink events
+        * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
+        */
+       hashtable_t *addrs;
+
+       /**
+        * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
         */
-       callback_job_t *job;
+       hashtable_t *vips;
 
        /**
         * netlink rt socket (routing)
@@ -158,9 +374,14 @@ struct private_kernel_netlink_net_t {
        int socket_events;
 
        /**
-        * time of the last roam event
+        * earliest time of the next roam event
+        */
+       timeval_t next_roam;
+
+       /**
+        * lock to check and update roam event time
         */
-       timeval_t last_roam;
+       spinlock_t *roam_lock;
 
        /**
         * routing table to install routes
@@ -173,6 +394,31 @@ struct private_kernel_netlink_net_t {
        int routing_table_prio;
 
        /**
+        * installed routes
+        */
+       hashtable_t *routes;
+
+       /**
+        * mutex for routes
+        */
+       mutex_t *routes_lock;
+
+       /**
+        * interface changes which may trigger route reinstallation
+        */
+       hashtable_t *net_changes;
+
+       /**
+        * mutex for route reinstallation triggers
+        */
+       mutex_t *net_changes_lock;
+
+       /**
+        * time of last route reinstallation
+        */
+       timeval_t last_route_reinstall;
+
+       /**
         * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
         */
        bool process_route;
@@ -183,77 +429,255 @@ struct private_kernel_netlink_net_t {
        bool install_virtual_ip;
 
        /**
+        * the name of the interface virtual IP addresses are installed on
+        */
+       char *install_virtual_ip_on;
+
+       /**
+        * whether preferred source addresses can be specified for IPv6 routes
+        */
+       bool rta_prefsrc_for_ipv6;
+
+       /**
         * list with routing tables to be excluded from route lookup
         */
        linked_list_t *rt_exclude;
 };
 
 /**
- * get the refcount of a virtual ip
+ * Forward declaration
  */
-static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
+static status_t manage_srcroute(private_kernel_netlink_net_t *this,
+                                                               int nlmsg_type, int flags, chunk_t dst_net,
+                                                               u_int8_t prefixlen, host_t *gateway,
+                                                               host_t *src_ip, char *if_name);
+
+/**
+ * Clear the queued network changes.
+ */
+static void net_changes_clear(private_kernel_netlink_net_t *this)
 {
-       iterator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       int refcount = 0;
+       enumerator_t *enumerator;
+       net_change_t *change;
 
-       ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
-       while (ifaces->iterate(ifaces, (void**)&iface))
+       enumerator = this->net_changes->create_enumerator(this->net_changes);
+       while (enumerator->enumerate(enumerator, NULL, (void**)&change))
        {
-               addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
-               while (addrs->iterate(addrs, (void**)&addr))
-               {
-                       if (addr->virtual && (iface->flags & IFF_UP) &&
-                               ip->ip_equals(ip, addr->ip))
+               this->net_changes->remove_at(this->net_changes, enumerator);
+               net_change_destroy(change);
+       }
+       enumerator->destroy(enumerator);
+}
+
+/**
+ * Act upon queued network changes.
+ */
+static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
+{
+       enumerator_t *enumerator;
+       route_entry_t *route;
+
+       this->net_changes_lock->lock(this->net_changes_lock);
+       this->routes_lock->lock(this->routes_lock);
+
+       enumerator = this->routes->create_enumerator(this->routes);
+       while (enumerator->enumerate(enumerator, NULL, (void**)&route))
+       {
+               net_change_t *change, lookup = {
+                       .if_name = route->if_name,
+               };
+               /* check if a change for the outgoing interface is queued */
+               change = this->net_changes->get(this->net_changes, &lookup);
+               if (!change)
+               {       /* in case src_ip is not on the outgoing interface */
+                       if (this->public.interface.get_interface(&this->public.interface,
+                                                                                               route->src_ip, &lookup.if_name))
                        {
-                               refcount = addr->refcount;
-                               break;
+                               if (!streq(lookup.if_name, route->if_name))
+                               {
+                                       change = this->net_changes->get(this->net_changes, &lookup);
+                               }
+                               free(lookup.if_name);
                        }
                }
-               addrs->destroy(addrs);
-               if (refcount)
+               if (change)
                {
-                       break;
+                       manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
+                                                       route->dst_net, route->prefixlen, route->gateway,
+                                                       route->src_ip, route->if_name);
                }
        }
-       ifaces->destroy(ifaces);
+       enumerator->destroy(enumerator);
+       this->routes_lock->unlock(this->routes_lock);
 
-       return refcount;
+       net_changes_clear(this);
+       this->net_changes_lock->unlock(this->net_changes_lock);
+       return JOB_REQUEUE_NONE;
+}
+
+/**
+ * Queue route reinstallation caused by network changes for a given interface.
+ * Takes ownership of if_name (heap-allocated); it is freed with the queued entry.
+ * The reinstall job is scheduled ROUTE_DELAY ms ahead and only once for all
+ * calls made during that delay; last_route_reinstall holds the time until
+ * which no further job is scheduled (tv_usec kept normalized below 1000000).
+ */
+static void queue_route_reinstall(private_kernel_netlink_net_t *this,
+                                                                 char *if_name)
+{
+       net_change_t *update, *found;
+       timeval_t now;
+       job_t *job;
+
+       INIT(update,
+               .if_name = if_name
+       );
+
+       this->net_changes_lock->lock(this->net_changes_lock);
+       found = this->net_changes->put(this->net_changes, update, update);
+       if (found)
+       {
+               net_change_destroy(found);
+       }
+       time_monotonic(&now);
+       if (timercmp(&now, &this->last_route_reinstall, >))
+       {
+               now.tv_usec += ROUTE_DELAY * 1000;
+               while (now.tv_usec >= 1000000)
+               {
+                       now.tv_sec++;
+                       now.tv_usec -= 1000000;
+               }
+               this->last_route_reinstall = now;
+
+               job = (job_t*)callback_job_create((callback_job_cb_t)reinstall_routes,
+                                                                                 this, NULL, NULL);
+               lib->scheduler->schedule_job_ms(lib->scheduler, job, ROUTE_DELAY);
+       }
+       this->net_changes_lock->unlock(this->net_changes_lock);
+}
+
+/**
+ * check if the given IP is known as virtual IP and currently installed
+ *
+ * the lookup in this->vips is by IP only (any interface); *entry receives the
+ * match. returns TRUE too if no entry is found, with *entry set to NULL.
+ *
+ * this->mutex must be locked when calling this function
+ */
+static bool is_vip_installed_or_gone(private_kernel_netlink_net_t *this,
+                                                                        host_t *ip, addr_map_entry_t **entry)
+{
+       addr_map_entry_t lookup = {
+               .ip = ip,
+       };
+
+       *entry = this->vips->get_match(this->vips, &lookup,
+                                                                 (void*)addr_map_entry_match);
+       if (*entry == NULL)
+       {       /* the virtual IP disappeared */
+               return TRUE;
+       }
+       return (*entry)->addr->installed;
+}
+
+/**
+ * check if the given IP is in this->vips, on any interface, installed or not
+ *
+ * this->mutex must be locked when calling this function
+ */
+static bool is_known_vip(private_kernel_netlink_net_t *this, host_t *ip)
+{
+       addr_map_entry_t lookup = {
+               .ip = ip,
+       };
+
+       return this->vips->get_match(this->vips, &lookup,
+                                                               (void*)addr_map_entry_match) != NULL;
+}
+
+/**
+ * Add a map entry for addr on iface; what put() returns (if anything) is freed
+ */
+static void addr_map_entry_add(hashtable_t *map, addr_entry_t *addr,
+                                                          iface_entry_t *iface)
+{
+       addr_map_entry_t *entry;
+
+       INIT(entry,
+               .ip = addr->ip,
+               .addr = addr,
+               .iface = iface,
+       );
+       entry = map->put(map, entry, entry);
+       free(entry);
+}
+
+/**
+ * Remove the map entry for addr's IP on iface from map and free the entry
+ */
+static void addr_map_entry_remove(hashtable_t *map, addr_entry_t *addr,
+                                                                 iface_entry_t *iface)
+{
+       addr_map_entry_t *entry, lookup = {
+               .ip = addr->ip,
+               .addr = addr,
+               .iface = iface,
+       };
+
+       entry = map->remove(map, &lookup);
+       free(entry);
+}
 
 /**
  * get the first non-virtual ip address on the given interface.
+ * if a candidate address is given, we first search for that address and if not
+ * found return the address as above.
  * returned host is a clone, has to be freed by caller.
  */
 static host_t *get_interface_address(private_kernel_netlink_net_t *this,
-                                                                        int ifindex, int family)
+                                                                        int ifindex, int family, host_t *candidate)
 {
-       enumerator_t *ifaces, *addrs;
        iface_entry_t *iface;
+       enumerator_t *addrs;
        addr_entry_t *addr;
        host_t *ip = NULL;
 
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &ifindex) == SUCCESS)
        {
-               if (iface->ifindex == ifindex)
-               {
+               if (iface->usable)
+               {       /* only use interfaces not excluded by config */
                        addrs = iface->addrs->create_enumerator(iface->addrs);
                        while (addrs->enumerate(addrs, &addr))
                        {
-                               if (!addr->virtual && addr->ip->get_family(addr->ip) == family)
+                               if (addr->refcount)
+                               {       /* ignore virtual IP addresses */
+                                       continue;
+                               }
+                               if (addr->ip->get_family(addr->ip) == family)
                                {
-                                       ip = addr->ip->clone(addr->ip);
-                                       break;
+                                       if (!candidate || candidate->ip_equals(candidate, addr->ip))
+                                       {       /* stop at the first address if we don't search for a
+                                                * candidate or if the candidate matches */
+                                               ip = addr->ip;
+                                               break;
+                                       }
+                                       else if (!ip)
+                                       {       /* store the first address as fallback if candidate is
+                                                * not found */
+                                               ip = addr->ip;
+                                       }
                                }
                        }
                        addrs->destroy(addrs);
-                       break;
                }
        }
-       ifaces->destroy(ifaces);
+       if (ip)
+       {
+               ip = ip->clone(ip);
+       }
        this->mutex->unlock(this->mutex);
        return ip;
 }
@@ -277,21 +701,60 @@ static void fire_roam_event(private_kernel_netlink_net_t *this, bool address)
        job_t *job;
 
        time_monotonic(&now);
-       if (timercmp(&now, &this->last_roam, >))
+       this->roam_lock->lock(this->roam_lock);
+       if (!timercmp(&now, &this->next_roam, >))
        {
-               now.tv_usec += ROAM_DELAY * 1000;
-               while (now.tv_usec > 1000000)
-               {
-                       now.tv_sec++;
-                       now.tv_usec -= 1000000;
-               }
-               this->last_roam = now;
+               this->roam_lock->unlock(this->roam_lock);
+               return;
+       }
+       now.tv_usec += ROAM_DELAY * 1000;
+       while (now.tv_usec > 1000000)
+       {
+               now.tv_sec++;
+               now.tv_usec -= 1000000;
+       }
+       this->next_roam = now;
+       this->roam_lock->unlock(this->roam_lock);
+
+       job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
+                                                                         (void*)(uintptr_t)(address ? 1 : 0),
+                                                                          NULL, NULL);
+       lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
+}
 
-               job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
-                                                                                 (void*)(uintptr_t)(address ? 1 : 0),
-                                                                                 NULL, NULL);
-               lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
+/**
+ * check if an interface with a given index is up and usable; FALSE if unknown
+ *
+ * this->mutex must be locked when calling this function
+ */
+static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
+                                                                          int index)
+{
+       iface_entry_t *iface;
+
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &index) == SUCCESS)
+       {
+               return iface_entry_up_and_usable(iface);
+       }
+       return FALSE;
+}
+
+/**
+ * unregister the current addr_entry_t from the hashtable it is stored in:
+ * this->vips if refcount is set (condvar is then broadcast), else this->addrs
+ * this->mutex must be locked when calling this function
+ */
+static void addr_entry_unregister(addr_entry_t *addr, iface_entry_t *iface,
+                                                                 private_kernel_netlink_net_t *this)
+{
+       if (addr->refcount)
+       {
+               addr_map_entry_remove(this->vips, addr, iface);
+               this->condvar->broadcast(this->condvar);
+               return;
         }
+       addr_map_entry_remove(this->addrs, addr, iface);
 }
 
 /**
@@ -306,9 +769,9 @@ static void process_link(private_kernel_netlink_net_t *this,
        enumerator_t *enumerator;
        iface_entry_t *current, *entry = NULL;
        char *name = NULL;
-       bool update = FALSE;
+       bool update = FALSE, update_routes = FALSE;
 
-       while(RTA_OK(rta, rtasize))
+       while (RTA_OK(rta, rtasize))
        {
                switch (rta->rta_type)
                {
@@ -328,35 +791,25 @@ static void process_link(private_kernel_netlink_net_t *this,
        {
                case RTM_NEWLINK:
                {
-                       if (msg->ifi_flags & IFF_LOOPBACK)
-                       {       /* ignore loopback interfaces */
-                               break;
-                       }
-                       enumerator = this->ifaces->create_enumerator(this->ifaces);
-                       while (enumerator->enumerate(enumerator, &current))
-                       {
-                               if (current->ifindex == msg->ifi_index)
-                               {
-                                       entry = current;
-                                       break;
-                               }
-                       }
-                       enumerator->destroy(enumerator);
-                       if (!entry)
+                       if (this->ifaces->find_first(this->ifaces,
+                                                                       (void*)iface_entry_by_index, (void**)&entry,
+                                                                       &msg->ifi_index) != SUCCESS)
                        {
-                               entry = malloc_thing(iface_entry_t);
-                               entry->ifindex = msg->ifi_index;
-                               entry->flags = 0;
-                               entry->addrs = linked_list_create();
+                               INIT(entry,
+                                       .ifindex = msg->ifi_index,
+                                       .addrs = linked_list_create(),
+                                       .usable = hydra->kernel_interface->is_interface_usable(
+                                                                                               hydra->kernel_interface, name),
+                               );
                                this->ifaces->insert_last(this->ifaces, entry);
                        }
-                       memcpy(entry->ifname, name, IFNAMSIZ);
+                       strncpy(entry->ifname, name, IFNAMSIZ);
                        entry->ifname[IFNAMSIZ-1] = '\0';
-                       if (event)
+                       if (event && entry->usable)
                        {
                                if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
                                {
-                                       update = TRUE;
+                                       update = update_routes = TRUE;
                                        DBG1(DBG_KNL, "interface %s activated", name);
                                }
                                if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
@@ -375,9 +828,17 @@ static void process_link(private_kernel_netlink_net_t *this,
                        {
                                if (current->ifindex == msg->ifi_index)
                                {
-                                       /* we do not remove it, as an address may be added to a
-                                        * "down" interface and we wan't to know that. */
-                                       current->flags = msg->ifi_flags;
+                                       if (event && current->usable)
+                                       {
+                                               update = TRUE;
+                                               DBG1(DBG_KNL, "interface %s deleted", current->ifname);
+                                       }
+                                       /* TODO: move virtual IPs installed on this interface to
+                                        * another interface? */
+                                       this->ifaces->remove_at(this->ifaces, enumerator);
+                                       current->addrs->invoke_function(current->addrs,
+                                                               (void*)addr_entry_unregister, current, this);
+                                       iface_entry_destroy(current);
                                        break;
                                }
                        }
@@ -387,7 +848,11 @@ static void process_link(private_kernel_netlink_net_t *this,
        }
        this->mutex->unlock(this->mutex);
 
-       /* send an update to all IKE_SAs */
+       if (update_routes && event)
+       {
+               queue_route_reinstall(this, strdup(name));
+       }
+
        if (update && event)
        {
                fire_roam_event(this, TRUE);
@@ -404,13 +869,12 @@ static void process_addr(private_kernel_netlink_net_t *this,
        struct rtattr *rta = IFA_RTA(msg);
        size_t rtasize = IFA_PAYLOAD (hdr);
        host_t *host = NULL;
-       enumerator_t *ifaces, *addrs;
        iface_entry_t *iface;
-       addr_entry_t *addr;
        chunk_t local = chunk_empty, address = chunk_empty;
+       char *route_ifname = NULL;
        bool update = FALSE, found = FALSE, changed = FALSE;
 
-       while(RTA_OK(rta, rtasize))
+       while (RTA_OK(rta, rtasize))
        {
                switch (rta->rta_type)
                {
@@ -444,64 +908,91 @@ static void process_addr(private_kernel_netlink_net_t *this,
        }
 
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &msg->ifa_index) == SUCCESS)
        {
-               if (iface->ifindex == msg->ifa_index)
+               addr_map_entry_t *entry, lookup = {
+                       .ip = host,
+                       .iface = iface,
+               };
+               addr_entry_t *addr;
+
+               entry = this->vips->get(this->vips, &lookup);
+               if (entry)
                {
-                       addrs = iface->addrs->create_enumerator(iface->addrs);
-                       while (addrs->enumerate(addrs, &addr))
+                       if (hdr->nlmsg_type == RTM_NEWADDR)
+                       {       /* mark as installed and signal waiting threads */
+                               entry->addr->installed = TRUE;
+                       }
+                       else
+                       {       /* the address was already marked as uninstalled */
+                               addr = entry->addr;
+                               iface->addrs->remove(iface->addrs, addr, NULL);
+                               addr_map_entry_remove(this->vips, addr, iface);
+                               addr_entry_destroy(addr);
+                       }
+                       /* no roam events etc. for virtual IPs */
+                       this->condvar->broadcast(this->condvar);
+                       this->mutex->unlock(this->mutex);
+                       host->destroy(host);
+                       return;
+               }
+               entry = this->addrs->get(this->addrs, &lookup);
+               if (entry)
+               {
+                       if (hdr->nlmsg_type == RTM_DELADDR)
                        {
-                               if (host->ip_equals(host, addr->ip))
+                               found = TRUE;
+                               addr = entry->addr;
+                               iface->addrs->remove(iface->addrs, addr, NULL);
+                               if (iface->usable)
                                {
-                                       found = TRUE;
-                                       if (hdr->nlmsg_type == RTM_DELADDR)
-                                       {
-                                               iface->addrs->remove_at(iface->addrs, addrs);
-                                               if (!addr->virtual)
-                                               {
-                                                       changed = TRUE;
-                                                       DBG1(DBG_KNL, "%H disappeared from %s",
-                                                                host, iface->ifname);
-                                               }
-                                               addr_entry_destroy(addr);
-                                       }
-                                       else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
-                                       {
-                                               addr->refcount = 1;
-                                       }
+                                       changed = TRUE;
+                                       DBG1(DBG_KNL, "%H disappeared from %s", host,
+                                                iface->ifname);
                                }
+                               addr_map_entry_remove(this->addrs, addr, iface);
+                               addr_entry_destroy(addr);
                        }
-                       addrs->destroy(addrs);
-
+               }
+               else
+               {
                        if (hdr->nlmsg_type == RTM_NEWADDR)
                        {
-                               if (!found)
+                               found = TRUE;
+                               changed = TRUE;
+                               route_ifname = strdup(iface->ifname);
+                               INIT(addr,
+                                       .ip = host->clone(host),
+                                       .scope = msg->ifa_scope,
+                               );
+                               iface->addrs->insert_last(iface->addrs, addr);
+                               addr_map_entry_add(this->addrs, addr, iface);
+                               if (event && iface->usable)
                                {
-                                       found = TRUE;
-                                       changed = TRUE;
-                                       addr = malloc_thing(addr_entry_t);
-                                       addr->ip = host->clone(host);
-                                       addr->virtual = FALSE;
-                                       addr->refcount = 1;
-                                       addr->scope = msg->ifa_scope;
-
-                                       iface->addrs->insert_last(iface->addrs, addr);
-                                       if (event)
-                                       {
-                                               DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
-                                       }
+                                       DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
                                }
                        }
-                       if (found && (iface->flags & IFF_UP))
-                       {
-                               update = TRUE;
-                       }
-                       break;
+               }
+               if (found && (iface->flags & IFF_UP))
+               {
+                       update = TRUE;
+               }
+               if (!iface->usable)
+               {       /* ignore events for interfaces excluded by config */
+                       update = changed = FALSE;
                }
        }
-       ifaces->destroy(ifaces);
        this->mutex->unlock(this->mutex);
+
+       if (update && event && route_ifname)
+       {
+               queue_route_reinstall(this, route_ifname);
+       }
+       else
+       {
+               free(route_ifname);
+       }
        host->destroy(host);
 
        /* send an update to all IKE_SAs */
@@ -528,12 +1019,17 @@ static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *h
        {
                return;
        }
+       else if (msg->rtm_flags & RTM_F_CLONED)
+       {       /* ignore cached routes, seem to be created a lot for IPv6 */
+               return;
+       }
 
        while (RTA_OK(rta, rtasize))
        {
                switch (rta->rta_type)
                {
                        case RTA_PREFSRC:
+                               DESTROY_IF(host);
                                host = host_create_from_chunk(msg->rtm_family,
                                                        chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
                                break;
@@ -546,20 +1042,26 @@ static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *h
                }
                rta = RTA_NEXT(rta, rtasize);
        }
+       this->mutex->lock(this->mutex);
+       if (rta_oif && !is_interface_up_and_usable(this, rta_oif))
+       {       /* ignore route changes for interfaces that are ignored or down */
+               this->mutex->unlock(this->mutex);
+               DESTROY_IF(host);
+               return;
+       }
        if (!host && rta_oif)
        {
-               host = get_interface_address(this, rta_oif, msg->rtm_family);
+               host = get_interface_address(this, rta_oif, msg->rtm_family, NULL);
        }
        if (host)
        {
-               this->mutex->lock(this->mutex);
-               if (!get_vip_refcount(this, host))
+               if (!is_known_vip(this, host))
                {       /* ignore routes added for virtual IPs */
                        fire_roam_event(this, FALSE);
                }
-               this->mutex->unlock(this->mutex);
                host->destroy(host);
        }
+       this->mutex->unlock(this->mutex);
 }
 
 /**
@@ -609,12 +1111,10 @@ static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
                        case RTM_NEWADDR:
                        case RTM_DELADDR:
                                process_addr(this, hdr, TRUE);
-                               this->condvar->broadcast(this->condvar);
                                break;
                        case RTM_NEWLINK:
                        case RTM_DELLINK:
                                process_link(this, hdr, TRUE);
-                               this->condvar->broadcast(this->condvar);
                                break;
                        case RTM_NEWROUTE:
                        case RTM_DELROUTE:
@@ -634,10 +1134,8 @@ static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
 /** enumerator over addresses */
 typedef struct {
        private_kernel_netlink_net_t* this;
-       /** whether to enumerate down interfaces */
-       bool include_down_ifaces;
-       /** whether to enumerate virtual ip addresses */
-       bool include_virtual_ips;
+       /** which addresses to enumerate */
+       kernel_address_type_t which;
 } address_enumerator_t;
 
 /**
@@ -652,9 +1150,10 @@ static void address_enumerator_destroy(address_enumerator_t *data)
 /**
  * filter for addresses
  */
-static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
+static bool filter_addresses(address_enumerator_t *data,
+                                                        addr_entry_t** in, host_t** out)
 {
-       if (!data->include_virtual_ips && (*in)->virtual)
+       if (!(data->which & ADDR_TYPE_VIRTUAL) && (*in)->refcount)
        {       /* skip virtual interfaces added by us */
                return FALSE;
        }
@@ -669,18 +1168,29 @@ static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host
 /**
  * enumerator constructor for interfaces
  */
-static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
+static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
+                                                                                        address_enumerator_t *data)
 {
-       return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
+       return enumerator_create_filter(
+                               iface->addrs->create_enumerator(iface->addrs),
                                (void*)filter_addresses, data, NULL);
 }
 
 /**
  * filter for interfaces
  */
-static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
+static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
+                                                         iface_entry_t** out)
 {
-       if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
+       if (!(data->which & ADDR_TYPE_IGNORED) && !(*in)->usable)
+       {       /* skip interfaces excluded by config */
+               return FALSE;
+       }
+       if (!(data->which & ADDR_TYPE_LOOPBACK) && ((*in)->flags & IFF_LOOPBACK))
+       {       /* ignore loopback devices */
+               return FALSE;
+       }
+       if (!(data->which & ADDR_TYPE_DOWN) && !((*in)->flags & IFF_UP))
        {       /* skip interfaces not up */
                return FALSE;
        }
@@ -688,67 +1198,56 @@ static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, if
        return TRUE;
 }
 
-/**
- * implementation of kernel_net_t.create_address_enumerator
- */
-static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
-               bool include_down_ifaces, bool include_virtual_ips)
+METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
+       private_kernel_netlink_net_t *this, kernel_address_type_t which)
 {
        address_enumerator_t *data = malloc_thing(address_enumerator_t);
        data->this = this;
-       data->include_down_ifaces = include_down_ifaces;
-       data->include_virtual_ips = include_virtual_ips;
+       data->which = which;
 
        this->mutex->lock(this->mutex);
        return enumerator_create_nested(
-                               enumerator_create_filter(this->ifaces->create_enumerator(this->ifaces),
-                                                       (void*)filter_interfaces, data, NULL),
-                               (void*)create_iface_enumerator, data, (void*)address_enumerator_destroy);
+                               enumerator_create_filter(
+                                       this->ifaces->create_enumerator(this->ifaces),
+                                       (void*)filter_interfaces, data, NULL),
+                               (void*)create_iface_enumerator, data,
+                               (void*)address_enumerator_destroy);
 }
 
-/**
- * implementation of kernel_net_t.get_interface_name
- */
-static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
+METHOD(kernel_net_t, get_interface_name, bool,
+       private_kernel_netlink_net_t *this, host_t* ip, char **name)
 {
-       enumerator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       char *name = NULL;
-
-       DBG2(DBG_KNL, "getting interface name for %H", ip);
+       addr_map_entry_t *entry, lookup = {
+               .ip = ip,
+       };
 
+       if (ip->is_anyaddr(ip))
+       {
+               return FALSE;
+       }
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       /* first try to find it on an up and usable interface */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match_up_and_usable);
+       if (entry)
        {
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
-               {
-                       if (ip->ip_equals(ip, addr->ip))
-                       {
-                               name = strdup(iface->ifname);
-                               break;
-                       }
-               }
-               addrs->destroy(addrs);
                if (name)
                {
-                       break;
+                       *name = strdup(entry->iface->ifname);
+                       DBG2(DBG_KNL, "%H is on interface %s", ip, *name);
                }
+               this->mutex->unlock(this->mutex);
+               return TRUE;
        }
-       ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
-
-       if (name)
-       {
-               DBG2(DBG_KNL, "%H is on interface %s", ip, name);
-       }
-       else
+       /* maybe it is installed on an ignored interface */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match_up);
+       if (!entry)
        {
-               DBG2(DBG_KNL, "%H is not a local address", ip);
+               DBG2(DBG_KNL, "%H is not a local address or the interface is down", ip);
        }
-       return name;
+       this->mutex->unlock(this->mutex);
+       return FALSE;
 }
 
 /**
@@ -756,23 +1255,17 @@ static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
  */
 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
 {
-       enumerator_t *ifaces;
        iface_entry_t *iface;
        int ifindex = 0;
 
        DBG2(DBG_KNL, "getting iface index for %s", name);
 
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
+                                                               (void**)&iface, name) == SUCCESS)
        {
-               if (streq(name, iface->ifname))
-               {
-                       ifindex = iface->ifindex;
-                       break;
-               }
+               ifindex = iface->ifindex;
        }
-       ifaces->destroy(ifaces);
        this->mutex->unlock(this->mutex);
 
        if (ifindex == 0)
@@ -783,29 +1276,6 @@ static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
 }
 
 /**
- * Check if an interface with a given index is up
- */
-static bool is_interface_up(private_kernel_netlink_net_t *this, int index)
-{
-       enumerator_t *ifaces;
-       iface_entry_t *iface;
-       /* default to TRUE for interface we do not monitor (e.g. lo) */
-       bool up = TRUE;
-
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
-       {
-               if (iface->ifindex == index)
-               {
-                       up = iface->flags & IFF_UP;
-                       break;
-               }
-       }
-       ifaces->destroy(ifaces);
-       return up;
-}
-
-/**
  * check if an address (chunk) addr is in subnet (net with net_len net bits)
  */
 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
@@ -842,6 +1312,94 @@ static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
 }
 
 /**
+ * Store information about a route retrieved via RTNETLINK
+ */
+typedef struct {
+       chunk_t gtw;            /* RTA_GATEWAY payload, points into the Netlink message */
+       chunk_t src;            /* RTA_PREFSRC payload, points into the Netlink message */
+       chunk_t dst;            /* RTA_DST payload, points into the Netlink message */
+       host_t *src_host;       /* source address selected for the route, destroyed with the entry */
+       u_int8_t dst_len;       /* prefix length of dst (rtm_dst_len) */
+       u_int32_t table;        /* rtm_table, or RTA_TABLE if HAVE_RTA_TABLE and the attribute is present */
+       u_int32_t oif;          /* RTA_OIF interface index, 0 if the attribute is absent */
+} rt_entry_t;
+
+/**
+ * Free a route entry together with the src_host it holds
+ */
+static void rt_entry_destroy(rt_entry_t *this)
+{
+       DESTROY_IF(this->src_host);
+       free(this);     /* the gtw/src/dst chunks are not freed here */
+}
+
+/**
+ * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
+ * reused if not NULL (its src_host is left untouched), else one is created.
+ *
+ * Returned chunks point to internal data of the Netlink message.
+ */
+static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
+{
+       struct rtattr *rta;
+       struct rtmsg *msg;
+       size_t rtasize;
+
+       msg = (struct rtmsg*)(NLMSG_DATA(hdr));
+       rta = RTM_RTA(msg);
+       rtasize = RTM_PAYLOAD(hdr);
+
+       if (route)
+       {       /* reset the fields filled in below, src_host is not touched */
+               route->gtw = chunk_empty;
+               route->src = chunk_empty;
+               route->dst = chunk_empty;
+               route->dst_len = msg->rtm_dst_len;
+               route->table = msg->rtm_table;
+               route->oif = 0;
+       }
+       else
+       {       /* nothing to reuse, create a new entry */
+               INIT(route,
+                       .dst_len = msg->rtm_dst_len,
+                       .table = msg->rtm_table,
+               );
+       }
+
+       while (RTA_OK(rta, rtasize))
+       {
+               switch (rta->rta_type)
+               {
+                       case RTA_PREFSRC:
+                               route->src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_GATEWAY:
+                               route->gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_DST:
+                               route->dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_OIF:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->oif))
+                               {       /* only used if the payload has the expected size */
+                                       route->oif = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
+#ifdef HAVE_RTA_TABLE
+                       case RTA_TABLE:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->table))
+                               {       /* replaces the rtm_table value stored above */
+                                       route->table = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
+#endif /* HAVE_RTA_TABLE */
+               }
+               rta = RTA_NEXT(rta, rtasize);
+       }
+       return route;
+}
+
+/**
  * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
  */
 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
@@ -852,22 +1410,21 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
        struct rtmsg *msg;
        chunk_t chunk;
        size_t len;
-       int best = -1;
+       linked_list_t *routes;
+       rt_entry_t *route = NULL, *best = NULL;
        enumerator_t *enumerator;
-       host_t *src = NULL, *gtw = NULL;
-
-       DBG2(DBG_KNL, "getting address to reach %H", dest);
+       host_t *addr = NULL;
 
        memset(&request, 0, sizeof(request));
 
        hdr = (struct nlmsghdr*)request;
        hdr->nlmsg_flags = NLM_F_REQUEST;
-       if (dest->get_family(dest) == AF_INET)
-       {
-               /* We dump all addresses for IPv4, as we want to ignore IPsec specific
-                * routes installed by us. But the kernel does not return source
-                * addresses in a IPv6 dump, so fall back to get() for v6 routes. */
-               hdr->nlmsg_flags |= NLM_F_ROOT | NLM_F_DUMP;
+       if (dest->get_family(dest) == AF_INET || this->rta_prefsrc_for_ipv6 ||
+               this->routing_table)
+       {       /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes;
+                * as we want to ignore routes with virtual IPs, we cannot use DUMP
+                * if these routes are not installed in a separate table */
+               hdr->nlmsg_flags |= NLM_F_DUMP;
        }
        hdr->nlmsg_type = RTM_GETROUTE;
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
@@ -884,9 +1441,11 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
 
        if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
        {
-               DBG1(DBG_KNL, "getting address to %H failed", dest);
+               DBG2(DBG_KNL, "getting %s to reach %H failed",
+                        nexthop ? "nexthop" : "address", dest);
                return NULL;
        }
+       routes = linked_list_create();
        this->mutex->lock(this->mutex);
 
        for (current = out; NLMSG_OK(current, len);
@@ -898,123 +1457,53 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                                break;
                        case RTM_NEWROUTE:
                        {
-                               struct rtattr *rta;
-                               size_t rtasize;
-                               chunk_t rta_gtw, rta_src, rta_dst;
-                               u_int32_t rta_oif = 0;
-                               host_t *new_src, *new_gtw;
-                               bool cont = FALSE;
+                               rt_entry_t *other;
                                uintptr_t table;
 
-                               rta_gtw = rta_src = rta_dst = chunk_empty;
-                               msg = (struct rtmsg*)(NLMSG_DATA(current));
-                               rta = RTM_RTA(msg);
-                               rtasize = RTM_PAYLOAD(current);
-                               while (RTA_OK(rta, rtasize))
-                               {
-                                       switch (rta->rta_type)
-                                       {
-                                               case RTA_PREFSRC:
-                                                       rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_GATEWAY:
-                                                       rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_DST:
-                                                       rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_OIF:
-                                                       if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
-                                                       {
-                                                               rta_oif = *(u_int32_t*)RTA_DATA(rta);
-                                                       }
-                                                       break;
-                                       }
-                                       rta = RTA_NEXT(rta, rtasize);
-                               }
-                               if (msg->rtm_dst_len <= best)
-                               {       /* not better than a previous one */
-                                       continue;
-                               }
-                               enumerator = this->rt_exclude->create_enumerator(this->rt_exclude);
-                               while (enumerator->enumerate(enumerator, &table))
-                               {
-                                       if (table == msg->rtm_table)
-                                       {
-                                               cont = TRUE;
-                                               break;
-                                       }
-                               }
-                               enumerator->destroy(enumerator);
-                               if (cont)
-                               {
+                               route = parse_route(current, route);
+
+                               table = (uintptr_t)route->table;
+                               if (this->rt_exclude->find_first(this->rt_exclude, NULL,
+                                                                                                (void**)&table) == SUCCESS)
+                               {       /* route is from an excluded routing table */
                                        continue;
                                }
                                if (this->routing_table != 0 &&
-                                       msg->rtm_table == this->routing_table)
+                                       route->table == this->routing_table)
                                {       /* route is from our own ipsec routing table */
                                        continue;
                                }
-                               if (rta_oif && !is_interface_up(this, rta_oif))
+                               if (route->oif && !is_interface_up_and_usable(this, route->oif))
                                {       /* interface is down */
                                        continue;
                                }
-                               if (!addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))
+                               if (!addr_in_subnet(chunk, route->dst, route->dst_len))
                                {       /* route destination does not contain dest */
                                        continue;
                                }
-
-                               if (nexthop)
-                               {
-                                       /* nexthop lookup, return gateway if any */
-                                       DESTROY_IF(gtw);
-                                       gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
-                                       best = msg->rtm_dst_len;
-                                       continue;
-                               }
-                               if (rta_src.ptr)
-                               {       /* got a source address */
-                                       new_src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
-                                       if (new_src)
-                                       {
-                                               if (get_vip_refcount(this, new_src))
-                                               {       /* skip source address if it is installed by us */
-                                                       new_src->destroy(new_src);
-                                               }
-                                               else
-                                               {
-                                                       DESTROY_IF(src);
-                                                       src = new_src;
-                                                       best = msg->rtm_dst_len;
-                                               }
-                                       }
-                                       continue;
-                               }
-                               if (rta_oif)
-                               {       /* no src or gtw, but an interface. Get address from it. */
-                                       new_src = get_interface_address(this, rta_oif,
-                                                                                                       msg->rtm_family);
-                                       if (new_src)
-                                       {
-                                               DESTROY_IF(src);
-                                               src = new_src;
-                                               best = msg->rtm_dst_len;
+                               if (route->src.ptr)
+                               {       /* verify source address, if any */
+                                       host_t *src = host_create_from_chunk(msg->rtm_family,
+                                                                                                                route->src, 0);
+                                       if (src && is_known_vip(this, src))
+                                       {       /* ignore routes installed by us */
+                                               src->destroy(src);
+                                               continue;
                                        }
-                                       continue;
+                                       route->src_host = src;
                                }
-                               if (rta_gtw.ptr)
-                               {       /* no source, but a gateway. Lookup source to reach gtw. */
-                                       new_gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
-                                       new_src = get_route(this, new_gtw, FALSE, candidate);
-                                       new_gtw->destroy(new_gtw);
-                                       if (new_src)
+                               /* insert route, sorted by decreasing network prefix */
+                               enumerator = routes->create_enumerator(routes);
+                               while (enumerator->enumerate(enumerator, &other))
+                               {
+                                       if (route->dst_len > other->dst_len)
                                        {
-                                               DESTROY_IF(src);
-                                               src = new_src;
-                                               best = msg->rtm_dst_len;
+                                               break;
                                        }
-                                       continue;
                                }
+                               routes->insert_before(routes, enumerator, route);
+                               enumerator->destroy(enumerator);
+                               route = NULL;
                                continue;
                        }
                        default:
@@ -1022,35 +1511,123 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                }
                break;
        }
-       free(out);
-       this->mutex->unlock(this->mutex);
+       if (route)
+       {
+               rt_entry_destroy(route);
+       }
+
+       /* now we have a list of routes matching dest, sorted by net prefix.
+        * we will look for source addresses for these routes and select the one
+        * with the preferred source address, if possible */
+       enumerator = routes->create_enumerator(routes);
+       while (enumerator->enumerate(enumerator, &route))
+       {
+               if (route->src_host)
+               {       /* got a source address with the route, if no preferred source
+                        * is given or it matches we are done, as this is the best route */
+                       if (!candidate || candidate->ip_equals(candidate, route->src_host))
+                       {
+                               best = route;
+                               break;
+                       }
+                       else if (route->oif)
+                       {       /* no match yet, maybe it is assigned to the same interface */
+                               host_t *src = get_interface_address(this, route->oif,
+                                                                                                       msg->rtm_family, candidate);
+                               if (src && src->ip_equals(src, candidate))
+                               {
+                                       route->src_host->destroy(route->src_host);
+                                       route->src_host = src;
+                                       best = route;
+                                       break;
+                               }
+                               DESTROY_IF(src);
+                       }
+                       /* no luck yet with the source address. if this is the best (first)
+                        * route we store it as fallback in case we don't find a route with
+                        * the preferred source */
+                       best = best ?: route;
+                       continue;
+               }
+               if (route->oif)
+               {       /* no src, but an interface - get address from it */
+                       route->src_host = get_interface_address(this, route->oif,
+                                                                                                       msg->rtm_family, candidate);
+                       if (route->src_host)
+                       {       /* we handle this address the same as the one above */
+                               if (!candidate ||
+                                        candidate->ip_equals(candidate, route->src_host))
+                               {
+                                       best = route;
+                                       break;
+                               }
+                               best = best ?: route;
+                               continue;
+                       }
+               }
+               if (route->gtw.ptr)
+               {       /* still no src, but a gateway - lookup src to reach gtw */
+                       host_t *gtw;
+
+                       gtw = host_create_from_chunk(msg->rtm_family, route->gtw, 0);
+                       route->src_host = get_route(this, gtw, FALSE, candidate);
+                       gtw->destroy(gtw);
+                       if (route->src_host)
+                       {       /* more of the same */
+                               if (!candidate ||
+                                        candidate->ip_equals(candidate, route->src_host))
+                               {
+                                       best = route;
+                                       break;
+                               }
+                               best = best ?: route;
+                       }
+               }
+       }
+       enumerator->destroy(enumerator);
 
        if (nexthop)
+       {       /* nexthop lookup, return gateway if any */
+               if (best || routes->get_first(routes, (void**)&best) == SUCCESS)
+               {
+                       addr = host_create_from_chunk(msg->rtm_family, best->gtw, 0);
+               }
+               addr = addr ?: dest->clone(dest);
+       }
+       else
        {
-               if (gtw)
+               if (best)
                {
-                       return gtw;
+                       addr = best->src_host->clone(best->src_host);
                }
-               return dest->clone(dest);
        }
-       return src;
+       this->mutex->unlock(this->mutex);
+       routes->destroy_function(routes, (void*)rt_entry_destroy);
+       free(out);
+
+       if (addr)
+       {
+               DBG2(DBG_KNL, "using %H as %s to reach %H", addr,
+                        nexthop ? "nexthop" : "address", dest);
+       }
+       else
+       {
+               DBG2(DBG_KNL, "no %s found to reach %H",
+                        nexthop ? "nexthop" : "address", dest);
+       }
+       return addr;
 }
 
-/**
- * Implementation of kernel_net_t.get_source_addr.
- */
-static host_t* get_source_addr(private_kernel_netlink_net_t *this,
-                                                          host_t *dest, host_t *src)
+METHOD(kernel_net_t, get_source_addr, host_t*,
+       private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
 {
        return get_route(this, dest, FALSE, src);
 }
 
-/**
- * Implementation of kernel_net_t.get_nexthop.
- */
-static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
+METHOD(kernel_net_t, get_nexthop, host_t*,
+       private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
 {
-       return get_route(this, dest, TRUE, NULL);
+       return get_route(this, dest, TRUE, src);
 }
 
 /**
@@ -1086,95 +1663,112 @@ static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type
        return this->socket->send_ack(this->socket, hdr);
 }
 
-/**
- * Implementation of kernel_net_t.add_ip.
- */
-static status_t add_ip(private_kernel_netlink_net_t *this,
-                                               host_t *virtual_ip, host_t *iface_ip)
+/**
+ * kernel_net_t.add_ip: install virtual_ip on an interface, or take another
+ * reference on it if it is already tracked.
+ *
+ * Returns SUCCESS without doing anything if install_virtual_ip is disabled.
+ * Otherwise, with this->mutex held:
+ *  - if virtual_ip is found in this->addrs it is reported as already
+ *    installed and SUCCESS is returned (no refcount is touched);
+ *  - if it is found in this->vips the call waits on this->condvar until
+ *    is_vip_installed_or_gone() returns TRUE; if an entry is still set
+ *    afterwards its refcount is incremented and SUCCESS is returned;
+ *  - otherwise an interface is chosen (the one named by
+ *    install_virtual_ip_on, else the one that this->addrs maps iface_ip to,
+ *    else the first entry of this->ifaces), a new address entry with
+ *    refcount 1 is registered and an RTM_NEWADDR request is sent.
+ *
+ * Returns FAILED if no interface is available, if manage_ipaddr() fails, or
+ * if no entry is left once the wait for the address ends.
+ */
+METHOD(kernel_net_t, add_ip, status_t,
+       private_kernel_netlink_net_t *this, host_t *virtual_ip, host_t *iface_ip)
 {
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       enumerator_t *addrs, *ifaces;
-       int ifindex;
+       addr_map_entry_t *entry, lookup = {
+               .ip = virtual_ip,
+       };
+       iface_entry_t *iface = NULL;
 
        if (!this->install_virtual_ip)
        {       /* disabled by config */
                return SUCCESS;
        }
 
-       DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
-
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
-       {
-               bool iface_found = FALSE;
-
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
-               {
-                       if (iface_ip->ip_equals(iface_ip, addr->ip))
+       /* the virtual IP might actually be installed as regular IP, in which case
+        * we don't track it as virtual IP */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match);
+       if (!entry)
+       {       /* otherwise it might already be installed as virtual IP */
+               entry = this->vips->get_match(this->vips, &lookup,
+                                                                        (void*)addr_map_entry_match);
+               if (entry)
+               {       /* the vip we found can be in one of three states: 1) installed and
+                        * ready, 2) just added by another thread, but not yet confirmed to
+                        * be installed by the kernel, 3) just deleted, but not yet gone.
+                        * Then while we wait below, several things could happen (as we
+                        * release the mutex).  For instance, the interface could disappear,
+                        * or the IP is finally deleted, and it reappears on a different
+                        * interface. All these cases are handled by the call below. */
+                       while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
                        {
-                               iface_found = TRUE;
+                               this->condvar->wait(this->condvar, this->mutex);
                        }
-                       else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
+                       if (entry)
                        {
-                               addr->refcount++;
-                               DBG2(DBG_KNL, "virtual IP %H already installed on %s",
-                                        virtual_ip, iface->ifname);
-                               addrs->destroy(addrs);
-                               ifaces->destroy(ifaces);
-                               this->mutex->unlock(this->mutex);
-                               return SUCCESS;
+                               entry->addr->refcount++;
                        }
                }
-               addrs->destroy(addrs);
-
-               if (iface_found)
+       }
+       if (entry)
+       {
+               DBG2(DBG_KNL, "virtual IP %H is already installed on %s", virtual_ip,
+                        entry->iface->ifname);
+               this->mutex->unlock(this->mutex);
+               return SUCCESS;
+       }
+       /* try to find the target interface, either by config or via src ip */
+       if (!this->install_virtual_ip_on ||
+                this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
+                                               (void**)&iface, this->install_virtual_ip_on) != SUCCESS)
+       {
+               lookup.ip = iface_ip;
+               entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                         (void*)addr_map_entry_match);
+               if (!entry)
+               {       /* if we don't find the requested interface we just use the first */
+                       this->ifaces->get_first(this->ifaces, (void**)&iface);
+               }
+               else
                {
-                       ifindex = iface->ifindex;
-                       addr = malloc_thing(addr_entry_t);
-                       addr->ip = virtual_ip->clone(virtual_ip);
-                       addr->refcount = 0;
-                       addr->virtual = TRUE;
-                       addr->scope = RT_SCOPE_UNIVERSE;
-                       iface->addrs->insert_last(iface->addrs, addr);
-
-                       if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
-                                                         ifindex, virtual_ip) == SUCCESS)
-                       {
-                               while (get_vip_refcount(this, virtual_ip) == 0)
-                               {       /* wait until address appears */
-                                       this->condvar->wait(this->condvar, this->mutex);
-                               }
-                               ifaces->destroy(ifaces);
+                       iface = entry->iface;
+               }
+       }
+       if (iface)
+       {
+               addr_entry_t *addr;
+
+               /* the new entry is registered on the interface and in this->vips
+                * before the kernel request is sent; its installed flag is not set
+                * here.
+                * NOTE(review): nothing in this function removes the entry again when
+                * manage_ipaddr() fails - confirm it is cleaned up elsewhere */
+               INIT(addr,
+                       .ip = virtual_ip->clone(virtual_ip),
+                       .refcount = 1,
+                       .scope = RT_SCOPE_UNIVERSE,
+               );
+               iface->addrs->insert_last(iface->addrs, addr);
+               addr_map_entry_add(this->vips, addr, iface);
+               if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
+                                                 iface->ifindex, virtual_ip) == SUCCESS)
+               {
+                       while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
+                       {       /* wait until address appears */
+                               this->condvar->wait(this->condvar, this->mutex);
+                       }
+                       if (entry)
+                       {       /* we fail if the interface got deleted in the meantime */
+                               DBG2(DBG_KNL, "virtual IP %H installed on %s", virtual_ip,
+                                        entry->iface->ifname);
                                this->mutex->unlock(this->mutex);
                                return SUCCESS;
                        }
-                       ifaces->destroy(ifaces);
-                       this->mutex->unlock(this->mutex);
-                       DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
-                       return FAILED;
                }
+               this->mutex->unlock(this->mutex);
+               DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
+               return FAILED;
        }
-       ifaces->destroy(ifaces);
        this->mutex->unlock(this->mutex);
-
-       DBG1(DBG_KNL, "interface address %H not found, unable to install"
-                "virtual IP %H", iface_ip, virtual_ip);
+       DBG1(DBG_KNL, "no interface available, unable to install virtual IP %H",
+                virtual_ip);
        return FAILED;
 }
 
-/**
- * Implementation of kernel_net_t.del_ip.
- */
-static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
+METHOD(kernel_net_t, del_ip, status_t,
+       private_kernel_netlink_net_t *this, host_t *virtual_ip)
 {
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       enumerator_t *addrs, *ifaces;
-       status_t status;
-       int ifindex;
+       addr_map_entry_t *entry, lookup = {
+               .ip = virtual_ip,
+       };
 
        if (!this->install_virtual_ip)
        {       /* disabled by config */
@@ -1184,59 +1778,60 @@ static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
        DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
 
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
-       {
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
+       entry = this->vips->get_match(this->vips, &lookup,
+                                                                (void*)addr_map_entry_match);
+       if (!entry)
+       {       /* we didn't install this IP as virtual IP */
+               entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                         (void*)addr_map_entry_match);
+               if (entry)
                {
-                       if (virtual_ip->ip_equals(virtual_ip, addr->ip))
+                       DBG2(DBG_KNL, "not deleting existing IP %H on %s", virtual_ip,
+                                entry->iface->ifname);
+                       this->mutex->unlock(this->mutex);
+                       return SUCCESS;
+               }
+               DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
+               this->mutex->unlock(this->mutex);
+               return FAILED;
+       }
+       if (entry->addr->refcount == 1)
+       {
+               status_t status;
+
+               /* we set this flag so that threads calling add_ip will block and wait
+                * until the entry is gone, also so we can wait below */
+               entry->addr->installed = FALSE;
+               status = manage_ipaddr(this, RTM_DELADDR, 0, entry->iface->ifindex,
+                                                          virtual_ip);
+               if (status == SUCCESS)
+               {       /* wait until the address is really gone */
+                       while (is_known_vip(this, virtual_ip))
                        {
-                               ifindex = iface->ifindex;
-                               if (addr->refcount == 1)
-                               {
-                                       status = manage_ipaddr(this, RTM_DELADDR, 0,
-                                                                                  ifindex, virtual_ip);
-                                       if (status == SUCCESS)
-                                       {       /* wait until the address is really gone */
-                                               while (get_vip_refcount(this, virtual_ip) > 0)
-                                               {
-                                                       this->condvar->wait(this->condvar, this->mutex);
-                                               }
-                                       }
-                                       addrs->destroy(addrs);
-                                       ifaces->destroy(ifaces);
-                                       this->mutex->unlock(this->mutex);
-                                       return status;
-                               }
-                               else
-                               {
-                                       addr->refcount--;
-                               }
-                               DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
-                                        virtual_ip);
-                               addrs->destroy(addrs);
-                               ifaces->destroy(ifaces);
-                               this->mutex->unlock(this->mutex);
-                               return SUCCESS;
+                               this->condvar->wait(this->condvar, this->mutex);
                        }
                }
-               addrs->destroy(addrs);
+               this->mutex->unlock(this->mutex);
+               return status;
        }
-       ifaces->destroy(ifaces);
+       else
+       {
+               entry->addr->refcount--;
+       }
+       DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
+                virtual_ip);
        this->mutex->unlock(this->mutex);
-
-       DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
-       return FAILED;
+       return SUCCESS;
 }
 
 /**
  * Manages source routes in the routing table.
  * By setting the appropriate nlmsg_type, the route gets added or removed.
  */
-static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
-                                                               int flags, chunk_t dst_net, u_int8_t prefixlen,
-                                                               host_t *gateway, host_t *src_ip, char *if_name)
+static status_t manage_srcroute(private_kernel_netlink_net_t *this,
+                                                               int nlmsg_type, int flags, chunk_t dst_net,
+                                                               u_int8_t prefixlen, host_t *gateway,
+                                                               host_t *src_ip, char *if_name)
 {
        netlink_buf_t request;
        struct nlmsghdr *hdr;
@@ -1296,24 +1891,60 @@ static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_ty
        return this->socket->send_ack(this->socket, hdr);
 }
 
-/**
- * Implementation of kernel_net_t.add_route.
- */
-static status_t add_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
-               u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
+/**
+ * kernel_net_t.add_route: install a source route in the kernel and remember
+ * it in the this->routes cache.  The whole operation runs with
+ * this->routes_lock held.
+ *
+ * Returns ALREADY_DONE if an equal route is already cached (the kernel is not
+ * contacted in that case), otherwise the status of manage_srcroute().
+ */
+METHOD(kernel_net_t, add_route, status_t,
+       private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
+       host_t *gateway, host_t *src_ip, char *if_name)
 {
-       return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
-                               dst_net, prefixlen, gateway, src_ip, if_name);
+       status_t status;
+       /* stack-allocated key that borrows the caller's arguments, it is only
+        * used for the lookup and as template for the cached clone */
+       route_entry_t *found, route = {
+               .dst_net = dst_net,
+               .prefixlen = prefixlen,
+               .gateway = gateway,
+               .src_ip = src_ip,
+               .if_name = if_name,
+       };
+
+       this->routes_lock->lock(this->routes_lock);
+       found = this->routes->get(this->routes, &route);
+       if (found)
+       {
+               this->routes_lock->unlock(this->routes_lock);
+               return ALREADY_DONE;
+       }
+       status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
+                                                        dst_net, prefixlen, gateway, src_ip, if_name);
+       if (status == SUCCESS)
+       {       /* only cache routes the kernel accepted, otherwise a failed install
+                * would make every later attempt return ALREADY_DONE */
+               found = route_entry_clone(&route);
+               this->routes->put(this->routes, found, found);
+       }
+       this->routes_lock->unlock(this->routes_lock);
+       return status;
 }
 
-/**
- * Implementation of kernel_net_t.del_route.
- */
-static status_t del_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
-               u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
+/**
+ * kernel_net_t.del_route: drop a route from the this->routes cache and ask
+ * the kernel to delete it.  The whole operation runs with this->routes_lock
+ * held.
+ *
+ * Returns NOT_FOUND if no equal route is cached (the kernel is not contacted
+ * in that case), otherwise the status of manage_srcroute().
+ */
+METHOD(kernel_net_t, del_route, status_t,
+       private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
+       host_t *gateway, host_t *src_ip, char *if_name)
 {
-       return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
-                               gateway, src_ip, if_name);
+       status_t status;
+       /* stack-allocated lookup key that borrows the caller's arguments */
+       route_entry_t *found, route = {
+               .dst_net = dst_net,
+               .prefixlen = prefixlen,
+               .gateway = gateway,
+               .src_ip = src_ip,
+               .if_name = if_name,
+       };
+
+       this->routes_lock->lock(this->routes_lock);
+       found = this->routes->get(this->routes, &route);
+       if (!found)
+       {
+               this->routes_lock->unlock(this->routes_lock);
+               return NOT_FOUND;
+       }
+       /* the cached entry is removed and destroyed before the kernel request,
+        * so it is gone from the cache even if manage_srcroute() fails; the
+        * request uses the caller's arguments, not the destroyed entry */
+       this->routes->remove(this->routes, found);
+       route_entry_destroy(found);
+       status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
+                                                        gateway, src_ip, if_name);
+       this->routes_lock->unlock(this->routes_lock);
+       return status;
 }
 
 /**
@@ -1329,7 +1960,7 @@ static status_t init_address_list(private_kernel_netlink_net_t *this)
        iface_entry_t *iface;
        addr_entry_t *addr;
 
-       DBG1(DBG_KNL, "listening on interfaces:");
+       DBG2(DBG_KNL, "known interfaces and IP addresses:");
 
        memset(&request, 0, sizeof(request));
 
@@ -1391,13 +2022,13 @@ static status_t init_address_list(private_kernel_netlink_net_t *this)
        ifaces = this->ifaces->create_enumerator(this->ifaces);
        while (ifaces->enumerate(ifaces, &iface))
        {
-               if (iface->flags & IFF_UP)
+               if (iface_entry_up_and_usable(iface))
                {
-                       DBG1(DBG_KNL, "  %s", iface->ifname);
+                       DBG2(DBG_KNL, "  %s", iface->ifname);
                        addrs = iface->addrs->create_enumerator(iface->addrs);
                        while (addrs->enumerate(addrs, (void**)&addr))
                        {
-                               DBG1(DBG_KNL, "    %H", addr->ip);
+                               DBG2(DBG_KNL, "    %H", addr->ip);
                        }
                        addrs->destroy(addrs);
                }
@@ -1442,10 +2073,58 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
 }
 
 /**
- * Implementation of kernel_netlink_net_t.destroy.
+ * check for kernel features (currently only via version number)
+ *
+ * Calls uname() and parses utsname.release as up to three dot separated
+ * integers.  this->rta_prefsrc_for_ipv6 is set to TRUE if exactly two
+ * numbers could be parsed, or if three could be parsed and the first one is
+ * not 2.  For a three part version starting with 2 only a debug message is
+ * logged.  If uname() fails or fewer than two numbers can be parsed the flag
+ * is left untouched.
+ */
+static void check_kernel_features(private_kernel_netlink_net_t *this)
+{
+       struct utsname utsname;
+       int a, b, c;
+
+       if (uname(&utsname) == 0)
+       {
+               /* the sscanf() result is the number of fields converted, so a, b
+                * and c are only read in the branch where all three were assigned */
+               switch(sscanf(utsname.release, "%d.%d.%d", &a, &b, &c))
+               {
+                       case 3:
+                               if (a == 2)
+                               {
+                                       DBG2(DBG_KNL, "detected Linux %d.%d.%d, no support for "
+                                                "RTA_PREFSRC for IPv6 routes", a, b, c);
+                                       break;
+                               }
+                               /* fall-through */
+                       case 2:
+                               /* only 3.x+ uses two part version numbers */
+                               this->rta_prefsrc_for_ipv6 = TRUE;
+                               break;
+                       default:
+                               break;
+               }
+       }
+}
+
+/**
+ * Destroy an address to iface map
+ *
+ * Every value enumerated from the map is released with free() and the map
+ * itself is destroyed afterwards.  Only the addr_map_entry_t structs are
+ * freed here, whatever they point to is not released by this function.
  */
-static void destroy(private_kernel_netlink_net_t *this)
+static void addr_map_destroy(hashtable_t *map)
+{
+       enumerator_t *enumerator;
+       addr_map_entry_t *addr;
+
+       enumerator = map->create_enumerator(map);
+       /* NULL is passed for the first out argument, only the second one is
+        * collected and freed */
+       while (enumerator->enumerate(enumerator, NULL, (void**)&addr))
+       {
+               free(addr);
+       }
+       enumerator->destroy(enumerator);
+       map->destroy(map);
+}
+
+METHOD(kernel_net_t, destroy, void,
+       private_kernel_netlink_net_t *this)
 {
+       enumerator_t *enumerator;
+       route_entry_t *route;
+
        if (this->routing_table)
        {
                manage_rule(this, RTM_DELRULE, AF_INET, this->routing_table,
@@ -1453,17 +2132,32 @@ static void destroy(private_kernel_netlink_net_t *this)
                manage_rule(this, RTM_DELRULE, AF_INET6, this->routing_table,
                                        this->routing_table_prio);
        }
-       if (this->job)
-       {
-               this->job->cancel(this->job);
-       }
        if (this->socket_events > 0)
        {
                close(this->socket_events);
        }
+       enumerator = this->routes->create_enumerator(this->routes);
+       while (enumerator->enumerate(enumerator, NULL, (void**)&route))
+       {
+               manage_srcroute(this, RTM_DELROUTE, 0, route->dst_net, route->prefixlen,
+                                               route->gateway, route->src_ip, route->if_name);
+               route_entry_destroy(route);
+       }
+       enumerator->destroy(enumerator);
+       this->routes->destroy(this->routes);
+       this->routes_lock->destroy(this->routes_lock);
        DESTROY_IF(this->socket);
+
+       net_changes_clear(this);
+       this->net_changes->destroy(this->net_changes);
+       this->net_changes_lock->destroy(this->net_changes_lock);
+
+       addr_map_destroy(this->addrs);
+       addr_map_destroy(this->vips);
+
        this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
        this->rt_exclude->destroy(this->rt_exclude);
+       this->roam_lock->destroy(this->roam_lock);
        this->condvar->destroy(this->condvar);
        this->mutex->destroy(this->mutex);
        free(this);
@@ -1474,37 +2168,64 @@ static void destroy(private_kernel_netlink_net_t *this)
  */
 kernel_netlink_net_t *kernel_netlink_net_create()
 {
-       private_kernel_netlink_net_t *this = malloc_thing(private_kernel_netlink_net_t);
-       struct sockaddr_nl addr;
+       private_kernel_netlink_net_t *this;
        enumerator_t *enumerator;
+       bool register_for_events = TRUE;
        char *exclude;
 
-       /* public functions */
-       this->public.interface.get_interface = (char*(*)(kernel_net_t*,host_t*))get_interface_name;
-       this->public.interface.create_address_enumerator = (enumerator_t*(*)(kernel_net_t*,bool,bool))create_address_enumerator;
-       this->public.interface.get_source_addr = (host_t*(*)(kernel_net_t*, host_t *dest, host_t *src))get_source_addr;
-       this->public.interface.get_nexthop = (host_t*(*)(kernel_net_t*, host_t *dest))get_nexthop;
-       this->public.interface.add_ip = (status_t(*)(kernel_net_t*,host_t*,host_t*)) add_ip;
-       this->public.interface.del_ip = (status_t(*)(kernel_net_t*,host_t*)) del_ip;
-       this->public.interface.add_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
-       this->public.interface.del_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
-       this->public.interface.destroy = (void(*)(kernel_net_t*)) destroy;
-
-       /* private members */
-       this->ifaces = linked_list_create();
-       this->mutex = mutex_create(MUTEX_TYPE_RECURSIVE);
-       this->condvar = condvar_create(CONDVAR_TYPE_DEFAULT);
-       timerclear(&this->last_roam);
-       this->routing_table = lib->settings->get_int(lib->settings,
-                                       "%s.routing_table", ROUTING_TABLE, hydra->daemon);
-       this->routing_table_prio = lib->settings->get_int(lib->settings,
-                                       "%s.routing_table_prio", ROUTING_TABLE_PRIO, hydra->daemon);
-       this->process_route = lib->settings->get_bool(lib->settings,
-                                       "%s.process_route", TRUE, hydra->daemon);
-       this->install_virtual_ip = lib->settings->get_bool(lib->settings,
-                                       "%s.install_virtual_ip", TRUE, hydra->daemon);
-
-       this->rt_exclude = linked_list_create();
+       INIT(this,
+               .public = {
+                       .interface = {
+                               .get_interface = _get_interface_name,
+                               .create_address_enumerator = _create_address_enumerator,
+                               .get_source_addr = _get_source_addr,
+                               .get_nexthop = _get_nexthop,
+                               .add_ip = _add_ip,
+                               .del_ip = _del_ip,
+                               .add_route = _add_route,
+                               .del_route = _del_route,
+                               .destroy = _destroy,
+                       },
+               },
+               .socket = netlink_socket_create(NETLINK_ROUTE),
+               .rt_exclude = linked_list_create(),
+               .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
+                                                                  (hashtable_equals_t)route_entry_equals, 16),
+               .net_changes = hashtable_create(
+                                                                  (hashtable_hash_t)net_change_hash,
+                                                                  (hashtable_equals_t)net_change_equals, 16),
+               .addrs = hashtable_create(
+                                                               (hashtable_hash_t)addr_map_entry_hash,
+                                                               (hashtable_equals_t)addr_map_entry_equals, 16),
+               .vips = hashtable_create((hashtable_hash_t)addr_map_entry_hash,
+                                                                (hashtable_equals_t)addr_map_entry_equals, 16),
+               .routes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
+               .net_changes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
+               .ifaces = linked_list_create(),
+               .mutex = mutex_create(MUTEX_TYPE_RECURSIVE),
+               .condvar = condvar_create(CONDVAR_TYPE_DEFAULT),
+               .roam_lock = spinlock_create(),
+               .routing_table = lib->settings->get_int(lib->settings,
+                               "%s.routing_table", ROUTING_TABLE, hydra->daemon),
+               .routing_table_prio = lib->settings->get_int(lib->settings,
+                               "%s.routing_table_prio", ROUTING_TABLE_PRIO, hydra->daemon),
+               .process_route = lib->settings->get_bool(lib->settings,
+                               "%s.process_route", TRUE, hydra->daemon),
+               .install_virtual_ip = lib->settings->get_bool(lib->settings,
+                               "%s.install_virtual_ip", TRUE, hydra->daemon),
+               .install_virtual_ip_on = lib->settings->get_str(lib->settings,
+                               "%s.install_virtual_ip_on", NULL, hydra->daemon),
+       );
+       timerclear(&this->last_route_reinstall);
+       timerclear(&this->next_roam);
+
+       check_kernel_features(this);
+
+       if (streq(hydra->daemon, "starter"))
+       {       /* starter has no threads, so we do not register for kernel events */
+               register_for_events = FALSE;
+       }
+
        exclude = lib->settings->get_str(lib->settings,
                                        "%s.ignore_routing_tables", NULL, hydra->daemon);
        if (exclude)
@@ -1526,32 +2247,35 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                enumerator->destroy(enumerator);
        }
 
-       this->socket = netlink_socket_create(NETLINK_ROUTE);
-       this->job = NULL;
+       if (register_for_events)
+       {
+               struct sockaddr_nl addr;
 
-       memset(&addr, 0, sizeof(addr));
-       addr.nl_family = AF_NETLINK;
+               memset(&addr, 0, sizeof(addr));
+               addr.nl_family = AF_NETLINK;
 
-       /* create and bind RT socket for events (address/interface/route changes) */
-       this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
-       if (this->socket_events < 0)
-       {
-               DBG1(DBG_KNL, "unable to create RT event socket");
-               destroy(this);
-               return NULL;
-       }
-       addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
-                                        RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
-       if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
-       {
-               DBG1(DBG_KNL, "unable to bind RT event socket");
-               destroy(this);
-               return NULL;
-       }
+               /* create and bind RT socket for events (address/interface/route changes) */
+               this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+               if (this->socket_events < 0)
+               {
+                       DBG1(DBG_KNL, "unable to create RT event socket");
+                       destroy(this);
+                       return NULL;
+               }
+               addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
+                                                RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_LINK;
+               if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
+               {
+                       DBG1(DBG_KNL, "unable to bind RT event socket");
+                       destroy(this);
+                       return NULL;
+               }
 
-       this->job = callback_job_create((callback_job_cb_t)receive_events,
-                                                                       this, NULL, NULL);
-       lib->processor->queue_job(lib->processor, (job_t*)this->job);
+               lib->processor->queue_job(lib->processor,
+                       (job_t*)callback_job_create_with_prio(
+                                       (callback_job_cb_t)receive_events, this, NULL,
+                                       (callback_job_cancel_t)return_false, JOB_PRIO_CRITICAL));
+       }
 
        if (init_address_list(this) != SUCCESS)
        {