Use a separate mutex for cached routes in kernel-netlink plugin
[strongswan.git] / src / libhydra / plugins / kernel_netlink / kernel_netlink_net.c
index 2f2167a..4b64a8d 100644 (file)
@@ -53,6 +53,7 @@
 #include <threading/thread.h>
 #include <threading/condvar.h>
 #include <threading/mutex.h>
+#include <threading/spinlock.h>
 #include <utils/hashtable.h>
 #include <utils/linked_list.h>
 #include <processing/jobs/callback_job.h>
 typedef struct addr_entry_t addr_entry_t;
 
 /**
- * IP address in an inface_entry_t
+ * IP address in an iface_entry_t
  */
 struct addr_entry_t {
 
-       /** The ip address */
+       /** the ip address */
        host_t *ip;
 
-       /** virtual IP managed by us */
-       bool virtual;
-
        /** scope of the address */
        u_char scope;
 
-       /** Number of times this IP is used, if virtual */
+       /** number of times this IP is used, if virtual (i.e. managed by us) */
        u_int refcount;
+
+       /** TRUE once it is installed, if virtual */
+       bool installed;
 };
 
 /**
@@ -110,6 +111,9 @@ struct iface_entry_t {
 
        /** list of addresses as host_t */
        linked_list_t *addrs;
+
+       /** TRUE if usable by config */
+       bool usable;
 };
 
 /**
@@ -121,6 +125,102 @@ static void iface_entry_destroy(iface_entry_t *this)
        free(this);
 }
 
+/**
+ * find an interface entry by index
+ */
+static bool iface_entry_by_index(iface_entry_t *this, int *ifindex)
+{
+       return this->ifindex == *ifindex;
+}
+
+/**
+ * find an interface entry by name
+ */
+static bool iface_entry_by_name(iface_entry_t *this, char *ifname)
+{
+       return streq(this->ifname, ifname);
+}
+
+/**
+ * check if an interface is up
+ */
+static inline bool iface_entry_up(iface_entry_t *iface)
+{
+       return (iface->flags & IFF_UP) == IFF_UP;
+}
+
+/**
+ * check if an interface is up and usable
+ */
+static inline bool iface_entry_up_and_usable(iface_entry_t *iface)
+{
+       return iface->usable && iface_entry_up(iface);
+}
+
+typedef struct addr_map_entry_t addr_map_entry_t;
+
+/**
+ * Entry that maps an IP address to an interface entry
+ */
+struct addr_map_entry_t {
+       /** The IP address */
+       host_t *ip;
+
+       /** The address entry for this IP address */
+       addr_entry_t *addr;
+
+       /** The interface this address is installed on */
+       iface_entry_t *iface;
+};
+
+/**
+ * Hash an addr_map_entry_t object; all entries with the same IP address
+ * are stored in the same bucket
+ */
+static u_int addr_map_entry_hash(addr_map_entry_t *this)
+{
+       return chunk_hash(this->ip->get_address(this->ip));
+}
+
+/**
+ * Compare two addr_map_entry_t objects, two entries are equal if they have
+ * the same IP address and are installed on the same interface
+ */
+static bool addr_map_entry_equals(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       return a->iface->ifindex == b->iface->ifindex &&
+                  a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Used with get_match, this finds an address entry if it is installed on
+ * an up and usable interface
+ */
+static bool addr_map_entry_match_up_and_usable(addr_map_entry_t *a,
+                                                                                          addr_map_entry_t *b)
+{
+       return iface_entry_up_and_usable(b->iface) &&
+                  a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Used with get_match, this finds an address entry if it is installed on
+ * any active local interface
+ */
+static bool addr_map_entry_match_up(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       return iface_entry_up(b->iface) && a->ip->ip_equals(a->ip, b->ip);
+}
+
+/**
+ * Used with get_match, this finds an address entry if it is installed on
+ * any local interface
+ */
+static bool addr_map_entry_match(addr_map_entry_t *a, addr_map_entry_t *b)
+{
+       return a->ip->ip_equals(a->ip, b->ip);
+}
+
 typedef struct route_entry_t route_entry_t;
 
 /**
@@ -254,9 +354,14 @@ struct private_kernel_netlink_net_t {
        linked_list_t *ifaces;
 
        /**
-        * job receiving netlink events
+        * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
         */
-       callback_job_t *job;
+       hashtable_t *addrs;
+
+       /**
+        * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
+        */
+       hashtable_t *vips;
 
        /**
         * netlink rt socket (routing)
@@ -269,9 +374,14 @@ struct private_kernel_netlink_net_t {
        int socket_events;
 
        /**
-        * time of the last roam event
+        * earliest time of the next roam event
+        */
+       timeval_t next_roam;
+
+       /**
+        * lock to check and update roam event time
         */
-       timeval_t last_roam;
+       spinlock_t *roam_lock;
 
        /**
         * routing table to install routes
@@ -289,6 +399,11 @@ struct private_kernel_netlink_net_t {
        hashtable_t *routes;
 
        /**
+        * mutex for routes
+        */
+       mutex_t *routes_lock;
+
+       /**
         * interface changes which may trigger route reinstallation
         */
        hashtable_t *net_changes;
@@ -314,6 +429,11 @@ struct private_kernel_netlink_net_t {
        bool install_virtual_ip;
 
        /**
+        * the name of the interface virtual IP addresses are installed on
+        */
+       char *install_virtual_ip_on;
+
+       /**
         * whether preferred source addresses can be specified for IPv6 routes
         */
        bool rta_prefsrc_for_ipv6;
@@ -358,7 +478,7 @@ static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
        route_entry_t *route;
 
        this->net_changes_lock->lock(this->net_changes_lock);
-       this->mutex->lock(this->mutex);
+       this->routes_lock->lock(this->routes_lock);
 
        enumerator = this->routes->create_enumerator(this->routes);
        while (enumerator->enumerate(enumerator, NULL, (void**)&route))
@@ -370,13 +490,15 @@ static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
                change = this->net_changes->get(this->net_changes, &lookup);
                if (!change)
                {       /* in case src_ip is not on the outgoing interface */
-                       lookup.if_name = this->public.interface.get_interface(
-                                                                               &this->public.interface, route->src_ip);
-                       if (lookup.if_name && !streq(lookup.if_name, route->if_name))
+                       if (this->public.interface.get_interface(&this->public.interface,
+                                                                                               route->src_ip, &lookup.if_name))
                        {
-                               change = this->net_changes->get(this->net_changes, &lookup);
+                               if (!streq(lookup.if_name, route->if_name))
+                               {
+                                       change = this->net_changes->get(this->net_changes, &lookup);
+                               }
+                               free(lookup.if_name);
                        }
-                       free(lookup.if_name);
                }
                if (change)
                {
@@ -386,7 +508,7 @@ static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
                }
        }
        enumerator->destroy(enumerator);
-       this->mutex->unlock(this->mutex);
+       this->routes_lock->unlock(this->routes_lock);
 
        net_changes_clear(this);
        this->net_changes_lock->unlock(this->net_changes_lock);
@@ -436,71 +558,126 @@ static void queue_route_reinstall(private_kernel_netlink_net_t *this,
 }
 
 /**
- * get the refcount of a virtual ip
+ * check if the given IP is known as virtual IP and currently installed
+ *
+ * this function will also return TRUE if the virtual IP entry disappeared.
+ * in that case the returned entry will be NULL.
+ *
+ * this->mutex must be locked when calling this function
  */
-static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
+static bool is_vip_installed_or_gone(private_kernel_netlink_net_t *this,
+                                                                        host_t *ip, addr_map_entry_t **entry)
 {
-       enumerator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       int refcount = 0;
+       addr_map_entry_t lookup = {
+               .ip = ip,
+       };
 
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, (void**)&iface))
-       {
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, (void**)&addr))
-               {
-                       if (addr->virtual && (iface->flags & IFF_UP) &&
-                               ip->ip_equals(ip, addr->ip))
-                       {
-                               refcount = addr->refcount;
-                               break;
-                       }
-               }
-               addrs->destroy(addrs);
-               if (refcount)
-               {
-                       break;
-               }
+       *entry = this->vips->get_match(this->vips, &lookup,
+                                                                 (void*)addr_map_entry_match);
+       if (*entry == NULL)
+       {       /* the virtual IP disappeared */
+               return TRUE;
        }
-       ifaces->destroy(ifaces);
+       return (*entry)->addr->installed;
+}
+
+/**
+ * check if the given IP is known as virtual IP
+ *
+ * this->mutex must be locked when calling this function
+ */
+static bool is_known_vip(private_kernel_netlink_net_t *this, host_t *ip)
+{
+       addr_map_entry_t lookup = {
+               .ip = ip,
+       };
+
+       return this->vips->get_match(this->vips, &lookup,
+                                                               (void*)addr_map_entry_match) != NULL;
+}
 
-       return refcount;
+/**
+ * Add an address map entry
+ */
+static void addr_map_entry_add(hashtable_t *map, addr_entry_t *addr,
+                                                          iface_entry_t *iface)
+{
+       addr_map_entry_t *entry;
+
+       INIT(entry,
+               .ip = addr->ip,
+               .addr = addr,
+               .iface = iface,
+       );
+       entry = map->put(map, entry, entry);
+       free(entry);
+}
+
+/**
+ * Remove an address map entry
+ */
+static void addr_map_entry_remove(hashtable_t *map, addr_entry_t *addr,
+                                                                 iface_entry_t *iface)
+{
+       addr_map_entry_t *entry, lookup = {
+               .ip = addr->ip,
+               .addr = addr,
+               .iface = iface,
+       };
+
+       entry = map->remove(map, &lookup);
+       free(entry);
 }
 
 /**
  * get the first non-virtual ip address on the given interface.
+ * if a candidate address is given, we first search for that address and, if it
+ * is not found, return the first address as above.
  * returned host is a clone, has to be freed by caller.
  */
 static host_t *get_interface_address(private_kernel_netlink_net_t *this,
-                                                                        int ifindex, int family)
+                                                                        int ifindex, int family, host_t *candidate)
 {
-       enumerator_t *ifaces, *addrs;
        iface_entry_t *iface;
+       enumerator_t *addrs;
        addr_entry_t *addr;
        host_t *ip = NULL;
 
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &ifindex) == SUCCESS)
        {
-               if (iface->ifindex == ifindex)
-               {
+               if (iface->usable)
+               {       /* only use interfaces not excluded by config */
                        addrs = iface->addrs->create_enumerator(iface->addrs);
                        while (addrs->enumerate(addrs, &addr))
                        {
-                               if (!addr->virtual && addr->ip->get_family(addr->ip) == family)
+                               if (addr->refcount)
+                               {       /* ignore virtual IP addresses */
+                                       continue;
+                               }
+                               if (addr->ip->get_family(addr->ip) == family)
                                {
-                                       ip = addr->ip->clone(addr->ip);
-                                       break;
+                                       if (!candidate || candidate->ip_equals(candidate, addr->ip))
+                                       {       /* stop at the first address if we don't search for a
+                                                * candidate or if the candidate matches */
+                                               ip = addr->ip;
+                                               break;
+                                       }
+                                       else if (!ip)
+                                       {       /* store the first address as fallback if candidate is
+                                                * not found */
+                                               ip = addr->ip;
+                                       }
                                }
                        }
                        addrs->destroy(addrs);
-                       break;
                }
        }
-       ifaces->destroy(ifaces);
+       if (ip)
+       {
+               ip = ip->clone(ip);
+       }
        this->mutex->unlock(this->mutex);
        return ip;
 }
@@ -524,21 +701,60 @@ static void fire_roam_event(private_kernel_netlink_net_t *this, bool address)
        job_t *job;
 
        time_monotonic(&now);
-       if (timercmp(&now, &this->last_roam, >))
+       this->roam_lock->lock(this->roam_lock);
+       if (!timercmp(&now, &this->next_roam, >))
        {
-               now.tv_usec += ROAM_DELAY * 1000;
-               while (now.tv_usec > 1000000)
-               {
-                       now.tv_sec++;
-                       now.tv_usec -= 1000000;
-               }
-               this->last_roam = now;
+               this->roam_lock->unlock(this->roam_lock);
+               return;
+       }
+       now.tv_usec += ROAM_DELAY * 1000;
+       while (now.tv_usec > 1000000)
+       {
+               now.tv_sec++;
+               now.tv_usec -= 1000000;
+       }
+       this->next_roam = now;
+       this->roam_lock->unlock(this->roam_lock);
+
+       job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
+                                                                         (void*)(uintptr_t)(address ? 1 : 0),
+                                                                          NULL, NULL);
+       lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
+}
+
+/**
+ * check if an interface with a given index is up and usable
+ *
+ * this->mutex must be locked when calling this function
+ */
+static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
+                                                                          int index)
+{
+       iface_entry_t *iface;
 
-               job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
-                                                                                 (void*)(uintptr_t)(address ? 1 : 0),
-                                                                                 NULL, NULL);
-               lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &index) == SUCCESS)
+       {
+               return iface_entry_up_and_usable(iface);
+       }
+       return FALSE;
+}
+
+/**
+ * unregister the current addr_entry_t from the hashtable it is stored in
+ *
+ * this->mutex must be locked when calling this function
+ */
+static void addr_entry_unregister(addr_entry_t *addr, iface_entry_t *iface,
+                                                                 private_kernel_netlink_net_t *this)
+{
+       if (addr->refcount)
+       {
+               addr_map_entry_remove(this->vips, addr, iface);
+               this->condvar->broadcast(this->condvar);
+               return;
        }
+       addr_map_entry_remove(this->addrs, addr, iface);
 }
 
 /**
@@ -575,31 +791,21 @@ static void process_link(private_kernel_netlink_net_t *this,
        {
                case RTM_NEWLINK:
                {
-                       if (msg->ifi_flags & IFF_LOOPBACK)
-                       {       /* ignore loopback interfaces */
-                               break;
-                       }
-                       enumerator = this->ifaces->create_enumerator(this->ifaces);
-                       while (enumerator->enumerate(enumerator, &current))
+                       if (this->ifaces->find_first(this->ifaces,
+                                                                       (void*)iface_entry_by_index, (void**)&entry,
+                                                                       &msg->ifi_index) != SUCCESS)
                        {
-                               if (current->ifindex == msg->ifi_index)
-                               {
-                                       entry = current;
-                                       break;
-                               }
-                       }
-                       enumerator->destroy(enumerator);
-                       if (!entry)
-                       {
-                               entry = malloc_thing(iface_entry_t);
-                               entry->ifindex = msg->ifi_index;
-                               entry->flags = 0;
-                               entry->addrs = linked_list_create();
+                               INIT(entry,
+                                       .ifindex = msg->ifi_index,
+                                       .addrs = linked_list_create(),
+                                       .usable = hydra->kernel_interface->is_interface_usable(
+                                                                                               hydra->kernel_interface, name),
+                               );
                                this->ifaces->insert_last(this->ifaces, entry);
                        }
                        strncpy(entry->ifname, name, IFNAMSIZ);
                        entry->ifname[IFNAMSIZ-1] = '\0';
-                       if (event)
+                       if (event && entry->usable)
                        {
                                if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
                                {
@@ -622,12 +828,16 @@ static void process_link(private_kernel_netlink_net_t *this,
                        {
                                if (current->ifindex == msg->ifi_index)
                                {
-                                       if (event)
+                                       if (event && current->usable)
                                        {
                                                update = TRUE;
                                                DBG1(DBG_KNL, "interface %s deleted", current->ifname);
                                        }
+                                       /* TODO: move virtual IPs installed on this interface to
+                                        * another interface? */
                                        this->ifaces->remove_at(this->ifaces, enumerator);
+                                       current->addrs->invoke_function(current->addrs,
+                                                               (void*)addr_entry_unregister, current, this);
                                        iface_entry_destroy(current);
                                        break;
                                }
@@ -643,7 +853,6 @@ static void process_link(private_kernel_netlink_net_t *this,
                queue_route_reinstall(this, strdup(name));
        }
 
-       /* send an update to all IKE_SAs */
        if (update && event)
        {
                fire_roam_event(this, TRUE);
@@ -660,9 +869,7 @@ static void process_addr(private_kernel_netlink_net_t *this,
        struct rtattr *rta = IFA_RTA(msg);
        size_t rtasize = IFA_PAYLOAD (hdr);
        host_t *host = NULL;
-       enumerator_t *ifaces, *addrs;
        iface_entry_t *iface;
-       addr_entry_t *addr;
        chunk_t local = chunk_empty, address = chunk_empty;
        char *route_ifname = NULL;
        bool update = FALSE, found = FALSE, changed = FALSE;
@@ -701,64 +908,81 @@ static void process_addr(private_kernel_netlink_net_t *this,
        }
 
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
+                                                                (void**)&iface, &msg->ifa_index) == SUCCESS)
        {
-               if (iface->ifindex == msg->ifa_index)
+               addr_map_entry_t *entry, lookup = {
+                       .ip = host,
+                       .iface = iface,
+               };
+               addr_entry_t *addr;
+
+               entry = this->vips->get(this->vips, &lookup);
+               if (entry)
                {
-                       addrs = iface->addrs->create_enumerator(iface->addrs);
-                       while (addrs->enumerate(addrs, &addr))
+                       if (hdr->nlmsg_type == RTM_NEWADDR)
+                       {       /* mark as installed and signal waiting threads */
+                               entry->addr->installed = TRUE;
+                       }
+                       else
+                       {       /* the address was already marked as uninstalled */
+                               addr = entry->addr;
+                               iface->addrs->remove(iface->addrs, addr, NULL);
+                               addr_map_entry_remove(this->vips, addr, iface);
+                               addr_entry_destroy(addr);
+                       }
+                       /* no roam events etc. for virtual IPs */
+                       this->condvar->broadcast(this->condvar);
+                       this->mutex->unlock(this->mutex);
+                       host->destroy(host);
+                       return;
+               }
+               entry = this->addrs->get(this->addrs, &lookup);
+               if (entry)
+               {
+                       if (hdr->nlmsg_type == RTM_DELADDR)
                        {
-                               if (host->ip_equals(host, addr->ip))
+                               found = TRUE;
+                               addr = entry->addr;
+                               iface->addrs->remove(iface->addrs, addr, NULL);
+                               if (iface->usable)
                                {
-                                       found = TRUE;
-                                       if (hdr->nlmsg_type == RTM_DELADDR)
-                                       {
-                                               iface->addrs->remove_at(iface->addrs, addrs);
-                                               if (!addr->virtual)
-                                               {
-                                                       changed = TRUE;
-                                                       DBG1(DBG_KNL, "%H disappeared from %s",
-                                                                host, iface->ifname);
-                                               }
-                                               addr_entry_destroy(addr);
-                                       }
-                                       else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
-                                       {
-                                               addr->refcount = 1;
-                                       }
+                                       changed = TRUE;
+                                       DBG1(DBG_KNL, "%H disappeared from %s", host,
+                                                iface->ifname);
                                }
+                               addr_map_entry_remove(this->addrs, addr, iface);
+                               addr_entry_destroy(addr);
                        }
-                       addrs->destroy(addrs);
-
+               }
+               else
+               {
                        if (hdr->nlmsg_type == RTM_NEWADDR)
                        {
-                               if (!found)
+                               found = TRUE;
+                               changed = TRUE;
+                               route_ifname = strdup(iface->ifname);
+                               INIT(addr,
+                                       .ip = host->clone(host),
+                                       .scope = msg->ifa_scope,
+                               );
+                               iface->addrs->insert_last(iface->addrs, addr);
+                               addr_map_entry_add(this->addrs, addr, iface);
+                               if (event && iface->usable)
                                {
-                                       found = TRUE;
-                                       changed = TRUE;
-                                       route_ifname = strdup(iface->ifname);
-                                       addr = malloc_thing(addr_entry_t);
-                                       addr->ip = host->clone(host);
-                                       addr->virtual = FALSE;
-                                       addr->refcount = 1;
-                                       addr->scope = msg->ifa_scope;
-
-                                       iface->addrs->insert_last(iface->addrs, addr);
-                                       if (event)
-                                       {
-                                               DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
-                                       }
+                                       DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
                                }
                        }
-                       if (found && (iface->flags & IFF_UP))
-                       {
-                               update = TRUE;
-                       }
-                       break;
+               }
+               if (found && (iface->flags & IFF_UP))
+               {
+                       update = TRUE;
+               }
+               if (!iface->usable)
+               {       /* ignore events for interfaces excluded by config */
+                       update = changed = FALSE;
                }
        }
-       ifaces->destroy(ifaces);
        this->mutex->unlock(this->mutex);
 
        if (update && event && route_ifname)
@@ -818,20 +1042,26 @@ static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *h
                }
                rta = RTA_NEXT(rta, rtasize);
        }
+       this->mutex->lock(this->mutex);
+       if (rta_oif && !is_interface_up_and_usable(this, rta_oif))
+       {       /* ignore route changes for interfaces that are ignored or down */
+               this->mutex->unlock(this->mutex);
+               DESTROY_IF(host);
+               return;
+       }
        if (!host && rta_oif)
        {
-               host = get_interface_address(this, rta_oif, msg->rtm_family);
+               host = get_interface_address(this, rta_oif, msg->rtm_family, NULL);
        }
        if (host)
        {
-               this->mutex->lock(this->mutex);
-               if (!get_vip_refcount(this, host))
+               if (!is_known_vip(this, host))
                {       /* ignore routes added for virtual IPs */
                        fire_roam_event(this, FALSE);
                }
-               this->mutex->unlock(this->mutex);
                host->destroy(host);
        }
+       this->mutex->unlock(this->mutex);
 }
 
 /**
@@ -881,12 +1111,10 @@ static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
                        case RTM_NEWADDR:
                        case RTM_DELADDR:
                                process_addr(this, hdr, TRUE);
-                               this->condvar->broadcast(this->condvar);
                                break;
                        case RTM_NEWLINK:
                        case RTM_DELLINK:
                                process_link(this, hdr, TRUE);
-                               this->condvar->broadcast(this->condvar);
                                break;
                        case RTM_NEWROUTE:
                        case RTM_DELROUTE:
@@ -906,10 +1134,8 @@ static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
 /** enumerator over addresses */
 typedef struct {
        private_kernel_netlink_net_t* this;
-       /** whether to enumerate down interfaces */
-       bool include_down_ifaces;
-       /** whether to enumerate virtual ip addresses */
-       bool include_virtual_ips;
+       /** which addresses to enumerate */
+       kernel_address_type_t which;
 } address_enumerator_t;
 
 /**
@@ -927,7 +1153,7 @@ static void address_enumerator_destroy(address_enumerator_t *data)
 static bool filter_addresses(address_enumerator_t *data,
                                                         addr_entry_t** in, host_t** out)
 {
-       if (!data->include_virtual_ips && (*in)->virtual)
+       if (!(data->which & ADDR_TYPE_VIRTUAL) && (*in)->refcount)
        {       /* skip virtual interfaces added by us */
                return FALSE;
        }
@@ -956,7 +1182,15 @@ static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
                                                          iface_entry_t** out)
 {
-       if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
+       if (!(data->which & ADDR_TYPE_IGNORED) && !(*in)->usable)
+       {       /* skip interfaces excluded by config */
+               return FALSE;
+       }
+       if (!(data->which & ADDR_TYPE_LOOPBACK) && ((*in)->flags & IFF_LOOPBACK))
+       {       /* ignore loopback devices */
+               return FALSE;
+       }
+       if (!(data->which & ADDR_TYPE_DOWN) && !((*in)->flags & IFF_UP))
        {       /* skip interfaces not up */
                return FALSE;
        }
@@ -965,13 +1199,11 @@ static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
 }
 
 METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
-       private_kernel_netlink_net_t *this,
-       bool include_down_ifaces, bool include_virtual_ips)
+       private_kernel_netlink_net_t *this, kernel_address_type_t which)
 {
        address_enumerator_t *data = malloc_thing(address_enumerator_t);
        data->this = this;
-       data->include_down_ifaces = include_down_ifaces;
-       data->include_virtual_ips = include_virtual_ips;
+       data->which = which;
 
        this->mutex->lock(this->mutex);
        return enumerator_create_nested(
@@ -982,47 +1214,40 @@ METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
                                (void*)address_enumerator_destroy);
 }
 
-METHOD(kernel_net_t, get_interface_name, char*,
-       private_kernel_netlink_net_t *this, host_t* ip)
+METHOD(kernel_net_t, get_interface_name, bool,
+       private_kernel_netlink_net_t *this, host_t* ip, char **name)
 {
-       enumerator_t *ifaces, *addrs;
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       char *name = NULL;
-
-       DBG2(DBG_KNL, "getting interface name for %H", ip);
+       addr_map_entry_t *entry, lookup = {
+               .ip = ip,
+       };
 
+       if (ip->is_anyaddr(ip))
+       {
+               return FALSE;
+       }
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       /* first try to find it on an up and usable interface */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match_up_and_usable);
+       if (entry)
        {
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
-               {
-                       if (ip->ip_equals(ip, addr->ip))
-                       {
-                               name = strdup(iface->ifname);
-                               break;
-                       }
-               }
-               addrs->destroy(addrs);
                if (name)
                {
-                       break;
+                       *name = strdup(entry->iface->ifname);
+                       DBG2(DBG_KNL, "%H is on interface %s", ip, *name);
                }
+               this->mutex->unlock(this->mutex);
+               return TRUE;
        }
-       ifaces->destroy(ifaces);
-       this->mutex->unlock(this->mutex);
-
-       if (name)
-       {
-               DBG2(DBG_KNL, "%H is on interface %s", ip, name);
-       }
-       else
+       /* maybe it is installed on an ignored interface */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match_up);
+       if (!entry)
        {
-               DBG2(DBG_KNL, "%H is not a local address", ip);
+               DBG2(DBG_KNL, "%H is not a local address or the interface is down", ip);
        }
-       return name;
+       this->mutex->unlock(this->mutex);
+       return FALSE;
 }
 
 /**
@@ -1030,23 +1255,17 @@ METHOD(kernel_net_t, get_interface_name, char*,
  */
 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
 {
-       enumerator_t *ifaces;
        iface_entry_t *iface;
        int ifindex = 0;
 
        DBG2(DBG_KNL, "getting iface index for %s", name);
 
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
+       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
+                                                               (void**)&iface, name) == SUCCESS)
        {
-               if (streq(name, iface->ifname))
-               {
-                       ifindex = iface->ifindex;
-                       break;
-               }
+               ifindex = iface->ifindex;
        }
-       ifaces->destroy(ifaces);
        this->mutex->unlock(this->mutex);
 
        if (ifindex == 0)
@@ -1057,29 +1276,6 @@ static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
 }
 
 /**
- * Check if an interface with a given index is up
- */
-static bool is_interface_up(private_kernel_netlink_net_t *this, int index)
-{
-       enumerator_t *ifaces;
-       iface_entry_t *iface;
-       /* default to TRUE for interface we do not monitor (e.g. lo) */
-       bool up = TRUE;
-
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
-       {
-               if (iface->ifindex == index)
-               {
-                       up = iface->flags & IFF_UP;
-                       break;
-               }
-       }
-       ifaces->destroy(ifaces);
-       return up;
-}
-
-/**
  * check if an address (chunk) addr is in subnet (net with net_len net bits)
  */
 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
@@ -1116,6 +1312,94 @@ static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
 }
 
 /**
+ * Store information about a route retrieved via RTNETLINK
+ */
+typedef struct {
+       chunk_t gtw;
+       chunk_t src;
+       chunk_t dst;
+       host_t *src_host;
+       u_int8_t dst_len;
+       u_int32_t table;
+       u_int32_t oif;
+} rt_entry_t;
+
+/**
+ * Free a route entry
+ */
+static void rt_entry_destroy(rt_entry_t *this)
+{
+       DESTROY_IF(this->src_host);
+       free(this);
+}
+
+/**
+ * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
+ * reused if not NULL.
+ *
+ * Returned chunks point to internal data of the Netlink message.
+ */
+static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
+{
+       struct rtattr *rta;
+       struct rtmsg *msg;
+       size_t rtasize;
+
+       msg = (struct rtmsg*)(NLMSG_DATA(hdr));
+       rta = RTM_RTA(msg);
+       rtasize = RTM_PAYLOAD(hdr);
+
+       if (route)
+       {
+               route->gtw = chunk_empty;
+               route->src = chunk_empty;
+               route->dst = chunk_empty;
+               route->dst_len = msg->rtm_dst_len;
+               route->table = msg->rtm_table;
+               route->oif = 0;
+       }
+       else
+       {
+               INIT(route,
+                       .dst_len = msg->rtm_dst_len,
+                       .table = msg->rtm_table,
+               );
+       }
+
+       while (RTA_OK(rta, rtasize))
+       {
+               switch (rta->rta_type)
+               {
+                       case RTA_PREFSRC:
+                               route->src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_GATEWAY:
+                               route->gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_DST:
+                               route->dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
+                               break;
+                       case RTA_OIF:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->oif))
+                               {
+                                       route->oif = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
+#ifdef HAVE_RTA_TABLE
+                       case RTA_TABLE:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->table))
+                               {
+                                       route->table = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
+#endif /* HAVE_RTA_TABLE */
+               }
+               rta = RTA_NEXT(rta, rtasize);
+       }
+       return route;
+}
+
+/**
  * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
  */
 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
@@ -1126,11 +1410,10 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
        struct rtmsg *msg;
        chunk_t chunk;
        size_t len;
-       int best = -1;
+       linked_list_t *routes;
+       rt_entry_t *route = NULL, *best = NULL;
        enumerator_t *enumerator;
-       host_t *src = NULL, *gtw = NULL;
-
-       DBG2(DBG_KNL, "getting address to reach %H", dest);
+       host_t *addr = NULL;
 
        memset(&request, 0, sizeof(request));
 
@@ -1158,9 +1441,11 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
 
        if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
        {
-               DBG1(DBG_KNL, "getting address to %H failed", dest);
+               DBG2(DBG_KNL, "getting %s to reach %H failed",
+                        nexthop ? "nexthop" : "address", dest);
                return NULL;
        }
+       routes = linked_list_create();
        this->mutex->lock(this->mutex);
 
        for (current = out; NLMSG_OK(current, len);
@@ -1172,132 +1457,53 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                                break;
                        case RTM_NEWROUTE:
                        {
-                               struct rtattr *rta;
-                               size_t rtasize;
-                               chunk_t rta_gtw, rta_src, rta_dst;
-                               u_int32_t rta_oif = 0, rta_table;
-                               host_t *new_src, *new_gtw;
-                               bool cont = FALSE;
+                               rt_entry_t *other;
                                uintptr_t table;
 
-                               rta_gtw = rta_src = rta_dst = chunk_empty;
-                               msg = (struct rtmsg*)(NLMSG_DATA(current));
-                               rta = RTM_RTA(msg);
-                               rtasize = RTM_PAYLOAD(current);
-                               rta_table = msg->rtm_table;
-                               while (RTA_OK(rta, rtasize))
-                               {
-                                       switch (rta->rta_type)
-                                       {
-                                               case RTA_PREFSRC:
-                                                       rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_GATEWAY:
-                                                       rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_DST:
-                                                       rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                                       break;
-                                               case RTA_OIF:
-                                                       if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
-                                                       {
-                                                               rta_oif = *(u_int32_t*)RTA_DATA(rta);
-                                                       }
-                                                       break;
-#ifdef HAVE_RTA_TABLE
-                                               case RTA_TABLE:
-                                                       if (RTA_PAYLOAD(rta) == sizeof(rta_table))
-                                                       {
-                                                               rta_table = *(u_int32_t*)RTA_DATA(rta);
-                                                       }
-                                                       break;
-#endif /* HAVE_RTA_TABLE*/
-                                       }
-                                       rta = RTA_NEXT(rta, rtasize);
-                               }
-                               if (msg->rtm_dst_len <= best)
-                               {       /* not better than a previous one */
-                                       continue;
-                               }
-                               enumerator = this->rt_exclude->create_enumerator(this->rt_exclude);
-                               while (enumerator->enumerate(enumerator, &table))
-                               {
-                                       if (table == rta_table)
-                                       {
-                                               cont = TRUE;
-                                               break;
-                                       }
-                               }
-                               enumerator->destroy(enumerator);
-                               if (cont)
-                               {
+                               route = parse_route(current, route);
+
+                               table = (uintptr_t)route->table;
+                               if (this->rt_exclude->find_first(this->rt_exclude, NULL,
+                                                                                                (void**)&table) == SUCCESS)
+                               {       /* route is from an excluded routing table */
                                        continue;
                                }
                                if (this->routing_table != 0 &&
-                                       rta_table == this->routing_table)
+                                       route->table == this->routing_table)
                                {       /* route is from our own ipsec routing table */
                                        continue;
                                }
-                               if (rta_oif && !is_interface_up(this, rta_oif))
+                               if (route->oif && !is_interface_up_and_usable(this, route->oif))
                                {       /* interface is down */
                                        continue;
                                }
-                               if (!addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))
+                               if (!addr_in_subnet(chunk, route->dst, route->dst_len))
                                {       /* route destination does not contain dest */
                                        continue;
                                }
-
-                               if (nexthop)
-                               {
-                                       /* nexthop lookup, return gateway if any */
-                                       DESTROY_IF(gtw);
-                                       gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
-                                       best = msg->rtm_dst_len;
-                                       continue;
-                               }
-                               if (rta_src.ptr)
-                               {       /* got a source address */
-                                       new_src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
-                                       if (new_src)
-                                       {
-                                               if (get_vip_refcount(this, new_src))
-                                               {       /* skip source address if it is installed by us */
-                                                       new_src->destroy(new_src);
-                                               }
-                                               else
-                                               {
-                                                       DESTROY_IF(src);
-                                                       src = new_src;
-                                                       best = msg->rtm_dst_len;
-                                               }
+                               if (route->src.ptr)
+                               {       /* verify source address, if any */
+                                       host_t *src = host_create_from_chunk(msg->rtm_family,
+                                                                                                                route->src, 0);
+                                       if (src && is_known_vip(this, src))
+                                       {       /* ignore routes installed by us */
+                                               src->destroy(src);
+                                               continue;
                                        }
-                                       continue;
-                               }
-                               if (rta_oif)
-                               {       /* no src or gtw, but an interface. Get address from it. */
-                                       new_src = get_interface_address(this, rta_oif,
-                                                                                                       msg->rtm_family);
-                                       if (new_src)
-                                       {
-                                               DESTROY_IF(src);
-                                               src = new_src;
-                                               best = msg->rtm_dst_len;
-                                       }
-                                       continue;
+                                       route->src_host = src;
                                }
-                               if (rta_gtw.ptr)
-                               {       /* no source, but a gateway. Lookup source to reach gtw. */
-                                       new_gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
-                                       new_src = get_route(this, new_gtw, FALSE, candidate);
-                                       new_gtw->destroy(new_gtw);
-                                       if (new_src)
+                               /* insert route, sorted by decreasing network prefix */
+                               enumerator = routes->create_enumerator(routes);
+                               while (enumerator->enumerate(enumerator, &other))
+                               {
+                                       if (route->dst_len > other->dst_len)
                                        {
-                                               DESTROY_IF(src);
-                                               src = new_src;
-                                               best = msg->rtm_dst_len;
+                                               break;
                                        }
-                                       continue;
                                }
+                               routes->insert_before(routes, enumerator, route);
+                               enumerator->destroy(enumerator);
+                               route = NULL;
                                continue;
                        }
                        default:
@@ -1305,18 +1511,111 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                }
                break;
        }
-       free(out);
-       this->mutex->unlock(this->mutex);
+       if (route)
+       {
+               rt_entry_destroy(route);
+       }
+
+       /* now we have a list of routes matching dest, sorted by net prefix.
+        * we will look for source addresses for these routes and select the one
+        * with the preferred source address, if possible */
+       enumerator = routes->create_enumerator(routes);
+       while (enumerator->enumerate(enumerator, &route))
+       {
+               if (route->src_host)
+               {       /* got a source address with the route, if no preferred source
+                        * is given or it matches we are done, as this is the best route */
+                       if (!candidate || candidate->ip_equals(candidate, route->src_host))
+                       {
+                               best = route;
+                               break;
+                       }
+                       else if (route->oif)
+                       {       /* no match yet, maybe it is assigned to the same interface */
+                               host_t *src = get_interface_address(this, route->oif,
+                                                                                                       msg->rtm_family, candidate);
+                               if (src && src->ip_equals(src, candidate))
+                               {
+                                       route->src_host->destroy(route->src_host);
+                                       route->src_host = src;
+                                       best = route;
+                                       break;
+                               }
+                               DESTROY_IF(src);
+                       }
+                       /* no luck yet with the source address. if this is the best (first)
+                        * route we store it as fallback in case we don't find a route with
+                        * the preferred source */
+                       best = best ?: route;
+                       continue;
+               }
+               if (route->oif)
+               {       /* no src, but an interface - get address from it */
+                       route->src_host = get_interface_address(this, route->oif,
+                                                                                                       msg->rtm_family, candidate);
+                       if (route->src_host)
+                       {       /* we handle this address the same as the one above */
+                               if (!candidate ||
+                                        candidate->ip_equals(candidate, route->src_host))
+                               {
+                                       best = route;
+                                       break;
+                               }
+                               best = best ?: route;
+                               continue;
+                       }
+               }
+               if (route->gtw.ptr)
+               {       /* no src and no address from the iface, but a gateway - lookup src to reach gtw */
+                       host_t *gtw;
+
+                       gtw = host_create_from_chunk(msg->rtm_family, route->gtw, 0);
+                       route->src_host = get_route(this, gtw, FALSE, candidate);
+                       gtw->destroy(gtw);
+                       if (route->src_host)
+                       {       /* more of the same */
+                               if (!candidate ||
+                                        candidate->ip_equals(candidate, route->src_host))
+                               {
+                                       best = route;
+                                       break;
+                               }
+                               best = best ?: route;
+                       }
+               }
+       }
+       enumerator->destroy(enumerator);
 
        if (nexthop)
+       {       /* nexthop lookup, return gateway if any */
+               if (best || routes->get_first(routes, (void**)&best) == SUCCESS)
+               {
+                       addr = host_create_from_chunk(msg->rtm_family, best->gtw, 0);
+               }
+               addr = addr ?: dest->clone(dest);
+       }
+       else
        {
-               if (gtw)
+               if (best)
                {
-                       return gtw;
+                       addr = best->src_host->clone(best->src_host);
                }
-               return dest->clone(dest);
        }
-       return src;
+       this->mutex->unlock(this->mutex);
+       routes->destroy_function(routes, (void*)rt_entry_destroy);
+       free(out);
+
+       if (addr)
+       {
+               DBG2(DBG_KNL, "using %H as %s to reach %H", addr,
+                        nexthop ? "nexthop" : "address", dest);
+       }
+       else
+       {
+               DBG2(DBG_KNL, "no %s found to reach %H",
+                        nexthop ? "nexthop" : "address", dest);
+       }
+       return addr;
 }
 
 METHOD(kernel_net_t, get_source_addr, host_t*,
@@ -1326,9 +1625,9 @@ METHOD(kernel_net_t, get_source_addr, host_t*,
 }
 
 METHOD(kernel_net_t, get_nexthop, host_t*,
-       private_kernel_netlink_net_t *this, host_t *dest)
+       private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
 {
-       return get_route(this, dest, TRUE, NULL);
+       return get_route(this, dest, TRUE, src);
 }
 
 /**
@@ -1367,87 +1666,109 @@ static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type
 METHOD(kernel_net_t, add_ip, status_t,
        private_kernel_netlink_net_t *this, host_t *virtual_ip, host_t *iface_ip)
 {
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       enumerator_t *addrs, *ifaces;
-       int ifindex;
+       addr_map_entry_t *entry, lookup = {
+               .ip = virtual_ip,
+       };
+       iface_entry_t *iface = NULL;
 
        if (!this->install_virtual_ip)
        {       /* disabled by config */
                return SUCCESS;
        }
 
-       DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
-
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
-       {
-               bool iface_found = FALSE;
-
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
-               {
-                       if (iface_ip->ip_equals(iface_ip, addr->ip))
+       /* the virtual IP might actually be installed as regular IP, in which case
+        * we don't track it as virtual IP */
+       entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                 (void*)addr_map_entry_match);
+       if (!entry)
+       {       /* otherwise it might already be installed as virtual IP */
+               entry = this->vips->get_match(this->vips, &lookup,
+                                                                        (void*)addr_map_entry_match);
+               if (entry)
+               {       /* the vip we found can be in one of three states: 1) installed and
+                        * ready, 2) just added by another thread, but not yet confirmed to
+                        * be installed by the kernel, 3) just deleted, but not yet gone.
+                        * Then while we wait below, several things could happen (as we
+                        * release the mutex).  For instance, the interface could disappear,
+                        * or the IP is finally deleted, and it reappears on a different
+                        * interface. All these cases are handled by the call below. */
+                       while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
                        {
-                               iface_found = TRUE;
+                               this->condvar->wait(this->condvar, this->mutex);
                        }
-                       else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
+                       if (entry)
                        {
-                               addr->refcount++;
-                               DBG2(DBG_KNL, "virtual IP %H already installed on %s",
-                                        virtual_ip, iface->ifname);
-                               addrs->destroy(addrs);
-                               ifaces->destroy(ifaces);
-                               this->mutex->unlock(this->mutex);
-                               return SUCCESS;
+                               entry->addr->refcount++;
                        }
                }
-               addrs->destroy(addrs);
-
-               if (iface_found)
+       }
+       if (entry)
+       {
+               DBG2(DBG_KNL, "virtual IP %H is already installed on %s", virtual_ip,
+                        entry->iface->ifname);
+               this->mutex->unlock(this->mutex);
+               return SUCCESS;
+       }
+       /* try to find the target interface, either by config or via src ip */
+       if (!this->install_virtual_ip_on ||
+                this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
+                                               (void**)&iface, this->install_virtual_ip_on) != SUCCESS)
+       {
+               lookup.ip = iface_ip;
+               entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                         (void*)addr_map_entry_match);
+               if (!entry)
+               {       /* if we don't find the requested interface we just use the first */
+                       this->ifaces->get_first(this->ifaces, (void**)&iface);
+               }
+               else
                {
-                       ifindex = iface->ifindex;
-                       addr = malloc_thing(addr_entry_t);
-                       addr->ip = virtual_ip->clone(virtual_ip);
-                       addr->refcount = 0;
-                       addr->virtual = TRUE;
-                       addr->scope = RT_SCOPE_UNIVERSE;
-                       iface->addrs->insert_last(iface->addrs, addr);
-
-                       if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
-                                                         ifindex, virtual_ip) == SUCCESS)
-                       {
-                               while (get_vip_refcount(this, virtual_ip) == 0)
-                               {       /* wait until address appears */
-                                       this->condvar->wait(this->condvar, this->mutex);
-                               }
-                               ifaces->destroy(ifaces);
+                       iface = entry->iface;
+               }
+       }
+       if (iface)
+       {
+               addr_entry_t *addr;
+
+               INIT(addr,
+                       .ip = virtual_ip->clone(virtual_ip),
+                       .refcount = 1,
+                       .scope = RT_SCOPE_UNIVERSE,
+               );
+               iface->addrs->insert_last(iface->addrs, addr);
+               addr_map_entry_add(this->vips, addr, iface);
+               if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
+                                                 iface->ifindex, virtual_ip) == SUCCESS)
+               {
+                       while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
+                       {       /* wait until address appears */
+                               this->condvar->wait(this->condvar, this->mutex);
+                       }
+                       if (entry)
+                       {       /* we fail if the interface got deleted in the meantime */
+                               DBG2(DBG_KNL, "virtual IP %H installed on %s", virtual_ip,
+                                        entry->iface->ifname);
                                this->mutex->unlock(this->mutex);
                                return SUCCESS;
                        }
-                       ifaces->destroy(ifaces);
-                       this->mutex->unlock(this->mutex);
-                       DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
-                       return FAILED;
                }
+               this->mutex->unlock(this->mutex);
+               DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
+               return FAILED;
        }
-       ifaces->destroy(ifaces);
        this->mutex->unlock(this->mutex);
-
-       DBG1(DBG_KNL, "interface address %H not found, unable to install"
-                "virtual IP %H", iface_ip, virtual_ip);
+       DBG1(DBG_KNL, "no interface available, unable to install virtual IP %H",
+                virtual_ip);
        return FAILED;
 }
 
 METHOD(kernel_net_t, del_ip, status_t,
        private_kernel_netlink_net_t *this, host_t *virtual_ip)
 {
-       iface_entry_t *iface;
-       addr_entry_t *addr;
-       enumerator_t *addrs, *ifaces;
-       status_t status;
-       int ifindex;
+       addr_map_entry_t *entry, lookup = {
+               .ip = virtual_ip,
+       };
 
        if (!this->install_virtual_ip)
        {       /* disabled by config */
@@ -1457,50 +1778,50 @@ METHOD(kernel_net_t, del_ip, status_t,
        DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
 
        this->mutex->lock(this->mutex);
-       ifaces = this->ifaces->create_enumerator(this->ifaces);
-       while (ifaces->enumerate(ifaces, &iface))
-       {
-               addrs = iface->addrs->create_enumerator(iface->addrs);
-               while (addrs->enumerate(addrs, &addr))
+       entry = this->vips->get_match(this->vips, &lookup,
+                                                                (void*)addr_map_entry_match);
+       if (!entry)
+       {       /* we didn't install this IP as virtual IP */
+               entry = this->addrs->get_match(this->addrs, &lookup,
+                                                                         (void*)addr_map_entry_match);
+               if (entry)
                {
-                       if (virtual_ip->ip_equals(virtual_ip, addr->ip))
+                       DBG2(DBG_KNL, "not deleting existing IP %H on %s", virtual_ip,
+                                entry->iface->ifname);
+                       this->mutex->unlock(this->mutex);
+                       return SUCCESS;
+               }
+               DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
+               this->mutex->unlock(this->mutex);
+               return FAILED;
+       }
+       if (entry->addr->refcount == 1)
+       {
+               status_t status;
+
+               /* we set this flag so that threads calling add_ip will block and wait
+                * until the entry is gone, also so we can wait below */
+               entry->addr->installed = FALSE;
+               status = manage_ipaddr(this, RTM_DELADDR, 0, entry->iface->ifindex,
+                                                          virtual_ip);
+               if (status == SUCCESS)
+               {       /* wait until the address is really gone */
+                       while (is_known_vip(this, virtual_ip))
                        {
-                               ifindex = iface->ifindex;
-                               if (addr->refcount == 1)
-                               {
-                                       status = manage_ipaddr(this, RTM_DELADDR, 0,
-                                                                                  ifindex, virtual_ip);
-                                       if (status == SUCCESS)
-                                       {       /* wait until the address is really gone */
-                                               while (get_vip_refcount(this, virtual_ip) > 0)
-                                               {
-                                                       this->condvar->wait(this->condvar, this->mutex);
-                                               }
-                                       }
-                                       addrs->destroy(addrs);
-                                       ifaces->destroy(ifaces);
-                                       this->mutex->unlock(this->mutex);
-                                       return status;
-                               }
-                               else
-                               {
-                                       addr->refcount--;
-                               }
-                               DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
-                                        virtual_ip);
-                               addrs->destroy(addrs);
-                               ifaces->destroy(ifaces);
-                               this->mutex->unlock(this->mutex);
-                               return SUCCESS;
+                               this->condvar->wait(this->condvar, this->mutex);
                        }
                }
-               addrs->destroy(addrs);
+               this->mutex->unlock(this->mutex);
+               return status;
        }
-       ifaces->destroy(ifaces);
+       else
+       {
+               entry->addr->refcount--;
+       }
+       DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
+                virtual_ip);
        this->mutex->unlock(this->mutex);
-
-       DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
-       return FAILED;
+       return SUCCESS;
 }
 
 /**
@@ -1583,18 +1904,18 @@ METHOD(kernel_net_t, add_route, status_t,
                .if_name = if_name,
        };
 
-       this->mutex->lock(this->mutex);
+       this->routes_lock->lock(this->routes_lock);
        found = this->routes->get(this->routes, &route);
        if (found)
        {
-               this->mutex->unlock(this->mutex);
+               this->routes_lock->unlock(this->routes_lock);
                return ALREADY_DONE;
        }
        found = route_entry_clone(&route);
        this->routes->put(this->routes, found, found);
        status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
                                                         dst_net, prefixlen, gateway, src_ip, if_name);
-       this->mutex->unlock(this->mutex);
+       this->routes_lock->unlock(this->routes_lock);
        return status;
 }
 
@@ -1611,18 +1932,18 @@ METHOD(kernel_net_t, del_route, status_t,
                .if_name = if_name,
        };
 
-       this->mutex->lock(this->mutex);
+       this->routes_lock->lock(this->routes_lock);
        found = this->routes->get(this->routes, &route);
        if (!found)
        {
-               this->mutex->unlock(this->mutex);
+               this->routes_lock->unlock(this->routes_lock);
                return NOT_FOUND;
        }
        this->routes->remove(this->routes, found);
        route_entry_destroy(found);
        status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
                                                         gateway, src_ip, if_name);
-       this->mutex->unlock(this->mutex);
+       this->routes_lock->unlock(this->routes_lock);
        return status;
 }
 
@@ -1639,7 +1960,7 @@ static status_t init_address_list(private_kernel_netlink_net_t *this)
        iface_entry_t *iface;
        addr_entry_t *addr;
 
-       DBG1(DBG_KNL, "listening on interfaces:");
+       DBG2(DBG_KNL, "known interfaces and IP addresses:");
 
        memset(&request, 0, sizeof(request));
 
@@ -1701,13 +2022,13 @@ static status_t init_address_list(private_kernel_netlink_net_t *this)
        ifaces = this->ifaces->create_enumerator(this->ifaces);
        while (ifaces->enumerate(ifaces, &iface))
        {
-               if (iface->flags & IFF_UP)
+               if (iface_entry_up_and_usable(iface))
                {
-                       DBG1(DBG_KNL, "  %s", iface->ifname);
+                       DBG2(DBG_KNL, "  %s", iface->ifname);
                        addrs = iface->addrs->create_enumerator(iface->addrs);
                        while (addrs->enumerate(addrs, (void**)&addr))
                        {
-                               DBG1(DBG_KNL, "    %H", addr->ip);
+                               DBG2(DBG_KNL, "    %H", addr->ip);
                        }
                        addrs->destroy(addrs);
                }
@@ -1781,6 +2102,23 @@ static void check_kernel_features(private_kernel_netlink_net_t *this)
        }
 }
 
+/**
+ * Destroy an address to iface map, freeing each entry before the map itself
+ */
+static void addr_map_destroy(hashtable_t *map)
+{
+       enumerator_t *enumerator;
+       addr_map_entry_t *addr;
+
+       enumerator = map->create_enumerator(map);
+       while (enumerator->enumerate(enumerator, NULL, (void**)&addr))
+       {
+               free(addr); /* releases only the map entry struct itself */
+       }
+       enumerator->destroy(enumerator);
+       map->destroy(map);
+}
+
 METHOD(kernel_net_t, destroy, void,
        private_kernel_netlink_net_t *this)
 {
@@ -1794,10 +2132,6 @@ METHOD(kernel_net_t, destroy, void,
                manage_rule(this, RTM_DELRULE, AF_INET6, this->routing_table,
                                        this->routing_table_prio);
        }
-       if (this->job)
-       {
-               this->job->cancel(this->job);
-       }
        if (this->socket_events > 0)
        {
                close(this->socket_events);
@@ -1811,14 +2145,19 @@ METHOD(kernel_net_t, destroy, void,
        }
        enumerator->destroy(enumerator);
        this->routes->destroy(this->routes);
+       this->routes_lock->destroy(this->routes_lock);
        DESTROY_IF(this->socket);
 
        net_changes_clear(this);
        this->net_changes->destroy(this->net_changes);
        this->net_changes_lock->destroy(this->net_changes_lock);
 
+       addr_map_destroy(this->addrs);
+       addr_map_destroy(this->vips);
+
        this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
        this->rt_exclude->destroy(this->rt_exclude);
+       this->roam_lock->destroy(this->roam_lock);
        this->condvar->destroy(this->condvar);
        this->mutex->destroy(this->mutex);
        free(this);
@@ -1855,10 +2194,17 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                .net_changes = hashtable_create(
                                                                   (hashtable_hash_t)net_change_hash,
                                                                   (hashtable_equals_t)net_change_equals, 16),
+               .addrs = hashtable_create(
+                                                               (hashtable_hash_t)addr_map_entry_hash,
+                                                               (hashtable_equals_t)addr_map_entry_equals, 16),
+               .vips = hashtable_create((hashtable_hash_t)addr_map_entry_hash,
+                                                                (hashtable_equals_t)addr_map_entry_equals, 16),
+               .routes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
                .net_changes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
                .ifaces = linked_list_create(),
                .mutex = mutex_create(MUTEX_TYPE_RECURSIVE),
                .condvar = condvar_create(CONDVAR_TYPE_DEFAULT),
+               .roam_lock = spinlock_create(),
                .routing_table = lib->settings->get_int(lib->settings,
                                "%s.routing_table", ROUTING_TABLE, hydra->daemon),
                .routing_table_prio = lib->settings->get_int(lib->settings,
@@ -1867,9 +2213,11 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                                "%s.process_route", TRUE, hydra->daemon),
                .install_virtual_ip = lib->settings->get_bool(lib->settings,
                                "%s.install_virtual_ip", TRUE, hydra->daemon),
+               .install_virtual_ip_on = lib->settings->get_str(lib->settings,
+                               "%s.install_virtual_ip_on", NULL, hydra->daemon),
        );
        timerclear(&this->last_route_reinstall);
-       timerclear(&this->last_roam);
+       timerclear(&this->next_roam);
 
        check_kernel_features(this);
 
@@ -1923,9 +2271,10 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                        return NULL;
                }
 
-               this->job = callback_job_create_with_prio((callback_job_cb_t)receive_events,
-                                                                                       this, NULL, NULL, JOB_PRIO_CRITICAL);
-               lib->processor->queue_job(lib->processor, (job_t*)this->job);
+               lib->processor->queue_job(lib->processor,
+                       (job_t*)callback_job_create_with_prio(
+                                       (callback_job_cb_t)receive_events, this, NULL,
+                                       (callback_job_cancel_t)return_false, JOB_PRIO_CRITICAL));
        }
 
        if (init_address_list(this) != SUCCESS)