ikev1: Send and verify IPv6 addresses correctly
[strongswan.git] / src / libhydra / plugins / kernel_netlink / kernel_netlink_net.c
index e8d146c..4e5e02d 100644 (file)
 #define ROUTING_TABLE_PRIO 0
 #endif
 
+/**
+ * Names of the rtnetlink message types from RTM_NEWLINK (16) to RTM_GETRULE
+ * (34), one string per consecutive value.
+ *
+ * The numeric strings are placeholders for the unassigned values 23, 27 and
+ * 31; they keep every following name aligned with its message type.
+ */
+ENUM(rt_msg_names, RTM_NEWLINK, RTM_GETRULE,
+       "RTM_NEWLINK",
+       "RTM_DELLINK",
+       "RTM_GETLINK",
+       "RTM_SETLINK",
+       "RTM_NEWADDR",
+       "RTM_DELADDR",
+       "RTM_GETADDR",
+       "23",
+       "RTM_NEWROUTE",
+       "RTM_DELROUTE",
+       "RTM_GETROUTE",
+       "27",
+       "RTM_NEWNEIGH",
+       "RTM_DELNEIGH",
+       "RTM_GETNEIGH",
+       "31",
+       "RTM_NEWRULE",
+       "RTM_DELRULE",
+       "RTM_GETRULE",
+);
+
 typedef struct addr_entry_t addr_entry_t;
 
 /**
@@ -470,6 +491,16 @@ struct private_kernel_netlink_net_t {
        bool rta_prefsrc_for_ipv6;
 
        /**
+        * whether marks can be used in route lookups
+        */
+       bool rta_mark;
+
+       /**
+        * the mark excluded from the routing rule used for virtual IPs
+        */
+       mark_t routing_mark;
+
+       /**
         * whether to prefer temporary IPv6 addresses over public ones
         */
        bool prefer_temporary_addrs;
@@ -1517,6 +1548,7 @@ typedef struct {
        u_int8_t dst_len;
        u_int32_t table;
        u_int32_t oif;
+       u_int32_t priority;
 } rt_entry_t;
 
 /**
@@ -1529,6 +1561,26 @@ static void rt_entry_destroy(rt_entry_t *this)
 }
 
 /**
+ * Check if the route received with RTM_NEWROUTE is usable based on its type.
+ *
+ * Only the rtm_type field of the message is inspected.
+ *
+ * @param hdr          netlink message header; its payload is read as a
+ *                     struct rtmsg
+ * @return             FALSE if rtm_type is RTN_BLACKHOLE, RTN_UNREACHABLE,
+ *                     RTN_PROHIBIT or RTN_THROW, TRUE for any other type
+ */
+static bool route_usable(struct nlmsghdr *hdr)
+{
+       struct rtmsg *msg;
+
+       msg = NLMSG_DATA(hdr);
+       switch (msg->rtm_type)
+       {
+               case RTN_BLACKHOLE:
+               case RTN_UNREACHABLE:
+               case RTN_PROHIBIT:
+               case RTN_THROW:
+                       return FALSE;
+               default:
+                       return TRUE;
+       }
+}
+
+/**
  * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
  * reused if not NULL.
  *
@@ -1552,6 +1604,7 @@ static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
                route->dst_len = msg->rtm_dst_len;
                route->table = msg->rtm_table;
                route->oif = 0;
+               route->priority = 0;
        }
        else
        {
@@ -1580,6 +1633,12 @@ static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
                                        route->oif = *(u_int32_t*)RTA_DATA(rta);
                                }
                                break;
+                       case RTA_PRIORITY:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->priority))
+                               {
+                                       route->priority = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
 #ifdef HAVE_RTA_TABLE
                        case RTA_TABLE:
                                if (RTA_PAYLOAD(rta) == sizeof(route->table))
@@ -1627,18 +1686,25 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
        family = dest->get_family(dest);
        hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST;
-       if (family == AF_INET || this->rta_prefsrc_for_ipv6 ||
-               this->routing_table || match_net)
-       {       /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
-                * as we want to ignore routes with virtual IPs we cannot use DUMP
-                * if these routes are not installed in a separate table */
-               hdr->nlmsg_flags |= NLM_F_DUMP;
-       }
        hdr->nlmsg_type = RTM_GETROUTE;
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 
        msg = NLMSG_DATA(hdr);
        msg->rtm_family = family;
+       if (!match_net && this->rta_mark && this->routing_mark.value)
+       {
+               /* if our routing rule excludes packets with a certain mark we can
+                * get the preferred route without having to dump all routes */
+               chunk = chunk_from_thing(this->routing_mark.value);
+               netlink_add_attribute(hdr, RTA_MARK, chunk, sizeof(request));
+       }
+       else if (family == AF_INET || this->rta_prefsrc_for_ipv6 ||
+                        this->routing_table || match_net)
+       {       /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
+                * as we want to ignore routes with virtual IPs we cannot use DUMP
+                * if these routes are not installed in a separate table */
+               hdr->nlmsg_flags |= NLM_F_DUMP;
+       }
        if (candidate)
        {
                chunk = candidate->get_address(candidate);
@@ -1671,6 +1737,10 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                                rt_entry_t *other;
                                uintptr_t table;
 
+                               if (!route_usable(current))
+                               {
+                                       continue;
+                               }
                                route = parse_route(current, route);
 
                                table = (uintptr_t)route->table;
@@ -1703,11 +1773,16 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                                        }
                                        route->src_host = src;
                                }
-                               /* insert route, sorted by decreasing network prefix */
+                               /* insert route, sorted by priority and network prefix */
                                enumerator = routes->create_enumerator(routes);
                                while (enumerator->enumerate(enumerator, &other))
                                {
-                                       if (route->dst_len > other->dst_len)
+                                       if (route->priority < other->priority)
+                                       {
+                                               break;
+                                       }
+                                       if (route->priority == other->priority &&
+                                               route->dst_len > other->dst_len)
                                        {
                                                break;
                                        }
@@ -1954,6 +2029,8 @@ METHOD(kernel_net_t, add_ip, status_t,
        if (iface)
        {
                addr_entry_t *addr;
+               char *ifname;
+               int ifi;
 
                INIT(addr,
                        .ip = virtual_ip->clone(virtual_ip),
@@ -1962,26 +2039,30 @@ METHOD(kernel_net_t, add_ip, status_t,
                );
                iface->addrs->insert_last(iface->addrs, addr);
                addr_map_entry_add(this->vips, addr, iface);
+               ifi = iface->ifindex;
+               this->lock->unlock(this->lock);
                if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
-                                                 iface->ifindex, virtual_ip, prefix) == SUCCESS)
+                                                 ifi, virtual_ip, prefix) == SUCCESS)
                {
+                       this->lock->write_lock(this->lock);
                        while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
                        {       /* wait until address appears */
                                this->condvar->wait(this->condvar, this->lock);
                        }
                        if (entry)
                        {       /* we fail if the interface got deleted in the meantime */
-                               DBG2(DBG_KNL, "virtual IP %H installed on %s", virtual_ip,
-                                        entry->iface->ifname);
+                               ifname = strdup(entry->iface->ifname);
                                this->lock->unlock(this->lock);
+                               DBG2(DBG_KNL, "virtual IP %H installed on %s",
+                                        virtual_ip, ifname);
                                /* during IKEv1 reauthentication, children get moved from the
                                 * old to the new SA before the virtual IP is available. This
                                 * kills the route for our virtual IP, reinstall. */
-                               queue_route_reinstall(this, strdup(entry->iface->ifname));
+                               queue_route_reinstall(this, ifname);
                                return SUCCESS;
                        }
+                       this->lock->unlock(this->lock);
                }
-               this->lock->unlock(this->lock);
                DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
                return FAILED;
        }
@@ -2027,20 +2108,23 @@ METHOD(kernel_net_t, del_ip, status_t,
        if (entry->addr->refcount == 1)
        {
                status_t status;
+               int ifi;
 
                /* we set this flag so that threads calling add_ip will block and wait
                 * until the entry is gone, also so we can wait below */
                entry->addr->installed = FALSE;
-               status = manage_ipaddr(this, RTM_DELADDR, 0, entry->iface->ifindex,
-                                                          virtual_ip, prefix);
+               ifi = entry->iface->ifindex;
+               this->lock->unlock(this->lock);
+               status = manage_ipaddr(this, RTM_DELADDR, 0, ifi, virtual_ip, prefix);
                if (status == SUCCESS && wait)
                {       /* wait until the address is really gone */
+                       this->lock->write_lock(this->lock);
                        while (is_known_vip(this, virtual_ip))
                        {
                                this->condvar->wait(this->condvar, this->lock);
                        }
+                       this->lock->unlock(this->lock);
                }
-               this->lock->unlock(this->lock);
                return status;
        }
        else
@@ -2345,6 +2429,10 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
                        netlink_add_attribute(hdr, FRA_FWMARK, chunk, sizeof(request));
                        chunk = chunk_from_thing(mark.mask);
                        netlink_add_attribute(hdr, FRA_FWMASK, chunk, sizeof(request));
+                       if (msg->rtm_flags & FIB_RULE_INVERT)
+                       {
+                               this->routing_mark = mark;
+                       }
                }
 #else
                DBG1(DBG_KNL, "setting firewall mark on routing rule is not supported");
@@ -2368,6 +2456,10 @@ static void check_kernel_features(private_kernel_netlink_net_t *this)
                        case 3:
                                if (a == 2)
                                {
+                                       if (b == 6 && c >= 36)
+                                       {
+                                               this->rta_mark = TRUE;
+                                       }
                                        DBG2(DBG_KNL, "detected Linux %d.%d.%d, no support for "
                                                 "RTA_PREFSRC for IPv6 routes", a, b, c);
                                        break;
@@ -2376,6 +2468,7 @@ static void check_kernel_features(private_kernel_netlink_net_t *this)
                        case 2:
                                /* only 3.x+ uses two part version numbers */
                                this->rta_prefsrc_for_ipv6 = TRUE;
+                               this->rta_mark = TRUE;
                                break;
                        default:
                                break;
@@ -2469,7 +2562,9 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                                .destroy = _destroy,
                        },
                },
-               .socket = netlink_socket_create(NETLINK_ROUTE, NULL),
+               .socket = netlink_socket_create(NETLINK_ROUTE, rt_msg_names,
+                       lib->settings->get_bool(lib->settings,
+                               "%s.plugins.kernel-netlink.parallel_route", FALSE, lib->ns)),
                .rt_exclude = linked_list_create(),
                .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
                                                                   (hashtable_equals_t)route_entry_equals, 16),