ikev1: Send and verify IPv6 addresses correctly
[strongswan.git] / src / libhydra / plugins / kernel_netlink / kernel_netlink_net.c
index bb19418..4e5e02d 100644 (file)
 #define ROUTING_TABLE_PRIO 0
 #endif
 
+/**
+ * Names of the rtnetlink message types from RTM_NEWLINK up to RTM_GETRULE.
+ *
+ * One string is listed per consecutive value in that range. The values that
+ * have no message type assigned (23, 27 and 31) get numeric placeholders so
+ * the names following them stay aligned with their constants.
+ */
+ENUM(rt_msg_names, RTM_NEWLINK, RTM_GETRULE,
+       "RTM_NEWLINK",
+       "RTM_DELLINK",
+       "RTM_GETLINK",
+       "RTM_SETLINK",
+       "RTM_NEWADDR",
+       "RTM_DELADDR",
+       "RTM_GETADDR",
+       "23",
+       "RTM_NEWROUTE",
+       "RTM_DELROUTE",
+       "RTM_GETROUTE",
+       "27",
+       "RTM_NEWNEIGH",
+       "RTM_DELNEIGH",
+       "RTM_GETNEIGH",
+       "31",
+       "RTM_NEWRULE",
+       "RTM_DELRULE",
+       "RTM_GETRULE",
+);
+
 typedef struct addr_entry_t addr_entry_t;
 
 /**
@@ -470,6 +491,16 @@ struct private_kernel_netlink_net_t {
        bool rta_prefsrc_for_ipv6;
 
        /**
+        * whether marks can be used in route lookups
+        */
+       bool rta_mark;
+
+       /**
+        * the mark excluded from the routing rule used for virtual IPs
+        */
+       mark_t routing_mark;
+
+       /**
         * whether to prefer temporary IPv6 addresses over public ones
         */
        bool prefer_temporary_addrs;
@@ -478,6 +509,16 @@ struct private_kernel_netlink_net_t {
         * list with routing tables to be excluded from route lookup
         */
        linked_list_t *rt_exclude;
+
+       /**
+        * MTU to set on installed routes
+        */
+       u_int32_t mtu;
+
+       /**
+        * MSS to set on installed routes
+        */
+       u_int32_t mss;
 };
 
 /**
@@ -685,15 +726,15 @@ static u_char get_scope(host_t *ip)
                        }
                        break;
                case 16:
-                       if (IN6_IS_ADDR_LOOPBACK(addr.ptr))
+                       if (IN6_IS_ADDR_LOOPBACK((struct in6_addr*)addr.ptr))
                        {       /* link-local, according to RFC 4291, 2.5.3 */
                                return 2;
                        }
-                       if (IN6_IS_ADDR_LINKLOCAL(addr.ptr))
+                       if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr*)addr.ptr))
                        {
                                return 2;
                        }
-                       if (IN6_IS_ADDR_SITELOCAL(addr.ptr))
+                       if (IN6_IS_ADDR_SITELOCAL((struct in6_addr*)addr.ptr))
                        {       /* deprecated, according to RFC 4291, 2.5.7 */
                                return 5;
                        }
@@ -928,7 +969,7 @@ static void addr_entry_unregister(addr_entry_t *addr, iface_entry_t *iface,
 static void process_link(private_kernel_netlink_net_t *this,
                                                 struct nlmsghdr *hdr, bool event)
 {
-       struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
+       struct ifinfomsg* msg = NLMSG_DATA(hdr);
        struct rtattr *rta = IFLA_RTA(msg);
        size_t rtasize = IFLA_PAYLOAD (hdr);
        enumerator_t *enumerator;
@@ -1030,7 +1071,7 @@ static void process_link(private_kernel_netlink_net_t *this,
 static void process_addr(private_kernel_netlink_net_t *this,
                                                 struct nlmsghdr *hdr, bool event)
 {
-       struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
+       struct ifaddrmsg* msg = NLMSG_DATA(hdr);
        struct rtattr *rta = IFA_RTA(msg);
        size_t rtasize = IFA_PAYLOAD (hdr);
        host_t *host = NULL;
@@ -1173,7 +1214,7 @@ static void process_addr(private_kernel_netlink_net_t *this,
  */
 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
 {
-       struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
+       struct rtmsg* msg = NLMSG_DATA(hdr);
        struct rtattr *rta = RTM_RTA(msg);
        size_t rtasize = RTM_PAYLOAD(hdr);
        u_int32_t rta_oif = 0;
@@ -1507,6 +1548,7 @@ typedef struct {
        u_int8_t dst_len;
        u_int32_t table;
        u_int32_t oif;
+       u_int32_t priority;
 } rt_entry_t;
 
 /**
@@ -1519,6 +1561,26 @@ static void rt_entry_destroy(rt_entry_t *this)
 }
 
 /**
+ * Check if the route received with RTM_NEWROUTE is usable based on its type.
+ */
+static bool route_usable(struct nlmsghdr *hdr)
+{
+       struct rtmsg *rtm = NLMSG_DATA(hdr);
+
+       /* routes of type blackhole, unreachable, prohibit and throw are
+        * rejected, routes of any other type are reported as usable to
+        * the caller */
+       if (rtm->rtm_type == RTN_BLACKHOLE ||
+               rtm->rtm_type == RTN_UNREACHABLE ||
+               rtm->rtm_type == RTN_PROHIBIT ||
+               rtm->rtm_type == RTN_THROW)
+       {
+               return FALSE;
+       }
+       return TRUE;
+}
+
+/**
  * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
  * reused if not NULL.
  *
@@ -1530,7 +1592,7 @@ static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
        struct rtmsg *msg;
        size_t rtasize;
 
-       msg = (struct rtmsg*)(NLMSG_DATA(hdr));
+       msg = NLMSG_DATA(hdr);
        rta = RTM_RTA(msg);
        rtasize = RTM_PAYLOAD(hdr);
 
@@ -1542,6 +1604,7 @@ static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
                route->dst_len = msg->rtm_dst_len;
                route->table = msg->rtm_table;
                route->oif = 0;
+               route->priority = 0;
        }
        else
        {
@@ -1570,6 +1633,12 @@ static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
                                        route->oif = *(u_int32_t*)RTA_DATA(rta);
                                }
                                break;
+                       case RTA_PRIORITY:
+                               if (RTA_PAYLOAD(rta) == sizeof(route->priority))
+                               {
+                                       route->priority = *(u_int32_t*)RTA_DATA(rta);
+                               }
+                               break;
 #ifdef HAVE_RTA_TABLE
                        case RTA_TABLE:
                                if (RTA_PAYLOAD(rta) == sizeof(route->table))
@@ -1615,20 +1684,27 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
        memset(&request, 0, sizeof(request));
 
        family = dest->get_family(dest);
-       hdr = (struct nlmsghdr*)request;
+       hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST;
-       if (family == AF_INET || this->rta_prefsrc_for_ipv6 ||
-               this->routing_table || match_net)
+       hdr->nlmsg_type = RTM_GETROUTE;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+
+       msg = NLMSG_DATA(hdr);
+       msg->rtm_family = family;
+       if (!match_net && this->rta_mark && this->routing_mark.value)
+       {
+               /* if our routing rule excludes packets with a certain mark we can
+                * get the preferred route without having to dump all routes */
+               chunk = chunk_from_thing(this->routing_mark.value);
+               netlink_add_attribute(hdr, RTA_MARK, chunk, sizeof(request));
+       }
+       else if (family == AF_INET || this->rta_prefsrc_for_ipv6 ||
+                        this->routing_table || match_net)
        {       /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
                 * as we want to ignore routes with virtual IPs we cannot use DUMP
                 * if these routes are not installed in a separate table */
                hdr->nlmsg_flags |= NLM_F_DUMP;
        }
-       hdr->nlmsg_type = RTM_GETROUTE;
-       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
-
-       msg = (struct rtmsg*)NLMSG_DATA(hdr);
-       msg->rtm_family = family;
        if (candidate)
        {
                chunk = candidate->get_address(candidate);
@@ -1661,6 +1737,10 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                                rt_entry_t *other;
                                uintptr_t table;
 
+                               if (!route_usable(current))
+                               {
+                                       continue;
+                               }
                                route = parse_route(current, route);
 
                                table = (uintptr_t)route->table;
@@ -1693,11 +1773,16 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                                        }
                                        route->src_host = src;
                                }
-                               /* insert route, sorted by decreasing network prefix */
+                               /* insert route, sorted by priority and network prefix */
                                enumerator = routes->create_enumerator(routes);
                                while (enumerator->enumerate(enumerator, &other))
                                {
-                                       if (route->dst_len > other->dst_len)
+                                       if (route->priority < other->priority)
+                                       {
+                                               break;
+                                       }
+                                       if (route->priority == other->priority &&
+                                               route->dst_len > other->dst_len)
                                        {
                                                break;
                                        }
@@ -1854,12 +1939,12 @@ static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type
 
        chunk = ip->get_address(ip);
 
-       hdr = (struct nlmsghdr*)request;
+       hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
        hdr->nlmsg_type = nlmsg_type;
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
 
-       msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
+       msg = NLMSG_DATA(hdr);
        msg->ifa_family = ip->get_family(ip);
        msg->ifa_flags = 0;
        msg->ifa_prefixlen = prefix < 0 ? chunk.len * 8 : prefix;
@@ -1868,6 +1953,17 @@ static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type
 
        netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
 
+       if (ip->get_family(ip) == AF_INET6 && this->rta_prefsrc_for_ipv6)
+       {       /* if source routes are possible we let the virtual IP get deprecated
+                * immediately (but mark it as valid forever) so it gets only used if
+                * forced by our route, and not by the default IPv6 address selection */
+               struct ifa_cacheinfo cache = {
+                       .ifa_valid = 0xFFFFFFFF,
+                       .ifa_prefered = 0,
+               };
+               netlink_add_attribute(hdr, IFA_CACHEINFO, chunk_from_thing(cache),
+                                                         sizeof(request));
+       }
        return this->socket->send_ack(this->socket, hdr);
 }
 
@@ -1933,6 +2029,8 @@ METHOD(kernel_net_t, add_ip, status_t,
        if (iface)
        {
                addr_entry_t *addr;
+               char *ifname;
+               int ifi;
 
                INIT(addr,
                        .ip = virtual_ip->clone(virtual_ip),
@@ -1941,26 +2039,30 @@ METHOD(kernel_net_t, add_ip, status_t,
                );
                iface->addrs->insert_last(iface->addrs, addr);
                addr_map_entry_add(this->vips, addr, iface);
+               ifi = iface->ifindex;
+               this->lock->unlock(this->lock);
                if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
-                                                 iface->ifindex, virtual_ip, prefix) == SUCCESS)
+                                                 ifi, virtual_ip, prefix) == SUCCESS)
                {
+                       this->lock->write_lock(this->lock);
                        while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
                        {       /* wait until address appears */
                                this->condvar->wait(this->condvar, this->lock);
                        }
                        if (entry)
                        {       /* we fail if the interface got deleted in the meantime */
-                               DBG2(DBG_KNL, "virtual IP %H installed on %s", virtual_ip,
-                                        entry->iface->ifname);
+                               ifname = strdup(entry->iface->ifname);
                                this->lock->unlock(this->lock);
+                               DBG2(DBG_KNL, "virtual IP %H installed on %s",
+                                        virtual_ip, ifname);
                                /* during IKEv1 reauthentication, children get moved from
                                 * old the new SA before the virtual IP is available. This
                                 * kills the route for our virtual IP, reinstall. */
-                               queue_route_reinstall(this, strdup(entry->iface->ifname));
+                               queue_route_reinstall(this, ifname);
                                return SUCCESS;
                        }
+                       this->lock->unlock(this->lock);
                }
-               this->lock->unlock(this->lock);
                DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
                return FAILED;
        }
@@ -2006,20 +2108,23 @@ METHOD(kernel_net_t, del_ip, status_t,
        if (entry->addr->refcount == 1)
        {
                status_t status;
+               int ifi;
 
                /* we set this flag so that threads calling add_ip will block and wait
                 * until the entry is gone, also so we can wait below */
                entry->addr->installed = FALSE;
-               status = manage_ipaddr(this, RTM_DELADDR, 0, entry->iface->ifindex,
-                                                          virtual_ip, prefix);
+               ifi = entry->iface->ifindex;
+               this->lock->unlock(this->lock);
+               status = manage_ipaddr(this, RTM_DELADDR, 0, ifi, virtual_ip, prefix);
                if (status == SUCCESS && wait)
                {       /* wait until the address is really gone */
+                       this->lock->write_lock(this->lock);
                        while (is_known_vip(this, virtual_ip))
                        {
                                this->condvar->wait(this->condvar, this->lock);
                        }
+                       this->lock->unlock(this->lock);
                }
-               this->lock->unlock(this->lock);
                return status;
        }
        else
@@ -2044,6 +2149,7 @@ static status_t manage_srcroute(private_kernel_netlink_net_t *this,
        netlink_buf_t request;
        struct nlmsghdr *hdr;
        struct rtmsg *msg;
+       struct rtattr *rta;
        int ifindex;
        chunk_t chunk;
 
@@ -2070,12 +2176,12 @@ static status_t manage_srcroute(private_kernel_netlink_net_t *this,
 
        memset(&request, 0, sizeof(request));
 
-       hdr = (struct nlmsghdr*)request;
+       hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
        hdr->nlmsg_type = nlmsg_type;
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 
-       msg = (struct rtmsg*)NLMSG_DATA(hdr);
+       msg = NLMSG_DATA(hdr);
        msg->rtm_family = src_ip->get_family(src_ip);
        msg->rtm_dst_len = prefixlen;
        msg->rtm_table = this->routing_table;
@@ -2096,6 +2202,30 @@ static status_t manage_srcroute(private_kernel_netlink_net_t *this,
        chunk.len = sizeof(ifindex);
        netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
 
+       if (this->mtu || this->mss)
+       {
+               chunk = chunk_alloca(RTA_LENGTH((sizeof(struct rtattr) +
+                                                                                sizeof(u_int32_t)) * 2));
+               chunk.len = 0;
+               rta = (struct rtattr*)chunk.ptr;
+               if (this->mtu)
+               {
+                       rta->rta_type = RTAX_MTU;
+                       rta->rta_len = RTA_LENGTH(sizeof(u_int32_t));
+                       memcpy(RTA_DATA(rta), &this->mtu, sizeof(u_int32_t));
+                       chunk.len = rta->rta_len;
+               }
+               if (this->mss)
+               {
+                       rta = (struct rtattr*)(chunk.ptr + RTA_ALIGN(chunk.len));
+                       rta->rta_type = RTAX_ADVMSS;
+                       rta->rta_len = RTA_LENGTH(sizeof(u_int32_t));
+                       memcpy(RTA_DATA(rta), &this->mss, sizeof(u_int32_t));
+                       chunk.len = RTA_ALIGN(chunk.len) + rta->rta_len;
+               }
+               netlink_add_attribute(hdr, RTA_METRICS, chunk, sizeof(request));
+       }
+
        return this->socket->send_ack(this->socket, hdr);
 }
 
@@ -2175,10 +2305,10 @@ static status_t init_address_list(private_kernel_netlink_net_t *this)
 
        memset(&request, 0, sizeof(request));
 
-       in = (struct nlmsghdr*)&request;
+       in = &request.hdr;
        in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
        in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
-       msg = (struct rtgenmsg*)NLMSG_DATA(in);
+       msg = NLMSG_DATA(in);
        msg->rtgen_family = AF_UNSPEC;
 
        /* get all links */
@@ -2262,7 +2392,7 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
        char *fwmark;
 
        memset(&request, 0, sizeof(request));
-       hdr = (struct nlmsghdr*)request;
+       hdr = &request.hdr;
        hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
        hdr->nlmsg_type = nlmsg_type;
        if (nlmsg_type == RTM_NEWRULE)
@@ -2271,7 +2401,7 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
        }
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 
-       msg = (struct rtmsg*)NLMSG_DATA(hdr);
+       msg = NLMSG_DATA(hdr);
        msg->rtm_table = table;
        msg->rtm_family = family;
        msg->rtm_protocol = RTPROT_BOOT;
@@ -2299,6 +2429,10 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
                        netlink_add_attribute(hdr, FRA_FWMARK, chunk, sizeof(request));
                        chunk = chunk_from_thing(mark.mask);
                        netlink_add_attribute(hdr, FRA_FWMASK, chunk, sizeof(request));
+                       if (msg->rtm_flags & FIB_RULE_INVERT)
+                       {
+                               this->routing_mark = mark;
+                       }
                }
 #else
                DBG1(DBG_KNL, "setting firewall mark on routing rule is not supported");
@@ -2322,6 +2456,10 @@ static void check_kernel_features(private_kernel_netlink_net_t *this)
                        case 3:
                                if (a == 2)
                                {
+                                       if (b == 6 && c >= 36)
+                                       {
+                                               this->rta_mark = TRUE;
+                                       }
                                        DBG2(DBG_KNL, "detected Linux %d.%d.%d, no support for "
                                                 "RTA_PREFSRC for IPv6 routes", a, b, c);
                                        break;
@@ -2330,6 +2468,7 @@ static void check_kernel_features(private_kernel_netlink_net_t *this)
                        case 2:
                                /* only 3.x+ uses two part version numbers */
                                this->rta_prefsrc_for_ipv6 = TRUE;
+                               this->rta_mark = TRUE;
                                break;
                        default:
                                break;
@@ -2423,7 +2562,9 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                                .destroy = _destroy,
                        },
                },
-               .socket = netlink_socket_create(NETLINK_ROUTE),
+               .socket = netlink_socket_create(NETLINK_ROUTE, rt_msg_names,
+                       lib->settings->get_bool(lib->settings,
+                               "%s.plugins.kernel-netlink.parallel_route", FALSE, lib->ns)),
                .rt_exclude = linked_list_create(),
                .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
                                                                   (hashtable_equals_t)route_entry_equals, 16),
@@ -2455,6 +2596,10 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                                                "%s.prefer_temporary_addrs", FALSE, lib->ns),
                .roam_events = lib->settings->get_bool(lib->settings,
                                                "%s.plugins.kernel-netlink.roam_events", TRUE, lib->ns),
+               .mtu = lib->settings->get_int(lib->settings,
+                                               "%s.plugins.kernel-netlink.mtu", 0, lib->ns),
+               .mss = lib->settings->get_int(lib->settings,
+                                               "%s.plugins.kernel-netlink.mss", 0, lib->ns),
        );
        timerclear(&this->last_route_reinstall);
        timerclear(&this->next_roam);