2 * Copyright (C) 2008-2014 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
47 #ifdef HAVE_LINUX_FIB_RULES_H
48 #include <linux/fib_rules.h>
51 #include "kernel_netlink_net.h"
52 #include "kernel_netlink_shared.h"
55 #include <utils/debug.h>
56 #include <threading/mutex.h>
57 #include <threading/rwlock.h>
58 #include <threading/rwlock_condvar.h>
59 #include <threading/spinlock.h>
60 #include <collections/hashtable.h>
61 #include <collections/linked_list.h>
62 #include <processing/jobs/callback_job.h>
64 /** delay before firing roam events (ms) */
65 #define ROAM_DELAY 100
67 /** delay before reinstalling routes (ms) */
68 #define ROUTE_DELAY 100
70 /** maximum recursion when searching for addresses in get_route() */
71 #define MAX_ROUTE_RECURSION 2
74 #define ROUTING_TABLE 0
77 #ifndef ROUTING_TABLE_PRIO
78 #define ROUTING_TABLE_PRIO 0
81 ENUM(rt_msg_names
, RTM_NEWLINK
, RTM_GETRULE
,
102 typedef struct addr_entry_t addr_entry_t
;
105 * IP address in an iface_entry_t
107 struct addr_entry_t
{
109 /** the ip address */
115 /** scope of the address */
118 /** number of times this IP is used, if virtual (i.e. managed by us) */
121 /** TRUE once it is installed, if virtual */
126 * destroy a addr_entry_t object
128 static void addr_entry_destroy(addr_entry_t
*this)
130 this->ip
->destroy(this->ip
);
134 typedef struct iface_entry_t iface_entry_t
;
137 * A network interface on this system, containing addr_entry_t's
139 struct iface_entry_t
{
141 /** interface index */
144 /** name of the interface */
145 char ifname
[IFNAMSIZ
];
147 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
150 /** list of addresses as host_t */
151 linked_list_t
*addrs
;
153 /** TRUE if usable by config */
158 * destroy an interface entry
160 static void iface_entry_destroy(iface_entry_t
*this)
162 this->addrs
->destroy_function(this->addrs
, (void*)addr_entry_destroy
);
167 * find an interface entry by index
169 static bool iface_entry_by_index(iface_entry_t
*this, int *ifindex
)
171 return this->ifindex
== *ifindex
;
175 * find an interface entry by name
177 static bool iface_entry_by_name(iface_entry_t
*this, char *ifname
)
179 return streq(this->ifname
, ifname
);
183 * check if an interface is up
185 static inline bool iface_entry_up(iface_entry_t
*iface
)
187 return (iface
->flags
& IFF_UP
) == IFF_UP
;
191 * check if an interface is up and usable
193 static inline bool iface_entry_up_and_usable(iface_entry_t
*iface
)
195 return iface
->usable
&& iface_entry_up(iface
);
198 typedef struct addr_map_entry_t addr_map_entry_t
;
201 * Entry that maps an IP address to an interface entry
203 struct addr_map_entry_t
{
204 /** The IP address */
207 /** The address entry for this IP address */
210 /** The interface this address is installed on */
211 iface_entry_t
*iface
;
215 * Hash a addr_map_entry_t object, all entries with the same IP address
216 * are stored in the same bucket
218 static u_int
addr_map_entry_hash(addr_map_entry_t
*this)
220 return chunk_hash(this->ip
->get_address(this->ip
));
224 * Compare two addr_map_entry_t objects, two entries are equal if they are
225 * installed on the same interface
227 static bool addr_map_entry_equals(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
229 return a
->iface
->ifindex
== b
->iface
->ifindex
&&
230 a
->ip
->ip_equals(a
->ip
, b
->ip
);
234 * Used with get_match this finds an address entry if it is installed on
235 * an up and usable interface
237 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t
*a
,
240 return iface_entry_up_and_usable(b
->iface
) &&
241 a
->ip
->ip_equals(a
->ip
, b
->ip
);
245 * Used with get_match this finds an address entry if it is installed on
246 * any active local interface
248 static bool addr_map_entry_match_up(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
250 return iface_entry_up(b
->iface
) && a
->ip
->ip_equals(a
->ip
, b
->ip
);
254 * Used with get_match this finds an address entry if it is installed on
255 * any local interface
257 static bool addr_map_entry_match(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
259 return a
->ip
->ip_equals(a
->ip
, b
->ip
);
262 typedef struct route_entry_t route_entry_t
;
265 * Installed routing entry
267 struct route_entry_t
{
268 /** Name of the interface the route is bound to */
271 /** Source ip of the route */
274 /** Gateway for this route */
277 /** Destination net */
280 /** Destination net prefixlen */
285 * Clone a route_entry_t object.
287 static route_entry_t
*route_entry_clone(route_entry_t
*this)
289 route_entry_t
*route
;
292 .if_name
= strdup(this->if_name
),
293 .src_ip
= this->src_ip
->clone(this->src_ip
),
294 .gateway
= this->gateway ?
this->gateway
->clone(this->gateway
) : NULL
,
295 .dst_net
= chunk_clone(this->dst_net
),
296 .prefixlen
= this->prefixlen
,
302 * Destroy a route_entry_t object
304 static void route_entry_destroy(route_entry_t
*this)
307 DESTROY_IF(this->src_ip
);
308 DESTROY_IF(this->gateway
);
309 chunk_free(&this->dst_net
);
314 * Hash a route_entry_t object
316 static u_int
route_entry_hash(route_entry_t
*this)
318 return chunk_hash_inc(chunk_from_thing(this->prefixlen
),
319 chunk_hash(this->dst_net
));
323 * Compare two route_entry_t objects
325 static bool route_entry_equals(route_entry_t
*a
, route_entry_t
*b
)
327 if (a
->if_name
&& b
->if_name
&& streq(a
->if_name
, b
->if_name
) &&
328 a
->src_ip
->ip_equals(a
->src_ip
, b
->src_ip
) &&
329 chunk_equals(a
->dst_net
, b
->dst_net
) && a
->prefixlen
== b
->prefixlen
)
331 return (!a
->gateway
&& !b
->gateway
) || (a
->gateway
&& b
->gateway
&&
332 a
->gateway
->ip_equals(a
->gateway
, b
->gateway
));
337 typedef struct net_change_t net_change_t
;
340 * Queued network changes
342 struct net_change_t
{
343 /** Name of the interface that got activated (or an IP appeared on) */
348 * Destroy a net_change_t object
350 static void net_change_destroy(net_change_t
*this)
357 * Hash a net_change_t object
359 static u_int
net_change_hash(net_change_t
*this)
361 return chunk_hash(chunk_create(this->if_name
, strlen(this->if_name
)));
365 * Compare two net_change_t objects
367 static bool net_change_equals(net_change_t
*a
, net_change_t
*b
)
369 return streq(a
->if_name
, b
->if_name
);
372 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t
;
375 * Private variables and functions of kernel_netlink_net class.
377 struct private_kernel_netlink_net_t
{
379 * Public part of the kernel_netlink_net_t object.
381 kernel_netlink_net_t
public;
384 * lock to access various lists and maps
389 * condition variable to signal virtual IP add/removal
391 rwlock_condvar_t
*condvar
;
394 * Cached list of interfaces and its addresses (iface_entry_t)
396 linked_list_t
*ifaces
;
399 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
404 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
409 * netlink rt socket (routing)
411 netlink_socket_t
*socket
;
414 * Netlink rt socket to receive address change events
419 * earliest time of the next roam event
424 * roam event due to address change
429 * lock to check and update roam event time
431 spinlock_t
*roam_lock
;
434 * routing table to install routes
439 * priority of used routing table
441 int routing_table_prio
;
451 mutex_t
*routes_lock
;
454 * interface changes which may trigger route reinstallation
456 hashtable_t
*net_changes
;
459 * mutex for route reinstallation triggers
461 mutex_t
*net_changes_lock
;
464 * time of last route reinstallation
466 timeval_t last_route_reinstall
;
469 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
474 * whether to trigger roam events
479 * whether to actually install virtual IPs
481 bool install_virtual_ip
;
484 * the name of the interface virtual IP addresses are installed on
486 char *install_virtual_ip_on
;
489 * whether preferred source addresses can be specified for IPv6 routes
491 bool rta_prefsrc_for_ipv6
;
494 * whether to prefer temporary IPv6 addresses over public ones
496 bool prefer_temporary_addrs
;
499 * list with routing tables to be excluded from route lookup
501 linked_list_t
*rt_exclude
;
504 * MTU to set on installed routes
509 * MSS to set on installed routes
515 * Forward declaration
517 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
518 int nlmsg_type
, int flags
, chunk_t dst_net
,
519 u_int8_t prefixlen
, host_t
*gateway
,
520 host_t
*src_ip
, char *if_name
);
523 * Clear the queued network changes.
525 static void net_changes_clear(private_kernel_netlink_net_t
*this)
527 enumerator_t
*enumerator
;
528 net_change_t
*change
;
530 enumerator
= this->net_changes
->create_enumerator(this->net_changes
);
531 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&change
))
533 this->net_changes
->remove_at(this->net_changes
, enumerator
);
534 net_change_destroy(change
);
536 enumerator
->destroy(enumerator
);
540 * Act upon queued network changes.
542 static job_requeue_t
reinstall_routes(private_kernel_netlink_net_t
*this)
544 enumerator_t
*enumerator
;
545 route_entry_t
*route
;
547 this->net_changes_lock
->lock(this->net_changes_lock
);
548 this->routes_lock
->lock(this->routes_lock
);
550 enumerator
= this->routes
->create_enumerator(this->routes
);
551 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
553 net_change_t
*change
, lookup
= {
554 .if_name
= route
->if_name
,
556 /* check if a change for the outgoing interface is queued */
557 change
= this->net_changes
->get(this->net_changes
, &lookup
);
559 { /* in case src_ip is not on the outgoing interface */
560 if (this->public.interface
.get_interface(&this->public.interface
,
561 route
->src_ip
, &lookup
.if_name
))
563 if (!streq(lookup
.if_name
, route
->if_name
))
565 change
= this->net_changes
->get(this->net_changes
, &lookup
);
567 free(lookup
.if_name
);
572 manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
573 route
->dst_net
, route
->prefixlen
, route
->gateway
,
574 route
->src_ip
, route
->if_name
);
577 enumerator
->destroy(enumerator
);
578 this->routes_lock
->unlock(this->routes_lock
);
580 net_changes_clear(this);
581 this->net_changes_lock
->unlock(this->net_changes_lock
);
582 return JOB_REQUEUE_NONE
;
586 * Queue route reinstallation caused by network changes for a given interface.
588 * The route reinstallation is delayed for a while and only done once for
589 * several calls during this delay, in order to avoid doing it too often.
590 * The interface name is freed.
592 static void queue_route_reinstall(private_kernel_netlink_net_t
*this,
595 net_change_t
*update
, *found
;
603 this->net_changes_lock
->lock(this->net_changes_lock
);
604 found
= this->net_changes
->put(this->net_changes
, update
, update
);
607 net_change_destroy(found
);
609 time_monotonic(&now
);
610 if (timercmp(&now
, &this->last_route_reinstall
, >))
612 timeval_add_ms(&now
, ROUTE_DELAY
);
613 this->last_route_reinstall
= now
;
615 job
= (job_t
*)callback_job_create((callback_job_cb_t
)reinstall_routes
,
617 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROUTE_DELAY
);
619 this->net_changes_lock
->unlock(this->net_changes_lock
);
623 * check if the given IP is known as virtual IP and currently installed
625 * this function will also return TRUE if the virtual IP entry disappeared.
626 * in that case the returned entry will be NULL.
628 * this->lock must be held when calling this function
630 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t
*this,
631 host_t
*ip
, addr_map_entry_t
**entry
)
633 addr_map_entry_t lookup
= {
637 *entry
= this->vips
->get_match(this->vips
, &lookup
,
638 (void*)addr_map_entry_match
);
640 { /* the virtual IP disappeared */
643 return (*entry
)->addr
->installed
;
647 * check if the given IP is known as virtual IP
649 * this->lock must be held when calling this function
651 static bool is_known_vip(private_kernel_netlink_net_t
*this, host_t
*ip
)
653 addr_map_entry_t lookup
= {
657 return this->vips
->get_match(this->vips
, &lookup
,
658 (void*)addr_map_entry_match
) != NULL
;
662 * Add an address map entry
664 static void addr_map_entry_add(hashtable_t
*map
, addr_entry_t
*addr
,
665 iface_entry_t
*iface
)
667 addr_map_entry_t
*entry
;
674 entry
= map
->put(map
, entry
, entry
);
679 * Remove an address map entry
681 static void addr_map_entry_remove(hashtable_t
*map
, addr_entry_t
*addr
,
682 iface_entry_t
*iface
)
684 addr_map_entry_t
*entry
, lookup
= {
690 entry
= map
->remove(map
, &lookup
);
695 * Determine the type or scope of the given unicast IP address. This is not
696 * the same thing returned in rtm_scope/ifa_scope.
698 * We use return values as defined in RFC 6724 (referring to RFC 4291).
700 static u_char
get_scope(host_t
*ip
)
704 addr
= ip
->get_address(ip
);
708 /* we use the mapping defined in RFC 6724, 3.2 */
709 if (addr
.ptr
[0] == 127)
710 { /* link-local, same as the IPv6 loopback address */
713 if (addr
.ptr
[0] == 169 && addr
.ptr
[1] == 254)
719 if (IN6_IS_ADDR_LOOPBACK((struct in6_addr
*)addr
.ptr
))
720 { /* link-local, according to RFC 4291, 2.5.3 */
723 if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr
*)addr
.ptr
))
727 if (IN6_IS_ADDR_SITELOCAL((struct in6_addr
*)addr
.ptr
))
728 { /* deprecated, according to RFC 4291, 2.5.7 */
740 * Returns the length of the common prefix in bits up to the length of a's
741 * prefix, defined by RFC 6724 as the portion of the address not including the
742 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
744 static u_char
common_prefix(host_t
*a
, host_t
*b
)
747 u_char byte
, bits
= 0, match
;
749 aa
= a
->get_address(a
);
750 ba
= b
->get_address(b
);
751 for (byte
= 0; byte
< 8; byte
++)
753 if (aa
.ptr
[byte
] != ba
.ptr
[byte
])
755 match
= aa
.ptr
[byte
] ^ ba
.ptr
[byte
];
756 for (bits
= 8; match
; match
>>= 1)
763 return byte
* 8 + bits
;
767 * Compare two IP addresses and return TRUE if the second address is the better
768 * choice of the two to reach the destination.
769 * For IPv6 we approximately follow RFC 6724.
771 static bool is_address_better(private_kernel_netlink_net_t
*this,
772 addr_entry_t
*a
, addr_entry_t
*b
, host_t
*d
)
774 u_char sa
, sb
, sd
, pa
, pb
;
776 /* rule 2: prefer appropriate scope */
779 sa
= get_scope(a
->ip
);
780 sb
= get_scope(b
->ip
);
791 if (a
->ip
->get_family(a
->ip
) == AF_INET
)
792 { /* stop here for IPv4, default to addresses found earlier */
795 /* rule 3: avoid deprecated addresses (RFC 4862) */
796 if ((a
->flags
& IFA_F_DEPRECATED
) != (b
->flags
& IFA_F_DEPRECATED
))
798 return a
->flags
& IFA_F_DEPRECATED
;
800 /* rule 4 is not applicable as we don't know if an address is a home or
802 * rule 5 does not apply as we only compare addresses from one interface
803 * rule 6 requires a policy table (optionally configurable) to match
804 * configurable labels
806 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
807 if ((a
->flags
& IFA_F_TEMPORARY
) != (b
->flags
& IFA_F_TEMPORARY
))
809 if (this->prefer_temporary_addrs
)
811 return b
->flags
& IFA_F_TEMPORARY
;
813 return a
->flags
& IFA_F_TEMPORARY
;
815 /* rule 8: use longest matching prefix */
818 pa
= common_prefix(a
->ip
, d
);
819 pb
= common_prefix(b
->ip
, d
);
825 /* default to addresses found earlier */
830 * Get a non-virtual IP address on the given interface.
832 * If a candidate address is given, we first search for that address and if not
833 * found return the address as above.
834 * Returned host is a clone, has to be freed by caller.
836 * this->lock must be held when calling this function.
838 static host_t
*get_interface_address(private_kernel_netlink_net_t
*this,
839 int ifindex
, int family
, host_t
*dest
,
842 iface_entry_t
*iface
;
844 addr_entry_t
*addr
, *best
= NULL
;
846 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
847 (void**)&iface
, &ifindex
) == SUCCESS
)
850 { /* only use interfaces not excluded by config */
851 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
852 while (addrs
->enumerate(addrs
, &addr
))
854 if (addr
->refcount
||
855 addr
->ip
->get_family(addr
->ip
) != family
)
856 { /* ignore virtual IP addresses and ensure family matches */
859 if (candidate
&& candidate
->ip_equals(candidate
, addr
->ip
))
860 { /* stop if we find the candidate */
864 else if (!best
|| is_address_better(this, best
, addr
, dest
))
869 addrs
->destroy(addrs
);
872 return best ? best
->ip
->clone(best
->ip
) : NULL
;
876 * callback function that raises the delayed roam event
878 static job_requeue_t
roam_event(private_kernel_netlink_net_t
*this)
882 this->roam_lock
->lock(this->roam_lock
);
883 address
= this->roam_address
;
884 this->roam_address
= FALSE
;
885 this->roam_lock
->unlock(this->roam_lock
);
886 hydra
->kernel_interface
->roam(hydra
->kernel_interface
, address
);
887 return JOB_REQUEUE_NONE
;
891 * fire a roaming event. we delay it for a bit and fire only one event
892 * for multiple calls. otherwise we would create too many events.
894 static void fire_roam_event(private_kernel_netlink_net_t
*this, bool address
)
899 if (!this->roam_events
)
904 time_monotonic(&now
);
905 this->roam_lock
->lock(this->roam_lock
);
906 this->roam_address
|= address
;
907 if (!timercmp(&now
, &this->next_roam
, >))
909 this->roam_lock
->unlock(this->roam_lock
);
912 timeval_add_ms(&now
, ROAM_DELAY
);
913 this->next_roam
= now
;
914 this->roam_lock
->unlock(this->roam_lock
);
916 job
= (job_t
*)callback_job_create((callback_job_cb_t
)roam_event
,
918 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROAM_DELAY
);
922 * check if an interface with a given index is up and usable
924 * this->lock must be locked when calling this function
926 static bool is_interface_up_and_usable(private_kernel_netlink_net_t
*this,
929 iface_entry_t
*iface
;
931 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
932 (void**)&iface
, &index
) == SUCCESS
)
934 return iface_entry_up_and_usable(iface
);
940 * unregister the current addr_entry_t from the hashtable it is stored in
942 * this->lock must be locked when calling this function
944 static void addr_entry_unregister(addr_entry_t
*addr
, iface_entry_t
*iface
,
945 private_kernel_netlink_net_t
*this)
949 addr_map_entry_remove(this->vips
, addr
, iface
);
950 this->condvar
->broadcast(this->condvar
);
953 addr_map_entry_remove(this->addrs
, addr
, iface
);
957 * process RTM_NEWLINK/RTM_DELLINK from kernel
959 static void process_link(private_kernel_netlink_net_t
*this,
960 struct nlmsghdr
*hdr
, bool event
)
962 struct ifinfomsg
* msg
= NLMSG_DATA(hdr
);
963 struct rtattr
*rta
= IFLA_RTA(msg
);
964 size_t rtasize
= IFLA_PAYLOAD (hdr
);
965 enumerator_t
*enumerator
;
966 iface_entry_t
*current
, *entry
= NULL
;
968 bool update
= FALSE
, update_routes
= FALSE
;
970 while (RTA_OK(rta
, rtasize
))
972 switch (rta
->rta_type
)
975 name
= RTA_DATA(rta
);
978 rta
= RTA_NEXT(rta
, rtasize
);
985 this->lock
->write_lock(this->lock
);
986 switch (hdr
->nlmsg_type
)
990 if (this->ifaces
->find_first(this->ifaces
,
991 (void*)iface_entry_by_index
, (void**)&entry
,
992 &msg
->ifi_index
) != SUCCESS
)
995 .ifindex
= msg
->ifi_index
,
996 .addrs
= linked_list_create(),
997 .usable
= hydra
->kernel_interface
->is_interface_usable(
998 hydra
->kernel_interface
, name
),
1000 this->ifaces
->insert_last(this->ifaces
, entry
);
1002 strncpy(entry
->ifname
, name
, IFNAMSIZ
);
1003 entry
->ifname
[IFNAMSIZ
-1] = '\0';
1004 if (event
&& entry
->usable
)
1006 if (!(entry
->flags
& IFF_UP
) && (msg
->ifi_flags
& IFF_UP
))
1008 update
= update_routes
= TRUE
;
1009 DBG1(DBG_KNL
, "interface %s activated", name
);
1011 if ((entry
->flags
& IFF_UP
) && !(msg
->ifi_flags
& IFF_UP
))
1014 DBG1(DBG_KNL
, "interface %s deactivated", name
);
1017 entry
->flags
= msg
->ifi_flags
;
1022 enumerator
= this->ifaces
->create_enumerator(this->ifaces
);
1023 while (enumerator
->enumerate(enumerator
, ¤t
))
1025 if (current
->ifindex
== msg
->ifi_index
)
1027 if (event
&& current
->usable
)
1030 DBG1(DBG_KNL
, "interface %s deleted", current
->ifname
);
1032 /* TODO: move virtual IPs installed on this interface to
1033 * another interface? */
1034 this->ifaces
->remove_at(this->ifaces
, enumerator
);
1035 current
->addrs
->invoke_function(current
->addrs
,
1036 (void*)addr_entry_unregister
, current
, this);
1037 iface_entry_destroy(current
);
1041 enumerator
->destroy(enumerator
);
1045 this->lock
->unlock(this->lock
);
1047 if (update_routes
&& event
)
1049 queue_route_reinstall(this, strdup(name
));
1052 if (update
&& event
)
1054 fire_roam_event(this, TRUE
);
1059 * process RTM_NEWADDR/RTM_DELADDR from kernel
1061 static void process_addr(private_kernel_netlink_net_t
*this,
1062 struct nlmsghdr
*hdr
, bool event
)
1064 struct ifaddrmsg
* msg
= NLMSG_DATA(hdr
);
1065 struct rtattr
*rta
= IFA_RTA(msg
);
1066 size_t rtasize
= IFA_PAYLOAD (hdr
);
1067 host_t
*host
= NULL
;
1068 iface_entry_t
*iface
;
1069 chunk_t local
= chunk_empty
, address
= chunk_empty
;
1070 char *route_ifname
= NULL
;
1071 bool update
= FALSE
, found
= FALSE
, changed
= FALSE
;
1073 while (RTA_OK(rta
, rtasize
))
1075 switch (rta
->rta_type
)
1078 local
.ptr
= RTA_DATA(rta
);
1079 local
.len
= RTA_PAYLOAD(rta
);
1082 address
.ptr
= RTA_DATA(rta
);
1083 address
.len
= RTA_PAYLOAD(rta
);
1086 rta
= RTA_NEXT(rta
, rtasize
);
1089 /* For PPP interfaces, we need the IFA_LOCAL address,
1090 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1091 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1094 host
= host_create_from_chunk(msg
->ifa_family
, local
, 0);
1096 else if (address
.ptr
)
1098 host
= host_create_from_chunk(msg
->ifa_family
, address
, 0);
1106 this->lock
->write_lock(this->lock
);
1107 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
1108 (void**)&iface
, &msg
->ifa_index
) == SUCCESS
)
1110 addr_map_entry_t
*entry
, lookup
= {
1116 entry
= this->vips
->get(this->vips
, &lookup
);
1119 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1120 { /* mark as installed and signal waiting threads */
1121 entry
->addr
->installed
= TRUE
;
1124 { /* the address was already marked as uninstalled */
1126 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1127 addr_map_entry_remove(this->vips
, addr
, iface
);
1128 addr_entry_destroy(addr
);
1130 /* no roam events etc. for virtual IPs */
1131 this->condvar
->broadcast(this->condvar
);
1132 this->lock
->unlock(this->lock
);
1133 host
->destroy(host
);
1136 entry
= this->addrs
->get(this->addrs
, &lookup
);
1139 if (hdr
->nlmsg_type
== RTM_DELADDR
)
1143 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1147 DBG1(DBG_KNL
, "%H disappeared from %s", host
,
1150 addr_map_entry_remove(this->addrs
, addr
, iface
);
1151 addr_entry_destroy(addr
);
1156 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1160 route_ifname
= strdup(iface
->ifname
);
1162 .ip
= host
->clone(host
),
1163 .flags
= msg
->ifa_flags
,
1164 .scope
= msg
->ifa_scope
,
1166 iface
->addrs
->insert_last(iface
->addrs
, addr
);
1167 addr_map_entry_add(this->addrs
, addr
, iface
);
1168 if (event
&& iface
->usable
)
1170 DBG1(DBG_KNL
, "%H appeared on %s", host
, iface
->ifname
);
1174 if (found
&& (iface
->flags
& IFF_UP
))
1179 { /* ignore events for interfaces excluded by config */
1180 update
= changed
= FALSE
;
1183 this->lock
->unlock(this->lock
);
1185 if (update
&& event
&& route_ifname
)
1187 queue_route_reinstall(this, route_ifname
);
1193 host
->destroy(host
);
1195 /* send an update to all IKE_SAs */
1196 if (update
&& event
&& changed
)
1198 fire_roam_event(this, TRUE
);
1203 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1205 static void process_route(private_kernel_netlink_net_t
*this, struct nlmsghdr
*hdr
)
1207 struct rtmsg
* msg
= NLMSG_DATA(hdr
);
1208 struct rtattr
*rta
= RTM_RTA(msg
);
1209 size_t rtasize
= RTM_PAYLOAD(hdr
);
1210 u_int32_t rta_oif
= 0;
1211 host_t
*host
= NULL
;
1213 /* ignore routes added by us or in the local routing table (local addrs) */
1214 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1215 msg
->rtm_table
== RT_TABLE_LOCAL
))
1219 else if (msg
->rtm_flags
& RTM_F_CLONED
)
1220 { /* ignore cached routes, seem to be created a lot for IPv6 */
1224 while (RTA_OK(rta
, rtasize
))
1226 switch (rta
->rta_type
)
1230 host
= host_create_from_chunk(msg
->rtm_family
,
1231 chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
)), 0);
1234 if (RTA_PAYLOAD(rta
) == sizeof(rta_oif
))
1236 rta_oif
= *(u_int32_t
*)RTA_DATA(rta
);
1240 rta
= RTA_NEXT(rta
, rtasize
);
1242 this->lock
->read_lock(this->lock
);
1243 if (rta_oif
&& !is_interface_up_and_usable(this, rta_oif
))
1244 { /* ignore route changes for interfaces that are ignored or down */
1245 this->lock
->unlock(this->lock
);
1249 if (!host
&& rta_oif
)
1251 host
= get_interface_address(this, rta_oif
, msg
->rtm_family
,
1254 if (!host
|| is_known_vip(this, host
))
1255 { /* ignore routes added for virtual IPs */
1256 this->lock
->unlock(this->lock
);
1260 this->lock
->unlock(this->lock
);
1261 fire_roam_event(this, FALSE
);
1262 host
->destroy(host
);
1266 * Receives events from kernel
1268 static bool receive_events(private_kernel_netlink_net_t
*this, int fd
,
1269 watcher_event_t event
)
1271 char response
[1536];
1272 struct nlmsghdr
*hdr
= (struct nlmsghdr
*)response
;
1273 struct sockaddr_nl addr
;
1274 socklen_t addr_len
= sizeof(addr
);
1277 len
= recvfrom(this->socket_events
, response
, sizeof(response
),
1278 MSG_DONTWAIT
, (struct sockaddr
*)&addr
, &addr_len
);
1284 /* interrupted, try again */
1287 /* no data ready, select again */
1290 DBG1(DBG_KNL
, "unable to receive from rt event socket");
1296 if (addr
.nl_pid
!= 0)
1297 { /* not from kernel. not interested, try another one */
1301 while (NLMSG_OK(hdr
, len
))
1303 /* looks good so far, dispatch netlink message */
1304 switch (hdr
->nlmsg_type
)
1308 process_addr(this, hdr
, TRUE
);
1312 process_link(this, hdr
, TRUE
);
1316 if (this->process_route
)
1318 process_route(this, hdr
);
1324 hdr
= NLMSG_NEXT(hdr
, len
);
1329 /** enumerator over addresses */
1331 private_kernel_netlink_net_t
* this;
1332 /** which addresses to enumerate */
1333 kernel_address_type_t which
;
1334 } address_enumerator_t
;
1337 * cleanup function for address enumerator
1339 static void address_enumerator_destroy(address_enumerator_t
*data
)
1341 data
->this->lock
->unlock(data
->this->lock
);
1346 * filter for addresses
1348 static bool filter_addresses(address_enumerator_t
*data
,
1349 addr_entry_t
** in
, host_t
** out
)
1351 if (!(data
->which
& ADDR_TYPE_VIRTUAL
) && (*in
)->refcount
)
1352 { /* skip virtual interfaces added by us */
1355 if (!(data
->which
& ADDR_TYPE_REGULAR
) && !(*in
)->refcount
)
1356 { /* address is regular, but not requested */
1359 if ((*in
)->scope
>= RT_SCOPE_LINK
)
1360 { /* skip addresses with a unusable scope */
1368 * enumerator constructor for interfaces
1370 static enumerator_t
*create_iface_enumerator(iface_entry_t
*iface
,
1371 address_enumerator_t
*data
)
1373 return enumerator_create_filter(
1374 iface
->addrs
->create_enumerator(iface
->addrs
),
1375 (void*)filter_addresses
, data
, NULL
);
1379 * filter for interfaces
1381 static bool filter_interfaces(address_enumerator_t
*data
, iface_entry_t
** in
,
1382 iface_entry_t
** out
)
1384 if (!(data
->which
& ADDR_TYPE_IGNORED
) && !(*in
)->usable
)
1385 { /* skip interfaces excluded by config */
1388 if (!(data
->which
& ADDR_TYPE_LOOPBACK
) && ((*in
)->flags
& IFF_LOOPBACK
))
1389 { /* ignore loopback devices */
1392 if (!(data
->which
& ADDR_TYPE_DOWN
) && !((*in
)->flags
& IFF_UP
))
1393 { /* skip interfaces not up */
1400 METHOD(kernel_net_t
, create_address_enumerator
, enumerator_t
*,
1401 private_kernel_netlink_net_t
*this, kernel_address_type_t which
)
1403 address_enumerator_t
*data
;
1410 this->lock
->read_lock(this->lock
);
1411 return enumerator_create_nested(
1412 enumerator_create_filter(
1413 this->ifaces
->create_enumerator(this->ifaces
),
1414 (void*)filter_interfaces
, data
, NULL
),
1415 (void*)create_iface_enumerator
, data
,
1416 (void*)address_enumerator_destroy
);
1419 METHOD(kernel_net_t
, get_interface_name
, bool,
1420 private_kernel_netlink_net_t
*this, host_t
* ip
, char **name
)
1422 addr_map_entry_t
*entry
, lookup
= {
1426 if (ip
->is_anyaddr(ip
))
1430 this->lock
->read_lock(this->lock
);
1431 /* first try to find it on an up and usable interface */
1432 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1433 (void*)addr_map_entry_match_up_and_usable
);
1438 *name
= strdup(entry
->iface
->ifname
);
1439 DBG2(DBG_KNL
, "%H is on interface %s", ip
, *name
);
1441 this->lock
->unlock(this->lock
);
1444 /* in a second step, consider virtual IPs installed by us */
1445 entry
= this->vips
->get_match(this->vips
, &lookup
,
1446 (void*)addr_map_entry_match_up_and_usable
);
1451 *name
= strdup(entry
->iface
->ifname
);
1452 DBG2(DBG_KNL
, "virtual IP %H is on interface %s", ip
, *name
);
1454 this->lock
->unlock(this->lock
);
1457 /* maybe it is installed on an ignored interface */
1458 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1459 (void*)addr_map_entry_match_up
);
1462 DBG2(DBG_KNL
, "%H is not a local address or the interface is down", ip
);
1464 this->lock
->unlock(this->lock
);
1469 * get the index of an interface by name
1471 static int get_interface_index(private_kernel_netlink_net_t
*this, char* name
)
1473 iface_entry_t
*iface
;
1476 DBG2(DBG_KNL
, "getting iface index for %s", name
);
1478 this->lock
->read_lock(this->lock
);
1479 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1480 (void**)&iface
, name
) == SUCCESS
)
1482 ifindex
= iface
->ifindex
;
1484 this->lock
->unlock(this->lock
);
1488 DBG1(DBG_KNL
, "unable to get interface index for %s", name
);
1494 * check if an address or net (addr with prefix net bits) is in
1495 * subnet (net with net_len net bits)
1497 static bool addr_in_subnet(chunk_t addr
, int prefix
, chunk_t net
, int net_len
)
1499 static const u_char mask
[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
1503 { /* any address matches a /0 network */
1506 if (addr
.len
!= net
.len
|| net_len
> 8 * net
.len
|| prefix
< net_len
)
1510 /* scan through all bytes in network order */
1515 return (mask
[net_len
] & addr
.ptr
[byte
]) == (mask
[net_len
] & net
.ptr
[byte
]);
1519 if (addr
.ptr
[byte
] != net
.ptr
[byte
])
1531 * Store information about a route retrieved via RTNETLINK
1544 * Free a route entry
1546 static void rt_entry_destroy(rt_entry_t
*this)
1548 DESTROY_IF(this->src_host
);
1553 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1554 * reused if not NULL.
1556 * Returned chunks point to internal data of the Netlink message.
1558 static rt_entry_t
*parse_route(struct nlmsghdr
*hdr
, rt_entry_t
*route
)
1564 msg
= NLMSG_DATA(hdr
);
1566 rtasize
= RTM_PAYLOAD(hdr
);
1570 route
->gtw
= chunk_empty
;
1571 route
->src
= chunk_empty
;
1572 route
->dst
= chunk_empty
;
1573 route
->dst_len
= msg
->rtm_dst_len
;
1574 route
->table
= msg
->rtm_table
;
1580 .dst_len
= msg
->rtm_dst_len
,
1581 .table
= msg
->rtm_table
,
1585 while (RTA_OK(rta
, rtasize
))
1587 switch (rta
->rta_type
)
1590 route
->src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1593 route
->gtw
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1596 route
->dst
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1599 if (RTA_PAYLOAD(rta
) == sizeof(route
->oif
))
1601 route
->oif
= *(u_int32_t
*)RTA_DATA(rta
);
1604 #ifdef HAVE_RTA_TABLE
1606 if (RTA_PAYLOAD(rta
) == sizeof(route
->table
))
1608 route
->table
= *(u_int32_t
*)RTA_DATA(rta
);
1611 #endif /* HAVE_RTA_TABLE*/
1613 rta
= RTA_NEXT(rta
, rtasize
);
1619 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1621 static host_t
*get_route(private_kernel_netlink_net_t
*this, host_t
*dest
,
1622 int prefix
, bool nexthop
, host_t
*candidate
,
1625 netlink_buf_t request
;
1626 struct nlmsghdr
*hdr
, *out
, *current
;
1630 linked_list_t
*routes
;
1631 rt_entry_t
*route
= NULL
, *best
= NULL
;
1632 enumerator_t
*enumerator
;
1633 host_t
*addr
= NULL
;
1637 if (recursion
> MAX_ROUTE_RECURSION
)
1641 chunk
= dest
->get_address(dest
);
1642 len
= chunk
.len
* 8;
1643 prefix
= prefix
< 0 ? len
: min(prefix
, len
);
1644 match_net
= prefix
!= len
;
1646 memset(&request
, 0, sizeof(request
));
1648 family
= dest
->get_family(dest
);
1650 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
1651 if (family
== AF_INET
|| this->rta_prefsrc_for_ipv6
||
1652 this->routing_table
|| match_net
)
1653 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1654 * as we want to ignore routes with virtual IPs we cannot use DUMP
1655 * if these routes are not installed in a separate table */
1656 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
1658 hdr
->nlmsg_type
= RTM_GETROUTE
;
1659 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1661 msg
= NLMSG_DATA(hdr
);
1662 msg
->rtm_family
= family
;
1665 chunk
= candidate
->get_address(candidate
);
1666 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1670 chunk
= dest
->get_address(dest
);
1671 netlink_add_attribute(hdr
, RTA_DST
, chunk
, sizeof(request
));
1674 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
1676 DBG2(DBG_KNL
, "getting %s to reach %H/%d failed",
1677 nexthop ?
"nexthop" : "address", dest
, prefix
);
1680 routes
= linked_list_create();
1681 this->lock
->read_lock(this->lock
);
1683 for (current
= out
; NLMSG_OK(current
, len
);
1684 current
= NLMSG_NEXT(current
, len
))
1686 switch (current
->nlmsg_type
)
1695 route
= parse_route(current
, route
);
1697 table
= (uintptr_t)route
->table
;
1698 if (this->rt_exclude
->find_first(this->rt_exclude
, NULL
,
1699 (void**)&table
) == SUCCESS
)
1700 { /* route is from an excluded routing table */
1703 if (this->routing_table
!= 0 &&
1704 route
->table
== this->routing_table
)
1705 { /* route is from our own ipsec routing table */
1708 if (route
->oif
&& !is_interface_up_and_usable(this, route
->oif
))
1709 { /* interface is down */
1712 if (!addr_in_subnet(chunk
, prefix
, route
->dst
, route
->dst_len
))
1713 { /* route destination does not contain dest */
1717 { /* verify source address, if any */
1718 host_t
*src
= host_create_from_chunk(msg
->rtm_family
,
1720 if (src
&& is_known_vip(this, src
))
1721 { /* ignore routes installed by us */
1725 route
->src_host
= src
;
1727 /* insert route, sorted by decreasing network prefix */
1728 enumerator
= routes
->create_enumerator(routes
);
1729 while (enumerator
->enumerate(enumerator
, &other
))
1731 if (route
->dst_len
> other
->dst_len
)
1736 routes
->insert_before(routes
, enumerator
, route
);
1737 enumerator
->destroy(enumerator
);
1748 rt_entry_destroy(route
);
1751 /* now we have a list of routes matching dest, sorted by net prefix.
1752 * we will look for source addresses for these routes and select the one
1753 * with the preferred source address, if possible */
1754 enumerator
= routes
->create_enumerator(routes
);
1755 while (enumerator
->enumerate(enumerator
, &route
))
1757 if (route
->src_host
)
1758 { /* got a source address with the route, if no preferred source
1759 * is given or it matches we are done, as this is the best route */
1760 if (!candidate
|| candidate
->ip_equals(candidate
, route
->src_host
))
1765 else if (route
->oif
)
1766 { /* no match yet, maybe it is assigned to the same interface */
1767 host_t
*src
= get_interface_address(this, route
->oif
,
1768 msg
->rtm_family
, dest
, candidate
);
1769 if (src
&& src
->ip_equals(src
, candidate
))
1771 route
->src_host
->destroy(route
->src_host
);
1772 route
->src_host
= src
;
1778 /* no luck yet with the source address. if this is the best (first)
1779 * route we store it as fallback in case we don't find a route with
1780 * the preferred source */
1781 best
= best ?
: route
;
1785 { /* no src, but an interface - get address from it */
1786 route
->src_host
= get_interface_address(this, route
->oif
,
1787 msg
->rtm_family
, dest
, candidate
);
1788 if (route
->src_host
)
1789 { /* we handle this address the same as the one above */
1791 candidate
->ip_equals(candidate
, route
->src_host
))
1796 best
= best ?
: route
;
1801 { /* no src, no iface, but a gateway - lookup src to reach gtw */
1804 gtw
= host_create_from_chunk(msg
->rtm_family
, route
->gtw
, 0);
1805 if (gtw
&& !gtw
->ip_equals(gtw
, dest
))
1807 route
->src_host
= get_route(this, gtw
, -1, FALSE
, candidate
,
1811 if (route
->src_host
)
1812 { /* more of the same */
1814 candidate
->ip_equals(candidate
, route
->src_host
))
1819 best
= best ?
: route
;
1823 enumerator
->destroy(enumerator
);
1826 { /* nexthop lookup, return gateway if any */
1827 if (best
|| routes
->get_first(routes
, (void**)&best
) == SUCCESS
)
1829 addr
= host_create_from_chunk(msg
->rtm_family
, best
->gtw
, 0);
1831 if (!addr
&& !match_net
)
1832 { /* fallback to destination address */
1833 addr
= dest
->clone(dest
);
1840 addr
= best
->src_host
->clone(best
->src_host
);
1843 this->lock
->unlock(this->lock
);
1844 routes
->destroy_function(routes
, (void*)rt_entry_destroy
);
1849 DBG2(DBG_KNL
, "using %H as %s to reach %H/%d", addr
,
1850 nexthop ?
"nexthop" : "address", dest
, prefix
);
1852 else if (!recursion
)
1854 DBG2(DBG_KNL
, "no %s found to reach %H/%d",
1855 nexthop ?
"nexthop" : "address", dest
, prefix
);
1860 METHOD(kernel_net_t
, get_source_addr
, host_t
*,
1861 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
1863 return get_route(this, dest
, -1, FALSE
, src
, 0);
1866 METHOD(kernel_net_t
, get_nexthop
, host_t
*,
1867 private_kernel_netlink_net_t
*this, host_t
*dest
, int prefix
, host_t
*src
)
1869 return get_route(this, dest
, prefix
, TRUE
, src
, 0);
1873 * Manages the creation and deletion of ip addresses on an interface.
1874 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1876 static status_t
manage_ipaddr(private_kernel_netlink_net_t
*this, int nlmsg_type
,
1877 int flags
, int if_index
, host_t
*ip
, int prefix
)
1879 netlink_buf_t request
;
1880 struct nlmsghdr
*hdr
;
1881 struct ifaddrmsg
*msg
;
1884 memset(&request
, 0, sizeof(request
));
1886 chunk
= ip
->get_address(ip
);
1889 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
1890 hdr
->nlmsg_type
= nlmsg_type
;
1891 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrmsg
));
1893 msg
= NLMSG_DATA(hdr
);
1894 msg
->ifa_family
= ip
->get_family(ip
);
1896 msg
->ifa_prefixlen
= prefix
< 0 ? chunk
.len
* 8 : prefix
;
1897 msg
->ifa_scope
= RT_SCOPE_UNIVERSE
;
1898 msg
->ifa_index
= if_index
;
1900 netlink_add_attribute(hdr
, IFA_LOCAL
, chunk
, sizeof(request
));
1902 if (ip
->get_family(ip
) == AF_INET6
&& this->rta_prefsrc_for_ipv6
)
1903 { /* if source routes are possible we let the virtual IP get deprecated
1904 * immediately (but mark it as valid forever) so it gets only used if
1905 * forced by our route, and not by the default IPv6 address selection */
1906 struct ifa_cacheinfo cache
= {
1907 .ifa_valid
= 0xFFFFFFFF,
1910 netlink_add_attribute(hdr
, IFA_CACHEINFO
, chunk_from_thing(cache
),
1913 return this->socket
->send_ack(this->socket
, hdr
);
1916 METHOD(kernel_net_t
, add_ip
, status_t
,
1917 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
1920 addr_map_entry_t
*entry
, lookup
= {
1923 iface_entry_t
*iface
= NULL
;
1925 if (!this->install_virtual_ip
)
1926 { /* disabled by config */
1930 this->lock
->write_lock(this->lock
);
1931 /* the virtual IP might actually be installed as regular IP, in which case
1932 * we don't track it as virtual IP */
1933 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1934 (void*)addr_map_entry_match
);
1936 { /* otherwise it might already be installed as virtual IP */
1937 entry
= this->vips
->get_match(this->vips
, &lookup
,
1938 (void*)addr_map_entry_match
);
1940 { /* the vip we found can be in one of three states: 1) installed and
1941 * ready, 2) just added by another thread, but not yet confirmed to
1942 * be installed by the kernel, 3) just deleted, but not yet gone.
1943 * Then while we wait below, several things could happen (as we
1944 * release the lock). For instance, the interface could disappear,
1945 * or the IP is finally deleted, and it reappears on a different
1946 * interface. All these cases are handled by the call below. */
1947 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
1949 this->condvar
->wait(this->condvar
, this->lock
);
1953 entry
->addr
->refcount
++;
1959 DBG2(DBG_KNL
, "virtual IP %H is already installed on %s", virtual_ip
,
1960 entry
->iface
->ifname
);
1961 this->lock
->unlock(this->lock
);
1964 /* try to find the target interface, either by config or via src ip */
1965 if (!this->install_virtual_ip_on
||
1966 this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1967 (void**)&iface
, this->install_virtual_ip_on
) != SUCCESS
)
1969 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1970 (void**)&iface
, iface_name
) != SUCCESS
)
1971 { /* if we don't find the requested interface we just use the first */
1972 this->ifaces
->get_first(this->ifaces
, (void**)&iface
);
1980 .ip
= virtual_ip
->clone(virtual_ip
),
1982 .scope
= RT_SCOPE_UNIVERSE
,
1984 iface
->addrs
->insert_last(iface
->addrs
, addr
);
1985 addr_map_entry_add(this->vips
, addr
, iface
);
1986 if (manage_ipaddr(this, RTM_NEWADDR
, NLM_F_CREATE
| NLM_F_EXCL
,
1987 iface
->ifindex
, virtual_ip
, prefix
) == SUCCESS
)
1989 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
1990 { /* wait until address appears */
1991 this->condvar
->wait(this->condvar
, this->lock
);
1994 { /* we fail if the interface got deleted in the meantime */
1995 DBG2(DBG_KNL
, "virtual IP %H installed on %s", virtual_ip
,
1996 entry
->iface
->ifname
);
1997 this->lock
->unlock(this->lock
);
1998 /* during IKEv1 reauthentication, children get moved from
1999 * old the new SA before the virtual IP is available. This
2000 * kills the route for our virtual IP, reinstall. */
2001 queue_route_reinstall(this, strdup(entry
->iface
->ifname
));
2005 this->lock
->unlock(this->lock
);
2006 DBG1(DBG_KNL
, "adding virtual IP %H failed", virtual_ip
);
2009 this->lock
->unlock(this->lock
);
2010 DBG1(DBG_KNL
, "no interface available, unable to install virtual IP %H",
2015 METHOD(kernel_net_t
, del_ip
, status_t
,
2016 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2019 addr_map_entry_t
*entry
, lookup
= {
2023 if (!this->install_virtual_ip
)
2024 { /* disabled by config */
2028 DBG2(DBG_KNL
, "deleting virtual IP %H", virtual_ip
);
2030 this->lock
->write_lock(this->lock
);
2031 entry
= this->vips
->get_match(this->vips
, &lookup
,
2032 (void*)addr_map_entry_match
);
2034 { /* we didn't install this IP as virtual IP */
2035 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2036 (void*)addr_map_entry_match
);
2039 DBG2(DBG_KNL
, "not deleting existing IP %H on %s", virtual_ip
,
2040 entry
->iface
->ifname
);
2041 this->lock
->unlock(this->lock
);
2044 DBG2(DBG_KNL
, "virtual IP %H not cached, unable to delete", virtual_ip
);
2045 this->lock
->unlock(this->lock
);
2048 if (entry
->addr
->refcount
== 1)
2052 /* we set this flag so that threads calling add_ip will block and wait
2053 * until the entry is gone, also so we can wait below */
2054 entry
->addr
->installed
= FALSE
;
2055 status
= manage_ipaddr(this, RTM_DELADDR
, 0, entry
->iface
->ifindex
,
2056 virtual_ip
, prefix
);
2057 if (status
== SUCCESS
&& wait
)
2058 { /* wait until the address is really gone */
2059 while (is_known_vip(this, virtual_ip
))
2061 this->condvar
->wait(this->condvar
, this->lock
);
2064 this->lock
->unlock(this->lock
);
2069 entry
->addr
->refcount
--;
2071 DBG2(DBG_KNL
, "virtual IP %H used by other SAs, not deleting",
2073 this->lock
->unlock(this->lock
);
2078 * Manages source routes in the routing table.
2079 * By setting the appropriate nlmsg_type, the route gets added or removed.
2081 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
2082 int nlmsg_type
, int flags
, chunk_t dst_net
,
2083 u_int8_t prefixlen
, host_t
*gateway
,
2084 host_t
*src_ip
, char *if_name
)
2086 netlink_buf_t request
;
2087 struct nlmsghdr
*hdr
;
2093 /* if route is 0.0.0.0/0, we can't install it, as it would
2094 * overwrite the default route. Instead, we add two routes:
2095 * 0.0.0.0/1 and 128.0.0.0/1 */
2096 if (this->routing_table
== 0 && prefixlen
== 0)
2099 u_int8_t half_prefixlen
;
2102 half_net
= chunk_alloca(dst_net
.len
);
2103 memset(half_net
.ptr
, 0, half_net
.len
);
2106 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
, half_prefixlen
,
2107 gateway
, src_ip
, if_name
);
2108 half_net
.ptr
[0] |= 0x80;
2109 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
, half_prefixlen
,
2110 gateway
, src_ip
, if_name
);
2114 memset(&request
, 0, sizeof(request
));
2117 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2118 hdr
->nlmsg_type
= nlmsg_type
;
2119 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2121 msg
= NLMSG_DATA(hdr
);
2122 msg
->rtm_family
= src_ip
->get_family(src_ip
);
2123 msg
->rtm_dst_len
= prefixlen
;
2124 msg
->rtm_table
= this->routing_table
;
2125 msg
->rtm_protocol
= RTPROT_STATIC
;
2126 msg
->rtm_type
= RTN_UNICAST
;
2127 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2129 netlink_add_attribute(hdr
, RTA_DST
, dst_net
, sizeof(request
));
2130 chunk
= src_ip
->get_address(src_ip
);
2131 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
2132 if (gateway
&& gateway
->get_family(gateway
) == src_ip
->get_family(src_ip
))
2134 chunk
= gateway
->get_address(gateway
);
2135 netlink_add_attribute(hdr
, RTA_GATEWAY
, chunk
, sizeof(request
));
2137 ifindex
= get_interface_index(this, if_name
);
2138 chunk
.ptr
= (char*)&ifindex
;
2139 chunk
.len
= sizeof(ifindex
);
2140 netlink_add_attribute(hdr
, RTA_OIF
, chunk
, sizeof(request
));
2142 if (this->mtu
|| this->mss
)
2144 chunk
= chunk_alloca(RTA_LENGTH((sizeof(struct rtattr
) +
2145 sizeof(u_int32_t
)) * 2));
2147 rta
= (struct rtattr
*)chunk
.ptr
;
2150 rta
->rta_type
= RTAX_MTU
;
2151 rta
->rta_len
= RTA_LENGTH(sizeof(u_int32_t
));
2152 memcpy(RTA_DATA(rta
), &this->mtu
, sizeof(u_int32_t
));
2153 chunk
.len
= rta
->rta_len
;
2157 rta
= (struct rtattr
*)(chunk
.ptr
+ RTA_ALIGN(chunk
.len
));
2158 rta
->rta_type
= RTAX_ADVMSS
;
2159 rta
->rta_len
= RTA_LENGTH(sizeof(u_int32_t
));
2160 memcpy(RTA_DATA(rta
), &this->mss
, sizeof(u_int32_t
));
2161 chunk
.len
= RTA_ALIGN(chunk
.len
) + rta
->rta_len
;
2163 netlink_add_attribute(hdr
, RTA_METRICS
, chunk
, sizeof(request
));
2166 return this->socket
->send_ack(this->socket
, hdr
);
2169 METHOD(kernel_net_t
, add_route
, status_t
,
2170 private_kernel_netlink_net_t
*this, chunk_t dst_net
, u_int8_t prefixlen
,
2171 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
2174 route_entry_t
*found
, route
= {
2176 .prefixlen
= prefixlen
,
2182 this->routes_lock
->lock(this->routes_lock
);
2183 found
= this->routes
->get(this->routes
, &route
);
2186 this->routes_lock
->unlock(this->routes_lock
);
2187 return ALREADY_DONE
;
2189 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
2190 dst_net
, prefixlen
, gateway
, src_ip
, if_name
);
2191 if (status
== SUCCESS
)
2193 found
= route_entry_clone(&route
);
2194 this->routes
->put(this->routes
, found
, found
);
2196 this->routes_lock
->unlock(this->routes_lock
);
2200 METHOD(kernel_net_t
, del_route
, status_t
,
2201 private_kernel_netlink_net_t
*this, chunk_t dst_net
, u_int8_t prefixlen
,
2202 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
2205 route_entry_t
*found
, route
= {
2207 .prefixlen
= prefixlen
,
2213 this->routes_lock
->lock(this->routes_lock
);
2214 found
= this->routes
->get(this->routes
, &route
);
2217 this->routes_lock
->unlock(this->routes_lock
);
2220 this->routes
->remove(this->routes
, found
);
2221 route_entry_destroy(found
);
2222 status
= manage_srcroute(this, RTM_DELROUTE
, 0, dst_net
, prefixlen
,
2223 gateway
, src_ip
, if_name
);
2224 this->routes_lock
->unlock(this->routes_lock
);
2229 * Initialize a list of local addresses.
2231 static status_t
init_address_list(private_kernel_netlink_net_t
*this)
2233 netlink_buf_t request
;
2234 struct nlmsghdr
*out
, *current
, *in
;
2235 struct rtgenmsg
*msg
;
2237 enumerator_t
*ifaces
, *addrs
;
2238 iface_entry_t
*iface
;
2241 DBG2(DBG_KNL
, "known interfaces and IP addresses:");
2243 memset(&request
, 0, sizeof(request
));
2246 in
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtgenmsg
));
2247 in
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_MATCH
| NLM_F_ROOT
;
2248 msg
= NLMSG_DATA(in
);
2249 msg
->rtgen_family
= AF_UNSPEC
;
2252 in
->nlmsg_type
= RTM_GETLINK
;
2253 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2258 while (NLMSG_OK(current
, len
))
2260 switch (current
->nlmsg_type
)
2265 process_link(this, current
, FALSE
);
2268 current
= NLMSG_NEXT(current
, len
);
2275 /* get all interface addresses */
2276 in
->nlmsg_type
= RTM_GETADDR
;
2277 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2282 while (NLMSG_OK(current
, len
))
2284 switch (current
->nlmsg_type
)
2289 process_addr(this, current
, FALSE
);
2292 current
= NLMSG_NEXT(current
, len
);
2299 this->lock
->read_lock(this->lock
);
2300 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
2301 while (ifaces
->enumerate(ifaces
, &iface
))
2303 if (iface_entry_up_and_usable(iface
))
2305 DBG2(DBG_KNL
, " %s", iface
->ifname
);
2306 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
2307 while (addrs
->enumerate(addrs
, (void**)&addr
))
2309 DBG2(DBG_KNL
, " %H", addr
->ip
);
2311 addrs
->destroy(addrs
);
2314 ifaces
->destroy(ifaces
);
2315 this->lock
->unlock(this->lock
);
2320 * create or delete a rule to use our routing table
2322 static status_t
manage_rule(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2323 int family
, u_int32_t table
, u_int32_t prio
)
2325 netlink_buf_t request
;
2326 struct nlmsghdr
*hdr
;
2331 memset(&request
, 0, sizeof(request
));
2333 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2334 hdr
->nlmsg_type
= nlmsg_type
;
2335 if (nlmsg_type
== RTM_NEWRULE
)
2337 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2339 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2341 msg
= NLMSG_DATA(hdr
);
2342 msg
->rtm_table
= table
;
2343 msg
->rtm_family
= family
;
2344 msg
->rtm_protocol
= RTPROT_BOOT
;
2345 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2346 msg
->rtm_type
= RTN_UNICAST
;
2348 chunk
= chunk_from_thing(prio
);
2349 netlink_add_attribute(hdr
, RTA_PRIORITY
, chunk
, sizeof(request
));
2351 fwmark
= lib
->settings
->get_str(lib
->settings
,
2352 "%s.plugins.kernel-netlink.fwmark", NULL
, lib
->ns
);
2355 #ifdef HAVE_LINUX_FIB_RULES_H
2358 if (fwmark
[0] == '!')
2360 msg
->rtm_flags
|= FIB_RULE_INVERT
;
2363 if (mark_from_string(fwmark
, &mark
))
2365 chunk
= chunk_from_thing(mark
.value
);
2366 netlink_add_attribute(hdr
, FRA_FWMARK
, chunk
, sizeof(request
));
2367 chunk
= chunk_from_thing(mark
.mask
);
2368 netlink_add_attribute(hdr
, FRA_FWMASK
, chunk
, sizeof(request
));
2371 DBG1(DBG_KNL
, "setting firewall mark on routing rule is not supported");
2374 return this->socket
->send_ack(this->socket
, hdr
);
2378 * check for kernel features (currently only via version number)
2380 static void check_kernel_features(private_kernel_netlink_net_t
*this)
2382 struct utsname utsname
;
2385 if (uname(&utsname
) == 0)
2387 switch(sscanf(utsname
.release
, "%d.%d.%d", &a
, &b
, &c
))
2392 DBG2(DBG_KNL
, "detected Linux %d.%d.%d, no support for "
2393 "RTA_PREFSRC for IPv6 routes", a
, b
, c
);
2398 /* only 3.x+ uses two part version numbers */
2399 this->rta_prefsrc_for_ipv6
= TRUE
;
2408 * Destroy an address to iface map
2410 static void addr_map_destroy(hashtable_t
*map
)
2412 enumerator_t
*enumerator
;
2413 addr_map_entry_t
*addr
;
2415 enumerator
= map
->create_enumerator(map
);
2416 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&addr
))
2420 enumerator
->destroy(enumerator
);
2424 METHOD(kernel_net_t
, destroy
, void,
2425 private_kernel_netlink_net_t
*this)
2427 enumerator_t
*enumerator
;
2428 route_entry_t
*route
;
2430 if (this->routing_table
)
2432 manage_rule(this, RTM_DELRULE
, AF_INET
, this->routing_table
,
2433 this->routing_table_prio
);
2434 manage_rule(this, RTM_DELRULE
, AF_INET6
, this->routing_table
,
2435 this->routing_table_prio
);
2437 if (this->socket_events
> 0)
2439 lib
->watcher
->remove(lib
->watcher
, this->socket_events
);
2440 close(this->socket_events
);
2442 enumerator
= this->routes
->create_enumerator(this->routes
);
2443 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
2445 manage_srcroute(this, RTM_DELROUTE
, 0, route
->dst_net
, route
->prefixlen
,
2446 route
->gateway
, route
->src_ip
, route
->if_name
);
2447 route_entry_destroy(route
);
2449 enumerator
->destroy(enumerator
);
2450 this->routes
->destroy(this->routes
);
2451 this->routes_lock
->destroy(this->routes_lock
);
2452 DESTROY_IF(this->socket
);
2454 net_changes_clear(this);
2455 this->net_changes
->destroy(this->net_changes
);
2456 this->net_changes_lock
->destroy(this->net_changes_lock
);
2458 addr_map_destroy(this->addrs
);
2459 addr_map_destroy(this->vips
);
2461 this->ifaces
->destroy_function(this->ifaces
, (void*)iface_entry_destroy
);
2462 this->rt_exclude
->destroy(this->rt_exclude
);
2463 this->roam_lock
->destroy(this->roam_lock
);
2464 this->condvar
->destroy(this->condvar
);
2465 this->lock
->destroy(this->lock
);
2470 * Described in header.
2472 kernel_netlink_net_t
*kernel_netlink_net_create()
2474 private_kernel_netlink_net_t
*this;
2475 enumerator_t
*enumerator
;
2476 bool register_for_events
= TRUE
;
2482 .get_interface
= _get_interface_name
,
2483 .create_address_enumerator
= _create_address_enumerator
,
2484 .get_source_addr
= _get_source_addr
,
2485 .get_nexthop
= _get_nexthop
,
2488 .add_route
= _add_route
,
2489 .del_route
= _del_route
,
2490 .destroy
= _destroy
,
2493 .socket
= netlink_socket_create(NETLINK_ROUTE
, rt_msg_names
),
2494 .rt_exclude
= linked_list_create(),
2495 .routes
= hashtable_create((hashtable_hash_t
)route_entry_hash
,
2496 (hashtable_equals_t
)route_entry_equals
, 16),
2497 .net_changes
= hashtable_create(
2498 (hashtable_hash_t
)net_change_hash
,
2499 (hashtable_equals_t
)net_change_equals
, 16),
2500 .addrs
= hashtable_create(
2501 (hashtable_hash_t
)addr_map_entry_hash
,
2502 (hashtable_equals_t
)addr_map_entry_equals
, 16),
2503 .vips
= hashtable_create((hashtable_hash_t
)addr_map_entry_hash
,
2504 (hashtable_equals_t
)addr_map_entry_equals
, 16),
2505 .routes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
2506 .net_changes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
2507 .ifaces
= linked_list_create(),
2508 .lock
= rwlock_create(RWLOCK_TYPE_DEFAULT
),
2509 .condvar
= rwlock_condvar_create(),
2510 .roam_lock
= spinlock_create(),
2511 .routing_table
= lib
->settings
->get_int(lib
->settings
,
2512 "%s.routing_table", ROUTING_TABLE
, lib
->ns
),
2513 .routing_table_prio
= lib
->settings
->get_int(lib
->settings
,
2514 "%s.routing_table_prio", ROUTING_TABLE_PRIO
, lib
->ns
),
2515 .process_route
= lib
->settings
->get_bool(lib
->settings
,
2516 "%s.process_route", TRUE
, lib
->ns
),
2517 .install_virtual_ip
= lib
->settings
->get_bool(lib
->settings
,
2518 "%s.install_virtual_ip", TRUE
, lib
->ns
),
2519 .install_virtual_ip_on
= lib
->settings
->get_str(lib
->settings
,
2520 "%s.install_virtual_ip_on", NULL
, lib
->ns
),
2521 .prefer_temporary_addrs
= lib
->settings
->get_bool(lib
->settings
,
2522 "%s.prefer_temporary_addrs", FALSE
, lib
->ns
),
2523 .roam_events
= lib
->settings
->get_bool(lib
->settings
,
2524 "%s.plugins.kernel-netlink.roam_events", TRUE
, lib
->ns
),
2525 .mtu
= lib
->settings
->get_int(lib
->settings
,
2526 "%s.plugins.kernel-netlink.mtu", 0, lib
->ns
),
2527 .mss
= lib
->settings
->get_int(lib
->settings
,
2528 "%s.plugins.kernel-netlink.mss", 0, lib
->ns
),
2530 timerclear(&this->last_route_reinstall
);
2531 timerclear(&this->next_roam
);
2533 check_kernel_features(this);
2535 if (streq(lib
->ns
, "starter"))
2536 { /* starter has no threads, so we do not register for kernel events */
2537 register_for_events
= FALSE
;
2540 exclude
= lib
->settings
->get_str(lib
->settings
,
2541 "%s.ignore_routing_tables", NULL
, lib
->ns
);
2547 enumerator
= enumerator_create_token(exclude
, " ", " ");
2548 while (enumerator
->enumerate(enumerator
, &token
))
2551 table
= strtoul(token
, NULL
, 10);
2555 this->rt_exclude
->insert_last(this->rt_exclude
, (void*)table
);
2558 enumerator
->destroy(enumerator
);
2561 if (register_for_events
)
2563 struct sockaddr_nl addr
;
2565 memset(&addr
, 0, sizeof(addr
));
2566 addr
.nl_family
= AF_NETLINK
;
2568 /* create and bind RT socket for events (address/interface/route changes) */
2569 this->socket_events
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
2570 if (this->socket_events
< 0)
2572 DBG1(DBG_KNL
, "unable to create RT event socket");
2576 addr
.nl_groups
= RTMGRP_IPV4_IFADDR
| RTMGRP_IPV6_IFADDR
|
2577 RTMGRP_IPV4_ROUTE
| RTMGRP_IPV6_ROUTE
| RTMGRP_LINK
;
2578 if (bind(this->socket_events
, (struct sockaddr
*)&addr
, sizeof(addr
)))
2580 DBG1(DBG_KNL
, "unable to bind RT event socket");
2585 lib
->watcher
->add(lib
->watcher
, this->socket_events
, WATCHER_READ
,
2586 (watcher_cb_t
)receive_events
, this);
2589 if (init_address_list(this) != SUCCESS
)
2591 DBG1(DBG_KNL
, "unable to get interface list");
2596 if (this->routing_table
)
2598 if (manage_rule(this, RTM_NEWRULE
, AF_INET
, this->routing_table
,
2599 this->routing_table_prio
) != SUCCESS
)
2601 DBG1(DBG_KNL
, "unable to create IPv4 routing table rule");
2603 if (manage_rule(this, RTM_NEWRULE
, AF_INET6
, this->routing_table
,
2604 this->routing_table_prio
) != SUCCESS
)
2606 DBG1(DBG_KNL
, "unable to create IPv6 routing table rule");
2610 return &this->public;