2 * Copyright (C) 2008-2019 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * HSR Hochschule fuer Technik Rapperswil
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
44 #include <linux/if_addrlabel.h>
48 #ifdef HAVE_LINUX_FIB_RULES_H
49 #include <linux/fib_rules.h>
52 #include "kernel_netlink_net.h"
53 #include "kernel_netlink_shared.h"
56 #include <utils/debug.h>
57 #include <threading/mutex.h>
58 #include <threading/rwlock.h>
59 #include <threading/rwlock_condvar.h>
60 #include <threading/spinlock.h>
61 #include <collections/hashtable.h>
62 #include <collections/linked_list.h>
63 #include <processing/jobs/callback_job.h>
65 /** delay before firing roam events (ms) */
66 #define ROAM_DELAY 100
68 /** delay before reinstalling routes (ms) */
69 #define ROUTE_DELAY 100
71 /** maximum recursion when searching for addresses in get_route() */
72 #define MAX_ROUTE_RECURSION 2
75 #define ROUTING_TABLE 0
78 #ifndef ROUTING_TABLE_PRIO
79 #define ROUTING_TABLE_PRIO 0
/**
 * Convert a netlink multicast group number (1-based) into its bit in a
 * 32-bit group mask; for groups > 31 setsockopt has to be used.
 *
 * The argument is parenthesized so that compound expressions expand
 * correctly, and the shifted constant is unsigned so that no shift can
 * overflow a signed int.
 */
#define nl_group(group) (1U << ((group) - 1))
85 ENUM(rt_msg_names
, RTM_NEWLINK
, RTM_GETRULE
,
106 typedef struct addr_entry_t addr_entry_t
;
109 * IP address in an iface_entry_t
111 struct addr_entry_t
{
113 /** the ip address */
119 /** scope of the address */
122 /** number of times this IP is used, if virtual (i.e. managed by us) */
125 /** TRUE once it is installed, if virtual */
130 * destroy an addr_entry_t object
132 static void addr_entry_destroy(addr_entry_t
*this)
134 this->ip
->destroy(this->ip
);
138 typedef struct iface_entry_t iface_entry_t
;
141 * A network interface on this system, containing addr_entry_t's
143 struct iface_entry_t
{
145 /** interface index */
148 /** name of the interface */
149 char ifname
[IFNAMSIZ
];
151 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
154 /** list of addresses as host_t */
155 linked_list_t
*addrs
;
157 /** TRUE if usable by config */
162 * destroy an interface entry
164 static void iface_entry_destroy(iface_entry_t
*this)
166 this->addrs
->destroy_function(this->addrs
, (void*)addr_entry_destroy
);
170 CALLBACK(iface_entry_by_index
, bool,
171 iface_entry_t
*this, va_list args
)
175 VA_ARGS_VGET(args
, ifindex
);
176 return this->ifindex
== ifindex
;
179 CALLBACK(iface_entry_by_name
, bool,
180 iface_entry_t
*this, va_list args
)
184 VA_ARGS_VGET(args
, ifname
);
185 return streq(this->ifname
, ifname
);
189 * check if an interface is up
191 static inline bool iface_entry_up(iface_entry_t
*iface
)
193 return (iface
->flags
& IFF_UP
) == IFF_UP
;
197 * check if an interface is up and usable
199 static inline bool iface_entry_up_and_usable(iface_entry_t
*iface
)
201 return iface
->usable
&& iface_entry_up(iface
);
204 typedef struct addr_map_entry_t addr_map_entry_t
;
207 * Entry that maps an IP address to an interface entry
209 struct addr_map_entry_t
{
210 /** The IP address */
213 /** The address entry for this IP address */
216 /** The interface this address is installed on */
217 iface_entry_t
*iface
;
221 * Hash a addr_map_entry_t object, all entries with the same IP address
222 * are stored in the same bucket
224 static u_int
addr_map_entry_hash(addr_map_entry_t
*this)
226 return chunk_hash(this->ip
->get_address(this->ip
));
230 * Compare two addr_map_entry_t objects, two entries are equal if they are
231 * installed on the same interface
233 static bool addr_map_entry_equals(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
235 return a
->iface
->ifindex
== b
->iface
->ifindex
&&
236 a
->ip
->ip_equals(a
->ip
, b
->ip
);
240 * Used with get_match this finds an address entry if it is installed on
241 * an up and usable interface
243 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t
*a
,
246 return iface_entry_up_and_usable(b
->iface
) &&
247 a
->ip
->ip_equals(a
->ip
, b
->ip
);
251 * Used with get_match this finds an address entry if it is installed on
252 * any active local interface
254 static bool addr_map_entry_match_up(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
256 return iface_entry_up(b
->iface
) && a
->ip
->ip_equals(a
->ip
, b
->ip
);
260 * Used with get_match this finds an address entry if it is installed on
261 * any local interface
263 static bool addr_map_entry_match(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
265 return a
->ip
->ip_equals(a
->ip
, b
->ip
);
268 typedef struct route_entry_t route_entry_t
;
271 * Installed routing entry
273 struct route_entry_t
{
274 /** Name of the interface the route is bound to */
277 /** Source ip of the route */
280 /** Gateway for this route */
283 /** Destination net */
286 /** Destination net prefixlen */
291 * Clone a route_entry_t object.
293 static route_entry_t
*route_entry_clone(route_entry_t
*this)
295 route_entry_t
*route
;
298 .if_name
= strdup(this->if_name
),
299 .src_ip
= this->src_ip
->clone(this->src_ip
),
300 .gateway
= this->gateway ?
this->gateway
->clone(this->gateway
) : NULL
,
301 .dst_net
= chunk_clone(this->dst_net
),
302 .prefixlen
= this->prefixlen
,
308 * Destroy a route_entry_t object
310 static void route_entry_destroy(route_entry_t
*this)
313 DESTROY_IF(this->src_ip
);
314 DESTROY_IF(this->gateway
);
315 chunk_free(&this->dst_net
);
320 * Hash a route_entry_t object
322 static u_int
route_entry_hash(route_entry_t
*this)
324 return chunk_hash_inc(chunk_from_thing(this->prefixlen
),
325 chunk_hash(this->dst_net
));
329 * Compare two route_entry_t objects
331 static bool route_entry_equals(route_entry_t
*a
, route_entry_t
*b
)
333 if (a
->if_name
&& b
->if_name
&& streq(a
->if_name
, b
->if_name
) &&
334 a
->src_ip
->ip_equals(a
->src_ip
, b
->src_ip
) &&
335 chunk_equals(a
->dst_net
, b
->dst_net
) && a
->prefixlen
== b
->prefixlen
)
337 return (!a
->gateway
&& !b
->gateway
) || (a
->gateway
&& b
->gateway
&&
338 a
->gateway
->ip_equals(a
->gateway
, b
->gateway
));
343 typedef struct net_change_t net_change_t
;
346 * Queued network changes
348 struct net_change_t
{
349 /** Name of the interface that got activated (or an IP appeared on) */
354 * Destroy a net_change_t object
356 static void net_change_destroy(net_change_t
*this)
363 * Hash a net_change_t object
365 static u_int
net_change_hash(net_change_t
*this)
367 return chunk_hash(chunk_create(this->if_name
, strlen(this->if_name
)));
371 * Compare two net_change_t objects
373 static bool net_change_equals(net_change_t
*a
, net_change_t
*b
)
375 return streq(a
->if_name
, b
->if_name
);
378 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t
;
381 * Private variables and functions of kernel_netlink_net class.
383 struct private_kernel_netlink_net_t
{
385 * Public part of the kernel_netlink_net_t object.
387 kernel_netlink_net_t
public;
390 * lock to access various lists and maps
395 * condition variable to signal virtual IP add/removal
397 rwlock_condvar_t
*condvar
;
400 * Cached list of interfaces and their addresses (iface_entry_t)
402 linked_list_t
*ifaces
;
405 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
410 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
415 * netlink rt socket (routing)
417 netlink_socket_t
*socket
;
420 * Netlink rt socket to receive address change events
425 * earliest time of the next roam event
430 * roam event due to address change
435 * lock to check and update roam event time
437 spinlock_t
*roam_lock
;
440 * routing table to install routes
442 uint32_t routing_table
;
445 * priority of used routing table
447 uint32_t routing_table_prio
;
457 mutex_t
*routes_lock
;
460 * interface changes which may trigger route reinstallation
462 hashtable_t
*net_changes
;
465 * mutex for route reinstallation triggers
467 mutex_t
*net_changes_lock
;
470 * time of last route reinstallation
472 timeval_t last_route_reinstall
;
475 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
480 * whether to react to RTM_NEWRULE or RTM_DELRULE events
485 * whether to trigger roam events
490 * whether to install IPsec policy routes
495 * whether to actually install virtual IPs
497 bool install_virtual_ip
;
500 * the name of the interface virtual IP addresses are installed on
502 char *install_virtual_ip_on
;
505 * whether preferred source addresses can be specified for IPv6 routes
507 bool rta_prefsrc_for_ipv6
;
510 * whether marks can be used in route lookups
515 * the mark excluded from the routing rule used for virtual IPs
520 * whether to prefer temporary IPv6 addresses over public ones
522 bool prefer_temporary_addrs
;
525 * list with routing tables to be excluded from route lookup
527 linked_list_t
*rt_exclude
;
530 * MTU to set on installed routes
535 * MSS to set on installed routes
541 * Forward declaration
543 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
544 int nlmsg_type
, int flags
, chunk_t dst_net
,
545 uint8_t prefixlen
, host_t
*gateway
,
546 host_t
*src_ip
, char *if_name
);
549 * Clear the queued network changes.
551 static void net_changes_clear(private_kernel_netlink_net_t
*this)
553 enumerator_t
*enumerator
;
554 net_change_t
*change
;
556 enumerator
= this->net_changes
->create_enumerator(this->net_changes
);
557 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&change
))
559 this->net_changes
->remove_at(this->net_changes
, enumerator
);
560 net_change_destroy(change
);
562 enumerator
->destroy(enumerator
);
566 * Act upon queued network changes.
568 static job_requeue_t
reinstall_routes(private_kernel_netlink_net_t
*this)
570 enumerator_t
*enumerator
;
571 route_entry_t
*route
;
573 this->net_changes_lock
->lock(this->net_changes_lock
);
574 this->routes_lock
->lock(this->routes_lock
);
576 enumerator
= this->routes
->create_enumerator(this->routes
);
577 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
579 net_change_t
*change
, lookup
= {
580 .if_name
= route
->if_name
,
582 /* check if a change for the outgoing interface is queued */
583 change
= this->net_changes
->get(this->net_changes
, &lookup
);
585 { /* in case src_ip is not on the outgoing interface */
586 if (this->public.interface
.get_interface(&this->public.interface
,
587 route
->src_ip
, &lookup
.if_name
))
589 if (!streq(lookup
.if_name
, route
->if_name
))
591 change
= this->net_changes
->get(this->net_changes
, &lookup
);
593 free(lookup
.if_name
);
598 manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
599 route
->dst_net
, route
->prefixlen
, route
->gateway
,
600 route
->src_ip
, route
->if_name
);
603 enumerator
->destroy(enumerator
);
604 this->routes_lock
->unlock(this->routes_lock
);
606 net_changes_clear(this);
607 this->net_changes_lock
->unlock(this->net_changes_lock
);
608 return JOB_REQUEUE_NONE
;
612 * Queue route reinstallation caused by network changes for a given interface.
614 * The route reinstallation is delayed for a while and only done once for
615 * several calls during this delay, in order to avoid doing it too often.
616 * The interface name is freed.
618 static void queue_route_reinstall(private_kernel_netlink_net_t
*this,
621 net_change_t
*update
, *found
;
629 this->net_changes_lock
->lock(this->net_changes_lock
);
630 found
= this->net_changes
->put(this->net_changes
, update
, update
);
633 net_change_destroy(found
);
635 time_monotonic(&now
);
636 if (timercmp(&now
, &this->last_route_reinstall
, >))
638 timeval_add_ms(&now
, ROUTE_DELAY
);
639 this->last_route_reinstall
= now
;
641 job
= (job_t
*)callback_job_create((callback_job_cb_t
)reinstall_routes
,
643 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROUTE_DELAY
);
645 this->net_changes_lock
->unlock(this->net_changes_lock
);
649 * check if the given IP is known as virtual IP and currently installed
651 * this function will also return TRUE if the virtual IP entry disappeared.
652 * in that case the returned entry will be NULL.
654 * this->lock must be held when calling this function
656 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t
*this,
657 host_t
*ip
, addr_map_entry_t
**entry
)
659 addr_map_entry_t lookup
= {
663 *entry
= this->vips
->get_match(this->vips
, &lookup
,
664 (void*)addr_map_entry_match
);
666 { /* the virtual IP disappeared */
669 return (*entry
)->addr
->installed
;
673 * check if the given IP is known as virtual IP
675 * this->lock must be held when calling this function
677 static bool is_known_vip(private_kernel_netlink_net_t
*this, host_t
*ip
)
679 addr_map_entry_t lookup
= {
683 return this->vips
->get_match(this->vips
, &lookup
,
684 (void*)addr_map_entry_match
) != NULL
;
688 * Add an address map entry
690 static void addr_map_entry_add(hashtable_t
*map
, addr_entry_t
*addr
,
691 iface_entry_t
*iface
)
693 addr_map_entry_t
*entry
;
700 entry
= map
->put(map
, entry
, entry
);
705 * Remove an address map entry
707 static void addr_map_entry_remove(hashtable_t
*map
, addr_entry_t
*addr
,
708 iface_entry_t
*iface
)
710 addr_map_entry_t
*entry
, lookup
= {
716 entry
= map
->remove(map
, &lookup
);
721 * Check if an address or net (addr with prefix net bits) is in
722 * subnet (net with net_len net bits)
724 static bool addr_in_subnet(chunk_t addr
, int prefix
, chunk_t net
, int net_len
)
726 static const u_char mask
[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
730 { /* any address matches a /0 network */
733 if (addr
.len
!= net
.len
|| net_len
> 8 * net
.len
|| prefix
< net_len
)
737 /* scan through all bytes in network order */
742 return (mask
[net_len
] & addr
.ptr
[byte
]) == (mask
[net_len
] & net
.ptr
[byte
]);
746 if (addr
.ptr
[byte
] != net
.ptr
[byte
])
758 * Check if the given address is in subnet (net with net_len net bits)
760 static bool host_in_subnet(host_t
*host
, chunk_t net
, int net_len
)
764 addr
= host
->get_address(host
);
765 return addr_in_subnet(addr
, addr
.len
* 8, net
, net_len
);
769 * Determine the type or scope of the given unicast IP address. This is not
770 * the same thing returned in rtm_scope/ifa_scope.
772 * We use return values as defined in RFC 6724 (referring to RFC 4291).
774 static u_char
get_scope(host_t
*ip
)
778 addr
= ip
->get_address(ip
);
782 /* we use the mapping defined in RFC 6724, 3.2 */
783 if (addr
.ptr
[0] == 127)
784 { /* link-local, same as the IPv6 loopback address */
787 if (addr
.ptr
[0] == 169 && addr
.ptr
[1] == 254)
793 if (IN6_IS_ADDR_LOOPBACK((struct in6_addr
*)addr
.ptr
))
794 { /* link-local, according to RFC 4291, 2.5.3 */
797 if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr
*)addr
.ptr
))
801 if (IN6_IS_ADDR_SITELOCAL((struct in6_addr
*)addr
.ptr
))
802 { /* deprecated, according to RFC 4291, 2.5.7 */
814 * Determine the label of the given unicast IP address.
816 * We currently only support the default table given in RFC 6724:
818 * Prefix Precedence Label
829 static u_char
get_label(host_t
*ip
)
836 /* priority table ordered by prefix */
838 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
839 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), 128, 0 },
841 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
842 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), 96, 4 },
844 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
845 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 96, 3 },
847 { chunk_from_chars(0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
848 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 32, 5 },
850 { chunk_from_chars(0x20, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
851 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 2 },
853 { chunk_from_chars(0x3f, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
854 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 12 },
856 { chunk_from_chars(0xfe, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
857 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 10, 11 },
859 { chunk_from_chars(0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
860 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 7, 13 },
864 for (i
= 0; i
< countof(priorities
); i
++)
866 if (host_in_subnet(ip
, priorities
[i
].net
, priorities
[i
].prefix
))
868 return priorities
[i
].label
;
876 * Returns the length of the common prefix in bits up to the length of a's
877 * prefix, defined by RFC 6724 as the portion of the address not including the
878 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
880 static u_char
common_prefix(host_t
*a
, host_t
*b
)
883 u_char byte
, bits
= 0, match
;
885 aa
= a
->get_address(a
);
886 ba
= b
->get_address(b
);
887 for (byte
= 0; byte
< 8; byte
++)
889 if (aa
.ptr
[byte
] != ba
.ptr
[byte
])
891 match
= aa
.ptr
[byte
] ^ ba
.ptr
[byte
];
892 for (bits
= 8; match
; match
>>= 1)
899 return byte
* 8 + bits
;
903 * Compare two IP addresses and return TRUE if the second address is the better
904 * choice of the two to reach the destination.
905 * For IPv6 we approximately follow RFC 6724.
907 static bool is_address_better(private_kernel_netlink_net_t
*this,
908 addr_entry_t
*a
, addr_entry_t
*b
, host_t
*d
)
910 u_char sa
, sb
, sd
, la
, lb
, ld
, pa
, pb
;
912 /* rule 2: prefer appropriate scope */
915 sa
= get_scope(a
->ip
);
916 sb
= get_scope(b
->ip
);
927 if (a
->ip
->get_family(a
->ip
) == AF_INET
)
928 { /* stop here for IPv4, default to addresses found earlier */
931 /* rule 3: avoid deprecated addresses (RFC 4862) */
932 if ((a
->flags
& IFA_F_DEPRECATED
) != (b
->flags
& IFA_F_DEPRECATED
))
934 return a
->flags
& IFA_F_DEPRECATED
;
936 /* rule 4 is not applicable as we don't know if an address is a home or
938 * rule 5 does not apply as we only compare addresses from one interface
940 /* rule 6: prefer matching label */
943 la
= get_label(a
->ip
);
944 lb
= get_label(b
->ip
);
946 if (la
== ld
&& lb
!= ld
)
950 else if (lb
== ld
&& la
!= ld
)
955 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
956 if ((a
->flags
& IFA_F_TEMPORARY
) != (b
->flags
& IFA_F_TEMPORARY
))
958 if (this->prefer_temporary_addrs
)
960 return b
->flags
& IFA_F_TEMPORARY
;
962 return a
->flags
& IFA_F_TEMPORARY
;
964 /* rule 8: use longest matching prefix */
967 pa
= common_prefix(a
->ip
, d
);
968 pb
= common_prefix(b
->ip
, d
);
974 /* default to addresses found earlier */
979 * Get a non-virtual IP address on the given interfaces and optionally in a
982 * If a candidate address is given, we first search for that address and if not
983 * found return the address as above.
984 * Returned host is a clone, has to be freed by caller.
986 * this->lock must be held when calling this function.
988 static host_t
*get_matching_address(private_kernel_netlink_net_t
*this,
989 int *ifindex
, int family
, chunk_t net
,
990 uint8_t mask
, host_t
*dest
,
993 enumerator_t
*ifaces
, *addrs
;
994 iface_entry_t
*iface
;
995 addr_entry_t
*addr
, *best
= NULL
;
996 bool candidate_matched
= FALSE
;
998 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
999 while (ifaces
->enumerate(ifaces
, &iface
))
1001 if (iface
->usable
&& (!ifindex
|| iface
->ifindex
== *ifindex
))
1002 { /* only use matching interfaces not excluded by config */
1003 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
1004 while (addrs
->enumerate(addrs
, &addr
))
1006 if (addr
->refcount
||
1007 addr
->ip
->get_family(addr
->ip
) != family
)
1008 { /* ignore virtual IP addresses and ensure family matches */
1011 if (net
.ptr
&& !host_in_subnet(addr
->ip
, net
, mask
))
1012 { /* optionally match a subnet */
1015 if (candidate
&& candidate
->ip_equals(candidate
, addr
->ip
))
1016 { /* stop if we find the candidate */
1018 candidate_matched
= TRUE
;
1021 else if (!best
|| is_address_better(this, best
, addr
, dest
))
1026 addrs
->destroy(addrs
);
1027 if (ifindex
|| candidate_matched
)
1033 ifaces
->destroy(ifaces
);
1034 return best ? best
->ip
->clone(best
->ip
) : NULL
;
1038 * Get a non-virtual IP address on the given interface.
1040 * If a candidate address is given, we first search for that address and if not
1041 * found return the address as above.
1042 * Returned host is a clone, has to be freed by caller.
1044 * this->lock must be held when calling this function.
1046 static host_t
*get_interface_address(private_kernel_netlink_net_t
*this,
1047 int ifindex
, int family
, host_t
*dest
,
1050 return get_matching_address(this, &ifindex
, family
, chunk_empty
, 0, dest
,
1055 * Get a non-virtual IP address in the given subnet.
1057 * If a candidate address is given, we first search for that address and if not
1058 * found return the address as above.
1059 * Returned host is a clone, has to be freed by caller.
1061 * this->lock must be held when calling this function.
1063 static host_t
*get_subnet_address(private_kernel_netlink_net_t
*this,
1064 int family
, chunk_t net
, uint8_t mask
,
1065 host_t
*dest
, host_t
*candidate
)
1067 return get_matching_address(this, NULL
, family
, net
, mask
, dest
, candidate
);
1071 * callback function that raises the delayed roam event
1073 static job_requeue_t
roam_event(private_kernel_netlink_net_t
*this)
1077 this->roam_lock
->lock(this->roam_lock
);
1078 address
= this->roam_address
;
1079 this->roam_address
= FALSE
;
1080 this->roam_lock
->unlock(this->roam_lock
);
1081 charon
->kernel
->roam(charon
->kernel
, address
);
1082 return JOB_REQUEUE_NONE
;
1086 * fire a roaming event. we delay it for a bit and fire only one event
1087 * for multiple calls. otherwise we would create too many events.
1089 static void fire_roam_event(private_kernel_netlink_net_t
*this, bool address
)
1094 if (!this->roam_events
)
1099 time_monotonic(&now
);
1100 this->roam_lock
->lock(this->roam_lock
);
1101 this->roam_address
|= address
;
1102 if (!timercmp(&now
, &this->next_roam
, >))
1104 this->roam_lock
->unlock(this->roam_lock
);
1107 timeval_add_ms(&now
, ROAM_DELAY
);
1108 this->next_roam
= now
;
1109 this->roam_lock
->unlock(this->roam_lock
);
1111 job
= (job_t
*)callback_job_create((callback_job_cb_t
)roam_event
,
1113 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROAM_DELAY
);
1117 * check if an interface with a given index is up and usable
1119 * this->lock must be locked when calling this function
1121 static bool is_interface_up_and_usable(private_kernel_netlink_net_t
*this,
1124 iface_entry_t
*iface
;
1126 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1127 (void**)&iface
, index
))
1129 return iface_entry_up_and_usable(iface
);
1135 * unregister the current addr_entry_t from the hashtable it is stored in
1137 * this->lock must be locked when calling this function
1139 CALLBACK(addr_entry_unregister
, void,
1140 addr_entry_t
*addr
, va_list args
)
1142 private_kernel_netlink_net_t
*this;
1143 iface_entry_t
*iface
;
1145 VA_ARGS_VGET(args
, iface
, this);
1148 addr_map_entry_remove(this->vips
, addr
, iface
);
1149 this->condvar
->broadcast(this->condvar
);
1152 addr_map_entry_remove(this->addrs
, addr
, iface
);
1156 * process RTM_NEWLINK/RTM_DELLINK from kernel
1158 static void process_link(private_kernel_netlink_net_t
*this,
1159 struct nlmsghdr
*hdr
, bool event
)
1161 struct ifinfomsg
* msg
= NLMSG_DATA(hdr
);
1162 struct rtattr
*rta
= IFLA_RTA(msg
);
1163 size_t rtasize
= IFLA_PAYLOAD (hdr
);
1164 enumerator_t
*enumerator
;
1165 iface_entry_t
*current
, *entry
= NULL
;
1167 bool update
= FALSE
, update_routes
= FALSE
;
1169 while (RTA_OK(rta
, rtasize
))
1171 switch (rta
->rta_type
)
1174 name
= RTA_DATA(rta
);
1177 rta
= RTA_NEXT(rta
, rtasize
);
1184 this->lock
->write_lock(this->lock
);
1185 switch (hdr
->nlmsg_type
)
1189 if (!this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1190 (void**)&entry
, msg
->ifi_index
))
1193 .ifindex
= msg
->ifi_index
,
1194 .addrs
= linked_list_create(),
1196 this->ifaces
->insert_last(this->ifaces
, entry
);
1198 strncpy(entry
->ifname
, name
, IFNAMSIZ
);
1199 entry
->ifname
[IFNAMSIZ
-1] = '\0';
1200 entry
->usable
= charon
->kernel
->is_interface_usable(charon
->kernel
,
1202 if (event
&& entry
->usable
)
1204 if (!(entry
->flags
& IFF_UP
) && (msg
->ifi_flags
& IFF_UP
))
1206 update
= update_routes
= TRUE
;
1207 DBG1(DBG_KNL
, "interface %s activated", name
);
1209 if ((entry
->flags
& IFF_UP
) && !(msg
->ifi_flags
& IFF_UP
))
1212 DBG1(DBG_KNL
, "interface %s deactivated", name
);
1215 entry
->flags
= msg
->ifi_flags
;
1220 enumerator
= this->ifaces
->create_enumerator(this->ifaces
);
1221 while (enumerator
->enumerate(enumerator
, ¤t
))
1223 if (current
->ifindex
== msg
->ifi_index
)
1225 if (event
&& current
->usable
)
1228 DBG1(DBG_KNL
, "interface %s deleted", current
->ifname
);
1230 /* TODO: move virtual IPs installed on this interface to
1231 * another interface? */
1232 this->ifaces
->remove_at(this->ifaces
, enumerator
);
1233 current
->addrs
->invoke_function(current
->addrs
,
1234 addr_entry_unregister
, current
, this);
1235 iface_entry_destroy(current
);
1239 enumerator
->destroy(enumerator
);
1243 this->lock
->unlock(this->lock
);
1245 if (update_routes
&& event
)
1247 queue_route_reinstall(this, strdup(name
));
1250 if (update
&& event
)
1252 fire_roam_event(this, TRUE
);
1257 * process RTM_NEWADDR/RTM_DELADDR from kernel
1259 static void process_addr(private_kernel_netlink_net_t
*this,
1260 struct nlmsghdr
*hdr
, bool event
)
1262 struct ifaddrmsg
* msg
= NLMSG_DATA(hdr
);
1263 struct rtattr
*rta
= IFA_RTA(msg
);
1264 size_t rtasize
= IFA_PAYLOAD (hdr
);
1265 host_t
*host
= NULL
;
1266 iface_entry_t
*iface
;
1267 chunk_t local
= chunk_empty
, address
= chunk_empty
;
1268 char *route_ifname
= NULL
;
1269 bool update
= FALSE
, found
= FALSE
, changed
= FALSE
;
1271 while (RTA_OK(rta
, rtasize
))
1273 switch (rta
->rta_type
)
1276 local
.ptr
= RTA_DATA(rta
);
1277 local
.len
= RTA_PAYLOAD(rta
);
1280 address
.ptr
= RTA_DATA(rta
);
1281 address
.len
= RTA_PAYLOAD(rta
);
1284 rta
= RTA_NEXT(rta
, rtasize
);
1287 /* For PPP interfaces, we need the IFA_LOCAL address,
1288 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1289 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1292 host
= host_create_from_chunk(msg
->ifa_family
, local
, 0);
1294 else if (address
.ptr
)
1296 host
= host_create_from_chunk(msg
->ifa_family
, address
, 0);
1304 this->lock
->write_lock(this->lock
);
1305 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1306 (void**)&iface
, msg
->ifa_index
))
1308 addr_map_entry_t
*entry
, lookup
= {
1314 entry
= this->vips
->get(this->vips
, &lookup
);
1317 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1318 { /* mark as installed and signal waiting threads */
1319 entry
->addr
->installed
= TRUE
;
1322 { /* the address was already marked as uninstalled */
1324 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1325 addr_map_entry_remove(this->vips
, addr
, iface
);
1326 addr_entry_destroy(addr
);
1328 /* no roam events etc. for virtual IPs */
1329 this->condvar
->broadcast(this->condvar
);
1330 this->lock
->unlock(this->lock
);
1331 host
->destroy(host
);
1334 entry
= this->addrs
->get(this->addrs
, &lookup
);
1337 if (hdr
->nlmsg_type
== RTM_DELADDR
)
1341 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1345 DBG1(DBG_KNL
, "%H disappeared from %s", host
,
1348 addr_map_entry_remove(this->addrs
, addr
, iface
);
1349 addr_entry_destroy(addr
);
1354 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1358 route_ifname
= strdup(iface
->ifname
);
1360 .ip
= host
->clone(host
),
1361 .flags
= msg
->ifa_flags
,
1362 .scope
= msg
->ifa_scope
,
1364 iface
->addrs
->insert_last(iface
->addrs
, addr
);
1365 addr_map_entry_add(this->addrs
, addr
, iface
);
1366 if (event
&& iface
->usable
)
1368 DBG1(DBG_KNL
, "%H appeared on %s", host
, iface
->ifname
);
1372 if (found
&& (iface
->flags
& IFF_UP
))
1377 { /* ignore events for interfaces excluded by config */
1378 update
= changed
= FALSE
;
1381 this->lock
->unlock(this->lock
);
1383 if (update
&& event
&& route_ifname
)
1385 queue_route_reinstall(this, route_ifname
);
1391 host
->destroy(host
);
1393 /* send an update to all IKE_SAs */
1394 if (update
&& event
&& changed
)
1396 fire_roam_event(this, TRUE
);
1401 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1403 static void process_route(private_kernel_netlink_net_t
*this,
1404 struct nlmsghdr
*hdr
)
1406 struct rtmsg
* msg
= NLMSG_DATA(hdr
);
1407 struct rtattr
*rta
= RTM_RTA(msg
);
1408 size_t rtasize
= RTM_PAYLOAD(hdr
);
1409 uint32_t rta_oif
= 0;
1410 host_t
*host
= NULL
;
1412 /* ignore routes added by us or in the local routing table (local addrs) */
1413 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1414 msg
->rtm_table
== RT_TABLE_LOCAL
))
1418 else if (msg
->rtm_flags
& RTM_F_CLONED
)
1419 { /* ignore cached routes, seem to be created a lot for IPv6 */
1423 while (RTA_OK(rta
, rtasize
))
1425 switch (rta
->rta_type
)
1427 #ifdef HAVE_RTA_TABLE
1429 /* also check against extended table ID */
1430 if (RTA_PAYLOAD(rta
) == sizeof(uint32_t) &&
1431 this->routing_table
== *(uint32_t*)RTA_DATA(rta
))
1436 #endif /* HAVE_RTA_TABLE */
1439 host
= host_create_from_chunk(msg
->rtm_family
,
1440 chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
)), 0);
1443 if (RTA_PAYLOAD(rta
) == sizeof(rta_oif
))
1445 rta_oif
= *(uint32_t*)RTA_DATA(rta
);
1449 rta
= RTA_NEXT(rta
, rtasize
);
1451 this->lock
->read_lock(this->lock
);
1452 if (rta_oif
&& !is_interface_up_and_usable(this, rta_oif
))
1453 { /* ignore route changes for interfaces that are ignored or down */
1454 this->lock
->unlock(this->lock
);
1458 if (!host
&& rta_oif
)
1460 host
= get_interface_address(this, rta_oif
, msg
->rtm_family
,
1463 if (!host
|| is_known_vip(this, host
))
1464 { /* ignore routes added for virtual IPs */
1465 this->lock
->unlock(this->lock
);
1469 this->lock
->unlock(this->lock
);
1470 fire_roam_event(this, FALSE
);
1471 host
->destroy(host
);
1475 * process RTM_NEW|DELRULE from kernel
1477 static void process_rule(private_kernel_netlink_net_t
*this,
1478 struct nlmsghdr
*hdr
)
1480 #ifdef HAVE_LINUX_FIB_RULES_H
1481 struct rtmsg
* msg
= NLMSG_DATA(hdr
);
1482 struct rtattr
*rta
= RTM_RTA(msg
);
1483 size_t rtasize
= RTM_PAYLOAD(hdr
);
1485 /* ignore rules added by us or in the local routing table (local addrs) */
1486 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1487 msg
->rtm_table
== RT_TABLE_LOCAL
))
1492 while (RTA_OK(rta
, rtasize
))
1494 switch (rta
->rta_type
)
1497 /* also check against extended table ID */
1498 if (RTA_PAYLOAD(rta
) == sizeof(uint32_t) &&
1499 this->routing_table
== *(uint32_t*)RTA_DATA(rta
))
1505 rta
= RTA_NEXT(rta
, rtasize
);
1507 fire_roam_event(this, FALSE
);
1512 * Receives events from kernel
1514 static bool receive_events(private_kernel_netlink_net_t
*this, int fd
,
1515 watcher_event_t event
)
1517 char response
[netlink_get_buflen()];
1518 struct nlmsghdr
*hdr
= (struct nlmsghdr
*)response
;
1519 struct sockaddr_nl addr
;
1520 socklen_t addr_len
= sizeof(addr
);
1523 len
= recvfrom(this->socket_events
, response
, sizeof(response
),
1524 MSG_DONTWAIT
, (struct sockaddr
*)&addr
, &addr_len
);
1530 /* interrupted, try again */
1533 /* no data ready, select again */
1536 DBG1(DBG_KNL
, "unable to receive from RT event socket %s (%d)",
1537 strerror(errno
), errno
);
1543 if (addr
.nl_pid
!= 0)
1544 { /* not from kernel. not interested, try another one */
1548 while (NLMSG_OK(hdr
, len
))
1550 /* looks good so far, dispatch netlink message */
1551 switch (hdr
->nlmsg_type
)
1555 process_addr(this, hdr
, TRUE
);
1559 process_link(this, hdr
, TRUE
);
1563 if (this->process_route
)
1565 process_route(this, hdr
);
1570 if (this->process_rules
)
1572 process_rule(this, hdr
);
1578 hdr
= NLMSG_NEXT(hdr
, len
);
1583 /** enumerator over addresses */
1585 private_kernel_netlink_net_t
* this;
1586 /** which addresses to enumerate */
1587 kernel_address_type_t which
;
1588 } address_enumerator_t
;
1590 CALLBACK(address_enumerator_destroy
, void,
1591 address_enumerator_t
*data
)
1593 data
->this->lock
->unlock(data
->this->lock
);
1597 CALLBACK(filter_addresses
, bool,
1598 address_enumerator_t
*data
, enumerator_t
*orig
, va_list args
)
1603 VA_ARGS_VGET(args
, out
);
1605 while (orig
->enumerate(orig
, &addr
))
1607 if (!(data
->which
& ADDR_TYPE_VIRTUAL
) && addr
->refcount
)
1608 { /* skip virtual interfaces added by us */
1611 if (!(data
->which
& ADDR_TYPE_REGULAR
) && !addr
->refcount
)
1612 { /* address is regular, but not requested */
1615 if (addr
->flags
& IFA_F_DEPRECATED
||
1616 addr
->scope
>= RT_SCOPE_LINK
)
1617 { /* skip deprecated addresses or those with an unusable scope */
1620 if (addr
->ip
->get_family(addr
->ip
) == AF_INET6
)
1621 { /* handle temporary IPv6 addresses according to config */
1622 bool temporary
= (addr
->flags
& IFA_F_TEMPORARY
) == IFA_F_TEMPORARY
;
1623 if (data
->this->prefer_temporary_addrs
!= temporary
)
1635 * enumerator constructor for interfaces
1637 static enumerator_t
*create_iface_enumerator(iface_entry_t
*iface
,
1638 address_enumerator_t
*data
)
1640 return enumerator_create_filter(
1641 iface
->addrs
->create_enumerator(iface
->addrs
),
1642 filter_addresses
, data
, NULL
);
1645 CALLBACK(filter_interfaces
, bool,
1646 address_enumerator_t
*data
, enumerator_t
*orig
, va_list args
)
1648 iface_entry_t
*iface
, **out
;
1650 VA_ARGS_VGET(args
, out
);
1652 while (orig
->enumerate(orig
, &iface
))
1654 if (!(data
->which
& ADDR_TYPE_IGNORED
) && !iface
->usable
)
1655 { /* skip interfaces excluded by config */
1658 if (!(data
->which
& ADDR_TYPE_LOOPBACK
) && (iface
->flags
& IFF_LOOPBACK
))
1659 { /* ignore loopback devices */
1662 if (!(data
->which
& ADDR_TYPE_DOWN
) && !(iface
->flags
& IFF_UP
))
1663 { /* skip interfaces not up */
1672 METHOD(kernel_net_t
, create_address_enumerator
, enumerator_t
*,
1673 private_kernel_netlink_net_t
*this, kernel_address_type_t which
)
1675 address_enumerator_t
*data
;
1682 this->lock
->read_lock(this->lock
);
1683 return enumerator_create_nested(
1684 enumerator_create_filter(
1685 this->ifaces
->create_enumerator(this->ifaces
),
1686 filter_interfaces
, data
, NULL
),
1687 (void*)create_iface_enumerator
, data
,
1688 address_enumerator_destroy
);
1691 METHOD(kernel_net_t
, get_interface_name
, bool,
1692 private_kernel_netlink_net_t
*this, host_t
* ip
, char **name
)
1694 addr_map_entry_t
*entry
, lookup
= {
1698 if (ip
->is_anyaddr(ip
))
1702 this->lock
->read_lock(this->lock
);
1703 /* first try to find it on an up and usable interface */
1704 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1705 (void*)addr_map_entry_match_up_and_usable
);
1710 *name
= strdup(entry
->iface
->ifname
);
1711 DBG2(DBG_KNL
, "%H is on interface %s", ip
, *name
);
1713 this->lock
->unlock(this->lock
);
1716 /* in a second step, consider virtual IPs installed by us */
1717 entry
= this->vips
->get_match(this->vips
, &lookup
,
1718 (void*)addr_map_entry_match_up_and_usable
);
1723 *name
= strdup(entry
->iface
->ifname
);
1724 DBG2(DBG_KNL
, "virtual IP %H is on interface %s", ip
, *name
);
1726 this->lock
->unlock(this->lock
);
1729 /* maybe it is installed on an ignored interface */
1730 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1731 (void*)addr_map_entry_match_up
);
1734 DBG2(DBG_KNL
, "%H is not a local address or the interface is down", ip
);
1736 this->lock
->unlock(this->lock
);
1741 * get the index of an interface by name
1743 static int get_interface_index(private_kernel_netlink_net_t
*this, char* name
)
1745 iface_entry_t
*iface
;
1748 DBG2(DBG_KNL
, "getting iface index for %s", name
);
1750 this->lock
->read_lock(this->lock
);
1751 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
1752 (void**)&iface
, name
))
1754 ifindex
= iface
->ifindex
;
1756 this->lock
->unlock(this->lock
);
1760 DBG1(DBG_KNL
, "unable to get interface index for %s", name
);
1766 * get the name of an interface by index (allocated)
1768 static char *get_interface_name_by_index(private_kernel_netlink_net_t
*this,
1771 iface_entry_t
*iface
;
1774 DBG2(DBG_KNL
, "getting iface name for index %d", index
);
1776 this->lock
->read_lock(this->lock
);
1777 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1778 (void**)&iface
, index
))
1780 name
= strdup(iface
->ifname
);
1782 this->lock
->unlock(this->lock
);
1786 DBG1(DBG_KNL
, "unable to get interface name for %d", index
);
1792 * Store information about a route retrieved via RTNETLINK
1808 * Free a route entry
1810 static void rt_entry_destroy(rt_entry_t
*this)
1812 DESTROY_IF(this->src_host
);
1817 * Check if the route received with RTM_NEWROUTE is usable based on its type.
1819 static bool route_usable(struct nlmsghdr
*hdr
, bool allow_local
)
1823 msg
= NLMSG_DATA(hdr
);
1824 switch (msg
->rtm_type
)
1827 case RTN_UNREACHABLE
:
1839 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1840 * reused if not NULL.
1842 * Returned chunks point to internal data of the Netlink message.
1844 static rt_entry_t
*parse_route(struct nlmsghdr
*hdr
, rt_entry_t
*route
)
1850 msg
= NLMSG_DATA(hdr
);
1852 rtasize
= RTM_PAYLOAD(hdr
);
1856 *route
= (rt_entry_t
){
1857 .dst_len
= msg
->rtm_dst_len
,
1858 .src_len
= msg
->rtm_src_len
,
1859 .table
= msg
->rtm_table
,
1865 .dst_len
= msg
->rtm_dst_len
,
1866 .src_len
= msg
->rtm_src_len
,
1867 .table
= msg
->rtm_table
,
1871 while (RTA_OK(rta
, rtasize
))
1873 switch (rta
->rta_type
)
1876 route
->pref_src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1879 route
->gtw
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1882 route
->dst
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1885 route
->src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1888 if (RTA_PAYLOAD(rta
) == sizeof(route
->oif
))
1890 route
->oif
= *(uint32_t*)RTA_DATA(rta
);
1894 if (RTA_PAYLOAD(rta
) == sizeof(route
->priority
))
1896 route
->priority
= *(uint32_t*)RTA_DATA(rta
);
1899 #ifdef HAVE_RTA_TABLE
1901 if (RTA_PAYLOAD(rta
) == sizeof(route
->table
))
1903 route
->table
= *(uint32_t*)RTA_DATA(rta
);
1906 #endif /* HAVE_RTA_TABLE*/
1908 rta
= RTA_NEXT(rta
, rtasize
);
1914 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1916 static host_t
*get_route(private_kernel_netlink_net_t
*this, host_t
*dest
,
1917 int prefix
, bool nexthop
, host_t
*candidate
,
1918 char **iface
, u_int recursion
)
1920 netlink_buf_t request
;
1921 struct nlmsghdr
*hdr
, *out
, *current
;
1925 linked_list_t
*routes
;
1926 rt_entry_t
*route
= NULL
, *best
= NULL
;
1927 enumerator_t
*enumerator
;
1928 host_t
*addr
= NULL
;
1932 if (recursion
> MAX_ROUTE_RECURSION
)
1936 chunk
= dest
->get_address(dest
);
1937 len
= chunk
.len
* 8;
1938 prefix
= prefix
< 0 ? len
: min(prefix
, len
);
1939 match_net
= prefix
!= len
;
1941 memset(&request
, 0, sizeof(request
));
1943 family
= dest
->get_family(dest
);
1945 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
1946 hdr
->nlmsg_type
= RTM_GETROUTE
;
1947 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1949 msg
= NLMSG_DATA(hdr
);
1950 msg
->rtm_family
= family
;
1951 if (!match_net
&& this->rta_mark
&& this->routing_mark
.value
)
1953 /* if our routing rule excludes packets with a certain mark we can
1954 * get the preferred route without having to dump all routes */
1955 chunk
= chunk_from_thing(this->routing_mark
.value
);
1956 netlink_add_attribute(hdr
, RTA_MARK
, chunk
, sizeof(request
));
1958 else if (family
== AF_INET
|| this->rta_prefsrc_for_ipv6
||
1959 this->routing_table
|| match_net
)
1960 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1961 * as we want to ignore routes with virtual IPs we cannot use DUMP
1962 * if these routes are not installed in a separate table */
1963 if (this->install_routes
)
1965 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
1970 chunk
= candidate
->get_address(candidate
);
1971 if (hdr
->nlmsg_flags
& NLM_F_DUMP
)
1973 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1977 netlink_add_attribute(hdr
, RTA_SRC
, chunk
, sizeof(request
));
1980 /* we use this below to match against the routes */
1981 chunk
= dest
->get_address(dest
);
1984 netlink_add_attribute(hdr
, RTA_DST
, chunk
, sizeof(request
));
1987 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
1989 DBG2(DBG_KNL
, "getting %s to reach %H/%d failed",
1990 nexthop ?
"nexthop" : "address", dest
, prefix
);
1993 routes
= linked_list_create();
1994 this->lock
->read_lock(this->lock
);
1996 for (current
= out
; NLMSG_OK(current
, len
);
1997 current
= NLMSG_NEXT(current
, len
))
1999 switch (current
->nlmsg_type
)
2008 if (!route_usable(current
, TRUE
))
2012 route
= parse_route(current
, route
);
2014 table
= (uintptr_t)route
->table
;
2015 if (this->rt_exclude
->find_first(this->rt_exclude
, NULL
,
2017 { /* route is from an excluded routing table */
2020 if (this->routing_table
!= 0 &&
2021 route
->table
== this->routing_table
)
2022 { /* route is from our own ipsec routing table */
2025 if (route
->oif
&& !is_interface_up_and_usable(this, route
->oif
))
2026 { /* interface is down */
2029 if (!addr_in_subnet(chunk
, prefix
, route
->dst
, route
->dst_len
))
2030 { /* route destination does not contain dest */
2033 if (route
->pref_src
.ptr
)
2034 { /* verify source address, if any */
2035 host_t
*src
= host_create_from_chunk(msg
->rtm_family
,
2036 route
->pref_src
, 0);
2037 if (src
&& is_known_vip(this, src
))
2038 { /* ignore routes installed by us */
2042 route
->src_host
= src
;
2044 /* insert route, sorted by network prefix and priority */
2045 enumerator
= routes
->create_enumerator(routes
);
2046 while (enumerator
->enumerate(enumerator
, &other
))
2048 if (route
->dst_len
> other
->dst_len
)
2052 if (route
->dst_len
== other
->dst_len
&&
2053 route
->priority
< other
->priority
)
2058 routes
->insert_before(routes
, enumerator
, route
);
2059 enumerator
->destroy(enumerator
);
2070 rt_entry_destroy(route
);
2073 /* now we have a list of routes matching dest, sorted by net prefix.
2074 * we will look for source addresses for these routes and select the one
2075 * with the preferred source address, if possible */
2076 enumerator
= routes
->create_enumerator(routes
);
2077 while (enumerator
->enumerate(enumerator
, &route
))
2079 if (route
->src_host
)
2080 { /* got a source address with the route, if no preferred source
2081 * is given or it matches we are done, as this is the best route */
2082 if (!candidate
|| candidate
->ip_equals(candidate
, route
->src_host
))
2087 else if (route
->oif
)
2088 { /* no match yet, maybe it is assigned to the same interface */
2089 host_t
*src
= get_interface_address(this, route
->oif
,
2090 msg
->rtm_family
, dest
, candidate
);
2091 if (src
&& src
->ip_equals(src
, candidate
))
2093 route
->src_host
->destroy(route
->src_host
);
2094 route
->src_host
= src
;
2100 /* no luck yet with the source address. if this is the best (first)
2101 * route we store it as fallback in case we don't find a route with
2102 * the preferred source */
2103 best
= best ?
: route
;
2107 { /* no src, but a source selector, try to find a matching address */
2108 route
->src_host
= get_subnet_address(this, msg
->rtm_family
,
2109 route
->src
, route
->src_len
, dest
,
2111 if (route
->src_host
)
2112 { /* we handle this address the same as the one above */
2114 candidate
->ip_equals(candidate
, route
->src_host
))
2119 best
= best ?
: route
;
2124 { /* no src, but an interface - get address from it */
2125 route
->src_host
= get_interface_address(this, route
->oif
,
2126 msg
->rtm_family
, dest
, candidate
);
2127 if (route
->src_host
)
2128 { /* more of the same */
2130 candidate
->ip_equals(candidate
, route
->src_host
))
2135 best
= best ?
: route
;
2140 { /* no src, no iface, but a gateway - lookup src to reach gtw */
2143 gtw
= host_create_from_chunk(msg
->rtm_family
, route
->gtw
, 0);
2144 if (gtw
&& !gtw
->ip_equals(gtw
, dest
))
2146 route
->src_host
= get_route(this, gtw
, -1, FALSE
, candidate
,
2147 iface
, recursion
+ 1);
2150 if (route
->src_host
)
2151 { /* more of the same */
2153 candidate
->ip_equals(candidate
, route
->src_host
))
2158 best
= best ?
: route
;
2162 enumerator
->destroy(enumerator
);
2165 { /* nexthop lookup, return gateway and oif if any */
2170 if (best
|| routes
->get_first(routes
, (void**)&best
) == SUCCESS
)
2172 addr
= host_create_from_chunk(msg
->rtm_family
, best
->gtw
, 0);
2173 if (iface
&& best
->oif
)
2175 *iface
= get_interface_name_by_index(this, best
->oif
);
2178 if (!addr
&& !match_net
)
2179 { /* fallback to destination address */
2180 addr
= dest
->clone(dest
);
2187 addr
= best
->src_host
->clone(best
->src_host
);
2190 this->lock
->unlock(this->lock
);
2191 routes
->destroy_function(routes
, (void*)rt_entry_destroy
);
2196 if (nexthop
&& iface
&& *iface
)
2198 DBG2(DBG_KNL
, "using %H as nexthop and %s as dev to reach %H/%d",
2199 addr
, *iface
, dest
, prefix
);
2203 DBG2(DBG_KNL
, "using %H as %s to reach %H/%d", addr
,
2204 nexthop ?
"nexthop" : "address", dest
, prefix
);
2207 else if (!recursion
)
2209 DBG2(DBG_KNL
, "no %s found to reach %H/%d",
2210 nexthop ?
"nexthop" : "address", dest
, prefix
);
2215 METHOD(kernel_net_t
, get_source_addr
, host_t
*,
2216 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
2218 return get_route(this, dest
, -1, FALSE
, src
, NULL
, 0);
2221 METHOD(kernel_net_t
, get_nexthop
, host_t
*,
2222 private_kernel_netlink_net_t
*this, host_t
*dest
, int prefix
, host_t
*src
,
2225 return get_route(this, dest
, prefix
, TRUE
, src
, iface
, 0);
2228 /** enumerator over subnets */
2230 enumerator_t
public;
2231 private_kernel_netlink_net_t
*private;
2232 /** message from the kernel */
2233 struct nlmsghdr
*msg
;
2234 /** current message from the kernel */
2235 struct nlmsghdr
*current
;
2236 /** remaining length */
2238 /** last subnet enumerated */
2240 /** interface of current net */
2241 char ifname
[IFNAMSIZ
];
2242 } subnet_enumerator_t
;
2244 METHOD(enumerator_t
, destroy_subnet_enumerator
, void,
2245 subnet_enumerator_t
*this)
2247 DESTROY_IF(this->net
);
2252 METHOD(enumerator_t
, enumerate_subnets
, bool,
2253 subnet_enumerator_t
*this, va_list args
)
2259 VA_ARGS_VGET(args
, net
, mask
, ifname
);
2263 this->current
= this->msg
;
2267 this->current
= NLMSG_NEXT(this->current
, this->len
);
2268 DESTROY_IF(this->net
);
2272 while (NLMSG_OK(this->current
, this->len
))
2274 switch (this->current
->nlmsg_type
)
2282 if (!route_usable(this->current
, FALSE
))
2286 parse_route(this->current
, &route
);
2288 if (route
.table
&& (
2289 route
.table
== RT_TABLE_LOCAL
||
2290 route
.table
== this->private->routing_table
))
2291 { /* ignore our own and the local routing tables */
2294 else if (route
.gtw
.ptr
)
2295 { /* ignore routes via gateway/next hop */
2299 if (route
.dst
.ptr
&& route
.oif
&&
2300 if_indextoname(route
.oif
, this->ifname
))
2302 this->net
= host_create_from_chunk(AF_UNSPEC
, route
.dst
, 0);
2304 *mask
= route
.dst_len
;
2305 *ifname
= this->ifname
;
2313 this->current
= NLMSG_NEXT(this->current
, this->len
);
2318 METHOD(kernel_net_t
, create_local_subnet_enumerator
, enumerator_t
*,
2319 private_kernel_netlink_net_t
*this)
2321 netlink_buf_t request
;
2322 struct nlmsghdr
*hdr
, *out
;
2325 subnet_enumerator_t
*enumerator
;
2327 memset(&request
, 0, sizeof(request
));
2330 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
2331 hdr
->nlmsg_type
= RTM_GETROUTE
;
2332 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2333 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
2335 msg
= NLMSG_DATA(hdr
);
2336 msg
->rtm_scope
= RT_SCOPE_LINK
;
2338 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
2340 DBG2(DBG_KNL
, "enumerating local subnets failed");
2341 return enumerator_create_empty();
2346 .enumerate
= enumerator_enumerate_default
,
2347 .venumerate
= _enumerate_subnets
,
2348 .destroy
= _destroy_subnet_enumerator
,
2354 return &enumerator
->public;
2358 * Manages the creation and deletion of IPv6 address labels for virtual IPs.
2359 * By setting the appropriate nlmsg_type the label is either added or removed.
2361 static status_t
manage_addrlabel(private_kernel_netlink_net_t
*this,
2362 int nlmsg_type
, host_t
*ip
)
2364 netlink_buf_t request
;
2365 struct nlmsghdr
*hdr
;
2366 struct ifaddrlblmsg
*msg
;
2370 memset(&request
, 0, sizeof(request
));
2372 chunk
= ip
->get_address(ip
);
2375 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2376 if (nlmsg_type
== RTM_NEWADDRLABEL
)
2378 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2380 hdr
->nlmsg_type
= nlmsg_type
;
2381 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrlblmsg
));
2383 msg
= NLMSG_DATA(hdr
);
2384 msg
->ifal_family
= ip
->get_family(ip
);
2385 msg
->ifal_prefixlen
= chunk
.len
* 8;
2387 netlink_add_attribute(hdr
, IFAL_ADDRESS
, chunk
, sizeof(request
));
2388 /* doesn't really matter as default labels are < 20 but this makes it kinda
2391 netlink_add_attribute(hdr
, IFAL_LABEL
, chunk_from_thing(label
),
2394 return this->socket
->send_ack(this->socket
, hdr
);
2398 * Manages the creation and deletion of ip addresses on an interface.
2399 * By setting the appropriate nlmsg_type, the ip will be set or unset.
2401 static status_t
manage_ipaddr(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2402 int flags
, int if_index
, host_t
*ip
, int prefix
)
2404 netlink_buf_t request
;
2405 struct nlmsghdr
*hdr
;
2406 struct ifaddrmsg
*msg
;
2409 memset(&request
, 0, sizeof(request
));
2411 chunk
= ip
->get_address(ip
);
2414 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2415 hdr
->nlmsg_type
= nlmsg_type
;
2416 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrmsg
));
2418 msg
= NLMSG_DATA(hdr
);
2419 msg
->ifa_family
= ip
->get_family(ip
);
2421 msg
->ifa_prefixlen
= prefix
< 0 ? chunk
.len
* 8 : prefix
;
2422 msg
->ifa_scope
= RT_SCOPE_UNIVERSE
;
2423 msg
->ifa_index
= if_index
;
2425 netlink_add_attribute(hdr
, IFA_LOCAL
, chunk
, sizeof(request
));
2427 if (ip
->get_family(ip
) == AF_INET6
)
2430 msg
->ifa_flags
|= IFA_F_NODAD
;
2432 if (this->rta_prefsrc_for_ipv6
)
2434 /* if source routes are possible we set a label for this virtual IP
2435 * so it gets only used if forced by our route, and not by the
2436 * default IPv6 address selection */
2437 int labelop
= nlmsg_type
== RTM_NEWADDR ? RTM_NEWADDRLABEL
2439 if (manage_addrlabel(this, labelop
, ip
) != SUCCESS
)
2441 /* if we can't use address labels we let the virtual IP get
2442 * deprecated immediately (but mark it as valid forever), which
2443 * should also avoid that it gets used by the default address
2445 struct ifa_cacheinfo cache
= {
2446 .ifa_valid
= 0xFFFFFFFF,
2449 netlink_add_attribute(hdr
, IFA_CACHEINFO
,
2450 chunk_from_thing(cache
), sizeof(request
));
2454 return this->socket
->send_ack(this->socket
, hdr
);
2457 METHOD(kernel_net_t
, add_ip
, status_t
,
2458 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2461 addr_map_entry_t
*entry
, lookup
= {
2464 iface_entry_t
*iface
= NULL
;
2466 if (!this->install_virtual_ip
)
2467 { /* disabled by config */
2471 this->lock
->write_lock(this->lock
);
2472 /* the virtual IP might actually be installed as regular IP, in which case
2473 * we don't track it as virtual IP */
2474 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2475 (void*)addr_map_entry_match
);
2477 { /* otherwise it might already be installed as virtual IP */
2478 entry
= this->vips
->get_match(this->vips
, &lookup
,
2479 (void*)addr_map_entry_match
);
2481 { /* the vip we found can be in one of three states: 1) installed and
2482 * ready, 2) just added by another thread, but not yet confirmed to
2483 * be installed by the kernel, 3) just deleted, but not yet gone.
2484 * Then while we wait below, several things could happen (as we
2485 * release the lock). For instance, the interface could disappear,
2486 * or the IP is finally deleted, and it reappears on a different
2487 * interface. All these cases are handled by the call below. */
2488 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
2490 this->condvar
->wait(this->condvar
, this->lock
);
2494 entry
->addr
->refcount
++;
2500 DBG2(DBG_KNL
, "virtual IP %H is already installed on %s", virtual_ip
,
2501 entry
->iface
->ifname
);
2502 this->lock
->unlock(this->lock
);
2505 /* try to find the target interface, either by config or via src ip */
2506 if (!this->install_virtual_ip_on
||
2507 !this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
2508 (void**)&iface
, this->install_virtual_ip_on
))
2510 if (!this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
2511 (void**)&iface
, iface_name
))
2512 { /* if we don't find the requested interface we just use the first */
2513 this->ifaces
->get_first(this->ifaces
, (void**)&iface
);
2523 .ip
= virtual_ip
->clone(virtual_ip
),
2525 .scope
= RT_SCOPE_UNIVERSE
,
2527 iface
->addrs
->insert_last(iface
->addrs
, addr
);
2528 addr_map_entry_add(this->vips
, addr
, iface
);
2529 ifi
= iface
->ifindex
;
2530 this->lock
->unlock(this->lock
);
2531 if (manage_ipaddr(this, RTM_NEWADDR
, NLM_F_CREATE
| NLM_F_EXCL
,
2532 ifi
, virtual_ip
, prefix
) == SUCCESS
)
2534 this->lock
->write_lock(this->lock
);
2535 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
2536 { /* wait until address appears */
2537 this->condvar
->wait(this->condvar
, this->lock
);
2540 { /* we fail if the interface got deleted in the meantime */
2541 ifname
= strdup(entry
->iface
->ifname
);
2542 this->lock
->unlock(this->lock
);
2543 DBG2(DBG_KNL
, "virtual IP %H installed on %s",
2544 virtual_ip
, ifname
);
2545 /* during IKEv1 reauthentication, children get moved from
2546 * old the new SA before the virtual IP is available. This
2547 * kills the route for our virtual IP, reinstall. */
2548 queue_route_reinstall(this, ifname
);
2551 this->lock
->unlock(this->lock
);
2553 DBG1(DBG_KNL
, "adding virtual IP %H failed", virtual_ip
);
2556 this->lock
->unlock(this->lock
);
2557 DBG1(DBG_KNL
, "no interface available, unable to install virtual IP %H",
2562 METHOD(kernel_net_t
, del_ip
, status_t
,
2563 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2566 addr_map_entry_t
*entry
, lookup
= {
2570 if (!this->install_virtual_ip
)
2571 { /* disabled by config */
2575 DBG2(DBG_KNL
, "deleting virtual IP %H", virtual_ip
);
2577 this->lock
->write_lock(this->lock
);
2578 entry
= this->vips
->get_match(this->vips
, &lookup
,
2579 (void*)addr_map_entry_match
);
2581 { /* we didn't install this IP as virtual IP */
2582 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2583 (void*)addr_map_entry_match
);
2586 DBG2(DBG_KNL
, "not deleting existing IP %H on %s", virtual_ip
,
2587 entry
->iface
->ifname
);
2588 this->lock
->unlock(this->lock
);
2591 DBG2(DBG_KNL
, "virtual IP %H not cached, unable to delete", virtual_ip
);
2592 this->lock
->unlock(this->lock
);
2595 if (entry
->addr
->refcount
== 1)
2600 /* we set this flag so that threads calling add_ip will block and wait
2601 * until the entry is gone, also so we can wait below */
2602 entry
->addr
->installed
= FALSE
;
2603 ifi
= entry
->iface
->ifindex
;
2604 this->lock
->unlock(this->lock
);
2605 status
= manage_ipaddr(this, RTM_DELADDR
, 0, ifi
, virtual_ip
, prefix
);
2606 if (status
== SUCCESS
&& wait
)
2607 { /* wait until the address is really gone */
2608 this->lock
->write_lock(this->lock
);
2609 while (is_known_vip(this, virtual_ip
))
2611 this->condvar
->wait(this->condvar
, this->lock
);
2613 this->lock
->unlock(this->lock
);
2619 entry
->addr
->refcount
--;
2621 DBG2(DBG_KNL
, "virtual IP %H used by other SAs, not deleting",
2623 this->lock
->unlock(this->lock
);
2628 * Manages source routes in the routing table.
2629 * By setting the appropriate nlmsg_type, the route gets added or removed.
2631 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
2632 int nlmsg_type
, int flags
, chunk_t dst_net
,
2633 uint8_t prefixlen
, host_t
*gateway
,
2634 host_t
*src_ip
, char *if_name
)
2636 netlink_buf_t request
;
2637 struct nlmsghdr
*hdr
;
2643 /* if route is 0.0.0.0/0, we can't install it, as it would
2644 * overwrite the default route. Instead, we add two routes:
2645 * 0.0.0.0/1 and 128.0.0.0/1 */
2646 if (this->routing_table
== 0 && prefixlen
== 0)
2649 uint8_t half_prefixlen
;
2652 half_net
= chunk_alloca(dst_net
.len
);
2653 memset(half_net
.ptr
, 0, half_net
.len
);
2656 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
,
2657 half_prefixlen
, gateway
, src_ip
, if_name
);
2658 half_net
.ptr
[0] |= 0x80;
2659 status
|= manage_srcroute(this, nlmsg_type
, flags
, half_net
,
2660 half_prefixlen
, gateway
, src_ip
, if_name
);
2664 memset(&request
, 0, sizeof(request
));
2667 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2668 hdr
->nlmsg_type
= nlmsg_type
;
2669 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2671 msg
= NLMSG_DATA(hdr
);
2672 msg
->rtm_family
= src_ip
->get_family(src_ip
);
2673 msg
->rtm_dst_len
= prefixlen
;
2674 msg
->rtm_protocol
= RTPROT_STATIC
;
2675 msg
->rtm_type
= RTN_UNICAST
;
2676 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2678 if (this->routing_table
< 256)
2680 msg
->rtm_table
= this->routing_table
;
2684 #ifdef HAVE_RTA_TABLE
2685 chunk
= chunk_from_thing(this->routing_table
);
2686 netlink_add_attribute(hdr
, RTA_TABLE
, chunk
, sizeof(request
));
2688 DBG1(DBG_KNL
, "routing table IDs > 255 are not supported");
2690 #endif /* HAVE_RTA_TABLE */
2692 netlink_add_attribute(hdr
, RTA_DST
, dst_net
, sizeof(request
));
2693 chunk
= src_ip
->get_address(src_ip
);
2694 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
2695 if (gateway
&& gateway
->get_family(gateway
) == src_ip
->get_family(src_ip
))
2697 chunk
= gateway
->get_address(gateway
);
2698 netlink_add_attribute(hdr
, RTA_GATEWAY
, chunk
, sizeof(request
));
2700 ifindex
= get_interface_index(this, if_name
);
2701 chunk
.ptr
= (char*)&ifindex
;
2702 chunk
.len
= sizeof(ifindex
);
2703 netlink_add_attribute(hdr
, RTA_OIF
, chunk
, sizeof(request
));
2705 if (this->mtu
|| this->mss
)
2707 chunk
= chunk_alloca(RTA_LENGTH((sizeof(struct rtattr
) +
2708 sizeof(uint32_t)) * 2));
2710 rta
= (struct rtattr
*)chunk
.ptr
;
2713 rta
->rta_type
= RTAX_MTU
;
2714 rta
->rta_len
= RTA_LENGTH(sizeof(uint32_t));
2715 memcpy(RTA_DATA(rta
), &this->mtu
, sizeof(uint32_t));
2716 chunk
.len
= rta
->rta_len
;
2720 rta
= (struct rtattr
*)(chunk
.ptr
+ RTA_ALIGN(chunk
.len
));
2721 rta
->rta_type
= RTAX_ADVMSS
;
2722 rta
->rta_len
= RTA_LENGTH(sizeof(uint32_t));
2723 memcpy(RTA_DATA(rta
), &this->mss
, sizeof(uint32_t));
2724 chunk
.len
= RTA_ALIGN(chunk
.len
) + rta
->rta_len
;
2726 netlink_add_attribute(hdr
, RTA_METRICS
, chunk
, sizeof(request
));
2729 return this->socket
->send_ack(this->socket
, hdr
);
2733 * Helper struct used to check routes
2736 /** the entry we look for */
2737 route_entry_t route
;
2738 /** kernel interface */
2739 private_kernel_netlink_net_t
*this;
2740 } route_entry_lookup_t
;
2743 * Check if a matching route entry has a VIP associated
2745 static bool route_with_vip(route_entry_lookup_t
*a
, route_entry_t
*b
)
2747 if (chunk_equals(a
->route
.dst_net
, b
->dst_net
) &&
2748 a
->route
.prefixlen
== b
->prefixlen
&&
2749 is_known_vip(a
->this, b
->src_ip
))
2757 * Check if there is any route entry with a matching destination
2759 static bool route_with_dst(route_entry_lookup_t
*a
, route_entry_t
*b
)
2761 if (chunk_equals(a
->route
.dst_net
, b
->dst_net
) &&
2762 a
->route
.prefixlen
== b
->prefixlen
)
2769 METHOD(kernel_net_t
, add_route
, status_t
,
2770 private_kernel_netlink_net_t
*this, chunk_t dst_net
, uint8_t prefixlen
,
2771 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
2774 route_entry_t
*found
;
2775 route_entry_lookup_t lookup
= {
2778 .prefixlen
= prefixlen
,
2786 this->routes_lock
->lock(this->routes_lock
);
2787 found
= this->routes
->get(this->routes
, &lookup
.route
);
2790 this->routes_lock
->unlock(this->routes_lock
);
2791 return ALREADY_DONE
;
2794 /* don't replace the route if we already have one with a VIP installed,
2795 * but keep track of it in case that other route is uninstalled */
2796 this->lock
->read_lock(this->lock
);
2797 if (!is_known_vip(this, src_ip
))
2799 found
= this->routes
->get_match(this->routes
, &lookup
,
2800 (void*)route_with_vip
);
2802 this->lock
->unlock(this->lock
);
2809 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
|NLM_F_REPLACE
,
2810 dst_net
, prefixlen
, gateway
, src_ip
, if_name
);
2812 if (status
== SUCCESS
)
2814 found
= route_entry_clone(&lookup
.route
);
2815 this->routes
->put(this->routes
, found
, found
);
2817 this->routes_lock
->unlock(this->routes_lock
);
2821 METHOD(kernel_net_t
, del_route
, status_t
,
2822 private_kernel_netlink_net_t
*this, chunk_t dst_net
, uint8_t prefixlen
,
2823 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
2826 route_entry_t
*found
;
2827 route_entry_lookup_t lookup
= {
2830 .prefixlen
= prefixlen
,
2838 this->routes_lock
->lock(this->routes_lock
);
2839 found
= this->routes
->remove(this->routes
, &lookup
.route
);
2842 this->routes_lock
->unlock(this->routes_lock
);
2845 route_entry_destroy(found
);
2847 /* check if there are any other routes for the same destination and if
2848 * so update the route, otherwise uninstall it */
2849 this->lock
->read_lock(this->lock
);
2850 found
= this->routes
->get_match(this->routes
, &lookup
,
2851 (void*)route_with_vip
);
2852 this->lock
->unlock(this->lock
);
2855 found
= this->routes
->get_match(this->routes
, &lookup
,
2856 (void*)route_with_dst
);
2860 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
|NLM_F_REPLACE
,
2861 found
->dst_net
, found
->prefixlen
, found
->gateway
,
2862 found
->src_ip
, found
->if_name
);
2866 status
= manage_srcroute(this, RTM_DELROUTE
, 0, dst_net
, prefixlen
,
2867 gateway
, src_ip
, if_name
);
2869 this->routes_lock
->unlock(this->routes_lock
);
2874 * Initialize a list of local addresses.