2 * Copyright (C) 2008-2016 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * HSR Hochschule fuer Technik Rapperswil
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
47 #ifdef HAVE_LINUX_FIB_RULES_H
48 #include <linux/fib_rules.h>
51 #include "kernel_netlink_net.h"
52 #include "kernel_netlink_shared.h"
55 #include <utils/debug.h>
56 #include <threading/mutex.h>
57 #include <threading/rwlock.h>
58 #include <threading/rwlock_condvar.h>
59 #include <threading/spinlock.h>
60 #include <collections/hashtable.h>
61 #include <collections/linked_list.h>
62 #include <processing/jobs/callback_job.h>
64 /** delay before firing roam events (ms) */
65 #define ROAM_DELAY 100
67 /** delay before reinstalling routes (ms) */
68 #define ROUTE_DELAY 100
70 /** maximum recursion when searching for addresses in get_route() */
71 #define MAX_ROUTE_RECURSION 2
74 #define ROUTING_TABLE 0
77 #ifndef ROUTING_TABLE_PRIO
78 #define ROUTING_TABLE_PRIO 0
81 ENUM(rt_msg_names
, RTM_NEWLINK
, RTM_GETRULE
,
102 typedef struct addr_entry_t addr_entry_t
;
105 * IP address in an iface_entry_t
107 struct addr_entry_t
{
109 /** the ip address */
115 /** scope of the address */
118 /** number of times this IP is used, if virtual (i.e. managed by us) */
121 /** TRUE once it is installed, if virtual */
126 * destroy a addr_entry_t object
128 static void addr_entry_destroy(addr_entry_t
*this)
130 this->ip
->destroy(this->ip
);
134 typedef struct iface_entry_t iface_entry_t
;
137 * A network interface on this system, containing addr_entry_t's
139 struct iface_entry_t
{
141 /** interface index */
144 /** name of the interface */
145 char ifname
[IFNAMSIZ
];
147 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
150 /** list of addresses as host_t */
151 linked_list_t
*addrs
;
153 /** TRUE if usable by config */
158 * destroy an interface entry
160 static void iface_entry_destroy(iface_entry_t
*this)
162 this->addrs
->destroy_function(this->addrs
, (void*)addr_entry_destroy
);
167 * find an interface entry by index
169 static bool iface_entry_by_index(iface_entry_t
*this, int *ifindex
)
171 return this->ifindex
== *ifindex
;
175 * find an interface entry by name
177 static bool iface_entry_by_name(iface_entry_t
*this, char *ifname
)
179 return streq(this->ifname
, ifname
);
183 * check if an interface is up
185 static inline bool iface_entry_up(iface_entry_t
*iface
)
187 return (iface
->flags
& IFF_UP
) == IFF_UP
;
191 * check if an interface is up and usable
193 static inline bool iface_entry_up_and_usable(iface_entry_t
*iface
)
195 return iface
->usable
&& iface_entry_up(iface
);
198 typedef struct addr_map_entry_t addr_map_entry_t
;
201 * Entry that maps an IP address to an interface entry
203 struct addr_map_entry_t
{
204 /** The IP address */
207 /** The address entry for this IP address */
210 /** The interface this address is installed on */
211 iface_entry_t
*iface
;
215 * Hash a addr_map_entry_t object, all entries with the same IP address
216 * are stored in the same bucket
218 static u_int
addr_map_entry_hash(addr_map_entry_t
*this)
220 return chunk_hash(this->ip
->get_address(this->ip
));
224 * Compare two addr_map_entry_t objects, two entries are equal if they are
225 * installed on the same interface
227 static bool addr_map_entry_equals(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
229 return a
->iface
->ifindex
== b
->iface
->ifindex
&&
230 a
->ip
->ip_equals(a
->ip
, b
->ip
);
234 * Used with get_match this finds an address entry if it is installed on
235 * an up and usable interface
237 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t
*a
,
240 return iface_entry_up_and_usable(b
->iface
) &&
241 a
->ip
->ip_equals(a
->ip
, b
->ip
);
245 * Used with get_match this finds an address entry if it is installed on
246 * any active local interface
248 static bool addr_map_entry_match_up(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
250 return iface_entry_up(b
->iface
) && a
->ip
->ip_equals(a
->ip
, b
->ip
);
254 * Used with get_match this finds an address entry if it is installed on
255 * any local interface
257 static bool addr_map_entry_match(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
259 return a
->ip
->ip_equals(a
->ip
, b
->ip
);
262 typedef struct route_entry_t route_entry_t
;
265 * Installed routing entry
267 struct route_entry_t
{
268 /** Name of the interface the route is bound to */
271 /** Source ip of the route */
274 /** Gateway for this route */
277 /** Destination net */
280 /** Destination net prefixlen */
285 * Clone a route_entry_t object.
287 static route_entry_t
*route_entry_clone(route_entry_t
*this)
289 route_entry_t
*route
;
292 .if_name
= strdup(this->if_name
),
293 .src_ip
= this->src_ip
->clone(this->src_ip
),
294 .gateway
= this->gateway ?
this->gateway
->clone(this->gateway
) : NULL
,
295 .dst_net
= chunk_clone(this->dst_net
),
296 .prefixlen
= this->prefixlen
,
302 * Destroy a route_entry_t object
304 static void route_entry_destroy(route_entry_t
*this)
307 DESTROY_IF(this->src_ip
);
308 DESTROY_IF(this->gateway
);
309 chunk_free(&this->dst_net
);
314 * Hash a route_entry_t object
316 static u_int
route_entry_hash(route_entry_t
*this)
318 return chunk_hash_inc(chunk_from_thing(this->prefixlen
),
319 chunk_hash(this->dst_net
));
323 * Compare two route_entry_t objects
325 static bool route_entry_equals(route_entry_t
*a
, route_entry_t
*b
)
327 if (a
->if_name
&& b
->if_name
&& streq(a
->if_name
, b
->if_name
) &&
328 a
->src_ip
->ip_equals(a
->src_ip
, b
->src_ip
) &&
329 chunk_equals(a
->dst_net
, b
->dst_net
) && a
->prefixlen
== b
->prefixlen
)
331 return (!a
->gateway
&& !b
->gateway
) || (a
->gateway
&& b
->gateway
&&
332 a
->gateway
->ip_equals(a
->gateway
, b
->gateway
));
337 typedef struct net_change_t net_change_t
;
340 * Queued network changes
342 struct net_change_t
{
343 /** Name of the interface that got activated (or an IP appeared on) */
348 * Destroy a net_change_t object
350 static void net_change_destroy(net_change_t
*this)
357 * Hash a net_change_t object
359 static u_int
net_change_hash(net_change_t
*this)
361 return chunk_hash(chunk_create(this->if_name
, strlen(this->if_name
)));
365 * Compare two net_change_t objects
367 static bool net_change_equals(net_change_t
*a
, net_change_t
*b
)
369 return streq(a
->if_name
, b
->if_name
);
372 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t
;
375 * Private variables and functions of kernel_netlink_net class.
377 struct private_kernel_netlink_net_t
{
379 * Public part of the kernel_netlink_net_t object.
381 kernel_netlink_net_t
public;
384 * lock to access various lists and maps
389 * condition variable to signal virtual IP add/removal
391 rwlock_condvar_t
*condvar
;
394 * Cached list of interfaces and its addresses (iface_entry_t)
396 linked_list_t
*ifaces
;
399 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
404 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
409 * netlink rt socket (routing)
411 netlink_socket_t
*socket
;
414 * Netlink rt socket to receive address change events
419 * earliest time of the next roam event
424 * roam event due to address change
429 * lock to check and update roam event time
431 spinlock_t
*roam_lock
;
434 * routing table to install routes
439 * priority of used routing table
441 int routing_table_prio
;
451 mutex_t
*routes_lock
;
454 * interface changes which may trigger route reinstallation
456 hashtable_t
*net_changes
;
459 * mutex for route reinstallation triggers
461 mutex_t
*net_changes_lock
;
464 * time of last route reinstallation
466 timeval_t last_route_reinstall
;
469 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
474 * whether to trigger roam events
479 * whether to actually install virtual IPs
481 bool install_virtual_ip
;
484 * the name of the interface virtual IP addresses are installed on
486 char *install_virtual_ip_on
;
489 * whether preferred source addresses can be specified for IPv6 routes
491 bool rta_prefsrc_for_ipv6
;
494 * whether marks can be used in route lookups
499 * the mark excluded from the routing rule used for virtual IPs
504 * whether to prefer temporary IPv6 addresses over public ones
506 bool prefer_temporary_addrs
;
509 * list with routing tables to be excluded from route lookup
511 linked_list_t
*rt_exclude
;
514 * MTU to set on installed routes
519 * MSS to set on installed routes
525 * Forward declaration
527 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
528 int nlmsg_type
, int flags
, chunk_t dst_net
,
529 uint8_t prefixlen
, host_t
*gateway
,
530 host_t
*src_ip
, char *if_name
);
533 * Clear the queued network changes.
535 static void net_changes_clear(private_kernel_netlink_net_t
*this)
537 enumerator_t
*enumerator
;
538 net_change_t
*change
;
540 enumerator
= this->net_changes
->create_enumerator(this->net_changes
);
541 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&change
))
543 this->net_changes
->remove_at(this->net_changes
, enumerator
);
544 net_change_destroy(change
);
546 enumerator
->destroy(enumerator
);
550 * Act upon queued network changes.
552 static job_requeue_t
reinstall_routes(private_kernel_netlink_net_t
*this)
554 enumerator_t
*enumerator
;
555 route_entry_t
*route
;
557 this->net_changes_lock
->lock(this->net_changes_lock
);
558 this->routes_lock
->lock(this->routes_lock
);
560 enumerator
= this->routes
->create_enumerator(this->routes
);
561 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
563 net_change_t
*change
, lookup
= {
564 .if_name
= route
->if_name
,
566 /* check if a change for the outgoing interface is queued */
567 change
= this->net_changes
->get(this->net_changes
, &lookup
);
569 { /* in case src_ip is not on the outgoing interface */
570 if (this->public.interface
.get_interface(&this->public.interface
,
571 route
->src_ip
, &lookup
.if_name
))
573 if (!streq(lookup
.if_name
, route
->if_name
))
575 change
= this->net_changes
->get(this->net_changes
, &lookup
);
577 free(lookup
.if_name
);
582 manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
583 route
->dst_net
, route
->prefixlen
, route
->gateway
,
584 route
->src_ip
, route
->if_name
);
587 enumerator
->destroy(enumerator
);
588 this->routes_lock
->unlock(this->routes_lock
);
590 net_changes_clear(this);
591 this->net_changes_lock
->unlock(this->net_changes_lock
);
592 return JOB_REQUEUE_NONE
;
596 * Queue route reinstallation caused by network changes for a given interface.
598 * The route reinstallation is delayed for a while and only done once for
599 * several calls during this delay, in order to avoid doing it too often.
600 * The interface name is freed.
602 static void queue_route_reinstall(private_kernel_netlink_net_t
*this,
605 net_change_t
*update
, *found
;
613 this->net_changes_lock
->lock(this->net_changes_lock
);
614 found
= this->net_changes
->put(this->net_changes
, update
, update
);
617 net_change_destroy(found
);
619 time_monotonic(&now
);
620 if (timercmp(&now
, &this->last_route_reinstall
, >))
622 timeval_add_ms(&now
, ROUTE_DELAY
);
623 this->last_route_reinstall
= now
;
625 job
= (job_t
*)callback_job_create((callback_job_cb_t
)reinstall_routes
,
627 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROUTE_DELAY
);
629 this->net_changes_lock
->unlock(this->net_changes_lock
);
633 * check if the given IP is known as virtual IP and currently installed
635 * this function will also return TRUE if the virtual IP entry disappeared.
636 * in that case the returned entry will be NULL.
638 * this->lock must be held when calling this function
640 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t
*this,
641 host_t
*ip
, addr_map_entry_t
**entry
)
643 addr_map_entry_t lookup
= {
647 *entry
= this->vips
->get_match(this->vips
, &lookup
,
648 (void*)addr_map_entry_match
);
650 { /* the virtual IP disappeared */
653 return (*entry
)->addr
->installed
;
657 * check if the given IP is known as virtual IP
659 * this->lock must be held when calling this function
661 static bool is_known_vip(private_kernel_netlink_net_t
*this, host_t
*ip
)
663 addr_map_entry_t lookup
= {
667 return this->vips
->get_match(this->vips
, &lookup
,
668 (void*)addr_map_entry_match
) != NULL
;
672 * Add an address map entry
674 static void addr_map_entry_add(hashtable_t
*map
, addr_entry_t
*addr
,
675 iface_entry_t
*iface
)
677 addr_map_entry_t
*entry
;
684 entry
= map
->put(map
, entry
, entry
);
689 * Remove an address map entry
691 static void addr_map_entry_remove(hashtable_t
*map
, addr_entry_t
*addr
,
692 iface_entry_t
*iface
)
694 addr_map_entry_t
*entry
, lookup
= {
700 entry
= map
->remove(map
, &lookup
);
705 * Check if an address or net (addr with prefix net bits) is in
706 * subnet (net with net_len net bits)
708 static bool addr_in_subnet(chunk_t addr
, int prefix
, chunk_t net
, int net_len
)
710 static const u_char mask
[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
714 { /* any address matches a /0 network */
717 if (addr
.len
!= net
.len
|| net_len
> 8 * net
.len
|| prefix
< net_len
)
721 /* scan through all bytes in network order */
726 return (mask
[net_len
] & addr
.ptr
[byte
]) == (mask
[net_len
] & net
.ptr
[byte
]);
730 if (addr
.ptr
[byte
] != net
.ptr
[byte
])
742 * Check if the given address is in subnet (net with net_len net bits)
744 static bool host_in_subnet(host_t
*host
, chunk_t net
, int net_len
)
748 addr
= host
->get_address(host
);
749 return addr_in_subnet(addr
, addr
.len
* 8, net
, net_len
);
753 * Determine the type or scope of the given unicast IP address. This is not
754 * the same thing returned in rtm_scope/ifa_scope.
756 * We use return values as defined in RFC 6724 (referring to RFC 4291).
758 static u_char
get_scope(host_t
*ip
)
762 addr
= ip
->get_address(ip
);
766 /* we use the mapping defined in RFC 6724, 3.2 */
767 if (addr
.ptr
[0] == 127)
768 { /* link-local, same as the IPv6 loopback address */
771 if (addr
.ptr
[0] == 169 && addr
.ptr
[1] == 254)
777 if (IN6_IS_ADDR_LOOPBACK((struct in6_addr
*)addr
.ptr
))
778 { /* link-local, according to RFC 4291, 2.5.3 */
781 if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr
*)addr
.ptr
))
785 if (IN6_IS_ADDR_SITELOCAL((struct in6_addr
*)addr
.ptr
))
786 { /* deprecated, according to RFC 4291, 2.5.7 */
798 * Determine the label of the given unicast IP address.
800 * We currently only support the default table given in RFC 6724:
802 * Prefix Precedence Label
813 static u_char
get_label(host_t
*ip
)
820 /* priority table ordered by prefix */
822 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
823 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), 128, 0 },
825 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
826 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), 96, 4 },
828 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
829 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 96, 3 },
831 { chunk_from_chars(0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
832 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 32, 5 },
834 { chunk_from_chars(0x20, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
835 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 2 },
837 { chunk_from_chars(0x3f, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
838 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 12 },
840 { chunk_from_chars(0xfe, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
841 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 10, 11 },
843 { chunk_from_chars(0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
844 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 7, 13 },
848 for (i
= 0; i
< countof(priorities
); i
++)
850 if (host_in_subnet(ip
, priorities
[i
].net
, priorities
[i
].prefix
))
852 return priorities
[i
].label
;
860 * Returns the length of the common prefix in bits up to the length of a's
861 * prefix, defined by RFC 6724 as the portion of the address not including the
862 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
864 static u_char
common_prefix(host_t
*a
, host_t
*b
)
867 u_char byte
, bits
= 0, match
;
869 aa
= a
->get_address(a
);
870 ba
= b
->get_address(b
);
871 for (byte
= 0; byte
< 8; byte
++)
873 if (aa
.ptr
[byte
] != ba
.ptr
[byte
])
875 match
= aa
.ptr
[byte
] ^ ba
.ptr
[byte
];
876 for (bits
= 8; match
; match
>>= 1)
883 return byte
* 8 + bits
;
887 * Compare two IP addresses and return TRUE if the second address is the better
888 * choice of the two to reach the destination.
889 * For IPv6 we approximately follow RFC 6724.
891 static bool is_address_better(private_kernel_netlink_net_t
*this,
892 addr_entry_t
*a
, addr_entry_t
*b
, host_t
*d
)
894 u_char sa
, sb
, sd
, la
, lb
, ld
, pa
, pb
;
896 /* rule 2: prefer appropriate scope */
899 sa
= get_scope(a
->ip
);
900 sb
= get_scope(b
->ip
);
911 if (a
->ip
->get_family(a
->ip
) == AF_INET
)
912 { /* stop here for IPv4, default to addresses found earlier */
915 /* rule 3: avoid deprecated addresses (RFC 4862) */
916 if ((a
->flags
& IFA_F_DEPRECATED
) != (b
->flags
& IFA_F_DEPRECATED
))
918 return a
->flags
& IFA_F_DEPRECATED
;
920 /* rule 4 is not applicable as we don't know if an address is a home or
922 * rule 5 does not apply as we only compare addresses from one interface
924 /* rule 6: prefer matching label */
927 la
= get_label(a
->ip
);
928 lb
= get_label(b
->ip
);
930 if (la
== ld
&& lb
!= ld
)
934 else if (lb
== ld
&& la
!= ld
)
939 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
940 if ((a
->flags
& IFA_F_TEMPORARY
) != (b
->flags
& IFA_F_TEMPORARY
))
942 if (this->prefer_temporary_addrs
)
944 return b
->flags
& IFA_F_TEMPORARY
;
946 return a
->flags
& IFA_F_TEMPORARY
;
948 /* rule 8: use longest matching prefix */
951 pa
= common_prefix(a
->ip
, d
);
952 pb
= common_prefix(b
->ip
, d
);
958 /* default to addresses found earlier */
963 * Get a non-virtual IP address on the given interfaces and optionally in a
966 * If a candidate address is given, we first search for that address and if not
967 * found return the address as above.
968 * Returned host is a clone, has to be freed by caller.
970 * this->lock must be held when calling this function.
972 static host_t
*get_matching_address(private_kernel_netlink_net_t
*this,
973 int *ifindex
, int family
, chunk_t net
,
974 uint8_t mask
, host_t
*dest
,
977 enumerator_t
*ifaces
, *addrs
;
978 iface_entry_t
*iface
;
979 addr_entry_t
*addr
, *best
= NULL
;
980 bool candidate_matched
= FALSE
;
982 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
983 while (ifaces
->enumerate(ifaces
, &iface
))
985 if (iface
->usable
&& (!ifindex
|| iface
->ifindex
== *ifindex
))
986 { /* only use matching interfaces not excluded by config */
987 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
988 while (addrs
->enumerate(addrs
, &addr
))
990 if (addr
->refcount
||
991 addr
->ip
->get_family(addr
->ip
) != family
)
992 { /* ignore virtual IP addresses and ensure family matches */
995 if (net
.ptr
&& !host_in_subnet(addr
->ip
, net
, mask
))
996 { /* optionally match a subnet */
999 if (candidate
&& candidate
->ip_equals(candidate
, addr
->ip
))
1000 { /* stop if we find the candidate */
1002 candidate_matched
= TRUE
;
1005 else if (!best
|| is_address_better(this, best
, addr
, dest
))
1010 addrs
->destroy(addrs
);
1011 if (ifindex
|| candidate_matched
)
1017 ifaces
->destroy(ifaces
);
1018 return best ? best
->ip
->clone(best
->ip
) : NULL
;
1022 * Get a non-virtual IP address on the given interface.
1024 * If a candidate address is given, we first search for that address and if not
1025 * found return the address as above.
1026 * Returned host is a clone, has to be freed by caller.
1028 * this->lock must be held when calling this function.
1030 static host_t
*get_interface_address(private_kernel_netlink_net_t
*this,
1031 int ifindex
, int family
, host_t
*dest
,
1034 return get_matching_address(this, &ifindex
, family
, chunk_empty
, 0, dest
,
1039 * Get a non-virtual IP address in the given subnet.
1041 * If a candidate address is given, we first search for that address and if not
1042 * found return the address as above.
1043 * Returned host is a clone, has to be freed by caller.
1045 * this->lock must be held when calling this function.
1047 static host_t
*get_subnet_address(private_kernel_netlink_net_t
*this,
1048 int family
, chunk_t net
, uint8_t mask
,
1049 host_t
*dest
, host_t
*candidate
)
1051 return get_matching_address(this, NULL
, family
, net
, mask
, dest
, candidate
);
1055 * callback function that raises the delayed roam event
1057 static job_requeue_t
roam_event(private_kernel_netlink_net_t
*this)
1061 this->roam_lock
->lock(this->roam_lock
);
1062 address
= this->roam_address
;
1063 this->roam_address
= FALSE
;
1064 this->roam_lock
->unlock(this->roam_lock
);
1065 charon
->kernel
->roam(charon
->kernel
, address
);
1066 return JOB_REQUEUE_NONE
;
1070 * fire a roaming event. we delay it for a bit and fire only one event
1071 * for multiple calls. otherwise we would create too many events.
1073 static void fire_roam_event(private_kernel_netlink_net_t
*this, bool address
)
1078 if (!this->roam_events
)
1083 time_monotonic(&now
);
1084 this->roam_lock
->lock(this->roam_lock
);
1085 this->roam_address
|= address
;
1086 if (!timercmp(&now
, &this->next_roam
, >))
1088 this->roam_lock
->unlock(this->roam_lock
);
1091 timeval_add_ms(&now
, ROAM_DELAY
);
1092 this->next_roam
= now
;
1093 this->roam_lock
->unlock(this->roam_lock
);
1095 job
= (job_t
*)callback_job_create((callback_job_cb_t
)roam_event
,
1097 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROAM_DELAY
);
1101 * check if an interface with a given index is up and usable
1103 * this->lock must be locked when calling this function
1105 static bool is_interface_up_and_usable(private_kernel_netlink_net_t
*this,
1108 iface_entry_t
*iface
;
1110 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
1111 (void**)&iface
, &index
) == SUCCESS
)
1113 return iface_entry_up_and_usable(iface
);
1119 * unregister the current addr_entry_t from the hashtable it is stored in
1121 * this->lock must be locked when calling this function
1123 static void addr_entry_unregister(addr_entry_t
*addr
, iface_entry_t
*iface
,
1124 private_kernel_netlink_net_t
*this)
1128 addr_map_entry_remove(this->vips
, addr
, iface
);
1129 this->condvar
->broadcast(this->condvar
);
1132 addr_map_entry_remove(this->addrs
, addr
, iface
);
1136 * process RTM_NEWLINK/RTM_DELLINK from kernel
1138 static void process_link(private_kernel_netlink_net_t
*this,
1139 struct nlmsghdr
*hdr
, bool event
)
1141 struct ifinfomsg
* msg
= NLMSG_DATA(hdr
);
1142 struct rtattr
*rta
= IFLA_RTA(msg
);
1143 size_t rtasize
= IFLA_PAYLOAD (hdr
);
1144 enumerator_t
*enumerator
;
1145 iface_entry_t
*current
, *entry
= NULL
;
1147 bool update
= FALSE
, update_routes
= FALSE
;
1149 while (RTA_OK(rta
, rtasize
))
1151 switch (rta
->rta_type
)
1154 name
= RTA_DATA(rta
);
1157 rta
= RTA_NEXT(rta
, rtasize
);
1164 this->lock
->write_lock(this->lock
);
1165 switch (hdr
->nlmsg_type
)
1169 if (this->ifaces
->find_first(this->ifaces
,
1170 (void*)iface_entry_by_index
, (void**)&entry
,
1171 &msg
->ifi_index
) != SUCCESS
)
1174 .ifindex
= msg
->ifi_index
,
1175 .addrs
= linked_list_create(),
1176 .usable
= charon
->kernel
->is_interface_usable(
1177 charon
->kernel
, name
),
1179 this->ifaces
->insert_last(this->ifaces
, entry
);
1181 strncpy(entry
->ifname
, name
, IFNAMSIZ
);
1182 entry
->ifname
[IFNAMSIZ
-1] = '\0';
1183 if (event
&& entry
->usable
)
1185 if (!(entry
->flags
& IFF_UP
) && (msg
->ifi_flags
& IFF_UP
))
1187 update
= update_routes
= TRUE
;
1188 DBG1(DBG_KNL
, "interface %s activated", name
);
1190 if ((entry
->flags
& IFF_UP
) && !(msg
->ifi_flags
& IFF_UP
))
1193 DBG1(DBG_KNL
, "interface %s deactivated", name
);
1196 entry
->flags
= msg
->ifi_flags
;
1201 enumerator
= this->ifaces
->create_enumerator(this->ifaces
);
1202 while (enumerator
->enumerate(enumerator
, ¤t
))
1204 if (current
->ifindex
== msg
->ifi_index
)
1206 if (event
&& current
->usable
)
1209 DBG1(DBG_KNL
, "interface %s deleted", current
->ifname
);
1211 /* TODO: move virtual IPs installed on this interface to
1212 * another interface? */
1213 this->ifaces
->remove_at(this->ifaces
, enumerator
);
1214 current
->addrs
->invoke_function(current
->addrs
,
1215 (void*)addr_entry_unregister
, current
, this);
1216 iface_entry_destroy(current
);
1220 enumerator
->destroy(enumerator
);
1224 this->lock
->unlock(this->lock
);
1226 if (update_routes
&& event
)
1228 queue_route_reinstall(this, strdup(name
));
1231 if (update
&& event
)
1233 fire_roam_event(this, TRUE
);
1238 * process RTM_NEWADDR/RTM_DELADDR from kernel
1240 static void process_addr(private_kernel_netlink_net_t
*this,
1241 struct nlmsghdr
*hdr
, bool event
)
1243 struct ifaddrmsg
* msg
= NLMSG_DATA(hdr
);
1244 struct rtattr
*rta
= IFA_RTA(msg
);
1245 size_t rtasize
= IFA_PAYLOAD (hdr
);
1246 host_t
*host
= NULL
;
1247 iface_entry_t
*iface
;
1248 chunk_t local
= chunk_empty
, address
= chunk_empty
;
1249 char *route_ifname
= NULL
;
1250 bool update
= FALSE
, found
= FALSE
, changed
= FALSE
;
1252 while (RTA_OK(rta
, rtasize
))
1254 switch (rta
->rta_type
)
1257 local
.ptr
= RTA_DATA(rta
);
1258 local
.len
= RTA_PAYLOAD(rta
);
1261 address
.ptr
= RTA_DATA(rta
);
1262 address
.len
= RTA_PAYLOAD(rta
);
1265 rta
= RTA_NEXT(rta
, rtasize
);
1268 /* For PPP interfaces, we need the IFA_LOCAL address,
1269 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1270 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1273 host
= host_create_from_chunk(msg
->ifa_family
, local
, 0);
1275 else if (address
.ptr
)
1277 host
= host_create_from_chunk(msg
->ifa_family
, address
, 0);
1285 this->lock
->write_lock(this->lock
);
1286 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
1287 (void**)&iface
, &msg
->ifa_index
) == SUCCESS
)
1289 addr_map_entry_t
*entry
, lookup
= {
1295 entry
= this->vips
->get(this->vips
, &lookup
);
1298 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1299 { /* mark as installed and signal waiting threads */
1300 entry
->addr
->installed
= TRUE
;
1303 { /* the address was already marked as uninstalled */
1305 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1306 addr_map_entry_remove(this->vips
, addr
, iface
);
1307 addr_entry_destroy(addr
);
1309 /* no roam events etc. for virtual IPs */
1310 this->condvar
->broadcast(this->condvar
);
1311 this->lock
->unlock(this->lock
);
1312 host
->destroy(host
);
1315 entry
= this->addrs
->get(this->addrs
, &lookup
);
1318 if (hdr
->nlmsg_type
== RTM_DELADDR
)
1322 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1326 DBG1(DBG_KNL
, "%H disappeared from %s", host
,
1329 addr_map_entry_remove(this->addrs
, addr
, iface
);
1330 addr_entry_destroy(addr
);
1335 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1339 route_ifname
= strdup(iface
->ifname
);
1341 .ip
= host
->clone(host
),
1342 .flags
= msg
->ifa_flags
,
1343 .scope
= msg
->ifa_scope
,
1345 iface
->addrs
->insert_last(iface
->addrs
, addr
);
1346 addr_map_entry_add(this->addrs
, addr
, iface
);
1347 if (event
&& iface
->usable
)
1349 DBG1(DBG_KNL
, "%H appeared on %s", host
, iface
->ifname
);
1353 if (found
&& (iface
->flags
& IFF_UP
))
1358 { /* ignore events for interfaces excluded by config */
1359 update
= changed
= FALSE
;
1362 this->lock
->unlock(this->lock
);
1364 if (update
&& event
&& route_ifname
)
1366 queue_route_reinstall(this, route_ifname
);
1372 host
->destroy(host
);
1374 /* send an update to all IKE_SAs */
1375 if (update
&& event
&& changed
)
1377 fire_roam_event(this, TRUE
);
1382 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1384 static void process_route(private_kernel_netlink_net_t
*this, struct nlmsghdr
*hdr
)
1386 struct rtmsg
* msg
= NLMSG_DATA(hdr
);
1387 struct rtattr
*rta
= RTM_RTA(msg
);
1388 size_t rtasize
= RTM_PAYLOAD(hdr
);
1389 uint32_t rta_oif
= 0;
1390 host_t
*host
= NULL
;
1392 /* ignore routes added by us or in the local routing table (local addrs) */
1393 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1394 msg
->rtm_table
== RT_TABLE_LOCAL
))
1398 else if (msg
->rtm_flags
& RTM_F_CLONED
)
1399 { /* ignore cached routes, seem to be created a lot for IPv6 */
1403 while (RTA_OK(rta
, rtasize
))
1405 switch (rta
->rta_type
)
1409 host
= host_create_from_chunk(msg
->rtm_family
,
1410 chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
)), 0);
1413 if (RTA_PAYLOAD(rta
) == sizeof(rta_oif
))
1415 rta_oif
= *(uint32_t*)RTA_DATA(rta
);
1419 rta
= RTA_NEXT(rta
, rtasize
);
1421 this->lock
->read_lock(this->lock
);
1422 if (rta_oif
&& !is_interface_up_and_usable(this, rta_oif
))
1423 { /* ignore route changes for interfaces that are ignored or down */
1424 this->lock
->unlock(this->lock
);
1428 if (!host
&& rta_oif
)
1430 host
= get_interface_address(this, rta_oif
, msg
->rtm_family
,
1433 if (!host
|| is_known_vip(this, host
))
1434 { /* ignore routes added for virtual IPs */
1435 this->lock
->unlock(this->lock
);
1439 this->lock
->unlock(this->lock
);
1440 fire_roam_event(this, FALSE
);
1441 host
->destroy(host
);
1445 * Receives events from kernel
1447 static bool receive_events(private_kernel_netlink_net_t
*this, int fd
,
1448 watcher_event_t event
)
1450 char response
[1536];
1451 struct nlmsghdr
*hdr
= (struct nlmsghdr
*)response
;
1452 struct sockaddr_nl addr
;
1453 socklen_t addr_len
= sizeof(addr
);
1456 len
= recvfrom(this->socket_events
, response
, sizeof(response
),
1457 MSG_DONTWAIT
, (struct sockaddr
*)&addr
, &addr_len
);
1463 /* interrupted, try again */
1466 /* no data ready, select again */
1469 DBG1(DBG_KNL
, "unable to receive from RT event socket %s (%d)",
1470 strerror(errno
), errno
);
1476 if (addr
.nl_pid
!= 0)
1477 { /* not from kernel. not interested, try another one */
1481 while (NLMSG_OK(hdr
, len
))
1483 /* looks good so far, dispatch netlink message */
1484 switch (hdr
->nlmsg_type
)
1488 process_addr(this, hdr
, TRUE
);
1492 process_link(this, hdr
, TRUE
);
1496 if (this->process_route
)
1498 process_route(this, hdr
);
1504 hdr
= NLMSG_NEXT(hdr
, len
);
1509 /** enumerator over addresses */
1511 private_kernel_netlink_net_t
* this;
1512 /** which addresses to enumerate */
1513 kernel_address_type_t which
;
1514 } address_enumerator_t
;
1517 * cleanup function for address enumerator
1519 static void address_enumerator_destroy(address_enumerator_t
*data
)
1521 data
->this->lock
->unlock(data
->this->lock
);
1526 * filter for addresses
1528 static bool filter_addresses(address_enumerator_t
*data
,
1529 addr_entry_t
** in
, host_t
** out
)
1531 if (!(data
->which
& ADDR_TYPE_VIRTUAL
) && (*in
)->refcount
)
1532 { /* skip virtual interfaces added by us */
1535 if (!(data
->which
& ADDR_TYPE_REGULAR
) && !(*in
)->refcount
)
1536 { /* address is regular, but not requested */
1539 if ((*in
)->scope
>= RT_SCOPE_LINK
)
1540 { /* skip addresses with an unusable scope */
1548 * enumerator constructor for interfaces
1550 static enumerator_t
*create_iface_enumerator(iface_entry_t
*iface
,
1551 address_enumerator_t
*data
)
1553 return enumerator_create_filter(
1554 iface
->addrs
->create_enumerator(iface
->addrs
),
1555 (void*)filter_addresses
, data
, NULL
);
1559 * filter for interfaces
1561 static bool filter_interfaces(address_enumerator_t
*data
, iface_entry_t
** in
,
1562 iface_entry_t
** out
)
1564 if (!(data
->which
& ADDR_TYPE_IGNORED
) && !(*in
)->usable
)
1565 { /* skip interfaces excluded by config */
1568 if (!(data
->which
& ADDR_TYPE_LOOPBACK
) && ((*in
)->flags
& IFF_LOOPBACK
))
1569 { /* ignore loopback devices */
1572 if (!(data
->which
& ADDR_TYPE_DOWN
) && !((*in
)->flags
& IFF_UP
))
1573 { /* skip interfaces not up */
1580 METHOD(kernel_net_t
, create_address_enumerator
, enumerator_t
*,
1581 private_kernel_netlink_net_t
*this, kernel_address_type_t which
)
1583 address_enumerator_t
*data
;
1590 this->lock
->read_lock(this->lock
);
1591 return enumerator_create_nested(
1592 enumerator_create_filter(
1593 this->ifaces
->create_enumerator(this->ifaces
),
1594 (void*)filter_interfaces
, data
, NULL
),
1595 (void*)create_iface_enumerator
, data
,
1596 (void*)address_enumerator_destroy
);
1599 METHOD(kernel_net_t
, get_interface_name
, bool,
1600 private_kernel_netlink_net_t
*this, host_t
* ip
, char **name
)
1602 addr_map_entry_t
*entry
, lookup
= {
1606 if (ip
->is_anyaddr(ip
))
1610 this->lock
->read_lock(this->lock
);
1611 /* first try to find it on an up and usable interface */
1612 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1613 (void*)addr_map_entry_match_up_and_usable
);
1618 *name
= strdup(entry
->iface
->ifname
);
1619 DBG2(DBG_KNL
, "%H is on interface %s", ip
, *name
);
1621 this->lock
->unlock(this->lock
);
1624 /* in a second step, consider virtual IPs installed by us */
1625 entry
= this->vips
->get_match(this->vips
, &lookup
,
1626 (void*)addr_map_entry_match_up_and_usable
);
1631 *name
= strdup(entry
->iface
->ifname
);
1632 DBG2(DBG_KNL
, "virtual IP %H is on interface %s", ip
, *name
);
1634 this->lock
->unlock(this->lock
);
1637 /* maybe it is installed on an ignored interface */
1638 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1639 (void*)addr_map_entry_match_up
);
1642 DBG2(DBG_KNL
, "%H is not a local address or the interface is down", ip
);
1644 this->lock
->unlock(this->lock
);
1649 * get the index of an interface by name
1651 static int get_interface_index(private_kernel_netlink_net_t
*this, char* name
)
1653 iface_entry_t
*iface
;
1656 DBG2(DBG_KNL
, "getting iface index for %s", name
);
1658 this->lock
->read_lock(this->lock
);
1659 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1660 (void**)&iface
, name
) == SUCCESS
)
1662 ifindex
= iface
->ifindex
;
1664 this->lock
->unlock(this->lock
);
1668 DBG1(DBG_KNL
, "unable to get interface index for %s", name
);
1674 * get the name of an interface by index (allocated)
1676 static char *get_interface_name_by_index(private_kernel_netlink_net_t
*this,
1679 iface_entry_t
*iface
;
1682 DBG2(DBG_KNL
, "getting iface name for index %d", index
);
1684 this->lock
->read_lock(this->lock
);
1685 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
1686 (void**)&iface
, &index
) == SUCCESS
)
1688 name
= strdup(iface
->ifname
);
1690 this->lock
->unlock(this->lock
);
1694 DBG1(DBG_KNL
, "unable to get interface name for %d", index
);
1700 * Store information about a route retrieved via RTNETLINK
1716 * Free a route entry
1718 static void rt_entry_destroy(rt_entry_t
*this)
1720 DESTROY_IF(this->src_host
);
1725 * Check if the route received with RTM_NEWROUTE is usable based on its type.
1727 static bool route_usable(struct nlmsghdr
*hdr
)
1731 msg
= NLMSG_DATA(hdr
);
1732 switch (msg
->rtm_type
)
1735 case RTN_UNREACHABLE
:
1745 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1746 * reused if not NULL.
1748 * Returned chunks point to internal data of the Netlink message.
1750 static rt_entry_t
*parse_route(struct nlmsghdr
*hdr
, rt_entry_t
*route
)
1756 msg
= NLMSG_DATA(hdr
);
1758 rtasize
= RTM_PAYLOAD(hdr
);
1762 route
->gtw
= chunk_empty
;
1763 route
->pref_src
= chunk_empty
;
1764 route
->dst
= chunk_empty
;
1765 route
->dst_len
= msg
->rtm_dst_len
;
1766 route
->src
= chunk_empty
;
1767 route
->src_len
= msg
->rtm_src_len
;
1768 route
->table
= msg
->rtm_table
;
1770 route
->priority
= 0;
1775 .dst_len
= msg
->rtm_dst_len
,
1776 .src_len
= msg
->rtm_src_len
,
1777 .table
= msg
->rtm_table
,
1781 while (RTA_OK(rta
, rtasize
))
1783 switch (rta
->rta_type
)
1786 route
->pref_src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1789 route
->gtw
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1792 route
->dst
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1795 route
->src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1798 if (RTA_PAYLOAD(rta
) == sizeof(route
->oif
))
1800 route
->oif
= *(uint32_t*)RTA_DATA(rta
);
1804 if (RTA_PAYLOAD(rta
) == sizeof(route
->priority
))
1806 route
->priority
= *(uint32_t*)RTA_DATA(rta
);
1809 #ifdef HAVE_RTA_TABLE
1811 if (RTA_PAYLOAD(rta
) == sizeof(route
->table
))
1813 route
->table
= *(uint32_t*)RTA_DATA(rta
);
1816 #endif /* HAVE_RTA_TABLE*/
1818 rta
= RTA_NEXT(rta
, rtasize
);
1824 * Get a route: if "nexthop" is set, the nexthop is returned, the source address otherwise.
1826 static host_t
*get_route(private_kernel_netlink_net_t
*this, host_t
*dest
,
1827 int prefix
, bool nexthop
, host_t
*candidate
,
1828 char **iface
, u_int recursion
)
1830 netlink_buf_t request
;
1831 struct nlmsghdr
*hdr
, *out
, *current
;
1835 linked_list_t
*routes
;
1836 rt_entry_t
*route
= NULL
, *best
= NULL
;
1837 enumerator_t
*enumerator
;
1838 host_t
*addr
= NULL
;
1842 if (recursion
> MAX_ROUTE_RECURSION
)
1846 chunk
= dest
->get_address(dest
);
1847 len
= chunk
.len
* 8;
1848 prefix
= prefix
< 0 ? len
: min(prefix
, len
);
1849 match_net
= prefix
!= len
;
1851 memset(&request
, 0, sizeof(request
));
1853 family
= dest
->get_family(dest
);
1855 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
1856 hdr
->nlmsg_type
= RTM_GETROUTE
;
1857 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1859 msg
= NLMSG_DATA(hdr
);
1860 msg
->rtm_family
= family
;
1861 if (!match_net
&& this->rta_mark
&& this->routing_mark
.value
)
1863 /* if our routing rule excludes packets with a certain mark we can
1864 * get the preferred route without having to dump all routes */
1865 chunk
= chunk_from_thing(this->routing_mark
.value
);
1866 netlink_add_attribute(hdr
, RTA_MARK
, chunk
, sizeof(request
));
1868 else if (family
== AF_INET
|| this->rta_prefsrc_for_ipv6
||
1869 this->routing_table
|| match_net
)
1870 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1871 * as we want to ignore routes with virtual IPs we cannot use DUMP
1872 * if these routes are not installed in a separate table */
1873 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
1877 chunk
= candidate
->get_address(candidate
);
1878 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1880 /* we use this below to match against the routes */
1881 chunk
= dest
->get_address(dest
);
1884 netlink_add_attribute(hdr
, RTA_DST
, chunk
, sizeof(request
));
1887 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
1889 DBG2(DBG_KNL
, "getting %s to reach %H/%d failed",
1890 nexthop ?
"nexthop" : "address", dest
, prefix
);
1893 routes
= linked_list_create();
1894 this->lock
->read_lock(this->lock
);
1896 for (current
= out
; NLMSG_OK(current
, len
);
1897 current
= NLMSG_NEXT(current
, len
))
1899 switch (current
->nlmsg_type
)
1908 if (!route_usable(current
))
1912 route
= parse_route(current
, route
);
1914 table
= (uintptr_t)route
->table
;
1915 if (this->rt_exclude
->find_first(this->rt_exclude
, NULL
,
1916 (void**)&table
) == SUCCESS
)
1917 { /* route is from an excluded routing table */
1920 if (this->routing_table
!= 0 &&
1921 route
->table
== this->routing_table
)
1922 { /* route is from our own ipsec routing table */
1925 if (route
->oif
&& !is_interface_up_and_usable(this, route
->oif
))
1926 { /* interface is down */
1929 if (!addr_in_subnet(chunk
, prefix
, route
->dst
, route
->dst_len
))
1930 { /* route destination does not contain dest */
1933 if (route
->pref_src
.ptr
)
1934 { /* verify source address, if any */
1935 host_t
*src
= host_create_from_chunk(msg
->rtm_family
,
1936 route
->pref_src
, 0);
1937 if (src
&& is_known_vip(this, src
))
1938 { /* ignore routes installed by us */
1942 route
->src_host
= src
;
1944 /* insert route, sorted by network prefix and priority */
1945 enumerator
= routes
->create_enumerator(routes
);
1946 while (enumerator
->enumerate(enumerator
, &other
))
1948 if (route
->dst_len
> other
->dst_len
)
1952 if (route
->dst_len
== other
->dst_len
&&
1953 route
->priority
< other
->priority
)
1958 routes
->insert_before(routes
, enumerator
, route
);
1959 enumerator
->destroy(enumerator
);
1970 rt_entry_destroy(route
);
1973 /* now we have a list of routes matching dest, sorted by net prefix.
1974 * we will look for source addresses for these routes and select the one
1975 * with the preferred source address, if possible */
1976 enumerator
= routes
->create_enumerator(routes
);
1977 while (enumerator
->enumerate(enumerator
, &route
))
1979 if (route
->src_host
)
1980 { /* got a source address with the route, if no preferred source
1981 * is given or it matches we are done, as this is the best route */
1982 if (!candidate
|| candidate
->ip_equals(candidate
, route
->src_host
))
1987 else if (route
->oif
)
1988 { /* no match yet, maybe it is assigned to the same interface */
1989 host_t
*src
= get_interface_address(this, route
->oif
,
1990 msg
->rtm_family
, dest
, candidate
);
1991 if (src
&& src
->ip_equals(src
, candidate
))
1993 route
->src_host
->destroy(route
->src_host
);
1994 route
->src_host
= src
;
2000 /* no luck yet with the source address. if this is the best (first)
2001 * route we store it as fallback in case we don't find a route with
2002 * the preferred source */
2003 best
= best ?
: route
;
2007 { /* no src, but a source selector, try to find a matching address */
2008 route
->src_host
= get_subnet_address(this, msg
->rtm_family
,
2009 route
->src
, route
->src_len
, dest
,
2011 if (route
->src_host
)
2012 { /* we handle this address the same as the one above */
2014 candidate
->ip_equals(candidate
, route
->src_host
))
2019 best
= best ?
: route
;
2024 { /* no src, but an interface - get address from it */
2025 route
->src_host
= get_interface_address(this, route
->oif
,
2026 msg
->rtm_family
, dest
, candidate
);
2027 if (route
->src_host
)
2028 { /* more of the same */
2030 candidate
->ip_equals(candidate
, route
->src_host
))
2035 best
= best ?
: route
;
2040 { /* no src, no iface, but a gateway - lookup src to reach gtw */
2043 gtw
= host_create_from_chunk(msg
->rtm_family
, route
->gtw
, 0);
2044 if (gtw
&& !gtw
->ip_equals(gtw
, dest
))
2046 route
->src_host
= get_route(this, gtw
, -1, FALSE
, candidate
,
2047 iface
, recursion
+ 1);
2050 if (route
->src_host
)
2051 { /* more of the same */
2053 candidate
->ip_equals(candidate
, route
->src_host
))
2058 best
= best ?
: route
;
2062 enumerator
->destroy(enumerator
);
2065 { /* nexthop lookup, return gateway and oif if any */
2070 if (best
|| routes
->get_first(routes
, (void**)&best
) == SUCCESS
)
2072 addr
= host_create_from_chunk(msg
->rtm_family
, best
->gtw
, 0);
2073 if (iface
&& best
->oif
)
2075 *iface
= get_interface_name_by_index(this, best
->oif
);
2078 if (!addr
&& !match_net
)
2079 { /* fallback to destination address */
2080 addr
= dest
->clone(dest
);
2087 addr
= best
->src_host
->clone(best
->src_host
);
2090 this->lock
->unlock(this->lock
);
2091 routes
->destroy_function(routes
, (void*)rt_entry_destroy
);
2096 if (nexthop
&& iface
&& *iface
)
2098 DBG2(DBG_KNL
, "using %H as nexthop and %s as dev to reach %H/%d",
2099 addr
, *iface
, dest
, prefix
);
2103 DBG2(DBG_KNL
, "using %H as %s to reach %H/%d", addr
,
2104 nexthop ?
"nexthop" : "address", dest
, prefix
);
2107 else if (!recursion
)
2109 DBG2(DBG_KNL
, "no %s found to reach %H/%d",
2110 nexthop ?
"nexthop" : "address", dest
, prefix
);
2115 METHOD(kernel_net_t
, get_source_addr
, host_t
*,
2116 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
2118 return get_route(this, dest
, -1, FALSE
, src
, NULL
, 0);
2121 METHOD(kernel_net_t
, get_nexthop
, host_t
*,
2122 private_kernel_netlink_net_t
*this, host_t
*dest
, int prefix
, host_t
*src
,
2125 return get_route(this, dest
, prefix
, TRUE
, src
, iface
, 0);
2128 /** enumerator over subnets */
2130 enumerator_t
public;
2131 private_kernel_netlink_net_t
*private;
2132 /** message from the kernel */
2133 struct nlmsghdr
*msg
;
2134 /** current message from the kernel */
2135 struct nlmsghdr
*current
;
2136 /** remaining length */
2138 /** last subnet enumerated */
2140 } subnet_enumerator_t
;
2142 METHOD(enumerator_t
, destroy_subnet_enumerator
, void,
2143 subnet_enumerator_t
*this)
2145 DESTROY_IF(this->net
);
2150 METHOD(enumerator_t
, enumerate_subnets
, bool,
2151 subnet_enumerator_t
*this, host_t
**net
, uint8_t *mask
, char **ifname
)
2155 this->current
= this->msg
;
2159 this->current
= NLMSG_NEXT(this->current
, this->len
);
2160 DESTROY_IF(this->net
);
2164 while (NLMSG_OK(this->current
, this->len
))
2166 switch (this->current
->nlmsg_type
)
2175 chunk_t dst
= chunk_empty
;
2177 msg
= NLMSG_DATA(this->current
);
2179 if (!route_usable(this->current
))
2183 else if (msg
->rtm_table
&& (
2184 msg
->rtm_table
== RT_TABLE_LOCAL
||
2185 msg
->rtm_table
== this->private->routing_table
))
2186 { /* ignore our own and the local routing tables */
2191 rtasize
= RTM_PAYLOAD(this->current
);
2192 while (RTA_OK(rta
, rtasize
))
2194 if (rta
->rta_type
== RTA_DST
)
2196 dst
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
2199 rta
= RTA_NEXT(rta
, rtasize
);
2204 this->net
= host_create_from_chunk(msg
->rtm_family
, dst
, 0);
2206 *mask
= msg
->rtm_dst_len
;
2215 this->current
= NLMSG_NEXT(this->current
, this->len
);
2220 METHOD(kernel_net_t
, create_local_subnet_enumerator
, enumerator_t
*,
2221 private_kernel_netlink_net_t
*this)
2223 netlink_buf_t request
;
2224 struct nlmsghdr
*hdr
, *out
;
2227 subnet_enumerator_t
*enumerator
;
2229 memset(&request
, 0, sizeof(request
));
2232 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
2233 hdr
->nlmsg_type
= RTM_GETROUTE
;
2234 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2235 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
2237 msg
= NLMSG_DATA(hdr
);
2238 msg
->rtm_scope
= RT_SCOPE_LINK
;
2240 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
2242 DBG2(DBG_KNL
, "enumerating local subnets failed");
2243 return enumerator_create_empty();
2248 .enumerate
= (void*)_enumerate_subnets
,
2249 .destroy
= _destroy_subnet_enumerator
,
2255 return &enumerator
->public;
2259 * Manages the creation and deletion of ip addresses on an interface.
2260 * By setting the appropriate nlmsg_type, the ip will be set or unset.
2262 static status_t
manage_ipaddr(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2263 int flags
, int if_index
, host_t
*ip
, int prefix
)
2265 netlink_buf_t request
;
2266 struct nlmsghdr
*hdr
;
2267 struct ifaddrmsg
*msg
;
2270 memset(&request
, 0, sizeof(request
));
2272 chunk
= ip
->get_address(ip
);
2275 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2276 hdr
->nlmsg_type
= nlmsg_type
;
2277 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrmsg
));
2279 msg
= NLMSG_DATA(hdr
);
2280 msg
->ifa_family
= ip
->get_family(ip
);
2282 msg
->ifa_prefixlen
= prefix
< 0 ? chunk
.len
* 8 : prefix
;
2283 msg
->ifa_scope
= RT_SCOPE_UNIVERSE
;
2284 msg
->ifa_index
= if_index
;
2286 netlink_add_attribute(hdr
, IFA_LOCAL
, chunk
, sizeof(request
));
2288 if (ip
->get_family(ip
) == AF_INET6
)
2290 msg
->ifa_flags
|= IFA_F_NODAD
;
2291 if (this->rta_prefsrc_for_ipv6
)
2293 /* if source routes are possible we let the virtual IP get
2294 * deprecated immediately (but mark it as valid forever) so it gets
2295 * only used if forced by our route, and not by the default IPv6
2296 * address selection */
2297 struct ifa_cacheinfo cache
= {
2298 .ifa_valid
= 0xFFFFFFFF,
2301 netlink_add_attribute(hdr
, IFA_CACHEINFO
, chunk_from_thing(cache
),
2305 return this->socket
->send_ack(this->socket
, hdr
);
2308 METHOD(kernel_net_t
, add_ip
, status_t
,
2309 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2312 addr_map_entry_t
*entry
, lookup
= {
2315 iface_entry_t
*iface
= NULL
;
2317 if (!this->install_virtual_ip
)
2318 { /* disabled by config */
2322 this->lock
->write_lock(this->lock
);
2323 /* the virtual IP might actually be installed as regular IP, in which case
2324 * we don't track it as virtual IP */
2325 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2326 (void*)addr_map_entry_match
);
2328 { /* otherwise it might already be installed as virtual IP */
2329 entry
= this->vips
->get_match(this->vips
, &lookup
,
2330 (void*)addr_map_entry_match
);
2332 { /* the vip we found can be in one of three states: 1) installed and
2333 * ready, 2) just added by another thread, but not yet confirmed to
2334 * be installed by the kernel, 3) just deleted, but not yet gone.
2335 * Then while we wait below, several things could happen (as we
2336 * release the lock). For instance, the interface could disappear,
2337 * or the IP is finally deleted, and it reappears on a different
2338 * interface. All these cases are handled by the call below. */
2339 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
2341 this->condvar
->wait(this->condvar
, this->lock
);
2345 entry
->addr
->refcount
++;
2351 DBG2(DBG_KNL
, "virtual IP %H is already installed on %s", virtual_ip
,
2352 entry
->iface
->ifname
);
2353 this->lock
->unlock(this->lock
);
2356 /* try to find the target interface, either by config or via src ip */
2357 if (!this->install_virtual_ip_on
||
2358 this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
2359 (void**)&iface
, this->install_virtual_ip_on
) != SUCCESS
)
2361 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
2362 (void**)&iface
, iface_name
) != SUCCESS
)
2363 { /* if we don't find the requested interface we just use the first */
2364 this->ifaces
->get_first(this->ifaces
, (void**)&iface
);
2374 .ip
= virtual_ip
->clone(virtual_ip
),
2376 .scope
= RT_SCOPE_UNIVERSE
,
2378 iface
->addrs
->insert_last(iface
->addrs
, addr
);
2379 addr_map_entry_add(this->vips
, addr
, iface
);
2380 ifi
= iface
->ifindex
;
2381 this->lock
->unlock(this->lock
);
2382 if (manage_ipaddr(this, RTM_NEWADDR
, NLM_F_CREATE
| NLM_F_EXCL
,
2383 ifi
, virtual_ip
, prefix
) == SUCCESS
)
2385 this->lock
->write_lock(this->lock
);
2386 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
2387 { /* wait until address appears */
2388 this->condvar
->wait(this->condvar
, this->lock
);
2391 { /* we fail if the interface got deleted in the meantime */
2392 ifname
= strdup(entry
->iface
->ifname
);
2393 this->lock
->unlock(this->lock
);
2394 DBG2(DBG_KNL
, "virtual IP %H installed on %s",
2395 virtual_ip
, ifname
);
2396 /* during IKEv1 reauthentication, children get moved from the
2397 * old to the new SA before the virtual IP is available. This
2398 * kills the route for our virtual IP, so reinstall it. */
2399 queue_route_reinstall(this, ifname
);
2402 this->lock
->unlock(this->lock
);
2404 DBG1(DBG_KNL
, "adding virtual IP %H failed", virtual_ip
);
2407 this->lock
->unlock(this->lock
);
2408 DBG1(DBG_KNL
, "no interface available, unable to install virtual IP %H",
2413 METHOD(kernel_net_t
, del_ip
, status_t
,
2414 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2417 addr_map_entry_t
*entry
, lookup
= {
2421 if (!this->install_virtual_ip
)
2422 { /* disabled by config */
2426 DBG2(DBG_KNL
, "deleting virtual IP %H", virtual_ip
);
2428 this->lock
->write_lock(this->lock
);
2429 entry
= this->vips
->get_match(this->vips
, &lookup
,
2430 (void*)addr_map_entry_match
);
2432 { /* we didn't install this IP as virtual IP */
2433 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2434 (void*)addr_map_entry_match
);
2437 DBG2(DBG_KNL
, "not deleting existing IP %H on %s", virtual_ip
,
2438 entry
->iface
->ifname
);
2439 this->lock
->unlock(this->lock
);
2442 DBG2(DBG_KNL
, "virtual IP %H not cached, unable to delete", virtual_ip
);
2443 this->lock
->unlock(this->lock
);
2446 if (entry
->addr
->refcount
== 1)
2451 /* we set this flag so that threads calling add_ip will block and wait
2452 * until the entry is gone, also so we can wait below */
2453 entry
->addr
->installed
= FALSE
;
2454 ifi
= entry
->iface
->ifindex
;
2455 this->lock
->unlock(this->lock
);
2456 status
= manage_ipaddr(this, RTM_DELADDR
, 0, ifi
, virtual_ip
, prefix
);
2457 if (status
== SUCCESS
&& wait
)
2458 { /* wait until the address is really gone */
2459 this->lock
->write_lock(this->lock
);
2460 while (is_known_vip(this, virtual_ip
))
2462 this->condvar
->wait(this->condvar
, this->lock
);
2464 this->lock
->unlock(this->lock
);
2470 entry
->addr
->refcount
--;
2472 DBG2(DBG_KNL
, "virtual IP %H used by other SAs, not deleting",
2474 this->lock
->unlock(this->lock
);
2479 * Manages source routes in the routing table.
2480 * By setting the appropriate nlmsg_type, the route gets added or removed.
2482 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
2483 int nlmsg_type
, int flags
, chunk_t dst_net
,
2484 uint8_t prefixlen
, host_t
*gateway
,
2485 host_t
*src_ip
, char *if_name
)
2487 netlink_buf_t request
;
2488 struct nlmsghdr
*hdr
;
2494 /* if route is 0.0.0.0/0, we can't install it, as it would
2495 * overwrite the default route. Instead, we add two routes:
2496 * 0.0.0.0/1 and 128.0.0.0/1 */
2497 if (this->routing_table
== 0 && prefixlen
== 0)
2500 uint8_t half_prefixlen
;
2503 half_net
= chunk_alloca(dst_net
.len
);
2504 memset(half_net
.ptr
, 0, half_net
.len
);
2507 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
, half_prefixlen
,
2508 gateway
, src_ip
, if_name
);
2509 half_net
.ptr
[0] |= 0x80;
2510 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
, half_prefixlen
,
2511 gateway
, src_ip
, if_name
);
2515 memset(&request
, 0, sizeof(request
));
2518 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2519 hdr
->nlmsg_type
= nlmsg_type
;
2520 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2522 msg
= NLMSG_DATA(hdr
);
2523 msg
->rtm_family
= src_ip
->get_family(src_ip
);
2524 msg
->rtm_dst_len
= prefixlen
;
2525 msg
->rtm_table
= this->routing_table
;
2526 msg
->rtm_protocol
= RTPROT_STATIC
;
2527 msg
->rtm_type
= RTN_UNICAST
;
2528 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2530 netlink_add_attribute(hdr
, RTA_DST
, dst_net
, sizeof(request
));
2531 chunk
= src_ip
->get_address(src_ip
);
2532 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
2533 if (gateway
&& gateway
->get_family(gateway
) == src_ip
->get_family(src_ip
))
2535 chunk
= gateway
->get_address(gateway
);
2536 netlink_add_attribute(hdr
, RTA_GATEWAY
, chunk
, sizeof(request
));
2538 ifindex
= get_interface_index(this, if_name
);
2539 chunk
.ptr
= (char*)&ifindex
;
2540 chunk
.len
= sizeof(ifindex
);
2541 netlink_add_attribute(hdr
, RTA_OIF
, chunk
, sizeof(request
));
2543 if (this->mtu
|| this->mss
)
2545 chunk
= chunk_alloca(RTA_LENGTH((sizeof(struct rtattr
) +
2546 sizeof(uint32_t)) * 2));
2548 rta
= (struct rtattr
*)chunk
.ptr
;
2551 rta
->rta_type
= RTAX_MTU
;
2552 rta
->rta_len
= RTA_LENGTH(sizeof(uint32_t));
2553 memcpy(RTA_DATA(rta
), &this->mtu
, sizeof(uint32_t));
2554 chunk
.len
= rta
->rta_len
;
2558 rta
= (struct rtattr
*)(chunk
.ptr
+ RTA_ALIGN(chunk
.len
));
2559 rta
->rta_type
= RTAX_ADVMSS
;
2560 rta
->rta_len
= RTA_LENGTH(sizeof(uint32_t));
2561 memcpy(RTA_DATA(rta
), &this->mss
, sizeof(uint32_t));
2562 chunk
.len
= RTA_ALIGN(chunk
.len
) + rta
->rta_len
;
2564 netlink_add_attribute(hdr
, RTA_METRICS
, chunk
, sizeof(request
));
2567 return this->socket
->send_ack(this->socket
, hdr
);
2570 METHOD(kernel_net_t
, add_route
, status_t
,
2571 private_kernel_netlink_net_t
*this, chunk_t dst_net
, uint8_t prefixlen
,
2572 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
2575 route_entry_t
*found
, route
= {
2577 .prefixlen
= prefixlen
,
2583 this->routes_lock
->lock(this->routes_lock
);
2584 found
= this->routes
->get(this->routes
, &route
);
2587 this->routes_lock
->unlock(this->routes_lock
);
2588 return ALREADY_DONE
;
2590 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
2591 dst_net
, prefixlen
, gateway
, src_ip
, if_name
);
2592 if (status
== SUCCESS
)
2594 found
= route_entry_clone(&route
);
2595 this->routes
->put(this->routes
, found
, found
);
2597 this->routes_lock
->unlock(this->routes_lock
);
2601 METHOD(kernel_net_t
, del_route
, status_t
,
2602 private_kernel_netlink_net_t
*this, chunk_t dst_net
, uint8_t prefixlen
,
2603 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
2606 route_entry_t
*found
, route
= {
2608 .prefixlen
= prefixlen
,
2614 this->routes_lock
->lock(this->routes_lock
);
2615 found
= this->routes
->get(this->routes
, &route
);
2618 this->routes_lock
->unlock(this->routes_lock
);
2621 this->routes
->remove(this->routes
, found
);
2622 route_entry_destroy(found
);
2623 status
= manage_srcroute(this, RTM_DELROUTE
, 0, dst_net
, prefixlen
,
2624 gateway
, src_ip
, if_name
);
2625 this->routes_lock
->unlock(this->routes_lock
);
2630 * Initialize a list of local addresses.
2632 static status_t
init_address_list(private_kernel_netlink_net_t
*this)
2634 netlink_buf_t request
;
2635 struct nlmsghdr
*out
, *current
, *in
;
2636 struct rtgenmsg
*msg
;
2638 enumerator_t
*ifaces
, *addrs
;
2639 iface_entry_t
*iface
;
2642 DBG2(DBG_KNL
, "known interfaces and IP addresses:");
2644 memset(&request
, 0, sizeof(request
));
2647 in
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtgenmsg
));
2648 in
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_MATCH
| NLM_F_ROOT
;
2649 msg
= NLMSG_DATA(in
);
2650 msg
->rtgen_family
= AF_UNSPEC
;
2653 in
->nlmsg_type
= RTM_GETLINK
;
2654 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2659 while (NLMSG_OK(current
, len
))
2661 switch (current
->nlmsg_type
)
2666 process_link(this, current
, FALSE
);
2669 current
= NLMSG_NEXT(current
, len
);
2676 /* get all interface addresses */
2677 in
->nlmsg_type
= RTM_GETADDR
;
2678 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2683 while (NLMSG_OK(current
, len
))
2685 switch (current
->nlmsg_type
)
2690 process_addr(this, current
, FALSE
);
2693 current
= NLMSG_NEXT(current
, len
);
2700 this->lock
->read_lock(this->lock
);
2701 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
2702 while (ifaces
->enumerate(ifaces
, &iface
))
2704 if (iface_entry_up_and_usable(iface
))
2706 DBG2(DBG_KNL
, " %s", iface
->ifname
);
2707 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
2708 while (addrs
->enumerate(addrs
, (void**)&addr
))
2710 DBG2(DBG_KNL
, " %H", addr
->ip
);
2712 addrs
->destroy(addrs
);
2715 ifaces
->destroy(ifaces
);
2716 this->lock
->unlock(this->lock
);
2721 * create or delete a rule to use our routing table
2723 static status_t
manage_rule(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2724 int family
, uint32_t table
, uint32_t prio
)
2726 netlink_buf_t request
;
2727 struct nlmsghdr
*hdr
;
2732 memset(&request
, 0, sizeof(request
));
2734 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2735 hdr
->nlmsg_type
= nlmsg_type
;
2736 if (nlmsg_type
== RTM_NEWRULE
)
2738 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2740 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2742 msg
= NLMSG_DATA(hdr
);
2743 msg
->rtm_table
= table
;
2744 msg
->rtm_family
= family
;
2745 msg
->rtm_protocol
= RTPROT_BOOT
;
2746 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2747 msg
->rtm_type
= RTN_UNICAST
;
2749 chunk
= chunk_from_thing(prio
);
2750 netlink_add_attribute(hdr
, RTA_PRIORITY
, chunk
, sizeof(request
));
2752 fwmark
= lib
->settings
->get_str(lib
->settings
,
2753 "%s.plugins.kernel-netlink.fwmark", NULL
, lib
->ns
);
2756 #ifdef HAVE_LINUX_FIB_RULES_H
2759 if (fwmark
[0] == '!')
2761 msg
->rtm_flags
|= FIB_RULE_INVERT
;
2764 if (mark_from_string(fwmark
, &mark
))
2766 chunk
= chunk_from_thing(mark
.value
);
2767 netlink_add_attribute(hdr
, FRA_FWMARK
, chunk
, sizeof(request
));
2768 chunk
= chunk_from_thing(mark
.mask
);
2769 netlink_add_attribute(hdr
, FRA_FWMASK
, chunk
, sizeof(request
));
2770 if (msg
->rtm_flags
& FIB_RULE_INVERT
)
2772 this->routing_mark
= mark
;
2776 DBG1(DBG_KNL
, "setting firewall mark on routing rule is not supported");
2779 return this->socket
->send_ack(this->socket
, hdr
);
2783 * check for kernel features (currently only via version number)
2785 static void check_kernel_features(private_kernel_netlink_net_t
*this)
2787 struct utsname utsname
;
2790 if (uname(&utsname
) == 0)
2792 switch(sscanf(utsname
.release
, "%d.%d.%d", &a
, &b
, &c
))
2797 if (b
== 6 && c
>= 36)
2799 this->rta_mark
= TRUE
;
2801 DBG2(DBG_KNL
, "detected Linux %d.%d.%d, no support for "
2802 "RTA_PREFSRC for IPv6 routes", a
, b
, c
);
2807 /* only 3.x+ uses two-part version numbers */
2808 this->rta_prefsrc_for_ipv6
= TRUE
;
2809 this->rta_mark
= TRUE
;
2818 * Destroy an address to iface map
2820 static void addr_map_destroy(hashtable_t
*map
)
2822 enumerator_t
*enumerator
;
2823 addr_map_entry_t
*addr
;
2825 enumerator
= map
->create_enumerator(map
);
2826 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&addr
))
2830 enumerator
->destroy(enumerator
);
2834 METHOD(kernel_net_t
, destroy
, void,
2835 private_kernel_netlink_net_t
*this)
2837 enumerator_t
*enumerator
;
2838 route_entry_t
*route
;
2840 if (this->routing_table
)
2842 manage_rule(this, RTM_DELRULE
, AF_INET
, this->routing_table
,
2843 this->routing_table_prio
);
2844 manage_rule(this, RTM_DELRULE
, AF_INET6
, this->routing_table
,
2845 this->routing_table_prio
);
2847 if (this->socket_events
> 0)
2849 lib
->watcher
->remove(lib
->watcher
, this->socket_events
);
2850 close(this->socket_events
);
2852 enumerator
= this->routes
->create_enumerator(this->routes
);
2853 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
2855 manage_srcroute(this, RTM_DELROUTE
, 0, route
->dst_net
, route
->prefixlen
,
2856 route
->gateway
, route
->src_ip
, route
->if_name
);
2857 route_entry_destroy(route
);
2859 enumerator
->destroy(enumerator
);
2860 this->routes
->destroy(this->routes
);
2861 this->routes_lock
->destroy(this->routes_lock
);
2862 DESTROY_IF(this->socket
);
2864 net_changes_clear(this);
2865 this->net_changes
->destroy(this->net_changes
);
2866 this->net_changes_lock
->destroy(this->net_changes_lock
);
2868 addr_map_destroy(this->addrs
);
2869 addr_map_destroy(this->vips
);
2871 this->ifaces
->destroy_function(this->ifaces
, (void*)iface_entry_destroy
);
2872 this->rt_exclude
->destroy(this->rt_exclude
);
2873 this->roam_lock
->destroy(this->roam_lock
);
2874 this->condvar
->destroy(this->condvar
);
2875 this->lock
->destroy(this->lock
);
2880 * Described in header.
/*
 * Constructor of the netlink based network backend. In the order visible
 * here it: binds the public method pointers, creates the netlink socket,
 * containers and locks, reads the configuration from lib->settings, parses
 * the list of routing tables to ignore, optionally opens/binds/watches the
 * RT event socket, loads the address list and installs the IPv4/IPv6 rules
 * for the configured routing table. Returns the address of the public
 * interface embedded in the private object.
 * NOTE(review): the opening of the initializer, the declarations of exclude,
 * token and table, all brace lines and the bodies of the error paths are not
 * visible in this view - confirm those details against the complete file.
 */
2882 kernel_netlink_net_t
*kernel_netlink_net_create()
2884 private_kernel_netlink_net_t
*this;
2885 enumerator_t
*enumerator
;
/* event registration is on by default; it is switched off below when
 * lib->ns is "starter" */
2886 bool register_for_events
= TRUE
;
/* method table: each public hook is bound to its underscore-prefixed
 * implementation */
2892 .get_interface
= _get_interface_name
,
2893 .create_address_enumerator
= _create_address_enumerator
,
2894 .create_local_subnet_enumerator
= _create_local_subnet_enumerator
,
2895 .get_source_addr
= _get_source_addr
,
2896 .get_nexthop
= _get_nexthop
,
2899 .add_route
= _add_route
,
2900 .del_route
= _del_route
,
2901 .destroy
= _destroy
,
/* NETLINK_ROUTE socket for requests; its last argument is the boolean
 * parallel_route setting (FALSE is passed as the value after the key) */
2904 .socket
= netlink_socket_create(NETLINK_ROUTE
, rt_msg_names
,
2905 lib
->settings
->get_bool(lib
->settings
,
2906 "%s.plugins.kernel-netlink.parallel_route", FALSE
, lib
->ns
)),
/* containers: every hashtable gets its own hash/equals callbacks and a
 * third argument of 16 */
2907 .rt_exclude
= linked_list_create(),
2908 .routes
= hashtable_create((hashtable_hash_t
)route_entry_hash
,
2909 (hashtable_equals_t
)route_entry_equals
, 16),
2910 .net_changes
= hashtable_create(
2911 (hashtable_hash_t
)net_change_hash
,
2912 (hashtable_equals_t
)net_change_equals
, 16),
/* addrs and vips use the same addr_map_entry callbacks */
2913 .addrs
= hashtable_create(
2914 (hashtable_hash_t
)addr_map_entry_hash
,
2915 (hashtable_equals_t
)addr_map_entry_equals
, 16),
2916 .vips
= hashtable_create((hashtable_hash_t
)addr_map_entry_hash
,
2917 (hashtable_equals_t
)addr_map_entry_equals
, 16),
/* synchronization objects; all of them are destroyed again in the destroy
 * method */
2918 .routes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
2919 .net_changes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
2920 .ifaces
= linked_list_create(),
2921 .lock
= rwlock_create(RWLOCK_TYPE_DEFAULT
),
2922 .condvar
= rwlock_condvar_create(),
2923 .roam_lock
= spinlock_create(),
/* configuration: every key starts with "%s" and lib->ns is passed as the
 * trailing argument (presumably substituted for the "%s"); the value that
 * follows the key is presumably the default - confirm with the settings API */
2924 .routing_table
= lib
->settings
->get_int(lib
->settings
,
2925 "%s.routing_table", ROUTING_TABLE
, lib
->ns
),
2926 .routing_table_prio
= lib
->settings
->get_int(lib
->settings
,
2927 "%s.routing_table_prio", ROUTING_TABLE_PRIO
, lib
->ns
),
2928 .process_route
= lib
->settings
->get_bool(lib
->settings
,
2929 "%s.process_route", TRUE
, lib
->ns
),
2930 .install_virtual_ip
= lib
->settings
->get_bool(lib
->settings
,
2931 "%s.install_virtual_ip", TRUE
, lib
->ns
),
2932 .install_virtual_ip_on
= lib
->settings
->get_str(lib
->settings
,
2933 "%s.install_virtual_ip_on", NULL
, lib
->ns
),
2934 .prefer_temporary_addrs
= lib
->settings
->get_bool(lib
->settings
,
2935 "%s.prefer_temporary_addrs", FALSE
, lib
->ns
),
/* the remaining options live under the plugin specific
 * "plugins.kernel-netlink" section */
2936 .roam_events
= lib
->settings
->get_bool(lib
->settings
,
2937 "%s.plugins.kernel-netlink.roam_events", TRUE
, lib
->ns
),
2938 .mtu
= lib
->settings
->get_int(lib
->settings
,
2939 "%s.plugins.kernel-netlink.mtu", 0, lib
->ns
),
2940 .mss
= lib
->settings
->get_int(lib
->settings
,
2941 "%s.plugins.kernel-netlink.mss", 0, lib
->ns
),
/* start with both timestamps zeroed */
2943 timerclear(&this->last_route_reinstall
);
2944 timerclear(&this->next_roam
);
2946 check_kernel_features(this);
2948 if (streq(lib
->ns
, "starter"))
2949 { /* starter has no threads, so we do not register for kernel events */
2950 register_for_events
= FALSE
;
/* optional setting listing routing tables to ignore; the value after the
 * key is NULL */
2953 exclude
= lib
->settings
->get_str(lib
->settings
,
2954 "%s.ignore_routing_tables", NULL
, lib
->ns
);
/* tokenize the string, passing " " as both the second and the third
 * argument of enumerator_create_token() */
2960 enumerator
= enumerator_create_token(exclude
, " ", " ");
2961 while (enumerator
->enumerate(enumerator
, &token
))
/* base-10 parse with a NULL end pointer, so trailing non-digits in a token
 * are not detected by this call itself */
2964 table
= strtoul(token
, NULL
, 10);
/* the numeric table id is stored in the list directly, cast to a pointer,
 * not as separately allocated memory */
2968 this->rt_exclude
->insert_last(this->rt_exclude
, (void*)table
);
2971 enumerator
->destroy(enumerator
);
/* everything concerning the event socket depends on this flag */
2974 if (register_for_events
)
2976 struct sockaddr_nl addr
;
2978 memset(&addr
, 0, sizeof(addr
));
2979 addr
.nl_family
= AF_NETLINK
;
2981 /* create and bind RT socket for events (address/interface/route changes) */
2982 this->socket_events
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
/* socket() failed: errno is logged as text and as number;
 * NOTE(review): the rest of this error path is not visible here */
2983 if (this->socket_events
< 0)
2985 DBG1(DBG_KNL
, "unable to create RT event socket: %s (%d)",
2986 strerror(errno
), errno
);
/* subscribe to the multicast groups for IPv4/IPv6 address changes,
 * IPv4/IPv6 route changes and link changes */
2990 addr
.nl_groups
= RTMGRP_IPV4_IFADDR
| RTMGRP_IPV6_IFADDR
|
2991 RTMGRP_IPV4_ROUTE
| RTMGRP_IPV6_ROUTE
| RTMGRP_LINK
;
/* a non-zero return from bind() is treated as failure and logged;
 * NOTE(review): the rest of this error path is not visible here */
2992 if (bind(this->socket_events
, (struct sockaddr
*)&addr
, sizeof(addr
)))
2994 DBG1(DBG_KNL
, "unable to bind RT event socket: %s (%d)",
2995 strerror(errno
), errno
);
/* register receive_events() with the watcher for WATCHER_READ on the event
 * socket, passing this object as callback data */
3000 lib
->watcher
->add(lib
->watcher
, this->socket_events
, WATCHER_READ
,
3001 (watcher_cb_t
)receive_events
, this);
/* any result other than SUCCESS is reported as a failure to get the
 * interface list */
3004 if (init_address_list(this) != SUCCESS
)
3006 DBG1(DBG_KNL
, "unable to get interface list");
/* with a non-zero routing table, create one rule per address family using
 * the configured table and priority; a failure is logged for each family */
3011 if (this->routing_table
)
3013 if (manage_rule(this, RTM_NEWRULE
, AF_INET
, this->routing_table
,
3014 this->routing_table_prio
) != SUCCESS
)
3016 DBG1(DBG_KNL
, "unable to create IPv4 routing table rule");
3018 if (manage_rule(this, RTM_NEWRULE
, AF_INET6
, this->routing_table
,
3019 this->routing_table_prio
) != SUCCESS
)
3021 DBG1(DBG_KNL
, "unable to create IPv6 routing table rule");
/* callers only ever see the public interface embedded in the private object */
3025 return &this->public;