2 * Copyright (C) 2008-2012 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
48 #include "kernel_netlink_net.h"
49 #include "kernel_netlink_shared.h"
53 #include <threading/thread.h>
54 #include <threading/mutex.h>
55 #include <threading/rwlock.h>
56 #include <threading/rwlock_condvar.h>
57 #include <threading/spinlock.h>
58 #include <utils/hashtable.h>
59 #include <utils/linked_list.h>
60 #include <processing/jobs/callback_job.h>
62 /** delay before firing roam events (ms) */
63 #define ROAM_DELAY 100
65 /** delay before reinstalling routes (ms) */
66 #define ROUTE_DELAY 100
68 typedef struct addr_entry_t addr_entry_t
;
71 * IP address in an iface_entry_t
78 /** scope of the address */
81 /** number of times this IP is used, if virtual (i.e. managed by us) */
84 /** TRUE once it is installed, if virtual */
89 * destroy a addr_entry_t object
91 static void addr_entry_destroy(addr_entry_t
*this)
93 this->ip
->destroy(this->ip
);
97 typedef struct iface_entry_t iface_entry_t
;
100 * A network interface on this system, containing addr_entry_t's
102 struct iface_entry_t
{
104 /** interface index */
107 /** name of the interface */
108 char ifname
[IFNAMSIZ
];
110 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
113 /** list of addresses as host_t */
114 linked_list_t
*addrs
;
116 /** TRUE if usable by config */
121 * destroy an interface entry
123 static void iface_entry_destroy(iface_entry_t
*this)
125 this->addrs
->destroy_function(this->addrs
, (void*)addr_entry_destroy
);
130 * find an interface entry by index
132 static bool iface_entry_by_index(iface_entry_t
*this, int *ifindex
)
134 return this->ifindex
== *ifindex
;
138 * find an interface entry by name
140 static bool iface_entry_by_name(iface_entry_t
*this, char *ifname
)
142 return streq(this->ifname
, ifname
);
146 * check if an interface is up
148 static inline bool iface_entry_up(iface_entry_t
*iface
)
150 return (iface
->flags
& IFF_UP
) == IFF_UP
;
154 * check if an interface is up and usable
156 static inline bool iface_entry_up_and_usable(iface_entry_t
*iface
)
158 return iface
->usable
&& iface_entry_up(iface
);
161 typedef struct addr_map_entry_t addr_map_entry_t
;
164 * Entry that maps an IP address to an interface entry
166 struct addr_map_entry_t
{
167 /** The IP address */
170 /** The address entry for this IP address */
173 /** The interface this address is installed on */
174 iface_entry_t
*iface
;
178 * Hash a addr_map_entry_t object, all entries with the same IP address
179 * are stored in the same bucket
181 static u_int
addr_map_entry_hash(addr_map_entry_t
*this)
183 return chunk_hash(this->ip
->get_address(this->ip
));
187 * Compare two addr_map_entry_t objects, two entries are equal if they are
188 * installed on the same interface
190 static bool addr_map_entry_equals(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
192 return a
->iface
->ifindex
== b
->iface
->ifindex
&&
193 a
->ip
->ip_equals(a
->ip
, b
->ip
);
197 * Used with get_match this finds an address entry if it is installed on
198 * an up and usable interface
200 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t
*a
,
203 return iface_entry_up_and_usable(b
->iface
) &&
204 a
->ip
->ip_equals(a
->ip
, b
->ip
);
208 * Used with get_match this finds an address entry if it is installed on
209 * any active local interface
211 static bool addr_map_entry_match_up(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
213 return iface_entry_up(b
->iface
) && a
->ip
->ip_equals(a
->ip
, b
->ip
);
217 * Used with get_match this finds an address entry if it is installed on
218 * any local interface
220 static bool addr_map_entry_match(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
222 return a
->ip
->ip_equals(a
->ip
, b
->ip
);
225 typedef struct route_entry_t route_entry_t
;
228 * Installed routing entry
230 struct route_entry_t
{
231 /** Name of the interface the route is bound to */
234 /** Source ip of the route */
237 /** Gateway for this route */
240 /** Destination net */
243 /** Destination net prefixlen */
248 * Clone a route_entry_t object.
250 static route_entry_t
*route_entry_clone(route_entry_t
*this)
252 route_entry_t
*route
;
255 .if_name
= strdup(this->if_name
),
256 .src_ip
= this->src_ip
->clone(this->src_ip
),
257 .gateway
= this->gateway
->clone(this->gateway
),
258 .dst_net
= chunk_clone(this->dst_net
),
259 .prefixlen
= this->prefixlen
,
265 * Destroy a route_entry_t object
267 static void route_entry_destroy(route_entry_t
*this)
270 DESTROY_IF(this->src_ip
);
271 DESTROY_IF(this->gateway
);
272 chunk_free(&this->dst_net
);
277 * Hash a route_entry_t object
279 static u_int
route_entry_hash(route_entry_t
*this)
281 return chunk_hash_inc(chunk_from_thing(this->prefixlen
),
282 chunk_hash(this->dst_net
));
286 * Compare two route_entry_t objects
288 static bool route_entry_equals(route_entry_t
*a
, route_entry_t
*b
)
290 return a
->if_name
&& b
->if_name
&& streq(a
->if_name
, b
->if_name
) &&
291 a
->src_ip
->ip_equals(a
->src_ip
, b
->src_ip
) &&
292 a
->gateway
->ip_equals(a
->gateway
, b
->gateway
) &&
293 chunk_equals(a
->dst_net
, b
->dst_net
) && a
->prefixlen
== b
->prefixlen
;
296 typedef struct net_change_t net_change_t
;
299 * Queued network changes
301 struct net_change_t
{
302 /** Name of the interface that got activated (or an IP appeared on) */
307 * Destroy a net_change_t object
309 static void net_change_destroy(net_change_t
*this)
316 * Hash a net_change_t object
318 static u_int
net_change_hash(net_change_t
*this)
320 return chunk_hash(chunk_create(this->if_name
, strlen(this->if_name
)));
324 * Compare two net_change_t objects
326 static bool net_change_equals(net_change_t
*a
, net_change_t
*b
)
328 return streq(a
->if_name
, b
->if_name
);
331 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t
;
334 * Private variables and functions of kernel_netlink_net class.
336 struct private_kernel_netlink_net_t
{
338 * Public part of the kernel_netlink_net_t object.
340 kernel_netlink_net_t
public;
343 * lock to access various lists and maps
348 * condition variable to signal virtual IP add/removal
350 rwlock_condvar_t
*condvar
;
353 * Cached list of interfaces and its addresses (iface_entry_t)
355 linked_list_t
*ifaces
;
358 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
363 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
368 * netlink rt socket (routing)
370 netlink_socket_t
*socket
;
373 * Netlink rt socket to receive address change events
378 * earliest time of the next roam event
383 * lock to check and update roam event time
385 spinlock_t
*roam_lock
;
388 * routing table to install routes
393 * priority of used routing table
395 int routing_table_prio
;
405 mutex_t
*routes_lock
;
408 * interface changes which may trigger route reinstallation
410 hashtable_t
*net_changes
;
413 * mutex for route reinstallation triggers
415 mutex_t
*net_changes_lock
;
418 * time of last route reinstallation
420 timeval_t last_route_reinstall
;
423 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
428 * whether to actually install virtual IPs
430 bool install_virtual_ip
;
433 * the name of the interface virtual IP addresses are installed on
435 char *install_virtual_ip_on
;
438 * whether preferred source addresses can be specified for IPv6 routes
440 bool rta_prefsrc_for_ipv6
;
443 * list with routing tables to be excluded from route lookup
445 linked_list_t
*rt_exclude
;
449 * Forward declaration
451 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
452 int nlmsg_type
, int flags
, chunk_t dst_net
,
453 u_int8_t prefixlen
, host_t
*gateway
,
454 host_t
*src_ip
, char *if_name
);
457 * Clear the queued network changes.
459 static void net_changes_clear(private_kernel_netlink_net_t
*this)
461 enumerator_t
*enumerator
;
462 net_change_t
*change
;
464 enumerator
= this->net_changes
->create_enumerator(this->net_changes
);
465 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&change
))
467 this->net_changes
->remove_at(this->net_changes
, enumerator
);
468 net_change_destroy(change
);
470 enumerator
->destroy(enumerator
);
474 * Act upon queued network changes.
476 static job_requeue_t
reinstall_routes(private_kernel_netlink_net_t
*this)
478 enumerator_t
*enumerator
;
479 route_entry_t
*route
;
481 this->net_changes_lock
->lock(this->net_changes_lock
);
482 this->routes_lock
->lock(this->routes_lock
);
484 enumerator
= this->routes
->create_enumerator(this->routes
);
485 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
487 net_change_t
*change
, lookup
= {
488 .if_name
= route
->if_name
,
490 /* check if a change for the outgoing interface is queued */
491 change
= this->net_changes
->get(this->net_changes
, &lookup
);
493 { /* in case src_ip is not on the outgoing interface */
494 if (this->public.interface
.get_interface(&this->public.interface
,
495 route
->src_ip
, &lookup
.if_name
))
497 if (!streq(lookup
.if_name
, route
->if_name
))
499 change
= this->net_changes
->get(this->net_changes
, &lookup
);
501 free(lookup
.if_name
);
506 manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
507 route
->dst_net
, route
->prefixlen
, route
->gateway
,
508 route
->src_ip
, route
->if_name
);
511 enumerator
->destroy(enumerator
);
512 this->routes_lock
->unlock(this->routes_lock
);
514 net_changes_clear(this);
515 this->net_changes_lock
->unlock(this->net_changes_lock
);
516 return JOB_REQUEUE_NONE
;
520 * Queue route reinstallation caused by network changes for a given interface.
522 * The route reinstallation is delayed for a while and only done once for
523 * several calls during this delay, in order to avoid doing it too often.
524 * The interface name is freed.
526 static void queue_route_reinstall(private_kernel_netlink_net_t
*this,
529 net_change_t
*update
, *found
;
537 this->net_changes_lock
->lock(this->net_changes_lock
);
538 found
= this->net_changes
->put(this->net_changes
, update
, update
);
541 net_change_destroy(found
);
543 time_monotonic(&now
);
544 if (timercmp(&now
, &this->last_route_reinstall
, >))
546 timeval_add_ms(&now
, ROUTE_DELAY
);
547 this->last_route_reinstall
= now
;
549 job
= (job_t
*)callback_job_create((callback_job_cb_t
)reinstall_routes
,
551 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROUTE_DELAY
);
553 this->net_changes_lock
->unlock(this->net_changes_lock
);
557 * check if the given IP is known as virtual IP and currently installed
559 * this function will also return TRUE if the virtual IP entry disappeared.
560 * in that case the returned entry will be NULL.
562 * this->lock must be held when calling this function
564 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t
*this,
565 host_t
*ip
, addr_map_entry_t
**entry
)
567 addr_map_entry_t lookup
= {
571 *entry
= this->vips
->get_match(this->vips
, &lookup
,
572 (void*)addr_map_entry_match
);
574 { /* the virtual IP disappeared */
577 return (*entry
)->addr
->installed
;
581 * check if the given IP is known as virtual IP
583 * this->lock must be held when calling this function
585 static bool is_known_vip(private_kernel_netlink_net_t
*this, host_t
*ip
)
587 addr_map_entry_t lookup
= {
591 return this->vips
->get_match(this->vips
, &lookup
,
592 (void*)addr_map_entry_match
) != NULL
;
596 * Add an address map entry
598 static void addr_map_entry_add(hashtable_t
*map
, addr_entry_t
*addr
,
599 iface_entry_t
*iface
)
601 addr_map_entry_t
*entry
;
608 entry
= map
->put(map
, entry
, entry
);
613 * Remove an address map entry
615 static void addr_map_entry_remove(hashtable_t
*map
, addr_entry_t
*addr
,
616 iface_entry_t
*iface
)
618 addr_map_entry_t
*entry
, lookup
= {
624 entry
= map
->remove(map
, &lookup
);
629 * get the first non-virtual ip address on the given interface.
630 * if a candidate address is given, we first search for that address and if not
631 * found return the address as above.
632 * returned host is a clone, has to be freed by caller.
634 * this->lock must be held when calling this function
636 static host_t
*get_interface_address(private_kernel_netlink_net_t
*this,
637 int ifindex
, int family
, host_t
*candidate
)
639 iface_entry_t
*iface
;
644 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
645 (void**)&iface
, &ifindex
) == SUCCESS
)
648 { /* only use interfaces not excluded by config */
649 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
650 while (addrs
->enumerate(addrs
, &addr
))
653 { /* ignore virtual IP addresses */
656 if (addr
->ip
->get_family(addr
->ip
) == family
)
658 if (!candidate
|| candidate
->ip_equals(candidate
, addr
->ip
))
659 { /* stop at the first address if we don't search for a
660 * candidate or if the candidate matches */
665 { /* store the first address as fallback if candidate is
671 addrs
->destroy(addrs
);
674 return ip ? ip
->clone(ip
) : NULL
;
678 * callback function that raises the delayed roam event
680 static job_requeue_t
roam_event(uintptr_t address
)
682 hydra
->kernel_interface
->roam(hydra
->kernel_interface
, address
!= 0);
683 return JOB_REQUEUE_NONE
;
687 * fire a roaming event. we delay it for a bit and fire only one event
688 * for multiple calls. otherwise we would create too many events.
690 static void fire_roam_event(private_kernel_netlink_net_t
*this, bool address
)
695 time_monotonic(&now
);
696 this->roam_lock
->lock(this->roam_lock
);
697 if (!timercmp(&now
, &this->next_roam
, >))
699 this->roam_lock
->unlock(this->roam_lock
);
702 timeval_add_ms(&now
, ROAM_DELAY
);
703 this->next_roam
= now
;
704 this->roam_lock
->unlock(this->roam_lock
);
706 job
= (job_t
*)callback_job_create((callback_job_cb_t
)roam_event
,
707 (void*)(uintptr_t)(address ?
1 : 0),
709 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROAM_DELAY
);
713 * check if an interface with a given index is up and usable
715 * this->lock must be locked when calling this function
717 static bool is_interface_up_and_usable(private_kernel_netlink_net_t
*this,
720 iface_entry_t
*iface
;
722 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
723 (void**)&iface
, &index
) == SUCCESS
)
725 return iface_entry_up_and_usable(iface
);
731 * unregister the current addr_entry_t from the hashtable it is stored in
733 * this->lock must be locked when calling this function
735 static void addr_entry_unregister(addr_entry_t
*addr
, iface_entry_t
*iface
,
736 private_kernel_netlink_net_t
*this)
740 addr_map_entry_remove(this->vips
, addr
, iface
);
741 this->condvar
->broadcast(this->condvar
);
744 addr_map_entry_remove(this->addrs
, addr
, iface
);
748 * process RTM_NEWLINK/RTM_DELLINK from kernel
750 static void process_link(private_kernel_netlink_net_t
*this,
751 struct nlmsghdr
*hdr
, bool event
)
753 struct ifinfomsg
* msg
= (struct ifinfomsg
*)(NLMSG_DATA(hdr
));
754 struct rtattr
*rta
= IFLA_RTA(msg
);
755 size_t rtasize
= IFLA_PAYLOAD (hdr
);
756 enumerator_t
*enumerator
;
757 iface_entry_t
*current
, *entry
= NULL
;
759 bool update
= FALSE
, update_routes
= FALSE
;
761 while (RTA_OK(rta
, rtasize
))
763 switch (rta
->rta_type
)
766 name
= RTA_DATA(rta
);
769 rta
= RTA_NEXT(rta
, rtasize
);
776 this->lock
->write_lock(this->lock
);
777 switch (hdr
->nlmsg_type
)
781 if (this->ifaces
->find_first(this->ifaces
,
782 (void*)iface_entry_by_index
, (void**)&entry
,
783 &msg
->ifi_index
) != SUCCESS
)
786 .ifindex
= msg
->ifi_index
,
787 .addrs
= linked_list_create(),
788 .usable
= hydra
->kernel_interface
->is_interface_usable(
789 hydra
->kernel_interface
, name
),
791 this->ifaces
->insert_last(this->ifaces
, entry
);
793 strncpy(entry
->ifname
, name
, IFNAMSIZ
);
794 entry
->ifname
[IFNAMSIZ
-1] = '\0';
795 if (event
&& entry
->usable
)
797 if (!(entry
->flags
& IFF_UP
) && (msg
->ifi_flags
& IFF_UP
))
799 update
= update_routes
= TRUE
;
800 DBG1(DBG_KNL
, "interface %s activated", name
);
802 if ((entry
->flags
& IFF_UP
) && !(msg
->ifi_flags
& IFF_UP
))
805 DBG1(DBG_KNL
, "interface %s deactivated", name
);
808 entry
->flags
= msg
->ifi_flags
;
813 enumerator
= this->ifaces
->create_enumerator(this->ifaces
);
814 while (enumerator
->enumerate(enumerator
, ¤t
))
816 if (current
->ifindex
== msg
->ifi_index
)
818 if (event
&& current
->usable
)
821 DBG1(DBG_KNL
, "interface %s deleted", current
->ifname
);
823 /* TODO: move virtual IPs installed on this interface to
824 * another interface? */
825 this->ifaces
->remove_at(this->ifaces
, enumerator
);
826 current
->addrs
->invoke_function(current
->addrs
,
827 (void*)addr_entry_unregister
, current
, this);
828 iface_entry_destroy(current
);
832 enumerator
->destroy(enumerator
);
836 this->lock
->unlock(this->lock
);
838 if (update_routes
&& event
)
840 queue_route_reinstall(this, strdup(name
));
845 fire_roam_event(this, TRUE
);
850 * process RTM_NEWADDR/RTM_DELADDR from kernel
852 static void process_addr(private_kernel_netlink_net_t
*this,
853 struct nlmsghdr
*hdr
, bool event
)
855 struct ifaddrmsg
* msg
= (struct ifaddrmsg
*)(NLMSG_DATA(hdr
));
856 struct rtattr
*rta
= IFA_RTA(msg
);
857 size_t rtasize
= IFA_PAYLOAD (hdr
);
859 iface_entry_t
*iface
;
860 chunk_t local
= chunk_empty
, address
= chunk_empty
;
861 char *route_ifname
= NULL
;
862 bool update
= FALSE
, found
= FALSE
, changed
= FALSE
;
864 while (RTA_OK(rta
, rtasize
))
866 switch (rta
->rta_type
)
869 local
.ptr
= RTA_DATA(rta
);
870 local
.len
= RTA_PAYLOAD(rta
);
873 address
.ptr
= RTA_DATA(rta
);
874 address
.len
= RTA_PAYLOAD(rta
);
877 rta
= RTA_NEXT(rta
, rtasize
);
880 /* For PPP interfaces, we need the IFA_LOCAL address,
881 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
882 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
885 host
= host_create_from_chunk(msg
->ifa_family
, local
, 0);
887 else if (address
.ptr
)
889 host
= host_create_from_chunk(msg
->ifa_family
, address
, 0);
897 this->lock
->write_lock(this->lock
);
898 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
899 (void**)&iface
, &msg
->ifa_index
) == SUCCESS
)
901 addr_map_entry_t
*entry
, lookup
= {
907 entry
= this->vips
->get(this->vips
, &lookup
);
910 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
911 { /* mark as installed and signal waiting threads */
912 entry
->addr
->installed
= TRUE
;
915 { /* the address was already marked as uninstalled */
917 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
918 addr_map_entry_remove(this->vips
, addr
, iface
);
919 addr_entry_destroy(addr
);
921 /* no roam events etc. for virtual IPs */
922 this->condvar
->broadcast(this->condvar
);
923 this->lock
->unlock(this->lock
);
927 entry
= this->addrs
->get(this->addrs
, &lookup
);
930 if (hdr
->nlmsg_type
== RTM_DELADDR
)
934 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
938 DBG1(DBG_KNL
, "%H disappeared from %s", host
,
941 addr_map_entry_remove(this->addrs
, addr
, iface
);
942 addr_entry_destroy(addr
);
947 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
951 route_ifname
= strdup(iface
->ifname
);
953 .ip
= host
->clone(host
),
954 .scope
= msg
->ifa_scope
,
956 iface
->addrs
->insert_last(iface
->addrs
, addr
);
957 addr_map_entry_add(this->addrs
, addr
, iface
);
958 if (event
&& iface
->usable
)
960 DBG1(DBG_KNL
, "%H appeared on %s", host
, iface
->ifname
);
964 if (found
&& (iface
->flags
& IFF_UP
))
969 { /* ignore events for interfaces excluded by config */
970 update
= changed
= FALSE
;
973 this->lock
->unlock(this->lock
);
975 if (update
&& event
&& route_ifname
)
977 queue_route_reinstall(this, route_ifname
);
985 /* send an update to all IKE_SAs */
986 if (update
&& event
&& changed
)
988 fire_roam_event(this, TRUE
);
993 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
995 static void process_route(private_kernel_netlink_net_t
*this, struct nlmsghdr
*hdr
)
997 struct rtmsg
* msg
= (struct rtmsg
*)(NLMSG_DATA(hdr
));
998 struct rtattr
*rta
= RTM_RTA(msg
);
999 size_t rtasize
= RTM_PAYLOAD(hdr
);
1000 u_int32_t rta_oif
= 0;
1001 host_t
*host
= NULL
;
1003 /* ignore routes added by us or in the local routing table (local addrs) */
1004 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1005 msg
->rtm_table
== RT_TABLE_LOCAL
))
1009 else if (msg
->rtm_flags
& RTM_F_CLONED
)
1010 { /* ignore cached routes, seem to be created a lot for IPv6 */
1014 while (RTA_OK(rta
, rtasize
))
1016 switch (rta
->rta_type
)
1020 host
= host_create_from_chunk(msg
->rtm_family
,
1021 chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
)), 0);
1024 if (RTA_PAYLOAD(rta
) == sizeof(rta_oif
))
1026 rta_oif
= *(u_int32_t
*)RTA_DATA(rta
);
1030 rta
= RTA_NEXT(rta
, rtasize
);
1032 this->lock
->read_lock(this->lock
);
1033 if (rta_oif
&& !is_interface_up_and_usable(this, rta_oif
))
1034 { /* ignore route changes for interfaces that are ignored or down */
1035 this->lock
->unlock(this->lock
);
1039 if (!host
&& rta_oif
)
1041 host
= get_interface_address(this, rta_oif
, msg
->rtm_family
, NULL
);
1043 if (!host
|| is_known_vip(this, host
))
1044 { /* ignore routes added for virtual IPs */
1045 this->lock
->unlock(this->lock
);
1049 this->lock
->unlock(this->lock
);
1050 fire_roam_event(this, FALSE
);
1051 host
->destroy(host
);
1055 * Receives events from kernel
1057 static job_requeue_t
receive_events(private_kernel_netlink_net_t
*this)
1059 char response
[1024];
1060 struct nlmsghdr
*hdr
= (struct nlmsghdr
*)response
;
1061 struct sockaddr_nl addr
;
1062 socklen_t addr_len
= sizeof(addr
);
1066 oldstate
= thread_cancelability(TRUE
);
1067 len
= recvfrom(this->socket_events
, response
, sizeof(response
), 0,
1068 (struct sockaddr
*)&addr
, &addr_len
);
1069 thread_cancelability(oldstate
);
1076 /* interrupted, try again */
1077 return JOB_REQUEUE_DIRECT
;
1079 /* no data ready, select again */
1080 return JOB_REQUEUE_DIRECT
;
1082 DBG1(DBG_KNL
, "unable to receive from rt event socket");
1084 return JOB_REQUEUE_FAIR
;
1088 if (addr
.nl_pid
!= 0)
1089 { /* not from kernel. not interested, try another one */
1090 return JOB_REQUEUE_DIRECT
;
1093 while (NLMSG_OK(hdr
, len
))
1095 /* looks good so far, dispatch netlink message */
1096 switch (hdr
->nlmsg_type
)
1100 process_addr(this, hdr
, TRUE
);
1104 process_link(this, hdr
, TRUE
);
1108 if (this->process_route
)
1110 process_route(this, hdr
);
1116 hdr
= NLMSG_NEXT(hdr
, len
);
1118 return JOB_REQUEUE_DIRECT
;
1121 /** enumerator over addresses */
1123 private_kernel_netlink_net_t
* this;
1124 /** which addresses to enumerate */
1125 kernel_address_type_t which
;
1126 } address_enumerator_t
;
1129 * cleanup function for address enumerator
1131 static void address_enumerator_destroy(address_enumerator_t
*data
)
1133 data
->this->lock
->unlock(data
->this->lock
);
1138 * filter for addresses
1140 static bool filter_addresses(address_enumerator_t
*data
,
1141 addr_entry_t
** in
, host_t
** out
)
1143 if (!(data
->which
& ADDR_TYPE_VIRTUAL
) && (*in
)->refcount
)
1144 { /* skip virtual interfaces added by us */
1147 if ((*in
)->scope
>= RT_SCOPE_LINK
)
1148 { /* skip addresses with a unusable scope */
1156 * enumerator constructor for interfaces
1158 static enumerator_t
*create_iface_enumerator(iface_entry_t
*iface
,
1159 address_enumerator_t
*data
)
1161 return enumerator_create_filter(
1162 iface
->addrs
->create_enumerator(iface
->addrs
),
1163 (void*)filter_addresses
, data
, NULL
);
1167 * filter for interfaces
1169 static bool filter_interfaces(address_enumerator_t
*data
, iface_entry_t
** in
,
1170 iface_entry_t
** out
)
1172 if (!(data
->which
& ADDR_TYPE_IGNORED
) && !(*in
)->usable
)
1173 { /* skip interfaces excluded by config */
1176 if (!(data
->which
& ADDR_TYPE_LOOPBACK
) && ((*in
)->flags
& IFF_LOOPBACK
))
1177 { /* ignore loopback devices */
1180 if (!(data
->which
& ADDR_TYPE_DOWN
) && !((*in
)->flags
& IFF_UP
))
1181 { /* skip interfaces not up */
1188 METHOD(kernel_net_t
, create_address_enumerator
, enumerator_t
*,
1189 private_kernel_netlink_net_t
*this, kernel_address_type_t which
)
1191 address_enumerator_t
*data
= malloc_thing(address_enumerator_t
);
1193 data
->which
= which
;
1195 this->lock
->read_lock(this->lock
);
1196 return enumerator_create_nested(
1197 enumerator_create_filter(
1198 this->ifaces
->create_enumerator(this->ifaces
),
1199 (void*)filter_interfaces
, data
, NULL
),
1200 (void*)create_iface_enumerator
, data
,
1201 (void*)address_enumerator_destroy
);
1204 METHOD(kernel_net_t
, get_interface_name
, bool,
1205 private_kernel_netlink_net_t
*this, host_t
* ip
, char **name
)
1207 addr_map_entry_t
*entry
, lookup
= {
1211 if (ip
->is_anyaddr(ip
))
1215 this->lock
->read_lock(this->lock
);
1216 /* first try to find it on an up and usable interface */
1217 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1218 (void*)addr_map_entry_match_up_and_usable
);
1223 *name
= strdup(entry
->iface
->ifname
);
1224 DBG2(DBG_KNL
, "%H is on interface %s", ip
, *name
);
1226 this->lock
->unlock(this->lock
);
1229 /* maybe it is installed on an ignored interface */
1230 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1231 (void*)addr_map_entry_match_up
);
1234 DBG2(DBG_KNL
, "%H is not a local address or the interface is down", ip
);
1236 this->lock
->unlock(this->lock
);
1241 * get the index of an interface by name
1243 static int get_interface_index(private_kernel_netlink_net_t
*this, char* name
)
1245 iface_entry_t
*iface
;
1248 DBG2(DBG_KNL
, "getting iface index for %s", name
);
1250 this->lock
->read_lock(this->lock
);
1251 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1252 (void**)&iface
, name
) == SUCCESS
)
1254 ifindex
= iface
->ifindex
;
1256 this->lock
->unlock(this->lock
);
1260 DBG1(DBG_KNL
, "unable to get interface index for %s", name
);
1266 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1268 static bool addr_in_subnet(chunk_t addr
, chunk_t net
, int net_len
)
1270 static const u_char mask
[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
1274 { /* any address matches a /0 network */
1277 if (addr
.len
!= net
.len
|| net_len
> 8 * net
.len
)
1281 /* scan through all bytes in network order */
1286 return (mask
[net_len
] & addr
.ptr
[byte
]) == (mask
[net_len
] & net
.ptr
[byte
]);
1290 if (addr
.ptr
[byte
] != net
.ptr
[byte
])
1302 * Store information about a route retrieved via RTNETLINK
1315 * Free a route entry
1317 static void rt_entry_destroy(rt_entry_t
*this)
1319 DESTROY_IF(this->src_host
);
1324 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1325 * reused if not NULL.
1327 * Returned chunks point to internal data of the Netlink message.
1329 static rt_entry_t
*parse_route(struct nlmsghdr
*hdr
, rt_entry_t
*route
)
1335 msg
= (struct rtmsg
*)(NLMSG_DATA(hdr
));
1337 rtasize
= RTM_PAYLOAD(hdr
);
1341 route
->gtw
= chunk_empty
;
1342 route
->src
= chunk_empty
;
1343 route
->dst
= chunk_empty
;
1344 route
->dst_len
= msg
->rtm_dst_len
;
1345 route
->table
= msg
->rtm_table
;
1351 .dst_len
= msg
->rtm_dst_len
,
1352 .table
= msg
->rtm_table
,
1356 while (RTA_OK(rta
, rtasize
))
1358 switch (rta
->rta_type
)
1361 route
->src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1364 route
->gtw
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1367 route
->dst
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1370 if (RTA_PAYLOAD(rta
) == sizeof(route
->oif
))
1372 route
->oif
= *(u_int32_t
*)RTA_DATA(rta
);
1375 #ifdef HAVE_RTA_TABLE
1377 if (RTA_PAYLOAD(rta
) == sizeof(route
->table
))
1379 route
->table
= *(u_int32_t
*)RTA_DATA(rta
);
1382 #endif /* HAVE_RTA_TABLE*/
1384 rta
= RTA_NEXT(rta
, rtasize
);
1390 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1392 static host_t
*get_route(private_kernel_netlink_net_t
*this, host_t
*dest
,
1393 bool nexthop
, host_t
*candidate
)
1395 netlink_buf_t request
;
1396 struct nlmsghdr
*hdr
, *out
, *current
;
1400 linked_list_t
*routes
;
1401 rt_entry_t
*route
= NULL
, *best
= NULL
;
1402 enumerator_t
*enumerator
;
1403 host_t
*addr
= NULL
;
1405 memset(&request
, 0, sizeof(request
));
1407 hdr
= (struct nlmsghdr
*)request
;
1408 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
1409 if (dest
->get_family(dest
) == AF_INET
|| this->rta_prefsrc_for_ipv6
||
1410 this->routing_table
)
1411 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1412 * as we want to ignore routes with virtual IPs we cannot use DUMP
1413 * if these routes are not installed in a separate table */
1414 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
1416 hdr
->nlmsg_type
= RTM_GETROUTE
;
1417 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1419 msg
= (struct rtmsg
*)NLMSG_DATA(hdr
);
1420 msg
->rtm_family
= dest
->get_family(dest
);
1423 chunk
= candidate
->get_address(candidate
);
1424 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1426 chunk
= dest
->get_address(dest
);
1427 netlink_add_attribute(hdr
, RTA_DST
, chunk
, sizeof(request
));
1429 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
1431 DBG2(DBG_KNL
, "getting %s to reach %H failed",
1432 nexthop ?
"nexthop" : "address", dest
);
1435 routes
= linked_list_create();
1436 this->lock
->read_lock(this->lock
);
1438 for (current
= out
; NLMSG_OK(current
, len
);
1439 current
= NLMSG_NEXT(current
, len
))
1441 switch (current
->nlmsg_type
)
1450 route
= parse_route(current
, route
);
1452 table
= (uintptr_t)route
->table
;
1453 if (this->rt_exclude
->find_first(this->rt_exclude
, NULL
,
1454 (void**)&table
) == SUCCESS
)
1455 { /* route is from an excluded routing table */
1458 if (this->routing_table
!= 0 &&
1459 route
->table
== this->routing_table
)
1460 { /* route is from our own ipsec routing table */
1463 if (route
->oif
&& !is_interface_up_and_usable(this, route
->oif
))
1464 { /* interface is down */
1467 if (!addr_in_subnet(chunk
, route
->dst
, route
->dst_len
))
1468 { /* route destination does not contain dest */
1472 { /* verify source address, if any */
1473 host_t
*src
= host_create_from_chunk(msg
->rtm_family
,
1475 if (src
&& is_known_vip(this, src
))
1476 { /* ignore routes installed by us */
1480 route
->src_host
= src
;
1482 /* insert route, sorted by decreasing network prefix */
1483 enumerator
= routes
->create_enumerator(routes
);
1484 while (enumerator
->enumerate(enumerator
, &other
))
1486 if (route
->dst_len
> other
->dst_len
)
1491 routes
->insert_before(routes
, enumerator
, route
);
1492 enumerator
->destroy(enumerator
);
1503 rt_entry_destroy(route
);
1506 /* now we have a list of routes matching dest, sorted by net prefix.
1507 * we will look for source addresses for these routes and select the one
1508 * with the preferred source address, if possible */
1509 enumerator
= routes
->create_enumerator(routes
);
1510 while (enumerator
->enumerate(enumerator
, &route
))
1512 if (route
->src_host
)
1513 { /* got a source address with the route, if no preferred source
1514 * is given or it matches we are done, as this is the best route */
1515 if (!candidate
|| candidate
->ip_equals(candidate
, route
->src_host
))
1520 else if (route
->oif
)
1521 { /* no match yet, maybe it is assigned to the same interface */
1522 host_t
*src
= get_interface_address(this, route
->oif
,
1523 msg
->rtm_family
, candidate
);
1524 if (src
&& src
->ip_equals(src
, candidate
))
1526 route
->src_host
->destroy(route
->src_host
);
1527 route
->src_host
= src
;
1533 /* no luck yet with the source address. if this is the best (first)
1534 * route we store it as fallback in case we don't find a route with
1535 * the preferred source */
1536 best
= best ?
: route
;
1540 { /* no src, but an interface - get address from it */
1541 route
->src_host
= get_interface_address(this, route
->oif
,
1542 msg
->rtm_family
, candidate
);
1543 if (route
->src_host
)
1544 { /* we handle this address the same as the one above */
1546 candidate
->ip_equals(candidate
, route
->src_host
))
1551 best
= best ?
: route
;
1556 { /* no src, no iface, but a gateway - lookup src to reach gtw */
1559 gtw
= host_create_from_chunk(msg
->rtm_family
, route
->gtw
, 0);
1560 route
->src_host
= get_route(this, gtw
, FALSE
, candidate
);
1562 if (route
->src_host
)
1563 { /* more of the same */
1565 candidate
->ip_equals(candidate
, route
->src_host
))
1570 best
= best ?
: route
;
1574 enumerator
->destroy(enumerator
);
1577 { /* nexthop lookup, return gateway if any */
1578 if (best
|| routes
->get_first(routes
, (void**)&best
) == SUCCESS
)
1580 addr
= host_create_from_chunk(msg
->rtm_family
, best
->gtw
, 0);
1582 addr
= addr ?
: dest
->clone(dest
);
1588 addr
= best
->src_host
->clone(best
->src_host
);
1591 this->lock
->unlock(this->lock
);
1592 routes
->destroy_function(routes
, (void*)rt_entry_destroy
);
1597 DBG2(DBG_KNL
, "using %H as %s to reach %H", addr
,
1598 nexthop ?
"nexthop" : "address", dest
);
1602 DBG2(DBG_KNL
, "no %s found to reach %H",
1603 nexthop ?
"nexthop" : "address", dest
);
1608 METHOD(kernel_net_t
, get_source_addr
, host_t
*,
1609 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
1611 return get_route(this, dest
, FALSE
, src
);
1614 METHOD(kernel_net_t
, get_nexthop
, host_t
*,
1615 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
1617 return get_route(this, dest
, TRUE
, src
);
1621 * Manages the creation and deletion of ip addresses on an interface.
1622 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1624 static status_t
manage_ipaddr(private_kernel_netlink_net_t
*this, int nlmsg_type
,
1625 int flags
, int if_index
, host_t
*ip
)
1627 netlink_buf_t request
;
1628 struct nlmsghdr
*hdr
;
1629 struct ifaddrmsg
*msg
;
1632 memset(&request
, 0, sizeof(request
));
1634 chunk
= ip
->get_address(ip
);
1636 hdr
= (struct nlmsghdr
*)request
;
1637 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
1638 hdr
->nlmsg_type
= nlmsg_type
;
1639 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrmsg
));
1641 msg
= (struct ifaddrmsg
*)NLMSG_DATA(hdr
);
1642 msg
->ifa_family
= ip
->get_family(ip
);
1644 msg
->ifa_prefixlen
= 8 * chunk
.len
;
1645 msg
->ifa_scope
= RT_SCOPE_UNIVERSE
;
1646 msg
->ifa_index
= if_index
;
1648 netlink_add_attribute(hdr
, IFA_LOCAL
, chunk
, sizeof(request
));
1650 return this->socket
->send_ack(this->socket
, hdr
);
1653 METHOD(kernel_net_t
, add_ip
, status_t
,
1654 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, host_t
*iface_ip
)
1656 addr_map_entry_t
*entry
, lookup
= {
1659 iface_entry_t
*iface
= NULL
;
1661 if (!this->install_virtual_ip
)
1662 { /* disabled by config */
1666 this->lock
->write_lock(this->lock
);
1667 /* the virtual IP might actually be installed as regular IP, in which case
1668 * we don't track it as virtual IP */
1669 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1670 (void*)addr_map_entry_match
);
1672 { /* otherwise it might already be installed as virtual IP */
1673 entry
= this->vips
->get_match(this->vips
, &lookup
,
1674 (void*)addr_map_entry_match
);
1676 { /* the vip we found can be in one of three states: 1) installed and
1677 * ready, 2) just added by another thread, but not yet confirmed to
1678 * be installed by the kernel, 3) just deleted, but not yet gone.
1679 * Then while we wait below, several things could happen (as we
1680 * release the lock). For instance, the interface could disappear,
1681 * or the IP is finally deleted, and it reappears on a different
1682 * interface. All these cases are handled by the call below. */
1683 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
1685 this->condvar
->wait(this->condvar
, this->lock
);
1689 entry
->addr
->refcount
++;
1695 DBG2(DBG_KNL
, "virtual IP %H is already installed on %s", virtual_ip
,
1696 entry
->iface
->ifname
);
1697 this->lock
->unlock(this->lock
);
1700 /* try to find the target interface, either by config or via src ip */
1701 if (!this->install_virtual_ip_on
||
1702 this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1703 (void**)&iface
, this->install_virtual_ip_on
) != SUCCESS
)
1705 lookup
.ip
= iface_ip
;
1706 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1707 (void*)addr_map_entry_match
);
1709 { /* if we don't find the requested interface we just use the first */
1710 this->ifaces
->get_first(this->ifaces
, (void**)&iface
);
1714 iface
= entry
->iface
;
1722 .ip
= virtual_ip
->clone(virtual_ip
),
1724 .scope
= RT_SCOPE_UNIVERSE
,
1726 iface
->addrs
->insert_last(iface
->addrs
, addr
);
1727 addr_map_entry_add(this->vips
, addr
, iface
);
1728 if (manage_ipaddr(this, RTM_NEWADDR
, NLM_F_CREATE
| NLM_F_EXCL
,
1729 iface
->ifindex
, virtual_ip
) == SUCCESS
)
1731 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
1732 { /* wait until address appears */
1733 this->condvar
->wait(this->condvar
, this->lock
);
1736 { /* we fail if the interface got deleted in the meantime */
1737 DBG2(DBG_KNL
, "virtual IP %H installed on %s", virtual_ip
,
1738 entry
->iface
->ifname
);
1739 this->lock
->unlock(this->lock
);
1743 this->lock
->unlock(this->lock
);
1744 DBG1(DBG_KNL
, "adding virtual IP %H failed", virtual_ip
);
1747 this->lock
->unlock(this->lock
);
1748 DBG1(DBG_KNL
, "no interface available, unable to install virtual IP %H",
1753 METHOD(kernel_net_t
, del_ip
, status_t
,
1754 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
)
1756 addr_map_entry_t
*entry
, lookup
= {
1760 if (!this->install_virtual_ip
)
1761 { /* disabled by config */
1765 DBG2(DBG_KNL
, "deleting virtual IP %H", virtual_ip
);
1767 this->lock
->write_lock(this->lock
);
1768 entry
= this->vips
->get_match(this->vips
, &lookup
,
1769 (void*)addr_map_entry_match
);
1771 { /* we didn't install this IP as virtual IP */
1772 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1773 (void*)addr_map_entry_match
);
1776 DBG2(DBG_KNL
, "not deleting existing IP %H on %s", virtual_ip
,
1777 entry
->iface
->ifname
);
1778 this->lock
->unlock(this->lock
);
1781 DBG2(DBG_KNL
, "virtual IP %H not cached, unable to delete", virtual_ip
);
1782 this->lock
->unlock(this->lock
);
1785 if (entry
->addr
->refcount
== 1)
1789 /* we set this flag so that threads calling add_ip will block and wait
1790 * until the entry is gone, also so we can wait below */
1791 entry
->addr
->installed
= FALSE
;
1792 status
= manage_ipaddr(this, RTM_DELADDR
, 0, entry
->iface
->ifindex
,
1794 if (status
== SUCCESS
)
1795 { /* wait until the address is really gone */
1796 while (is_known_vip(this, virtual_ip
))
1798 this->condvar
->wait(this->condvar
, this->lock
);
1801 this->lock
->unlock(this->lock
);
1806 entry
->addr
->refcount
--;
1808 DBG2(DBG_KNL
, "virtual IP %H used by other SAs, not deleting",
1810 this->lock
->unlock(this->lock
);
1815 * Manages source routes in the routing table.
1816 * By setting the appropriate nlmsg_type, the route gets added or removed.
1818 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
1819 int nlmsg_type
, int flags
, chunk_t dst_net
,
1820 u_int8_t prefixlen
, host_t
*gateway
,
1821 host_t
*src_ip
, char *if_name
)
1823 netlink_buf_t request
;
1824 struct nlmsghdr
*hdr
;
1829 /* if route is 0.0.0.0/0, we can't install it, as it would
1830 * overwrite the default route. Instead, we add two routes:
1831 * 0.0.0.0/1 and 128.0.0.0/1 */
1832 if (this->routing_table
== 0 && prefixlen
== 0)
1835 u_int8_t half_prefixlen
;
1838 half_net
= chunk_alloca(dst_net
.len
);
1839 memset(half_net
.ptr
, 0, half_net
.len
);
1842 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
, half_prefixlen
,
1843 gateway
, src_ip
, if_name
);
1844 half_net
.ptr
[0] |= 0x80;
1845 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
, half_prefixlen
,
1846 gateway
, src_ip
, if_name
);
1850 memset(&request
, 0, sizeof(request
));
1852 hdr
= (struct nlmsghdr
*)request
;
1853 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
1854 hdr
->nlmsg_type
= nlmsg_type
;
1855 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1857 msg
= (struct rtmsg
*)NLMSG_DATA(hdr
);
1858 msg
->rtm_family
= src_ip
->get_family(src_ip
);
1859 msg
->rtm_dst_len
= prefixlen
;
1860 msg
->rtm_table
= this->routing_table
;
1861 msg
->rtm_protocol
= RTPROT_STATIC
;
1862 msg
->rtm_type
= RTN_UNICAST
;
1863 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
1865 netlink_add_attribute(hdr
, RTA_DST
, dst_net
, sizeof(request
));
1866 chunk
= src_ip
->get_address(src_ip
);
1867 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1868 if (gateway
&& gateway
->get_family(gateway
) == src_ip
->get_family(src_ip
))
1870 chunk
= gateway
->get_address(gateway
);
1871 netlink_add_attribute(hdr
, RTA_GATEWAY
, chunk
, sizeof(request
));
1873 ifindex
= get_interface_index(this, if_name
);
1874 chunk
.ptr
= (char*)&ifindex
;
1875 chunk
.len
= sizeof(ifindex
);
1876 netlink_add_attribute(hdr
, RTA_OIF
, chunk
, sizeof(request
));
1878 return this->socket
->send_ack(this->socket
, hdr
);
1881 METHOD(kernel_net_t
, add_route
, status_t
,
1882 private_kernel_netlink_net_t
*this, chunk_t dst_net
, u_int8_t prefixlen
,
1883 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
1886 route_entry_t
*found
, route
= {
1888 .prefixlen
= prefixlen
,
1894 this->routes_lock
->lock(this->routes_lock
);
1895 found
= this->routes
->get(this->routes
, &route
);
1898 this->routes_lock
->unlock(this->routes_lock
);
1899 return ALREADY_DONE
;
1901 found
= route_entry_clone(&route
);
1902 this->routes
->put(this->routes
, found
, found
);
1903 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
1904 dst_net
, prefixlen
, gateway
, src_ip
, if_name
);
1905 this->routes_lock
->unlock(this->routes_lock
);
1909 METHOD(kernel_net_t
, del_route
, status_t
,
1910 private_kernel_netlink_net_t
*this, chunk_t dst_net
, u_int8_t prefixlen
,
1911 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
1914 route_entry_t
*found
, route
= {
1916 .prefixlen
= prefixlen
,
1922 this->routes_lock
->lock(this->routes_lock
);
1923 found
= this->routes
->get(this->routes
, &route
);
1926 this->routes_lock
->unlock(this->routes_lock
);
1929 this->routes
->remove(this->routes
, found
);
1930 route_entry_destroy(found
);
1931 status
= manage_srcroute(this, RTM_DELROUTE
, 0, dst_net
, prefixlen
,
1932 gateway
, src_ip
, if_name
);
1933 this->routes_lock
->unlock(this->routes_lock
);
1938 * Initialize a list of local addresses.
1940 static status_t
init_address_list(private_kernel_netlink_net_t
*this)
1942 netlink_buf_t request
;
1943 struct nlmsghdr
*out
, *current
, *in
;
1944 struct rtgenmsg
*msg
;
1946 enumerator_t
*ifaces
, *addrs
;
1947 iface_entry_t
*iface
;
1950 DBG2(DBG_KNL
, "known interfaces and IP addresses:");
1952 memset(&request
, 0, sizeof(request
));
1954 in
= (struct nlmsghdr
*)&request
;
1955 in
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtgenmsg
));
1956 in
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_MATCH
| NLM_F_ROOT
;
1957 msg
= (struct rtgenmsg
*)NLMSG_DATA(in
);
1958 msg
->rtgen_family
= AF_UNSPEC
;
1961 in
->nlmsg_type
= RTM_GETLINK
;
1962 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
1967 while (NLMSG_OK(current
, len
))
1969 switch (current
->nlmsg_type
)
1974 process_link(this, current
, FALSE
);
1977 current
= NLMSG_NEXT(current
, len
);
1984 /* get all interface addresses */
1985 in
->nlmsg_type
= RTM_GETADDR
;
1986 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
1991 while (NLMSG_OK(current
, len
))
1993 switch (current
->nlmsg_type
)
1998 process_addr(this, current
, FALSE
);
2001 current
= NLMSG_NEXT(current
, len
);
2008 this->lock
->read_lock(this->lock
);
2009 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
2010 while (ifaces
->enumerate(ifaces
, &iface
))
2012 if (iface_entry_up_and_usable(iface
))
2014 DBG2(DBG_KNL
, " %s", iface
->ifname
);
2015 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
2016 while (addrs
->enumerate(addrs
, (void**)&addr
))
2018 DBG2(DBG_KNL
, " %H", addr
->ip
);
2020 addrs
->destroy(addrs
);
2023 ifaces
->destroy(ifaces
);
2024 this->lock
->unlock(this->lock
);
2029 * create or delete a rule to use our routing table
2031 static status_t
manage_rule(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2032 int family
, u_int32_t table
, u_int32_t prio
)
2034 netlink_buf_t request
;
2035 struct nlmsghdr
*hdr
;
2039 memset(&request
, 0, sizeof(request
));
2040 hdr
= (struct nlmsghdr
*)request
;
2041 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2042 hdr
->nlmsg_type
= nlmsg_type
;
2043 if (nlmsg_type
== RTM_NEWRULE
)
2045 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2047 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2049 msg
= (struct rtmsg
*)NLMSG_DATA(hdr
);
2050 msg
->rtm_table
= table
;
2051 msg
->rtm_family
= family
;
2052 msg
->rtm_protocol
= RTPROT_BOOT
;
2053 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2054 msg
->rtm_type
= RTN_UNICAST
;
2056 chunk
= chunk_from_thing(prio
);
2057 netlink_add_attribute(hdr
, RTA_PRIORITY
, chunk
, sizeof(request
));
2059 return this->socket
->send_ack(this->socket
, hdr
);
2063 * check for kernel features (currently only via version number)
2065 static void check_kernel_features(private_kernel_netlink_net_t
*this)
2067 struct utsname utsname
;
2070 if (uname(&utsname
) == 0)
2072 switch(sscanf(utsname
.release
, "%d.%d.%d", &a
, &b
, &c
))
2077 DBG2(DBG_KNL
, "detected Linux %d.%d.%d, no support for "
2078 "RTA_PREFSRC for IPv6 routes", a
, b
, c
);
2083 /* only 3.x+ uses two part version numbers */
2084 this->rta_prefsrc_for_ipv6
= TRUE
;
2093 * Destroy an address to iface map
2095 static void addr_map_destroy(hashtable_t
*map
)
2097 enumerator_t
*enumerator
;
2098 addr_map_entry_t
*addr
;
2100 enumerator
= map
->create_enumerator(map
);
2101 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&addr
))
2105 enumerator
->destroy(enumerator
);
2109 METHOD(kernel_net_t
, destroy
, void,
2110 private_kernel_netlink_net_t
*this)
2112 enumerator_t
*enumerator
;
2113 route_entry_t
*route
;
2115 if (this->routing_table
)
2117 manage_rule(this, RTM_DELRULE
, AF_INET
, this->routing_table
,
2118 this->routing_table_prio
);
2119 manage_rule(this, RTM_DELRULE
, AF_INET6
, this->routing_table
,
2120 this->routing_table_prio
);
2122 if (this->socket_events
> 0)
2124 close(this->socket_events
);
2126 enumerator
= this->routes
->create_enumerator(this->routes
);
2127 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
2129 manage_srcroute(this, RTM_DELROUTE
, 0, route
->dst_net
, route
->prefixlen
,
2130 route
->gateway
, route
->src_ip
, route
->if_name
);
2131 route_entry_destroy(route
);
2133 enumerator
->destroy(enumerator
);
2134 this->routes
->destroy(this->routes
);
2135 this->routes_lock
->destroy(this->routes_lock
);
2136 DESTROY_IF(this->socket
);
2138 net_changes_clear(this);
2139 this->net_changes
->destroy(this->net_changes
);
2140 this->net_changes_lock
->destroy(this->net_changes_lock
);
2142 addr_map_destroy(this->addrs
);
2143 addr_map_destroy(this->vips
);
2145 this->ifaces
->destroy_function(this->ifaces
, (void*)iface_entry_destroy
);
2146 this->rt_exclude
->destroy(this->rt_exclude
);
2147 this->roam_lock
->destroy(this->roam_lock
);
2148 this->condvar
->destroy(this->condvar
);
2149 this->lock
->destroy(this->lock
);
2154 * Described in header.
2156 kernel_netlink_net_t
*kernel_netlink_net_create()
2158 private_kernel_netlink_net_t
*this;
2159 enumerator_t
*enumerator
;
2160 bool register_for_events
= TRUE
;
2166 .get_interface
= _get_interface_name
,
2167 .create_address_enumerator
= _create_address_enumerator
,
2168 .get_source_addr
= _get_source_addr
,
2169 .get_nexthop
= _get_nexthop
,
2172 .add_route
= _add_route
,
2173 .del_route
= _del_route
,
2174 .destroy
= _destroy
,
2177 .socket
= netlink_socket_create(NETLINK_ROUTE
),
2178 .rt_exclude
= linked_list_create(),
2179 .routes
= hashtable_create((hashtable_hash_t
)route_entry_hash
,
2180 (hashtable_equals_t
)route_entry_equals
, 16),
2181 .net_changes
= hashtable_create(
2182 (hashtable_hash_t
)net_change_hash
,
2183 (hashtable_equals_t
)net_change_equals
, 16),
2184 .addrs
= hashtable_create(
2185 (hashtable_hash_t
)addr_map_entry_hash
,
2186 (hashtable_equals_t
)addr_map_entry_equals
, 16),
2187 .vips
= hashtable_create((hashtable_hash_t
)addr_map_entry_hash
,
2188 (hashtable_equals_t
)addr_map_entry_equals
, 16),
2189 .routes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
2190 .net_changes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
2191 .ifaces
= linked_list_create(),
2192 .lock
= rwlock_create(RWLOCK_TYPE_DEFAULT
),
2193 .condvar
= rwlock_condvar_create(),
2194 .roam_lock
= spinlock_create(),
2195 .routing_table
= lib
->settings
->get_int(lib
->settings
,
2196 "%s.routing_table", ROUTING_TABLE
, hydra
->daemon
),
2197 .routing_table_prio
= lib
->settings
->get_int(lib
->settings
,
2198 "%s.routing_table_prio", ROUTING_TABLE_PRIO
, hydra
->daemon
),
2199 .process_route
= lib
->settings
->get_bool(lib
->settings
,
2200 "%s.process_route", TRUE
, hydra
->daemon
),
2201 .install_virtual_ip
= lib
->settings
->get_bool(lib
->settings
,
2202 "%s.install_virtual_ip", TRUE
, hydra
->daemon
),
2203 .install_virtual_ip_on
= lib
->settings
->get_str(lib
->settings
,
2204 "%s.install_virtual_ip_on", NULL
, hydra
->daemon
),
2206 timerclear(&this->last_route_reinstall
);
2207 timerclear(&this->next_roam
);
2209 check_kernel_features(this);
2211 if (streq(hydra
->daemon
, "starter"))
2212 { /* starter has no threads, so we do not register for kernel events */
2213 register_for_events
= FALSE
;
2216 exclude
= lib
->settings
->get_str(lib
->settings
,
2217 "%s.ignore_routing_tables", NULL
, hydra
->daemon
);
2223 enumerator
= enumerator_create_token(exclude
, " ", " ");
2224 while (enumerator
->enumerate(enumerator
, &token
))
2227 table
= strtoul(token
, NULL
, 10);
2231 this->rt_exclude
->insert_last(this->rt_exclude
, (void*)table
);
2234 enumerator
->destroy(enumerator
);
2237 if (register_for_events
)
2239 struct sockaddr_nl addr
;
2241 memset(&addr
, 0, sizeof(addr
));
2242 addr
.nl_family
= AF_NETLINK
;
2244 /* create and bind RT socket for events (address/interface/route changes) */
2245 this->socket_events
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
2246 if (this->socket_events
< 0)
2248 DBG1(DBG_KNL
, "unable to create RT event socket");
2252 addr
.nl_groups
= RTMGRP_IPV4_IFADDR
| RTMGRP_IPV6_IFADDR
|
2253 RTMGRP_IPV4_ROUTE
| RTMGRP_IPV6_ROUTE
| RTMGRP_LINK
;
2254 if (bind(this->socket_events
, (struct sockaddr
*)&addr
, sizeof(addr
)))
2256 DBG1(DBG_KNL
, "unable to bind RT event socket");
2261 lib
->processor
->queue_job(lib
->processor
,
2262 (job_t
*)callback_job_create_with_prio(
2263 (callback_job_cb_t
)receive_events
, this, NULL
,
2264 (callback_job_cancel_t
)return_false
, JOB_PRIO_CRITICAL
));
2267 if (init_address_list(this) != SUCCESS
)
2269 DBG1(DBG_KNL
, "unable to get interface list");
2274 if (this->routing_table
)
2276 if (manage_rule(this, RTM_NEWRULE
, AF_INET
, this->routing_table
,
2277 this->routing_table_prio
) != SUCCESS
)
2279 DBG1(DBG_KNL
, "unable to create IPv4 routing table rule");
2281 if (manage_rule(this, RTM_NEWRULE
, AF_INET6
, this->routing_table
,
2282 this->routing_table_prio
) != SUCCESS
)
2284 DBG1(DBG_KNL
, "unable to create IPv6 routing table rule");
2288 return &this->public;