kernel-netlink: Add global option to set MTU on installed routes
[strongswan.git] / src / libhydra / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008-2014 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 */
16
17 /*
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
20 *
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
30 *
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37 * THE SOFTWARE.
38 */
39
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
44 #include <unistd.h>
45 #include <errno.h>
46 #include <net/if.h>
47 #ifdef HAVE_LINUX_FIB_RULES_H
48 #include <linux/fib_rules.h>
49 #endif
50
51 #include "kernel_netlink_net.h"
52 #include "kernel_netlink_shared.h"
53
54 #include <hydra.h>
55 #include <utils/debug.h>
56 #include <threading/mutex.h>
57 #include <threading/rwlock.h>
58 #include <threading/rwlock_condvar.h>
59 #include <threading/spinlock.h>
60 #include <collections/hashtable.h>
61 #include <collections/linked_list.h>
62 #include <processing/jobs/callback_job.h>
63
/** time to wait, in milliseconds, before a roam event is fired */
#define ROAM_DELAY 100

/** time to wait, in milliseconds, before routes get reinstalled */
#define ROUTE_DELAY 100

/** upper limit for the recursion done by get_route() while looking for addresses */
#define MAX_ROUTE_RECURSION 2

/* both values below may be predefined before this point, they default to 0 */
#if !defined(ROUTING_TABLE)
#define ROUTING_TABLE 0
#endif

#if !defined(ROUTING_TABLE_PRIO)
#define ROUTING_TABLE_PRIO 0
#endif
80
81 typedef struct addr_entry_t addr_entry_t;
82
83 /**
84 * IP address in an iface_entry_t
85 */
86 struct addr_entry_t {
87
88 /** the ip address */
89 host_t *ip;
90
91 /** address flags */
92 u_char flags;
93
94 /** scope of the address */
95 u_char scope;
96
97 /** number of times this IP is used, if virtual (i.e. managed by us) */
98 u_int refcount;
99
100 /** TRUE once it is installed, if virtual */
101 bool installed;
102 };
103
104 /**
105 * destroy a addr_entry_t object
106 */
107 static void addr_entry_destroy(addr_entry_t *this)
108 {
109 this->ip->destroy(this->ip);
110 free(this);
111 }
112
113 typedef struct iface_entry_t iface_entry_t;
114
115 /**
116 * A network interface on this system, containing addr_entry_t's
117 */
118 struct iface_entry_t {
119
120 /** interface index */
121 int ifindex;
122
123 /** name of the interface */
124 char ifname[IFNAMSIZ];
125
126 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
127 u_int flags;
128
129 /** list of addresses as host_t */
130 linked_list_t *addrs;
131
132 /** TRUE if usable by config */
133 bool usable;
134 };
135
136 /**
137 * destroy an interface entry
138 */
139 static void iface_entry_destroy(iface_entry_t *this)
140 {
141 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
142 free(this);
143 }
144
145 /**
146 * find an interface entry by index
147 */
148 static bool iface_entry_by_index(iface_entry_t *this, int *ifindex)
149 {
150 return this->ifindex == *ifindex;
151 }
152
153 /**
154 * find an interface entry by name
155 */
156 static bool iface_entry_by_name(iface_entry_t *this, char *ifname)
157 {
158 return streq(this->ifname, ifname);
159 }
160
161 /**
162 * check if an interface is up
163 */
164 static inline bool iface_entry_up(iface_entry_t *iface)
165 {
166 return (iface->flags & IFF_UP) == IFF_UP;
167 }
168
169 /**
170 * check if an interface is up and usable
171 */
172 static inline bool iface_entry_up_and_usable(iface_entry_t *iface)
173 {
174 return iface->usable && iface_entry_up(iface);
175 }
176
177 typedef struct addr_map_entry_t addr_map_entry_t;
178
179 /**
180 * Entry that maps an IP address to an interface entry
181 */
182 struct addr_map_entry_t {
183 /** The IP address */
184 host_t *ip;
185
186 /** The address entry for this IP address */
187 addr_entry_t *addr;
188
189 /** The interface this address is installed on */
190 iface_entry_t *iface;
191 };
192
193 /**
194 * Hash a addr_map_entry_t object, all entries with the same IP address
195 * are stored in the same bucket
196 */
197 static u_int addr_map_entry_hash(addr_map_entry_t *this)
198 {
199 return chunk_hash(this->ip->get_address(this->ip));
200 }
201
202 /**
203 * Compare two addr_map_entry_t objects, two entries are equal if they are
204 * installed on the same interface
205 */
206 static bool addr_map_entry_equals(addr_map_entry_t *a, addr_map_entry_t *b)
207 {
208 return a->iface->ifindex == b->iface->ifindex &&
209 a->ip->ip_equals(a->ip, b->ip);
210 }
211
212 /**
213 * Used with get_match this finds an address entry if it is installed on
214 * an up and usable interface
215 */
216 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t *a,
217 addr_map_entry_t *b)
218 {
219 return iface_entry_up_and_usable(b->iface) &&
220 a->ip->ip_equals(a->ip, b->ip);
221 }
222
223 /**
224 * Used with get_match this finds an address entry if it is installed on
225 * any active local interface
226 */
227 static bool addr_map_entry_match_up(addr_map_entry_t *a, addr_map_entry_t *b)
228 {
229 return iface_entry_up(b->iface) && a->ip->ip_equals(a->ip, b->ip);
230 }
231
232 /**
233 * Used with get_match this finds an address entry if it is installed on
234 * any local interface
235 */
236 static bool addr_map_entry_match(addr_map_entry_t *a, addr_map_entry_t *b)
237 {
238 return a->ip->ip_equals(a->ip, b->ip);
239 }
240
241 typedef struct route_entry_t route_entry_t;
242
243 /**
244 * Installed routing entry
245 */
246 struct route_entry_t {
247 /** Name of the interface the route is bound to */
248 char *if_name;
249
250 /** Source ip of the route */
251 host_t *src_ip;
252
253 /** Gateway for this route */
254 host_t *gateway;
255
256 /** Destination net */
257 chunk_t dst_net;
258
259 /** Destination net prefixlen */
260 u_int8_t prefixlen;
261 };
262
263 /**
264 * Clone a route_entry_t object.
265 */
266 static route_entry_t *route_entry_clone(route_entry_t *this)
267 {
268 route_entry_t *route;
269
270 INIT(route,
271 .if_name = strdup(this->if_name),
272 .src_ip = this->src_ip->clone(this->src_ip),
273 .gateway = this->gateway ? this->gateway->clone(this->gateway) : NULL,
274 .dst_net = chunk_clone(this->dst_net),
275 .prefixlen = this->prefixlen,
276 );
277 return route;
278 }
279
280 /**
281 * Destroy a route_entry_t object
282 */
283 static void route_entry_destroy(route_entry_t *this)
284 {
285 free(this->if_name);
286 DESTROY_IF(this->src_ip);
287 DESTROY_IF(this->gateway);
288 chunk_free(&this->dst_net);
289 free(this);
290 }
291
292 /**
293 * Hash a route_entry_t object
294 */
295 static u_int route_entry_hash(route_entry_t *this)
296 {
297 return chunk_hash_inc(chunk_from_thing(this->prefixlen),
298 chunk_hash(this->dst_net));
299 }
300
301 /**
302 * Compare two route_entry_t objects
303 */
304 static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
305 {
306 if (a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
307 a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
308 chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen)
309 {
310 return (!a->gateway && !b->gateway) || (a->gateway && b->gateway &&
311 a->gateway->ip_equals(a->gateway, b->gateway));
312 }
313 return FALSE;
314 }
315
typedef struct net_change_t net_change_t;

/**
 * A network change queued for later processing
 */
struct net_change_t {
	/** name of the interface that was activated or got a new IP address
	 * (owned by the entry) */
	char *if_name;
};
325
326 /**
327 * Destroy a net_change_t object
328 */
329 static void net_change_destroy(net_change_t *this)
330 {
331 free(this->if_name);
332 free(this);
333 }
334
335 /**
336 * Hash a net_change_t object
337 */
338 static u_int net_change_hash(net_change_t *this)
339 {
340 return chunk_hash(chunk_create(this->if_name, strlen(this->if_name)));
341 }
342
343 /**
344 * Compare two net_change_t objects
345 */
346 static bool net_change_equals(net_change_t *a, net_change_t *b)
347 {
348 return streq(a->if_name, b->if_name);
349 }
350
351 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
352
353 /**
354 * Private variables and functions of kernel_netlink_net class.
355 */
356 struct private_kernel_netlink_net_t {
357 /**
358 * Public part of the kernel_netlink_net_t object.
359 */
360 kernel_netlink_net_t public;
361
362 /**
363 * lock to access various lists and maps
364 */
365 rwlock_t *lock;
366
367 /**
368 * condition variable to signal virtual IP add/removal
369 */
370 rwlock_condvar_t *condvar;
371
372 /**
373 * Cached list of interfaces and its addresses (iface_entry_t)
374 */
375 linked_list_t *ifaces;
376
377 /**
378 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
379 */
380 hashtable_t *addrs;
381
382 /**
383 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
384 */
385 hashtable_t *vips;
386
387 /**
388 * netlink rt socket (routing)
389 */
390 netlink_socket_t *socket;
391
392 /**
393 * Netlink rt socket to receive address change events
394 */
395 int socket_events;
396
397 /**
398 * earliest time of the next roam event
399 */
400 timeval_t next_roam;
401
402 /**
403 * roam event due to address change
404 */
405 bool roam_address;
406
407 /**
408 * lock to check and update roam event time
409 */
410 spinlock_t *roam_lock;
411
412 /**
413 * routing table to install routes
414 */
415 int routing_table;
416
417 /**
418 * priority of used routing table
419 */
420 int routing_table_prio;
421
422 /**
423 * installed routes
424 */
425 hashtable_t *routes;
426
427 /**
428 * mutex for routes
429 */
430 mutex_t *routes_lock;
431
432 /**
433 * interface changes which may trigger route reinstallation
434 */
435 hashtable_t *net_changes;
436
437 /**
438 * mutex for route reinstallation triggers
439 */
440 mutex_t *net_changes_lock;
441
442 /**
443 * time of last route reinstallation
444 */
445 timeval_t last_route_reinstall;
446
447 /**
448 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
449 */
450 bool process_route;
451
452 /**
453 * whether to trigger roam events
454 */
455 bool roam_events;
456
457 /**
458 * whether to actually install virtual IPs
459 */
460 bool install_virtual_ip;
461
462 /**
463 * the name of the interface virtual IP addresses are installed on
464 */
465 char *install_virtual_ip_on;
466
467 /**
468 * whether preferred source addresses can be specified for IPv6 routes
469 */
470 bool rta_prefsrc_for_ipv6;
471
472 /**
473 * whether to prefer temporary IPv6 addresses over public ones
474 */
475 bool prefer_temporary_addrs;
476
477 /**
478 * list with routing tables to be excluded from route lookup
479 */
480 linked_list_t *rt_exclude;
481
482 /**
483 * MTU to set on installed routes
484 */
485 u_int32_t mtu;
486 };
487
488 /**
489 * Forward declaration
490 */
491 static status_t manage_srcroute(private_kernel_netlink_net_t *this,
492 int nlmsg_type, int flags, chunk_t dst_net,
493 u_int8_t prefixlen, host_t *gateway,
494 host_t *src_ip, char *if_name);
495
496 /**
497 * Clear the queued network changes.
498 */
499 static void net_changes_clear(private_kernel_netlink_net_t *this)
500 {
501 enumerator_t *enumerator;
502 net_change_t *change;
503
504 enumerator = this->net_changes->create_enumerator(this->net_changes);
505 while (enumerator->enumerate(enumerator, NULL, (void**)&change))
506 {
507 this->net_changes->remove_at(this->net_changes, enumerator);
508 net_change_destroy(change);
509 }
510 enumerator->destroy(enumerator);
511 }
512
513 /**
514 * Act upon queued network changes.
515 */
516 static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
517 {
518 enumerator_t *enumerator;
519 route_entry_t *route;
520
521 this->net_changes_lock->lock(this->net_changes_lock);
522 this->routes_lock->lock(this->routes_lock);
523
524 enumerator = this->routes->create_enumerator(this->routes);
525 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
526 {
527 net_change_t *change, lookup = {
528 .if_name = route->if_name,
529 };
530 /* check if a change for the outgoing interface is queued */
531 change = this->net_changes->get(this->net_changes, &lookup);
532 if (!change)
533 { /* in case src_ip is not on the outgoing interface */
534 if (this->public.interface.get_interface(&this->public.interface,
535 route->src_ip, &lookup.if_name))
536 {
537 if (!streq(lookup.if_name, route->if_name))
538 {
539 change = this->net_changes->get(this->net_changes, &lookup);
540 }
541 free(lookup.if_name);
542 }
543 }
544 if (change)
545 {
546 manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
547 route->dst_net, route->prefixlen, route->gateway,
548 route->src_ip, route->if_name);
549 }
550 }
551 enumerator->destroy(enumerator);
552 this->routes_lock->unlock(this->routes_lock);
553
554 net_changes_clear(this);
555 this->net_changes_lock->unlock(this->net_changes_lock);
556 return JOB_REQUEUE_NONE;
557 }
558
559 /**
560 * Queue route reinstallation caused by network changes for a given interface.
561 *
562 * The route reinstallation is delayed for a while and only done once for
563 * several calls during this delay, in order to avoid doing it too often.
564 * The interface name is freed.
565 */
566 static void queue_route_reinstall(private_kernel_netlink_net_t *this,
567 char *if_name)
568 {
569 net_change_t *update, *found;
570 timeval_t now;
571 job_t *job;
572
573 INIT(update,
574 .if_name = if_name
575 );
576
577 this->net_changes_lock->lock(this->net_changes_lock);
578 found = this->net_changes->put(this->net_changes, update, update);
579 if (found)
580 {
581 net_change_destroy(found);
582 }
583 time_monotonic(&now);
584 if (timercmp(&now, &this->last_route_reinstall, >))
585 {
586 timeval_add_ms(&now, ROUTE_DELAY);
587 this->last_route_reinstall = now;
588
589 job = (job_t*)callback_job_create((callback_job_cb_t)reinstall_routes,
590 this, NULL, NULL);
591 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROUTE_DELAY);
592 }
593 this->net_changes_lock->unlock(this->net_changes_lock);
594 }
595
596 /**
597 * check if the given IP is known as virtual IP and currently installed
598 *
599 * this function will also return TRUE if the virtual IP entry disappeared.
600 * in that case the returned entry will be NULL.
601 *
602 * this->lock must be held when calling this function
603 */
604 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t *this,
605 host_t *ip, addr_map_entry_t **entry)
606 {
607 addr_map_entry_t lookup = {
608 .ip = ip,
609 };
610
611 *entry = this->vips->get_match(this->vips, &lookup,
612 (void*)addr_map_entry_match);
613 if (*entry == NULL)
614 { /* the virtual IP disappeared */
615 return TRUE;
616 }
617 return (*entry)->addr->installed;
618 }
619
620 /**
621 * check if the given IP is known as virtual IP
622 *
623 * this->lock must be held when calling this function
624 */
625 static bool is_known_vip(private_kernel_netlink_net_t *this, host_t *ip)
626 {
627 addr_map_entry_t lookup = {
628 .ip = ip,
629 };
630
631 return this->vips->get_match(this->vips, &lookup,
632 (void*)addr_map_entry_match) != NULL;
633 }
634
635 /**
636 * Add an address map entry
637 */
638 static void addr_map_entry_add(hashtable_t *map, addr_entry_t *addr,
639 iface_entry_t *iface)
640 {
641 addr_map_entry_t *entry;
642
643 INIT(entry,
644 .ip = addr->ip,
645 .addr = addr,
646 .iface = iface,
647 );
648 entry = map->put(map, entry, entry);
649 free(entry);
650 }
651
652 /**
653 * Remove an address map entry
654 */
655 static void addr_map_entry_remove(hashtable_t *map, addr_entry_t *addr,
656 iface_entry_t *iface)
657 {
658 addr_map_entry_t *entry, lookup = {
659 .ip = addr->ip,
660 .addr = addr,
661 .iface = iface,
662 };
663
664 entry = map->remove(map, &lookup);
665 free(entry);
666 }
667
668 /**
669 * Determine the type or scope of the given unicast IP address. This is not
670 * the same thing returned in rtm_scope/ifa_scope.
671 *
672 * We use return values as defined in RFC 6724 (referring to RFC 4291).
673 */
674 static u_char get_scope(host_t *ip)
675 {
676 chunk_t addr;
677
678 addr = ip->get_address(ip);
679 switch (addr.len)
680 {
681 case 4:
682 /* we use the mapping defined in RFC 6724, 3.2 */
683 if (addr.ptr[0] == 127)
684 { /* link-local, same as the IPv6 loopback address */
685 return 2;
686 }
687 if (addr.ptr[0] == 169 && addr.ptr[1] == 254)
688 { /* link-local */
689 return 2;
690 }
691 break;
692 case 16:
693 if (IN6_IS_ADDR_LOOPBACK((struct in6_addr*)addr.ptr))
694 { /* link-local, according to RFC 4291, 2.5.3 */
695 return 2;
696 }
697 if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr*)addr.ptr))
698 {
699 return 2;
700 }
701 if (IN6_IS_ADDR_SITELOCAL((struct in6_addr*)addr.ptr))
702 { /* deprecated, according to RFC 4291, 2.5.7 */
703 return 5;
704 }
705 break;
706 default:
707 break;
708 }
709 /* global */
710 return 14;
711 }
712
713 /**
714 * Returns the length of the common prefix in bits up to the length of a's
715 * prefix, defined by RFC 6724 as the portion of the address not including the
716 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
717 */
718 static u_char common_prefix(host_t *a, host_t *b)
719 {
720 chunk_t aa, ba;
721 u_char byte, bits = 0, match;
722
723 aa = a->get_address(a);
724 ba = b->get_address(b);
725 for (byte = 0; byte < 8; byte++)
726 {
727 if (aa.ptr[byte] != ba.ptr[byte])
728 {
729 match = aa.ptr[byte] ^ ba.ptr[byte];
730 for (bits = 8; match; match >>= 1)
731 {
732 bits--;
733 }
734 break;
735 }
736 }
737 return byte * 8 + bits;
738 }
739
740 /**
741 * Compare two IP addresses and return TRUE if the second address is the better
742 * choice of the two to reach the destination.
743 * For IPv6 we approximately follow RFC 6724.
744 */
745 static bool is_address_better(private_kernel_netlink_net_t *this,
746 addr_entry_t *a, addr_entry_t *b, host_t *d)
747 {
748 u_char sa, sb, sd, pa, pb;
749
750 /* rule 2: prefer appropriate scope */
751 if (d)
752 {
753 sa = get_scope(a->ip);
754 sb = get_scope(b->ip);
755 sd = get_scope(d);
756 if (sa < sb)
757 {
758 return sa < sd;
759 }
760 else if (sb < sa)
761 {
762 return sb >= sd;
763 }
764 }
765 if (a->ip->get_family(a->ip) == AF_INET)
766 { /* stop here for IPv4, default to addresses found earlier */
767 return FALSE;
768 }
769 /* rule 3: avoid deprecated addresses (RFC 4862) */
770 if ((a->flags & IFA_F_DEPRECATED) != (b->flags & IFA_F_DEPRECATED))
771 {
772 return a->flags & IFA_F_DEPRECATED;
773 }
774 /* rule 4 is not applicable as we don't know if an address is a home or
775 * care-of addresses.
776 * rule 5 does not apply as we only compare addresses from one interface
777 * rule 6 requires a policy table (optionally configurable) to match
778 * configurable labels
779 */
780 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
781 if ((a->flags & IFA_F_TEMPORARY) != (b->flags & IFA_F_TEMPORARY))
782 {
783 if (this->prefer_temporary_addrs)
784 {
785 return b->flags & IFA_F_TEMPORARY;
786 }
787 return a->flags & IFA_F_TEMPORARY;
788 }
789 /* rule 8: use longest matching prefix */
790 if (d)
791 {
792 pa = common_prefix(a->ip, d);
793 pb = common_prefix(b->ip, d);
794 if (pa != pb)
795 {
796 return pb > pa;
797 }
798 }
799 /* default to addresses found earlier */
800 return FALSE;
801 }
802
803 /**
804 * Get a non-virtual IP address on the given interface.
805 *
806 * If a candidate address is given, we first search for that address and if not
807 * found return the address as above.
808 * Returned host is a clone, has to be freed by caller.
809 *
810 * this->lock must be held when calling this function.
811 */
812 static host_t *get_interface_address(private_kernel_netlink_net_t *this,
813 int ifindex, int family, host_t *dest,
814 host_t *candidate)
815 {
816 iface_entry_t *iface;
817 enumerator_t *addrs;
818 addr_entry_t *addr, *best = NULL;
819
820 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
821 (void**)&iface, &ifindex) == SUCCESS)
822 {
823 if (iface->usable)
824 { /* only use interfaces not excluded by config */
825 addrs = iface->addrs->create_enumerator(iface->addrs);
826 while (addrs->enumerate(addrs, &addr))
827 {
828 if (addr->refcount ||
829 addr->ip->get_family(addr->ip) != family)
830 { /* ignore virtual IP addresses and ensure family matches */
831 continue;
832 }
833 if (candidate && candidate->ip_equals(candidate, addr->ip))
834 { /* stop if we find the candidate */
835 best = addr;
836 break;
837 }
838 else if (!best || is_address_better(this, best, addr, dest))
839 {
840 best = addr;
841 }
842 }
843 addrs->destroy(addrs);
844 }
845 }
846 return best ? best->ip->clone(best->ip) : NULL;
847 }
848
849 /**
850 * callback function that raises the delayed roam event
851 */
852 static job_requeue_t roam_event(private_kernel_netlink_net_t *this)
853 {
854 bool address;
855
856 this->roam_lock->lock(this->roam_lock);
857 address = this->roam_address;
858 this->roam_address = FALSE;
859 this->roam_lock->unlock(this->roam_lock);
860 hydra->kernel_interface->roam(hydra->kernel_interface, address);
861 return JOB_REQUEUE_NONE;
862 }
863
864 /**
865 * fire a roaming event. we delay it for a bit and fire only one event
866 * for multiple calls. otherwise we would create too many events.
867 */
868 static void fire_roam_event(private_kernel_netlink_net_t *this, bool address)
869 {
870 timeval_t now;
871 job_t *job;
872
873 if (!this->roam_events)
874 {
875 return;
876 }
877
878 time_monotonic(&now);
879 this->roam_lock->lock(this->roam_lock);
880 this->roam_address |= address;
881 if (!timercmp(&now, &this->next_roam, >))
882 {
883 this->roam_lock->unlock(this->roam_lock);
884 return;
885 }
886 timeval_add_ms(&now, ROAM_DELAY);
887 this->next_roam = now;
888 this->roam_lock->unlock(this->roam_lock);
889
890 job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
891 this, NULL, NULL);
892 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
893 }
894
895 /**
896 * check if an interface with a given index is up and usable
897 *
898 * this->lock must be locked when calling this function
899 */
900 static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
901 int index)
902 {
903 iface_entry_t *iface;
904
905 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
906 (void**)&iface, &index) == SUCCESS)
907 {
908 return iface_entry_up_and_usable(iface);
909 }
910 return FALSE;
911 }
912
913 /**
914 * unregister the current addr_entry_t from the hashtable it is stored in
915 *
916 * this->lock must be locked when calling this function
917 */
918 static void addr_entry_unregister(addr_entry_t *addr, iface_entry_t *iface,
919 private_kernel_netlink_net_t *this)
920 {
921 if (addr->refcount)
922 {
923 addr_map_entry_remove(this->vips, addr, iface);
924 this->condvar->broadcast(this->condvar);
925 return;
926 }
927 addr_map_entry_remove(this->addrs, addr, iface);
928 }
929
930 /**
931 * process RTM_NEWLINK/RTM_DELLINK from kernel
932 */
933 static void process_link(private_kernel_netlink_net_t *this,
934 struct nlmsghdr *hdr, bool event)
935 {
936 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
937 struct rtattr *rta = IFLA_RTA(msg);
938 size_t rtasize = IFLA_PAYLOAD (hdr);
939 enumerator_t *enumerator;
940 iface_entry_t *current, *entry = NULL;
941 char *name = NULL;
942 bool update = FALSE, update_routes = FALSE;
943
944 while (RTA_OK(rta, rtasize))
945 {
946 switch (rta->rta_type)
947 {
948 case IFLA_IFNAME:
949 name = RTA_DATA(rta);
950 break;
951 }
952 rta = RTA_NEXT(rta, rtasize);
953 }
954 if (!name)
955 {
956 name = "(unknown)";
957 }
958
959 this->lock->write_lock(this->lock);
960 switch (hdr->nlmsg_type)
961 {
962 case RTM_NEWLINK:
963 {
964 if (this->ifaces->find_first(this->ifaces,
965 (void*)iface_entry_by_index, (void**)&entry,
966 &msg->ifi_index) != SUCCESS)
967 {
968 INIT(entry,
969 .ifindex = msg->ifi_index,
970 .addrs = linked_list_create(),
971 .usable = hydra->kernel_interface->is_interface_usable(
972 hydra->kernel_interface, name),
973 );
974 this->ifaces->insert_last(this->ifaces, entry);
975 }
976 strncpy(entry->ifname, name, IFNAMSIZ);
977 entry->ifname[IFNAMSIZ-1] = '\0';
978 if (event && entry->usable)
979 {
980 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
981 {
982 update = update_routes = TRUE;
983 DBG1(DBG_KNL, "interface %s activated", name);
984 }
985 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
986 {
987 update = TRUE;
988 DBG1(DBG_KNL, "interface %s deactivated", name);
989 }
990 }
991 entry->flags = msg->ifi_flags;
992 break;
993 }
994 case RTM_DELLINK:
995 {
996 enumerator = this->ifaces->create_enumerator(this->ifaces);
997 while (enumerator->enumerate(enumerator, &current))
998 {
999 if (current->ifindex == msg->ifi_index)
1000 {
1001 if (event && current->usable)
1002 {
1003 update = TRUE;
1004 DBG1(DBG_KNL, "interface %s deleted", current->ifname);
1005 }
1006 /* TODO: move virtual IPs installed on this interface to
1007 * another interface? */
1008 this->ifaces->remove_at(this->ifaces, enumerator);
1009 current->addrs->invoke_function(current->addrs,
1010 (void*)addr_entry_unregister, current, this);
1011 iface_entry_destroy(current);
1012 break;
1013 }
1014 }
1015 enumerator->destroy(enumerator);
1016 break;
1017 }
1018 }
1019 this->lock->unlock(this->lock);
1020
1021 if (update_routes && event)
1022 {
1023 queue_route_reinstall(this, strdup(name));
1024 }
1025
1026 if (update && event)
1027 {
1028 fire_roam_event(this, TRUE);
1029 }
1030 }
1031
1032 /**
1033 * process RTM_NEWADDR/RTM_DELADDR from kernel
1034 */
1035 static void process_addr(private_kernel_netlink_net_t *this,
1036 struct nlmsghdr *hdr, bool event)
1037 {
1038 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
1039 struct rtattr *rta = IFA_RTA(msg);
1040 size_t rtasize = IFA_PAYLOAD (hdr);
1041 host_t *host = NULL;
1042 iface_entry_t *iface;
1043 chunk_t local = chunk_empty, address = chunk_empty;
1044 char *route_ifname = NULL;
1045 bool update = FALSE, found = FALSE, changed = FALSE;
1046
1047 while (RTA_OK(rta, rtasize))
1048 {
1049 switch (rta->rta_type)
1050 {
1051 case IFA_LOCAL:
1052 local.ptr = RTA_DATA(rta);
1053 local.len = RTA_PAYLOAD(rta);
1054 break;
1055 case IFA_ADDRESS:
1056 address.ptr = RTA_DATA(rta);
1057 address.len = RTA_PAYLOAD(rta);
1058 break;
1059 }
1060 rta = RTA_NEXT(rta, rtasize);
1061 }
1062
1063 /* For PPP interfaces, we need the IFA_LOCAL address,
1064 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1065 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1066 if (local.ptr)
1067 {
1068 host = host_create_from_chunk(msg->ifa_family, local, 0);
1069 }
1070 else if (address.ptr)
1071 {
1072 host = host_create_from_chunk(msg->ifa_family, address, 0);
1073 }
1074
1075 if (host == NULL)
1076 { /* bad family? */
1077 return;
1078 }
1079
1080 this->lock->write_lock(this->lock);
1081 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
1082 (void**)&iface, &msg->ifa_index) == SUCCESS)
1083 {
1084 addr_map_entry_t *entry, lookup = {
1085 .ip = host,
1086 .iface = iface,
1087 };
1088 addr_entry_t *addr;
1089
1090 entry = this->vips->get(this->vips, &lookup);
1091 if (entry)
1092 {
1093 if (hdr->nlmsg_type == RTM_NEWADDR)
1094 { /* mark as installed and signal waiting threads */
1095 entry->addr->installed = TRUE;
1096 }
1097 else
1098 { /* the address was already marked as uninstalled */
1099 addr = entry->addr;
1100 iface->addrs->remove(iface->addrs, addr, NULL);
1101 addr_map_entry_remove(this->vips, addr, iface);
1102 addr_entry_destroy(addr);
1103 }
1104 /* no roam events etc. for virtual IPs */
1105 this->condvar->broadcast(this->condvar);
1106 this->lock->unlock(this->lock);
1107 host->destroy(host);
1108 return;
1109 }
1110 entry = this->addrs->get(this->addrs, &lookup);
1111 if (entry)
1112 {
1113 if (hdr->nlmsg_type == RTM_DELADDR)
1114 {
1115 found = TRUE;
1116 addr = entry->addr;
1117 iface->addrs->remove(iface->addrs, addr, NULL);
1118 if (iface->usable)
1119 {
1120 changed = TRUE;
1121 DBG1(DBG_KNL, "%H disappeared from %s", host,
1122 iface->ifname);
1123 }
1124 addr_map_entry_remove(this->addrs, addr, iface);
1125 addr_entry_destroy(addr);
1126 }
1127 }
1128 else
1129 {
1130 if (hdr->nlmsg_type == RTM_NEWADDR)
1131 {
1132 found = TRUE;
1133 changed = TRUE;
1134 route_ifname = strdup(iface->ifname);
1135 INIT(addr,
1136 .ip = host->clone(host),
1137 .flags = msg->ifa_flags,
1138 .scope = msg->ifa_scope,
1139 );
1140 iface->addrs->insert_last(iface->addrs, addr);
1141 addr_map_entry_add(this->addrs, addr, iface);
1142 if (event && iface->usable)
1143 {
1144 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
1145 }
1146 }
1147 }
1148 if (found && (iface->flags & IFF_UP))
1149 {
1150 update = TRUE;
1151 }
1152 if (!iface->usable)
1153 { /* ignore events for interfaces excluded by config */
1154 update = changed = FALSE;
1155 }
1156 }
1157 this->lock->unlock(this->lock);
1158
1159 if (update && event && route_ifname)
1160 {
1161 queue_route_reinstall(this, route_ifname);
1162 }
1163 else
1164 {
1165 free(route_ifname);
1166 }
1167 host->destroy(host);
1168
1169 /* send an update to all IKE_SAs */
1170 if (update && event && changed)
1171 {
1172 fire_roam_event(this, TRUE);
1173 }
1174 }
1175
1176 /**
1177 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1178 */
1179 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
1180 {
1181 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
1182 struct rtattr *rta = RTM_RTA(msg);
1183 size_t rtasize = RTM_PAYLOAD(hdr);
1184 u_int32_t rta_oif = 0;
1185 host_t *host = NULL;
1186
1187 /* ignore routes added by us or in the local routing table (local addrs) */
1188 if (msg->rtm_table && (msg->rtm_table == this->routing_table ||
1189 msg->rtm_table == RT_TABLE_LOCAL))
1190 {
1191 return;
1192 }
1193 else if (msg->rtm_flags & RTM_F_CLONED)
1194 { /* ignore cached routes, seem to be created a lot for IPv6 */
1195 return;
1196 }
1197
1198 while (RTA_OK(rta, rtasize))
1199 {
1200 switch (rta->rta_type)
1201 {
1202 case RTA_PREFSRC:
1203 DESTROY_IF(host);
1204 host = host_create_from_chunk(msg->rtm_family,
1205 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
1206 break;
1207 case RTA_OIF:
1208 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1209 {
1210 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1211 }
1212 break;
1213 }
1214 rta = RTA_NEXT(rta, rtasize);
1215 }
1216 this->lock->read_lock(this->lock);
1217 if (rta_oif && !is_interface_up_and_usable(this, rta_oif))
1218 { /* ignore route changes for interfaces that are ignored or down */
1219 this->lock->unlock(this->lock);
1220 DESTROY_IF(host);
1221 return;
1222 }
1223 if (!host && rta_oif)
1224 {
1225 host = get_interface_address(this, rta_oif, msg->rtm_family,
1226 NULL, NULL);
1227 }
1228 if (!host || is_known_vip(this, host))
1229 { /* ignore routes added for virtual IPs */
1230 this->lock->unlock(this->lock);
1231 DESTROY_IF(host);
1232 return;
1233 }
1234 this->lock->unlock(this->lock);
1235 fire_roam_event(this, FALSE);
1236 host->destroy(host);
1237 }
1238
1239 /**
1240 * Receives events from kernel
1241 */
1242 static bool receive_events(private_kernel_netlink_net_t *this, int fd,
1243 watcher_event_t event)
1244 {
1245 char response[1536];
1246 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
1247 struct sockaddr_nl addr;
1248 socklen_t addr_len = sizeof(addr);
1249 int len;
1250
1251 len = recvfrom(this->socket_events, response, sizeof(response),
1252 MSG_DONTWAIT, (struct sockaddr*)&addr, &addr_len);
1253 if (len < 0)
1254 {
1255 switch (errno)
1256 {
1257 case EINTR:
1258 /* interrupted, try again */
1259 return TRUE;
1260 case EAGAIN:
1261 /* no data ready, select again */
1262 return TRUE;
1263 default:
1264 DBG1(DBG_KNL, "unable to receive from rt event socket");
1265 sleep(1);
1266 return TRUE;
1267 }
1268 }
1269
1270 if (addr.nl_pid != 0)
1271 { /* not from kernel. not interested, try another one */
1272 return TRUE;
1273 }
1274
1275 while (NLMSG_OK(hdr, len))
1276 {
1277 /* looks good so far, dispatch netlink message */
1278 switch (hdr->nlmsg_type)
1279 {
1280 case RTM_NEWADDR:
1281 case RTM_DELADDR:
1282 process_addr(this, hdr, TRUE);
1283 break;
1284 case RTM_NEWLINK:
1285 case RTM_DELLINK:
1286 process_link(this, hdr, TRUE);
1287 break;
1288 case RTM_NEWROUTE:
1289 case RTM_DELROUTE:
1290 if (this->process_route)
1291 {
1292 process_route(this, hdr);
1293 }
1294 break;
1295 default:
1296 break;
1297 }
1298 hdr = NLMSG_NEXT(hdr, len);
1299 }
1300 return TRUE;
1301 }
1302
1303 /** enumerator over addresses */
1304 typedef struct {
1305 private_kernel_netlink_net_t* this;
1306 /** which addresses to enumerate */
1307 kernel_address_type_t which;
1308 } address_enumerator_t;
1309
1310 /**
1311 * cleanup function for address enumerator
1312 */
1313 static void address_enumerator_destroy(address_enumerator_t *data)
1314 {
1315 data->this->lock->unlock(data->this->lock);
1316 free(data);
1317 }
1318
1319 /**
1320 * filter for addresses
1321 */
1322 static bool filter_addresses(address_enumerator_t *data,
1323 addr_entry_t** in, host_t** out)
1324 {
1325 if (!(data->which & ADDR_TYPE_VIRTUAL) && (*in)->refcount)
1326 { /* skip virtual interfaces added by us */
1327 return FALSE;
1328 }
1329 if (!(data->which & ADDR_TYPE_REGULAR) && !(*in)->refcount)
1330 { /* address is regular, but not requested */
1331 return FALSE;
1332 }
1333 if ((*in)->scope >= RT_SCOPE_LINK)
1334 { /* skip addresses with a unusable scope */
1335 return FALSE;
1336 }
1337 *out = (*in)->ip;
1338 return TRUE;
1339 }
1340
1341 /**
1342 * enumerator constructor for interfaces
1343 */
1344 static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
1345 address_enumerator_t *data)
1346 {
1347 return enumerator_create_filter(
1348 iface->addrs->create_enumerator(iface->addrs),
1349 (void*)filter_addresses, data, NULL);
1350 }
1351
1352 /**
1353 * filter for interfaces
1354 */
1355 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
1356 iface_entry_t** out)
1357 {
1358 if (!(data->which & ADDR_TYPE_IGNORED) && !(*in)->usable)
1359 { /* skip interfaces excluded by config */
1360 return FALSE;
1361 }
1362 if (!(data->which & ADDR_TYPE_LOOPBACK) && ((*in)->flags & IFF_LOOPBACK))
1363 { /* ignore loopback devices */
1364 return FALSE;
1365 }
1366 if (!(data->which & ADDR_TYPE_DOWN) && !((*in)->flags & IFF_UP))
1367 { /* skip interfaces not up */
1368 return FALSE;
1369 }
1370 *out = *in;
1371 return TRUE;
1372 }
1373
1374 METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
1375 private_kernel_netlink_net_t *this, kernel_address_type_t which)
1376 {
1377 address_enumerator_t *data;
1378
1379 INIT(data,
1380 .this = this,
1381 .which = which,
1382 );
1383
1384 this->lock->read_lock(this->lock);
1385 return enumerator_create_nested(
1386 enumerator_create_filter(
1387 this->ifaces->create_enumerator(this->ifaces),
1388 (void*)filter_interfaces, data, NULL),
1389 (void*)create_iface_enumerator, data,
1390 (void*)address_enumerator_destroy);
1391 }
1392
1393 METHOD(kernel_net_t, get_interface_name, bool,
1394 private_kernel_netlink_net_t *this, host_t* ip, char **name)
1395 {
1396 addr_map_entry_t *entry, lookup = {
1397 .ip = ip,
1398 };
1399
1400 if (ip->is_anyaddr(ip))
1401 {
1402 return FALSE;
1403 }
1404 this->lock->read_lock(this->lock);
1405 /* first try to find it on an up and usable interface */
1406 entry = this->addrs->get_match(this->addrs, &lookup,
1407 (void*)addr_map_entry_match_up_and_usable);
1408 if (entry)
1409 {
1410 if (name)
1411 {
1412 *name = strdup(entry->iface->ifname);
1413 DBG2(DBG_KNL, "%H is on interface %s", ip, *name);
1414 }
1415 this->lock->unlock(this->lock);
1416 return TRUE;
1417 }
1418 /* in a second step, consider virtual IPs installed by us */
1419 entry = this->vips->get_match(this->vips, &lookup,
1420 (void*)addr_map_entry_match_up_and_usable);
1421 if (entry)
1422 {
1423 if (name)
1424 {
1425 *name = strdup(entry->iface->ifname);
1426 DBG2(DBG_KNL, "virtual IP %H is on interface %s", ip, *name);
1427 }
1428 this->lock->unlock(this->lock);
1429 return TRUE;
1430 }
1431 /* maybe it is installed on an ignored interface */
1432 entry = this->addrs->get_match(this->addrs, &lookup,
1433 (void*)addr_map_entry_match_up);
1434 if (!entry)
1435 {
1436 DBG2(DBG_KNL, "%H is not a local address or the interface is down", ip);
1437 }
1438 this->lock->unlock(this->lock);
1439 return FALSE;
1440 }
1441
1442 /**
1443 * get the index of an interface by name
1444 */
1445 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
1446 {
1447 iface_entry_t *iface;
1448 int ifindex = 0;
1449
1450 DBG2(DBG_KNL, "getting iface index for %s", name);
1451
1452 this->lock->read_lock(this->lock);
1453 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1454 (void**)&iface, name) == SUCCESS)
1455 {
1456 ifindex = iface->ifindex;
1457 }
1458 this->lock->unlock(this->lock);
1459
1460 if (ifindex == 0)
1461 {
1462 DBG1(DBG_KNL, "unable to get interface index for %s", name);
1463 }
1464 return ifindex;
1465 }
1466
1467 /**
1468 * check if an address or net (addr with prefix net bits) is in
1469 * subnet (net with net_len net bits)
1470 */
1471 static bool addr_in_subnet(chunk_t addr, int prefix, chunk_t net, int net_len)
1472 {
1473 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
1474 int byte = 0;
1475
1476 if (net_len == 0)
1477 { /* any address matches a /0 network */
1478 return TRUE;
1479 }
1480 if (addr.len != net.len || net_len > 8 * net.len || prefix < net_len)
1481 {
1482 return FALSE;
1483 }
1484 /* scan through all bytes in network order */
1485 while (net_len > 0)
1486 {
1487 if (net_len < 8)
1488 {
1489 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
1490 }
1491 else
1492 {
1493 if (addr.ptr[byte] != net.ptr[byte])
1494 {
1495 return FALSE;
1496 }
1497 byte++;
1498 net_len -= 8;
1499 }
1500 }
1501 return TRUE;
1502 }
1503
1504 /**
1505 * Store information about a route retrieved via RTNETLINK
1506 */
1507 typedef struct {
1508 chunk_t gtw;
1509 chunk_t src;
1510 chunk_t dst;
1511 host_t *src_host;
1512 u_int8_t dst_len;
1513 u_int32_t table;
1514 u_int32_t oif;
1515 } rt_entry_t;
1516
1517 /**
1518 * Free a route entry
1519 */
1520 static void rt_entry_destroy(rt_entry_t *this)
1521 {
1522 DESTROY_IF(this->src_host);
1523 free(this);
1524 }
1525
1526 /**
1527 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1528 * reused if not NULL.
1529 *
1530 * Returned chunks point to internal data of the Netlink message.
1531 */
1532 static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
1533 {
1534 struct rtattr *rta;
1535 struct rtmsg *msg;
1536 size_t rtasize;
1537
1538 msg = (struct rtmsg*)(NLMSG_DATA(hdr));
1539 rta = RTM_RTA(msg);
1540 rtasize = RTM_PAYLOAD(hdr);
1541
1542 if (route)
1543 {
1544 route->gtw = chunk_empty;
1545 route->src = chunk_empty;
1546 route->dst = chunk_empty;
1547 route->dst_len = msg->rtm_dst_len;
1548 route->table = msg->rtm_table;
1549 route->oif = 0;
1550 }
1551 else
1552 {
1553 INIT(route,
1554 .dst_len = msg->rtm_dst_len,
1555 .table = msg->rtm_table,
1556 );
1557 }
1558
1559 while (RTA_OK(rta, rtasize))
1560 {
1561 switch (rta->rta_type)
1562 {
1563 case RTA_PREFSRC:
1564 route->src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1565 break;
1566 case RTA_GATEWAY:
1567 route->gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1568 break;
1569 case RTA_DST:
1570 route->dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1571 break;
1572 case RTA_OIF:
1573 if (RTA_PAYLOAD(rta) == sizeof(route->oif))
1574 {
1575 route->oif = *(u_int32_t*)RTA_DATA(rta);
1576 }
1577 break;
1578 #ifdef HAVE_RTA_TABLE
1579 case RTA_TABLE:
1580 if (RTA_PAYLOAD(rta) == sizeof(route->table))
1581 {
1582 route->table = *(u_int32_t*)RTA_DATA(rta);
1583 }
1584 break;
1585 #endif /* HAVE_RTA_TABLE*/
1586 }
1587 rta = RTA_NEXT(rta, rtasize);
1588 }
1589 return route;
1590 }
1591
1592 /**
1593 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1594 */
1595 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
1596 int prefix, bool nexthop, host_t *candidate,
1597 u_int recursion)
1598 {
1599 netlink_buf_t request;
1600 struct nlmsghdr *hdr, *out, *current;
1601 struct rtmsg *msg;
1602 chunk_t chunk;
1603 size_t len;
1604 linked_list_t *routes;
1605 rt_entry_t *route = NULL, *best = NULL;
1606 enumerator_t *enumerator;
1607 host_t *addr = NULL;
1608 bool match_net;
1609 int family;
1610
1611 if (recursion > MAX_ROUTE_RECURSION)
1612 {
1613 return NULL;
1614 }
1615 chunk = dest->get_address(dest);
1616 len = chunk.len * 8;
1617 prefix = prefix < 0 ? len : min(prefix, len);
1618 match_net = prefix != len;
1619
1620 memset(&request, 0, sizeof(request));
1621
1622 family = dest->get_family(dest);
1623 hdr = (struct nlmsghdr*)request;
1624 hdr->nlmsg_flags = NLM_F_REQUEST;
1625 if (family == AF_INET || this->rta_prefsrc_for_ipv6 ||
1626 this->routing_table || match_net)
1627 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1628 * as we want to ignore routes with virtual IPs we cannot use DUMP
1629 * if these routes are not installed in a separate table */
1630 hdr->nlmsg_flags |= NLM_F_DUMP;
1631 }
1632 hdr->nlmsg_type = RTM_GETROUTE;
1633 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1634
1635 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1636 msg->rtm_family = family;
1637 if (candidate)
1638 {
1639 chunk = candidate->get_address(candidate);
1640 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1641 }
1642 if (!match_net)
1643 {
1644 chunk = dest->get_address(dest);
1645 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1646 }
1647
1648 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
1649 {
1650 DBG2(DBG_KNL, "getting %s to reach %H/%d failed",
1651 nexthop ? "nexthop" : "address", dest, prefix);
1652 return NULL;
1653 }
1654 routes = linked_list_create();
1655 this->lock->read_lock(this->lock);
1656
1657 for (current = out; NLMSG_OK(current, len);
1658 current = NLMSG_NEXT(current, len))
1659 {
1660 switch (current->nlmsg_type)
1661 {
1662 case NLMSG_DONE:
1663 break;
1664 case RTM_NEWROUTE:
1665 {
1666 rt_entry_t *other;
1667 uintptr_t table;
1668
1669 route = parse_route(current, route);
1670
1671 table = (uintptr_t)route->table;
1672 if (this->rt_exclude->find_first(this->rt_exclude, NULL,
1673 (void**)&table) == SUCCESS)
1674 { /* route is from an excluded routing table */
1675 continue;
1676 }
1677 if (this->routing_table != 0 &&
1678 route->table == this->routing_table)
1679 { /* route is from our own ipsec routing table */
1680 continue;
1681 }
1682 if (route->oif && !is_interface_up_and_usable(this, route->oif))
1683 { /* interface is down */
1684 continue;
1685 }
1686 if (!addr_in_subnet(chunk, prefix, route->dst, route->dst_len))
1687 { /* route destination does not contain dest */
1688 continue;
1689 }
1690 if (route->src.ptr)
1691 { /* verify source address, if any */
1692 host_t *src = host_create_from_chunk(msg->rtm_family,
1693 route->src, 0);
1694 if (src && is_known_vip(this, src))
1695 { /* ignore routes installed by us */
1696 src->destroy(src);
1697 continue;
1698 }
1699 route->src_host = src;
1700 }
1701 /* insert route, sorted by decreasing network prefix */
1702 enumerator = routes->create_enumerator(routes);
1703 while (enumerator->enumerate(enumerator, &other))
1704 {
1705 if (route->dst_len > other->dst_len)
1706 {
1707 break;
1708 }
1709 }
1710 routes->insert_before(routes, enumerator, route);
1711 enumerator->destroy(enumerator);
1712 route = NULL;
1713 continue;
1714 }
1715 default:
1716 continue;
1717 }
1718 break;
1719 }
1720 if (route)
1721 {
1722 rt_entry_destroy(route);
1723 }
1724
1725 /* now we have a list of routes matching dest, sorted by net prefix.
1726 * we will look for source addresses for these routes and select the one
1727 * with the preferred source address, if possible */
1728 enumerator = routes->create_enumerator(routes);
1729 while (enumerator->enumerate(enumerator, &route))
1730 {
1731 if (route->src_host)
1732 { /* got a source address with the route, if no preferred source
1733 * is given or it matches we are done, as this is the best route */
1734 if (!candidate || candidate->ip_equals(candidate, route->src_host))
1735 {
1736 best = route;
1737 break;
1738 }
1739 else if (route->oif)
1740 { /* no match yet, maybe it is assigned to the same interface */
1741 host_t *src = get_interface_address(this, route->oif,
1742 msg->rtm_family, dest, candidate);
1743 if (src && src->ip_equals(src, candidate))
1744 {
1745 route->src_host->destroy(route->src_host);
1746 route->src_host = src;
1747 best = route;
1748 break;
1749 }
1750 DESTROY_IF(src);
1751 }
1752 /* no luck yet with the source address. if this is the best (first)
1753 * route we store it as fallback in case we don't find a route with
1754 * the preferred source */
1755 best = best ?: route;
1756 continue;
1757 }
1758 if (route->oif)
1759 { /* no src, but an interface - get address from it */
1760 route->src_host = get_interface_address(this, route->oif,
1761 msg->rtm_family, dest, candidate);
1762 if (route->src_host)
1763 { /* we handle this address the same as the one above */
1764 if (!candidate ||
1765 candidate->ip_equals(candidate, route->src_host))
1766 {
1767 best = route;
1768 break;
1769 }
1770 best = best ?: route;
1771 continue;
1772 }
1773 }
1774 if (route->gtw.ptr)
1775 { /* no src, no iface, but a gateway - lookup src to reach gtw */
1776 host_t *gtw;
1777
1778 gtw = host_create_from_chunk(msg->rtm_family, route->gtw, 0);
1779 if (gtw && !gtw->ip_equals(gtw, dest))
1780 {
1781 route->src_host = get_route(this, gtw, -1, FALSE, candidate,
1782 recursion + 1);
1783 }
1784 DESTROY_IF(gtw);
1785 if (route->src_host)
1786 { /* more of the same */
1787 if (!candidate ||
1788 candidate->ip_equals(candidate, route->src_host))
1789 {
1790 best = route;
1791 break;
1792 }
1793 best = best ?: route;
1794 }
1795 }
1796 }
1797 enumerator->destroy(enumerator);
1798
1799 if (nexthop)
1800 { /* nexthop lookup, return gateway if any */
1801 if (best || routes->get_first(routes, (void**)&best) == SUCCESS)
1802 {
1803 addr = host_create_from_chunk(msg->rtm_family, best->gtw, 0);
1804 }
1805 if (!addr && !match_net)
1806 { /* fallback to destination address */
1807 addr = dest->clone(dest);
1808 }
1809 }
1810 else
1811 {
1812 if (best)
1813 {
1814 addr = best->src_host->clone(best->src_host);
1815 }
1816 }
1817 this->lock->unlock(this->lock);
1818 routes->destroy_function(routes, (void*)rt_entry_destroy);
1819 free(out);
1820
1821 if (addr)
1822 {
1823 DBG2(DBG_KNL, "using %H as %s to reach %H/%d", addr,
1824 nexthop ? "nexthop" : "address", dest, prefix);
1825 }
1826 else if (!recursion)
1827 {
1828 DBG2(DBG_KNL, "no %s found to reach %H/%d",
1829 nexthop ? "nexthop" : "address", dest, prefix);
1830 }
1831 return addr;
1832 }
1833
1834 METHOD(kernel_net_t, get_source_addr, host_t*,
1835 private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
1836 {
1837 return get_route(this, dest, -1, FALSE, src, 0);
1838 }
1839
1840 METHOD(kernel_net_t, get_nexthop, host_t*,
1841 private_kernel_netlink_net_t *this, host_t *dest, int prefix, host_t *src)
1842 {
1843 return get_route(this, dest, prefix, TRUE, src, 0);
1844 }
1845
1846 /**
1847 * Manages the creation and deletion of ip addresses on an interface.
1848 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1849 */
1850 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
1851 int flags, int if_index, host_t *ip, int prefix)
1852 {
1853 netlink_buf_t request;
1854 struct nlmsghdr *hdr;
1855 struct ifaddrmsg *msg;
1856 chunk_t chunk;
1857
1858 memset(&request, 0, sizeof(request));
1859
1860 chunk = ip->get_address(ip);
1861
1862 hdr = (struct nlmsghdr*)request;
1863 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1864 hdr->nlmsg_type = nlmsg_type;
1865 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1866
1867 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1868 msg->ifa_family = ip->get_family(ip);
1869 msg->ifa_flags = 0;
1870 msg->ifa_prefixlen = prefix < 0 ? chunk.len * 8 : prefix;
1871 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1872 msg->ifa_index = if_index;
1873
1874 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1875
1876 if (ip->get_family(ip) == AF_INET6 && this->rta_prefsrc_for_ipv6)
1877 { /* if source routes are possible we let the virtual IP get deprecated
1878 * immediately (but mark it as valid forever) so it gets only used if
1879 * forced by our route, and not by the default IPv6 address selection */
1880 struct ifa_cacheinfo cache = {
1881 .ifa_valid = 0xFFFFFFFF,
1882 .ifa_prefered = 0,
1883 };
1884 netlink_add_attribute(hdr, IFA_CACHEINFO, chunk_from_thing(cache),
1885 sizeof(request));
1886 }
1887 return this->socket->send_ack(this->socket, hdr);
1888 }
1889
1890 METHOD(kernel_net_t, add_ip, status_t,
1891 private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
1892 char *iface_name)
1893 {
1894 addr_map_entry_t *entry, lookup = {
1895 .ip = virtual_ip,
1896 };
1897 iface_entry_t *iface = NULL;
1898
1899 if (!this->install_virtual_ip)
1900 { /* disabled by config */
1901 return SUCCESS;
1902 }
1903
1904 this->lock->write_lock(this->lock);
1905 /* the virtual IP might actually be installed as regular IP, in which case
1906 * we don't track it as virtual IP */
1907 entry = this->addrs->get_match(this->addrs, &lookup,
1908 (void*)addr_map_entry_match);
1909 if (!entry)
1910 { /* otherwise it might already be installed as virtual IP */
1911 entry = this->vips->get_match(this->vips, &lookup,
1912 (void*)addr_map_entry_match);
1913 if (entry)
1914 { /* the vip we found can be in one of three states: 1) installed and
1915 * ready, 2) just added by another thread, but not yet confirmed to
1916 * be installed by the kernel, 3) just deleted, but not yet gone.
1917 * Then while we wait below, several things could happen (as we
1918 * release the lock). For instance, the interface could disappear,
1919 * or the IP is finally deleted, and it reappears on a different
1920 * interface. All these cases are handled by the call below. */
1921 while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
1922 {
1923 this->condvar->wait(this->condvar, this->lock);
1924 }
1925 if (entry)
1926 {
1927 entry->addr->refcount++;
1928 }
1929 }
1930 }
1931 if (entry)
1932 {
1933 DBG2(DBG_KNL, "virtual IP %H is already installed on %s", virtual_ip,
1934 entry->iface->ifname);
1935 this->lock->unlock(this->lock);
1936 return SUCCESS;
1937 }
1938 /* try to find the target interface, either by config or via src ip */
1939 if (!this->install_virtual_ip_on ||
1940 this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1941 (void**)&iface, this->install_virtual_ip_on) != SUCCESS)
1942 {
1943 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1944 (void**)&iface, iface_name) != SUCCESS)
1945 { /* if we don't find the requested interface we just use the first */
1946 this->ifaces->get_first(this->ifaces, (void**)&iface);
1947 }
1948 }
1949 if (iface)
1950 {
1951 addr_entry_t *addr;
1952
1953 INIT(addr,
1954 .ip = virtual_ip->clone(virtual_ip),
1955 .refcount = 1,
1956 .scope = RT_SCOPE_UNIVERSE,
1957 );
1958 iface->addrs->insert_last(iface->addrs, addr);
1959 addr_map_entry_add(this->vips, addr, iface);
1960 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1961 iface->ifindex, virtual_ip, prefix) == SUCCESS)
1962 {
1963 while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
1964 { /* wait until address appears */
1965 this->condvar->wait(this->condvar, this->lock);
1966 }
1967 if (entry)
1968 { /* we fail if the interface got deleted in the meantime */
1969 DBG2(DBG_KNL, "virtual IP %H installed on %s", virtual_ip,
1970 entry->iface->ifname);
1971 this->lock->unlock(this->lock);
1972 /* during IKEv1 reauthentication, children get moved from
1973 * old the new SA before the virtual IP is available. This
1974 * kills the route for our virtual IP, reinstall. */
1975 queue_route_reinstall(this, strdup(entry->iface->ifname));
1976 return SUCCESS;
1977 }
1978 }
1979 this->lock->unlock(this->lock);
1980 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1981 return FAILED;
1982 }
1983 this->lock->unlock(this->lock);
1984 DBG1(DBG_KNL, "no interface available, unable to install virtual IP %H",
1985 virtual_ip);
1986 return FAILED;
1987 }
1988
1989 METHOD(kernel_net_t, del_ip, status_t,
1990 private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
1991 bool wait)
1992 {
1993 addr_map_entry_t *entry, lookup = {
1994 .ip = virtual_ip,
1995 };
1996
1997 if (!this->install_virtual_ip)
1998 { /* disabled by config */
1999 return SUCCESS;
2000 }
2001
2002 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
2003
2004 this->lock->write_lock(this->lock);
2005 entry = this->vips->get_match(this->vips, &lookup,
2006 (void*)addr_map_entry_match);
2007 if (!entry)
2008 { /* we didn't install this IP as virtual IP */
2009 entry = this->addrs->get_match(this->addrs, &lookup,
2010 (void*)addr_map_entry_match);
2011 if (entry)
2012 {
2013 DBG2(DBG_KNL, "not deleting existing IP %H on %s", virtual_ip,
2014 entry->iface->ifname);
2015 this->lock->unlock(this->lock);
2016 return SUCCESS;
2017 }
2018 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
2019 this->lock->unlock(this->lock);
2020 return FAILED;
2021 }
2022 if (entry->addr->refcount == 1)
2023 {
2024 status_t status;
2025
2026 /* we set this flag so that threads calling add_ip will block and wait
2027 * until the entry is gone, also so we can wait below */
2028 entry->addr->installed = FALSE;
2029 status = manage_ipaddr(this, RTM_DELADDR, 0, entry->iface->ifindex,
2030 virtual_ip, prefix);
2031 if (status == SUCCESS && wait)
2032 { /* wait until the address is really gone */
2033 while (is_known_vip(this, virtual_ip))
2034 {
2035 this->condvar->wait(this->condvar, this->lock);
2036 }
2037 }
2038 this->lock->unlock(this->lock);
2039 return status;
2040 }
2041 else
2042 {
2043 entry->addr->refcount--;
2044 }
2045 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
2046 virtual_ip);
2047 this->lock->unlock(this->lock);
2048 return SUCCESS;
2049 }
2050
2051 /**
2052 * Manages source routes in the routing table.
2053 * By setting the appropriate nlmsg_type, the route gets added or removed.
2054 */
2055 static status_t manage_srcroute(private_kernel_netlink_net_t *this,
2056 int nlmsg_type, int flags, chunk_t dst_net,
2057 u_int8_t prefixlen, host_t *gateway,
2058 host_t *src_ip, char *if_name)
2059 {
2060 netlink_buf_t request;
2061 struct nlmsghdr *hdr;
2062 struct rtmsg *msg;
2063 struct rtattr *rta;
2064 int ifindex;
2065 chunk_t chunk;
2066
2067 /* if route is 0.0.0.0/0, we can't install it, as it would
2068 * overwrite the default route. Instead, we add two routes:
2069 * 0.0.0.0/1 and 128.0.0.0/1 */
2070 if (this->routing_table == 0 && prefixlen == 0)
2071 {
2072 chunk_t half_net;
2073 u_int8_t half_prefixlen;
2074 status_t status;
2075
2076 half_net = chunk_alloca(dst_net.len);
2077 memset(half_net.ptr, 0, half_net.len);
2078 half_prefixlen = 1;
2079
2080 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
2081 gateway, src_ip, if_name);
2082 half_net.ptr[0] |= 0x80;
2083 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
2084 gateway, src_ip, if_name);
2085 return status;
2086 }
2087
2088 memset(&request, 0, sizeof(request));
2089
2090 hdr = (struct nlmsghdr*)request;
2091 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
2092 hdr->nlmsg_type = nlmsg_type;
2093 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2094
2095 msg = (struct rtmsg*)NLMSG_DATA(hdr);
2096 msg->rtm_family = src_ip->get_family(src_ip);
2097 msg->rtm_dst_len = prefixlen;
2098 msg->rtm_table = this->routing_table;
2099 msg->rtm_protocol = RTPROT_STATIC;
2100 msg->rtm_type = RTN_UNICAST;
2101 msg->rtm_scope = RT_SCOPE_UNIVERSE;
2102
2103 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
2104 chunk = src_ip->get_address(src_ip);
2105 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
2106 if (gateway && gateway->get_family(gateway) == src_ip->get_family(src_ip))
2107 {
2108 chunk = gateway->get_address(gateway);
2109 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
2110 }
2111 ifindex = get_interface_index(this, if_name);
2112 chunk.ptr = (char*)&ifindex;
2113 chunk.len = sizeof(ifindex);
2114 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
2115
2116 if (this->mtu)
2117 {
2118 chunk = chunk_alloca(RTA_LENGTH(sizeof(struct rtattr) +
2119 sizeof(u_int32_t)));
2120 rta = (struct rtattr*)chunk.ptr;
2121 rta->rta_type = RTAX_MTU;
2122 rta->rta_len = chunk.len;
2123 memcpy(RTA_DATA(rta), &this->mtu, sizeof(u_int32_t));
2124 netlink_add_attribute(hdr, RTA_METRICS, chunk, sizeof(request));
2125 }
2126
2127 return this->socket->send_ack(this->socket, hdr);
2128 }
2129
2130 METHOD(kernel_net_t, add_route, status_t,
2131 private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
2132 host_t *gateway, host_t *src_ip, char *if_name)
2133 {
2134 status_t status;
2135 route_entry_t *found, route = {
2136 .dst_net = dst_net,
2137 .prefixlen = prefixlen,
2138 .gateway = gateway,
2139 .src_ip = src_ip,
2140 .if_name = if_name,
2141 };
2142
2143 this->routes_lock->lock(this->routes_lock);
2144 found = this->routes->get(this->routes, &route);
2145 if (found)
2146 {
2147 this->routes_lock->unlock(this->routes_lock);
2148 return ALREADY_DONE;
2149 }
2150 status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2151 dst_net, prefixlen, gateway, src_ip, if_name);
2152 if (status == SUCCESS)
2153 {
2154 found = route_entry_clone(&route);
2155 this->routes->put(this->routes, found, found);
2156 }
2157 this->routes_lock->unlock(this->routes_lock);
2158 return status;
2159 }
2160
2161 METHOD(kernel_net_t, del_route, status_t,
2162 private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
2163 host_t *gateway, host_t *src_ip, char *if_name)
2164 {
2165 status_t status;
2166 route_entry_t *found, route = {
2167 .dst_net = dst_net,
2168 .prefixlen = prefixlen,
2169 .gateway = gateway,
2170 .src_ip = src_ip,
2171 .if_name = if_name,
2172 };
2173
2174 this->routes_lock->lock(this->routes_lock);
2175 found = this->routes->get(this->routes, &route);
2176 if (!found)
2177 {
2178 this->routes_lock->unlock(this->routes_lock);
2179 return NOT_FOUND;
2180 }
2181 this->routes->remove(this->routes, found);
2182 route_entry_destroy(found);
2183 status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
2184 gateway, src_ip, if_name);
2185 this->routes_lock->unlock(this->routes_lock);
2186 return status;
2187 }
2188
2189 /**
2190 * Initialize a list of local addresses.
2191 */
2192 static status_t init_address_list(private_kernel_netlink_net_t *this)
2193 {
2194 netlink_buf_t request;
2195 struct nlmsghdr *out, *current, *in;
2196 struct rtgenmsg *msg;
2197 size_t len;
2198 enumerator_t *ifaces, *addrs;
2199 iface_entry_t *iface;
2200 addr_entry_t *addr;
2201
2202 DBG2(DBG_KNL, "known interfaces and IP addresses:");
2203
2204 memset(&request, 0, sizeof(request));
2205
2206 in = (struct nlmsghdr*)&request;
2207 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
2208 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
2209 msg = (struct rtgenmsg*)NLMSG_DATA(in);
2210 msg->rtgen_family = AF_UNSPEC;
2211
2212 /* get all links */
2213 in->nlmsg_type = RTM_GETLINK;
2214 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
2215 {
2216 return FAILED;
2217 }
2218 current = out;
2219 while (NLMSG_OK(current, len))
2220 {
2221 switch (current->nlmsg_type)
2222 {
2223 case NLMSG_DONE:
2224 break;
2225 case RTM_NEWLINK:
2226 process_link(this, current, FALSE);
2227 /* fall through */
2228 default:
2229 current = NLMSG_NEXT(current, len);
2230 continue;
2231 }
2232 break;
2233 }
2234 free(out);
2235
2236 /* get all interface addresses */
2237 in->nlmsg_type = RTM_GETADDR;
2238 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
2239 {
2240 return FAILED;
2241 }
2242 current = out;
2243 while (NLMSG_OK(current, len))
2244 {
2245 switch (current->nlmsg_type)
2246 {
2247 case NLMSG_DONE:
2248 break;
2249 case RTM_NEWADDR:
2250 process_addr(this, current, FALSE);
2251 /* fall through */
2252 default:
2253 current = NLMSG_NEXT(current, len);
2254 continue;
2255 }
2256 break;
2257 }
2258 free(out);
2259
2260 this->lock->read_lock(this->lock);
2261 ifaces = this->ifaces->create_enumerator(this->ifaces);
2262 while (ifaces->enumerate(ifaces, &iface))
2263 {
2264 if (iface_entry_up_and_usable(iface))
2265 {
2266 DBG2(DBG_KNL, " %s", iface->ifname);
2267 addrs = iface->addrs->create_enumerator(iface->addrs);
2268 while (addrs->enumerate(addrs, (void**)&addr))
2269 {
2270 DBG2(DBG_KNL, " %H", addr->ip);
2271 }
2272 addrs->destroy(addrs);
2273 }
2274 }
2275 ifaces->destroy(ifaces);
2276 this->lock->unlock(this->lock);
2277 return SUCCESS;
2278 }
2279
2280 /**
2281 * create or delete a rule to use our routing table
2282 */
2283 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
2284 int family, u_int32_t table, u_int32_t prio)
2285 {
2286 netlink_buf_t request;
2287 struct nlmsghdr *hdr;
2288 struct rtmsg *msg;
2289 chunk_t chunk;
2290 char *fwmark;
2291
2292 memset(&request, 0, sizeof(request));
2293 hdr = (struct nlmsghdr*)request;
2294 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2295 hdr->nlmsg_type = nlmsg_type;
2296 if (nlmsg_type == RTM_NEWRULE)
2297 {
2298 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
2299 }
2300 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2301
2302 msg = (struct rtmsg*)NLMSG_DATA(hdr);
2303 msg->rtm_table = table;
2304 msg->rtm_family = family;
2305 msg->rtm_protocol = RTPROT_BOOT;
2306 msg->rtm_scope = RT_SCOPE_UNIVERSE;
2307 msg->rtm_type = RTN_UNICAST;
2308
2309 chunk = chunk_from_thing(prio);
2310 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
2311
2312 fwmark = lib->settings->get_str(lib->settings,
2313 "%s.plugins.kernel-netlink.fwmark", NULL, lib->ns);
2314 if (fwmark)
2315 {
2316 #ifdef HAVE_LINUX_FIB_RULES_H
2317 mark_t mark;
2318
2319 if (fwmark[0] == '!')
2320 {
2321 msg->rtm_flags |= FIB_RULE_INVERT;
2322 fwmark++;
2323 }
2324 if (mark_from_string(fwmark, &mark))
2325 {
2326 chunk = chunk_from_thing(mark.value);
2327 netlink_add_attribute(hdr, FRA_FWMARK, chunk, sizeof(request));
2328 chunk = chunk_from_thing(mark.mask);
2329 netlink_add_attribute(hdr, FRA_FWMASK, chunk, sizeof(request));
2330 }
2331 #else
2332 DBG1(DBG_KNL, "setting firewall mark on routing rule is not supported");
2333 #endif
2334 }
2335 return this->socket->send_ack(this->socket, hdr);
2336 }
2337
2338 /**
2339 * check for kernel features (currently only via version number)
2340 */
2341 static void check_kernel_features(private_kernel_netlink_net_t *this)
2342 {
2343 struct utsname utsname;
2344 int a, b, c;
2345
2346 if (uname(&utsname) == 0)
2347 {
2348 switch(sscanf(utsname.release, "%d.%d.%d", &a, &b, &c))
2349 {
2350 case 3:
2351 if (a == 2)
2352 {
2353 DBG2(DBG_KNL, "detected Linux %d.%d.%d, no support for "
2354 "RTA_PREFSRC for IPv6 routes", a, b, c);
2355 break;
2356 }
2357 /* fall-through */
2358 case 2:
2359 /* only 3.x+ uses two part version numbers */
2360 this->rta_prefsrc_for_ipv6 = TRUE;
2361 break;
2362 default:
2363 break;
2364 }
2365 }
2366 }
2367
2368 /**
2369 * Destroy an address to iface map
2370 */
2371 static void addr_map_destroy(hashtable_t *map)
2372 {
2373 enumerator_t *enumerator;
2374 addr_map_entry_t *addr;
2375
2376 enumerator = map->create_enumerator(map);
2377 while (enumerator->enumerate(enumerator, NULL, (void**)&addr))
2378 {
2379 free(addr);
2380 }
2381 enumerator->destroy(enumerator);
2382 map->destroy(map);
2383 }
2384
2385 METHOD(kernel_net_t, destroy, void,
2386 private_kernel_netlink_net_t *this)
2387 {
2388 enumerator_t *enumerator;
2389 route_entry_t *route;
2390
2391 if (this->routing_table)
2392 {
2393 manage_rule(this, RTM_DELRULE, AF_INET, this->routing_table,
2394 this->routing_table_prio);
2395 manage_rule(this, RTM_DELRULE, AF_INET6, this->routing_table,
2396 this->routing_table_prio);
2397 }
2398 if (this->socket_events > 0)
2399 {
2400 lib->watcher->remove(lib->watcher, this->socket_events);
2401 close(this->socket_events);
2402 }
2403 enumerator = this->routes->create_enumerator(this->routes);
2404 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
2405 {
2406 manage_srcroute(this, RTM_DELROUTE, 0, route->dst_net, route->prefixlen,
2407 route->gateway, route->src_ip, route->if_name);
2408 route_entry_destroy(route);
2409 }
2410 enumerator->destroy(enumerator);
2411 this->routes->destroy(this->routes);
2412 this->routes_lock->destroy(this->routes_lock);
2413 DESTROY_IF(this->socket);
2414
2415 net_changes_clear(this);
2416 this->net_changes->destroy(this->net_changes);
2417 this->net_changes_lock->destroy(this->net_changes_lock);
2418
2419 addr_map_destroy(this->addrs);
2420 addr_map_destroy(this->vips);
2421
2422 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2423 this->rt_exclude->destroy(this->rt_exclude);
2424 this->roam_lock->destroy(this->roam_lock);
2425 this->condvar->destroy(this->condvar);
2426 this->lock->destroy(this->lock);
2427 free(this);
2428 }
2429
2430 /*
2431 * Described in header.
2432 */
2433 kernel_netlink_net_t *kernel_netlink_net_create()
2434 {
2435 private_kernel_netlink_net_t *this;
2436 enumerator_t *enumerator;
2437 bool register_for_events = TRUE;
2438 char *exclude;
2439
2440 INIT(this,
2441 .public = {
2442 .interface = {
2443 .get_interface = _get_interface_name,
2444 .create_address_enumerator = _create_address_enumerator,
2445 .get_source_addr = _get_source_addr,
2446 .get_nexthop = _get_nexthop,
2447 .add_ip = _add_ip,
2448 .del_ip = _del_ip,
2449 .add_route = _add_route,
2450 .del_route = _del_route,
2451 .destroy = _destroy,
2452 },
2453 },
2454 .socket = netlink_socket_create(NETLINK_ROUTE),
2455 .rt_exclude = linked_list_create(),
2456 .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
2457 (hashtable_equals_t)route_entry_equals, 16),
2458 .net_changes = hashtable_create(
2459 (hashtable_hash_t)net_change_hash,
2460 (hashtable_equals_t)net_change_equals, 16),
2461 .addrs = hashtable_create(
2462 (hashtable_hash_t)addr_map_entry_hash,
2463 (hashtable_equals_t)addr_map_entry_equals, 16),
2464 .vips = hashtable_create((hashtable_hash_t)addr_map_entry_hash,
2465 (hashtable_equals_t)addr_map_entry_equals, 16),
2466 .routes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
2467 .net_changes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
2468 .ifaces = linked_list_create(),
2469 .lock = rwlock_create(RWLOCK_TYPE_DEFAULT),
2470 .condvar = rwlock_condvar_create(),
2471 .roam_lock = spinlock_create(),
2472 .routing_table = lib->settings->get_int(lib->settings,
2473 "%s.routing_table", ROUTING_TABLE, lib->ns),
2474 .routing_table_prio = lib->settings->get_int(lib->settings,
2475 "%s.routing_table_prio", ROUTING_TABLE_PRIO, lib->ns),
2476 .process_route = lib->settings->get_bool(lib->settings,
2477 "%s.process_route", TRUE, lib->ns),
2478 .install_virtual_ip = lib->settings->get_bool(lib->settings,
2479 "%s.install_virtual_ip", TRUE, lib->ns),
2480 .install_virtual_ip_on = lib->settings->get_str(lib->settings,
2481 "%s.install_virtual_ip_on", NULL, lib->ns),
2482 .prefer_temporary_addrs = lib->settings->get_bool(lib->settings,
2483 "%s.prefer_temporary_addrs", FALSE, lib->ns),
2484 .roam_events = lib->settings->get_bool(lib->settings,
2485 "%s.plugins.kernel-netlink.roam_events", TRUE, lib->ns),
2486 .mtu = lib->settings->get_int(lib->settings,
2487 "%s.plugins.kernel-netlink.mtu", 0, lib->ns),
2488 );
2489 timerclear(&this->last_route_reinstall);
2490 timerclear(&this->next_roam);
2491
2492 check_kernel_features(this);
2493
2494 if (streq(lib->ns, "starter"))
2495 { /* starter has no threads, so we do not register for kernel events */
2496 register_for_events = FALSE;
2497 }
2498
2499 exclude = lib->settings->get_str(lib->settings,
2500 "%s.ignore_routing_tables", NULL, lib->ns);
2501 if (exclude)
2502 {
2503 char *token;
2504 uintptr_t table;
2505
2506 enumerator = enumerator_create_token(exclude, " ", " ");
2507 while (enumerator->enumerate(enumerator, &token))
2508 {
2509 errno = 0;
2510 table = strtoul(token, NULL, 10);
2511
2512 if (errno == 0)
2513 {
2514 this->rt_exclude->insert_last(this->rt_exclude, (void*)table);
2515 }
2516 }
2517 enumerator->destroy(enumerator);
2518 }
2519
2520 if (register_for_events)
2521 {
2522 struct sockaddr_nl addr;
2523
2524 memset(&addr, 0, sizeof(addr));
2525 addr.nl_family = AF_NETLINK;
2526
2527 /* create and bind RT socket for events (address/interface/route changes) */
2528 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2529 if (this->socket_events < 0)
2530 {
2531 DBG1(DBG_KNL, "unable to create RT event socket");
2532 destroy(this);
2533 return NULL;
2534 }
2535 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2536 RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_LINK;
2537 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
2538 {
2539 DBG1(DBG_KNL, "unable to bind RT event socket");
2540 destroy(this);
2541 return NULL;
2542 }
2543
2544 lib->watcher->add(lib->watcher, this->socket_events, WATCHER_READ,
2545 (watcher_cb_t)receive_events, this);
2546 }
2547
2548 if (init_address_list(this) != SUCCESS)
2549 {
2550 DBG1(DBG_KNL, "unable to get interface list");
2551 destroy(this);
2552 return NULL;
2553 }
2554
2555 if (this->routing_table)
2556 {
2557 if (manage_rule(this, RTM_NEWRULE, AF_INET, this->routing_table,
2558 this->routing_table_prio) != SUCCESS)
2559 {
2560 DBG1(DBG_KNL, "unable to create IPv4 routing table rule");
2561 }
2562 if (manage_rule(this, RTM_NEWRULE, AF_INET6, this->routing_table,
2563 this->routing_table_prio) != SUCCESS)
2564 {
2565 DBG1(DBG_KNL, "unable to create IPv6 routing table rule");
2566 }
2567 }
2568
2569 return &this->public;
2570 }