kernel-netlink: Cast IPv6 address blobs to the proper type
[strongswan.git] / src / libhydra / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008-2014 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 */
16
17 /*
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
20 *
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
30 *
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37 * THE SOFTWARE.
38 */
39
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
44 #include <unistd.h>
45 #include <errno.h>
46 #include <net/if.h>
47 #ifdef HAVE_LINUX_FIB_RULES_H
48 #include <linux/fib_rules.h>
49 #endif
50
51 #include "kernel_netlink_net.h"
52 #include "kernel_netlink_shared.h"
53
54 #include <hydra.h>
55 #include <utils/debug.h>
56 #include <threading/mutex.h>
57 #include <threading/rwlock.h>
58 #include <threading/rwlock_condvar.h>
59 #include <threading/spinlock.h>
60 #include <collections/hashtable.h>
61 #include <collections/linked_list.h>
62 #include <processing/jobs/callback_job.h>
63
/** Time to wait, in milliseconds, before a roam event is raised */
#define ROAM_DELAY 100

/** Time to wait, in milliseconds, before routes get reinstalled */
#define ROUTE_DELAY 100

/** Recursion limit for the address search done by get_route() */
#define MAX_ROUTE_RECURSION 2

/** Fallback value if ROUTING_TABLE was not defined by the build */
#ifndef ROUTING_TABLE
#define ROUTING_TABLE 0
#endif

/** Fallback value if ROUTING_TABLE_PRIO was not defined by the build */
#ifndef ROUTING_TABLE_PRIO
#define ROUTING_TABLE_PRIO 0
#endif
80
81 typedef struct addr_entry_t addr_entry_t;
82
83 /**
84 * IP address in an iface_entry_t
85 */
86 struct addr_entry_t {
87
88 /** the ip address */
89 host_t *ip;
90
91 /** address flags */
92 u_char flags;
93
94 /** scope of the address */
95 u_char scope;
96
97 /** number of times this IP is used, if virtual (i.e. managed by us) */
98 u_int refcount;
99
100 /** TRUE once it is installed, if virtual */
101 bool installed;
102 };
103
104 /**
105 * destroy a addr_entry_t object
106 */
107 static void addr_entry_destroy(addr_entry_t *this)
108 {
109 this->ip->destroy(this->ip);
110 free(this);
111 }
112
113 typedef struct iface_entry_t iface_entry_t;
114
115 /**
116 * A network interface on this system, containing addr_entry_t's
117 */
118 struct iface_entry_t {
119
120 /** interface index */
121 int ifindex;
122
123 /** name of the interface */
124 char ifname[IFNAMSIZ];
125
126 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
127 u_int flags;
128
129 /** list of addresses as host_t */
130 linked_list_t *addrs;
131
132 /** TRUE if usable by config */
133 bool usable;
134 };
135
136 /**
137 * destroy an interface entry
138 */
139 static void iface_entry_destroy(iface_entry_t *this)
140 {
141 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
142 free(this);
143 }
144
145 /**
146 * find an interface entry by index
147 */
148 static bool iface_entry_by_index(iface_entry_t *this, int *ifindex)
149 {
150 return this->ifindex == *ifindex;
151 }
152
153 /**
154 * find an interface entry by name
155 */
156 static bool iface_entry_by_name(iface_entry_t *this, char *ifname)
157 {
158 return streq(this->ifname, ifname);
159 }
160
161 /**
162 * check if an interface is up
163 */
164 static inline bool iface_entry_up(iface_entry_t *iface)
165 {
166 return (iface->flags & IFF_UP) == IFF_UP;
167 }
168
169 /**
170 * check if an interface is up and usable
171 */
172 static inline bool iface_entry_up_and_usable(iface_entry_t *iface)
173 {
174 return iface->usable && iface_entry_up(iface);
175 }
176
177 typedef struct addr_map_entry_t addr_map_entry_t;
178
179 /**
180 * Entry that maps an IP address to an interface entry
181 */
182 struct addr_map_entry_t {
183 /** The IP address */
184 host_t *ip;
185
186 /** The address entry for this IP address */
187 addr_entry_t *addr;
188
189 /** The interface this address is installed on */
190 iface_entry_t *iface;
191 };
192
193 /**
194 * Hash a addr_map_entry_t object, all entries with the same IP address
195 * are stored in the same bucket
196 */
197 static u_int addr_map_entry_hash(addr_map_entry_t *this)
198 {
199 return chunk_hash(this->ip->get_address(this->ip));
200 }
201
202 /**
203 * Compare two addr_map_entry_t objects, two entries are equal if they are
204 * installed on the same interface
205 */
206 static bool addr_map_entry_equals(addr_map_entry_t *a, addr_map_entry_t *b)
207 {
208 return a->iface->ifindex == b->iface->ifindex &&
209 a->ip->ip_equals(a->ip, b->ip);
210 }
211
212 /**
213 * Used with get_match this finds an address entry if it is installed on
214 * an up and usable interface
215 */
216 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t *a,
217 addr_map_entry_t *b)
218 {
219 return iface_entry_up_and_usable(b->iface) &&
220 a->ip->ip_equals(a->ip, b->ip);
221 }
222
223 /**
224 * Used with get_match this finds an address entry if it is installed on
225 * any active local interface
226 */
227 static bool addr_map_entry_match_up(addr_map_entry_t *a, addr_map_entry_t *b)
228 {
229 return iface_entry_up(b->iface) && a->ip->ip_equals(a->ip, b->ip);
230 }
231
232 /**
233 * Used with get_match this finds an address entry if it is installed on
234 * any local interface
235 */
236 static bool addr_map_entry_match(addr_map_entry_t *a, addr_map_entry_t *b)
237 {
238 return a->ip->ip_equals(a->ip, b->ip);
239 }
240
241 typedef struct route_entry_t route_entry_t;
242
243 /**
244 * Installed routing entry
245 */
246 struct route_entry_t {
247 /** Name of the interface the route is bound to */
248 char *if_name;
249
250 /** Source ip of the route */
251 host_t *src_ip;
252
253 /** Gateway for this route */
254 host_t *gateway;
255
256 /** Destination net */
257 chunk_t dst_net;
258
259 /** Destination net prefixlen */
260 u_int8_t prefixlen;
261 };
262
263 /**
264 * Clone a route_entry_t object.
265 */
266 static route_entry_t *route_entry_clone(route_entry_t *this)
267 {
268 route_entry_t *route;
269
270 INIT(route,
271 .if_name = strdup(this->if_name),
272 .src_ip = this->src_ip->clone(this->src_ip),
273 .gateway = this->gateway ? this->gateway->clone(this->gateway) : NULL,
274 .dst_net = chunk_clone(this->dst_net),
275 .prefixlen = this->prefixlen,
276 );
277 return route;
278 }
279
280 /**
281 * Destroy a route_entry_t object
282 */
283 static void route_entry_destroy(route_entry_t *this)
284 {
285 free(this->if_name);
286 DESTROY_IF(this->src_ip);
287 DESTROY_IF(this->gateway);
288 chunk_free(&this->dst_net);
289 free(this);
290 }
291
292 /**
293 * Hash a route_entry_t object
294 */
295 static u_int route_entry_hash(route_entry_t *this)
296 {
297 return chunk_hash_inc(chunk_from_thing(this->prefixlen),
298 chunk_hash(this->dst_net));
299 }
300
301 /**
302 * Compare two route_entry_t objects
303 */
304 static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
305 {
306 if (a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
307 a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
308 chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen)
309 {
310 return (!a->gateway && !b->gateway) || (a->gateway && b->gateway &&
311 a->gateway->ip_equals(a->gateway, b->gateway));
312 }
313 return FALSE;
314 }
315
typedef struct net_change_t net_change_t;

/**
 * A queued network change
 */
struct net_change_t {
	/** Name of the interface that was activated (or on which an IP appeared) */
	char *if_name;
};
325
326 /**
327 * Destroy a net_change_t object
328 */
329 static void net_change_destroy(net_change_t *this)
330 {
331 free(this->if_name);
332 free(this);
333 }
334
335 /**
336 * Hash a net_change_t object
337 */
338 static u_int net_change_hash(net_change_t *this)
339 {
340 return chunk_hash(chunk_create(this->if_name, strlen(this->if_name)));
341 }
342
343 /**
344 * Compare two net_change_t objects
345 */
346 static bool net_change_equals(net_change_t *a, net_change_t *b)
347 {
348 return streq(a->if_name, b->if_name);
349 }
350
351 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
352
353 /**
354 * Private variables and functions of kernel_netlink_net class.
355 */
356 struct private_kernel_netlink_net_t {
357 /**
358 * Public part of the kernel_netlink_net_t object.
359 */
360 kernel_netlink_net_t public;
361
362 /**
363 * lock to access various lists and maps
364 */
365 rwlock_t *lock;
366
367 /**
368 * condition variable to signal virtual IP add/removal
369 */
370 rwlock_condvar_t *condvar;
371
372 /**
373 * Cached list of interfaces and its addresses (iface_entry_t)
374 */
375 linked_list_t *ifaces;
376
377 /**
378 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
379 */
380 hashtable_t *addrs;
381
382 /**
383 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
384 */
385 hashtable_t *vips;
386
387 /**
388 * netlink rt socket (routing)
389 */
390 netlink_socket_t *socket;
391
392 /**
393 * Netlink rt socket to receive address change events
394 */
395 int socket_events;
396
397 /**
398 * earliest time of the next roam event
399 */
400 timeval_t next_roam;
401
402 /**
403 * roam event due to address change
404 */
405 bool roam_address;
406
407 /**
408 * lock to check and update roam event time
409 */
410 spinlock_t *roam_lock;
411
412 /**
413 * routing table to install routes
414 */
415 int routing_table;
416
417 /**
418 * priority of used routing table
419 */
420 int routing_table_prio;
421
422 /**
423 * installed routes
424 */
425 hashtable_t *routes;
426
427 /**
428 * mutex for routes
429 */
430 mutex_t *routes_lock;
431
432 /**
433 * interface changes which may trigger route reinstallation
434 */
435 hashtable_t *net_changes;
436
437 /**
438 * mutex for route reinstallation triggers
439 */
440 mutex_t *net_changes_lock;
441
442 /**
443 * time of last route reinstallation
444 */
445 timeval_t last_route_reinstall;
446
447 /**
448 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
449 */
450 bool process_route;
451
452 /**
453 * whether to trigger roam events
454 */
455 bool roam_events;
456
457 /**
458 * whether to actually install virtual IPs
459 */
460 bool install_virtual_ip;
461
462 /**
463 * the name of the interface virtual IP addresses are installed on
464 */
465 char *install_virtual_ip_on;
466
467 /**
468 * whether preferred source addresses can be specified for IPv6 routes
469 */
470 bool rta_prefsrc_for_ipv6;
471
472 /**
473 * whether to prefer temporary IPv6 addresses over public ones
474 */
475 bool prefer_temporary_addrs;
476
477 /**
478 * list with routing tables to be excluded from route lookup
479 */
480 linked_list_t *rt_exclude;
481 };
482
483 /**
484 * Forward declaration
485 */
486 static status_t manage_srcroute(private_kernel_netlink_net_t *this,
487 int nlmsg_type, int flags, chunk_t dst_net,
488 u_int8_t prefixlen, host_t *gateway,
489 host_t *src_ip, char *if_name);
490
491 /**
492 * Clear the queued network changes.
493 */
494 static void net_changes_clear(private_kernel_netlink_net_t *this)
495 {
496 enumerator_t *enumerator;
497 net_change_t *change;
498
499 enumerator = this->net_changes->create_enumerator(this->net_changes);
500 while (enumerator->enumerate(enumerator, NULL, (void**)&change))
501 {
502 this->net_changes->remove_at(this->net_changes, enumerator);
503 net_change_destroy(change);
504 }
505 enumerator->destroy(enumerator);
506 }
507
508 /**
509 * Act upon queued network changes.
510 */
511 static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
512 {
513 enumerator_t *enumerator;
514 route_entry_t *route;
515
516 this->net_changes_lock->lock(this->net_changes_lock);
517 this->routes_lock->lock(this->routes_lock);
518
519 enumerator = this->routes->create_enumerator(this->routes);
520 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
521 {
522 net_change_t *change, lookup = {
523 .if_name = route->if_name,
524 };
525 /* check if a change for the outgoing interface is queued */
526 change = this->net_changes->get(this->net_changes, &lookup);
527 if (!change)
528 { /* in case src_ip is not on the outgoing interface */
529 if (this->public.interface.get_interface(&this->public.interface,
530 route->src_ip, &lookup.if_name))
531 {
532 if (!streq(lookup.if_name, route->if_name))
533 {
534 change = this->net_changes->get(this->net_changes, &lookup);
535 }
536 free(lookup.if_name);
537 }
538 }
539 if (change)
540 {
541 manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
542 route->dst_net, route->prefixlen, route->gateway,
543 route->src_ip, route->if_name);
544 }
545 }
546 enumerator->destroy(enumerator);
547 this->routes_lock->unlock(this->routes_lock);
548
549 net_changes_clear(this);
550 this->net_changes_lock->unlock(this->net_changes_lock);
551 return JOB_REQUEUE_NONE;
552 }
553
554 /**
555 * Queue route reinstallation caused by network changes for a given interface.
556 *
557 * The route reinstallation is delayed for a while and only done once for
558 * several calls during this delay, in order to avoid doing it too often.
559 * The interface name is freed.
560 */
561 static void queue_route_reinstall(private_kernel_netlink_net_t *this,
562 char *if_name)
563 {
564 net_change_t *update, *found;
565 timeval_t now;
566 job_t *job;
567
568 INIT(update,
569 .if_name = if_name
570 );
571
572 this->net_changes_lock->lock(this->net_changes_lock);
573 found = this->net_changes->put(this->net_changes, update, update);
574 if (found)
575 {
576 net_change_destroy(found);
577 }
578 time_monotonic(&now);
579 if (timercmp(&now, &this->last_route_reinstall, >))
580 {
581 timeval_add_ms(&now, ROUTE_DELAY);
582 this->last_route_reinstall = now;
583
584 job = (job_t*)callback_job_create((callback_job_cb_t)reinstall_routes,
585 this, NULL, NULL);
586 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROUTE_DELAY);
587 }
588 this->net_changes_lock->unlock(this->net_changes_lock);
589 }
590
591 /**
592 * check if the given IP is known as virtual IP and currently installed
593 *
594 * this function will also return TRUE if the virtual IP entry disappeared.
595 * in that case the returned entry will be NULL.
596 *
597 * this->lock must be held when calling this function
598 */
599 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t *this,
600 host_t *ip, addr_map_entry_t **entry)
601 {
602 addr_map_entry_t lookup = {
603 .ip = ip,
604 };
605
606 *entry = this->vips->get_match(this->vips, &lookup,
607 (void*)addr_map_entry_match);
608 if (*entry == NULL)
609 { /* the virtual IP disappeared */
610 return TRUE;
611 }
612 return (*entry)->addr->installed;
613 }
614
615 /**
616 * check if the given IP is known as virtual IP
617 *
618 * this->lock must be held when calling this function
619 */
620 static bool is_known_vip(private_kernel_netlink_net_t *this, host_t *ip)
621 {
622 addr_map_entry_t lookup = {
623 .ip = ip,
624 };
625
626 return this->vips->get_match(this->vips, &lookup,
627 (void*)addr_map_entry_match) != NULL;
628 }
629
630 /**
631 * Add an address map entry
632 */
633 static void addr_map_entry_add(hashtable_t *map, addr_entry_t *addr,
634 iface_entry_t *iface)
635 {
636 addr_map_entry_t *entry;
637
638 INIT(entry,
639 .ip = addr->ip,
640 .addr = addr,
641 .iface = iface,
642 );
643 entry = map->put(map, entry, entry);
644 free(entry);
645 }
646
647 /**
648 * Remove an address map entry
649 */
650 static void addr_map_entry_remove(hashtable_t *map, addr_entry_t *addr,
651 iface_entry_t *iface)
652 {
653 addr_map_entry_t *entry, lookup = {
654 .ip = addr->ip,
655 .addr = addr,
656 .iface = iface,
657 };
658
659 entry = map->remove(map, &lookup);
660 free(entry);
661 }
662
663 /**
664 * Determine the type or scope of the given unicast IP address. This is not
665 * the same thing returned in rtm_scope/ifa_scope.
666 *
667 * We use return values as defined in RFC 6724 (referring to RFC 4291).
668 */
669 static u_char get_scope(host_t *ip)
670 {
671 chunk_t addr;
672
673 addr = ip->get_address(ip);
674 switch (addr.len)
675 {
676 case 4:
677 /* we use the mapping defined in RFC 6724, 3.2 */
678 if (addr.ptr[0] == 127)
679 { /* link-local, same as the IPv6 loopback address */
680 return 2;
681 }
682 if (addr.ptr[0] == 169 && addr.ptr[1] == 254)
683 { /* link-local */
684 return 2;
685 }
686 break;
687 case 16:
688 if (IN6_IS_ADDR_LOOPBACK((struct in6_addr*)addr.ptr))
689 { /* link-local, according to RFC 4291, 2.5.3 */
690 return 2;
691 }
692 if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr*)addr.ptr))
693 {
694 return 2;
695 }
696 if (IN6_IS_ADDR_SITELOCAL((struct in6_addr*)addr.ptr))
697 { /* deprecated, according to RFC 4291, 2.5.7 */
698 return 5;
699 }
700 break;
701 default:
702 break;
703 }
704 /* global */
705 return 14;
706 }
707
708 /**
709 * Returns the length of the common prefix in bits up to the length of a's
710 * prefix, defined by RFC 6724 as the portion of the address not including the
711 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
712 */
713 static u_char common_prefix(host_t *a, host_t *b)
714 {
715 chunk_t aa, ba;
716 u_char byte, bits = 0, match;
717
718 aa = a->get_address(a);
719 ba = b->get_address(b);
720 for (byte = 0; byte < 8; byte++)
721 {
722 if (aa.ptr[byte] != ba.ptr[byte])
723 {
724 match = aa.ptr[byte] ^ ba.ptr[byte];
725 for (bits = 8; match; match >>= 1)
726 {
727 bits--;
728 }
729 break;
730 }
731 }
732 return byte * 8 + bits;
733 }
734
735 /**
736 * Compare two IP addresses and return TRUE if the second address is the better
737 * choice of the two to reach the destination.
738 * For IPv6 we approximately follow RFC 6724.
739 */
740 static bool is_address_better(private_kernel_netlink_net_t *this,
741 addr_entry_t *a, addr_entry_t *b, host_t *d)
742 {
743 u_char sa, sb, sd, pa, pb;
744
745 /* rule 2: prefer appropriate scope */
746 if (d)
747 {
748 sa = get_scope(a->ip);
749 sb = get_scope(b->ip);
750 sd = get_scope(d);
751 if (sa < sb)
752 {
753 return sa < sd;
754 }
755 else if (sb < sa)
756 {
757 return sb >= sd;
758 }
759 }
760 if (a->ip->get_family(a->ip) == AF_INET)
761 { /* stop here for IPv4, default to addresses found earlier */
762 return FALSE;
763 }
764 /* rule 3: avoid deprecated addresses (RFC 4862) */
765 if ((a->flags & IFA_F_DEPRECATED) != (b->flags & IFA_F_DEPRECATED))
766 {
767 return a->flags & IFA_F_DEPRECATED;
768 }
769 /* rule 4 is not applicable as we don't know if an address is a home or
770 * care-of addresses.
771 * rule 5 does not apply as we only compare addresses from one interface
772 * rule 6 requires a policy table (optionally configurable) to match
773 * configurable labels
774 */
775 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
776 if ((a->flags & IFA_F_TEMPORARY) != (b->flags & IFA_F_TEMPORARY))
777 {
778 if (this->prefer_temporary_addrs)
779 {
780 return b->flags & IFA_F_TEMPORARY;
781 }
782 return a->flags & IFA_F_TEMPORARY;
783 }
784 /* rule 8: use longest matching prefix */
785 if (d)
786 {
787 pa = common_prefix(a->ip, d);
788 pb = common_prefix(b->ip, d);
789 if (pa != pb)
790 {
791 return pb > pa;
792 }
793 }
794 /* default to addresses found earlier */
795 return FALSE;
796 }
797
798 /**
799 * Get a non-virtual IP address on the given interface.
800 *
801 * If a candidate address is given, we first search for that address and if not
802 * found return the address as above.
803 * Returned host is a clone, has to be freed by caller.
804 *
805 * this->lock must be held when calling this function.
806 */
807 static host_t *get_interface_address(private_kernel_netlink_net_t *this,
808 int ifindex, int family, host_t *dest,
809 host_t *candidate)
810 {
811 iface_entry_t *iface;
812 enumerator_t *addrs;
813 addr_entry_t *addr, *best = NULL;
814
815 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
816 (void**)&iface, &ifindex) == SUCCESS)
817 {
818 if (iface->usable)
819 { /* only use interfaces not excluded by config */
820 addrs = iface->addrs->create_enumerator(iface->addrs);
821 while (addrs->enumerate(addrs, &addr))
822 {
823 if (addr->refcount ||
824 addr->ip->get_family(addr->ip) != family)
825 { /* ignore virtual IP addresses and ensure family matches */
826 continue;
827 }
828 if (candidate && candidate->ip_equals(candidate, addr->ip))
829 { /* stop if we find the candidate */
830 best = addr;
831 break;
832 }
833 else if (!best || is_address_better(this, best, addr, dest))
834 {
835 best = addr;
836 }
837 }
838 addrs->destroy(addrs);
839 }
840 }
841 return best ? best->ip->clone(best->ip) : NULL;
842 }
843
844 /**
845 * callback function that raises the delayed roam event
846 */
847 static job_requeue_t roam_event(private_kernel_netlink_net_t *this)
848 {
849 bool address;
850
851 this->roam_lock->lock(this->roam_lock);
852 address = this->roam_address;
853 this->roam_address = FALSE;
854 this->roam_lock->unlock(this->roam_lock);
855 hydra->kernel_interface->roam(hydra->kernel_interface, address);
856 return JOB_REQUEUE_NONE;
857 }
858
859 /**
860 * fire a roaming event. we delay it for a bit and fire only one event
861 * for multiple calls. otherwise we would create too many events.
862 */
863 static void fire_roam_event(private_kernel_netlink_net_t *this, bool address)
864 {
865 timeval_t now;
866 job_t *job;
867
868 if (!this->roam_events)
869 {
870 return;
871 }
872
873 time_monotonic(&now);
874 this->roam_lock->lock(this->roam_lock);
875 this->roam_address |= address;
876 if (!timercmp(&now, &this->next_roam, >))
877 {
878 this->roam_lock->unlock(this->roam_lock);
879 return;
880 }
881 timeval_add_ms(&now, ROAM_DELAY);
882 this->next_roam = now;
883 this->roam_lock->unlock(this->roam_lock);
884
885 job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
886 this, NULL, NULL);
887 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
888 }
889
890 /**
891 * check if an interface with a given index is up and usable
892 *
893 * this->lock must be locked when calling this function
894 */
895 static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
896 int index)
897 {
898 iface_entry_t *iface;
899
900 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
901 (void**)&iface, &index) == SUCCESS)
902 {
903 return iface_entry_up_and_usable(iface);
904 }
905 return FALSE;
906 }
907
908 /**
909 * unregister the current addr_entry_t from the hashtable it is stored in
910 *
911 * this->lock must be locked when calling this function
912 */
913 static void addr_entry_unregister(addr_entry_t *addr, iface_entry_t *iface,
914 private_kernel_netlink_net_t *this)
915 {
916 if (addr->refcount)
917 {
918 addr_map_entry_remove(this->vips, addr, iface);
919 this->condvar->broadcast(this->condvar);
920 return;
921 }
922 addr_map_entry_remove(this->addrs, addr, iface);
923 }
924
925 /**
926 * process RTM_NEWLINK/RTM_DELLINK from kernel
927 */
928 static void process_link(private_kernel_netlink_net_t *this,
929 struct nlmsghdr *hdr, bool event)
930 {
931 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
932 struct rtattr *rta = IFLA_RTA(msg);
933 size_t rtasize = IFLA_PAYLOAD (hdr);
934 enumerator_t *enumerator;
935 iface_entry_t *current, *entry = NULL;
936 char *name = NULL;
937 bool update = FALSE, update_routes = FALSE;
938
939 while (RTA_OK(rta, rtasize))
940 {
941 switch (rta->rta_type)
942 {
943 case IFLA_IFNAME:
944 name = RTA_DATA(rta);
945 break;
946 }
947 rta = RTA_NEXT(rta, rtasize);
948 }
949 if (!name)
950 {
951 name = "(unknown)";
952 }
953
954 this->lock->write_lock(this->lock);
955 switch (hdr->nlmsg_type)
956 {
957 case RTM_NEWLINK:
958 {
959 if (this->ifaces->find_first(this->ifaces,
960 (void*)iface_entry_by_index, (void**)&entry,
961 &msg->ifi_index) != SUCCESS)
962 {
963 INIT(entry,
964 .ifindex = msg->ifi_index,
965 .addrs = linked_list_create(),
966 .usable = hydra->kernel_interface->is_interface_usable(
967 hydra->kernel_interface, name),
968 );
969 this->ifaces->insert_last(this->ifaces, entry);
970 }
971 strncpy(entry->ifname, name, IFNAMSIZ);
972 entry->ifname[IFNAMSIZ-1] = '\0';
973 if (event && entry->usable)
974 {
975 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
976 {
977 update = update_routes = TRUE;
978 DBG1(DBG_KNL, "interface %s activated", name);
979 }
980 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
981 {
982 update = TRUE;
983 DBG1(DBG_KNL, "interface %s deactivated", name);
984 }
985 }
986 entry->flags = msg->ifi_flags;
987 break;
988 }
989 case RTM_DELLINK:
990 {
991 enumerator = this->ifaces->create_enumerator(this->ifaces);
992 while (enumerator->enumerate(enumerator, &current))
993 {
994 if (current->ifindex == msg->ifi_index)
995 {
996 if (event && current->usable)
997 {
998 update = TRUE;
999 DBG1(DBG_KNL, "interface %s deleted", current->ifname);
1000 }
1001 /* TODO: move virtual IPs installed on this interface to
1002 * another interface? */
1003 this->ifaces->remove_at(this->ifaces, enumerator);
1004 current->addrs->invoke_function(current->addrs,
1005 (void*)addr_entry_unregister, current, this);
1006 iface_entry_destroy(current);
1007 break;
1008 }
1009 }
1010 enumerator->destroy(enumerator);
1011 break;
1012 }
1013 }
1014 this->lock->unlock(this->lock);
1015
1016 if (update_routes && event)
1017 {
1018 queue_route_reinstall(this, strdup(name));
1019 }
1020
1021 if (update && event)
1022 {
1023 fire_roam_event(this, TRUE);
1024 }
1025 }
1026
1027 /**
1028 * process RTM_NEWADDR/RTM_DELADDR from kernel
1029 */
1030 static void process_addr(private_kernel_netlink_net_t *this,
1031 struct nlmsghdr *hdr, bool event)
1032 {
1033 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
1034 struct rtattr *rta = IFA_RTA(msg);
1035 size_t rtasize = IFA_PAYLOAD (hdr);
1036 host_t *host = NULL;
1037 iface_entry_t *iface;
1038 chunk_t local = chunk_empty, address = chunk_empty;
1039 char *route_ifname = NULL;
1040 bool update = FALSE, found = FALSE, changed = FALSE;
1041
1042 while (RTA_OK(rta, rtasize))
1043 {
1044 switch (rta->rta_type)
1045 {
1046 case IFA_LOCAL:
1047 local.ptr = RTA_DATA(rta);
1048 local.len = RTA_PAYLOAD(rta);
1049 break;
1050 case IFA_ADDRESS:
1051 address.ptr = RTA_DATA(rta);
1052 address.len = RTA_PAYLOAD(rta);
1053 break;
1054 }
1055 rta = RTA_NEXT(rta, rtasize);
1056 }
1057
1058 /* For PPP interfaces, we need the IFA_LOCAL address,
1059 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1060 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1061 if (local.ptr)
1062 {
1063 host = host_create_from_chunk(msg->ifa_family, local, 0);
1064 }
1065 else if (address.ptr)
1066 {
1067 host = host_create_from_chunk(msg->ifa_family, address, 0);
1068 }
1069
1070 if (host == NULL)
1071 { /* bad family? */
1072 return;
1073 }
1074
1075 this->lock->write_lock(this->lock);
1076 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
1077 (void**)&iface, &msg->ifa_index) == SUCCESS)
1078 {
1079 addr_map_entry_t *entry, lookup = {
1080 .ip = host,
1081 .iface = iface,
1082 };
1083 addr_entry_t *addr;
1084
1085 entry = this->vips->get(this->vips, &lookup);
1086 if (entry)
1087 {
1088 if (hdr->nlmsg_type == RTM_NEWADDR)
1089 { /* mark as installed and signal waiting threads */
1090 entry->addr->installed = TRUE;
1091 }
1092 else
1093 { /* the address was already marked as uninstalled */
1094 addr = entry->addr;
1095 iface->addrs->remove(iface->addrs, addr, NULL);
1096 addr_map_entry_remove(this->vips, addr, iface);
1097 addr_entry_destroy(addr);
1098 }
1099 /* no roam events etc. for virtual IPs */
1100 this->condvar->broadcast(this->condvar);
1101 this->lock->unlock(this->lock);
1102 host->destroy(host);
1103 return;
1104 }
1105 entry = this->addrs->get(this->addrs, &lookup);
1106 if (entry)
1107 {
1108 if (hdr->nlmsg_type == RTM_DELADDR)
1109 {
1110 found = TRUE;
1111 addr = entry->addr;
1112 iface->addrs->remove(iface->addrs, addr, NULL);
1113 if (iface->usable)
1114 {
1115 changed = TRUE;
1116 DBG1(DBG_KNL, "%H disappeared from %s", host,
1117 iface->ifname);
1118 }
1119 addr_map_entry_remove(this->addrs, addr, iface);
1120 addr_entry_destroy(addr);
1121 }
1122 }
1123 else
1124 {
1125 if (hdr->nlmsg_type == RTM_NEWADDR)
1126 {
1127 found = TRUE;
1128 changed = TRUE;
1129 route_ifname = strdup(iface->ifname);
1130 INIT(addr,
1131 .ip = host->clone(host),
1132 .flags = msg->ifa_flags,
1133 .scope = msg->ifa_scope,
1134 );
1135 iface->addrs->insert_last(iface->addrs, addr);
1136 addr_map_entry_add(this->addrs, addr, iface);
1137 if (event && iface->usable)
1138 {
1139 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
1140 }
1141 }
1142 }
1143 if (found && (iface->flags & IFF_UP))
1144 {
1145 update = TRUE;
1146 }
1147 if (!iface->usable)
1148 { /* ignore events for interfaces excluded by config */
1149 update = changed = FALSE;
1150 }
1151 }
1152 this->lock->unlock(this->lock);
1153
1154 if (update && event && route_ifname)
1155 {
1156 queue_route_reinstall(this, route_ifname);
1157 }
1158 else
1159 {
1160 free(route_ifname);
1161 }
1162 host->destroy(host);
1163
1164 /* send an update to all IKE_SAs */
1165 if (update && event && changed)
1166 {
1167 fire_roam_event(this, TRUE);
1168 }
1169 }
1170
1171 /**
1172 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1173 */
1174 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
1175 {
1176 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
1177 struct rtattr *rta = RTM_RTA(msg);
1178 size_t rtasize = RTM_PAYLOAD(hdr);
1179 u_int32_t rta_oif = 0;
1180 host_t *host = NULL;
1181
1182 /* ignore routes added by us or in the local routing table (local addrs) */
1183 if (msg->rtm_table && (msg->rtm_table == this->routing_table ||
1184 msg->rtm_table == RT_TABLE_LOCAL))
1185 {
1186 return;
1187 }
1188 else if (msg->rtm_flags & RTM_F_CLONED)
1189 { /* ignore cached routes, seem to be created a lot for IPv6 */
1190 return;
1191 }
1192
1193 while (RTA_OK(rta, rtasize))
1194 {
1195 switch (rta->rta_type)
1196 {
1197 case RTA_PREFSRC:
1198 DESTROY_IF(host);
1199 host = host_create_from_chunk(msg->rtm_family,
1200 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
1201 break;
1202 case RTA_OIF:
1203 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1204 {
1205 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1206 }
1207 break;
1208 }
1209 rta = RTA_NEXT(rta, rtasize);
1210 }
1211 this->lock->read_lock(this->lock);
1212 if (rta_oif && !is_interface_up_and_usable(this, rta_oif))
1213 { /* ignore route changes for interfaces that are ignored or down */
1214 this->lock->unlock(this->lock);
1215 DESTROY_IF(host);
1216 return;
1217 }
1218 if (!host && rta_oif)
1219 {
1220 host = get_interface_address(this, rta_oif, msg->rtm_family,
1221 NULL, NULL);
1222 }
1223 if (!host || is_known_vip(this, host))
1224 { /* ignore routes added for virtual IPs */
1225 this->lock->unlock(this->lock);
1226 DESTROY_IF(host);
1227 return;
1228 }
1229 this->lock->unlock(this->lock);
1230 fire_roam_event(this, FALSE);
1231 host->destroy(host);
1232 }
1233
1234 /**
1235 * Receives events from kernel
1236 */
1237 static bool receive_events(private_kernel_netlink_net_t *this, int fd,
1238 watcher_event_t event)
1239 {
1240 char response[1536];
1241 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
1242 struct sockaddr_nl addr;
1243 socklen_t addr_len = sizeof(addr);
1244 int len;
1245
1246 len = recvfrom(this->socket_events, response, sizeof(response),
1247 MSG_DONTWAIT, (struct sockaddr*)&addr, &addr_len);
1248 if (len < 0)
1249 {
1250 switch (errno)
1251 {
1252 case EINTR:
1253 /* interrupted, try again */
1254 return TRUE;
1255 case EAGAIN:
1256 /* no data ready, select again */
1257 return TRUE;
1258 default:
1259 DBG1(DBG_KNL, "unable to receive from rt event socket");
1260 sleep(1);
1261 return TRUE;
1262 }
1263 }
1264
1265 if (addr.nl_pid != 0)
1266 { /* not from kernel. not interested, try another one */
1267 return TRUE;
1268 }
1269
1270 while (NLMSG_OK(hdr, len))
1271 {
1272 /* looks good so far, dispatch netlink message */
1273 switch (hdr->nlmsg_type)
1274 {
1275 case RTM_NEWADDR:
1276 case RTM_DELADDR:
1277 process_addr(this, hdr, TRUE);
1278 break;
1279 case RTM_NEWLINK:
1280 case RTM_DELLINK:
1281 process_link(this, hdr, TRUE);
1282 break;
1283 case RTM_NEWROUTE:
1284 case RTM_DELROUTE:
1285 if (this->process_route)
1286 {
1287 process_route(this, hdr);
1288 }
1289 break;
1290 default:
1291 break;
1292 }
1293 hdr = NLMSG_NEXT(hdr, len);
1294 }
1295 return TRUE;
1296 }
1297
1298 /** enumerator over addresses */
1299 typedef struct {
1300 private_kernel_netlink_net_t* this;
1301 /** which addresses to enumerate */
1302 kernel_address_type_t which;
1303 } address_enumerator_t;
1304
1305 /**
1306 * cleanup function for address enumerator
1307 */
1308 static void address_enumerator_destroy(address_enumerator_t *data)
1309 {
1310 data->this->lock->unlock(data->this->lock);
1311 free(data);
1312 }
1313
1314 /**
1315 * filter for addresses
1316 */
1317 static bool filter_addresses(address_enumerator_t *data,
1318 addr_entry_t** in, host_t** out)
1319 {
1320 if (!(data->which & ADDR_TYPE_VIRTUAL) && (*in)->refcount)
1321 { /* skip virtual interfaces added by us */
1322 return FALSE;
1323 }
1324 if (!(data->which & ADDR_TYPE_REGULAR) && !(*in)->refcount)
1325 { /* address is regular, but not requested */
1326 return FALSE;
1327 }
1328 if ((*in)->scope >= RT_SCOPE_LINK)
1329 { /* skip addresses with a unusable scope */
1330 return FALSE;
1331 }
1332 *out = (*in)->ip;
1333 return TRUE;
1334 }
1335
1336 /**
1337 * enumerator constructor for interfaces
1338 */
1339 static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
1340 address_enumerator_t *data)
1341 {
1342 return enumerator_create_filter(
1343 iface->addrs->create_enumerator(iface->addrs),
1344 (void*)filter_addresses, data, NULL);
1345 }
1346
1347 /**
1348 * filter for interfaces
1349 */
1350 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
1351 iface_entry_t** out)
1352 {
1353 if (!(data->which & ADDR_TYPE_IGNORED) && !(*in)->usable)
1354 { /* skip interfaces excluded by config */
1355 return FALSE;
1356 }
1357 if (!(data->which & ADDR_TYPE_LOOPBACK) && ((*in)->flags & IFF_LOOPBACK))
1358 { /* ignore loopback devices */
1359 return FALSE;
1360 }
1361 if (!(data->which & ADDR_TYPE_DOWN) && !((*in)->flags & IFF_UP))
1362 { /* skip interfaces not up */
1363 return FALSE;
1364 }
1365 *out = *in;
1366 return TRUE;
1367 }
1368
1369 METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
1370 private_kernel_netlink_net_t *this, kernel_address_type_t which)
1371 {
1372 address_enumerator_t *data;
1373
1374 INIT(data,
1375 .this = this,
1376 .which = which,
1377 );
1378
1379 this->lock->read_lock(this->lock);
1380 return enumerator_create_nested(
1381 enumerator_create_filter(
1382 this->ifaces->create_enumerator(this->ifaces),
1383 (void*)filter_interfaces, data, NULL),
1384 (void*)create_iface_enumerator, data,
1385 (void*)address_enumerator_destroy);
1386 }
1387
1388 METHOD(kernel_net_t, get_interface_name, bool,
1389 private_kernel_netlink_net_t *this, host_t* ip, char **name)
1390 {
1391 addr_map_entry_t *entry, lookup = {
1392 .ip = ip,
1393 };
1394
1395 if (ip->is_anyaddr(ip))
1396 {
1397 return FALSE;
1398 }
1399 this->lock->read_lock(this->lock);
1400 /* first try to find it on an up and usable interface */
1401 entry = this->addrs->get_match(this->addrs, &lookup,
1402 (void*)addr_map_entry_match_up_and_usable);
1403 if (entry)
1404 {
1405 if (name)
1406 {
1407 *name = strdup(entry->iface->ifname);
1408 DBG2(DBG_KNL, "%H is on interface %s", ip, *name);
1409 }
1410 this->lock->unlock(this->lock);
1411 return TRUE;
1412 }
1413 /* in a second step, consider virtual IPs installed by us */
1414 entry = this->vips->get_match(this->vips, &lookup,
1415 (void*)addr_map_entry_match_up_and_usable);
1416 if (entry)
1417 {
1418 if (name)
1419 {
1420 *name = strdup(entry->iface->ifname);
1421 DBG2(DBG_KNL, "virtual IP %H is on interface %s", ip, *name);
1422 }
1423 this->lock->unlock(this->lock);
1424 return TRUE;
1425 }
1426 /* maybe it is installed on an ignored interface */
1427 entry = this->addrs->get_match(this->addrs, &lookup,
1428 (void*)addr_map_entry_match_up);
1429 if (!entry)
1430 {
1431 DBG2(DBG_KNL, "%H is not a local address or the interface is down", ip);
1432 }
1433 this->lock->unlock(this->lock);
1434 return FALSE;
1435 }
1436
1437 /**
1438 * get the index of an interface by name
1439 */
1440 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
1441 {
1442 iface_entry_t *iface;
1443 int ifindex = 0;
1444
1445 DBG2(DBG_KNL, "getting iface index for %s", name);
1446
1447 this->lock->read_lock(this->lock);
1448 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1449 (void**)&iface, name) == SUCCESS)
1450 {
1451 ifindex = iface->ifindex;
1452 }
1453 this->lock->unlock(this->lock);
1454
1455 if (ifindex == 0)
1456 {
1457 DBG1(DBG_KNL, "unable to get interface index for %s", name);
1458 }
1459 return ifindex;
1460 }
1461
1462 /**
1463 * check if an address or net (addr with prefix net bits) is in
1464 * subnet (net with net_len net bits)
1465 */
1466 static bool addr_in_subnet(chunk_t addr, int prefix, chunk_t net, int net_len)
1467 {
1468 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
1469 int byte = 0;
1470
1471 if (net_len == 0)
1472 { /* any address matches a /0 network */
1473 return TRUE;
1474 }
1475 if (addr.len != net.len || net_len > 8 * net.len || prefix < net_len)
1476 {
1477 return FALSE;
1478 }
1479 /* scan through all bytes in network order */
1480 while (net_len > 0)
1481 {
1482 if (net_len < 8)
1483 {
1484 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
1485 }
1486 else
1487 {
1488 if (addr.ptr[byte] != net.ptr[byte])
1489 {
1490 return FALSE;
1491 }
1492 byte++;
1493 net_len -= 8;
1494 }
1495 }
1496 return TRUE;
1497 }
1498
1499 /**
1500 * Store information about a route retrieved via RTNETLINK
1501 */
1502 typedef struct {
1503 chunk_t gtw;
1504 chunk_t src;
1505 chunk_t dst;
1506 host_t *src_host;
1507 u_int8_t dst_len;
1508 u_int32_t table;
1509 u_int32_t oif;
1510 } rt_entry_t;
1511
1512 /**
1513 * Free a route entry
1514 */
1515 static void rt_entry_destroy(rt_entry_t *this)
1516 {
1517 DESTROY_IF(this->src_host);
1518 free(this);
1519 }
1520
1521 /**
1522 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1523 * reused if not NULL.
1524 *
1525 * Returned chunks point to internal data of the Netlink message.
1526 */
1527 static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
1528 {
1529 struct rtattr *rta;
1530 struct rtmsg *msg;
1531 size_t rtasize;
1532
1533 msg = (struct rtmsg*)(NLMSG_DATA(hdr));
1534 rta = RTM_RTA(msg);
1535 rtasize = RTM_PAYLOAD(hdr);
1536
1537 if (route)
1538 {
1539 route->gtw = chunk_empty;
1540 route->src = chunk_empty;
1541 route->dst = chunk_empty;
1542 route->dst_len = msg->rtm_dst_len;
1543 route->table = msg->rtm_table;
1544 route->oif = 0;
1545 }
1546 else
1547 {
1548 INIT(route,
1549 .dst_len = msg->rtm_dst_len,
1550 .table = msg->rtm_table,
1551 );
1552 }
1553
1554 while (RTA_OK(rta, rtasize))
1555 {
1556 switch (rta->rta_type)
1557 {
1558 case RTA_PREFSRC:
1559 route->src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1560 break;
1561 case RTA_GATEWAY:
1562 route->gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1563 break;
1564 case RTA_DST:
1565 route->dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1566 break;
1567 case RTA_OIF:
1568 if (RTA_PAYLOAD(rta) == sizeof(route->oif))
1569 {
1570 route->oif = *(u_int32_t*)RTA_DATA(rta);
1571 }
1572 break;
1573 #ifdef HAVE_RTA_TABLE
1574 case RTA_TABLE:
1575 if (RTA_PAYLOAD(rta) == sizeof(route->table))
1576 {
1577 route->table = *(u_int32_t*)RTA_DATA(rta);
1578 }
1579 break;
1580 #endif /* HAVE_RTA_TABLE*/
1581 }
1582 rta = RTA_NEXT(rta, rtasize);
1583 }
1584 return route;
1585 }
1586
1587 /**
1588 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1589 */
1590 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
1591 int prefix, bool nexthop, host_t *candidate,
1592 u_int recursion)
1593 {
1594 netlink_buf_t request;
1595 struct nlmsghdr *hdr, *out, *current;
1596 struct rtmsg *msg;
1597 chunk_t chunk;
1598 size_t len;
1599 linked_list_t *routes;
1600 rt_entry_t *route = NULL, *best = NULL;
1601 enumerator_t *enumerator;
1602 host_t *addr = NULL;
1603 bool match_net;
1604 int family;
1605
1606 if (recursion > MAX_ROUTE_RECURSION)
1607 {
1608 return NULL;
1609 }
1610 chunk = dest->get_address(dest);
1611 len = chunk.len * 8;
1612 prefix = prefix < 0 ? len : min(prefix, len);
1613 match_net = prefix != len;
1614
1615 memset(&request, 0, sizeof(request));
1616
1617 family = dest->get_family(dest);
1618 hdr = (struct nlmsghdr*)request;
1619 hdr->nlmsg_flags = NLM_F_REQUEST;
1620 if (family == AF_INET || this->rta_prefsrc_for_ipv6 ||
1621 this->routing_table || match_net)
1622 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1623 * as we want to ignore routes with virtual IPs we cannot use DUMP
1624 * if these routes are not installed in a separate table */
1625 hdr->nlmsg_flags |= NLM_F_DUMP;
1626 }
1627 hdr->nlmsg_type = RTM_GETROUTE;
1628 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1629
1630 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1631 msg->rtm_family = family;
1632 if (candidate)
1633 {
1634 chunk = candidate->get_address(candidate);
1635 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1636 }
1637 if (!match_net)
1638 {
1639 chunk = dest->get_address(dest);
1640 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1641 }
1642
1643 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
1644 {
1645 DBG2(DBG_KNL, "getting %s to reach %H/%d failed",
1646 nexthop ? "nexthop" : "address", dest, prefix);
1647 return NULL;
1648 }
1649 routes = linked_list_create();
1650 this->lock->read_lock(this->lock);
1651
1652 for (current = out; NLMSG_OK(current, len);
1653 current = NLMSG_NEXT(current, len))
1654 {
1655 switch (current->nlmsg_type)
1656 {
1657 case NLMSG_DONE:
1658 break;
1659 case RTM_NEWROUTE:
1660 {
1661 rt_entry_t *other;
1662 uintptr_t table;
1663
1664 route = parse_route(current, route);
1665
1666 table = (uintptr_t)route->table;
1667 if (this->rt_exclude->find_first(this->rt_exclude, NULL,
1668 (void**)&table) == SUCCESS)
1669 { /* route is from an excluded routing table */
1670 continue;
1671 }
1672 if (this->routing_table != 0 &&
1673 route->table == this->routing_table)
1674 { /* route is from our own ipsec routing table */
1675 continue;
1676 }
1677 if (route->oif && !is_interface_up_and_usable(this, route->oif))
1678 { /* interface is down */
1679 continue;
1680 }
1681 if (!addr_in_subnet(chunk, prefix, route->dst, route->dst_len))
1682 { /* route destination does not contain dest */
1683 continue;
1684 }
1685 if (route->src.ptr)
1686 { /* verify source address, if any */
1687 host_t *src = host_create_from_chunk(msg->rtm_family,
1688 route->src, 0);
1689 if (src && is_known_vip(this, src))
1690 { /* ignore routes installed by us */
1691 src->destroy(src);
1692 continue;
1693 }
1694 route->src_host = src;
1695 }
1696 /* insert route, sorted by decreasing network prefix */
1697 enumerator = routes->create_enumerator(routes);
1698 while (enumerator->enumerate(enumerator, &other))
1699 {
1700 if (route->dst_len > other->dst_len)
1701 {
1702 break;
1703 }
1704 }
1705 routes->insert_before(routes, enumerator, route);
1706 enumerator->destroy(enumerator);
1707 route = NULL;
1708 continue;
1709 }
1710 default:
1711 continue;
1712 }
1713 break;
1714 }
1715 if (route)
1716 {
1717 rt_entry_destroy(route);
1718 }
1719
1720 /* now we have a list of routes matching dest, sorted by net prefix.
1721 * we will look for source addresses for these routes and select the one
1722 * with the preferred source address, if possible */
1723 enumerator = routes->create_enumerator(routes);
1724 while (enumerator->enumerate(enumerator, &route))
1725 {
1726 if (route->src_host)
1727 { /* got a source address with the route, if no preferred source
1728 * is given or it matches we are done, as this is the best route */
1729 if (!candidate || candidate->ip_equals(candidate, route->src_host))
1730 {
1731 best = route;
1732 break;
1733 }
1734 else if (route->oif)
1735 { /* no match yet, maybe it is assigned to the same interface */
1736 host_t *src = get_interface_address(this, route->oif,
1737 msg->rtm_family, dest, candidate);
1738 if (src && src->ip_equals(src, candidate))
1739 {
1740 route->src_host->destroy(route->src_host);
1741 route->src_host = src;
1742 best = route;
1743 break;
1744 }
1745 DESTROY_IF(src);
1746 }
1747 /* no luck yet with the source address. if this is the best (first)
1748 * route we store it as fallback in case we don't find a route with
1749 * the preferred source */
1750 best = best ?: route;
1751 continue;
1752 }
1753 if (route->oif)
1754 { /* no src, but an interface - get address from it */
1755 route->src_host = get_interface_address(this, route->oif,
1756 msg->rtm_family, dest, candidate);
1757 if (route->src_host)
1758 { /* we handle this address the same as the one above */
1759 if (!candidate ||
1760 candidate->ip_equals(candidate, route->src_host))
1761 {
1762 best = route;
1763 break;
1764 }
1765 best = best ?: route;
1766 continue;
1767 }
1768 }
1769 if (route->gtw.ptr)
1770 { /* no src, no iface, but a gateway - lookup src to reach gtw */
1771 host_t *gtw;
1772
1773 gtw = host_create_from_chunk(msg->rtm_family, route->gtw, 0);
1774 if (gtw && !gtw->ip_equals(gtw, dest))
1775 {
1776 route->src_host = get_route(this, gtw, -1, FALSE, candidate,
1777 recursion + 1);
1778 }
1779 DESTROY_IF(gtw);
1780 if (route->src_host)
1781 { /* more of the same */
1782 if (!candidate ||
1783 candidate->ip_equals(candidate, route->src_host))
1784 {
1785 best = route;
1786 break;
1787 }
1788 best = best ?: route;
1789 }
1790 }
1791 }
1792 enumerator->destroy(enumerator);
1793
1794 if (nexthop)
1795 { /* nexthop lookup, return gateway if any */
1796 if (best || routes->get_first(routes, (void**)&best) == SUCCESS)
1797 {
1798 addr = host_create_from_chunk(msg->rtm_family, best->gtw, 0);
1799 }
1800 if (!addr && !match_net)
1801 { /* fallback to destination address */
1802 addr = dest->clone(dest);
1803 }
1804 }
1805 else
1806 {
1807 if (best)
1808 {
1809 addr = best->src_host->clone(best->src_host);
1810 }
1811 }
1812 this->lock->unlock(this->lock);
1813 routes->destroy_function(routes, (void*)rt_entry_destroy);
1814 free(out);
1815
1816 if (addr)
1817 {
1818 DBG2(DBG_KNL, "using %H as %s to reach %H/%d", addr,
1819 nexthop ? "nexthop" : "address", dest, prefix);
1820 }
1821 else if (!recursion)
1822 {
1823 DBG2(DBG_KNL, "no %s found to reach %H/%d",
1824 nexthop ? "nexthop" : "address", dest, prefix);
1825 }
1826 return addr;
1827 }
1828
1829 METHOD(kernel_net_t, get_source_addr, host_t*,
1830 private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
1831 {
1832 return get_route(this, dest, -1, FALSE, src, 0);
1833 }
1834
1835 METHOD(kernel_net_t, get_nexthop, host_t*,
1836 private_kernel_netlink_net_t *this, host_t *dest, int prefix, host_t *src)
1837 {
1838 return get_route(this, dest, prefix, TRUE, src, 0);
1839 }
1840
1841 /**
1842 * Manages the creation and deletion of ip addresses on an interface.
1843 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1844 */
1845 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
1846 int flags, int if_index, host_t *ip, int prefix)
1847 {
1848 netlink_buf_t request;
1849 struct nlmsghdr *hdr;
1850 struct ifaddrmsg *msg;
1851 chunk_t chunk;
1852
1853 memset(&request, 0, sizeof(request));
1854
1855 chunk = ip->get_address(ip);
1856
1857 hdr = (struct nlmsghdr*)request;
1858 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1859 hdr->nlmsg_type = nlmsg_type;
1860 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1861
1862 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1863 msg->ifa_family = ip->get_family(ip);
1864 msg->ifa_flags = 0;
1865 msg->ifa_prefixlen = prefix < 0 ? chunk.len * 8 : prefix;
1866 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1867 msg->ifa_index = if_index;
1868
1869 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1870
1871 if (ip->get_family(ip) == AF_INET6 && this->rta_prefsrc_for_ipv6)
1872 { /* if source routes are possible we let the virtual IP get deprecated
1873 * immediately (but mark it as valid forever) so it gets only used if
1874 * forced by our route, and not by the default IPv6 address selection */
1875 struct ifa_cacheinfo cache = {
1876 .ifa_valid = 0xFFFFFFFF,
1877 .ifa_prefered = 0,
1878 };
1879 netlink_add_attribute(hdr, IFA_CACHEINFO, chunk_from_thing(cache),
1880 sizeof(request));
1881 }
1882 return this->socket->send_ack(this->socket, hdr);
1883 }
1884
1885 METHOD(kernel_net_t, add_ip, status_t,
1886 private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
1887 char *iface_name)
1888 {
1889 addr_map_entry_t *entry, lookup = {
1890 .ip = virtual_ip,
1891 };
1892 iface_entry_t *iface = NULL;
1893
1894 if (!this->install_virtual_ip)
1895 { /* disabled by config */
1896 return SUCCESS;
1897 }
1898
1899 this->lock->write_lock(this->lock);
1900 /* the virtual IP might actually be installed as regular IP, in which case
1901 * we don't track it as virtual IP */
1902 entry = this->addrs->get_match(this->addrs, &lookup,
1903 (void*)addr_map_entry_match);
1904 if (!entry)
1905 { /* otherwise it might already be installed as virtual IP */
1906 entry = this->vips->get_match(this->vips, &lookup,
1907 (void*)addr_map_entry_match);
1908 if (entry)
1909 { /* the vip we found can be in one of three states: 1) installed and
1910 * ready, 2) just added by another thread, but not yet confirmed to
1911 * be installed by the kernel, 3) just deleted, but not yet gone.
1912 * Then while we wait below, several things could happen (as we
1913 * release the lock). For instance, the interface could disappear,
1914 * or the IP is finally deleted, and it reappears on a different
1915 * interface. All these cases are handled by the call below. */
1916 while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
1917 {
1918 this->condvar->wait(this->condvar, this->lock);
1919 }
1920 if (entry)
1921 {
1922 entry->addr->refcount++;
1923 }
1924 }
1925 }
1926 if (entry)
1927 {
1928 DBG2(DBG_KNL, "virtual IP %H is already installed on %s", virtual_ip,
1929 entry->iface->ifname);
1930 this->lock->unlock(this->lock);
1931 return SUCCESS;
1932 }
1933 /* try to find the target interface, either by config or via src ip */
1934 if (!this->install_virtual_ip_on ||
1935 this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1936 (void**)&iface, this->install_virtual_ip_on) != SUCCESS)
1937 {
1938 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1939 (void**)&iface, iface_name) != SUCCESS)
1940 { /* if we don't find the requested interface we just use the first */
1941 this->ifaces->get_first(this->ifaces, (void**)&iface);
1942 }
1943 }
1944 if (iface)
1945 {
1946 addr_entry_t *addr;
1947
1948 INIT(addr,
1949 .ip = virtual_ip->clone(virtual_ip),
1950 .refcount = 1,
1951 .scope = RT_SCOPE_UNIVERSE,
1952 );
1953 iface->addrs->insert_last(iface->addrs, addr);
1954 addr_map_entry_add(this->vips, addr, iface);
1955 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1956 iface->ifindex, virtual_ip, prefix) == SUCCESS)
1957 {
1958 while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
1959 { /* wait until address appears */
1960 this->condvar->wait(this->condvar, this->lock);
1961 }
1962 if (entry)
1963 { /* we fail if the interface got deleted in the meantime */
1964 DBG2(DBG_KNL, "virtual IP %H installed on %s", virtual_ip,
1965 entry->iface->ifname);
1966 this->lock->unlock(this->lock);
1967 /* during IKEv1 reauthentication, children get moved from
1968 * old the new SA before the virtual IP is available. This
1969 * kills the route for our virtual IP, reinstall. */
1970 queue_route_reinstall(this, strdup(entry->iface->ifname));
1971 return SUCCESS;
1972 }
1973 }
1974 this->lock->unlock(this->lock);
1975 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1976 return FAILED;
1977 }
1978 this->lock->unlock(this->lock);
1979 DBG1(DBG_KNL, "no interface available, unable to install virtual IP %H",
1980 virtual_ip);
1981 return FAILED;
1982 }
1983
1984 METHOD(kernel_net_t, del_ip, status_t,
1985 private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
1986 bool wait)
1987 {
1988 addr_map_entry_t *entry, lookup = {
1989 .ip = virtual_ip,
1990 };
1991
1992 if (!this->install_virtual_ip)
1993 { /* disabled by config */
1994 return SUCCESS;
1995 }
1996
1997 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1998
1999 this->lock->write_lock(this->lock);
2000 entry = this->vips->get_match(this->vips, &lookup,
2001 (void*)addr_map_entry_match);
2002 if (!entry)
2003 { /* we didn't install this IP as virtual IP */
2004 entry = this->addrs->get_match(this->addrs, &lookup,
2005 (void*)addr_map_entry_match);
2006 if (entry)
2007 {
2008 DBG2(DBG_KNL, "not deleting existing IP %H on %s", virtual_ip,
2009 entry->iface->ifname);
2010 this->lock->unlock(this->lock);
2011 return SUCCESS;
2012 }
2013 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
2014 this->lock->unlock(this->lock);
2015 return FAILED;
2016 }
2017 if (entry->addr->refcount == 1)
2018 {
2019 status_t status;
2020
2021 /* we set this flag so that threads calling add_ip will block and wait
2022 * until the entry is gone, also so we can wait below */
2023 entry->addr->installed = FALSE;
2024 status = manage_ipaddr(this, RTM_DELADDR, 0, entry->iface->ifindex,
2025 virtual_ip, prefix);
2026 if (status == SUCCESS && wait)
2027 { /* wait until the address is really gone */
2028 while (is_known_vip(this, virtual_ip))
2029 {
2030 this->condvar->wait(this->condvar, this->lock);
2031 }
2032 }
2033 this->lock->unlock(this->lock);
2034 return status;
2035 }
2036 else
2037 {
2038 entry->addr->refcount--;
2039 }
2040 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
2041 virtual_ip);
2042 this->lock->unlock(this->lock);
2043 return SUCCESS;
2044 }
2045
2046 /**
2047 * Manages source routes in the routing table.
2048 * By setting the appropriate nlmsg_type, the route gets added or removed.
2049 */
2050 static status_t manage_srcroute(private_kernel_netlink_net_t *this,
2051 int nlmsg_type, int flags, chunk_t dst_net,
2052 u_int8_t prefixlen, host_t *gateway,
2053 host_t *src_ip, char *if_name)
2054 {
2055 netlink_buf_t request;
2056 struct nlmsghdr *hdr;
2057 struct rtmsg *msg;
2058 int ifindex;
2059 chunk_t chunk;
2060
2061 /* if route is 0.0.0.0/0, we can't install it, as it would
2062 * overwrite the default route. Instead, we add two routes:
2063 * 0.0.0.0/1 and 128.0.0.0/1 */
2064 if (this->routing_table == 0 && prefixlen == 0)
2065 {
2066 chunk_t half_net;
2067 u_int8_t half_prefixlen;
2068 status_t status;
2069
2070 half_net = chunk_alloca(dst_net.len);
2071 memset(half_net.ptr, 0, half_net.len);
2072 half_prefixlen = 1;
2073
2074 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
2075 gateway, src_ip, if_name);
2076 half_net.ptr[0] |= 0x80;
2077 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
2078 gateway, src_ip, if_name);
2079 return status;
2080 }
2081
2082 memset(&request, 0, sizeof(request));
2083
2084 hdr = (struct nlmsghdr*)request;
2085 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
2086 hdr->nlmsg_type = nlmsg_type;
2087 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2088
2089 msg = (struct rtmsg*)NLMSG_DATA(hdr);
2090 msg->rtm_family = src_ip->get_family(src_ip);
2091 msg->rtm_dst_len = prefixlen;
2092 msg->rtm_table = this->routing_table;
2093 msg->rtm_protocol = RTPROT_STATIC;
2094 msg->rtm_type = RTN_UNICAST;
2095 msg->rtm_scope = RT_SCOPE_UNIVERSE;
2096
2097 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
2098 chunk = src_ip->get_address(src_ip);
2099 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
2100 if (gateway && gateway->get_family(gateway) == src_ip->get_family(src_ip))
2101 {
2102 chunk = gateway->get_address(gateway);
2103 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
2104 }
2105 ifindex = get_interface_index(this, if_name);
2106 chunk.ptr = (char*)&ifindex;
2107 chunk.len = sizeof(ifindex);
2108 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
2109
2110 return this->socket->send_ack(this->socket, hdr);
2111 }
2112
2113 METHOD(kernel_net_t, add_route, status_t,
2114 private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
2115 host_t *gateway, host_t *src_ip, char *if_name)
2116 {
2117 status_t status;
2118 route_entry_t *found, route = {
2119 .dst_net = dst_net,
2120 .prefixlen = prefixlen,
2121 .gateway = gateway,
2122 .src_ip = src_ip,
2123 .if_name = if_name,
2124 };
2125
2126 this->routes_lock->lock(this->routes_lock);
2127 found = this->routes->get(this->routes, &route);
2128 if (found)
2129 {
2130 this->routes_lock->unlock(this->routes_lock);
2131 return ALREADY_DONE;
2132 }
2133 status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2134 dst_net, prefixlen, gateway, src_ip, if_name);
2135 if (status == SUCCESS)
2136 {
2137 found = route_entry_clone(&route);
2138 this->routes->put(this->routes, found, found);
2139 }
2140 this->routes_lock->unlock(this->routes_lock);
2141 return status;
2142 }
2143
2144 METHOD(kernel_net_t, del_route, status_t,
2145 private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
2146 host_t *gateway, host_t *src_ip, char *if_name)
2147 {
2148 status_t status;
2149 route_entry_t *found, route = {
2150 .dst_net = dst_net,
2151 .prefixlen = prefixlen,
2152 .gateway = gateway,
2153 .src_ip = src_ip,
2154 .if_name = if_name,
2155 };
2156
2157 this->routes_lock->lock(this->routes_lock);
2158 found = this->routes->get(this->routes, &route);
2159 if (!found)
2160 {
2161 this->routes_lock->unlock(this->routes_lock);
2162 return NOT_FOUND;
2163 }
2164 this->routes->remove(this->routes, found);
2165 route_entry_destroy(found);
2166 status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
2167 gateway, src_ip, if_name);
2168 this->routes_lock->unlock(this->routes_lock);
2169 return status;
2170 }
2171
2172 /**
2173 * Initialize a list of local addresses.
2174 */
2175 static status_t init_address_list(private_kernel_netlink_net_t *this)
2176 {
2177 netlink_buf_t request;
2178 struct nlmsghdr *out, *current, *in;
2179 struct rtgenmsg *msg;
2180 size_t len;
2181 enumerator_t *ifaces, *addrs;
2182 iface_entry_t *iface;
2183 addr_entry_t *addr;
2184
2185 DBG2(DBG_KNL, "known interfaces and IP addresses:");
2186
2187 memset(&request, 0, sizeof(request));
2188
2189 in = (struct nlmsghdr*)&request;
2190 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
2191 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
2192 msg = (struct rtgenmsg*)NLMSG_DATA(in);
2193 msg->rtgen_family = AF_UNSPEC;
2194
2195 /* get all links */
2196 in->nlmsg_type = RTM_GETLINK;
2197 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
2198 {
2199 return FAILED;
2200 }
2201 current = out;
2202 while (NLMSG_OK(current, len))
2203 {
2204 switch (current->nlmsg_type)
2205 {
2206 case NLMSG_DONE:
2207 break;
2208 case RTM_NEWLINK:
2209 process_link(this, current, FALSE);
2210 /* fall through */
2211 default:
2212 current = NLMSG_NEXT(current, len);
2213 continue;
2214 }
2215 break;
2216 }
2217 free(out);
2218
2219 /* get all interface addresses */
2220 in->nlmsg_type = RTM_GETADDR;
2221 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
2222 {
2223 return FAILED;
2224 }
2225 current = out;
2226 while (NLMSG_OK(current, len))
2227 {
2228 switch (current->nlmsg_type)
2229 {
2230 case NLMSG_DONE:
2231 break;
2232 case RTM_NEWADDR:
2233 process_addr(this, current, FALSE);
2234 /* fall through */
2235 default:
2236 current = NLMSG_NEXT(current, len);
2237 continue;
2238 }
2239 break;
2240 }
2241 free(out);
2242
2243 this->lock->read_lock(this->lock);
2244 ifaces = this->ifaces->create_enumerator(this->ifaces);
2245 while (ifaces->enumerate(ifaces, &iface))
2246 {
2247 if (iface_entry_up_and_usable(iface))
2248 {
2249 DBG2(DBG_KNL, " %s", iface->ifname);
2250 addrs = iface->addrs->create_enumerator(iface->addrs);
2251 while (addrs->enumerate(addrs, (void**)&addr))
2252 {
2253 DBG2(DBG_KNL, " %H", addr->ip);
2254 }
2255 addrs->destroy(addrs);
2256 }
2257 }
2258 ifaces->destroy(ifaces);
2259 this->lock->unlock(this->lock);
2260 return SUCCESS;
2261 }
2262
2263 /**
2264 * create or delete a rule to use our routing table
2265 */
2266 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
2267 int family, u_int32_t table, u_int32_t prio)
2268 {
2269 netlink_buf_t request;
2270 struct nlmsghdr *hdr;
2271 struct rtmsg *msg;
2272 chunk_t chunk;
2273 char *fwmark;
2274
2275 memset(&request, 0, sizeof(request));
2276 hdr = (struct nlmsghdr*)request;
2277 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2278 hdr->nlmsg_type = nlmsg_type;
2279 if (nlmsg_type == RTM_NEWRULE)
2280 {
2281 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
2282 }
2283 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2284
2285 msg = (struct rtmsg*)NLMSG_DATA(hdr);
2286 msg->rtm_table = table;
2287 msg->rtm_family = family;
2288 msg->rtm_protocol = RTPROT_BOOT;
2289 msg->rtm_scope = RT_SCOPE_UNIVERSE;
2290 msg->rtm_type = RTN_UNICAST;
2291
2292 chunk = chunk_from_thing(prio);
2293 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
2294
2295 fwmark = lib->settings->get_str(lib->settings,
2296 "%s.plugins.kernel-netlink.fwmark", NULL, lib->ns);
2297 if (fwmark)
2298 {
2299 #ifdef HAVE_LINUX_FIB_RULES_H
2300 mark_t mark;
2301
2302 if (fwmark[0] == '!')
2303 {
2304 msg->rtm_flags |= FIB_RULE_INVERT;
2305 fwmark++;
2306 }
2307 if (mark_from_string(fwmark, &mark))
2308 {
2309 chunk = chunk_from_thing(mark.value);
2310 netlink_add_attribute(hdr, FRA_FWMARK, chunk, sizeof(request));
2311 chunk = chunk_from_thing(mark.mask);
2312 netlink_add_attribute(hdr, FRA_FWMASK, chunk, sizeof(request));
2313 }
2314 #else
2315 DBG1(DBG_KNL, "setting firewall mark on routing rule is not supported");
2316 #endif
2317 }
2318 return this->socket->send_ack(this->socket, hdr);
2319 }
2320
2321 /**
2322 * check for kernel features (currently only via version number)
2323 */
2324 static void check_kernel_features(private_kernel_netlink_net_t *this)
2325 {
2326 struct utsname utsname;
2327 int a, b, c;
2328
2329 if (uname(&utsname) == 0)
2330 {
2331 switch(sscanf(utsname.release, "%d.%d.%d", &a, &b, &c))
2332 {
2333 case 3:
2334 if (a == 2)
2335 {
2336 DBG2(DBG_KNL, "detected Linux %d.%d.%d, no support for "
2337 "RTA_PREFSRC for IPv6 routes", a, b, c);
2338 break;
2339 }
2340 /* fall-through */
2341 case 2:
2342 /* only 3.x+ uses two part version numbers */
2343 this->rta_prefsrc_for_ipv6 = TRUE;
2344 break;
2345 default:
2346 break;
2347 }
2348 }
2349 }
2350
2351 /**
2352 * Destroy an address to iface map
2353 */
2354 static void addr_map_destroy(hashtable_t *map)
2355 {
2356 enumerator_t *enumerator;
2357 addr_map_entry_t *addr;
2358
2359 enumerator = map->create_enumerator(map);
2360 while (enumerator->enumerate(enumerator, NULL, (void**)&addr))
2361 {
2362 free(addr);
2363 }
2364 enumerator->destroy(enumerator);
2365 map->destroy(map);
2366 }
2367
2368 METHOD(kernel_net_t, destroy, void,
2369 private_kernel_netlink_net_t *this)
2370 {
2371 enumerator_t *enumerator;
2372 route_entry_t *route;
2373
2374 if (this->routing_table)
2375 {
2376 manage_rule(this, RTM_DELRULE, AF_INET, this->routing_table,
2377 this->routing_table_prio);
2378 manage_rule(this, RTM_DELRULE, AF_INET6, this->routing_table,
2379 this->routing_table_prio);
2380 }
2381 if (this->socket_events > 0)
2382 {
2383 lib->watcher->remove(lib->watcher, this->socket_events);
2384 close(this->socket_events);
2385 }
2386 enumerator = this->routes->create_enumerator(this->routes);
2387 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
2388 {
2389 manage_srcroute(this, RTM_DELROUTE, 0, route->dst_net, route->prefixlen,
2390 route->gateway, route->src_ip, route->if_name);
2391 route_entry_destroy(route);
2392 }
2393 enumerator->destroy(enumerator);
2394 this->routes->destroy(this->routes);
2395 this->routes_lock->destroy(this->routes_lock);
2396 DESTROY_IF(this->socket);
2397
2398 net_changes_clear(this);
2399 this->net_changes->destroy(this->net_changes);
2400 this->net_changes_lock->destroy(this->net_changes_lock);
2401
2402 addr_map_destroy(this->addrs);
2403 addr_map_destroy(this->vips);
2404
2405 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2406 this->rt_exclude->destroy(this->rt_exclude);
2407 this->roam_lock->destroy(this->roam_lock);
2408 this->condvar->destroy(this->condvar);
2409 this->lock->destroy(this->lock);
2410 free(this);
2411 }
2412
2413 /*
2414 * Described in header.
2415 */
2416 kernel_netlink_net_t *kernel_netlink_net_create()
2417 {
2418 private_kernel_netlink_net_t *this;
2419 enumerator_t *enumerator;
2420 bool register_for_events = TRUE;
2421 char *exclude;
2422
2423 INIT(this,
2424 .public = {
2425 .interface = {
2426 .get_interface = _get_interface_name,
2427 .create_address_enumerator = _create_address_enumerator,
2428 .get_source_addr = _get_source_addr,
2429 .get_nexthop = _get_nexthop,
2430 .add_ip = _add_ip,
2431 .del_ip = _del_ip,
2432 .add_route = _add_route,
2433 .del_route = _del_route,
2434 .destroy = _destroy,
2435 },
2436 },
2437 .socket = netlink_socket_create(NETLINK_ROUTE),
2438 .rt_exclude = linked_list_create(),
2439 .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
2440 (hashtable_equals_t)route_entry_equals, 16),
2441 .net_changes = hashtable_create(
2442 (hashtable_hash_t)net_change_hash,
2443 (hashtable_equals_t)net_change_equals, 16),
2444 .addrs = hashtable_create(
2445 (hashtable_hash_t)addr_map_entry_hash,
2446 (hashtable_equals_t)addr_map_entry_equals, 16),
2447 .vips = hashtable_create((hashtable_hash_t)addr_map_entry_hash,
2448 (hashtable_equals_t)addr_map_entry_equals, 16),
2449 .routes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
2450 .net_changes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
2451 .ifaces = linked_list_create(),
2452 .lock = rwlock_create(RWLOCK_TYPE_DEFAULT),
2453 .condvar = rwlock_condvar_create(),
2454 .roam_lock = spinlock_create(),
2455 .routing_table = lib->settings->get_int(lib->settings,
2456 "%s.routing_table", ROUTING_TABLE, lib->ns),
2457 .routing_table_prio = lib->settings->get_int(lib->settings,
2458 "%s.routing_table_prio", ROUTING_TABLE_PRIO, lib->ns),
2459 .process_route = lib->settings->get_bool(lib->settings,
2460 "%s.process_route", TRUE, lib->ns),
2461 .install_virtual_ip = lib->settings->get_bool(lib->settings,
2462 "%s.install_virtual_ip", TRUE, lib->ns),
2463 .install_virtual_ip_on = lib->settings->get_str(lib->settings,
2464 "%s.install_virtual_ip_on", NULL, lib->ns),
2465 .prefer_temporary_addrs = lib->settings->get_bool(lib->settings,
2466 "%s.prefer_temporary_addrs", FALSE, lib->ns),
2467 .roam_events = lib->settings->get_bool(lib->settings,
2468 "%s.plugins.kernel-netlink.roam_events", TRUE, lib->ns),
2469 );
2470 timerclear(&this->last_route_reinstall);
2471 timerclear(&this->next_roam);
2472
2473 check_kernel_features(this);
2474
2475 if (streq(lib->ns, "starter"))
2476 { /* starter has no threads, so we do not register for kernel events */
2477 register_for_events = FALSE;
2478 }
2479
2480 exclude = lib->settings->get_str(lib->settings,
2481 "%s.ignore_routing_tables", NULL, lib->ns);
2482 if (exclude)
2483 {
2484 char *token;
2485 uintptr_t table;
2486
2487 enumerator = enumerator_create_token(exclude, " ", " ");
2488 while (enumerator->enumerate(enumerator, &token))
2489 {
2490 errno = 0;
2491 table = strtoul(token, NULL, 10);
2492
2493 if (errno == 0)
2494 {
2495 this->rt_exclude->insert_last(this->rt_exclude, (void*)table);
2496 }
2497 }
2498 enumerator->destroy(enumerator);
2499 }
2500
2501 if (register_for_events)
2502 {
2503 struct sockaddr_nl addr;
2504
2505 memset(&addr, 0, sizeof(addr));
2506 addr.nl_family = AF_NETLINK;
2507
2508 /* create and bind RT socket for events (address/interface/route changes) */
2509 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2510 if (this->socket_events < 0)
2511 {
2512 DBG1(DBG_KNL, "unable to create RT event socket");
2513 destroy(this);
2514 return NULL;
2515 }
2516 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2517 RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_LINK;
2518 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
2519 {
2520 DBG1(DBG_KNL, "unable to bind RT event socket");
2521 destroy(this);
2522 return NULL;
2523 }
2524
2525 lib->watcher->add(lib->watcher, this->socket_events, WATCHER_READ,
2526 (watcher_cb_t)receive_events, this);
2527 }
2528
2529 if (init_address_list(this) != SUCCESS)
2530 {
2531 DBG1(DBG_KNL, "unable to get interface list");
2532 destroy(this);
2533 return NULL;
2534 }
2535
2536 if (this->routing_table)
2537 {
2538 if (manage_rule(this, RTM_NEWRULE, AF_INET, this->routing_table,
2539 this->routing_table_prio) != SUCCESS)
2540 {
2541 DBG1(DBG_KNL, "unable to create IPv4 routing table rule");
2542 }
2543 if (manage_rule(this, RTM_NEWRULE, AF_INET6, this->routing_table,
2544 this->routing_table_prio) != SUCCESS)
2545 {
2546 DBG1(DBG_KNL, "unable to create IPv6 routing table rule");
2547 }
2548 }
2549
2550 return &this->public;
2551 }