kernel-netlink: Follow RFC 6724 when selecting IPv6 source addresses
[strongswan.git] / src / libhydra / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008-2014 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 */
16
17 /*
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
20 *
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
30 *
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37 * THE SOFTWARE.
38 */
39
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
44 #include <unistd.h>
45 #include <errno.h>
46 #include <net/if.h>
47 #ifdef HAVE_LINUX_FIB_RULES_H
48 #include <linux/fib_rules.h>
49 #endif
50
51 #include "kernel_netlink_net.h"
52 #include "kernel_netlink_shared.h"
53
54 #include <hydra.h>
55 #include <utils/debug.h>
56 #include <threading/mutex.h>
57 #include <threading/rwlock.h>
58 #include <threading/rwlock_condvar.h>
59 #include <threading/spinlock.h>
60 #include <collections/hashtable.h>
61 #include <collections/linked_list.h>
62 #include <processing/jobs/callback_job.h>
63
/** time to wait before a roam event is fired, in milliseconds */
#define ROAM_DELAY 100

/** time to wait before routes are reinstalled, in milliseconds */
#define ROUTE_DELAY 100

/** upper bound for the recursion in get_route() when searching addresses */
#define MAX_ROUTE_RECURSION 2

/* default to 0 unless a routing table was already defined elsewhere */
#ifndef ROUTING_TABLE
#define ROUTING_TABLE 0
#endif

/* default to 0 unless a routing table priority was already defined */
#ifndef ROUTING_TABLE_PRIO
#define ROUTING_TABLE_PRIO 0
#endif
80
81 typedef struct addr_entry_t addr_entry_t;
82
83 /**
84 * IP address in an iface_entry_t
85 */
86 struct addr_entry_t {
87
88 /** the ip address */
89 host_t *ip;
90
91 /** address flags */
92 u_char flags;
93
94 /** scope of the address */
95 u_char scope;
96
97 /** number of times this IP is used, if virtual (i.e. managed by us) */
98 u_int refcount;
99
100 /** TRUE once it is installed, if virtual */
101 bool installed;
102 };
103
104 /**
105 * destroy a addr_entry_t object
106 */
107 static void addr_entry_destroy(addr_entry_t *this)
108 {
109 this->ip->destroy(this->ip);
110 free(this);
111 }
112
113 typedef struct iface_entry_t iface_entry_t;
114
115 /**
116 * A network interface on this system, containing addr_entry_t's
117 */
118 struct iface_entry_t {
119
120 /** interface index */
121 int ifindex;
122
123 /** name of the interface */
124 char ifname[IFNAMSIZ];
125
126 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
127 u_int flags;
128
129 /** list of addresses as host_t */
130 linked_list_t *addrs;
131
132 /** TRUE if usable by config */
133 bool usable;
134 };
135
136 /**
137 * destroy an interface entry
138 */
139 static void iface_entry_destroy(iface_entry_t *this)
140 {
141 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
142 free(this);
143 }
144
145 /**
146 * find an interface entry by index
147 */
148 static bool iface_entry_by_index(iface_entry_t *this, int *ifindex)
149 {
150 return this->ifindex == *ifindex;
151 }
152
153 /**
154 * find an interface entry by name
155 */
156 static bool iface_entry_by_name(iface_entry_t *this, char *ifname)
157 {
158 return streq(this->ifname, ifname);
159 }
160
161 /**
162 * check if an interface is up
163 */
164 static inline bool iface_entry_up(iface_entry_t *iface)
165 {
166 return (iface->flags & IFF_UP) == IFF_UP;
167 }
168
169 /**
170 * check if an interface is up and usable
171 */
172 static inline bool iface_entry_up_and_usable(iface_entry_t *iface)
173 {
174 return iface->usable && iface_entry_up(iface);
175 }
176
177 typedef struct addr_map_entry_t addr_map_entry_t;
178
179 /**
180 * Entry that maps an IP address to an interface entry
181 */
182 struct addr_map_entry_t {
183 /** The IP address */
184 host_t *ip;
185
186 /** The address entry for this IP address */
187 addr_entry_t *addr;
188
189 /** The interface this address is installed on */
190 iface_entry_t *iface;
191 };
192
193 /**
194 * Hash a addr_map_entry_t object, all entries with the same IP address
195 * are stored in the same bucket
196 */
197 static u_int addr_map_entry_hash(addr_map_entry_t *this)
198 {
199 return chunk_hash(this->ip->get_address(this->ip));
200 }
201
202 /**
203 * Compare two addr_map_entry_t objects, two entries are equal if they are
204 * installed on the same interface
205 */
206 static bool addr_map_entry_equals(addr_map_entry_t *a, addr_map_entry_t *b)
207 {
208 return a->iface->ifindex == b->iface->ifindex &&
209 a->ip->ip_equals(a->ip, b->ip);
210 }
211
212 /**
213 * Used with get_match this finds an address entry if it is installed on
214 * an up and usable interface
215 */
216 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t *a,
217 addr_map_entry_t *b)
218 {
219 return iface_entry_up_and_usable(b->iface) &&
220 a->ip->ip_equals(a->ip, b->ip);
221 }
222
223 /**
224 * Used with get_match this finds an address entry if it is installed on
225 * any active local interface
226 */
227 static bool addr_map_entry_match_up(addr_map_entry_t *a, addr_map_entry_t *b)
228 {
229 return iface_entry_up(b->iface) && a->ip->ip_equals(a->ip, b->ip);
230 }
231
232 /**
233 * Used with get_match this finds an address entry if it is installed on
234 * any local interface
235 */
236 static bool addr_map_entry_match(addr_map_entry_t *a, addr_map_entry_t *b)
237 {
238 return a->ip->ip_equals(a->ip, b->ip);
239 }
240
241 typedef struct route_entry_t route_entry_t;
242
243 /**
244 * Installed routing entry
245 */
246 struct route_entry_t {
247 /** Name of the interface the route is bound to */
248 char *if_name;
249
250 /** Source ip of the route */
251 host_t *src_ip;
252
253 /** Gateway for this route */
254 host_t *gateway;
255
256 /** Destination net */
257 chunk_t dst_net;
258
259 /** Destination net prefixlen */
260 u_int8_t prefixlen;
261 };
262
263 /**
264 * Clone a route_entry_t object.
265 */
266 static route_entry_t *route_entry_clone(route_entry_t *this)
267 {
268 route_entry_t *route;
269
270 INIT(route,
271 .if_name = strdup(this->if_name),
272 .src_ip = this->src_ip->clone(this->src_ip),
273 .gateway = this->gateway ? this->gateway->clone(this->gateway) : NULL,
274 .dst_net = chunk_clone(this->dst_net),
275 .prefixlen = this->prefixlen,
276 );
277 return route;
278 }
279
280 /**
281 * Destroy a route_entry_t object
282 */
283 static void route_entry_destroy(route_entry_t *this)
284 {
285 free(this->if_name);
286 DESTROY_IF(this->src_ip);
287 DESTROY_IF(this->gateway);
288 chunk_free(&this->dst_net);
289 free(this);
290 }
291
292 /**
293 * Hash a route_entry_t object
294 */
295 static u_int route_entry_hash(route_entry_t *this)
296 {
297 return chunk_hash_inc(chunk_from_thing(this->prefixlen),
298 chunk_hash(this->dst_net));
299 }
300
301 /**
302 * Compare two route_entry_t objects
303 */
304 static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
305 {
306 if (a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
307 a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
308 chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen)
309 {
310 return (!a->gateway && !b->gateway) || (a->gateway && b->gateway &&
311 a->gateway->ip_equals(a->gateway, b->gateway));
312 }
313 return FALSE;
314 }
315
typedef struct net_change_t net_change_t;

/**
 * A network change queued for later processing
 */
struct net_change_t {
	/** name of the interface that was activated (or got a new IP) */
	char *if_name;
};
325
326 /**
327 * Destroy a net_change_t object
328 */
329 static void net_change_destroy(net_change_t *this)
330 {
331 free(this->if_name);
332 free(this);
333 }
334
335 /**
336 * Hash a net_change_t object
337 */
338 static u_int net_change_hash(net_change_t *this)
339 {
340 return chunk_hash(chunk_create(this->if_name, strlen(this->if_name)));
341 }
342
343 /**
344 * Compare two net_change_t objects
345 */
346 static bool net_change_equals(net_change_t *a, net_change_t *b)
347 {
348 return streq(a->if_name, b->if_name);
349 }
350
351 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
352
353 /**
354 * Private variables and functions of kernel_netlink_net class.
355 */
356 struct private_kernel_netlink_net_t {
357 /**
358 * Public part of the kernel_netlink_net_t object.
359 */
360 kernel_netlink_net_t public;
361
362 /**
363 * lock to access various lists and maps
364 */
365 rwlock_t *lock;
366
367 /**
368 * condition variable to signal virtual IP add/removal
369 */
370 rwlock_condvar_t *condvar;
371
372 /**
373 * Cached list of interfaces and its addresses (iface_entry_t)
374 */
375 linked_list_t *ifaces;
376
377 /**
378 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
379 */
380 hashtable_t *addrs;
381
382 /**
383 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
384 */
385 hashtable_t *vips;
386
387 /**
388 * netlink rt socket (routing)
389 */
390 netlink_socket_t *socket;
391
392 /**
393 * Netlink rt socket to receive address change events
394 */
395 int socket_events;
396
397 /**
398 * earliest time of the next roam event
399 */
400 timeval_t next_roam;
401
402 /**
403 * roam event due to address change
404 */
405 bool roam_address;
406
407 /**
408 * lock to check and update roam event time
409 */
410 spinlock_t *roam_lock;
411
412 /**
413 * routing table to install routes
414 */
415 int routing_table;
416
417 /**
418 * priority of used routing table
419 */
420 int routing_table_prio;
421
422 /**
423 * installed routes
424 */
425 hashtable_t *routes;
426
427 /**
428 * mutex for routes
429 */
430 mutex_t *routes_lock;
431
432 /**
433 * interface changes which may trigger route reinstallation
434 */
435 hashtable_t *net_changes;
436
437 /**
438 * mutex for route reinstallation triggers
439 */
440 mutex_t *net_changes_lock;
441
442 /**
443 * time of last route reinstallation
444 */
445 timeval_t last_route_reinstall;
446
447 /**
448 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
449 */
450 bool process_route;
451
452 /**
453 * whether to trigger roam events
454 */
455 bool roam_events;
456
457 /**
458 * whether to actually install virtual IPs
459 */
460 bool install_virtual_ip;
461
462 /**
463 * the name of the interface virtual IP addresses are installed on
464 */
465 char *install_virtual_ip_on;
466
467 /**
468 * whether preferred source addresses can be specified for IPv6 routes
469 */
470 bool rta_prefsrc_for_ipv6;
471
472 /**
473 * whether to prefer temporary IPv6 addresses over public ones
474 */
475 bool prefer_temporary_addrs;
476
477 /**
478 * list with routing tables to be excluded from route lookup
479 */
480 linked_list_t *rt_exclude;
481 };
482
483 /**
484 * Forward declaration
485 */
486 static status_t manage_srcroute(private_kernel_netlink_net_t *this,
487 int nlmsg_type, int flags, chunk_t dst_net,
488 u_int8_t prefixlen, host_t *gateway,
489 host_t *src_ip, char *if_name);
490
491 /**
492 * Clear the queued network changes.
493 */
494 static void net_changes_clear(private_kernel_netlink_net_t *this)
495 {
496 enumerator_t *enumerator;
497 net_change_t *change;
498
499 enumerator = this->net_changes->create_enumerator(this->net_changes);
500 while (enumerator->enumerate(enumerator, NULL, (void**)&change))
501 {
502 this->net_changes->remove_at(this->net_changes, enumerator);
503 net_change_destroy(change);
504 }
505 enumerator->destroy(enumerator);
506 }
507
508 /**
509 * Act upon queued network changes.
510 */
511 static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
512 {
513 enumerator_t *enumerator;
514 route_entry_t *route;
515
516 this->net_changes_lock->lock(this->net_changes_lock);
517 this->routes_lock->lock(this->routes_lock);
518
519 enumerator = this->routes->create_enumerator(this->routes);
520 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
521 {
522 net_change_t *change, lookup = {
523 .if_name = route->if_name,
524 };
525 /* check if a change for the outgoing interface is queued */
526 change = this->net_changes->get(this->net_changes, &lookup);
527 if (!change)
528 { /* in case src_ip is not on the outgoing interface */
529 if (this->public.interface.get_interface(&this->public.interface,
530 route->src_ip, &lookup.if_name))
531 {
532 if (!streq(lookup.if_name, route->if_name))
533 {
534 change = this->net_changes->get(this->net_changes, &lookup);
535 }
536 free(lookup.if_name);
537 }
538 }
539 if (change)
540 {
541 manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
542 route->dst_net, route->prefixlen, route->gateway,
543 route->src_ip, route->if_name);
544 }
545 }
546 enumerator->destroy(enumerator);
547 this->routes_lock->unlock(this->routes_lock);
548
549 net_changes_clear(this);
550 this->net_changes_lock->unlock(this->net_changes_lock);
551 return JOB_REQUEUE_NONE;
552 }
553
554 /**
555 * Queue route reinstallation caused by network changes for a given interface.
556 *
557 * The route reinstallation is delayed for a while and only done once for
558 * several calls during this delay, in order to avoid doing it too often.
559 * The interface name is freed.
560 */
561 static void queue_route_reinstall(private_kernel_netlink_net_t *this,
562 char *if_name)
563 {
564 net_change_t *update, *found;
565 timeval_t now;
566 job_t *job;
567
568 INIT(update,
569 .if_name = if_name
570 );
571
572 this->net_changes_lock->lock(this->net_changes_lock);
573 found = this->net_changes->put(this->net_changes, update, update);
574 if (found)
575 {
576 net_change_destroy(found);
577 }
578 time_monotonic(&now);
579 if (timercmp(&now, &this->last_route_reinstall, >))
580 {
581 timeval_add_ms(&now, ROUTE_DELAY);
582 this->last_route_reinstall = now;
583
584 job = (job_t*)callback_job_create((callback_job_cb_t)reinstall_routes,
585 this, NULL, NULL);
586 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROUTE_DELAY);
587 }
588 this->net_changes_lock->unlock(this->net_changes_lock);
589 }
590
591 /**
592 * check if the given IP is known as virtual IP and currently installed
593 *
594 * this function will also return TRUE if the virtual IP entry disappeared.
595 * in that case the returned entry will be NULL.
596 *
597 * this->lock must be held when calling this function
598 */
599 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t *this,
600 host_t *ip, addr_map_entry_t **entry)
601 {
602 addr_map_entry_t lookup = {
603 .ip = ip,
604 };
605
606 *entry = this->vips->get_match(this->vips, &lookup,
607 (void*)addr_map_entry_match);
608 if (*entry == NULL)
609 { /* the virtual IP disappeared */
610 return TRUE;
611 }
612 return (*entry)->addr->installed;
613 }
614
615 /**
616 * check if the given IP is known as virtual IP
617 *
618 * this->lock must be held when calling this function
619 */
620 static bool is_known_vip(private_kernel_netlink_net_t *this, host_t *ip)
621 {
622 addr_map_entry_t lookup = {
623 .ip = ip,
624 };
625
626 return this->vips->get_match(this->vips, &lookup,
627 (void*)addr_map_entry_match) != NULL;
628 }
629
630 /**
631 * Add an address map entry
632 */
633 static void addr_map_entry_add(hashtable_t *map, addr_entry_t *addr,
634 iface_entry_t *iface)
635 {
636 addr_map_entry_t *entry;
637
638 INIT(entry,
639 .ip = addr->ip,
640 .addr = addr,
641 .iface = iface,
642 );
643 entry = map->put(map, entry, entry);
644 free(entry);
645 }
646
647 /**
648 * Remove an address map entry
649 */
650 static void addr_map_entry_remove(hashtable_t *map, addr_entry_t *addr,
651 iface_entry_t *iface)
652 {
653 addr_map_entry_t *entry, lookup = {
654 .ip = addr->ip,
655 .addr = addr,
656 .iface = iface,
657 };
658
659 entry = map->remove(map, &lookup);
660 free(entry);
661 }
662
663 /**
664 * Determine the type or scope of the given unicast IP address. This is not
665 * the same thing returned in rtm_scope/ifa_scope.
666 *
667 * We use return values as defined in RFC 6724 (referring to RFC 4291).
668 */
669 static u_char get_scope(host_t *ip)
670 {
671 chunk_t addr;
672
673 addr = ip->get_address(ip);
674 switch (addr.len)
675 {
676 case 4:
677 /* we use the mapping defined in RFC 6724, 3.2 */
678 if (addr.ptr[0] == 127)
679 { /* link-local, same as the IPv6 loopback address */
680 return 2;
681 }
682 if (addr.ptr[0] == 169 && addr.ptr[1] == 254)
683 { /* link-local */
684 return 2;
685 }
686 break;
687 case 16:
688 if (IN6_IS_ADDR_LOOPBACK(addr.ptr))
689 { /* link-local, according to RFC 4291, 2.5.3 */
690 return 2;
691 }
692 if (IN6_IS_ADDR_LINKLOCAL(addr.ptr))
693 {
694 return 2;
695 }
696 if (IN6_IS_ADDR_SITELOCAL(addr.ptr))
697 { /* deprecated, according to RFC 4291, 2.5.7 */
698 return 5;
699 }
700 break;
701 default:
702 break;
703 }
704 /* global */
705 return 14;
706 }
707
708 /**
709 * Returns the length of the common prefix in bits up to the length of a's
710 * prefix, defined by RFC 6724 as the portion of the address not including the
711 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
712 */
713 static u_char common_prefix(host_t *a, host_t *b)
714 {
715 chunk_t aa, ba;
716 u_char byte, bits = 0, match;
717
718 aa = a->get_address(a);
719 ba = b->get_address(b);
720 for (byte = 0; byte < 8; byte++)
721 {
722 if (aa.ptr[byte] != ba.ptr[byte])
723 {
724 match = aa.ptr[byte] ^ ba.ptr[byte];
725 for (bits = 8; match; match >>= 1)
726 {
727 bits--;
728 }
729 break;
730 }
731 }
732 return byte * 8 + bits;
733 }
734
735 /**
736 * Compare two IP addresses and return TRUE if the second address is the better
737 * choice of the two to reach the destination.
738 * For IPv6 we approximately follow RFC 6724.
739 */
740 static bool is_address_better(private_kernel_netlink_net_t *this,
741 addr_entry_t *a, addr_entry_t *b, host_t *d)
742 {
743 u_char sa, sb, sd, pa, pb;
744
745 /* rule 2: prefer appropriate scope */
746 if (d)
747 {
748 sa = get_scope(a->ip);
749 sb = get_scope(b->ip);
750 sd = get_scope(d);
751 if (sa < sb)
752 {
753 return sa < sd;
754 }
755 else if (sb < sa)
756 {
757 return sb >= sd;
758 }
759 }
760 if (a->ip->get_family(a->ip) == AF_INET)
761 { /* stop here for IPv4, default to addresses found earlier */
762 return FALSE;
763 }
764 /* rule 3: avoid deprecated addresses (RFC 4862) */
765 if ((a->flags & IFA_F_DEPRECATED) != (b->flags & IFA_F_DEPRECATED))
766 {
767 return a->flags & IFA_F_DEPRECATED;
768 }
769 /* rule 4 is not applicable as we don't know if an address is a home or
770 * care-of addresses.
771 * rule 5 does not apply as we only compare addresses from one interface
772 * rule 6 requires a policy table (optionally configurable) to match
773 * configurable labels
774 */
775 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
776 if ((a->flags & IFA_F_TEMPORARY) != (b->flags & IFA_F_TEMPORARY))
777 {
778 if (this->prefer_temporary_addrs)
779 {
780 return b->flags & IFA_F_TEMPORARY;
781 }
782 return a->flags & IFA_F_TEMPORARY;
783 }
784 /* rule 8: use longest matching prefix */
785 if (d)
786 {
787 pa = common_prefix(a->ip, d);
788 pb = common_prefix(b->ip, d);
789 if (pa != pb)
790 {
791 return pb > pa;
792 }
793 }
794 /* default to addresses found earlier */
795 return FALSE;
796 }
797
798 /**
799 * Get a non-virtual IP address on the given interface.
800 *
801 * If a candidate address is given, we first search for that address and if not
802 * found return the address as above.
803 * Returned host is a clone, has to be freed by caller.
804 *
805 * this->lock must be held when calling this function.
806 */
807 static host_t *get_interface_address(private_kernel_netlink_net_t *this,
808 int ifindex, int family, host_t *dest,
809 host_t *candidate)
810 {
811 iface_entry_t *iface;
812 enumerator_t *addrs;
813 addr_entry_t *addr, *best = NULL;
814
815 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
816 (void**)&iface, &ifindex) == SUCCESS)
817 {
818 if (iface->usable)
819 { /* only use interfaces not excluded by config */
820 addrs = iface->addrs->create_enumerator(iface->addrs);
821 while (addrs->enumerate(addrs, &addr))
822 {
823 if (addr->refcount ||
824 addr->ip->get_family(addr->ip) != family)
825 { /* ignore virtual IP addresses and ensure family matches */
826 continue;
827 }
828 if (candidate && candidate->ip_equals(candidate, addr->ip))
829 { /* stop if we find the candidate */
830 best = addr;
831 break;
832 }
833 else if (!best || is_address_better(this, best, addr, dest))
834 {
835 best = addr;
836 }
837 }
838 addrs->destroy(addrs);
839 }
840 }
841 return best ? best->ip->clone(best->ip) : NULL;
842 }
843
844 /**
845 * callback function that raises the delayed roam event
846 */
847 static job_requeue_t roam_event(private_kernel_netlink_net_t *this)
848 {
849 bool address;
850
851 this->roam_lock->lock(this->roam_lock);
852 address = this->roam_address;
853 this->roam_address = FALSE;
854 this->roam_lock->unlock(this->roam_lock);
855 hydra->kernel_interface->roam(hydra->kernel_interface, address);
856 return JOB_REQUEUE_NONE;
857 }
858
859 /**
860 * fire a roaming event. we delay it for a bit and fire only one event
861 * for multiple calls. otherwise we would create too many events.
862 */
863 static void fire_roam_event(private_kernel_netlink_net_t *this, bool address)
864 {
865 timeval_t now;
866 job_t *job;
867
868 if (!this->roam_events)
869 {
870 return;
871 }
872
873 time_monotonic(&now);
874 this->roam_lock->lock(this->roam_lock);
875 this->roam_address |= address;
876 if (!timercmp(&now, &this->next_roam, >))
877 {
878 this->roam_lock->unlock(this->roam_lock);
879 return;
880 }
881 timeval_add_ms(&now, ROAM_DELAY);
882 this->next_roam = now;
883 this->roam_lock->unlock(this->roam_lock);
884
885 job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
886 this, NULL, NULL);
887 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
888 }
889
890 /**
891 * check if an interface with a given index is up and usable
892 *
893 * this->lock must be locked when calling this function
894 */
895 static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
896 int index)
897 {
898 iface_entry_t *iface;
899
900 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
901 (void**)&iface, &index) == SUCCESS)
902 {
903 return iface_entry_up_and_usable(iface);
904 }
905 return FALSE;
906 }
907
908 /**
909 * unregister the current addr_entry_t from the hashtable it is stored in
910 *
911 * this->lock must be locked when calling this function
912 */
913 static void addr_entry_unregister(addr_entry_t *addr, iface_entry_t *iface,
914 private_kernel_netlink_net_t *this)
915 {
916 if (addr->refcount)
917 {
918 addr_map_entry_remove(this->vips, addr, iface);
919 this->condvar->broadcast(this->condvar);
920 return;
921 }
922 addr_map_entry_remove(this->addrs, addr, iface);
923 }
924
925 /**
926 * process RTM_NEWLINK/RTM_DELLINK from kernel
927 */
928 static void process_link(private_kernel_netlink_net_t *this,
929 struct nlmsghdr *hdr, bool event)
930 {
931 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
932 struct rtattr *rta = IFLA_RTA(msg);
933 size_t rtasize = IFLA_PAYLOAD (hdr);
934 enumerator_t *enumerator;
935 iface_entry_t *current, *entry = NULL;
936 char *name = NULL;
937 bool update = FALSE, update_routes = FALSE;
938
939 while (RTA_OK(rta, rtasize))
940 {
941 switch (rta->rta_type)
942 {
943 case IFLA_IFNAME:
944 name = RTA_DATA(rta);
945 break;
946 }
947 rta = RTA_NEXT(rta, rtasize);
948 }
949 if (!name)
950 {
951 name = "(unknown)";
952 }
953
954 this->lock->write_lock(this->lock);
955 switch (hdr->nlmsg_type)
956 {
957 case RTM_NEWLINK:
958 {
959 if (this->ifaces->find_first(this->ifaces,
960 (void*)iface_entry_by_index, (void**)&entry,
961 &msg->ifi_index) != SUCCESS)
962 {
963 INIT(entry,
964 .ifindex = msg->ifi_index,
965 .addrs = linked_list_create(),
966 .usable = hydra->kernel_interface->is_interface_usable(
967 hydra->kernel_interface, name),
968 );
969 this->ifaces->insert_last(this->ifaces, entry);
970 }
971 strncpy(entry->ifname, name, IFNAMSIZ);
972 entry->ifname[IFNAMSIZ-1] = '\0';
973 if (event && entry->usable)
974 {
975 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
976 {
977 update = update_routes = TRUE;
978 DBG1(DBG_KNL, "interface %s activated", name);
979 }
980 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
981 {
982 update = TRUE;
983 DBG1(DBG_KNL, "interface %s deactivated", name);
984 }
985 }
986 entry->flags = msg->ifi_flags;
987 break;
988 }
989 case RTM_DELLINK:
990 {
991 enumerator = this->ifaces->create_enumerator(this->ifaces);
992 while (enumerator->enumerate(enumerator, &current))
993 {
994 if (current->ifindex == msg->ifi_index)
995 {
996 if (event && current->usable)
997 {
998 update = TRUE;
999 DBG1(DBG_KNL, "interface %s deleted", current->ifname);
1000 }
1001 /* TODO: move virtual IPs installed on this interface to
1002 * another interface? */
1003 this->ifaces->remove_at(this->ifaces, enumerator);
1004 current->addrs->invoke_function(current->addrs,
1005 (void*)addr_entry_unregister, current, this);
1006 iface_entry_destroy(current);
1007 break;
1008 }
1009 }
1010 enumerator->destroy(enumerator);
1011 break;
1012 }
1013 }
1014 this->lock->unlock(this->lock);
1015
1016 if (update_routes && event)
1017 {
1018 queue_route_reinstall(this, strdup(name));
1019 }
1020
1021 if (update && event)
1022 {
1023 fire_roam_event(this, TRUE);
1024 }
1025 }
1026
1027 /**
1028 * process RTM_NEWADDR/RTM_DELADDR from kernel
1029 */
1030 static void process_addr(private_kernel_netlink_net_t *this,
1031 struct nlmsghdr *hdr, bool event)
1032 {
1033 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
1034 struct rtattr *rta = IFA_RTA(msg);
1035 size_t rtasize = IFA_PAYLOAD (hdr);
1036 host_t *host = NULL;
1037 iface_entry_t *iface;
1038 chunk_t local = chunk_empty, address = chunk_empty;
1039 char *route_ifname = NULL;
1040 bool update = FALSE, found = FALSE, changed = FALSE;
1041
1042 while (RTA_OK(rta, rtasize))
1043 {
1044 switch (rta->rta_type)
1045 {
1046 case IFA_LOCAL:
1047 local.ptr = RTA_DATA(rta);
1048 local.len = RTA_PAYLOAD(rta);
1049 break;
1050 case IFA_ADDRESS:
1051 address.ptr = RTA_DATA(rta);
1052 address.len = RTA_PAYLOAD(rta);
1053 break;
1054 }
1055 rta = RTA_NEXT(rta, rtasize);
1056 }
1057
1058 /* For PPP interfaces, we need the IFA_LOCAL address,
1059 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1060 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1061 if (local.ptr)
1062 {
1063 host = host_create_from_chunk(msg->ifa_family, local, 0);
1064 }
1065 else if (address.ptr)
1066 {
1067 host = host_create_from_chunk(msg->ifa_family, address, 0);
1068 }
1069
1070 if (host == NULL)
1071 { /* bad family? */
1072 return;
1073 }
1074
1075 this->lock->write_lock(this->lock);
1076 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
1077 (void**)&iface, &msg->ifa_index) == SUCCESS)
1078 {
1079 addr_map_entry_t *entry, lookup = {
1080 .ip = host,
1081 .iface = iface,
1082 };
1083 addr_entry_t *addr;
1084
1085 entry = this->vips->get(this->vips, &lookup);
1086 if (entry)
1087 {
1088 if (hdr->nlmsg_type == RTM_NEWADDR)
1089 { /* mark as installed and signal waiting threads */
1090 entry->addr->installed = TRUE;
1091 }
1092 else
1093 { /* the address was already marked as uninstalled */
1094 addr = entry->addr;
1095 iface->addrs->remove(iface->addrs, addr, NULL);
1096 addr_map_entry_remove(this->vips, addr, iface);
1097 addr_entry_destroy(addr);
1098 }
1099 /* no roam events etc. for virtual IPs */
1100 this->condvar->broadcast(this->condvar);
1101 this->lock->unlock(this->lock);
1102 host->destroy(host);
1103 return;
1104 }
1105 entry = this->addrs->get(this->addrs, &lookup);
1106 if (entry)
1107 {
1108 if (hdr->nlmsg_type == RTM_DELADDR)
1109 {
1110 found = TRUE;
1111 addr = entry->addr;
1112 iface->addrs->remove(iface->addrs, addr, NULL);
1113 if (iface->usable)
1114 {
1115 changed = TRUE;
1116 DBG1(DBG_KNL, "%H disappeared from %s", host,
1117 iface->ifname);
1118 }
1119 addr_map_entry_remove(this->addrs, addr, iface);
1120 addr_entry_destroy(addr);
1121 }
1122 }
1123 else
1124 {
1125 if (hdr->nlmsg_type == RTM_NEWADDR)
1126 {
1127 found = TRUE;
1128 changed = TRUE;
1129 route_ifname = strdup(iface->ifname);
1130 INIT(addr,
1131 .ip = host->clone(host),
1132 .flags = msg->ifa_flags,
1133 .scope = msg->ifa_scope,
1134 );
1135 iface->addrs->insert_last(iface->addrs, addr);
1136 addr_map_entry_add(this->addrs, addr, iface);
1137 if (event && iface->usable)
1138 {
1139 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
1140 }
1141 }
1142 }
1143 if (found && (iface->flags & IFF_UP))
1144 {
1145 update = TRUE;
1146 }
1147 if (!iface->usable)
1148 { /* ignore events for interfaces excluded by config */
1149 update = changed = FALSE;
1150 }
1151 }
1152 this->lock->unlock(this->lock);
1153
1154 if (update && event && route_ifname)
1155 {
1156 queue_route_reinstall(this, route_ifname);
1157 }
1158 else
1159 {
1160 free(route_ifname);
1161 }
1162 host->destroy(host);
1163
1164 /* send an update to all IKE_SAs */
1165 if (update && event && changed)
1166 {
1167 fire_roam_event(this, TRUE);
1168 }
1169 }
1170
1171 /**
1172 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1173 */
1174 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
1175 {
1176 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
1177 struct rtattr *rta = RTM_RTA(msg);
1178 size_t rtasize = RTM_PAYLOAD(hdr);
1179 u_int32_t rta_oif = 0;
1180 host_t *host = NULL;
1181
1182 /* ignore routes added by us or in the local routing table (local addrs) */
1183 if (msg->rtm_table && (msg->rtm_table == this->routing_table ||
1184 msg->rtm_table == RT_TABLE_LOCAL))
1185 {
1186 return;
1187 }
1188 else if (msg->rtm_flags & RTM_F_CLONED)
1189 { /* ignore cached routes, seem to be created a lot for IPv6 */
1190 return;
1191 }
1192
1193 while (RTA_OK(rta, rtasize))
1194 {
1195 switch (rta->rta_type)
1196 {
1197 case RTA_PREFSRC:
1198 DESTROY_IF(host);
1199 host = host_create_from_chunk(msg->rtm_family,
1200 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
1201 break;
1202 case RTA_OIF:
1203 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1204 {
1205 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1206 }
1207 break;
1208 }
1209 rta = RTA_NEXT(rta, rtasize);
1210 }
1211 this->lock->read_lock(this->lock);
1212 if (rta_oif && !is_interface_up_and_usable(this, rta_oif))
1213 { /* ignore route changes for interfaces that are ignored or down */
1214 this->lock->unlock(this->lock);
1215 DESTROY_IF(host);
1216 return;
1217 }
1218 if (!host && rta_oif)
1219 {
1220 host = get_interface_address(this, rta_oif, msg->rtm_family,
1221 NULL, NULL);
1222 }
1223 if (!host || is_known_vip(this, host))
1224 { /* ignore routes added for virtual IPs */
1225 this->lock->unlock(this->lock);
1226 DESTROY_IF(host);
1227 return;
1228 }
1229 this->lock->unlock(this->lock);
1230 fire_roam_event(this, FALSE);
1231 host->destroy(host);
1232 }
1233
1234 /**
1235 * Receives events from kernel
1236 */
1237 static bool receive_events(private_kernel_netlink_net_t *this, int fd,
1238 watcher_event_t event)
1239 {
1240 char response[1536];
1241 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
1242 struct sockaddr_nl addr;
1243 socklen_t addr_len = sizeof(addr);
1244 int len;
1245
1246 len = recvfrom(this->socket_events, response, sizeof(response),
1247 MSG_DONTWAIT, (struct sockaddr*)&addr, &addr_len);
1248 if (len < 0)
1249 {
1250 switch (errno)
1251 {
1252 case EINTR:
1253 /* interrupted, try again */
1254 return TRUE;
1255 case EAGAIN:
1256 /* no data ready, select again */
1257 return TRUE;
1258 default:
1259 DBG1(DBG_KNL, "unable to receive from rt event socket");
1260 sleep(1);
1261 return TRUE;
1262 }
1263 }
1264
1265 if (addr.nl_pid != 0)
1266 { /* not from kernel. not interested, try another one */
1267 return TRUE;
1268 }
1269
1270 while (NLMSG_OK(hdr, len))
1271 {
1272 /* looks good so far, dispatch netlink message */
1273 switch (hdr->nlmsg_type)
1274 {
1275 case RTM_NEWADDR:
1276 case RTM_DELADDR:
1277 process_addr(this, hdr, TRUE);
1278 break;
1279 case RTM_NEWLINK:
1280 case RTM_DELLINK:
1281 process_link(this, hdr, TRUE);
1282 break;
1283 case RTM_NEWROUTE:
1284 case RTM_DELROUTE:
1285 if (this->process_route)
1286 {
1287 process_route(this, hdr);
1288 }
1289 break;
1290 default:
1291 break;
1292 }
1293 hdr = NLMSG_NEXT(hdr, len);
1294 }
1295 return TRUE;
1296 }
1297
/**
 * Context data of the address enumerator, passed to the interface/address
 * filter callbacks and released by address_enumerator_destroy().
 */
typedef struct {
	/** kernel interface; its lock is released when the enumerator is destroyed */
	private_kernel_netlink_net_t* this;
	/** which addresses to enumerate */
	kernel_address_type_t which;
} address_enumerator_t;
1304
/**
 * Cleanup function for the address enumerator: releases the lock of the
 * kernel interface referenced by the context and frees the context itself.
 *
 * @param data		enumerator context to release
 */
static void address_enumerator_destroy(address_enumerator_t *data)
{
	/* unlock first, the lock is reached through the data freed below */
	data->this->lock->unlock(data->this->lock);
	free(data);
}
1313
1314 /**
1315 * filter for addresses
1316 */
1317 static bool filter_addresses(address_enumerator_t *data,
1318 addr_entry_t** in, host_t** out)
1319 {
1320 if (!(data->which & ADDR_TYPE_VIRTUAL) && (*in)->refcount)
1321 { /* skip virtual interfaces added by us */
1322 return FALSE;
1323 }
1324 if (!(data->which & ADDR_TYPE_REGULAR) && !(*in)->refcount)
1325 { /* address is regular, but not requested */
1326 return FALSE;
1327 }
1328 if ((*in)->scope >= RT_SCOPE_LINK)
1329 { /* skip addresses with a unusable scope */
1330 return FALSE;
1331 }
1332 *out = (*in)->ip;
1333 return TRUE;
1334 }
1335
1336 /**
1337 * enumerator constructor for interfaces
1338 */
1339 static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
1340 address_enumerator_t *data)
1341 {
1342 return enumerator_create_filter(
1343 iface->addrs->create_enumerator(iface->addrs),
1344 (void*)filter_addresses, data, NULL);
1345 }
1346
1347 /**
1348 * filter for interfaces
1349 */
1350 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
1351 iface_entry_t** out)
1352 {
1353 if (!(data->which & ADDR_TYPE_IGNORED) && !(*in)->usable)
1354 { /* skip interfaces excluded by config */
1355 return FALSE;
1356 }
1357 if (!(data->which & ADDR_TYPE_LOOPBACK) && ((*in)->flags & IFF_LOOPBACK))
1358 { /* ignore loopback devices */
1359 return FALSE;
1360 }
1361 if (!(data->which & ADDR_TYPE_DOWN) && !((*in)->flags & IFF_UP))
1362 { /* skip interfaces not up */
1363 return FALSE;
1364 }
1365 *out = *in;
1366 return TRUE;
1367 }
1368
1369 METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
1370 private_kernel_netlink_net_t *this, kernel_address_type_t which)
1371 {
1372 address_enumerator_t *data;
1373
1374 INIT(data,
1375 .this = this,
1376 .which = which,
1377 );
1378
1379 this->lock->read_lock(this->lock);
1380 return enumerator_create_nested(
1381 enumerator_create_filter(
1382 this->ifaces->create_enumerator(this->ifaces),
1383 (void*)filter_interfaces, data, NULL),
1384 (void*)create_iface_enumerator, data,
1385 (void*)address_enumerator_destroy);
1386 }
1387
1388 METHOD(kernel_net_t, get_interface_name, bool,
1389 private_kernel_netlink_net_t *this, host_t* ip, char **name)
1390 {
1391 addr_map_entry_t *entry, lookup = {
1392 .ip = ip,
1393 };
1394
1395 if (ip->is_anyaddr(ip))
1396 {
1397 return FALSE;
1398 }
1399 this->lock->read_lock(this->lock);
1400 /* first try to find it on an up and usable interface */
1401 entry = this->addrs->get_match(this->addrs, &lookup,
1402 (void*)addr_map_entry_match_up_and_usable);
1403 if (entry)
1404 {
1405 if (name)
1406 {
1407 *name = strdup(entry->iface->ifname);
1408 DBG2(DBG_KNL, "%H is on interface %s", ip, *name);
1409 }
1410 this->lock->unlock(this->lock);
1411 return TRUE;
1412 }
1413 /* in a second step, consider virtual IPs installed by us */
1414 entry = this->vips->get_match(this->vips, &lookup,
1415 (void*)addr_map_entry_match_up_and_usable);
1416 if (entry)
1417 {
1418 if (name)
1419 {
1420 *name = strdup(entry->iface->ifname);
1421 DBG2(DBG_KNL, "virtual IP %H is on interface %s", ip, *name);
1422 }
1423 this->lock->unlock(this->lock);
1424 return TRUE;
1425 }
1426 /* maybe it is installed on an ignored interface */
1427 entry = this->addrs->get_match(this->addrs, &lookup,
1428 (void*)addr_map_entry_match_up);
1429 if (!entry)
1430 {
1431 DBG2(DBG_KNL, "%H is not a local address or the interface is down", ip);
1432 }
1433 this->lock->unlock(this->lock);
1434 return FALSE;
1435 }
1436
1437 /**
1438 * get the index of an interface by name
1439 */
1440 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
1441 {
1442 iface_entry_t *iface;
1443 int ifindex = 0;
1444
1445 DBG2(DBG_KNL, "getting iface index for %s", name);
1446
1447 this->lock->read_lock(this->lock);
1448 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1449 (void**)&iface, name) == SUCCESS)
1450 {
1451 ifindex = iface->ifindex;
1452 }
1453 this->lock->unlock(this->lock);
1454
1455 if (ifindex == 0)
1456 {
1457 DBG1(DBG_KNL, "unable to get interface index for %s", name);
1458 }
1459 return ifindex;
1460 }
1461
1462 /**
1463 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1464 */
1465 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1466 {
1467 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
1468 int byte = 0;
1469
1470 if (net_len == 0)
1471 { /* any address matches a /0 network */
1472 return TRUE;
1473 }
1474 if (addr.len != net.len || net_len > 8 * net.len )
1475 {
1476 return FALSE;
1477 }
1478 /* scan through all bytes in network order */
1479 while (net_len > 0)
1480 {
1481 if (net_len < 8)
1482 {
1483 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
1484 }
1485 else
1486 {
1487 if (addr.ptr[byte] != net.ptr[byte])
1488 {
1489 return FALSE;
1490 }
1491 byte++;
1492 net_len -= 8;
1493 }
1494 }
1495 return TRUE;
1496 }
1497
/**
 * Store information about a route retrieved via RTNETLINK
 *
 * The chunks are filled by parse_route() and reference data inside the parsed
 * Netlink message, they are not owned by the entry.
 */
typedef struct {
	/** gateway of the route (RTA_GATEWAY), empty if none */
	chunk_t gtw;
	/** preferred source address of the route (RTA_PREFSRC), empty if none */
	chunk_t src;
	/** destination network of the route (RTA_DST), empty if none */
	chunk_t dst;
	/** source address as host object, destroyed with the entry if set */
	host_t *src_host;
	/** prefix length of the destination network (rtm_dst_len) */
	u_int8_t dst_len;
	/** routing table the route is installed in */
	u_int32_t table;
	/** index of the outgoing interface (RTA_OIF), 0 if none */
	u_int32_t oif;
} rt_entry_t;
1510
1511 /**
1512 * Free a route entry
1513 */
1514 static void rt_entry_destroy(rt_entry_t *this)
1515 {
1516 DESTROY_IF(this->src_host);
1517 free(this);
1518 }
1519
1520 /**
1521 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1522 * reused if not NULL.
1523 *
1524 * Returned chunks point to internal data of the Netlink message.
1525 */
1526 static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
1527 {
1528 struct rtattr *rta;
1529 struct rtmsg *msg;
1530 size_t rtasize;
1531
1532 msg = (struct rtmsg*)(NLMSG_DATA(hdr));
1533 rta = RTM_RTA(msg);
1534 rtasize = RTM_PAYLOAD(hdr);
1535
1536 if (route)
1537 {
1538 route->gtw = chunk_empty;
1539 route->src = chunk_empty;
1540 route->dst = chunk_empty;
1541 route->dst_len = msg->rtm_dst_len;
1542 route->table = msg->rtm_table;
1543 route->oif = 0;
1544 }
1545 else
1546 {
1547 INIT(route,
1548 .dst_len = msg->rtm_dst_len,
1549 .table = msg->rtm_table,
1550 );
1551 }
1552
1553 while (RTA_OK(rta, rtasize))
1554 {
1555 switch (rta->rta_type)
1556 {
1557 case RTA_PREFSRC:
1558 route->src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1559 break;
1560 case RTA_GATEWAY:
1561 route->gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1562 break;
1563 case RTA_DST:
1564 route->dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1565 break;
1566 case RTA_OIF:
1567 if (RTA_PAYLOAD(rta) == sizeof(route->oif))
1568 {
1569 route->oif = *(u_int32_t*)RTA_DATA(rta);
1570 }
1571 break;
1572 #ifdef HAVE_RTA_TABLE
1573 case RTA_TABLE:
1574 if (RTA_PAYLOAD(rta) == sizeof(route->table))
1575 {
1576 route->table = *(u_int32_t*)RTA_DATA(rta);
1577 }
1578 break;
1579 #endif /* HAVE_RTA_TABLE*/
1580 }
1581 rta = RTA_NEXT(rta, rtasize);
1582 }
1583 return route;
1584 }
1585
/**
 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
 *
 * The kernel is queried with RTM_GETROUTE, the returned routes are filtered
 * and sorted by prefix length, and the first route for which a source address
 * matching the candidate can be determined is selected. If no route matches
 * the candidate, the first route for which any source address was found is
 * used as fallback.
 *
 * @param this			kernel interface
 * @param dest			destination to reach
 * @param nexthop		TRUE to return the nexthop, FALSE for the source address
 * @param candidate		preferred source address, may be NULL
 * @param recursion		current recursion depth, lookups deeper than
 *						MAX_ROUTE_RECURSION fail
 * @return				newly created host object (presumably to be destroyed
 *						by the caller), NULL if the lookup failed or no source
 *						address was found
 */
static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
						 bool nexthop, host_t *candidate, u_int recursion)
{
	netlink_buf_t request;
	struct nlmsghdr *hdr, *out, *current;
	struct rtmsg *msg;
	chunk_t chunk;
	size_t len;
	linked_list_t *routes;
	rt_entry_t *route = NULL, *best = NULL;
	enumerator_t *enumerator;
	host_t *addr = NULL;

	if (recursion > MAX_ROUTE_RECURSION)
	{
		return NULL;
	}

	memset(&request, 0, sizeof(request));

	hdr = (struct nlmsghdr*)request;
	hdr->nlmsg_flags = NLM_F_REQUEST;
	if (dest->get_family(dest) == AF_INET || this->rta_prefsrc_for_ipv6 ||
		this->routing_table)
	{	/* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
		 * as we want to ignore routes with virtual IPs we cannot use DUMP
		 * if these routes are not installed in a separate table */
		hdr->nlmsg_flags |= NLM_F_DUMP;
	}
	hdr->nlmsg_type = RTM_GETROUTE;
	hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));

	msg = (struct rtmsg*)NLMSG_DATA(hdr);
	msg->rtm_family = dest->get_family(dest);
	if (candidate)
	{
		chunk = candidate->get_address(candidate);
		netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
	}
	/* from here on chunk holds the destination address, it is used below to
	 * match the destination against the returned routes */
	chunk = dest->get_address(dest);
	netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));

	if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
	{
		DBG2(DBG_KNL, "getting %s to reach %H failed",
			 nexthop ? "nexthop" : "address", dest);
		return NULL;
	}
	routes = linked_list_create();
	this->lock->read_lock(this->lock);

	/* first pass: parse the response and collect all usable routes. within
	 * the switch "continue" proceeds with the next message, while "break"
	 * (NLMSG_DONE) reaches the break after the switch and ends the loop */
	for (current = out; NLMSG_OK(current, len);
		 current = NLMSG_NEXT(current, len))
	{
		switch (current->nlmsg_type)
		{
			case NLMSG_DONE:
				break;
			case RTM_NEWROUTE:
			{
				rt_entry_t *other;
				uintptr_t table;

				/* a route object that was rejected in a previous iteration is
				 * reused here, it is only reset to NULL once it is inserted */
				route = parse_route(current, route);

				table = (uintptr_t)route->table;
				if (this->rt_exclude->find_first(this->rt_exclude, NULL,
												 (void**)&table) == SUCCESS)
				{	/* route is from an excluded routing table */
					continue;
				}
				if (this->routing_table != 0 &&
					route->table == this->routing_table)
				{	/* route is from our own ipsec routing table */
					continue;
				}
				if (route->oif && !is_interface_up_and_usable(this, route->oif))
				{	/* interface is down */
					continue;
				}
				if (!addr_in_subnet(chunk, route->dst, route->dst_len))
				{	/* route destination does not contain dest */
					continue;
				}
				if (route->src.ptr)
				{	/* verify source address, if any */
					host_t *src = host_create_from_chunk(msg->rtm_family,
														 route->src, 0);
					if (src && is_known_vip(this, src))
					{	/* ignore routes installed by us */
						src->destroy(src);
						continue;
					}
					route->src_host = src;
				}
				/* insert route, sorted by decreasing network prefix */
				enumerator = routes->create_enumerator(routes);
				while (enumerator->enumerate(enumerator, &other))
				{
					if (route->dst_len > other->dst_len)
					{
						break;
					}
				}
				routes->insert_before(routes, enumerator, route);
				enumerator->destroy(enumerator);
				/* the list owns the route now, allocate a new one next time */
				route = NULL;
				continue;
			}
			default:
				continue;
		}
		break;
	}
	if (route)
	{	/* the last parsed route was rejected and is not part of the list */
		rt_entry_destroy(route);
	}

	/* now we have a list of routes matching dest, sorted by net prefix.
	 * we will look for source addresses for these routes and select the one
	 * with the preferred source address, if possible */
	enumerator = routes->create_enumerator(routes);
	while (enumerator->enumerate(enumerator, &route))
	{
		if (route->src_host)
		{	/* got a source address with the route, if no preferred source
			 * is given or it matches we are done, as this is the best route */
			if (!candidate || candidate->ip_equals(candidate, route->src_host))
			{
				best = route;
				break;
			}
			else if (route->oif)
			{	/* no match yet, maybe it is assigned to the same interface */
				host_t *src = get_interface_address(this, route->oif,
											msg->rtm_family, dest, candidate);
				if (src && src->ip_equals(src, candidate))
				{
					/* use the candidate instead of the route's own source */
					route->src_host->destroy(route->src_host);
					route->src_host = src;
					best = route;
					break;
				}
				DESTROY_IF(src);
			}
			/* no luck yet with the source address. if this is the best (first)
			 * route we store it as fallback in case we don't find a route with
			 * the preferred source */
			best = best ?: route;
			continue;
		}
		if (route->oif)
		{	/* no src, but an interface - get address from it */
			route->src_host = get_interface_address(this, route->oif,
											msg->rtm_family, dest, candidate);
			if (route->src_host)
			{	/* we handle this address the same as the one above */
				if (!candidate ||
					 candidate->ip_equals(candidate, route->src_host))
				{
					best = route;
					break;
				}
				best = best ?: route;
				continue;
			}
		}
		if (route->gtw.ptr)
		{	/* no src, no iface, but a gateway - lookup src to reach gtw */
			host_t *gtw;

			gtw = host_create_from_chunk(msg->rtm_family, route->gtw, 0);
			/* a gateway equal to dest is not looked up, as this would just
			 * repeat the current lookup */
			if (gtw && !gtw->ip_equals(gtw, dest))
			{
				route->src_host = get_route(this, gtw, FALSE, candidate,
											recursion + 1);
			}
			DESTROY_IF(gtw);
			if (route->src_host)
			{	/* more of the same */
				if (!candidate ||
					 candidate->ip_equals(candidate, route->src_host))
				{
					best = route;
					break;
				}
				best = best ?: route;
			}
		}
	}
	enumerator->destroy(enumerator);

	if (nexthop)
	{	/* nexthop lookup, return gateway if any */
		/* without a selected route the first route in the list is used */
		if (best || routes->get_first(routes, (void**)&best) == SUCCESS)
		{
			addr = host_create_from_chunk(msg->rtm_family, best->gtw, 0);
		}
		/* if no gateway address could be created, dest itself is returned */
		addr = addr ?: dest->clone(dest);
	}
	else
	{
		if (best)
		{
			addr = best->src_host->clone(best->src_host);
		}
	}
	this->lock->unlock(this->lock);
	/* the chunks of the routes reference the response, so the routes are
	 * released before the response buffer is */
	routes->destroy_function(routes, (void*)rt_entry_destroy);
	free(out);

	if (addr)
	{
		DBG2(DBG_KNL, "using %H as %s to reach %H", addr,
			 nexthop ? "nexthop" : "address", dest);
	}
	else if (!recursion)
	{	/* failures of recursive lookups are not logged */
		DBG2(DBG_KNL, "no %s found to reach %H",
			 nexthop ? "nexthop" : "address", dest);
	}
	return addr;
}
1813
1814 METHOD(kernel_net_t, get_source_addr, host_t*,
1815 private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
1816 {
1817 return get_route(this, dest, FALSE, src, 0);
1818 }
1819
1820 METHOD(kernel_net_t, get_nexthop, host_t*,
1821 private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
1822 {
1823 return get_route(this, dest, TRUE, src, 0);
1824 }
1825
1826 /**
1827 * Manages the creation and deletion of ip addresses on an interface.
1828 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1829 */
1830 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
1831 int flags, int if_index, host_t *ip, int prefix)
1832 {
1833 netlink_buf_t request;
1834 struct nlmsghdr *hdr;
1835 struct ifaddrmsg *msg;
1836 chunk_t chunk;
1837
1838 memset(&request, 0, sizeof(request));
1839
1840 chunk = ip->get_address(ip);
1841
1842 hdr = (struct nlmsghdr*)request;
1843 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1844 hdr->nlmsg_type = nlmsg_type;
1845 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1846
1847 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1848 msg->ifa_family = ip->get_family(ip);
1849 msg->ifa_flags = 0;
1850 msg->ifa_prefixlen = prefix < 0 ? chunk.len * 8 : prefix;
1851 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1852 msg->ifa_index = if_index;
1853
1854 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1855
1856 return this->socket->send_ack(this->socket, hdr);
1857 }
1858
1859 METHOD(kernel_net_t, add_ip, status_t,
1860 private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
1861 char *iface_name)
1862 {
1863 addr_map_entry_t *entry, lookup = {
1864 .ip = virtual_ip,
1865 };
1866 iface_entry_t *iface = NULL;
1867
1868 if (!this->install_virtual_ip)
1869 { /* disabled by config */
1870 return SUCCESS;
1871 }
1872
1873 this->lock->write_lock(this->lock);
1874 /* the virtual IP might actually be installed as regular IP, in which case
1875 * we don't track it as virtual IP */
1876 entry = this->addrs->get_match(this->addrs, &lookup,
1877 (void*)addr_map_entry_match);
1878 if (!entry)
1879 { /* otherwise it might already be installed as virtual IP */
1880 entry = this->vips->get_match(this->vips, &lookup,
1881 (void*)addr_map_entry_match);
1882 if (entry)
1883 { /* the vip we found can be in one of three states: 1) installed and
1884 * ready, 2) just added by another thread, but not yet confirmed to
1885 * be installed by the kernel, 3) just deleted, but not yet gone.
1886 * Then while we wait below, several things could happen (as we
1887 * release the lock). For instance, the interface could disappear,
1888 * or the IP is finally deleted, and it reappears on a different
1889 * interface. All these cases are handled by the call below. */
1890 while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
1891 {
1892 this->condvar->wait(this->condvar, this->lock);
1893 }
1894 if (entry)
1895 {
1896 entry->addr->refcount++;
1897 }
1898 }
1899 }
1900 if (entry)
1901 {
1902 DBG2(DBG_KNL, "virtual IP %H is already installed on %s", virtual_ip,
1903 entry->iface->ifname);
1904 this->lock->unlock(this->lock);
1905 return SUCCESS;
1906 }
1907 /* try to find the target interface, either by config or via src ip */
1908 if (!this->install_virtual_ip_on ||
1909 this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1910 (void**)&iface, this->install_virtual_ip_on) != SUCCESS)
1911 {
1912 if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
1913 (void**)&iface, iface_name) != SUCCESS)
1914 { /* if we don't find the requested interface we just use the first */
1915 this->ifaces->get_first(this->ifaces, (void**)&iface);
1916 }
1917 }
1918 if (iface)
1919 {
1920 addr_entry_t *addr;
1921
1922 INIT(addr,
1923 .ip = virtual_ip->clone(virtual_ip),
1924 .refcount = 1,
1925 .scope = RT_SCOPE_UNIVERSE,
1926 );
1927 iface->addrs->insert_last(iface->addrs, addr);
1928 addr_map_entry_add(this->vips, addr, iface);
1929 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1930 iface->ifindex, virtual_ip, prefix) == SUCCESS)
1931 {
1932 while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
1933 { /* wait until address appears */
1934 this->condvar->wait(this->condvar, this->lock);
1935 }
1936 if (entry)
1937 { /* we fail if the interface got deleted in the meantime */
1938 DBG2(DBG_KNL, "virtual IP %H installed on %s", virtual_ip,
1939 entry->iface->ifname);
1940 this->lock->unlock(this->lock);
1941 /* during IKEv1 reauthentication, children get moved from
1942 * old the new SA before the virtual IP is available. This
1943 * kills the route for our virtual IP, reinstall. */
1944 queue_route_reinstall(this, strdup(entry->iface->ifname));
1945 return SUCCESS;
1946 }
1947 }
1948 this->lock->unlock(this->lock);
1949 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1950 return FAILED;
1951 }
1952 this->lock->unlock(this->lock);
1953 DBG1(DBG_KNL, "no interface available, unable to install virtual IP %H",
1954 virtual_ip);
1955 return FAILED;
1956 }
1957
1958 METHOD(kernel_net_t, del_ip, status_t,
1959 private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
1960 bool wait)
1961 {
1962 addr_map_entry_t *entry, lookup = {
1963 .ip = virtual_ip,
1964 };
1965
1966 if (!this->install_virtual_ip)
1967 { /* disabled by config */
1968 return SUCCESS;
1969 }
1970
1971 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1972
1973 this->lock->write_lock(this->lock);
1974 entry = this->vips->get_match(this->vips, &lookup,
1975 (void*)addr_map_entry_match);
1976 if (!entry)
1977 { /* we didn't install this IP as virtual IP */
1978 entry = this->addrs->get_match(this->addrs, &lookup,
1979 (void*)addr_map_entry_match);
1980 if (entry)
1981 {
1982 DBG2(DBG_KNL, "not deleting existing IP %H on %s", virtual_ip,
1983 entry->iface->ifname);
1984 this->lock->unlock(this->lock);
1985 return SUCCESS;
1986 }
1987 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1988 this->lock->unlock(this->lock);
1989 return FAILED;
1990 }
1991 if (entry->addr->refcount == 1)
1992 {
1993 status_t status;
1994
1995 /* we set this flag so that threads calling add_ip will block and wait
1996 * until the entry is gone, also so we can wait below */
1997 entry->addr->installed = FALSE;
1998 status = manage_ipaddr(this, RTM_DELADDR, 0, entry->iface->ifindex,
1999 virtual_ip, prefix);
2000 if (status == SUCCESS && wait)
2001 { /* wait until the address is really gone */
2002 while (is_known_vip(this, virtual_ip))
2003 {
2004 this->condvar->wait(this->condvar, this->lock);
2005 }
2006 }
2007 this->lock->unlock(this->lock);
2008 return status;
2009 }
2010 else
2011 {
2012 entry->addr->refcount--;
2013 }
2014 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
2015 virtual_ip);
2016 this->lock->unlock(this->lock);
2017 return SUCCESS;
2018 }
2019
2020 /**
2021 * Manages source routes in the routing table.
2022 * By setting the appropriate nlmsg_type, the route gets added or removed.
2023 */
2024 static status_t manage_srcroute(private_kernel_netlink_net_t *this,
2025 int nlmsg_type, int flags, chunk_t dst_net,
2026 u_int8_t prefixlen, host_t *gateway,
2027 host_t *src_ip, char *if_name)
2028 {
2029 netlink_buf_t request;
2030 struct nlmsghdr *hdr;
2031 struct rtmsg *msg;
2032 int ifindex;
2033 chunk_t chunk;
2034
2035 /* if route is 0.0.0.0/0, we can't install it, as it would
2036 * overwrite the default route. Instead, we add two routes:
2037 * 0.0.0.0/1 and 128.0.0.0/1 */
2038 if (this->routing_table == 0 && prefixlen == 0)
2039 {
2040 chunk_t half_net;
2041 u_int8_t half_prefixlen;
2042 status_t status;
2043
2044 half_net = chunk_alloca(dst_net.len);
2045 memset(half_net.ptr, 0, half_net.len);
2046 half_prefixlen = 1;
2047
2048 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
2049 gateway, src_ip, if_name);
2050 half_net.ptr[0] |= 0x80;
2051 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
2052 gateway, src_ip, if_name);
2053 return status;
2054 }
2055
2056 memset(&request, 0, sizeof(request));
2057
2058 hdr = (struct nlmsghdr*)request;
2059 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
2060 hdr->nlmsg_type = nlmsg_type;
2061 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2062
2063 msg = (struct rtmsg*)NLMSG_DATA(hdr);
2064 msg->rtm_family = src_ip->get_family(src_ip);
2065 msg->rtm_dst_len = prefixlen;
2066 msg->rtm_table = this->routing_table;
2067 msg->rtm_protocol = RTPROT_STATIC;
2068 msg->rtm_type = RTN_UNICAST;
2069 msg->rtm_scope = RT_SCOPE_UNIVERSE;
2070
2071 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
2072 chunk = src_ip->get_address(src_ip);
2073 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
2074 if (gateway && gateway->get_family(gateway) == src_ip->get_family(src_ip))
2075 {
2076 chunk = gateway->get_address(gateway);
2077 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
2078 }
2079 ifindex = get_interface_index(this, if_name);
2080 chunk.ptr = (char*)&ifindex;
2081 chunk.len = sizeof(ifindex);
2082 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
2083
2084 return this->socket->send_ack(this->socket, hdr);
2085 }
2086
2087 METHOD(kernel_net_t, add_route, status_t,
2088 private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
2089 host_t *gateway, host_t *src_ip, char *if_name)
2090 {
2091 status_t status;
2092 route_entry_t *found, route = {
2093 .dst_net = dst_net,
2094 .prefixlen = prefixlen,
2095 .gateway = gateway,
2096 .src_ip = src_ip,
2097 .if_name = if_name,
2098 };
2099
2100 this->routes_lock->lock(this->routes_lock);
2101 found = this->routes->get(this->routes, &route);
2102 if (found)
2103 {
2104 this->routes_lock->unlock(this->routes_lock);
2105 return ALREADY_DONE;
2106 }
2107 status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2108 dst_net, prefixlen, gateway, src_ip, if_name);
2109 if (status == SUCCESS)
2110 {
2111 found = route_entry_clone(&route);
2112 this->routes->put(this->routes, found, found);
2113 }
2114 this->routes_lock->unlock(this->routes_lock);
2115 return status;
2116 }
2117
2118 METHOD(kernel_net_t, del_route, status_t,
2119 private_kernel_netlink_net_t *this, chunk_t dst_net, u_int8_t prefixlen,
2120 host_t *gateway, host_t *src_ip, char *if_name)
2121 {
2122 status_t status;
2123 route_entry_t *found, route = {
2124 .dst_net = dst_net,
2125 .prefixlen = prefixlen,
2126 .gateway = gateway,
2127 .src_ip = src_ip,
2128 .if_name = if_name,
2129 };
2130
2131 this->routes_lock->lock(this->routes_lock);
2132 found = this->routes->get(this->routes, &route);
2133 if (!found)
2134 {
2135 this->routes_lock->unlock(this->routes_lock);
2136 return NOT_FOUND;
2137 }
2138 this->routes->remove(this->routes, found);
2139 route_entry_destroy(found);
2140 status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
2141 gateway, src_ip, if_name);
2142 this->routes_lock->unlock(this->routes_lock);
2143 return status;
2144 }
2145
2146 /**
2147 * Initialize a list of local addresses.
2148 */
2149 static status_t init_address_list(private_kernel_netlink_net_t *this)
2150 {
2151 netlink_buf_t request;
2152 struct nlmsghdr *out, *current, *in;
2153 struct rtgenmsg *msg;
2154 size_t len;
2155 enumerator_t *ifaces, *addrs;
2156 iface_entry_t *iface;
2157 addr_entry_t *addr;
2158
2159 DBG2(DBG_KNL, "known interfaces and IP addresses:");
2160
2161 memset(&request, 0, sizeof(request));
2162
2163 in = (struct nlmsghdr*)&request;
2164 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
2165 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
2166 msg = (struct rtgenmsg*)NLMSG_DATA(in);
2167 msg->rtgen_family = AF_UNSPEC;
2168
2169 /* get all links */
2170 in->nlmsg_type = RTM_GETLINK;
2171 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
2172 {
2173 return FAILED;
2174 }
2175 current = out;
2176 while (NLMSG_OK(current, len))
2177 {
2178 switch (current->nlmsg_type)
2179 {
2180 case NLMSG_DONE:
2181 break;
2182 case RTM_NEWLINK:
2183 process_link(this, current, FALSE);
2184 /* fall through */
2185 default:
2186 current = NLMSG_NEXT(current, len);
2187 continue;
2188 }
2189 break;
2190 }
2191 free(out);
2192
2193 /* get all interface addresses */
2194 in->nlmsg_type = RTM_GETADDR;
2195 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
2196 {
2197 return FAILED;
2198 }
2199 current = out;
2200 while (NLMSG_OK(current, len))
2201 {
2202 switch (current->nlmsg_type)
2203 {
2204 case NLMSG_DONE:
2205 break;
2206 case RTM_NEWADDR:
2207 process_addr(this, current, FALSE);
2208 /* fall through */
2209 default:
2210 current = NLMSG_NEXT(current, len);
2211 continue;
2212 }
2213 break;
2214 }
2215 free(out);
2216
2217 this->lock->read_lock(this->lock);
2218 ifaces = this->ifaces->create_enumerator(this->ifaces);
2219 while (ifaces->enumerate(ifaces, &iface))
2220 {
2221 if (iface_entry_up_and_usable(iface))
2222 {
2223 DBG2(DBG_KNL, " %s", iface->ifname);
2224 addrs = iface->addrs->create_enumerator(iface->addrs);
2225 while (addrs->enumerate(addrs, (void**)&addr))
2226 {
2227 DBG2(DBG_KNL, " %H", addr->ip);
2228 }
2229 addrs->destroy(addrs);
2230 }
2231 }
2232 ifaces->destroy(ifaces);
2233 this->lock->unlock(this->lock);
2234 return SUCCESS;
2235 }
2236
2237 /**
2238 * create or delete a rule to use our routing table
2239 */
2240 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
2241 int family, u_int32_t table, u_int32_t prio)
2242 {
2243 netlink_buf_t request;
2244 struct nlmsghdr *hdr;
2245 struct rtmsg *msg;
2246 chunk_t chunk;
2247 char *fwmark;
2248
2249 memset(&request, 0, sizeof(request));
2250 hdr = (struct nlmsghdr*)request;
2251 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2252 hdr->nlmsg_type = nlmsg_type;
2253 if (nlmsg_type == RTM_NEWRULE)
2254 {
2255 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
2256 }
2257 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2258
2259 msg = (struct rtmsg*)NLMSG_DATA(hdr);
2260 msg->rtm_table = table;
2261 msg->rtm_family = family;
2262 msg->rtm_protocol = RTPROT_BOOT;
2263 msg->rtm_scope = RT_SCOPE_UNIVERSE;
2264 msg->rtm_type = RTN_UNICAST;
2265
2266 chunk = chunk_from_thing(prio);
2267 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
2268
2269 fwmark = lib->settings->get_str(lib->settings,
2270 "%s.plugins.kernel-netlink.fwmark", NULL, lib->ns);
2271 if (fwmark)
2272 {
2273 #ifdef HAVE_LINUX_FIB_RULES_H
2274 mark_t mark;
2275
2276 if (fwmark[0] == '!')
2277 {
2278 msg->rtm_flags |= FIB_RULE_INVERT;
2279 fwmark++;
2280 }
2281 if (mark_from_string(fwmark, &mark))
2282 {
2283 chunk = chunk_from_thing(mark.value);
2284 netlink_add_attribute(hdr, FRA_FWMARK, chunk, sizeof(request));
2285 chunk = chunk_from_thing(mark.mask);
2286 netlink_add_attribute(hdr, FRA_FWMASK, chunk, sizeof(request));
2287 }
2288 #else
2289 DBG1(DBG_KNL, "setting firewall mark on routing rule is not supported");
2290 #endif
2291 }
2292 return this->socket->send_ack(this->socket, hdr);
2293 }
2294
2295 /**
2296 * check for kernel features (currently only via version number)
2297 */
2298 static void check_kernel_features(private_kernel_netlink_net_t *this)
2299 {
2300 struct utsname utsname;
2301 int a, b, c;
2302
2303 if (uname(&utsname) == 0)
2304 {
2305 switch(sscanf(utsname.release, "%d.%d.%d", &a, &b, &c))
2306 {
2307 case 3:
2308 if (a == 2)
2309 {
2310 DBG2(DBG_KNL, "detected Linux %d.%d.%d, no support for "
2311 "RTA_PREFSRC for IPv6 routes", a, b, c);
2312 break;
2313 }
2314 /* fall-through */
2315 case 2:
2316 /* only 3.x+ uses two part version numbers */
2317 this->rta_prefsrc_for_ipv6 = TRUE;
2318 break;
2319 default:
2320 break;
2321 }
2322 }
2323 }
2324
2325 /**
2326 * Destroy an address to iface map
2327 */
2328 static void addr_map_destroy(hashtable_t *map)
2329 {
2330 enumerator_t *enumerator;
2331 addr_map_entry_t *addr;
2332
2333 enumerator = map->create_enumerator(map);
2334 while (enumerator->enumerate(enumerator, NULL, (void**)&addr))
2335 {
2336 free(addr);
2337 }
2338 enumerator->destroy(enumerator);
2339 map->destroy(map);
2340 }
2341
2342 METHOD(kernel_net_t, destroy, void,
2343 private_kernel_netlink_net_t *this)
2344 {
2345 enumerator_t *enumerator;
2346 route_entry_t *route;
2347
2348 if (this->routing_table)
2349 {
2350 manage_rule(this, RTM_DELRULE, AF_INET, this->routing_table,
2351 this->routing_table_prio);
2352 manage_rule(this, RTM_DELRULE, AF_INET6, this->routing_table,
2353 this->routing_table_prio);
2354 }
2355 if (this->socket_events > 0)
2356 {
2357 lib->watcher->remove(lib->watcher, this->socket_events);
2358 close(this->socket_events);
2359 }
2360 enumerator = this->routes->create_enumerator(this->routes);
2361 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
2362 {
2363 manage_srcroute(this, RTM_DELROUTE, 0, route->dst_net, route->prefixlen,
2364 route->gateway, route->src_ip, route->if_name);
2365 route_entry_destroy(route);
2366 }
2367 enumerator->destroy(enumerator);
2368 this->routes->destroy(this->routes);
2369 this->routes_lock->destroy(this->routes_lock);
2370 DESTROY_IF(this->socket);
2371
2372 net_changes_clear(this);
2373 this->net_changes->destroy(this->net_changes);
2374 this->net_changes_lock->destroy(this->net_changes_lock);
2375
2376 addr_map_destroy(this->addrs);
2377 addr_map_destroy(this->vips);
2378
2379 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2380 this->rt_exclude->destroy(this->rt_exclude);
2381 this->roam_lock->destroy(this->roam_lock);
2382 this->condvar->destroy(this->condvar);
2383 this->lock->destroy(this->lock);
2384 free(this);
2385 }
2386
2387 /*
2388 * Described in header.
2389 */
2390 kernel_netlink_net_t *kernel_netlink_net_create()
2391 {
2392 private_kernel_netlink_net_t *this;
2393 enumerator_t *enumerator;
2394 bool register_for_events = TRUE;
2395 char *exclude;
2396
2397 INIT(this,
2398 .public = {
2399 .interface = {
2400 .get_interface = _get_interface_name,
2401 .create_address_enumerator = _create_address_enumerator,
2402 .get_source_addr = _get_source_addr,
2403 .get_nexthop = _get_nexthop,
2404 .add_ip = _add_ip,
2405 .del_ip = _del_ip,
2406 .add_route = _add_route,
2407 .del_route = _del_route,
2408 .destroy = _destroy,
2409 },
2410 },
2411 .socket = netlink_socket_create(NETLINK_ROUTE),
2412 .rt_exclude = linked_list_create(),
2413 .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
2414 (hashtable_equals_t)route_entry_equals, 16),
2415 .net_changes = hashtable_create(
2416 (hashtable_hash_t)net_change_hash,
2417 (hashtable_equals_t)net_change_equals, 16),
2418 .addrs = hashtable_create(
2419 (hashtable_hash_t)addr_map_entry_hash,
2420 (hashtable_equals_t)addr_map_entry_equals, 16),
2421 .vips = hashtable_create((hashtable_hash_t)addr_map_entry_hash,
2422 (hashtable_equals_t)addr_map_entry_equals, 16),
2423 .routes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
2424 .net_changes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
2425 .ifaces = linked_list_create(),
2426 .lock = rwlock_create(RWLOCK_TYPE_DEFAULT),
2427 .condvar = rwlock_condvar_create(),
2428 .roam_lock = spinlock_create(),
2429 .routing_table = lib->settings->get_int(lib->settings,
2430 "%s.routing_table", ROUTING_TABLE, lib->ns),
2431 .routing_table_prio = lib->settings->get_int(lib->settings,
2432 "%s.routing_table_prio", ROUTING_TABLE_PRIO, lib->ns),
2433 .process_route = lib->settings->get_bool(lib->settings,
2434 "%s.process_route", TRUE, lib->ns),
2435 .install_virtual_ip = lib->settings->get_bool(lib->settings,
2436 "%s.install_virtual_ip", TRUE, lib->ns),
2437 .install_virtual_ip_on = lib->settings->get_str(lib->settings,
2438 "%s.install_virtual_ip_on", NULL, lib->ns),
2439 .prefer_temporary_addrs = lib->settings->get_bool(lib->settings,
2440 "%s.prefer_temporary_addrs", FALSE, lib->ns),
2441 .roam_events = lib->settings->get_bool(lib->settings,
2442 "%s.plugins.kernel-netlink.roam_events", TRUE, lib->ns),
2443 );
2444 timerclear(&this->last_route_reinstall);
2445 timerclear(&this->next_roam);
2446
2447 check_kernel_features(this);
2448
2449 if (streq(lib->ns, "starter"))
2450 { /* starter has no threads, so we do not register for kernel events */
2451 register_for_events = FALSE;
2452 }
2453
2454 exclude = lib->settings->get_str(lib->settings,
2455 "%s.ignore_routing_tables", NULL, lib->ns);
2456 if (exclude)
2457 {
2458 char *token;
2459 uintptr_t table;
2460
2461 enumerator = enumerator_create_token(exclude, " ", " ");
2462 while (enumerator->enumerate(enumerator, &token))
2463 {
2464 errno = 0;
2465 table = strtoul(token, NULL, 10);
2466
2467 if (errno == 0)
2468 {
2469 this->rt_exclude->insert_last(this->rt_exclude, (void*)table);
2470 }
2471 }
2472 enumerator->destroy(enumerator);
2473 }
2474
2475 if (register_for_events)
2476 {
2477 struct sockaddr_nl addr;
2478
2479 memset(&addr, 0, sizeof(addr));
2480 addr.nl_family = AF_NETLINK;
2481
2482 /* create and bind RT socket for events (address/interface/route changes) */
2483 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2484 if (this->socket_events < 0)
2485 {
2486 DBG1(DBG_KNL, "unable to create RT event socket");
2487 destroy(this);
2488 return NULL;
2489 }
2490 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2491 RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_LINK;
2492 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
2493 {
2494 DBG1(DBG_KNL, "unable to bind RT event socket");
2495 destroy(this);
2496 return NULL;
2497 }
2498
2499 lib->watcher->add(lib->watcher, this->socket_events, WATCHER_READ,
2500 (watcher_cb_t)receive_events, this);
2501 }
2502
2503 if (init_address_list(this) != SUCCESS)
2504 {
2505 DBG1(DBG_KNL, "unable to get interface list");
2506 destroy(this);
2507 return NULL;
2508 }
2509
2510 if (this->routing_table)
2511 {
2512 if (manage_rule(this, RTM_NEWRULE, AF_INET, this->routing_table,
2513 this->routing_table_prio) != SUCCESS)
2514 {
2515 DBG1(DBG_KNL, "unable to create IPv4 routing table rule");
2516 }
2517 if (manage_rule(this, RTM_NEWRULE, AF_INET6, this->routing_table,
2518 this->routing_table_prio) != SUCCESS)
2519 {
2520 DBG1(DBG_KNL, "unable to create IPv6 routing table rule");
2521 }
2522 }
2523
2524 return &this->public;
2525 }