32154a7ea37a0793b3f7b5e45d5579821fc75ece
[strongswan.git] / src / charon / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 */
16
17 #include <sys/socket.h>
18 #include <linux/netlink.h>
19 #include <linux/rtnetlink.h>
20 #include <sys/time.h>
21 #include <pthread.h>
22 #include <unistd.h>
23 #include <errno.h>
24 #include <net/if.h>
25
26 #include "kernel_netlink_net.h"
27 #include "kernel_netlink_shared.h"
28
29 #include <daemon.h>
30 #include <utils/mutex.h>
31 #include <utils/linked_list.h>
32 #include <processing/jobs/callback_job.h>
33 #include <processing/jobs/roam_job.h>
34
/** time to wait, in milliseconds, before a scheduled roam job fires */
#define ROAM_DELAY 100

/** number of the routing table our routes go to, unless set at build time */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif

/** priority value going with that routing table, unless set at build time */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif
45
46 typedef struct addr_entry_t addr_entry_t;
47
48 /**
49 * IP address in an inface_entry_t
50 */
51 struct addr_entry_t {
52
53 /** The ip address */
54 host_t *ip;
55
56 /** virtual IP managed by us */
57 bool virtual;
58
59 /** scope of the address */
60 u_char scope;
61
62 /** Number of times this IP is used, if virtual */
63 u_int refcount;
64 };
65
66 /**
67 * destroy a addr_entry_t object
68 */
69 static void addr_entry_destroy(addr_entry_t *this)
70 {
71 this->ip->destroy(this->ip);
72 free(this);
73 }
74
75 typedef struct iface_entry_t iface_entry_t;
76
77 /**
78 * A network interface on this system, containing addr_entry_t's
79 */
80 struct iface_entry_t {
81
82 /** interface index */
83 int ifindex;
84
85 /** name of the interface */
86 char ifname[IFNAMSIZ];
87
88 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
89 u_int flags;
90
91 /** list of addresses as host_t */
92 linked_list_t *addrs;
93 };
94
95 /**
96 * destroy an interface entry
97 */
98 static void iface_entry_destroy(iface_entry_t *this)
99 {
100 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
101 free(this);
102 }
103
104 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
105
106 /**
107 * Private variables and functions of kernel_netlink_net class.
108 */
109 struct private_kernel_netlink_net_t {
110 /**
111 * Public part of the kernel_netlink_net_t object.
112 */
113 kernel_netlink_net_t public;
114
115 /**
116 * mutex to lock access to various lists
117 */
118 mutex_t *mutex;
119
120 /**
121 * condition variable to signal virtual IP add/removal
122 */
123 condvar_t *condvar;
124
125 /**
126 * Cached list of interfaces and its addresses (iface_entry_t)
127 */
128 linked_list_t *ifaces;
129
130 /**
131 * job receiving netlink events
132 */
133 callback_job_t *job;
134
135 /**
136 * netlink rt socket (routing)
137 */
138 netlink_socket_t *socket;
139
140 /**
141 * Netlink rt socket to receive address change events
142 */
143 int socket_events;
144
145 /**
146 * time of the last roam_job
147 */
148 struct timeval last_roam;
149
150 /**
151 * routing table to install routes
152 */
153 int routing_table;
154
155 /**
156 * priority of used routing table
157 */
158 int routing_table_prio;
159
160 /**
161 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
162 */
163 bool process_route;
164
165 /**
166 * whether to actually install virtual IPs
167 */
168 bool install_virtual_ip;
169 };
170
171 /**
172 * get the refcount of a virtual ip
173 */
174 static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
175 {
176 iterator_t *ifaces, *addrs;
177 iface_entry_t *iface;
178 addr_entry_t *addr;
179 int refcount = 0;
180
181 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
182 while (ifaces->iterate(ifaces, (void**)&iface))
183 {
184 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
185 while (addrs->iterate(addrs, (void**)&addr))
186 {
187 if (addr->virtual && (iface->flags & IFF_UP) &&
188 ip->ip_equals(ip, addr->ip))
189 {
190 refcount = addr->refcount;
191 break;
192 }
193 }
194 addrs->destroy(addrs);
195 if (refcount)
196 {
197 break;
198 }
199 }
200 ifaces->destroy(ifaces);
201
202 return refcount;
203 }
204
205 /**
206 * start a roaming job. We delay it for a second and fire only one job
207 * for multiple events. Otherwise we would create two many jobs.
208 */
209 static void fire_roam_job(private_kernel_netlink_net_t *this, bool address)
210 {
211 struct timeval now;
212
213 if (gettimeofday(&now, NULL) == 0)
214 {
215 if (timercmp(&now, &this->last_roam, >))
216 {
217 now.tv_usec += ROAM_DELAY * 1000;
218 while (now.tv_usec > 1000000)
219 {
220 now.tv_sec++;
221 now.tv_usec -= 1000000;
222 }
223 this->last_roam = now;
224 charon->scheduler->schedule_job_ms(charon->scheduler,
225 (job_t*)roam_job_create(address), ROAM_DELAY);
226 }
227 }
228 }
229
230 /**
231 * process RTM_NEWLINK/RTM_DELLINK from kernel
232 */
233 static void process_link(private_kernel_netlink_net_t *this,
234 struct nlmsghdr *hdr, bool event)
235 {
236 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
237 struct rtattr *rta = IFLA_RTA(msg);
238 size_t rtasize = IFLA_PAYLOAD (hdr);
239 enumerator_t *enumerator;
240 iface_entry_t *current, *entry = NULL;
241 char *name = NULL;
242 bool update = FALSE;
243
244 while(RTA_OK(rta, rtasize))
245 {
246 switch (rta->rta_type)
247 {
248 case IFLA_IFNAME:
249 name = RTA_DATA(rta);
250 break;
251 }
252 rta = RTA_NEXT(rta, rtasize);
253 }
254 if (!name)
255 {
256 name = "(unknown)";
257 }
258
259 this->mutex->lock(this->mutex);
260 switch (hdr->nlmsg_type)
261 {
262 case RTM_NEWLINK:
263 {
264 if (msg->ifi_flags & IFF_LOOPBACK)
265 { /* ignore loopback interfaces */
266 break;
267 }
268 enumerator = this->ifaces->create_enumerator(this->ifaces);
269 while (enumerator->enumerate(enumerator, &current))
270 {
271 if (current->ifindex == msg->ifi_index)
272 {
273 entry = current;
274 break;
275 }
276 }
277 enumerator->destroy(enumerator);
278 if (!entry)
279 {
280 entry = malloc_thing(iface_entry_t);
281 entry->ifindex = msg->ifi_index;
282 entry->flags = 0;
283 entry->addrs = linked_list_create();
284 this->ifaces->insert_last(this->ifaces, entry);
285 }
286 memcpy(entry->ifname, name, IFNAMSIZ);
287 entry->ifname[IFNAMSIZ-1] = '\0';
288 if (event)
289 {
290 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
291 {
292 update = TRUE;
293 DBG1(DBG_KNL, "interface %s activated", name);
294 }
295 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
296 {
297 update = TRUE;
298 DBG1(DBG_KNL, "interface %s deactivated", name);
299 }
300 }
301 entry->flags = msg->ifi_flags;
302 break;
303 }
304 case RTM_DELLINK:
305 {
306 enumerator = this->ifaces->create_enumerator(this->ifaces);
307 while (enumerator->enumerate(enumerator, &current))
308 {
309 if (current->ifindex == msg->ifi_index)
310 {
311 /* we do not remove it, as an address may be added to a
312 * "down" interface and we wan't to know that. */
313 current->flags = msg->ifi_flags;
314 break;
315 }
316 }
317 enumerator->destroy(enumerator);
318 break;
319 }
320 }
321 this->mutex->unlock(this->mutex);
322
323 /* send an update to all IKE_SAs */
324 if (update && event)
325 {
326 fire_roam_job(this, TRUE);
327 }
328 }
329
330 /**
331 * process RTM_NEWADDR/RTM_DELADDR from kernel
332 */
333 static void process_addr(private_kernel_netlink_net_t *this,
334 struct nlmsghdr *hdr, bool event)
335 {
336 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
337 struct rtattr *rta = IFA_RTA(msg);
338 size_t rtasize = IFA_PAYLOAD (hdr);
339 host_t *host = NULL;
340 enumerator_t *ifaces, *addrs;
341 iface_entry_t *iface;
342 addr_entry_t *addr;
343 chunk_t local = chunk_empty, address = chunk_empty;
344 bool update = FALSE, found = FALSE, changed = FALSE;
345
346 while(RTA_OK(rta, rtasize))
347 {
348 switch (rta->rta_type)
349 {
350 case IFA_LOCAL:
351 local.ptr = RTA_DATA(rta);
352 local.len = RTA_PAYLOAD(rta);
353 break;
354 case IFA_ADDRESS:
355 address.ptr = RTA_DATA(rta);
356 address.len = RTA_PAYLOAD(rta);
357 break;
358 }
359 rta = RTA_NEXT(rta, rtasize);
360 }
361
362 /* For PPP interfaces, we need the IFA_LOCAL address,
363 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
364 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
365 if (local.ptr)
366 {
367 host = host_create_from_chunk(msg->ifa_family, local, 0);
368 }
369 else if (address.ptr)
370 {
371 host = host_create_from_chunk(msg->ifa_family, address, 0);
372 }
373
374 if (host == NULL)
375 { /* bad family? */
376 return;
377 }
378
379 this->mutex->lock(this->mutex);
380 ifaces = this->ifaces->create_enumerator(this->ifaces);
381 while (ifaces->enumerate(ifaces, &iface))
382 {
383 if (iface->ifindex == msg->ifa_index)
384 {
385 addrs = iface->addrs->create_enumerator(iface->addrs);
386 while (addrs->enumerate(addrs, &addr))
387 {
388 if (host->ip_equals(host, addr->ip))
389 {
390 found = TRUE;
391 if (hdr->nlmsg_type == RTM_DELADDR)
392 {
393 iface->addrs->remove_at(iface->addrs, addrs);
394 if (!addr->virtual)
395 {
396 changed = TRUE;
397 DBG1(DBG_KNL, "%H disappeared from %s",
398 host, iface->ifname);
399 }
400 addr_entry_destroy(addr);
401 }
402 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
403 {
404 addr->refcount = 1;
405 }
406 }
407 }
408 addrs->destroy(addrs);
409
410 if (hdr->nlmsg_type == RTM_NEWADDR)
411 {
412 if (!found)
413 {
414 found = TRUE;
415 changed = TRUE;
416 addr = malloc_thing(addr_entry_t);
417 addr->ip = host->clone(host);
418 addr->virtual = FALSE;
419 addr->refcount = 1;
420 addr->scope = msg->ifa_scope;
421
422 iface->addrs->insert_last(iface->addrs, addr);
423 if (event)
424 {
425 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
426 }
427 }
428 }
429 if (found && (iface->flags & IFF_UP))
430 {
431 update = TRUE;
432 }
433 break;
434 }
435 }
436 ifaces->destroy(ifaces);
437 this->mutex->unlock(this->mutex);
438 host->destroy(host);
439
440 /* send an update to all IKE_SAs */
441 if (update && event && changed)
442 {
443 fire_roam_job(this, TRUE);
444 }
445 }
446
447 /**
448 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
449 */
450 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
451 {
452 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
453 struct rtattr *rta = RTM_RTA(msg);
454 size_t rtasize = RTM_PAYLOAD(hdr);
455 host_t *host = NULL;
456
457 /* ignore routes added by us */
458 if (msg->rtm_table && msg->rtm_table == this->routing_table)
459 {
460 return;
461 }
462
463 while (RTA_OK(rta, rtasize))
464 {
465 switch (rta->rta_type)
466 {
467 case RTA_PREFSRC:
468 host = host_create_from_chunk(msg->rtm_family,
469 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
470 break;
471 }
472 rta = RTA_NEXT(rta, rtasize);
473 }
474 if (host)
475 {
476 this->mutex->lock(this->mutex);
477 if (!get_vip_refcount(this, host))
478 { /* ignore routes added for virtual IPs */
479 fire_roam_job(this, FALSE);
480 }
481 this->mutex->unlock(this->mutex);
482 host->destroy(host);
483 }
484 }
485
486 /**
487 * Receives events from kernel
488 */
489 static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
490 {
491 char response[1024];
492 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
493 struct sockaddr_nl addr;
494 socklen_t addr_len = sizeof(addr);
495 int len, oldstate;
496
497 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
498 len = recvfrom(this->socket_events, response, sizeof(response), 0,
499 (struct sockaddr*)&addr, &addr_len);
500 pthread_setcancelstate(oldstate, NULL);
501
502 if (len < 0)
503 {
504 switch (errno)
505 {
506 case EINTR:
507 /* interrupted, try again */
508 return JOB_REQUEUE_DIRECT;
509 case EAGAIN:
510 /* no data ready, select again */
511 return JOB_REQUEUE_DIRECT;
512 default:
513 DBG1(DBG_KNL, "unable to receive from rt event socket");
514 sleep(1);
515 return JOB_REQUEUE_FAIR;
516 }
517 }
518
519 if (addr.nl_pid != 0)
520 { /* not from kernel. not interested, try another one */
521 return JOB_REQUEUE_DIRECT;
522 }
523
524 while (NLMSG_OK(hdr, len))
525 {
526 /* looks good so far, dispatch netlink message */
527 switch (hdr->nlmsg_type)
528 {
529 case RTM_NEWADDR:
530 case RTM_DELADDR:
531 process_addr(this, hdr, TRUE);
532 this->condvar->broadcast(this->condvar);
533 break;
534 case RTM_NEWLINK:
535 case RTM_DELLINK:
536 process_link(this, hdr, TRUE);
537 this->condvar->broadcast(this->condvar);
538 break;
539 case RTM_NEWROUTE:
540 case RTM_DELROUTE:
541 if (this->process_route)
542 {
543 process_route(this, hdr);
544 }
545 break;
546 default:
547 break;
548 }
549 hdr = NLMSG_NEXT(hdr, len);
550 }
551 return JOB_REQUEUE_DIRECT;
552 }
553
554 /** enumerator over addresses */
555 typedef struct {
556 private_kernel_netlink_net_t* this;
557 /** whether to enumerate down interfaces */
558 bool include_down_ifaces;
559 /** whether to enumerate virtual ip addresses */
560 bool include_virtual_ips;
561 } address_enumerator_t;
562
563 /**
564 * cleanup function for address enumerator
565 */
566 static void address_enumerator_destroy(address_enumerator_t *data)
567 {
568 data->this->mutex->unlock(data->this->mutex);
569 free(data);
570 }
571
572 /**
573 * filter for addresses
574 */
575 static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
576 {
577 if (!data->include_virtual_ips && (*in)->virtual)
578 { /* skip virtual interfaces added by us */
579 return FALSE;
580 }
581 if ((*in)->scope >= RT_SCOPE_LINK)
582 { /* skip addresses with a unusable scope */
583 return FALSE;
584 }
585 *out = (*in)->ip;
586 return TRUE;
587 }
588
589 /**
590 * enumerator constructor for interfaces
591 */
592 static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
593 {
594 return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
595 (void*)filter_addresses, data, NULL);
596 }
597
598 /**
599 * filter for interfaces
600 */
601 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
602 {
603 if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
604 { /* skip interfaces not up */
605 return FALSE;
606 }
607 *out = *in;
608 return TRUE;
609 }
610
611 /**
612 * implementation of kernel_net_t.create_address_enumerator
613 */
614 static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
615 bool include_down_ifaces, bool include_virtual_ips)
616 {
617 address_enumerator_t *data = malloc_thing(address_enumerator_t);
618 data->this = this;
619 data->include_down_ifaces = include_down_ifaces;
620 data->include_virtual_ips = include_virtual_ips;
621
622 this->mutex->lock(this->mutex);
623 return enumerator_create_nested(
624 enumerator_create_filter(this->ifaces->create_enumerator(this->ifaces),
625 (void*)filter_interfaces, data, NULL),
626 (void*)create_iface_enumerator, data, (void*)address_enumerator_destroy);
627 }
628
629 /**
630 * implementation of kernel_net_t.get_interface_name
631 */
632 static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
633 {
634 enumerator_t *ifaces, *addrs;
635 iface_entry_t *iface;
636 addr_entry_t *addr;
637 char *name = NULL;
638
639 DBG2(DBG_KNL, "getting interface name for %H", ip);
640
641 this->mutex->lock(this->mutex);
642 ifaces = this->ifaces->create_enumerator(this->ifaces);
643 while (ifaces->enumerate(ifaces, &iface))
644 {
645 addrs = iface->addrs->create_enumerator(iface->addrs);
646 while (addrs->enumerate(addrs, &addr))
647 {
648 if (ip->ip_equals(ip, addr->ip))
649 {
650 name = strdup(iface->ifname);
651 break;
652 }
653 }
654 addrs->destroy(addrs);
655 if (name)
656 {
657 break;
658 }
659 }
660 ifaces->destroy(ifaces);
661 this->mutex->unlock(this->mutex);
662
663 if (name)
664 {
665 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
666 }
667 else
668 {
669 DBG2(DBG_KNL, "%H is not a local address", ip);
670 }
671 return name;
672 }
673
674 /**
675 * get the index of an interface by name
676 */
677 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
678 {
679 enumerator_t *ifaces;
680 iface_entry_t *iface;
681 int ifindex = 0;
682
683 DBG2(DBG_KNL, "getting iface index for %s", name);
684
685 this->mutex->lock(this->mutex);
686 ifaces = this->ifaces->create_enumerator(this->ifaces);
687 while (ifaces->enumerate(ifaces, &iface))
688 {
689 if (streq(name, iface->ifname))
690 {
691 ifindex = iface->ifindex;
692 break;
693 }
694 }
695 ifaces->destroy(ifaces);
696 this->mutex->unlock(this->mutex);
697
698 if (ifindex == 0)
699 {
700 DBG1(DBG_KNL, "unable to get interface index for %s", name);
701 }
702 return ifindex;
703 }
704
705 /**
706 * Check if an interface with a given index is up
707 */
708 static bool is_interface_up(private_kernel_netlink_net_t *this, int index)
709 {
710 enumerator_t *ifaces;
711 iface_entry_t *iface;
712 /* default to TRUE for interface we do not monitor (e.g. lo) */
713 bool up = TRUE;
714
715 ifaces = this->ifaces->create_enumerator(this->ifaces);
716 while (ifaces->enumerate(ifaces, &iface))
717 {
718 if (iface->ifindex == index)
719 {
720 up = iface->flags & IFF_UP;
721 break;
722 }
723 }
724 ifaces->destroy(ifaces);
725 return up;
726 }
727
728 /**
729 * check if an address (chunk) addr is in subnet (net with net_len net bits)
730 */
731 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
732 {
733 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
734 int byte = 0;
735
736 if (addr.len != net.len || net_len > 8 * net.len )
737 {
738 return FALSE;
739 }
740
741 /* scan through all bytes in network order */
742 while (net_len > 0)
743 {
744 if (net_len < 8)
745 {
746 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
747 }
748 else
749 {
750 if (addr.ptr[byte] != net.ptr[byte])
751 {
752 return FALSE;
753 }
754 byte++;
755 net_len -= 8;
756 }
757 }
758 return TRUE;
759 }
760
761 /**
762 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
763 */
764 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
765 bool nexthop, host_t *candidate)
766 {
767 netlink_buf_t request;
768 struct nlmsghdr *hdr, *out, *current;
769 struct rtmsg *msg;
770 chunk_t chunk;
771 size_t len;
772 int best = -1;
773 host_t *src = NULL, *gtw = NULL;
774
775 DBG2(DBG_KNL, "getting address to reach %H", dest);
776
777 memset(&request, 0, sizeof(request));
778
779 hdr = (struct nlmsghdr*)request;
780 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
781 hdr->nlmsg_type = RTM_GETROUTE;
782 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
783
784 msg = (struct rtmsg*)NLMSG_DATA(hdr);
785 msg->rtm_family = dest->get_family(dest);
786
787 chunk = dest->get_address(dest);
788 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
789 if (candidate)
790 {
791 chunk = candidate->get_address(candidate);
792 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
793 }
794
795 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
796 {
797 DBG1(DBG_KNL, "getting address to %H failed", dest);
798 return NULL;
799 }
800 this->mutex->lock(this->mutex);
801 current = out;
802 while (NLMSG_OK(current, len))
803 {
804 switch (current->nlmsg_type)
805 {
806 case NLMSG_DONE:
807 break;
808 case RTM_NEWROUTE:
809 {
810 struct rtattr *rta;
811 size_t rtasize;
812 chunk_t rta_gtw, rta_src, rta_dst;
813 u_int32_t rta_oif = 0;
814 enumerator_t *ifaces, *addrs;
815 iface_entry_t *iface;
816 addr_entry_t *addr;
817
818 rta_gtw = rta_src = rta_dst = chunk_empty;
819 msg = (struct rtmsg*)(NLMSG_DATA(current));
820 rta = RTM_RTA(msg);
821 rtasize = RTM_PAYLOAD(current);
822 while (RTA_OK(rta, rtasize))
823 {
824 switch (rta->rta_type)
825 {
826 case RTA_PREFSRC:
827 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
828 break;
829 case RTA_GATEWAY:
830 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
831 break;
832 case RTA_DST:
833 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
834 break;
835 case RTA_OIF:
836 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
837 {
838 rta_oif = *(u_int32_t*)RTA_DATA(rta);
839 }
840 break;
841 }
842 rta = RTA_NEXT(rta, rtasize);
843 }
844 if (rta_oif && !is_interface_up(this, rta_oif))
845 { /* interface is down */
846 goto next;
847 }
848 if (this->routing_table != 0 &&
849 msg->rtm_table == this->routing_table)
850 { /* route is from our own ipsec routing table */
851 goto next;
852 }
853 if (msg->rtm_dst_len <= best)
854 { /* not better than a previous one */
855 goto next;
856 }
857 if (msg->rtm_dst_len != 0 &&
858 (!rta_dst.ptr ||
859 !addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len)))
860 { /* is not the default route and not contained in our dst */
861 goto next;
862 }
863
864 best = msg->rtm_dst_len;
865 if (nexthop)
866 {
867 DESTROY_IF(gtw);
868 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
869 goto next;
870 }
871 if (rta_src.ptr)
872 {
873 DESTROY_IF(src);
874 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
875 if (get_vip_refcount(this, src))
876 { /* skip source address if it is installed by us */
877 DESTROY_IF(src);
878 src = NULL;
879 }
880 goto next;
881 }
882 /* no source addr, get one from the interfaces */
883 ifaces = this->ifaces->create_enumerator(this->ifaces);
884 while (ifaces->enumerate(ifaces, &iface))
885 {
886 if (iface->ifindex == rta_oif &&
887 iface->flags & IFF_UP)
888 {
889 addrs = iface->addrs->create_enumerator(iface->addrs);
890 while (addrs->enumerate(addrs, &addr))
891 {
892 chunk_t ip = addr->ip->get_address(addr->ip);
893 if ((msg->rtm_dst_len == 0 &&
894 addr->ip->get_family(addr->ip) ==
895 dest->get_family(dest)) ||
896 addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
897 {
898 DESTROY_IF(src);
899 src = addr->ip->clone(addr->ip);
900 break;
901 }
902 }
903 addrs->destroy(addrs);
904 }
905 }
906 ifaces->destroy(ifaces);
907 goto next;
908 }
909 default:
910 next:
911 current = NLMSG_NEXT(current, len);
912 continue;
913 }
914 break;
915 }
916 free(out);
917 this->mutex->unlock(this->mutex);
918
919 if (nexthop)
920 {
921 if (gtw)
922 {
923 return gtw;
924 }
925 return dest->clone(dest);
926 }
927 return src;
928 }
929
930 /**
931 * Implementation of kernel_net_t.get_source_addr.
932 */
933 static host_t* get_source_addr(private_kernel_netlink_net_t *this,
934 host_t *dest, host_t *src)
935 {
936 return get_route(this, dest, FALSE, src);
937 }
938
939 /**
940 * Implementation of kernel_net_t.get_nexthop.
941 */
942 static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
943 {
944 return get_route(this, dest, TRUE, NULL);
945 }
946
947 /**
948 * Manages the creation and deletion of ip addresses on an interface.
949 * By setting the appropriate nlmsg_type, the ip will be set or unset.
950 */
951 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
952 int flags, int if_index, host_t *ip)
953 {
954 netlink_buf_t request;
955 struct nlmsghdr *hdr;
956 struct ifaddrmsg *msg;
957 chunk_t chunk;
958
959 memset(&request, 0, sizeof(request));
960
961 chunk = ip->get_address(ip);
962
963 hdr = (struct nlmsghdr*)request;
964 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
965 hdr->nlmsg_type = nlmsg_type;
966 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
967
968 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
969 msg->ifa_family = ip->get_family(ip);
970 msg->ifa_flags = 0;
971 msg->ifa_prefixlen = 8 * chunk.len;
972 msg->ifa_scope = RT_SCOPE_UNIVERSE;
973 msg->ifa_index = if_index;
974
975 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
976
977 return this->socket->send_ack(this->socket, hdr);
978 }
979
980 /**
981 * Implementation of kernel_net_t.add_ip.
982 */
983 static status_t add_ip(private_kernel_netlink_net_t *this,
984 host_t *virtual_ip, host_t *iface_ip)
985 {
986 iface_entry_t *iface;
987 addr_entry_t *addr;
988 enumerator_t *addrs, *ifaces;
989 int ifindex;
990
991 if (!this->install_virtual_ip)
992 { /* disabled by config */
993 return SUCCESS;
994 }
995
996 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
997
998 this->mutex->lock(this->mutex);
999 ifaces = this->ifaces->create_enumerator(this->ifaces);
1000 while (ifaces->enumerate(ifaces, &iface))
1001 {
1002 bool iface_found = FALSE;
1003
1004 addrs = iface->addrs->create_enumerator(iface->addrs);
1005 while (addrs->enumerate(addrs, &addr))
1006 {
1007 if (iface_ip->ip_equals(iface_ip, addr->ip))
1008 {
1009 iface_found = TRUE;
1010 }
1011 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1012 {
1013 addr->refcount++;
1014 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1015 virtual_ip, iface->ifname);
1016 addrs->destroy(addrs);
1017 ifaces->destroy(ifaces);
1018 this->mutex->unlock(this->mutex);
1019 return SUCCESS;
1020 }
1021 }
1022 addrs->destroy(addrs);
1023
1024 if (iface_found)
1025 {
1026 ifindex = iface->ifindex;
1027 addr = malloc_thing(addr_entry_t);
1028 addr->ip = virtual_ip->clone(virtual_ip);
1029 addr->refcount = 0;
1030 addr->virtual = TRUE;
1031 addr->scope = RT_SCOPE_UNIVERSE;
1032 iface->addrs->insert_last(iface->addrs, addr);
1033
1034 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1035 ifindex, virtual_ip) == SUCCESS)
1036 {
1037 while (get_vip_refcount(this, virtual_ip) == 0)
1038 { /* wait until address appears */
1039 this->condvar->wait(this->condvar, this->mutex);
1040 }
1041 ifaces->destroy(ifaces);
1042 this->mutex->unlock(this->mutex);
1043 return SUCCESS;
1044 }
1045 ifaces->destroy(ifaces);
1046 this->mutex->unlock(this->mutex);
1047 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1048 return FAILED;
1049 }
1050 }
1051 ifaces->destroy(ifaces);
1052 this->mutex->unlock(this->mutex);
1053
1054 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1055 "virtual IP %H", iface_ip, virtual_ip);
1056 return FAILED;
1057 }
1058
1059 /**
1060 * Implementation of kernel_net_t.del_ip.
1061 */
1062 static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
1063 {
1064 iface_entry_t *iface;
1065 addr_entry_t *addr;
1066 enumerator_t *addrs, *ifaces;
1067 status_t status;
1068 int ifindex;
1069
1070 if (!this->install_virtual_ip)
1071 { /* disabled by config */
1072 return SUCCESS;
1073 }
1074
1075 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1076
1077 this->mutex->lock(this->mutex);
1078 ifaces = this->ifaces->create_enumerator(this->ifaces);
1079 while (ifaces->enumerate(ifaces, &iface))
1080 {
1081 addrs = iface->addrs->create_enumerator(iface->addrs);
1082 while (addrs->enumerate(addrs, &addr))
1083 {
1084 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1085 {
1086 ifindex = iface->ifindex;
1087 if (addr->refcount == 1)
1088 {
1089 status = manage_ipaddr(this, RTM_DELADDR, 0,
1090 ifindex, virtual_ip);
1091 if (status == SUCCESS)
1092 { /* wait until the address is really gone */
1093 while (get_vip_refcount(this, virtual_ip) > 0)
1094 {
1095 this->condvar->wait(this->condvar, this->mutex);
1096 }
1097 }
1098 addrs->destroy(addrs);
1099 ifaces->destroy(ifaces);
1100 this->mutex->unlock(this->mutex);
1101 return status;
1102 }
1103 else
1104 {
1105 addr->refcount--;
1106 }
1107 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1108 virtual_ip);
1109 addrs->destroy(addrs);
1110 ifaces->destroy(ifaces);
1111 this->mutex->unlock(this->mutex);
1112 return SUCCESS;
1113 }
1114 }
1115 addrs->destroy(addrs);
1116 }
1117 ifaces->destroy(ifaces);
1118 this->mutex->unlock(this->mutex);
1119
1120 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1121 return FAILED;
1122 }
1123
1124 /**
1125 * Manages source routes in the routing table.
1126 * By setting the appropriate nlmsg_type, the route gets added or removed.
1127 */
1128 static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
1129 int flags, chunk_t dst_net, u_int8_t prefixlen,
1130 host_t *gateway, host_t *src_ip, char *if_name)
1131 {
1132 netlink_buf_t request;
1133 struct nlmsghdr *hdr;
1134 struct rtmsg *msg;
1135 int ifindex;
1136 chunk_t chunk;
1137
1138 /* if route is 0.0.0.0/0, we can't install it, as it would
1139 * overwrite the default route. Instead, we add two routes:
1140 * 0.0.0.0/1 and 128.0.0.0/1 */
1141 if (this->routing_table == 0 && prefixlen == 0)
1142 {
1143 chunk_t half_net;
1144 u_int8_t half_prefixlen;
1145 status_t status;
1146
1147 half_net = chunk_alloca(dst_net.len);
1148 memset(half_net.ptr, 0, half_net.len);
1149 half_prefixlen = 1;
1150
1151 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1152 gateway, src_ip, if_name);
1153 half_net.ptr[0] |= 0x80;
1154 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1155 gateway, src_ip, if_name);
1156 return status;
1157 }
1158
1159 memset(&request, 0, sizeof(request));
1160
1161 hdr = (struct nlmsghdr*)request;
1162 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1163 hdr->nlmsg_type = nlmsg_type;
1164 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1165
1166 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1167 msg->rtm_family = src_ip->get_family(src_ip);
1168 msg->rtm_dst_len = prefixlen;
1169 msg->rtm_table = this->routing_table;
1170 msg->rtm_protocol = RTPROT_STATIC;
1171 msg->rtm_type = RTN_UNICAST;
1172 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1173
1174 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
1175 chunk = src_ip->get_address(src_ip);
1176 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1177 chunk = gateway->get_address(gateway);
1178 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1179 ifindex = get_interface_index(this, if_name);
1180 chunk.ptr = (char*)&ifindex;
1181 chunk.len = sizeof(ifindex);
1182 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1183
1184 return this->socket->send_ack(this->socket, hdr);
1185 }
1186
1187 /**
1188 * Implementation of kernel_net_t.add_route.
1189 */
1190 static status_t add_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1191 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1192 {
1193 return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
1194 dst_net, prefixlen, gateway, src_ip, if_name);
1195 }
1196
1197 /**
1198 * Implementation of kernel_net_t.del_route.
1199 */
1200 static status_t del_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1201 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1202 {
1203 return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
1204 gateway, src_ip, if_name);
1205 }
1206
1207 /**
1208 * Initialize a list of local addresses.
1209 */
1210 static status_t init_address_list(private_kernel_netlink_net_t *this)
1211 {
1212 netlink_buf_t request;
1213 struct nlmsghdr *out, *current, *in;
1214 struct rtgenmsg *msg;
1215 size_t len;
1216 enumerator_t *ifaces, *addrs;
1217 iface_entry_t *iface;
1218 addr_entry_t *addr;
1219
1220 DBG1(DBG_KNL, "listening on interfaces:");
1221
1222 memset(&request, 0, sizeof(request));
1223
1224 in = (struct nlmsghdr*)&request;
1225 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1226 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1227 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1228 msg->rtgen_family = AF_UNSPEC;
1229
1230 /* get all links */
1231 in->nlmsg_type = RTM_GETLINK;
1232 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1233 {
1234 return FAILED;
1235 }
1236 current = out;
1237 while (NLMSG_OK(current, len))
1238 {
1239 switch (current->nlmsg_type)
1240 {
1241 case NLMSG_DONE:
1242 break;
1243 case RTM_NEWLINK:
1244 process_link(this, current, FALSE);
1245 /* fall through */
1246 default:
1247 current = NLMSG_NEXT(current, len);
1248 continue;
1249 }
1250 break;
1251 }
1252 free(out);
1253
1254 /* get all interface addresses */
1255 in->nlmsg_type = RTM_GETADDR;
1256 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1257 {
1258 return FAILED;
1259 }
1260 current = out;
1261 while (NLMSG_OK(current, len))
1262 {
1263 switch (current->nlmsg_type)
1264 {
1265 case NLMSG_DONE:
1266 break;
1267 case RTM_NEWADDR:
1268 process_addr(this, current, FALSE);
1269 /* fall through */
1270 default:
1271 current = NLMSG_NEXT(current, len);
1272 continue;
1273 }
1274 break;
1275 }
1276 free(out);
1277
1278 this->mutex->lock(this->mutex);
1279 ifaces = this->ifaces->create_enumerator(this->ifaces);
1280 while (ifaces->enumerate(ifaces, &iface))
1281 {
1282 if (iface->flags & IFF_UP)
1283 {
1284 DBG1(DBG_KNL, " %s", iface->ifname);
1285 addrs = iface->addrs->create_enumerator(iface->addrs);
1286 while (addrs->enumerate(addrs, (void**)&addr))
1287 {
1288 DBG1(DBG_KNL, " %H", addr->ip);
1289 }
1290 addrs->destroy(addrs);
1291 }
1292 }
1293 ifaces->destroy(ifaces);
1294 this->mutex->unlock(this->mutex);
1295 return SUCCESS;
1296 }
1297
1298 /**
1299 * create or delete a rule to use our routing table
1300 */
1301 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
1302 u_int32_t table, u_int32_t prio)
1303 {
1304 netlink_buf_t request;
1305 struct nlmsghdr *hdr;
1306 struct rtmsg *msg;
1307 chunk_t chunk;
1308
1309 memset(&request, 0, sizeof(request));
1310 hdr = (struct nlmsghdr*)request;
1311 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1312 hdr->nlmsg_type = nlmsg_type;
1313 if (nlmsg_type == RTM_NEWRULE)
1314 {
1315 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1316 }
1317 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1318
1319 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1320 msg->rtm_table = table;
1321 msg->rtm_family = AF_INET;
1322 msg->rtm_protocol = RTPROT_BOOT;
1323 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1324 msg->rtm_type = RTN_UNICAST;
1325
1326 chunk = chunk_from_thing(prio);
1327 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1328
1329 return this->socket->send_ack(this->socket, hdr);
1330 }
1331
1332 /**
1333 * Implementation of kernel_netlink_net_t.destroy.
1334 */
1335 static void destroy(private_kernel_netlink_net_t *this)
1336 {
1337 if (this->routing_table)
1338 {
1339 manage_rule(this, RTM_DELRULE, this->routing_table,
1340 this->routing_table_prio);
1341 }
1342
1343 this->job->cancel(this->job);
1344 close(this->socket_events);
1345 this->socket->destroy(this->socket);
1346 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
1347 this->condvar->destroy(this->condvar);
1348 this->mutex->destroy(this->mutex);
1349 free(this);
1350 }
1351
1352 /*
1353 * Described in header.
1354 */
1355 kernel_netlink_net_t *kernel_netlink_net_create()
1356 {
1357 private_kernel_netlink_net_t *this = malloc_thing(private_kernel_netlink_net_t);
1358 struct sockaddr_nl addr;
1359
1360 /* public functions */
1361 this->public.interface.get_interface = (char*(*)(kernel_net_t*,host_t*))get_interface_name;
1362 this->public.interface.create_address_enumerator = (enumerator_t*(*)(kernel_net_t*,bool,bool))create_address_enumerator;
1363 this->public.interface.get_source_addr = (host_t*(*)(kernel_net_t*, host_t *dest, host_t *src))get_source_addr;
1364 this->public.interface.get_nexthop = (host_t*(*)(kernel_net_t*, host_t *dest))get_nexthop;
1365 this->public.interface.add_ip = (status_t(*)(kernel_net_t*,host_t*,host_t*)) add_ip;
1366 this->public.interface.del_ip = (status_t(*)(kernel_net_t*,host_t*)) del_ip;
1367 this->public.interface.add_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
1368 this->public.interface.del_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
1369 this->public.interface.destroy = (void(*)(kernel_net_t*)) destroy;
1370
1371 /* private members */
1372 this->ifaces = linked_list_create();
1373 this->mutex = mutex_create(MUTEX_DEFAULT);
1374 this->condvar = condvar_create(CONDVAR_DEFAULT);
1375 timerclear(&this->last_roam);
1376 this->routing_table = lib->settings->get_int(lib->settings,
1377 "charon.routing_table", IPSEC_ROUTING_TABLE);
1378 this->routing_table_prio = lib->settings->get_int(lib->settings,
1379 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
1380 this->process_route = lib->settings->get_bool(lib->settings,
1381 "charon.process_route", TRUE);
1382 this->install_virtual_ip = lib->settings->get_bool(lib->settings,
1383 "charon.install_virtual_ip", TRUE);
1384
1385 this->socket = netlink_socket_create(NETLINK_ROUTE);
1386
1387 memset(&addr, 0, sizeof(addr));
1388 addr.nl_family = AF_NETLINK;
1389
1390 /* create and bind RT socket for events (address/interface/route changes) */
1391 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1392 if (this->socket_events <= 0)
1393 {
1394 charon->kill(charon, "unable to create RT event socket");
1395 }
1396 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1397 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
1398 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
1399 {
1400 charon->kill(charon, "unable to bind RT event socket");
1401 }
1402
1403 this->job = callback_job_create((callback_job_cb_t)receive_events,
1404 this, NULL, NULL);
1405 charon->processor->queue_job(charon->processor, (job_t*)this->job);
1406
1407 if (init_address_list(this) != SUCCESS)
1408 {
1409 charon->kill(charon, "unable to get interface list");
1410 }
1411
1412 if (this->routing_table)
1413 {
1414 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
1415 this->routing_table_prio) != SUCCESS)
1416 {
1417 DBG1(DBG_KNL, "unable to create routing table rule");
1418 }
1419 }
1420
1421 return &this->public;
1422 }