018d51fee65af7db8d8100cd6ba9346a1cd1dbd5
[strongswan.git] / src / charon / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 */
16
17 #include <sys/socket.h>
18 #include <linux/netlink.h>
19 #include <linux/rtnetlink.h>
20 #include <sys/time.h>
21 #include <pthread.h>
22 #include <unistd.h>
23 #include <errno.h>
24 #include <net/if.h>
25
26 #include "kernel_netlink_net.h"
27 #include "kernel_netlink_shared.h"
28
29 #include <daemon.h>
30 #include <utils/mutex.h>
31 #include <utils/linked_list.h>
32 #include <processing/jobs/callback_job.h>
33 #include <processing/jobs/roam_job.h>
34
35 /** delay before firing roam jobs (ms) */
36 #define ROAM_DELAY 100
37
38 /** routing table for routes installed by us */
39 #ifndef IPSEC_ROUTING_TABLE
40 #define IPSEC_ROUTING_TABLE 100
41 #endif
42 #ifndef IPSEC_ROUTING_TABLE_PRIO
43 #define IPSEC_ROUTING_TABLE_PRIO 100
44 #endif
45
46 typedef struct addr_entry_t addr_entry_t;
47
48 /**
49 * IP address in an inface_entry_t
50 */
51 struct addr_entry_t {
52
53 /** The ip address */
54 host_t *ip;
55
56 /** virtual IP managed by us */
57 bool virtual;
58
59 /** scope of the address */
60 u_char scope;
61
62 /** Number of times this IP is used, if virtual */
63 u_int refcount;
64 };
65
66 /**
67 * destroy a addr_entry_t object
68 */
69 static void addr_entry_destroy(addr_entry_t *this)
70 {
71 this->ip->destroy(this->ip);
72 free(this);
73 }
74
75 typedef struct iface_entry_t iface_entry_t;
76
77 /**
78 * A network interface on this system, containing addr_entry_t's
79 */
80 struct iface_entry_t {
81
82 /** interface index */
83 int ifindex;
84
85 /** name of the interface */
86 char ifname[IFNAMSIZ];
87
88 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
89 u_int flags;
90
91 /** list of addresses as host_t */
92 linked_list_t *addrs;
93 };
94
95 /**
96 * destroy an interface entry
97 */
98 static void iface_entry_destroy(iface_entry_t *this)
99 {
100 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
101 free(this);
102 }
103
104 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
105
106 /**
107 * Private variables and functions of kernel_netlink_net class.
108 */
109 struct private_kernel_netlink_net_t {
110 /**
111 * Public part of the kernel_netlink_net_t object.
112 */
113 kernel_netlink_net_t public;
114
115 /**
116 * mutex to lock access to various lists
117 */
118 mutex_t *mutex;
119
120 /**
121 * condition variable to signal virtual IP add/removal
122 */
123 condvar_t *condvar;
124
125 /**
126 * Cached list of interfaces and its addresses (iface_entry_t)
127 */
128 linked_list_t *ifaces;
129
130 /**
131 * job receiving netlink events
132 */
133 callback_job_t *job;
134
135 /**
136 * netlink rt socket (routing)
137 */
138 netlink_socket_t *socket;
139
140 /**
141 * Netlink rt socket to receive address change events
142 */
143 int socket_events;
144
145 /**
146 * time of the last roam_job
147 */
148 struct timeval last_roam;
149
150 /**
151 * routing table to install routes
152 */
153 int routing_table;
154
155 /**
156 * priority of used routing table
157 */
158 int routing_table_prio;
159
160 /**
161 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
162 */
163 bool process_route;
164
165 };
166
167 /**
168 * get the refcount of a virtual ip
169 */
170 static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
171 {
172 iterator_t *ifaces, *addrs;
173 iface_entry_t *iface;
174 addr_entry_t *addr;
175 int refcount = 0;
176
177 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
178 while (ifaces->iterate(ifaces, (void**)&iface))
179 {
180 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
181 while (addrs->iterate(addrs, (void**)&addr))
182 {
183 if (addr->virtual && (iface->flags & IFF_UP) &&
184 ip->ip_equals(ip, addr->ip))
185 {
186 refcount = addr->refcount;
187 break;
188 }
189 }
190 addrs->destroy(addrs);
191 if (refcount)
192 {
193 break;
194 }
195 }
196 ifaces->destroy(ifaces);
197
198 return refcount;
199 }
200
201 /**
202 * start a roaming job. We delay it for a second and fire only one job
203 * for multiple events. Otherwise we would create two many jobs.
204 */
205 static void fire_roam_job(private_kernel_netlink_net_t *this, bool address)
206 {
207 struct timeval now;
208
209 if (gettimeofday(&now, NULL) == 0)
210 {
211 if (timercmp(&now, &this->last_roam, >))
212 {
213 now.tv_usec += ROAM_DELAY * 1000;
214 while (now.tv_usec > 1000000)
215 {
216 now.tv_sec++;
217 now.tv_usec -= 1000000;
218 }
219 this->last_roam = now;
220 charon->scheduler->schedule_job_ms(charon->scheduler,
221 (job_t*)roam_job_create(address), ROAM_DELAY);
222 }
223 }
224 }
225
226 /**
227 * process RTM_NEWLINK/RTM_DELLINK from kernel
228 */
229 static void process_link(private_kernel_netlink_net_t *this,
230 struct nlmsghdr *hdr, bool event)
231 {
232 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
233 struct rtattr *rta = IFLA_RTA(msg);
234 size_t rtasize = IFLA_PAYLOAD (hdr);
235 enumerator_t *enumerator;
236 iface_entry_t *current, *entry = NULL;
237 char *name = NULL;
238 bool update = FALSE;
239
240 while(RTA_OK(rta, rtasize))
241 {
242 switch (rta->rta_type)
243 {
244 case IFLA_IFNAME:
245 name = RTA_DATA(rta);
246 break;
247 }
248 rta = RTA_NEXT(rta, rtasize);
249 }
250 if (!name)
251 {
252 name = "(unknown)";
253 }
254
255 this->mutex->lock(this->mutex);
256 switch (hdr->nlmsg_type)
257 {
258 case RTM_NEWLINK:
259 {
260 if (msg->ifi_flags & IFF_LOOPBACK)
261 { /* ignore loopback interfaces */
262 break;
263 }
264 enumerator = this->ifaces->create_enumerator(this->ifaces);
265 while (enumerator->enumerate(enumerator, &current))
266 {
267 if (current->ifindex == msg->ifi_index)
268 {
269 entry = current;
270 break;
271 }
272 }
273 enumerator->destroy(enumerator);
274 if (!entry)
275 {
276 entry = malloc_thing(iface_entry_t);
277 entry->ifindex = msg->ifi_index;
278 entry->flags = 0;
279 entry->addrs = linked_list_create();
280 this->ifaces->insert_last(this->ifaces, entry);
281 }
282 memcpy(entry->ifname, name, IFNAMSIZ);
283 entry->ifname[IFNAMSIZ-1] = '\0';
284 if (event)
285 {
286 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
287 {
288 update = TRUE;
289 DBG1(DBG_KNL, "interface %s activated", name);
290 }
291 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
292 {
293 update = TRUE;
294 DBG1(DBG_KNL, "interface %s deactivated", name);
295 }
296 }
297 entry->flags = msg->ifi_flags;
298 break;
299 }
300 case RTM_DELLINK:
301 {
302 enumerator = this->ifaces->create_enumerator(this->ifaces);
303 while (enumerator->enumerate(enumerator, &current))
304 {
305 if (current->ifindex == msg->ifi_index)
306 {
307 /* we do not remove it, as an address may be added to a
308 * "down" interface and we wan't to know that. */
309 current->flags = msg->ifi_flags;
310 break;
311 }
312 }
313 enumerator->destroy(enumerator);
314 break;
315 }
316 }
317 this->mutex->unlock(this->mutex);
318
319 /* send an update to all IKE_SAs */
320 if (update && event)
321 {
322 fire_roam_job(this, TRUE);
323 }
324 }
325
326 /**
327 * process RTM_NEWADDR/RTM_DELADDR from kernel
328 */
329 static void process_addr(private_kernel_netlink_net_t *this,
330 struct nlmsghdr *hdr, bool event)
331 {
332 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
333 struct rtattr *rta = IFA_RTA(msg);
334 size_t rtasize = IFA_PAYLOAD (hdr);
335 host_t *host = NULL;
336 enumerator_t *ifaces, *addrs;
337 iface_entry_t *iface;
338 addr_entry_t *addr;
339 chunk_t local = chunk_empty, address = chunk_empty;
340 bool update = FALSE, found = FALSE, changed = FALSE;
341
342 while(RTA_OK(rta, rtasize))
343 {
344 switch (rta->rta_type)
345 {
346 case IFA_LOCAL:
347 local.ptr = RTA_DATA(rta);
348 local.len = RTA_PAYLOAD(rta);
349 break;
350 case IFA_ADDRESS:
351 address.ptr = RTA_DATA(rta);
352 address.len = RTA_PAYLOAD(rta);
353 break;
354 }
355 rta = RTA_NEXT(rta, rtasize);
356 }
357
358 /* For PPP interfaces, we need the IFA_LOCAL address,
359 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
360 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
361 if (local.ptr)
362 {
363 host = host_create_from_chunk(msg->ifa_family, local, 0);
364 }
365 else if (address.ptr)
366 {
367 host = host_create_from_chunk(msg->ifa_family, address, 0);
368 }
369
370 if (host == NULL)
371 { /* bad family? */
372 return;
373 }
374
375 this->mutex->lock(this->mutex);
376 ifaces = this->ifaces->create_enumerator(this->ifaces);
377 while (ifaces->enumerate(ifaces, &iface))
378 {
379 if (iface->ifindex == msg->ifa_index)
380 {
381 addrs = iface->addrs->create_enumerator(iface->addrs);
382 while (addrs->enumerate(addrs, &addr))
383 {
384 if (host->ip_equals(host, addr->ip))
385 {
386 found = TRUE;
387 if (hdr->nlmsg_type == RTM_DELADDR)
388 {
389 iface->addrs->remove_at(iface->addrs, addrs);
390 if (!addr->virtual)
391 {
392 changed = TRUE;
393 DBG1(DBG_KNL, "%H disappeared from %s",
394 host, iface->ifname);
395 }
396 addr_entry_destroy(addr);
397 }
398 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
399 {
400 addr->refcount = 1;
401 }
402 }
403 }
404 addrs->destroy(addrs);
405
406 if (hdr->nlmsg_type == RTM_NEWADDR)
407 {
408 if (!found)
409 {
410 found = TRUE;
411 changed = TRUE;
412 addr = malloc_thing(addr_entry_t);
413 addr->ip = host->clone(host);
414 addr->virtual = FALSE;
415 addr->refcount = 1;
416 addr->scope = msg->ifa_scope;
417
418 iface->addrs->insert_last(iface->addrs, addr);
419 if (event)
420 {
421 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
422 }
423 }
424 }
425 if (found && (iface->flags & IFF_UP))
426 {
427 update = TRUE;
428 }
429 break;
430 }
431 }
432 ifaces->destroy(ifaces);
433 this->mutex->unlock(this->mutex);
434 host->destroy(host);
435
436 /* send an update to all IKE_SAs */
437 if (update && event && changed)
438 {
439 fire_roam_job(this, TRUE);
440 }
441 }
442
443 /**
444 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
445 */
446 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
447 {
448 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
449 struct rtattr *rta = RTM_RTA(msg);
450 size_t rtasize = RTM_PAYLOAD(hdr);
451 host_t *host = NULL;
452
453 /* ignore routes added by us */
454 if (msg->rtm_table && msg->rtm_table == this->routing_table)
455 {
456 return;
457 }
458
459 while (RTA_OK(rta, rtasize))
460 {
461 switch (rta->rta_type)
462 {
463 case RTA_PREFSRC:
464 host = host_create_from_chunk(msg->rtm_family,
465 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
466 break;
467 }
468 rta = RTA_NEXT(rta, rtasize);
469 }
470 if (host)
471 {
472 this->mutex->lock(this->mutex);
473 if (!get_vip_refcount(this, host))
474 { /* ignore routes added for virtual IPs */
475 fire_roam_job(this, FALSE);
476 }
477 this->mutex->unlock(this->mutex);
478 host->destroy(host);
479 }
480 }
481
482 /**
483 * Receives events from kernel
484 */
485 static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
486 {
487 char response[1024];
488 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
489 struct sockaddr_nl addr;
490 socklen_t addr_len = sizeof(addr);
491 int len, oldstate;
492
493 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
494 len = recvfrom(this->socket_events, response, sizeof(response), 0,
495 (struct sockaddr*)&addr, &addr_len);
496 pthread_setcancelstate(oldstate, NULL);
497
498 if (len < 0)
499 {
500 switch (errno)
501 {
502 case EINTR:
503 /* interrupted, try again */
504 return JOB_REQUEUE_DIRECT;
505 case EAGAIN:
506 /* no data ready, select again */
507 return JOB_REQUEUE_DIRECT;
508 default:
509 DBG1(DBG_KNL, "unable to receive from rt event socket");
510 sleep(1);
511 return JOB_REQUEUE_FAIR;
512 }
513 }
514
515 if (addr.nl_pid != 0)
516 { /* not from kernel. not interested, try another one */
517 return JOB_REQUEUE_DIRECT;
518 }
519
520 while (NLMSG_OK(hdr, len))
521 {
522 /* looks good so far, dispatch netlink message */
523 switch (hdr->nlmsg_type)
524 {
525 case RTM_NEWADDR:
526 case RTM_DELADDR:
527 process_addr(this, hdr, TRUE);
528 this->condvar->broadcast(this->condvar);
529 break;
530 case RTM_NEWLINK:
531 case RTM_DELLINK:
532 process_link(this, hdr, TRUE);
533 this->condvar->broadcast(this->condvar);
534 break;
535 case RTM_NEWROUTE:
536 case RTM_DELROUTE:
537 if (this->process_route)
538 {
539 process_route(this, hdr);
540 }
541 break;
542 default:
543 break;
544 }
545 hdr = NLMSG_NEXT(hdr, len);
546 }
547 return JOB_REQUEUE_DIRECT;
548 }
549
550 /** enumerator over addresses */
551 typedef struct {
552 private_kernel_netlink_net_t* this;
553 /** whether to enumerate down interfaces */
554 bool include_down_ifaces;
555 /** whether to enumerate virtual ip addresses */
556 bool include_virtual_ips;
557 } address_enumerator_t;
558
559 /**
560 * cleanup function for address enumerator
561 */
562 static void address_enumerator_destroy(address_enumerator_t *data)
563 {
564 data->this->mutex->unlock(data->this->mutex);
565 free(data);
566 }
567
568 /**
569 * filter for addresses
570 */
571 static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
572 {
573 if (!data->include_virtual_ips && (*in)->virtual)
574 { /* skip virtual interfaces added by us */
575 return FALSE;
576 }
577 if ((*in)->scope >= RT_SCOPE_LINK)
578 { /* skip addresses with a unusable scope */
579 return FALSE;
580 }
581 *out = (*in)->ip;
582 return TRUE;
583 }
584
585 /**
586 * enumerator constructor for interfaces
587 */
588 static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
589 {
590 return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
591 (void*)filter_addresses, data, NULL);
592 }
593
594 /**
595 * filter for interfaces
596 */
597 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
598 {
599 if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
600 { /* skip interfaces not up */
601 return FALSE;
602 }
603 *out = *in;
604 return TRUE;
605 }
606
607 /**
608 * implementation of kernel_net_t.create_address_enumerator
609 */
610 static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
611 bool include_down_ifaces, bool include_virtual_ips)
612 {
613 address_enumerator_t *data = malloc_thing(address_enumerator_t);
614 data->this = this;
615 data->include_down_ifaces = include_down_ifaces;
616 data->include_virtual_ips = include_virtual_ips;
617
618 this->mutex->lock(this->mutex);
619 return enumerator_create_nested(
620 enumerator_create_filter(this->ifaces->create_enumerator(this->ifaces),
621 (void*)filter_interfaces, data, NULL),
622 (void*)create_iface_enumerator, data, (void*)address_enumerator_destroy);
623 }
624
625 /**
626 * implementation of kernel_net_t.get_interface_name
627 */
628 static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
629 {
630 enumerator_t *ifaces, *addrs;
631 iface_entry_t *iface;
632 addr_entry_t *addr;
633 char *name = NULL;
634
635 DBG2(DBG_KNL, "getting interface name for %H", ip);
636
637 this->mutex->lock(this->mutex);
638 ifaces = this->ifaces->create_enumerator(this->ifaces);
639 while (ifaces->enumerate(ifaces, &iface))
640 {
641 addrs = iface->addrs->create_enumerator(iface->addrs);
642 while (addrs->enumerate(addrs, &addr))
643 {
644 if (ip->ip_equals(ip, addr->ip))
645 {
646 name = strdup(iface->ifname);
647 break;
648 }
649 }
650 addrs->destroy(addrs);
651 if (name)
652 {
653 break;
654 }
655 }
656 ifaces->destroy(ifaces);
657 this->mutex->unlock(this->mutex);
658
659 if (name)
660 {
661 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
662 }
663 else
664 {
665 DBG2(DBG_KNL, "%H is not a local address", ip);
666 }
667 return name;
668 }
669
670 /**
671 * get the index of an interface by name
672 */
673 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
674 {
675 enumerator_t *ifaces;
676 iface_entry_t *iface;
677 int ifindex = 0;
678
679 DBG2(DBG_KNL, "getting iface index for %s", name);
680
681 this->mutex->lock(this->mutex);
682 ifaces = this->ifaces->create_enumerator(this->ifaces);
683 while (ifaces->enumerate(ifaces, &iface))
684 {
685 if (streq(name, iface->ifname))
686 {
687 ifindex = iface->ifindex;
688 break;
689 }
690 }
691 ifaces->destroy(ifaces);
692 this->mutex->unlock(this->mutex);
693
694 if (ifindex == 0)
695 {
696 DBG1(DBG_KNL, "unable to get interface index for %s", name);
697 }
698 return ifindex;
699 }
700
701 /**
702 * Check if an interface with a given index is up
703 */
704 static bool is_interface_up(private_kernel_netlink_net_t *this, int index)
705 {
706 enumerator_t *ifaces;
707 iface_entry_t *iface;
708 /* default to TRUE for interface we do not monitor (e.g. lo) */
709 bool up = TRUE;
710
711 ifaces = this->ifaces->create_enumerator(this->ifaces);
712 while (ifaces->enumerate(ifaces, &iface))
713 {
714 if (iface->ifindex == index)
715 {
716 up = iface->flags & IFF_UP;
717 break;
718 }
719 }
720 ifaces->destroy(ifaces);
721 return up;
722 }
723
724 /**
725 * check if an address (chunk) addr is in subnet (net with net_len net bits)
726 */
727 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
728 {
729 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
730 int byte = 0;
731
732 if (addr.len != net.len || net_len > 8 * net.len )
733 {
734 return FALSE;
735 }
736
737 /* scan through all bytes in network order */
738 while (net_len > 0)
739 {
740 if (net_len < 8)
741 {
742 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
743 }
744 else
745 {
746 if (addr.ptr[byte] != net.ptr[byte])
747 {
748 return FALSE;
749 }
750 byte++;
751 net_len -= 8;
752 }
753 }
754 return TRUE;
755 }
756
757 /**
758 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
759 */
760 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
761 bool nexthop, host_t *candidate)
762 {
763 netlink_buf_t request;
764 struct nlmsghdr *hdr, *out, *current;
765 struct rtmsg *msg;
766 chunk_t chunk;
767 size_t len;
768 int best = -1;
769 host_t *src = NULL, *gtw = NULL;
770
771 DBG2(DBG_KNL, "getting address to reach %H", dest);
772
773 memset(&request, 0, sizeof(request));
774
775 hdr = (struct nlmsghdr*)request;
776 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
777 hdr->nlmsg_type = RTM_GETROUTE;
778 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
779
780 msg = (struct rtmsg*)NLMSG_DATA(hdr);
781 msg->rtm_family = dest->get_family(dest);
782
783 chunk = dest->get_address(dest);
784 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
785 if (candidate)
786 {
787 chunk = candidate->get_address(candidate);
788 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
789 }
790
791 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
792 {
793 DBG1(DBG_KNL, "getting address to %H failed", dest);
794 return NULL;
795 }
796 this->mutex->lock(this->mutex);
797 current = out;
798 while (NLMSG_OK(current, len))
799 {
800 switch (current->nlmsg_type)
801 {
802 case NLMSG_DONE:
803 break;
804 case RTM_NEWROUTE:
805 {
806 struct rtattr *rta;
807 size_t rtasize;
808 chunk_t rta_gtw, rta_src, rta_dst;
809 u_int32_t rta_oif = 0;
810 enumerator_t *ifaces, *addrs;
811 iface_entry_t *iface;
812 addr_entry_t *addr;
813
814 rta_gtw = rta_src = rta_dst = chunk_empty;
815 msg = (struct rtmsg*)(NLMSG_DATA(current));
816 rta = RTM_RTA(msg);
817 rtasize = RTM_PAYLOAD(current);
818 while (RTA_OK(rta, rtasize))
819 {
820 switch (rta->rta_type)
821 {
822 case RTA_PREFSRC:
823 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
824 break;
825 case RTA_GATEWAY:
826 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
827 break;
828 case RTA_DST:
829 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
830 break;
831 case RTA_OIF:
832 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
833 {
834 rta_oif = *(u_int32_t*)RTA_DATA(rta);
835 }
836 break;
837 }
838 rta = RTA_NEXT(rta, rtasize);
839 }
840 if (rta_oif && !is_interface_up(this, rta_oif))
841 { /* interface is down */
842 goto next;
843 }
844 if (this->routing_table != 0 &&
845 msg->rtm_table == this->routing_table)
846 { /* route is from our own ipsec routing table */
847 goto next;
848 }
849 if (msg->rtm_dst_len <= best)
850 { /* not better than a previous one */
851 goto next;
852 }
853 if (msg->rtm_dst_len != 0 &&
854 (!rta_dst.ptr ||
855 !addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len)))
856 { /* is not the default route and not contained in our dst */
857 goto next;
858 }
859
860 best = msg->rtm_dst_len;
861 if (nexthop)
862 {
863 DESTROY_IF(gtw);
864 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
865 goto next;
866 }
867 if (rta_src.ptr)
868 {
869 DESTROY_IF(src);
870 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
871 if (get_vip_refcount(this, src))
872 { /* skip source address if it is installed by us */
873 DESTROY_IF(src);
874 src = NULL;
875 }
876 goto next;
877 }
878 /* no source addr, get one from the interfaces */
879 ifaces = this->ifaces->create_enumerator(this->ifaces);
880 while (ifaces->enumerate(ifaces, &iface))
881 {
882 if (iface->ifindex == rta_oif &&
883 iface->flags & IFF_UP)
884 {
885 addrs = iface->addrs->create_enumerator(iface->addrs);
886 while (addrs->enumerate(addrs, &addr))
887 {
888 chunk_t ip = addr->ip->get_address(addr->ip);
889 if ((msg->rtm_dst_len == 0 &&
890 addr->ip->get_family(addr->ip) ==
891 dest->get_family(dest)) ||
892 addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
893 {
894 DESTROY_IF(src);
895 src = addr->ip->clone(addr->ip);
896 break;
897 }
898 }
899 addrs->destroy(addrs);
900 }
901 }
902 ifaces->destroy(ifaces);
903 goto next;
904 }
905 default:
906 next:
907 current = NLMSG_NEXT(current, len);
908 continue;
909 }
910 break;
911 }
912 free(out);
913 this->mutex->unlock(this->mutex);
914
915 if (nexthop)
916 {
917 if (gtw)
918 {
919 return gtw;
920 }
921 return dest->clone(dest);
922 }
923 return src;
924 }
925
926 /**
927 * Implementation of kernel_net_t.get_source_addr.
928 */
929 static host_t* get_source_addr(private_kernel_netlink_net_t *this,
930 host_t *dest, host_t *src)
931 {
932 return get_route(this, dest, FALSE, src);
933 }
934
935 /**
936 * Implementation of kernel_net_t.get_nexthop.
937 */
938 static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
939 {
940 return get_route(this, dest, TRUE, NULL);
941 }
942
943 /**
944 * Manages the creation and deletion of ip addresses on an interface.
945 * By setting the appropriate nlmsg_type, the ip will be set or unset.
946 */
947 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
948 int flags, int if_index, host_t *ip)
949 {
950 netlink_buf_t request;
951 struct nlmsghdr *hdr;
952 struct ifaddrmsg *msg;
953 chunk_t chunk;
954
955 memset(&request, 0, sizeof(request));
956
957 chunk = ip->get_address(ip);
958
959 hdr = (struct nlmsghdr*)request;
960 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
961 hdr->nlmsg_type = nlmsg_type;
962 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
963
964 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
965 msg->ifa_family = ip->get_family(ip);
966 msg->ifa_flags = 0;
967 msg->ifa_prefixlen = 8 * chunk.len;
968 msg->ifa_scope = RT_SCOPE_UNIVERSE;
969 msg->ifa_index = if_index;
970
971 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
972
973 return this->socket->send_ack(this->socket, hdr);
974 }
975
976 /**
977 * Implementation of kernel_net_t.add_ip.
978 */
979 static status_t add_ip(private_kernel_netlink_net_t *this,
980 host_t *virtual_ip, host_t *iface_ip)
981 {
982 iface_entry_t *iface;
983 addr_entry_t *addr;
984 enumerator_t *addrs, *ifaces;
985 int ifindex;
986
987 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
988
989 this->mutex->lock(this->mutex);
990 ifaces = this->ifaces->create_enumerator(this->ifaces);
991 while (ifaces->enumerate(ifaces, &iface))
992 {
993 bool iface_found = FALSE;
994
995 addrs = iface->addrs->create_enumerator(iface->addrs);
996 while (addrs->enumerate(addrs, &addr))
997 {
998 if (iface_ip->ip_equals(iface_ip, addr->ip))
999 {
1000 iface_found = TRUE;
1001 }
1002 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1003 {
1004 addr->refcount++;
1005 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1006 virtual_ip, iface->ifname);
1007 addrs->destroy(addrs);
1008 ifaces->destroy(ifaces);
1009 this->mutex->unlock(this->mutex);
1010 return SUCCESS;
1011 }
1012 }
1013 addrs->destroy(addrs);
1014
1015 if (iface_found)
1016 {
1017 ifindex = iface->ifindex;
1018 addr = malloc_thing(addr_entry_t);
1019 addr->ip = virtual_ip->clone(virtual_ip);
1020 addr->refcount = 0;
1021 addr->virtual = TRUE;
1022 addr->scope = RT_SCOPE_UNIVERSE;
1023 iface->addrs->insert_last(iface->addrs, addr);
1024
1025 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1026 ifindex, virtual_ip) == SUCCESS)
1027 {
1028 while (get_vip_refcount(this, virtual_ip) == 0)
1029 { /* wait until address appears */
1030 this->condvar->wait(this->condvar, this->mutex);
1031 }
1032 ifaces->destroy(ifaces);
1033 this->mutex->unlock(this->mutex);
1034 return SUCCESS;
1035 }
1036 ifaces->destroy(ifaces);
1037 this->mutex->unlock(this->mutex);
1038 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1039 return FAILED;
1040 }
1041 }
1042 ifaces->destroy(ifaces);
1043 this->mutex->unlock(this->mutex);
1044
1045 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1046 "virtual IP %H", iface_ip, virtual_ip);
1047 return FAILED;
1048 }
1049
1050 /**
1051 * Implementation of kernel_net_t.del_ip.
1052 */
1053 static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
1054 {
1055 iface_entry_t *iface;
1056 addr_entry_t *addr;
1057 enumerator_t *addrs, *ifaces;
1058 status_t status;
1059 int ifindex;
1060
1061 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1062
1063 this->mutex->lock(this->mutex);
1064 ifaces = this->ifaces->create_enumerator(this->ifaces);
1065 while (ifaces->enumerate(ifaces, &iface))
1066 {
1067 addrs = iface->addrs->create_enumerator(iface->addrs);
1068 while (addrs->enumerate(addrs, &addr))
1069 {
1070 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1071 {
1072 ifindex = iface->ifindex;
1073 if (addr->refcount == 1)
1074 {
1075 status = manage_ipaddr(this, RTM_DELADDR, 0,
1076 ifindex, virtual_ip);
1077 if (status == SUCCESS)
1078 { /* wait until the address is really gone */
1079 while (get_vip_refcount(this, virtual_ip) > 0)
1080 {
1081 this->condvar->wait(this->condvar, this->mutex);
1082 }
1083 }
1084 addrs->destroy(addrs);
1085 ifaces->destroy(ifaces);
1086 this->mutex->unlock(this->mutex);
1087 return status;
1088 }
1089 else
1090 {
1091 addr->refcount--;
1092 }
1093 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1094 virtual_ip);
1095 addrs->destroy(addrs);
1096 ifaces->destroy(ifaces);
1097 this->mutex->unlock(this->mutex);
1098 return SUCCESS;
1099 }
1100 }
1101 addrs->destroy(addrs);
1102 }
1103 ifaces->destroy(ifaces);
1104 this->mutex->unlock(this->mutex);
1105
1106 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1107 return FAILED;
1108 }
1109
1110 /**
1111 * Manages source routes in the routing table.
1112 * By setting the appropriate nlmsg_type, the route gets added or removed.
1113 */
1114 static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
1115 int flags, chunk_t dst_net, u_int8_t prefixlen,
1116 host_t *gateway, host_t *src_ip, char *if_name)
1117 {
1118 netlink_buf_t request;
1119 struct nlmsghdr *hdr;
1120 struct rtmsg *msg;
1121 int ifindex;
1122 chunk_t chunk;
1123
1124 /* if route is 0.0.0.0/0, we can't install it, as it would
1125 * overwrite the default route. Instead, we add two routes:
1126 * 0.0.0.0/1 and 128.0.0.0/1 */
1127 if (this->routing_table == 0 && prefixlen == 0)
1128 {
1129 chunk_t half_net;
1130 u_int8_t half_prefixlen;
1131 status_t status;
1132
1133 half_net = chunk_alloca(dst_net.len);
1134 memset(half_net.ptr, 0, half_net.len);
1135 half_prefixlen = 1;
1136
1137 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1138 gateway, src_ip, if_name);
1139 half_net.ptr[0] |= 0x80;
1140 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1141 gateway, src_ip, if_name);
1142 return status;
1143 }
1144
1145 memset(&request, 0, sizeof(request));
1146
1147 hdr = (struct nlmsghdr*)request;
1148 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1149 hdr->nlmsg_type = nlmsg_type;
1150 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1151
1152 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1153 msg->rtm_family = src_ip->get_family(src_ip);
1154 msg->rtm_dst_len = prefixlen;
1155 msg->rtm_table = this->routing_table;
1156 msg->rtm_protocol = RTPROT_STATIC;
1157 msg->rtm_type = RTN_UNICAST;
1158 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1159
1160 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
1161 chunk = src_ip->get_address(src_ip);
1162 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1163 chunk = gateway->get_address(gateway);
1164 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1165 ifindex = get_interface_index(this, if_name);
1166 chunk.ptr = (char*)&ifindex;
1167 chunk.len = sizeof(ifindex);
1168 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1169
1170 return this->socket->send_ack(this->socket, hdr);
1171 }
1172
1173 /**
1174 * Implementation of kernel_net_t.add_route.
1175 */
1176 static status_t add_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1177 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1178 {
1179 return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
1180 dst_net, prefixlen, gateway, src_ip, if_name);
1181 }
1182
1183 /**
1184 * Implementation of kernel_net_t.del_route.
1185 */
1186 static status_t del_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1187 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1188 {
1189 return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
1190 gateway, src_ip, if_name);
1191 }
1192
1193 /**
1194 * Initialize a list of local addresses.
1195 */
1196 static status_t init_address_list(private_kernel_netlink_net_t *this)
1197 {
1198 netlink_buf_t request;
1199 struct nlmsghdr *out, *current, *in;
1200 struct rtgenmsg *msg;
1201 size_t len;
1202 enumerator_t *ifaces, *addrs;
1203 iface_entry_t *iface;
1204 addr_entry_t *addr;
1205
1206 DBG1(DBG_KNL, "listening on interfaces:");
1207
1208 memset(&request, 0, sizeof(request));
1209
1210 in = (struct nlmsghdr*)&request;
1211 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1212 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1213 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1214 msg->rtgen_family = AF_UNSPEC;
1215
1216 /* get all links */
1217 in->nlmsg_type = RTM_GETLINK;
1218 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1219 {
1220 return FAILED;
1221 }
1222 current = out;
1223 while (NLMSG_OK(current, len))
1224 {
1225 switch (current->nlmsg_type)
1226 {
1227 case NLMSG_DONE:
1228 break;
1229 case RTM_NEWLINK:
1230 process_link(this, current, FALSE);
1231 /* fall through */
1232 default:
1233 current = NLMSG_NEXT(current, len);
1234 continue;
1235 }
1236 break;
1237 }
1238 free(out);
1239
1240 /* get all interface addresses */
1241 in->nlmsg_type = RTM_GETADDR;
1242 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1243 {
1244 return FAILED;
1245 }
1246 current = out;
1247 while (NLMSG_OK(current, len))
1248 {
1249 switch (current->nlmsg_type)
1250 {
1251 case NLMSG_DONE:
1252 break;
1253 case RTM_NEWADDR:
1254 process_addr(this, current, FALSE);
1255 /* fall through */
1256 default:
1257 current = NLMSG_NEXT(current, len);
1258 continue;
1259 }
1260 break;
1261 }
1262 free(out);
1263
1264 this->mutex->lock(this->mutex);
1265 ifaces = this->ifaces->create_enumerator(this->ifaces);
1266 while (ifaces->enumerate(ifaces, &iface))
1267 {
1268 if (iface->flags & IFF_UP)
1269 {
1270 DBG1(DBG_KNL, " %s", iface->ifname);
1271 addrs = iface->addrs->create_enumerator(iface->addrs);
1272 while (addrs->enumerate(addrs, (void**)&addr))
1273 {
1274 DBG1(DBG_KNL, " %H", addr->ip);
1275 }
1276 addrs->destroy(addrs);
1277 }
1278 }
1279 ifaces->destroy(ifaces);
1280 this->mutex->unlock(this->mutex);
1281 return SUCCESS;
1282 }
1283
1284 /**
1285 * create or delete a rule to use our routing table
1286 */
1287 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
1288 u_int32_t table, u_int32_t prio)
1289 {
1290 netlink_buf_t request;
1291 struct nlmsghdr *hdr;
1292 struct rtmsg *msg;
1293 chunk_t chunk;
1294
1295 memset(&request, 0, sizeof(request));
1296 hdr = (struct nlmsghdr*)request;
1297 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1298 hdr->nlmsg_type = nlmsg_type;
1299 if (nlmsg_type == RTM_NEWRULE)
1300 {
1301 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1302 }
1303 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1304
1305 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1306 msg->rtm_table = table;
1307 msg->rtm_family = AF_INET;
1308 msg->rtm_protocol = RTPROT_BOOT;
1309 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1310 msg->rtm_type = RTN_UNICAST;
1311
1312 chunk = chunk_from_thing(prio);
1313 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1314
1315 return this->socket->send_ack(this->socket, hdr);
1316 }
1317
1318 /**
1319 * Implementation of kernel_netlink_net_t.destroy.
1320 */
1321 static void destroy(private_kernel_netlink_net_t *this)
1322 {
1323 if (this->routing_table)
1324 {
1325 manage_rule(this, RTM_DELRULE, this->routing_table,
1326 this->routing_table_prio);
1327 }
1328
1329 this->job->cancel(this->job);
1330 close(this->socket_events);
1331 this->socket->destroy(this->socket);
1332 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
1333 this->condvar->destroy(this->condvar);
1334 this->mutex->destroy(this->mutex);
1335 free(this);
1336 }
1337
1338 /*
1339 * Described in header.
1340 */
1341 kernel_netlink_net_t *kernel_netlink_net_create()
1342 {
1343 private_kernel_netlink_net_t *this = malloc_thing(private_kernel_netlink_net_t);
1344 struct sockaddr_nl addr;
1345
1346 /* public functions */
1347 this->public.interface.get_interface = (char*(*)(kernel_net_t*,host_t*))get_interface_name;
1348 this->public.interface.create_address_enumerator = (enumerator_t*(*)(kernel_net_t*,bool,bool))create_address_enumerator;
1349 this->public.interface.get_source_addr = (host_t*(*)(kernel_net_t*, host_t *dest, host_t *src))get_source_addr;
1350 this->public.interface.get_nexthop = (host_t*(*)(kernel_net_t*, host_t *dest))get_nexthop;
1351 this->public.interface.add_ip = (status_t(*)(kernel_net_t*,host_t*,host_t*)) add_ip;
1352 this->public.interface.del_ip = (status_t(*)(kernel_net_t*,host_t*)) del_ip;
1353 this->public.interface.add_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
1354 this->public.interface.del_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
1355 this->public.interface.destroy = (void(*)(kernel_net_t*)) destroy;
1356
1357 /* private members */
1358 this->ifaces = linked_list_create();
1359 this->mutex = mutex_create(MUTEX_DEFAULT);
1360 this->condvar = condvar_create(CONDVAR_DEFAULT);
1361 timerclear(&this->last_roam);
1362 this->routing_table = lib->settings->get_int(lib->settings,
1363 "charon.routing_table", IPSEC_ROUTING_TABLE);
1364 this->routing_table_prio = lib->settings->get_int(lib->settings,
1365 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
1366 this->process_route = lib->settings->get_bool(lib->settings,
1367 "charon.process_route", TRUE);
1368
1369 this->socket = netlink_socket_create(NETLINK_ROUTE);
1370
1371 memset(&addr, 0, sizeof(addr));
1372 addr.nl_family = AF_NETLINK;
1373
1374 /* create and bind RT socket for events (address/interface/route changes) */
1375 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1376 if (this->socket_events <= 0)
1377 {
1378 charon->kill(charon, "unable to create RT event socket");
1379 }
1380 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1381 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
1382 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
1383 {
1384 charon->kill(charon, "unable to bind RT event socket");
1385 }
1386
1387 this->job = callback_job_create((callback_job_cb_t)receive_events,
1388 this, NULL, NULL);
1389 charon->processor->queue_job(charon->processor, (job_t*)this->job);
1390
1391 if (init_address_list(this) != SUCCESS)
1392 {
1393 charon->kill(charon, "unable to get interface list");
1394 }
1395
1396 if (this->routing_table)
1397 {
1398 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
1399 this->routing_table_prio) != SUCCESS)
1400 {
1401 DBG1(DBG_KNL, "unable to create routing table rule");
1402 }
1403 }
1404
1405 return &this->public;
1406 }