merging modularized kernel interface back to trunk
[strongswan.git] / src / charon / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008 Tobias Brunner
3 * Hochschule fuer Technik Rapperswil
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 *
15 * $Id$
16 */
17
18 #include <sys/socket.h>
19 #include <linux/netlink.h>
20 #include <linux/rtnetlink.h>
21 #include <sys/time.h>
22 #include <pthread.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <net/if.h>
26
27 #include "kernel_netlink_net.h"
28 #include "kernel_netlink_shared.h"
29
30 #include <daemon.h>
31 #include <utils/linked_list.h>
32 #include <processing/jobs/callback_job.h>
33 #include <processing/jobs/roam_job.h>
34
35 /** delay before firing roam jobs (ms) */
36 #define ROAM_DELAY 100
37
38 /** routing table for routes installed by us */
39 #ifndef IPSEC_ROUTING_TABLE
40 #define IPSEC_ROUTING_TABLE 100
41 #endif
42 #ifndef IPSEC_ROUTING_TABLE_PRIO
43 #define IPSEC_ROUTING_TABLE_PRIO 100
44 #endif
45
46 typedef struct addr_entry_t addr_entry_t;
47
48 /**
49 * IP address in an inface_entry_t
50 */
51 struct addr_entry_t {
52
53 /** The ip address */
54 host_t *ip;
55
56 /** virtual IP managed by us */
57 bool virtual;
58
59 /** scope of the address */
60 u_char scope;
61
62 /** Number of times this IP is used, if virtual */
63 u_int refcount;
64 };
65
66 /**
67 * destroy a addr_entry_t object
68 */
69 static void addr_entry_destroy(addr_entry_t *this)
70 {
71 this->ip->destroy(this->ip);
72 free(this);
73 }
74
75 typedef struct iface_entry_t iface_entry_t;
76
77 /**
78 * A network interface on this system, containing addr_entry_t's
79 */
80 struct iface_entry_t {
81
82 /** interface index */
83 int ifindex;
84
85 /** name of the interface */
86 char ifname[IFNAMSIZ];
87
88 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
89 u_int flags;
90
91 /** list of addresses as host_t */
92 linked_list_t *addrs;
93 };
94
95 /**
96 * destroy an interface entry
97 */
98 static void iface_entry_destroy(iface_entry_t *this)
99 {
100 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
101 free(this);
102 }
103
104 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
105
106 /**
107 * Private variables and functions of kernel_netlink_net class.
108 */
109 struct private_kernel_netlink_net_t {
110 /**
111 * Public part of the kernel_netlink_net_t object.
112 */
113 kernel_netlink_net_t public;
114
115 /**
116 * mutex to lock access to various lists
117 */
118 pthread_mutex_t mutex;
119
120 /**
121 * condition variable to signal virtual IP add/removal
122 */
123 pthread_cond_t cond;
124
125 /**
126 * Cached list of interfaces and its addresses (iface_entry_t)
127 */
128 linked_list_t *ifaces;
129
130 /**
131 * job receiving netlink events
132 */
133 callback_job_t *job;
134
135 /**
136 * netlink rt socket (routing)
137 */
138 netlink_socket_t *socket;
139
140 /**
141 * Netlink rt socket to receive address change events
142 */
143 int socket_events;
144
145 /**
146 * time of the last roam_job
147 */
148 struct timeval last_roam;
149
150 /**
151 * routing table to install routes
152 */
153 int routing_table;
154
155 /**
156 * priority of used routing table
157 */
158 int routing_table_prio;
159
160 /**
161 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
162 */
163 bool process_route;
164
165 };
166
167 /**
168 * get the refcount of a virtual ip
169 */
170 static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
171 {
172 iterator_t *ifaces, *addrs;
173 iface_entry_t *iface;
174 addr_entry_t *addr;
175 int refcount = 0;
176
177 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
178 while (ifaces->iterate(ifaces, (void**)&iface))
179 {
180 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
181 while (addrs->iterate(addrs, (void**)&addr))
182 {
183 if (addr->virtual && (iface->flags & IFF_UP) &&
184 ip->ip_equals(ip, addr->ip))
185 {
186 refcount = addr->refcount;
187 break;
188 }
189 }
190 addrs->destroy(addrs);
191 if (refcount)
192 {
193 break;
194 }
195 }
196 ifaces->destroy(ifaces);
197
198 return refcount;
199 }
200
201 /**
202 * start a roaming job. We delay it for a second and fire only one job
203 * for multiple events. Otherwise we would create two many jobs.
204 */
205 static void fire_roam_job(private_kernel_netlink_net_t *this, bool address)
206 {
207 struct timeval now;
208
209 if (gettimeofday(&now, NULL) == 0)
210 {
211 if (timercmp(&now, &this->last_roam, >))
212 {
213 now.tv_usec += ROAM_DELAY * 1000;
214 while (now.tv_usec > 1000000)
215 {
216 now.tv_sec++;
217 now.tv_usec -= 1000000;
218 }
219 this->last_roam = now;
220 charon->scheduler->schedule_job(charon->scheduler,
221 (job_t*)roam_job_create(address), ROAM_DELAY);
222 }
223 }
224 }
225
226 /**
227 * process RTM_NEWLINK/RTM_DELLINK from kernel
228 */
229 static void process_link(private_kernel_netlink_net_t *this,
230 struct nlmsghdr *hdr, bool event)
231 {
232 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
233 struct rtattr *rta = IFLA_RTA(msg);
234 size_t rtasize = IFLA_PAYLOAD (hdr);
235 iterator_t *iterator;
236 iface_entry_t *current, *entry = NULL;
237 char *name = NULL;
238 bool update = FALSE;
239
240 while(RTA_OK(rta, rtasize))
241 {
242 switch (rta->rta_type)
243 {
244 case IFLA_IFNAME:
245 name = RTA_DATA(rta);
246 break;
247 }
248 rta = RTA_NEXT(rta, rtasize);
249 }
250 if (!name)
251 {
252 name = "(unknown)";
253 }
254
255 switch (hdr->nlmsg_type)
256 {
257 case RTM_NEWLINK:
258 {
259 if (msg->ifi_flags & IFF_LOOPBACK)
260 { /* ignore loopback interfaces */
261 break;
262 }
263 iterator = this->ifaces->create_iterator_locked(this->ifaces,
264 &this->mutex);
265 while (iterator->iterate(iterator, (void**)&current))
266 {
267 if (current->ifindex == msg->ifi_index)
268 {
269 entry = current;
270 break;
271 }
272 }
273 if (!entry)
274 {
275 entry = malloc_thing(iface_entry_t);
276 entry->ifindex = msg->ifi_index;
277 entry->flags = 0;
278 entry->addrs = linked_list_create();
279 this->ifaces->insert_last(this->ifaces, entry);
280 }
281 memcpy(entry->ifname, name, IFNAMSIZ);
282 entry->ifname[IFNAMSIZ-1] = '\0';
283 if (event)
284 {
285 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
286 {
287 update = TRUE;
288 DBG1(DBG_KNL, "interface %s activated", name);
289 }
290 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
291 {
292 update = TRUE;
293 DBG1(DBG_KNL, "interface %s deactivated", name);
294 }
295 }
296 entry->flags = msg->ifi_flags;
297 iterator->destroy(iterator);
298 break;
299 }
300 case RTM_DELLINK:
301 {
302 iterator = this->ifaces->create_iterator_locked(this->ifaces,
303 &this->mutex);
304 while (iterator->iterate(iterator, (void**)&current))
305 {
306 if (current->ifindex == msg->ifi_index)
307 {
308 /* we do not remove it, as an address may be added to a
309 * "down" interface and we wan't to know that. */
310 current->flags = msg->ifi_flags;
311 break;
312 }
313 }
314 iterator->destroy(iterator);
315 break;
316 }
317 }
318
319 /* send an update to all IKE_SAs */
320 if (update && event)
321 {
322 fire_roam_job(this, TRUE);
323 }
324 }
325
326 /**
327 * process RTM_NEWADDR/RTM_DELADDR from kernel
328 */
329 static void process_addr(private_kernel_netlink_net_t *this,
330 struct nlmsghdr *hdr, bool event)
331 {
332 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
333 struct rtattr *rta = IFA_RTA(msg);
334 size_t rtasize = IFA_PAYLOAD (hdr);
335 host_t *host = NULL;
336 iterator_t *ifaces, *addrs;
337 iface_entry_t *iface;
338 addr_entry_t *addr;
339 chunk_t local = chunk_empty, address = chunk_empty;
340 bool update = FALSE, found = FALSE, changed = FALSE;
341
342 while(RTA_OK(rta, rtasize))
343 {
344 switch (rta->rta_type)
345 {
346 case IFA_LOCAL:
347 local.ptr = RTA_DATA(rta);
348 local.len = RTA_PAYLOAD(rta);
349 break;
350 case IFA_ADDRESS:
351 address.ptr = RTA_DATA(rta);
352 address.len = RTA_PAYLOAD(rta);
353 break;
354 }
355 rta = RTA_NEXT(rta, rtasize);
356 }
357
358 /* For PPP interfaces, we need the IFA_LOCAL address,
359 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
360 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
361 if (local.ptr)
362 {
363 host = host_create_from_chunk(msg->ifa_family, local, 0);
364 }
365 else if (address.ptr)
366 {
367 host = host_create_from_chunk(msg->ifa_family, address, 0);
368 }
369
370 if (host == NULL)
371 { /* bad family? */
372 return;
373 }
374
375 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
376 while (ifaces->iterate(ifaces, (void**)&iface))
377 {
378 if (iface->ifindex == msg->ifa_index)
379 {
380 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
381 while (addrs->iterate(addrs, (void**)&addr))
382 {
383 if (host->ip_equals(host, addr->ip))
384 {
385 found = TRUE;
386 if (hdr->nlmsg_type == RTM_DELADDR)
387 {
388 addrs->remove(addrs);
389 if (!addr->virtual)
390 {
391 changed = TRUE;
392 DBG1(DBG_KNL, "%H disappeared from %s",
393 host, iface->ifname);
394 }
395 addr_entry_destroy(addr);
396 }
397 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
398 {
399 addr->refcount = 1;
400 }
401 }
402 }
403 addrs->destroy(addrs);
404
405 if (hdr->nlmsg_type == RTM_NEWADDR)
406 {
407 if (!found)
408 {
409 found = TRUE;
410 changed = TRUE;
411 addr = malloc_thing(addr_entry_t);
412 addr->ip = host->clone(host);
413 addr->virtual = FALSE;
414 addr->refcount = 1;
415 addr->scope = msg->ifa_scope;
416
417 iface->addrs->insert_last(iface->addrs, addr);
418 if (event)
419 {
420 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
421 }
422 }
423 }
424 if (found && (iface->flags & IFF_UP))
425 {
426 update = TRUE;
427 }
428 break;
429 }
430 }
431 ifaces->destroy(ifaces);
432 host->destroy(host);
433
434 /* send an update to all IKE_SAs */
435 if (update && event && changed)
436 {
437 fire_roam_job(this, TRUE);
438 }
439 }
440
441 /**
442 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
443 */
444 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
445 {
446 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
447 struct rtattr *rta = RTM_RTA(msg);
448 size_t rtasize = RTM_PAYLOAD(hdr);
449 host_t *host = NULL;
450
451 while (RTA_OK(rta, rtasize))
452 {
453 switch (rta->rta_type)
454 {
455 case RTA_PREFSRC:
456 host = host_create_from_chunk(msg->rtm_family,
457 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
458 break;
459 }
460 rta = RTA_NEXT(rta, rtasize);
461 }
462 if (host)
463 {
464 if (!get_vip_refcount(this, host))
465 { /* ignore routes added for virtual IPs */
466 fire_roam_job(this, FALSE);
467 }
468 host->destroy(host);
469 }
470 }
471
472 /**
473 * Receives events from kernel
474 */
475 static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
476 {
477 char response[1024];
478 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
479 struct sockaddr_nl addr;
480 socklen_t addr_len = sizeof(addr);
481 int len, oldstate;
482
483 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
484 len = recvfrom(this->socket_events, response, sizeof(response), 0,
485 (struct sockaddr*)&addr, &addr_len);
486 pthread_setcancelstate(oldstate, NULL);
487
488 if (len < 0)
489 {
490 switch (errno)
491 {
492 case EINTR:
493 /* interrupted, try again */
494 return JOB_REQUEUE_DIRECT;
495 case EAGAIN:
496 /* no data ready, select again */
497 return JOB_REQUEUE_DIRECT;
498 default:
499 DBG1(DBG_KNL, "unable to receive from rt event socket");
500 sleep(1);
501 return JOB_REQUEUE_FAIR;
502 }
503 }
504
505 if (addr.nl_pid != 0)
506 { /* not from kernel. not interested, try another one */
507 return JOB_REQUEUE_DIRECT;
508 }
509
510 while (NLMSG_OK(hdr, len))
511 {
512 /* looks good so far, dispatch netlink message */
513 switch (hdr->nlmsg_type)
514 {
515 case RTM_NEWADDR:
516 case RTM_DELADDR:
517 process_addr(this, hdr, TRUE);
518 pthread_cond_broadcast(&this->cond);
519 break;
520 case RTM_NEWLINK:
521 case RTM_DELLINK:
522 process_link(this, hdr, TRUE);
523 pthread_cond_broadcast(&this->cond);
524 break;
525 case RTM_NEWROUTE:
526 case RTM_DELROUTE:
527 if (this->process_route)
528 {
529 process_route(this, hdr);
530 }
531 break;
532 default:
533 break;
534 }
535 hdr = NLMSG_NEXT(hdr, len);
536 }
537 return JOB_REQUEUE_DIRECT;
538 }
539
540 /** enumerator over addresses */
541 typedef struct {
542 private_kernel_netlink_net_t* this;
543 /** whether to enumerate down interfaces */
544 bool include_down_ifaces;
545 /** whether to enumerate virtual ip addresses */
546 bool include_virtual_ips;
547 } address_enumerator_t;
548
549 /**
550 * cleanup function for address enumerator
551 */
552 static void address_enumerator_destroy(address_enumerator_t *data)
553 {
554 pthread_mutex_unlock(&data->this->mutex);
555 free(data);
556 }
557
558 /**
559 * filter for addresses
560 */
561 static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
562 {
563 if (!data->include_virtual_ips && (*in)->virtual)
564 { /* skip virtual interfaces added by us */
565 return FALSE;
566 }
567 if ((*in)->scope >= RT_SCOPE_LINK)
568 { /* skip addresses with a unusable scope */
569 return FALSE;
570 }
571 *out = (*in)->ip;
572 return TRUE;
573 }
574
575 /**
576 * enumerator constructor for interfaces
577 */
578 static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
579 {
580 return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
581 (void*)filter_addresses, data, NULL);
582 }
583
584 /**
585 * filter for interfaces
586 */
587 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
588 {
589 if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
590 { /* skip interfaces not up */
591 return FALSE;
592 }
593 *out = *in;
594 return TRUE;
595 }
596
597 /**
598 * implementation of kernel_net_t.create_address_enumerator
599 */
600 static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
601 bool include_down_ifaces, bool include_virtual_ips)
602 {
603 address_enumerator_t *data = malloc_thing(address_enumerator_t);
604 data->this = this;
605 data->include_down_ifaces = include_down_ifaces;
606 data->include_virtual_ips = include_virtual_ips;
607
608 pthread_mutex_lock(&this->mutex);
609 return enumerator_create_nested(
610 enumerator_create_filter(this->ifaces->create_enumerator(this->ifaces),
611 (void*)filter_interfaces, data, NULL),
612 (void*)create_iface_enumerator, data, (void*)address_enumerator_destroy);
613 }
614
615 /**
616 * implementation of kernel_net_t.get_interface_name
617 */
618 static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
619 {
620 iterator_t *ifaces, *addrs;
621 iface_entry_t *iface;
622 addr_entry_t *addr;
623 char *name = NULL;
624
625 DBG2(DBG_KNL, "getting interface name for %H", ip);
626
627 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
628 while (ifaces->iterate(ifaces, (void**)&iface))
629 {
630 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
631 while (addrs->iterate(addrs, (void**)&addr))
632 {
633 if (ip->ip_equals(ip, addr->ip))
634 {
635 name = strdup(iface->ifname);
636 break;
637 }
638 }
639 addrs->destroy(addrs);
640 if (name)
641 {
642 break;
643 }
644 }
645 ifaces->destroy(ifaces);
646
647 if (name)
648 {
649 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
650 }
651 else
652 {
653 DBG2(DBG_KNL, "%H is not a local address", ip);
654 }
655 return name;
656 }
657
658 /**
659 * get the index of an interface by name
660 */
661 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
662 {
663 iterator_t *ifaces;
664 iface_entry_t *iface;
665 int ifindex = 0;
666
667 DBG2(DBG_KNL, "getting iface index for %s", name);
668
669 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
670 while (ifaces->iterate(ifaces, (void**)&iface))
671 {
672 if (streq(name, iface->ifname))
673 {
674 ifindex = iface->ifindex;
675 break;
676 }
677 }
678 ifaces->destroy(ifaces);
679
680 if (ifindex == 0)
681 {
682 DBG1(DBG_KNL, "unable to get interface index for %s", name);
683 }
684 return ifindex;
685 }
686
687 /**
688 * check if an address (chunk) addr is in subnet (net with net_len net bits)
689 */
690 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
691 {
692 int bit, byte;
693
694 if (addr.len != net.len)
695 {
696 return FALSE;
697 }
698 /* scan through all bits, beginning in the front */
699 for (byte = 0; byte < addr.len; byte++)
700 {
701 for (bit = 7; bit >= 0; bit--)
702 {
703 /* check if bits are equal (or we reached the end of the net) */
704 if (bit + byte * 8 > net_len)
705 {
706 return TRUE;
707 }
708 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
709 {
710 return FALSE;
711 }
712 }
713 }
714 return TRUE;
715 }
716
717 /**
718 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
719 */
720 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
721 bool nexthop)
722 {
723 unsigned char request[NETLINK_BUFFER_SIZE];
724 struct nlmsghdr *hdr, *out, *current;
725 struct rtmsg *msg;
726 chunk_t chunk;
727 size_t len;
728 int best = -1;
729 host_t *src = NULL, *gtw = NULL;
730
731 DBG2(DBG_KNL, "getting address to reach %H", dest);
732
733 memset(&request, 0, sizeof(request));
734
735 hdr = (struct nlmsghdr*)request;
736 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
737 hdr->nlmsg_type = RTM_GETROUTE;
738 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
739
740 msg = (struct rtmsg*)NLMSG_DATA(hdr);
741 msg->rtm_family = dest->get_family(dest);
742
743 chunk = dest->get_address(dest);
744 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
745
746 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
747 {
748 DBG1(DBG_KNL, "getting address to %H failed", dest);
749 return NULL;
750 }
751 current = out;
752 while (NLMSG_OK(current, len))
753 {
754 switch (current->nlmsg_type)
755 {
756 case NLMSG_DONE:
757 break;
758 case RTM_NEWROUTE:
759 {
760 struct rtattr *rta;
761 size_t rtasize;
762 chunk_t rta_gtw, rta_src, rta_dst;
763 u_int32_t rta_oif = 0;
764
765 rta_gtw = rta_src = rta_dst = chunk_empty;
766 msg = (struct rtmsg*)(NLMSG_DATA(current));
767 rta = RTM_RTA(msg);
768 rtasize = RTM_PAYLOAD(current);
769 while (RTA_OK(rta, rtasize))
770 {
771 switch (rta->rta_type)
772 {
773 case RTA_PREFSRC:
774 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
775 break;
776 case RTA_GATEWAY:
777 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
778 break;
779 case RTA_DST:
780 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
781 break;
782 case RTA_OIF:
783 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
784 {
785 rta_oif = *(u_int32_t*)RTA_DATA(rta);
786 }
787 break;
788 }
789 rta = RTA_NEXT(rta, rtasize);
790 }
791
792 /* apply the route if:
793 * - it is not from our own ipsec routing table
794 * - is better than a previous one
795 * - is the default route or
796 * - its destination net contains our destination
797 */
798 if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
799 && msg->rtm_dst_len > best
800 && (msg->rtm_dst_len == 0 || /* default route */
801 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
802 {
803 iterator_t *ifaces, *addrs;
804 iface_entry_t *iface;
805 addr_entry_t *addr;
806
807 best = msg->rtm_dst_len;
808 if (nexthop)
809 {
810 DESTROY_IF(gtw);
811 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
812 }
813 else if (rta_src.ptr)
814 {
815 DESTROY_IF(src);
816 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
817 if (get_vip_refcount(this, src))
818 { /* skip source address if it is installed by us */
819 DESTROY_IF(src);
820 src = NULL;
821 current = NLMSG_NEXT(current, len);
822 continue;
823 }
824 }
825 else
826 {
827 /* no source addr, get one from the interfaces */
828 ifaces = this->ifaces->create_iterator_locked(
829 this->ifaces, &this->mutex);
830 while (ifaces->iterate(ifaces, (void**)&iface))
831 {
832 if (iface->ifindex == rta_oif)
833 {
834 addrs = iface->addrs->create_iterator(
835 iface->addrs, TRUE);
836 while (addrs->iterate(addrs, (void**)&addr))
837 {
838 chunk_t ip = addr->ip->get_address(addr->ip);
839 if ((msg->rtm_dst_len == 0 &&
840 addr->ip->get_family(addr->ip) ==
841 dest->get_family(dest)) ||
842 addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
843 {
844 DESTROY_IF(src);
845 src = addr->ip->clone(addr->ip);
846 break;
847 }
848 }
849 addrs->destroy(addrs);
850 }
851 }
852 ifaces->destroy(ifaces);
853 }
854 }
855 /* FALL through */
856 }
857 default:
858 current = NLMSG_NEXT(current, len);
859 continue;
860 }
861 break;
862 }
863 free(out);
864
865 if (nexthop)
866 {
867 if (gtw)
868 {
869 return gtw;
870 }
871 return dest->clone(dest);
872 }
873 return src;
874 }
875
876 /**
877 * Implementation of kernel_net_t.get_source_addr.
878 */
879 static host_t* get_source_addr(private_kernel_netlink_net_t *this, host_t *dest)
880 {
881 return get_route(this, dest, FALSE);
882 }
883
884 /**
885 * Implementation of kernel_net_t.get_nexthop.
886 */
887 static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
888 {
889 return get_route(this, dest, TRUE);
890 }
891
892 /**
893 * Manages the creation and deletion of ip addresses on an interface.
894 * By setting the appropriate nlmsg_type, the ip will be set or unset.
895 */
896 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
897 int flags, int if_index, host_t *ip)
898 {
899 unsigned char request[NETLINK_BUFFER_SIZE];
900 struct nlmsghdr *hdr;
901 struct ifaddrmsg *msg;
902 chunk_t chunk;
903
904 memset(&request, 0, sizeof(request));
905
906 chunk = ip->get_address(ip);
907
908 hdr = (struct nlmsghdr*)request;
909 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
910 hdr->nlmsg_type = nlmsg_type;
911 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
912
913 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
914 msg->ifa_family = ip->get_family(ip);
915 msg->ifa_flags = 0;
916 msg->ifa_prefixlen = 8 * chunk.len;
917 msg->ifa_scope = RT_SCOPE_UNIVERSE;
918 msg->ifa_index = if_index;
919
920 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
921
922 return this->socket->send_ack(this->socket, hdr);
923 }
924
925 /**
926 * Implementation of kernel_net_t.add_ip.
927 */
928 static status_t add_ip(private_kernel_netlink_net_t *this,
929 host_t *virtual_ip, host_t *iface_ip)
930 {
931 iface_entry_t *iface;
932 addr_entry_t *addr;
933 iterator_t *addrs, *ifaces;
934 int ifindex;
935
936 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
937
938 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
939 while (ifaces->iterate(ifaces, (void**)&iface))
940 {
941 bool iface_found = FALSE;
942
943 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
944 while (addrs->iterate(addrs, (void**)&addr))
945 {
946 if (iface_ip->ip_equals(iface_ip, addr->ip))
947 {
948 iface_found = TRUE;
949 }
950 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
951 {
952 addr->refcount++;
953 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
954 virtual_ip, iface->ifname);
955 addrs->destroy(addrs);
956 ifaces->destroy(ifaces);
957 return SUCCESS;
958 }
959 }
960 addrs->destroy(addrs);
961
962 if (iface_found)
963 {
964 ifindex = iface->ifindex;
965 addr = malloc_thing(addr_entry_t);
966 addr->ip = virtual_ip->clone(virtual_ip);
967 addr->refcount = 0;
968 addr->virtual = TRUE;
969 addr->scope = RT_SCOPE_UNIVERSE;
970 iface->addrs->insert_last(iface->addrs, addr);
971
972 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
973 ifindex, virtual_ip) == SUCCESS)
974 {
975 while (get_vip_refcount(this, virtual_ip) == 0)
976 { /* wait until address appears */
977 pthread_cond_wait(&this->cond, &this->mutex);
978 }
979 ifaces->destroy(ifaces);
980 return SUCCESS;
981 }
982 ifaces->destroy(ifaces);
983 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
984 return FAILED;
985 }
986 }
987 ifaces->destroy(ifaces);
988
989 DBG1(DBG_KNL, "interface address %H not found, unable to install"
990 "virtual IP %H", iface_ip, virtual_ip);
991 return FAILED;
992 }
993
994 /**
995 * Implementation of kernel_net_t.del_ip.
996 */
997 static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
998 {
999 iface_entry_t *iface;
1000 addr_entry_t *addr;
1001 iterator_t *addrs, *ifaces;
1002 status_t status;
1003 int ifindex;
1004
1005 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1006
1007 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1008 while (ifaces->iterate(ifaces, (void**)&iface))
1009 {
1010 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1011 while (addrs->iterate(addrs, (void**)&addr))
1012 {
1013 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1014 {
1015 ifindex = iface->ifindex;
1016 if (addr->refcount == 1)
1017 {
1018 status = manage_ipaddr(this, RTM_DELADDR, 0,
1019 ifindex, virtual_ip);
1020 if (status == SUCCESS)
1021 { /* wait until the address is really gone */
1022 while (get_vip_refcount(this, virtual_ip) > 0)
1023 {
1024 pthread_cond_wait(&this->cond, &this->mutex);
1025 }
1026 }
1027 addrs->destroy(addrs);
1028 ifaces->destroy(ifaces);
1029 return status;
1030 }
1031 else
1032 {
1033 addr->refcount--;
1034 }
1035 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1036 virtual_ip);
1037 addrs->destroy(addrs);
1038 ifaces->destroy(ifaces);
1039 return SUCCESS;
1040 }
1041 }
1042 addrs->destroy(addrs);
1043 }
1044 ifaces->destroy(ifaces);
1045
1046 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1047 return FAILED;
1048 }
1049
1050 /**
1051 * Manages source routes in the routing table.
1052 * By setting the appropriate nlmsg_type, the route gets added or removed.
1053 */
1054 static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
1055 int flags, chunk_t dst_net, u_int8_t prefixlen,
1056 host_t *gateway, host_t *src_ip, char *if_name)
1057 {
1058 unsigned char request[NETLINK_BUFFER_SIZE];
1059 struct nlmsghdr *hdr;
1060 struct rtmsg *msg;
1061 int ifindex;
1062 chunk_t chunk;
1063
1064 /* if route is 0.0.0.0/0, we can't install it, as it would
1065 * overwrite the default route. Instead, we add two routes:
1066 * 0.0.0.0/1 and 128.0.0.0/1 */
1067 if (this->routing_table == 0 && prefixlen == 0)
1068 {
1069 chunk_t half_net;
1070 u_int8_t half_prefixlen;
1071 status_t status;
1072
1073 half_net = chunk_alloca(dst_net.len);
1074 memset(half_net.ptr, 0, half_net.len);
1075 half_prefixlen = 1;
1076
1077 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1078 gateway, src_ip, if_name);
1079 half_net.ptr[0] |= 0x80;
1080 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1081 gateway, src_ip, if_name);
1082 return status;
1083 }
1084
1085 memset(&request, 0, sizeof(request));
1086
1087 hdr = (struct nlmsghdr*)request;
1088 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1089 hdr->nlmsg_type = nlmsg_type;
1090 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1091
1092 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1093 msg->rtm_family = src_ip->get_family(src_ip);
1094 msg->rtm_dst_len = prefixlen;
1095 msg->rtm_table = this->routing_table;
1096 msg->rtm_protocol = RTPROT_STATIC;
1097 msg->rtm_type = RTN_UNICAST;
1098 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1099
1100 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
1101 chunk = src_ip->get_address(src_ip);
1102 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1103 chunk = gateway->get_address(gateway);
1104 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1105 ifindex = get_interface_index(this, if_name);
1106 chunk.ptr = (char*)&ifindex;
1107 chunk.len = sizeof(ifindex);
1108 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1109
1110 return this->socket->send_ack(this->socket, hdr);
1111 }
1112
1113 /**
1114 * Implementation of kernel_net_t.add_route.
1115 */
1116 status_t add_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1117 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1118 {
1119 return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
1120 dst_net, prefixlen, gateway, src_ip, if_name);
1121 }
1122
1123 /**
1124 * Implementation of kernel_net_t.del_route.
1125 */
1126 status_t del_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1127 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1128 {
1129 return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
1130 gateway, src_ip, if_name);
1131 }
1132
1133 /**
1134 * Initialize a list of local addresses.
1135 */
1136 static status_t init_address_list(private_kernel_netlink_net_t *this)
1137 {
1138 char request[NETLINK_BUFFER_SIZE];
1139 struct nlmsghdr *out, *current, *in;
1140 struct rtgenmsg *msg;
1141 size_t len;
1142 iterator_t *ifaces, *addrs;
1143 iface_entry_t *iface;
1144 addr_entry_t *addr;
1145
1146 DBG1(DBG_KNL, "listening on interfaces:");
1147
1148 memset(&request, 0, sizeof(request));
1149
1150 in = (struct nlmsghdr*)&request;
1151 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1152 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1153 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1154 msg->rtgen_family = AF_UNSPEC;
1155
1156 /* get all links */
1157 in->nlmsg_type = RTM_GETLINK;
1158 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1159 {
1160 return FAILED;
1161 }
1162 current = out;
1163 while (NLMSG_OK(current, len))
1164 {
1165 switch (current->nlmsg_type)
1166 {
1167 case NLMSG_DONE:
1168 break;
1169 case RTM_NEWLINK:
1170 process_link(this, current, FALSE);
1171 /* fall through */
1172 default:
1173 current = NLMSG_NEXT(current, len);
1174 continue;
1175 }
1176 break;
1177 }
1178 free(out);
1179
1180 /* get all interface addresses */
1181 in->nlmsg_type = RTM_GETADDR;
1182 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1183 {
1184 return FAILED;
1185 }
1186 current = out;
1187 while (NLMSG_OK(current, len))
1188 {
1189 switch (current->nlmsg_type)
1190 {
1191 case NLMSG_DONE:
1192 break;
1193 case RTM_NEWADDR:
1194 process_addr(this, current, FALSE);
1195 /* fall through */
1196 default:
1197 current = NLMSG_NEXT(current, len);
1198 continue;
1199 }
1200 break;
1201 }
1202 free(out);
1203
1204 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1205 while (ifaces->iterate(ifaces, (void**)&iface))
1206 {
1207 if (iface->flags & IFF_UP)
1208 {
1209 DBG1(DBG_KNL, " %s", iface->ifname);
1210 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1211 while (addrs->iterate(addrs, (void**)&addr))
1212 {
1213 DBG1(DBG_KNL, " %H", addr->ip);
1214 }
1215 addrs->destroy(addrs);
1216 }
1217 }
1218 ifaces->destroy(ifaces);
1219 return SUCCESS;
1220 }
1221
1222 /**
1223 * create or delete a rule to use our routing table
1224 */
1225 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
1226 u_int32_t table, u_int32_t prio)
1227 {
1228 unsigned char request[NETLINK_BUFFER_SIZE];
1229 struct nlmsghdr *hdr;
1230 struct rtmsg *msg;
1231 chunk_t chunk;
1232
1233 memset(&request, 0, sizeof(request));
1234 hdr = (struct nlmsghdr*)request;
1235 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1236 hdr->nlmsg_type = nlmsg_type;
1237 if (nlmsg_type == RTM_NEWRULE)
1238 {
1239 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1240 }
1241 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1242
1243 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1244 msg->rtm_table = table;
1245 msg->rtm_family = AF_INET;
1246 msg->rtm_protocol = RTPROT_BOOT;
1247 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1248 msg->rtm_type = RTN_UNICAST;
1249
1250 chunk = chunk_from_thing(prio);
1251 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1252
1253 return this->socket->send_ack(this->socket, hdr);
1254 }
1255
1256 /**
1257 * Implementation of kernel_netlink_net_t.destroy.
1258 */
1259 static void destroy(private_kernel_netlink_net_t *this)
1260 {
1261 if (this->routing_table)
1262 {
1263 manage_rule(this, RTM_DELRULE, this->routing_table,
1264 this->routing_table_prio);
1265 }
1266
1267 this->job->cancel(this->job);
1268 close(this->socket_events);
1269 this->socket->destroy(this->socket);
1270 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
1271 free(this);
1272 }
1273
1274 /*
1275 * Described in header.
1276 */
1277 kernel_netlink_net_t *kernel_netlink_net_create()
1278 {
1279 private_kernel_netlink_net_t *this = malloc_thing(private_kernel_netlink_net_t);
1280 struct sockaddr_nl addr;
1281
1282 /* public functions */
1283 this->public.interface.get_interface = (char*(*)(kernel_net_t*,host_t*))get_interface_name;
1284 this->public.interface.create_address_enumerator = (enumerator_t*(*)(kernel_net_t*,bool,bool))create_address_enumerator;
1285 this->public.interface.get_source_addr = (host_t*(*)(kernel_net_t*, host_t *dest))get_source_addr;
1286 this->public.interface.get_nexthop = (host_t*(*)(kernel_net_t*, host_t *dest))get_nexthop;
1287 this->public.interface.add_ip = (status_t(*)(kernel_net_t*,host_t*,host_t*)) add_ip;
1288 this->public.interface.del_ip = (status_t(*)(kernel_net_t*,host_t*)) del_ip;
1289 this->public.interface.add_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
1290 this->public.interface.del_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
1291 this->public.interface.destroy = (void(*)(kernel_net_t*)) destroy;
1292
1293 /* private members */
1294 this->ifaces = linked_list_create();
1295 pthread_mutex_init(&this->mutex, NULL);
1296 pthread_cond_init(&this->cond, NULL);
1297 timerclear(&this->last_roam);
1298 this->routing_table = lib->settings->get_int(lib->settings,
1299 "charon.routing_table", IPSEC_ROUTING_TABLE);
1300 this->routing_table_prio = lib->settings->get_int(lib->settings,
1301 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
1302 this->process_route = lib->settings->get_bool(lib->settings,
1303 "charon.process_route", TRUE);
1304
1305 this->socket = netlink_socket_create(NETLINK_ROUTE);
1306
1307 memset(&addr, 0, sizeof(addr));
1308 addr.nl_family = AF_NETLINK;
1309
1310 /* create and bind RT socket for events (address/interface/route changes) */
1311 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1312 if (this->socket_events <= 0)
1313 {
1314 charon->kill(charon, "unable to create RT event socket");
1315 }
1316 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1317 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
1318 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
1319 {
1320 charon->kill(charon, "unable to bind RT event socket");
1321 }
1322
1323 this->job = callback_job_create((callback_job_cb_t)receive_events,
1324 this, NULL, NULL);
1325 charon->processor->queue_job(charon->processor, (job_t*)this->job);
1326
1327 if (init_address_list(this) != SUCCESS)
1328 {
1329 charon->kill(charon, "unable to get interface list");
1330 }
1331
1332 if (this->routing_table)
1333 {
1334 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
1335 this->routing_table_prio) != SUCCESS)
1336 {
1337 DBG1(DBG_KNL, "unable to create routing table rule");
1338 }
1339 }
1340
1341 return &this->public;
1342 }