fixed loop termination criterion in addr_in_subnet(). Thanks go to Wolfgang Steudel...
[strongswan.git] / src / charon / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008 Tobias Brunner
3 * Hochschule fuer Technik Rapperswil
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 *
15 * $Id$
16 */
17
18 #include <sys/socket.h>
19 #include <linux/netlink.h>
20 #include <linux/rtnetlink.h>
21 #include <sys/time.h>
22 #include <pthread.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <net/if.h>
26
27 #include "kernel_netlink_net.h"
28 #include "kernel_netlink_shared.h"
29
30 #include <daemon.h>
31 #include <utils/linked_list.h>
32 #include <processing/jobs/callback_job.h>
33 #include <processing/jobs/roam_job.h>
34
35 /** delay before firing roam jobs (ms) */
36 #define ROAM_DELAY 100
37
38 /** routing table for routes installed by us */
39 #ifndef IPSEC_ROUTING_TABLE
40 #define IPSEC_ROUTING_TABLE 100
41 #endif
42 #ifndef IPSEC_ROUTING_TABLE_PRIO
43 #define IPSEC_ROUTING_TABLE_PRIO 100
44 #endif
45
46 typedef struct addr_entry_t addr_entry_t;
47
48 /**
49 * IP address in an inface_entry_t
50 */
51 struct addr_entry_t {
52
53 /** The ip address */
54 host_t *ip;
55
56 /** virtual IP managed by us */
57 bool virtual;
58
59 /** scope of the address */
60 u_char scope;
61
62 /** Number of times this IP is used, if virtual */
63 u_int refcount;
64 };
65
66 /**
67 * destroy a addr_entry_t object
68 */
69 static void addr_entry_destroy(addr_entry_t *this)
70 {
71 this->ip->destroy(this->ip);
72 free(this);
73 }
74
75 typedef struct iface_entry_t iface_entry_t;
76
77 /**
78 * A network interface on this system, containing addr_entry_t's
79 */
80 struct iface_entry_t {
81
82 /** interface index */
83 int ifindex;
84
85 /** name of the interface */
86 char ifname[IFNAMSIZ];
87
88 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
89 u_int flags;
90
91 /** list of addresses as host_t */
92 linked_list_t *addrs;
93 };
94
95 /**
96 * destroy an interface entry
97 */
98 static void iface_entry_destroy(iface_entry_t *this)
99 {
100 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
101 free(this);
102 }
103
104 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
105
106 /**
107 * Private variables and functions of kernel_netlink_net class.
108 */
109 struct private_kernel_netlink_net_t {
110 /**
111 * Public part of the kernel_netlink_net_t object.
112 */
113 kernel_netlink_net_t public;
114
115 /**
116 * mutex to lock access to various lists
117 */
118 pthread_mutex_t mutex;
119
120 /**
121 * condition variable to signal virtual IP add/removal
122 */
123 pthread_cond_t cond;
124
125 /**
126 * Cached list of interfaces and its addresses (iface_entry_t)
127 */
128 linked_list_t *ifaces;
129
130 /**
131 * job receiving netlink events
132 */
133 callback_job_t *job;
134
135 /**
136 * netlink rt socket (routing)
137 */
138 netlink_socket_t *socket;
139
140 /**
141 * Netlink rt socket to receive address change events
142 */
143 int socket_events;
144
145 /**
146 * time of the last roam_job
147 */
148 struct timeval last_roam;
149
150 /**
151 * routing table to install routes
152 */
153 int routing_table;
154
155 /**
156 * priority of used routing table
157 */
158 int routing_table_prio;
159
160 /**
161 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
162 */
163 bool process_route;
164
165 };
166
167 /**
168 * get the refcount of a virtual ip
169 */
170 static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
171 {
172 iterator_t *ifaces, *addrs;
173 iface_entry_t *iface;
174 addr_entry_t *addr;
175 int refcount = 0;
176
177 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
178 while (ifaces->iterate(ifaces, (void**)&iface))
179 {
180 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
181 while (addrs->iterate(addrs, (void**)&addr))
182 {
183 if (addr->virtual && (iface->flags & IFF_UP) &&
184 ip->ip_equals(ip, addr->ip))
185 {
186 refcount = addr->refcount;
187 break;
188 }
189 }
190 addrs->destroy(addrs);
191 if (refcount)
192 {
193 break;
194 }
195 }
196 ifaces->destroy(ifaces);
197
198 return refcount;
199 }
200
201 /**
202 * start a roaming job. We delay it for a second and fire only one job
203 * for multiple events. Otherwise we would create two many jobs.
204 */
205 static void fire_roam_job(private_kernel_netlink_net_t *this, bool address)
206 {
207 struct timeval now;
208
209 if (gettimeofday(&now, NULL) == 0)
210 {
211 if (timercmp(&now, &this->last_roam, >))
212 {
213 now.tv_usec += ROAM_DELAY * 1000;
214 while (now.tv_usec > 1000000)
215 {
216 now.tv_sec++;
217 now.tv_usec -= 1000000;
218 }
219 this->last_roam = now;
220 charon->scheduler->schedule_job(charon->scheduler,
221 (job_t*)roam_job_create(address), ROAM_DELAY);
222 }
223 }
224 }
225
226 /**
227 * process RTM_NEWLINK/RTM_DELLINK from kernel
228 */
229 static void process_link(private_kernel_netlink_net_t *this,
230 struct nlmsghdr *hdr, bool event)
231 {
232 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
233 struct rtattr *rta = IFLA_RTA(msg);
234 size_t rtasize = IFLA_PAYLOAD (hdr);
235 iterator_t *iterator;
236 iface_entry_t *current, *entry = NULL;
237 char *name = NULL;
238 bool update = FALSE;
239
240 while(RTA_OK(rta, rtasize))
241 {
242 switch (rta->rta_type)
243 {
244 case IFLA_IFNAME:
245 name = RTA_DATA(rta);
246 break;
247 }
248 rta = RTA_NEXT(rta, rtasize);
249 }
250 if (!name)
251 {
252 name = "(unknown)";
253 }
254
255 switch (hdr->nlmsg_type)
256 {
257 case RTM_NEWLINK:
258 {
259 if (msg->ifi_flags & IFF_LOOPBACK)
260 { /* ignore loopback interfaces */
261 break;
262 }
263 iterator = this->ifaces->create_iterator_locked(this->ifaces,
264 &this->mutex);
265 while (iterator->iterate(iterator, (void**)&current))
266 {
267 if (current->ifindex == msg->ifi_index)
268 {
269 entry = current;
270 break;
271 }
272 }
273 if (!entry)
274 {
275 entry = malloc_thing(iface_entry_t);
276 entry->ifindex = msg->ifi_index;
277 entry->flags = 0;
278 entry->addrs = linked_list_create();
279 this->ifaces->insert_last(this->ifaces, entry);
280 }
281 memcpy(entry->ifname, name, IFNAMSIZ);
282 entry->ifname[IFNAMSIZ-1] = '\0';
283 if (event)
284 {
285 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
286 {
287 update = TRUE;
288 DBG1(DBG_KNL, "interface %s activated", name);
289 }
290 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
291 {
292 update = TRUE;
293 DBG1(DBG_KNL, "interface %s deactivated", name);
294 }
295 }
296 entry->flags = msg->ifi_flags;
297 iterator->destroy(iterator);
298 break;
299 }
300 case RTM_DELLINK:
301 {
302 iterator = this->ifaces->create_iterator_locked(this->ifaces,
303 &this->mutex);
304 while (iterator->iterate(iterator, (void**)&current))
305 {
306 if (current->ifindex == msg->ifi_index)
307 {
308 /* we do not remove it, as an address may be added to a
309 * "down" interface and we wan't to know that. */
310 current->flags = msg->ifi_flags;
311 break;
312 }
313 }
314 iterator->destroy(iterator);
315 break;
316 }
317 }
318
319 /* send an update to all IKE_SAs */
320 if (update && event)
321 {
322 fire_roam_job(this, TRUE);
323 }
324 }
325
326 /**
327 * process RTM_NEWADDR/RTM_DELADDR from kernel
328 */
329 static void process_addr(private_kernel_netlink_net_t *this,
330 struct nlmsghdr *hdr, bool event)
331 {
332 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
333 struct rtattr *rta = IFA_RTA(msg);
334 size_t rtasize = IFA_PAYLOAD (hdr);
335 host_t *host = NULL;
336 iterator_t *ifaces, *addrs;
337 iface_entry_t *iface;
338 addr_entry_t *addr;
339 chunk_t local = chunk_empty, address = chunk_empty;
340 bool update = FALSE, found = FALSE, changed = FALSE;
341
342 while(RTA_OK(rta, rtasize))
343 {
344 switch (rta->rta_type)
345 {
346 case IFA_LOCAL:
347 local.ptr = RTA_DATA(rta);
348 local.len = RTA_PAYLOAD(rta);
349 break;
350 case IFA_ADDRESS:
351 address.ptr = RTA_DATA(rta);
352 address.len = RTA_PAYLOAD(rta);
353 break;
354 }
355 rta = RTA_NEXT(rta, rtasize);
356 }
357
358 /* For PPP interfaces, we need the IFA_LOCAL address,
359 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
360 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
361 if (local.ptr)
362 {
363 host = host_create_from_chunk(msg->ifa_family, local, 0);
364 }
365 else if (address.ptr)
366 {
367 host = host_create_from_chunk(msg->ifa_family, address, 0);
368 }
369
370 if (host == NULL)
371 { /* bad family? */
372 return;
373 }
374
375 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
376 while (ifaces->iterate(ifaces, (void**)&iface))
377 {
378 if (iface->ifindex == msg->ifa_index)
379 {
380 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
381 while (addrs->iterate(addrs, (void**)&addr))
382 {
383 if (host->ip_equals(host, addr->ip))
384 {
385 found = TRUE;
386 if (hdr->nlmsg_type == RTM_DELADDR)
387 {
388 addrs->remove(addrs);
389 if (!addr->virtual)
390 {
391 changed = TRUE;
392 DBG1(DBG_KNL, "%H disappeared from %s",
393 host, iface->ifname);
394 }
395 addr_entry_destroy(addr);
396 }
397 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
398 {
399 addr->refcount = 1;
400 }
401 }
402 }
403 addrs->destroy(addrs);
404
405 if (hdr->nlmsg_type == RTM_NEWADDR)
406 {
407 if (!found)
408 {
409 found = TRUE;
410 changed = TRUE;
411 addr = malloc_thing(addr_entry_t);
412 addr->ip = host->clone(host);
413 addr->virtual = FALSE;
414 addr->refcount = 1;
415 addr->scope = msg->ifa_scope;
416
417 iface->addrs->insert_last(iface->addrs, addr);
418 if (event)
419 {
420 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
421 }
422 }
423 }
424 if (found && (iface->flags & IFF_UP))
425 {
426 update = TRUE;
427 }
428 break;
429 }
430 }
431 ifaces->destroy(ifaces);
432 host->destroy(host);
433
434 /* send an update to all IKE_SAs */
435 if (update && event && changed)
436 {
437 fire_roam_job(this, TRUE);
438 }
439 }
440
441 /**
442 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
443 */
444 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
445 {
446 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
447 struct rtattr *rta = RTM_RTA(msg);
448 size_t rtasize = RTM_PAYLOAD(hdr);
449 host_t *host = NULL;
450
451 while (RTA_OK(rta, rtasize))
452 {
453 switch (rta->rta_type)
454 {
455 case RTA_PREFSRC:
456 host = host_create_from_chunk(msg->rtm_family,
457 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
458 break;
459 }
460 rta = RTA_NEXT(rta, rtasize);
461 }
462 if (host)
463 {
464 if (!get_vip_refcount(this, host))
465 { /* ignore routes added for virtual IPs */
466 fire_roam_job(this, FALSE);
467 }
468 host->destroy(host);
469 }
470 }
471
472 /**
473 * Receives events from kernel
474 */
475 static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
476 {
477 char response[1024];
478 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
479 struct sockaddr_nl addr;
480 socklen_t addr_len = sizeof(addr);
481 int len, oldstate;
482
483 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
484 len = recvfrom(this->socket_events, response, sizeof(response), 0,
485 (struct sockaddr*)&addr, &addr_len);
486 pthread_setcancelstate(oldstate, NULL);
487
488 if (len < 0)
489 {
490 switch (errno)
491 {
492 case EINTR:
493 /* interrupted, try again */
494 return JOB_REQUEUE_DIRECT;
495 case EAGAIN:
496 /* no data ready, select again */
497 return JOB_REQUEUE_DIRECT;
498 default:
499 DBG1(DBG_KNL, "unable to receive from rt event socket");
500 sleep(1);
501 return JOB_REQUEUE_FAIR;
502 }
503 }
504
505 if (addr.nl_pid != 0)
506 { /* not from kernel. not interested, try another one */
507 return JOB_REQUEUE_DIRECT;
508 }
509
510 while (NLMSG_OK(hdr, len))
511 {
512 /* looks good so far, dispatch netlink message */
513 switch (hdr->nlmsg_type)
514 {
515 case RTM_NEWADDR:
516 case RTM_DELADDR:
517 process_addr(this, hdr, TRUE);
518 pthread_cond_broadcast(&this->cond);
519 break;
520 case RTM_NEWLINK:
521 case RTM_DELLINK:
522 process_link(this, hdr, TRUE);
523 pthread_cond_broadcast(&this->cond);
524 break;
525 case RTM_NEWROUTE:
526 case RTM_DELROUTE:
527 if (this->process_route)
528 {
529 process_route(this, hdr);
530 }
531 break;
532 default:
533 break;
534 }
535 hdr = NLMSG_NEXT(hdr, len);
536 }
537 return JOB_REQUEUE_DIRECT;
538 }
539
540 /** enumerator over addresses */
541 typedef struct {
542 private_kernel_netlink_net_t* this;
543 /** whether to enumerate down interfaces */
544 bool include_down_ifaces;
545 /** whether to enumerate virtual ip addresses */
546 bool include_virtual_ips;
547 } address_enumerator_t;
548
549 /**
550 * cleanup function for address enumerator
551 */
552 static void address_enumerator_destroy(address_enumerator_t *data)
553 {
554 pthread_mutex_unlock(&data->this->mutex);
555 free(data);
556 }
557
558 /**
559 * filter for addresses
560 */
561 static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
562 {
563 if (!data->include_virtual_ips && (*in)->virtual)
564 { /* skip virtual interfaces added by us */
565 return FALSE;
566 }
567 if ((*in)->scope >= RT_SCOPE_LINK)
568 { /* skip addresses with a unusable scope */
569 return FALSE;
570 }
571 *out = (*in)->ip;
572 return TRUE;
573 }
574
575 /**
576 * enumerator constructor for interfaces
577 */
578 static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
579 {
580 return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
581 (void*)filter_addresses, data, NULL);
582 }
583
584 /**
585 * filter for interfaces
586 */
587 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
588 {
589 if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
590 { /* skip interfaces not up */
591 return FALSE;
592 }
593 *out = *in;
594 return TRUE;
595 }
596
597 /**
598 * implementation of kernel_net_t.create_address_enumerator
599 */
600 static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
601 bool include_down_ifaces, bool include_virtual_ips)
602 {
603 address_enumerator_t *data = malloc_thing(address_enumerator_t);
604 data->this = this;
605 data->include_down_ifaces = include_down_ifaces;
606 data->include_virtual_ips = include_virtual_ips;
607
608 pthread_mutex_lock(&this->mutex);
609 return enumerator_create_nested(
610 enumerator_create_filter(this->ifaces->create_enumerator(this->ifaces),
611 (void*)filter_interfaces, data, NULL),
612 (void*)create_iface_enumerator, data, (void*)address_enumerator_destroy);
613 }
614
615 /**
616 * implementation of kernel_net_t.get_interface_name
617 */
618 static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
619 {
620 iterator_t *ifaces, *addrs;
621 iface_entry_t *iface;
622 addr_entry_t *addr;
623 char *name = NULL;
624
625 DBG2(DBG_KNL, "getting interface name for %H", ip);
626
627 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
628 while (ifaces->iterate(ifaces, (void**)&iface))
629 {
630 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
631 while (addrs->iterate(addrs, (void**)&addr))
632 {
633 if (ip->ip_equals(ip, addr->ip))
634 {
635 name = strdup(iface->ifname);
636 break;
637 }
638 }
639 addrs->destroy(addrs);
640 if (name)
641 {
642 break;
643 }
644 }
645 ifaces->destroy(ifaces);
646
647 if (name)
648 {
649 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
650 }
651 else
652 {
653 DBG2(DBG_KNL, "%H is not a local address", ip);
654 }
655 return name;
656 }
657
658 /**
659 * get the index of an interface by name
660 */
661 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
662 {
663 iterator_t *ifaces;
664 iface_entry_t *iface;
665 int ifindex = 0;
666
667 DBG2(DBG_KNL, "getting iface index for %s", name);
668
669 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
670 while (ifaces->iterate(ifaces, (void**)&iface))
671 {
672 if (streq(name, iface->ifname))
673 {
674 ifindex = iface->ifindex;
675 break;
676 }
677 }
678 ifaces->destroy(ifaces);
679
680 if (ifindex == 0)
681 {
682 DBG1(DBG_KNL, "unable to get interface index for %s", name);
683 }
684 return ifindex;
685 }
686
687 /**
688 * check if an address (chunk) addr is in subnet (net with net_len net bits)
689 */
690 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
691 {
692 int bit, byte;
693
694 if (addr.len != net.len)
695 {
696 return FALSE;
697 }
698 /* scan through all bits, beginning in the front */
699 for (byte = 0; byte < addr.len; byte++)
700 {
701 for (bit = 0; bit < 8; bit++)
702 {
703 u_char bitpos = 1 << (7-bit);
704
705 /* check if bits are equal (or we reached the end of the net) */
706 if (bit + byte * 8 > net_len)
707 {
708 return TRUE;
709 }
710 if ((bitpos & addr.ptr[byte]) != (bitpos & net.ptr[byte]))
711 {
712 return FALSE;
713 }
714 }
715 }
716 return TRUE;
717 }
718
719 /**
720 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
721 */
722 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
723 bool nexthop)
724 {
725 unsigned char request[NETLINK_BUFFER_SIZE];
726 struct nlmsghdr *hdr, *out, *current;
727 struct rtmsg *msg;
728 chunk_t chunk;
729 size_t len;
730 int best = -1;
731 host_t *src = NULL, *gtw = NULL;
732
733 DBG2(DBG_KNL, "getting address to reach %H", dest);
734
735 memset(&request, 0, sizeof(request));
736
737 hdr = (struct nlmsghdr*)request;
738 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
739 hdr->nlmsg_type = RTM_GETROUTE;
740 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
741
742 msg = (struct rtmsg*)NLMSG_DATA(hdr);
743 msg->rtm_family = dest->get_family(dest);
744
745 chunk = dest->get_address(dest);
746 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
747
748 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
749 {
750 DBG1(DBG_KNL, "getting address to %H failed", dest);
751 return NULL;
752 }
753 current = out;
754 while (NLMSG_OK(current, len))
755 {
756 switch (current->nlmsg_type)
757 {
758 case NLMSG_DONE:
759 break;
760 case RTM_NEWROUTE:
761 {
762 struct rtattr *rta;
763 size_t rtasize;
764 chunk_t rta_gtw, rta_src, rta_dst;
765 u_int32_t rta_oif = 0;
766
767 rta_gtw = rta_src = rta_dst = chunk_empty;
768 msg = (struct rtmsg*)(NLMSG_DATA(current));
769 rta = RTM_RTA(msg);
770 rtasize = RTM_PAYLOAD(current);
771 while (RTA_OK(rta, rtasize))
772 {
773 switch (rta->rta_type)
774 {
775 case RTA_PREFSRC:
776 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
777 break;
778 case RTA_GATEWAY:
779 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
780 break;
781 case RTA_DST:
782 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
783 break;
784 case RTA_OIF:
785 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
786 {
787 rta_oif = *(u_int32_t*)RTA_DATA(rta);
788 }
789 break;
790 }
791 rta = RTA_NEXT(rta, rtasize);
792 }
793
794 /* apply the route if:
795 * - it is not from our own ipsec routing table
796 * - is better than a previous one
797 * - is the default route or
798 * - its destination net contains our destination
799 */
800 if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
801 && msg->rtm_dst_len > best
802 && (msg->rtm_dst_len == 0 || /* default route */
803 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
804 {
805 iterator_t *ifaces, *addrs;
806 iface_entry_t *iface;
807 addr_entry_t *addr;
808
809 best = msg->rtm_dst_len;
810 if (nexthop)
811 {
812 DESTROY_IF(gtw);
813 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
814 }
815 else if (rta_src.ptr)
816 {
817 DESTROY_IF(src);
818 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
819 if (get_vip_refcount(this, src))
820 { /* skip source address if it is installed by us */
821 DESTROY_IF(src);
822 src = NULL;
823 current = NLMSG_NEXT(current, len);
824 continue;
825 }
826 }
827 else
828 {
829 /* no source addr, get one from the interfaces */
830 ifaces = this->ifaces->create_iterator_locked(
831 this->ifaces, &this->mutex);
832 while (ifaces->iterate(ifaces, (void**)&iface))
833 {
834 if (iface->ifindex == rta_oif)
835 {
836 addrs = iface->addrs->create_iterator(
837 iface->addrs, TRUE);
838 while (addrs->iterate(addrs, (void**)&addr))
839 {
840 chunk_t ip = addr->ip->get_address(addr->ip);
841 if ((msg->rtm_dst_len == 0 &&
842 addr->ip->get_family(addr->ip) ==
843 dest->get_family(dest)) ||
844 addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
845 {
846 DESTROY_IF(src);
847 src = addr->ip->clone(addr->ip);
848 break;
849 }
850 }
851 addrs->destroy(addrs);
852 }
853 }
854 ifaces->destroy(ifaces);
855 }
856 }
857 /* FALL through */
858 }
859 default:
860 current = NLMSG_NEXT(current, len);
861 continue;
862 }
863 break;
864 }
865 free(out);
866
867 if (nexthop)
868 {
869 if (gtw)
870 {
871 return gtw;
872 }
873 return dest->clone(dest);
874 }
875 return src;
876 }
877
878 /**
879 * Implementation of kernel_net_t.get_source_addr.
880 */
881 static host_t* get_source_addr(private_kernel_netlink_net_t *this, host_t *dest)
882 {
883 return get_route(this, dest, FALSE);
884 }
885
886 /**
887 * Implementation of kernel_net_t.get_nexthop.
888 */
889 static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
890 {
891 return get_route(this, dest, TRUE);
892 }
893
894 /**
895 * Manages the creation and deletion of ip addresses on an interface.
896 * By setting the appropriate nlmsg_type, the ip will be set or unset.
897 */
898 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
899 int flags, int if_index, host_t *ip)
900 {
901 unsigned char request[NETLINK_BUFFER_SIZE];
902 struct nlmsghdr *hdr;
903 struct ifaddrmsg *msg;
904 chunk_t chunk;
905
906 memset(&request, 0, sizeof(request));
907
908 chunk = ip->get_address(ip);
909
910 hdr = (struct nlmsghdr*)request;
911 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
912 hdr->nlmsg_type = nlmsg_type;
913 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
914
915 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
916 msg->ifa_family = ip->get_family(ip);
917 msg->ifa_flags = 0;
918 msg->ifa_prefixlen = 8 * chunk.len;
919 msg->ifa_scope = RT_SCOPE_UNIVERSE;
920 msg->ifa_index = if_index;
921
922 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
923
924 return this->socket->send_ack(this->socket, hdr);
925 }
926
927 /**
928 * Implementation of kernel_net_t.add_ip.
929 */
930 static status_t add_ip(private_kernel_netlink_net_t *this,
931 host_t *virtual_ip, host_t *iface_ip)
932 {
933 iface_entry_t *iface;
934 addr_entry_t *addr;
935 iterator_t *addrs, *ifaces;
936 int ifindex;
937
938 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
939
940 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
941 while (ifaces->iterate(ifaces, (void**)&iface))
942 {
943 bool iface_found = FALSE;
944
945 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
946 while (addrs->iterate(addrs, (void**)&addr))
947 {
948 if (iface_ip->ip_equals(iface_ip, addr->ip))
949 {
950 iface_found = TRUE;
951 }
952 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
953 {
954 addr->refcount++;
955 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
956 virtual_ip, iface->ifname);
957 addrs->destroy(addrs);
958 ifaces->destroy(ifaces);
959 return SUCCESS;
960 }
961 }
962 addrs->destroy(addrs);
963
964 if (iface_found)
965 {
966 ifindex = iface->ifindex;
967 addr = malloc_thing(addr_entry_t);
968 addr->ip = virtual_ip->clone(virtual_ip);
969 addr->refcount = 0;
970 addr->virtual = TRUE;
971 addr->scope = RT_SCOPE_UNIVERSE;
972 iface->addrs->insert_last(iface->addrs, addr);
973
974 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
975 ifindex, virtual_ip) == SUCCESS)
976 {
977 while (get_vip_refcount(this, virtual_ip) == 0)
978 { /* wait until address appears */
979 pthread_cond_wait(&this->cond, &this->mutex);
980 }
981 ifaces->destroy(ifaces);
982 return SUCCESS;
983 }
984 ifaces->destroy(ifaces);
985 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
986 return FAILED;
987 }
988 }
989 ifaces->destroy(ifaces);
990
991 DBG1(DBG_KNL, "interface address %H not found, unable to install"
992 "virtual IP %H", iface_ip, virtual_ip);
993 return FAILED;
994 }
995
996 /**
997 * Implementation of kernel_net_t.del_ip.
998 */
999 static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
1000 {
1001 iface_entry_t *iface;
1002 addr_entry_t *addr;
1003 iterator_t *addrs, *ifaces;
1004 status_t status;
1005 int ifindex;
1006
1007 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1008
1009 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1010 while (ifaces->iterate(ifaces, (void**)&iface))
1011 {
1012 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1013 while (addrs->iterate(addrs, (void**)&addr))
1014 {
1015 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1016 {
1017 ifindex = iface->ifindex;
1018 if (addr->refcount == 1)
1019 {
1020 status = manage_ipaddr(this, RTM_DELADDR, 0,
1021 ifindex, virtual_ip);
1022 if (status == SUCCESS)
1023 { /* wait until the address is really gone */
1024 while (get_vip_refcount(this, virtual_ip) > 0)
1025 {
1026 pthread_cond_wait(&this->cond, &this->mutex);
1027 }
1028 }
1029 addrs->destroy(addrs);
1030 ifaces->destroy(ifaces);
1031 return status;
1032 }
1033 else
1034 {
1035 addr->refcount--;
1036 }
1037 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1038 virtual_ip);
1039 addrs->destroy(addrs);
1040 ifaces->destroy(ifaces);
1041 return SUCCESS;
1042 }
1043 }
1044 addrs->destroy(addrs);
1045 }
1046 ifaces->destroy(ifaces);
1047
1048 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1049 return FAILED;
1050 }
1051
1052 /**
1053 * Manages source routes in the routing table.
1054 * By setting the appropriate nlmsg_type, the route gets added or removed.
1055 */
1056 static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
1057 int flags, chunk_t dst_net, u_int8_t prefixlen,
1058 host_t *gateway, host_t *src_ip, char *if_name)
1059 {
1060 unsigned char request[NETLINK_BUFFER_SIZE];
1061 struct nlmsghdr *hdr;
1062 struct rtmsg *msg;
1063 int ifindex;
1064 chunk_t chunk;
1065
1066 /* if route is 0.0.0.0/0, we can't install it, as it would
1067 * overwrite the default route. Instead, we add two routes:
1068 * 0.0.0.0/1 and 128.0.0.0/1 */
1069 if (this->routing_table == 0 && prefixlen == 0)
1070 {
1071 chunk_t half_net;
1072 u_int8_t half_prefixlen;
1073 status_t status;
1074
1075 half_net = chunk_alloca(dst_net.len);
1076 memset(half_net.ptr, 0, half_net.len);
1077 half_prefixlen = 1;
1078
1079 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1080 gateway, src_ip, if_name);
1081 half_net.ptr[0] |= 0x80;
1082 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1083 gateway, src_ip, if_name);
1084 return status;
1085 }
1086
1087 memset(&request, 0, sizeof(request));
1088
1089 hdr = (struct nlmsghdr*)request;
1090 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1091 hdr->nlmsg_type = nlmsg_type;
1092 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1093
1094 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1095 msg->rtm_family = src_ip->get_family(src_ip);
1096 msg->rtm_dst_len = prefixlen;
1097 msg->rtm_table = this->routing_table;
1098 msg->rtm_protocol = RTPROT_STATIC;
1099 msg->rtm_type = RTN_UNICAST;
1100 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1101
1102 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
1103 chunk = src_ip->get_address(src_ip);
1104 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1105 chunk = gateway->get_address(gateway);
1106 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1107 ifindex = get_interface_index(this, if_name);
1108 chunk.ptr = (char*)&ifindex;
1109 chunk.len = sizeof(ifindex);
1110 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1111
1112 return this->socket->send_ack(this->socket, hdr);
1113 }
1114
1115 /**
1116 * Implementation of kernel_net_t.add_route.
1117 */
1118 status_t add_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1119 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1120 {
1121 return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
1122 dst_net, prefixlen, gateway, src_ip, if_name);
1123 }
1124
1125 /**
1126 * Implementation of kernel_net_t.del_route.
1127 */
1128 status_t del_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1129 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1130 {
1131 return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
1132 gateway, src_ip, if_name);
1133 }
1134
1135 /**
1136 * Initialize a list of local addresses.
1137 */
1138 static status_t init_address_list(private_kernel_netlink_net_t *this)
1139 {
1140 char request[NETLINK_BUFFER_SIZE];
1141 struct nlmsghdr *out, *current, *in;
1142 struct rtgenmsg *msg;
1143 size_t len;
1144 iterator_t *ifaces, *addrs;
1145 iface_entry_t *iface;
1146 addr_entry_t *addr;
1147
1148 DBG1(DBG_KNL, "listening on interfaces:");
1149
1150 memset(&request, 0, sizeof(request));
1151
1152 in = (struct nlmsghdr*)&request;
1153 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1154 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1155 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1156 msg->rtgen_family = AF_UNSPEC;
1157
1158 /* get all links */
1159 in->nlmsg_type = RTM_GETLINK;
1160 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1161 {
1162 return FAILED;
1163 }
1164 current = out;
1165 while (NLMSG_OK(current, len))
1166 {
1167 switch (current->nlmsg_type)
1168 {
1169 case NLMSG_DONE:
1170 break;
1171 case RTM_NEWLINK:
1172 process_link(this, current, FALSE);
1173 /* fall through */
1174 default:
1175 current = NLMSG_NEXT(current, len);
1176 continue;
1177 }
1178 break;
1179 }
1180 free(out);
1181
1182 /* get all interface addresses */
1183 in->nlmsg_type = RTM_GETADDR;
1184 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1185 {
1186 return FAILED;
1187 }
1188 current = out;
1189 while (NLMSG_OK(current, len))
1190 {
1191 switch (current->nlmsg_type)
1192 {
1193 case NLMSG_DONE:
1194 break;
1195 case RTM_NEWADDR:
1196 process_addr(this, current, FALSE);
1197 /* fall through */
1198 default:
1199 current = NLMSG_NEXT(current, len);
1200 continue;
1201 }
1202 break;
1203 }
1204 free(out);
1205
1206 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1207 while (ifaces->iterate(ifaces, (void**)&iface))
1208 {
1209 if (iface->flags & IFF_UP)
1210 {
1211 DBG1(DBG_KNL, " %s", iface->ifname);
1212 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1213 while (addrs->iterate(addrs, (void**)&addr))
1214 {
1215 DBG1(DBG_KNL, " %H", addr->ip);
1216 }
1217 addrs->destroy(addrs);
1218 }
1219 }
1220 ifaces->destroy(ifaces);
1221 return SUCCESS;
1222 }
1223
1224 /**
1225 * create or delete a rule to use our routing table
1226 */
1227 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
1228 u_int32_t table, u_int32_t prio)
1229 {
1230 unsigned char request[NETLINK_BUFFER_SIZE];
1231 struct nlmsghdr *hdr;
1232 struct rtmsg *msg;
1233 chunk_t chunk;
1234
1235 memset(&request, 0, sizeof(request));
1236 hdr = (struct nlmsghdr*)request;
1237 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1238 hdr->nlmsg_type = nlmsg_type;
1239 if (nlmsg_type == RTM_NEWRULE)
1240 {
1241 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1242 }
1243 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1244
1245 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1246 msg->rtm_table = table;
1247 msg->rtm_family = AF_INET;
1248 msg->rtm_protocol = RTPROT_BOOT;
1249 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1250 msg->rtm_type = RTN_UNICAST;
1251
1252 chunk = chunk_from_thing(prio);
1253 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1254
1255 return this->socket->send_ack(this->socket, hdr);
1256 }
1257
1258 /**
1259 * Implementation of kernel_netlink_net_t.destroy.
1260 */
1261 static void destroy(private_kernel_netlink_net_t *this)
1262 {
1263 if (this->routing_table)
1264 {
1265 manage_rule(this, RTM_DELRULE, this->routing_table,
1266 this->routing_table_prio);
1267 }
1268
1269 this->job->cancel(this->job);
1270 close(this->socket_events);
1271 this->socket->destroy(this->socket);
1272 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
1273 free(this);
1274 }
1275
1276 /*
1277 * Described in header.
1278 */
1279 kernel_netlink_net_t *kernel_netlink_net_create()
1280 {
1281 private_kernel_netlink_net_t *this = malloc_thing(private_kernel_netlink_net_t);
1282 struct sockaddr_nl addr;
1283
1284 /* public functions */
1285 this->public.interface.get_interface = (char*(*)(kernel_net_t*,host_t*))get_interface_name;
1286 this->public.interface.create_address_enumerator = (enumerator_t*(*)(kernel_net_t*,bool,bool))create_address_enumerator;
1287 this->public.interface.get_source_addr = (host_t*(*)(kernel_net_t*, host_t *dest))get_source_addr;
1288 this->public.interface.get_nexthop = (host_t*(*)(kernel_net_t*, host_t *dest))get_nexthop;
1289 this->public.interface.add_ip = (status_t(*)(kernel_net_t*,host_t*,host_t*)) add_ip;
1290 this->public.interface.del_ip = (status_t(*)(kernel_net_t*,host_t*)) del_ip;
1291 this->public.interface.add_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
1292 this->public.interface.del_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
1293 this->public.interface.destroy = (void(*)(kernel_net_t*)) destroy;
1294
1295 /* private members */
1296 this->ifaces = linked_list_create();
1297 pthread_mutex_init(&this->mutex, NULL);
1298 pthread_cond_init(&this->cond, NULL);
1299 timerclear(&this->last_roam);
1300 this->routing_table = lib->settings->get_int(lib->settings,
1301 "charon.routing_table", IPSEC_ROUTING_TABLE);
1302 this->routing_table_prio = lib->settings->get_int(lib->settings,
1303 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
1304 this->process_route = lib->settings->get_bool(lib->settings,
1305 "charon.process_route", TRUE);
1306
1307 this->socket = netlink_socket_create(NETLINK_ROUTE);
1308
1309 memset(&addr, 0, sizeof(addr));
1310 addr.nl_family = AF_NETLINK;
1311
1312 /* create and bind RT socket for events (address/interface/route changes) */
1313 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1314 if (this->socket_events <= 0)
1315 {
1316 charon->kill(charon, "unable to create RT event socket");
1317 }
1318 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1319 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
1320 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
1321 {
1322 charon->kill(charon, "unable to bind RT event socket");
1323 }
1324
1325 this->job = callback_job_create((callback_job_cb_t)receive_events,
1326 this, NULL, NULL);
1327 charon->processor->queue_job(charon->processor, (job_t*)this->job);
1328
1329 if (init_address_list(this) != SUCCESS)
1330 {
1331 charon->kill(charon, "unable to get interface list");
1332 }
1333
1334 if (this->routing_table)
1335 {
1336 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
1337 this->routing_table_prio) != SUCCESS)
1338 {
1339 DBG1(DBG_KNL, "unable to create routing table rule");
1340 }
1341 }
1342
1343 return &this->public;
1344 }