faster implementation of addr_in_subnet()
[strongswan.git] / src / charon / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 *
16 * $Id$
17 */
18
19 #include <sys/socket.h>
20 #include <linux/netlink.h>
21 #include <linux/rtnetlink.h>
22 #include <sys/time.h>
23 #include <pthread.h>
24 #include <unistd.h>
25 #include <errno.h>
26 #include <net/if.h>
27
28 #include "kernel_netlink_net.h"
29 #include "kernel_netlink_shared.h"
30
31 #include <daemon.h>
32 #include <utils/linked_list.h>
33 #include <processing/jobs/callback_job.h>
34 #include <processing/jobs/roam_job.h>
35
/** delay before firing roam jobs (ms) */
#define ROAM_DELAY 100

/**
 * routing table for routes installed by us; the #ifndef guard allows the
 * value to be predefined at build time
 */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif
/**
 * companion priority value for the routing table above, overridable at
 * build time in the same way
 */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif
46
typedef struct addr_entry_t addr_entry_t;

/**
 * IP address in an iface_entry_t
 */
struct addr_entry_t {

	/** The ip address, released by addr_entry_destroy() */
	host_t *ip;

	/** virtual IP managed by us */
	bool virtual;

	/** scope of the address (taken from ifaddrmsg.ifa_scope for kernel addresses) */
	u_char scope;

	/** Number of times this IP is used, if virtual */
	u_int refcount;
};
66
67 /**
68 * destroy a addr_entry_t object
69 */
70 static void addr_entry_destroy(addr_entry_t *this)
71 {
72 this->ip->destroy(this->ip);
73 free(this);
74 }
75
typedef struct iface_entry_t iface_entry_t;

/**
 * A network interface on this system, containing addr_entry_t's
 */
struct iface_entry_t {

	/** interface index */
	int ifindex;

	/** name of the interface, always NUL-terminated within IFNAMSIZ bytes */
	char ifname[IFNAMSIZ];

	/** interface flags, as in netdevice(7) SIOCGIFFLAGS */
	u_int flags;

	/** list of addresses as addr_entry_t, destroyed with the interface entry */
	linked_list_t *addrs;
};
95
96 /**
97 * destroy an interface entry
98 */
99 static void iface_entry_destroy(iface_entry_t *this)
100 {
101 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
102 free(this);
103 }
104
typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;

/**
 * Private variables and functions of kernel_netlink_net class.
 */
struct private_kernel_netlink_net_t {
	/**
	 * Public part of the kernel_netlink_net_t object.
	 */
	kernel_netlink_net_t public;

	/**
	 * mutex to lock access to various lists (notably the ifaces cache)
	 */
	pthread_mutex_t mutex;

	/**
	 * condition variable to signal virtual IP add/removal; broadcast by the
	 * event receiver after address and link events, waited on in
	 * add_ip()/del_ip()
	 */
	pthread_cond_t cond;

	/**
	 * Cached list of interfaces and its addresses (iface_entry_t)
	 */
	linked_list_t *ifaces;

	/**
	 * job receiving netlink events
	 */
	callback_job_t *job;

	/**
	 * netlink rt socket (routing), used for request/response exchanges
	 */
	netlink_socket_t *socket;

	/**
	 * Netlink rt socket to receive address change events
	 */
	int socket_events;

	/**
	 * time of the last roam_job, used to rate limit roam jobs
	 */
	struct timeval last_roam;

	/**
	 * routing table to install routes, 0 is treated as "no dedicated table"
	 */
	int routing_table;

	/**
	 * priority of used routing table
	 */
	int routing_table_prio;

	/**
	 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
	 */
	bool process_route;

};
167
168 /**
169 * get the refcount of a virtual ip
170 */
171 static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
172 {
173 iterator_t *ifaces, *addrs;
174 iface_entry_t *iface;
175 addr_entry_t *addr;
176 int refcount = 0;
177
178 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
179 while (ifaces->iterate(ifaces, (void**)&iface))
180 {
181 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
182 while (addrs->iterate(addrs, (void**)&addr))
183 {
184 if (addr->virtual && (iface->flags & IFF_UP) &&
185 ip->ip_equals(ip, addr->ip))
186 {
187 refcount = addr->refcount;
188 break;
189 }
190 }
191 addrs->destroy(addrs);
192 if (refcount)
193 {
194 break;
195 }
196 }
197 ifaces->destroy(ifaces);
198
199 return refcount;
200 }
201
202 /**
203 * start a roaming job. We delay it for a second and fire only one job
204 * for multiple events. Otherwise we would create two many jobs.
205 */
206 static void fire_roam_job(private_kernel_netlink_net_t *this, bool address)
207 {
208 struct timeval now;
209
210 if (gettimeofday(&now, NULL) == 0)
211 {
212 if (timercmp(&now, &this->last_roam, >))
213 {
214 now.tv_usec += ROAM_DELAY * 1000;
215 while (now.tv_usec > 1000000)
216 {
217 now.tv_sec++;
218 now.tv_usec -= 1000000;
219 }
220 this->last_roam = now;
221 charon->scheduler->schedule_job(charon->scheduler,
222 (job_t*)roam_job_create(address), ROAM_DELAY);
223 }
224 }
225 }
226
227 /**
228 * process RTM_NEWLINK/RTM_DELLINK from kernel
229 */
230 static void process_link(private_kernel_netlink_net_t *this,
231 struct nlmsghdr *hdr, bool event)
232 {
233 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
234 struct rtattr *rta = IFLA_RTA(msg);
235 size_t rtasize = IFLA_PAYLOAD (hdr);
236 iterator_t *iterator;
237 iface_entry_t *current, *entry = NULL;
238 char *name = NULL;
239 bool update = FALSE;
240
241 while(RTA_OK(rta, rtasize))
242 {
243 switch (rta->rta_type)
244 {
245 case IFLA_IFNAME:
246 name = RTA_DATA(rta);
247 break;
248 }
249 rta = RTA_NEXT(rta, rtasize);
250 }
251 if (!name)
252 {
253 name = "(unknown)";
254 }
255
256 switch (hdr->nlmsg_type)
257 {
258 case RTM_NEWLINK:
259 {
260 if (msg->ifi_flags & IFF_LOOPBACK)
261 { /* ignore loopback interfaces */
262 break;
263 }
264 iterator = this->ifaces->create_iterator_locked(this->ifaces,
265 &this->mutex);
266 while (iterator->iterate(iterator, (void**)&current))
267 {
268 if (current->ifindex == msg->ifi_index)
269 {
270 entry = current;
271 break;
272 }
273 }
274 if (!entry)
275 {
276 entry = malloc_thing(iface_entry_t);
277 entry->ifindex = msg->ifi_index;
278 entry->flags = 0;
279 entry->addrs = linked_list_create();
280 this->ifaces->insert_last(this->ifaces, entry);
281 }
282 memcpy(entry->ifname, name, IFNAMSIZ);
283 entry->ifname[IFNAMSIZ-1] = '\0';
284 if (event)
285 {
286 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
287 {
288 update = TRUE;
289 DBG1(DBG_KNL, "interface %s activated", name);
290 }
291 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
292 {
293 update = TRUE;
294 DBG1(DBG_KNL, "interface %s deactivated", name);
295 }
296 }
297 entry->flags = msg->ifi_flags;
298 iterator->destroy(iterator);
299 break;
300 }
301 case RTM_DELLINK:
302 {
303 iterator = this->ifaces->create_iterator_locked(this->ifaces,
304 &this->mutex);
305 while (iterator->iterate(iterator, (void**)&current))
306 {
307 if (current->ifindex == msg->ifi_index)
308 {
309 /* we do not remove it, as an address may be added to a
310 * "down" interface and we wan't to know that. */
311 current->flags = msg->ifi_flags;
312 break;
313 }
314 }
315 iterator->destroy(iterator);
316 break;
317 }
318 }
319
320 /* send an update to all IKE_SAs */
321 if (update && event)
322 {
323 fire_roam_job(this, TRUE);
324 }
325 }
326
327 /**
328 * process RTM_NEWADDR/RTM_DELADDR from kernel
329 */
330 static void process_addr(private_kernel_netlink_net_t *this,
331 struct nlmsghdr *hdr, bool event)
332 {
333 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
334 struct rtattr *rta = IFA_RTA(msg);
335 size_t rtasize = IFA_PAYLOAD (hdr);
336 host_t *host = NULL;
337 iterator_t *ifaces, *addrs;
338 iface_entry_t *iface;
339 addr_entry_t *addr;
340 chunk_t local = chunk_empty, address = chunk_empty;
341 bool update = FALSE, found = FALSE, changed = FALSE;
342
343 while(RTA_OK(rta, rtasize))
344 {
345 switch (rta->rta_type)
346 {
347 case IFA_LOCAL:
348 local.ptr = RTA_DATA(rta);
349 local.len = RTA_PAYLOAD(rta);
350 break;
351 case IFA_ADDRESS:
352 address.ptr = RTA_DATA(rta);
353 address.len = RTA_PAYLOAD(rta);
354 break;
355 }
356 rta = RTA_NEXT(rta, rtasize);
357 }
358
359 /* For PPP interfaces, we need the IFA_LOCAL address,
360 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
361 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
362 if (local.ptr)
363 {
364 host = host_create_from_chunk(msg->ifa_family, local, 0);
365 }
366 else if (address.ptr)
367 {
368 host = host_create_from_chunk(msg->ifa_family, address, 0);
369 }
370
371 if (host == NULL)
372 { /* bad family? */
373 return;
374 }
375
376 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
377 while (ifaces->iterate(ifaces, (void**)&iface))
378 {
379 if (iface->ifindex == msg->ifa_index)
380 {
381 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
382 while (addrs->iterate(addrs, (void**)&addr))
383 {
384 if (host->ip_equals(host, addr->ip))
385 {
386 found = TRUE;
387 if (hdr->nlmsg_type == RTM_DELADDR)
388 {
389 addrs->remove(addrs);
390 if (!addr->virtual)
391 {
392 changed = TRUE;
393 DBG1(DBG_KNL, "%H disappeared from %s",
394 host, iface->ifname);
395 }
396 addr_entry_destroy(addr);
397 }
398 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
399 {
400 addr->refcount = 1;
401 }
402 }
403 }
404 addrs->destroy(addrs);
405
406 if (hdr->nlmsg_type == RTM_NEWADDR)
407 {
408 if (!found)
409 {
410 found = TRUE;
411 changed = TRUE;
412 addr = malloc_thing(addr_entry_t);
413 addr->ip = host->clone(host);
414 addr->virtual = FALSE;
415 addr->refcount = 1;
416 addr->scope = msg->ifa_scope;
417
418 iface->addrs->insert_last(iface->addrs, addr);
419 if (event)
420 {
421 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
422 }
423 }
424 }
425 if (found && (iface->flags & IFF_UP))
426 {
427 update = TRUE;
428 }
429 break;
430 }
431 }
432 ifaces->destroy(ifaces);
433 host->destroy(host);
434
435 /* send an update to all IKE_SAs */
436 if (update && event && changed)
437 {
438 fire_roam_job(this, TRUE);
439 }
440 }
441
442 /**
443 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
444 */
445 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
446 {
447 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
448 struct rtattr *rta = RTM_RTA(msg);
449 size_t rtasize = RTM_PAYLOAD(hdr);
450 host_t *host = NULL;
451
452 /* ignore routes added by us */
453 if (msg->rtm_table && msg->rtm_table == this->routing_table)
454 {
455 return;
456 }
457
458 while (RTA_OK(rta, rtasize))
459 {
460 switch (rta->rta_type)
461 {
462 case RTA_PREFSRC:
463 host = host_create_from_chunk(msg->rtm_family,
464 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
465 break;
466 }
467 rta = RTA_NEXT(rta, rtasize);
468 }
469 if (host)
470 {
471 if (!get_vip_refcount(this, host))
472 { /* ignore routes added for virtual IPs */
473 fire_roam_job(this, FALSE);
474 }
475 host->destroy(host);
476 }
477 }
478
479 /**
480 * Receives events from kernel
481 */
482 static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
483 {
484 char response[1024];
485 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
486 struct sockaddr_nl addr;
487 socklen_t addr_len = sizeof(addr);
488 int len, oldstate;
489
490 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
491 len = recvfrom(this->socket_events, response, sizeof(response), 0,
492 (struct sockaddr*)&addr, &addr_len);
493 pthread_setcancelstate(oldstate, NULL);
494
495 if (len < 0)
496 {
497 switch (errno)
498 {
499 case EINTR:
500 /* interrupted, try again */
501 return JOB_REQUEUE_DIRECT;
502 case EAGAIN:
503 /* no data ready, select again */
504 return JOB_REQUEUE_DIRECT;
505 default:
506 DBG1(DBG_KNL, "unable to receive from rt event socket");
507 sleep(1);
508 return JOB_REQUEUE_FAIR;
509 }
510 }
511
512 if (addr.nl_pid != 0)
513 { /* not from kernel. not interested, try another one */
514 return JOB_REQUEUE_DIRECT;
515 }
516
517 while (NLMSG_OK(hdr, len))
518 {
519 /* looks good so far, dispatch netlink message */
520 switch (hdr->nlmsg_type)
521 {
522 case RTM_NEWADDR:
523 case RTM_DELADDR:
524 process_addr(this, hdr, TRUE);
525 pthread_cond_broadcast(&this->cond);
526 break;
527 case RTM_NEWLINK:
528 case RTM_DELLINK:
529 process_link(this, hdr, TRUE);
530 pthread_cond_broadcast(&this->cond);
531 break;
532 case RTM_NEWROUTE:
533 case RTM_DELROUTE:
534 if (this->process_route)
535 {
536 process_route(this, hdr);
537 }
538 break;
539 default:
540 break;
541 }
542 hdr = NLMSG_NEXT(hdr, len);
543 }
544 return JOB_REQUEUE_DIRECT;
545 }
546
/**
 * context of the enumerator over addresses, shared by the interface and
 * address filters and freed by address_enumerator_destroy()
 */
typedef struct {
	/** kernel interface whose mutex is held while enumerating */
	private_kernel_netlink_net_t* this;
	/** whether to enumerate down interfaces */
	bool include_down_ifaces;
	/** whether to enumerate virtual ip addresses */
	bool include_virtual_ips;
} address_enumerator_t;
555
556 /**
557 * cleanup function for address enumerator
558 */
559 static void address_enumerator_destroy(address_enumerator_t *data)
560 {
561 pthread_mutex_unlock(&data->this->mutex);
562 free(data);
563 }
564
565 /**
566 * filter for addresses
567 */
568 static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
569 {
570 if (!data->include_virtual_ips && (*in)->virtual)
571 { /* skip virtual interfaces added by us */
572 return FALSE;
573 }
574 if ((*in)->scope >= RT_SCOPE_LINK)
575 { /* skip addresses with a unusable scope */
576 return FALSE;
577 }
578 *out = (*in)->ip;
579 return TRUE;
580 }
581
582 /**
583 * enumerator constructor for interfaces
584 */
585 static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
586 {
587 return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
588 (void*)filter_addresses, data, NULL);
589 }
590
591 /**
592 * filter for interfaces
593 */
594 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
595 {
596 if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
597 { /* skip interfaces not up */
598 return FALSE;
599 }
600 *out = *in;
601 return TRUE;
602 }
603
604 /**
605 * implementation of kernel_net_t.create_address_enumerator
606 */
607 static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
608 bool include_down_ifaces, bool include_virtual_ips)
609 {
610 address_enumerator_t *data = malloc_thing(address_enumerator_t);
611 data->this = this;
612 data->include_down_ifaces = include_down_ifaces;
613 data->include_virtual_ips = include_virtual_ips;
614
615 pthread_mutex_lock(&this->mutex);
616 return enumerator_create_nested(
617 enumerator_create_filter(this->ifaces->create_enumerator(this->ifaces),
618 (void*)filter_interfaces, data, NULL),
619 (void*)create_iface_enumerator, data, (void*)address_enumerator_destroy);
620 }
621
622 /**
623 * implementation of kernel_net_t.get_interface_name
624 */
625 static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
626 {
627 iterator_t *ifaces, *addrs;
628 iface_entry_t *iface;
629 addr_entry_t *addr;
630 char *name = NULL;
631
632 DBG2(DBG_KNL, "getting interface name for %H", ip);
633
634 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
635 while (ifaces->iterate(ifaces, (void**)&iface))
636 {
637 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
638 while (addrs->iterate(addrs, (void**)&addr))
639 {
640 if (ip->ip_equals(ip, addr->ip))
641 {
642 name = strdup(iface->ifname);
643 break;
644 }
645 }
646 addrs->destroy(addrs);
647 if (name)
648 {
649 break;
650 }
651 }
652 ifaces->destroy(ifaces);
653
654 if (name)
655 {
656 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
657 }
658 else
659 {
660 DBG2(DBG_KNL, "%H is not a local address", ip);
661 }
662 return name;
663 }
664
665 /**
666 * get the index of an interface by name
667 */
668 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
669 {
670 iterator_t *ifaces;
671 iface_entry_t *iface;
672 int ifindex = 0;
673
674 DBG2(DBG_KNL, "getting iface index for %s", name);
675
676 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
677 while (ifaces->iterate(ifaces, (void**)&iface))
678 {
679 if (streq(name, iface->ifname))
680 {
681 ifindex = iface->ifindex;
682 break;
683 }
684 }
685 ifaces->destroy(ifaces);
686
687 if (ifindex == 0)
688 {
689 DBG1(DBG_KNL, "unable to get interface index for %s", name);
690 }
691 return ifindex;
692 }
693
694 /**
695 * check if an address (chunk) addr is in subnet (net with net_len net bits)
696 */
697 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
698 {
699 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
700 int byte = 0;
701
702 if (addr.len != net.len || net_len > 8 * net.len )
703 {
704 return FALSE;
705 }
706
707 /* scan through all bytes in network order */
708 while (net_len > 0)
709 {
710 if (net_len < 8)
711 {
712 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
713 }
714 else
715 {
716 if (addr.ptr[byte] != net.ptr[byte])
717 {
718 return FALSE;
719 }
720 byte++;
721 net_len -= 8;
722 }
723 }
724 return TRUE;
725 }
726
727 /**
728 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
729 */
730 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
731 bool nexthop, host_t *candidate)
732 {
733 unsigned char request[NETLINK_BUFFER_SIZE];
734 struct nlmsghdr *hdr, *out, *current;
735 struct rtmsg *msg;
736 chunk_t chunk;
737 size_t len;
738 int best = -1;
739 host_t *src = NULL, *gtw = NULL;
740
741 DBG2(DBG_KNL, "getting address to reach %H", dest);
742
743 memset(&request, 0, sizeof(request));
744
745 hdr = (struct nlmsghdr*)request;
746 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
747 hdr->nlmsg_type = RTM_GETROUTE;
748 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
749
750 msg = (struct rtmsg*)NLMSG_DATA(hdr);
751 msg->rtm_family = dest->get_family(dest);
752
753 chunk = dest->get_address(dest);
754 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
755 if (candidate)
756 {
757 chunk = candidate->get_address(candidate);
758 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
759 }
760
761 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
762 {
763 DBG1(DBG_KNL, "getting address to %H failed", dest);
764 return NULL;
765 }
766 current = out;
767 while (NLMSG_OK(current, len))
768 {
769 switch (current->nlmsg_type)
770 {
771 case NLMSG_DONE:
772 break;
773 case RTM_NEWROUTE:
774 {
775 struct rtattr *rta;
776 size_t rtasize;
777 chunk_t rta_gtw, rta_src, rta_dst;
778 u_int32_t rta_oif = 0;
779
780 rta_gtw = rta_src = rta_dst = chunk_empty;
781 msg = (struct rtmsg*)(NLMSG_DATA(current));
782 rta = RTM_RTA(msg);
783 rtasize = RTM_PAYLOAD(current);
784 while (RTA_OK(rta, rtasize))
785 {
786 switch (rta->rta_type)
787 {
788 case RTA_PREFSRC:
789 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
790 break;
791 case RTA_GATEWAY:
792 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
793 break;
794 case RTA_DST:
795 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
796 break;
797 case RTA_OIF:
798 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
799 {
800 rta_oif = *(u_int32_t*)RTA_DATA(rta);
801 }
802 break;
803 }
804 rta = RTA_NEXT(rta, rtasize);
805 }
806
807 /* apply the route if:
808 * - it is not from our own ipsec routing table
809 * - is better than a previous one
810 * - is the default route or
811 * - its destination net contains our destination
812 */
813 if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
814 && msg->rtm_dst_len > best
815 && (msg->rtm_dst_len == 0 || /* default route */
816 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
817 {
818 iterator_t *ifaces, *addrs;
819 iface_entry_t *iface;
820 addr_entry_t *addr;
821
822 best = msg->rtm_dst_len;
823 if (nexthop)
824 {
825 DESTROY_IF(gtw);
826 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
827 }
828 else if (rta_src.ptr)
829 {
830 DESTROY_IF(src);
831 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
832 if (get_vip_refcount(this, src))
833 { /* skip source address if it is installed by us */
834 DESTROY_IF(src);
835 src = NULL;
836 current = NLMSG_NEXT(current, len);
837 continue;
838 }
839 }
840 else
841 {
842 /* no source addr, get one from the interfaces */
843 ifaces = this->ifaces->create_iterator_locked(
844 this->ifaces, &this->mutex);
845 while (ifaces->iterate(ifaces, (void**)&iface))
846 {
847 if (iface->ifindex == rta_oif)
848 {
849 addrs = iface->addrs->create_iterator(
850 iface->addrs, TRUE);
851 while (addrs->iterate(addrs, (void**)&addr))
852 {
853 chunk_t ip = addr->ip->get_address(addr->ip);
854 if ((msg->rtm_dst_len == 0 &&
855 addr->ip->get_family(addr->ip) ==
856 dest->get_family(dest)) ||
857 addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
858 {
859 DESTROY_IF(src);
860 src = addr->ip->clone(addr->ip);
861 break;
862 }
863 }
864 addrs->destroy(addrs);
865 }
866 }
867 ifaces->destroy(ifaces);
868 }
869 }
870 /* FALL through */
871 }
872 default:
873 current = NLMSG_NEXT(current, len);
874 continue;
875 }
876 break;
877 }
878 free(out);
879
880 if (nexthop)
881 {
882 if (gtw)
883 {
884 return gtw;
885 }
886 return dest->clone(dest);
887 }
888 return src;
889 }
890
891 /**
892 * Implementation of kernel_net_t.get_source_addr.
893 */
894 static host_t* get_source_addr(private_kernel_netlink_net_t *this,
895 host_t *dest, host_t *src)
896 {
897 return get_route(this, dest, FALSE, src);
898 }
899
900 /**
901 * Implementation of kernel_net_t.get_nexthop.
902 */
903 static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
904 {
905 return get_route(this, dest, TRUE, NULL);
906 }
907
908 /**
909 * Manages the creation and deletion of ip addresses on an interface.
910 * By setting the appropriate nlmsg_type, the ip will be set or unset.
911 */
912 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
913 int flags, int if_index, host_t *ip)
914 {
915 unsigned char request[NETLINK_BUFFER_SIZE];
916 struct nlmsghdr *hdr;
917 struct ifaddrmsg *msg;
918 chunk_t chunk;
919
920 memset(&request, 0, sizeof(request));
921
922 chunk = ip->get_address(ip);
923
924 hdr = (struct nlmsghdr*)request;
925 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
926 hdr->nlmsg_type = nlmsg_type;
927 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
928
929 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
930 msg->ifa_family = ip->get_family(ip);
931 msg->ifa_flags = 0;
932 msg->ifa_prefixlen = 8 * chunk.len;
933 msg->ifa_scope = RT_SCOPE_UNIVERSE;
934 msg->ifa_index = if_index;
935
936 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
937
938 return this->socket->send_ack(this->socket, hdr);
939 }
940
941 /**
942 * Implementation of kernel_net_t.add_ip.
943 */
944 static status_t add_ip(private_kernel_netlink_net_t *this,
945 host_t *virtual_ip, host_t *iface_ip)
946 {
947 iface_entry_t *iface;
948 addr_entry_t *addr;
949 iterator_t *addrs, *ifaces;
950 int ifindex;
951
952 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
953
954 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
955 while (ifaces->iterate(ifaces, (void**)&iface))
956 {
957 bool iface_found = FALSE;
958
959 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
960 while (addrs->iterate(addrs, (void**)&addr))
961 {
962 if (iface_ip->ip_equals(iface_ip, addr->ip))
963 {
964 iface_found = TRUE;
965 }
966 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
967 {
968 addr->refcount++;
969 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
970 virtual_ip, iface->ifname);
971 addrs->destroy(addrs);
972 ifaces->destroy(ifaces);
973 return SUCCESS;
974 }
975 }
976 addrs->destroy(addrs);
977
978 if (iface_found)
979 {
980 ifindex = iface->ifindex;
981 addr = malloc_thing(addr_entry_t);
982 addr->ip = virtual_ip->clone(virtual_ip);
983 addr->refcount = 0;
984 addr->virtual = TRUE;
985 addr->scope = RT_SCOPE_UNIVERSE;
986 iface->addrs->insert_last(iface->addrs, addr);
987
988 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
989 ifindex, virtual_ip) == SUCCESS)
990 {
991 while (get_vip_refcount(this, virtual_ip) == 0)
992 { /* wait until address appears */
993 pthread_cond_wait(&this->cond, &this->mutex);
994 }
995 ifaces->destroy(ifaces);
996 return SUCCESS;
997 }
998 ifaces->destroy(ifaces);
999 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1000 return FAILED;
1001 }
1002 }
1003 ifaces->destroy(ifaces);
1004
1005 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1006 "virtual IP %H", iface_ip, virtual_ip);
1007 return FAILED;
1008 }
1009
1010 /**
1011 * Implementation of kernel_net_t.del_ip.
1012 */
1013 static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
1014 {
1015 iface_entry_t *iface;
1016 addr_entry_t *addr;
1017 iterator_t *addrs, *ifaces;
1018 status_t status;
1019 int ifindex;
1020
1021 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1022
1023 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1024 while (ifaces->iterate(ifaces, (void**)&iface))
1025 {
1026 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1027 while (addrs->iterate(addrs, (void**)&addr))
1028 {
1029 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1030 {
1031 ifindex = iface->ifindex;
1032 if (addr->refcount == 1)
1033 {
1034 status = manage_ipaddr(this, RTM_DELADDR, 0,
1035 ifindex, virtual_ip);
1036 if (status == SUCCESS)
1037 { /* wait until the address is really gone */
1038 while (get_vip_refcount(this, virtual_ip) > 0)
1039 {
1040 pthread_cond_wait(&this->cond, &this->mutex);
1041 }
1042 }
1043 addrs->destroy(addrs);
1044 ifaces->destroy(ifaces);
1045 return status;
1046 }
1047 else
1048 {
1049 addr->refcount--;
1050 }
1051 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1052 virtual_ip);
1053 addrs->destroy(addrs);
1054 ifaces->destroy(ifaces);
1055 return SUCCESS;
1056 }
1057 }
1058 addrs->destroy(addrs);
1059 }
1060 ifaces->destroy(ifaces);
1061
1062 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1063 return FAILED;
1064 }
1065
1066 /**
1067 * Manages source routes in the routing table.
1068 * By setting the appropriate nlmsg_type, the route gets added or removed.
1069 */
1070 static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
1071 int flags, chunk_t dst_net, u_int8_t prefixlen,
1072 host_t *gateway, host_t *src_ip, char *if_name)
1073 {
1074 unsigned char request[NETLINK_BUFFER_SIZE];
1075 struct nlmsghdr *hdr;
1076 struct rtmsg *msg;
1077 int ifindex;
1078 chunk_t chunk;
1079
1080 /* if route is 0.0.0.0/0, we can't install it, as it would
1081 * overwrite the default route. Instead, we add two routes:
1082 * 0.0.0.0/1 and 128.0.0.0/1 */
1083 if (this->routing_table == 0 && prefixlen == 0)
1084 {
1085 chunk_t half_net;
1086 u_int8_t half_prefixlen;
1087 status_t status;
1088
1089 half_net = chunk_alloca(dst_net.len);
1090 memset(half_net.ptr, 0, half_net.len);
1091 half_prefixlen = 1;
1092
1093 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1094 gateway, src_ip, if_name);
1095 half_net.ptr[0] |= 0x80;
1096 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1097 gateway, src_ip, if_name);
1098 return status;
1099 }
1100
1101 memset(&request, 0, sizeof(request));
1102
1103 hdr = (struct nlmsghdr*)request;
1104 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1105 hdr->nlmsg_type = nlmsg_type;
1106 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1107
1108 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1109 msg->rtm_family = src_ip->get_family(src_ip);
1110 msg->rtm_dst_len = prefixlen;
1111 msg->rtm_table = this->routing_table;
1112 msg->rtm_protocol = RTPROT_STATIC;
1113 msg->rtm_type = RTN_UNICAST;
1114 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1115
1116 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
1117 chunk = src_ip->get_address(src_ip);
1118 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1119 chunk = gateway->get_address(gateway);
1120 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1121 ifindex = get_interface_index(this, if_name);
1122 chunk.ptr = (char*)&ifindex;
1123 chunk.len = sizeof(ifindex);
1124 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1125
1126 return this->socket->send_ack(this->socket, hdr);
1127 }
1128
1129 /**
1130 * Implementation of kernel_net_t.add_route.
1131 */
1132 status_t add_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1133 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1134 {
1135 return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
1136 dst_net, prefixlen, gateway, src_ip, if_name);
1137 }
1138
1139 /**
1140 * Implementation of kernel_net_t.del_route.
1141 */
1142 status_t del_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1143 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1144 {
1145 return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
1146 gateway, src_ip, if_name);
1147 }
1148
1149 /**
1150 * Initialize a list of local addresses.
1151 */
1152 static status_t init_address_list(private_kernel_netlink_net_t *this)
1153 {
1154 char request[NETLINK_BUFFER_SIZE];
1155 struct nlmsghdr *out, *current, *in;
1156 struct rtgenmsg *msg;
1157 size_t len;
1158 iterator_t *ifaces, *addrs;
1159 iface_entry_t *iface;
1160 addr_entry_t *addr;
1161
1162 DBG1(DBG_KNL, "listening on interfaces:");
1163
1164 memset(&request, 0, sizeof(request));
1165
1166 in = (struct nlmsghdr*)&request;
1167 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1168 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1169 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1170 msg->rtgen_family = AF_UNSPEC;
1171
1172 /* get all links */
1173 in->nlmsg_type = RTM_GETLINK;
1174 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1175 {
1176 return FAILED;
1177 }
1178 current = out;
1179 while (NLMSG_OK(current, len))
1180 {
1181 switch (current->nlmsg_type)
1182 {
1183 case NLMSG_DONE:
1184 break;
1185 case RTM_NEWLINK:
1186 process_link(this, current, FALSE);
1187 /* fall through */
1188 default:
1189 current = NLMSG_NEXT(current, len);
1190 continue;
1191 }
1192 break;
1193 }
1194 free(out);
1195
1196 /* get all interface addresses */
1197 in->nlmsg_type = RTM_GETADDR;
1198 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1199 {
1200 return FAILED;
1201 }
1202 current = out;
1203 while (NLMSG_OK(current, len))
1204 {
1205 switch (current->nlmsg_type)
1206 {
1207 case NLMSG_DONE:
1208 break;
1209 case RTM_NEWADDR:
1210 process_addr(this, current, FALSE);
1211 /* fall through */
1212 default:
1213 current = NLMSG_NEXT(current, len);
1214 continue;
1215 }
1216 break;
1217 }
1218 free(out);
1219
1220 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1221 while (ifaces->iterate(ifaces, (void**)&iface))
1222 {
1223 if (iface->flags & IFF_UP)
1224 {
1225 DBG1(DBG_KNL, " %s", iface->ifname);
1226 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1227 while (addrs->iterate(addrs, (void**)&addr))
1228 {
1229 DBG1(DBG_KNL, " %H", addr->ip);
1230 }
1231 addrs->destroy(addrs);
1232 }
1233 }
1234 ifaces->destroy(ifaces);
1235 return SUCCESS;
1236 }
1237
1238 /**
1239 * create or delete a rule to use our routing table
1240 */
1241 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
1242 u_int32_t table, u_int32_t prio)
1243 {
1244 unsigned char request[NETLINK_BUFFER_SIZE];
1245 struct nlmsghdr *hdr;
1246 struct rtmsg *msg;
1247 chunk_t chunk;
1248
1249 memset(&request, 0, sizeof(request));
1250 hdr = (struct nlmsghdr*)request;
1251 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1252 hdr->nlmsg_type = nlmsg_type;
1253 if (nlmsg_type == RTM_NEWRULE)
1254 {
1255 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1256 }
1257 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1258
1259 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1260 msg->rtm_table = table;
1261 msg->rtm_family = AF_INET;
1262 msg->rtm_protocol = RTPROT_BOOT;
1263 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1264 msg->rtm_type = RTN_UNICAST;
1265
1266 chunk = chunk_from_thing(prio);
1267 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1268
1269 return this->socket->send_ack(this->socket, hdr);
1270 }
1271
1272 /**
1273 * Implementation of kernel_netlink_net_t.destroy.
1274 */
1275 static void destroy(private_kernel_netlink_net_t *this)
1276 {
1277 if (this->routing_table)
1278 {
1279 manage_rule(this, RTM_DELRULE, this->routing_table,
1280 this->routing_table_prio);
1281 }
1282
1283 this->job->cancel(this->job);
1284 close(this->socket_events);
1285 this->socket->destroy(this->socket);
1286 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
1287 free(this);
1288 }
1289
1290 /*
1291 * Described in header.
1292 */
1293 kernel_netlink_net_t *kernel_netlink_net_create()
1294 {
1295 private_kernel_netlink_net_t *this = malloc_thing(private_kernel_netlink_net_t);
1296 struct sockaddr_nl addr;
1297
1298 /* public functions */
1299 this->public.interface.get_interface = (char*(*)(kernel_net_t*,host_t*))get_interface_name;
1300 this->public.interface.create_address_enumerator = (enumerator_t*(*)(kernel_net_t*,bool,bool))create_address_enumerator;
1301 this->public.interface.get_source_addr = (host_t*(*)(kernel_net_t*, host_t *dest, host_t *src))get_source_addr;
1302 this->public.interface.get_nexthop = (host_t*(*)(kernel_net_t*, host_t *dest))get_nexthop;
1303 this->public.interface.add_ip = (status_t(*)(kernel_net_t*,host_t*,host_t*)) add_ip;
1304 this->public.interface.del_ip = (status_t(*)(kernel_net_t*,host_t*)) del_ip;
1305 this->public.interface.add_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
1306 this->public.interface.del_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
1307 this->public.interface.destroy = (void(*)(kernel_net_t*)) destroy;
1308
1309 /* private members */
1310 this->ifaces = linked_list_create();
1311 pthread_mutex_init(&this->mutex, NULL);
1312 pthread_cond_init(&this->cond, NULL);
1313 timerclear(&this->last_roam);
1314 this->routing_table = lib->settings->get_int(lib->settings,
1315 "charon.routing_table", IPSEC_ROUTING_TABLE);
1316 this->routing_table_prio = lib->settings->get_int(lib->settings,
1317 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
1318 this->process_route = lib->settings->get_bool(lib->settings,
1319 "charon.process_route", TRUE);
1320
1321 this->socket = netlink_socket_create(NETLINK_ROUTE);
1322
1323 memset(&addr, 0, sizeof(addr));
1324 addr.nl_family = AF_NETLINK;
1325
1326 /* create and bind RT socket for events (address/interface/route changes) */
1327 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1328 if (this->socket_events <= 0)
1329 {
1330 charon->kill(charon, "unable to create RT event socket");
1331 }
1332 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1333 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
1334 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
1335 {
1336 charon->kill(charon, "unable to bind RT event socket");
1337 }
1338
1339 this->job = callback_job_create((callback_job_cb_t)receive_events,
1340 this, NULL, NULL);
1341 charon->processor->queue_job(charon->processor, (job_t*)this->job);
1342
1343 if (init_address_list(this) != SUCCESS)
1344 {
1345 charon->kill(charon, "unable to get interface list");
1346 }
1347
1348 if (this->routing_table)
1349 {
1350 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
1351 this->routing_table_prio) != SUCCESS)
1352 {
1353 DBG1(DBG_KNL, "unable to create routing table rule");
1354 }
1355 }
1356
1357 return &this->public;
1358 }