Fixed a crash in source address lookup
[strongswan.git] / src / charon / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 */
16
17 #include <sys/socket.h>
18 #include <linux/netlink.h>
19 #include <linux/rtnetlink.h>
20 #include <pthread.h>
21 #include <unistd.h>
22 #include <errno.h>
23 #include <net/if.h>
24
25 #include "kernel_netlink_net.h"
26 #include "kernel_netlink_shared.h"
27
28 #include <daemon.h>
29 #include <utils/mutex.h>
30 #include <utils/linked_list.h>
31 #include <processing/jobs/callback_job.h>
32 #include <processing/jobs/roam_job.h>
33
/** delay before firing roam jobs, in milliseconds */
#define ROAM_DELAY 100
36
37 typedef struct addr_entry_t addr_entry_t;
38
39 /**
40 * IP address in an inface_entry_t
41 */
42 struct addr_entry_t {
43
44 /** The ip address */
45 host_t *ip;
46
47 /** virtual IP managed by us */
48 bool virtual;
49
50 /** scope of the address */
51 u_char scope;
52
53 /** Number of times this IP is used, if virtual */
54 u_int refcount;
55 };
56
57 /**
58 * destroy a addr_entry_t object
59 */
60 static void addr_entry_destroy(addr_entry_t *this)
61 {
62 this->ip->destroy(this->ip);
63 free(this);
64 }
65
66 typedef struct iface_entry_t iface_entry_t;
67
68 /**
69 * A network interface on this system, containing addr_entry_t's
70 */
71 struct iface_entry_t {
72
73 /** interface index */
74 int ifindex;
75
76 /** name of the interface */
77 char ifname[IFNAMSIZ];
78
79 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
80 u_int flags;
81
82 /** list of addresses as host_t */
83 linked_list_t *addrs;
84 };
85
86 /**
87 * destroy an interface entry
88 */
89 static void iface_entry_destroy(iface_entry_t *this)
90 {
91 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
92 free(this);
93 }
94
95 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
96
97 /**
98 * Private variables and functions of kernel_netlink_net class.
99 */
100 struct private_kernel_netlink_net_t {
101 /**
102 * Public part of the kernel_netlink_net_t object.
103 */
104 kernel_netlink_net_t public;
105
106 /**
107 * mutex to lock access to various lists
108 */
109 mutex_t *mutex;
110
111 /**
112 * condition variable to signal virtual IP add/removal
113 */
114 condvar_t *condvar;
115
116 /**
117 * Cached list of interfaces and its addresses (iface_entry_t)
118 */
119 linked_list_t *ifaces;
120
121 /**
122 * job receiving netlink events
123 */
124 callback_job_t *job;
125
126 /**
127 * netlink rt socket (routing)
128 */
129 netlink_socket_t *socket;
130
131 /**
132 * Netlink rt socket to receive address change events
133 */
134 int socket_events;
135
136 /**
137 * time of the last roam_job
138 */
139 timeval_t last_roam;
140
141 /**
142 * routing table to install routes
143 */
144 int routing_table;
145
146 /**
147 * priority of used routing table
148 */
149 int routing_table_prio;
150
151 /**
152 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
153 */
154 bool process_route;
155
156 /**
157 * whether to actually install virtual IPs
158 */
159 bool install_virtual_ip;
160 };
161
162 /**
163 * get the refcount of a virtual ip
164 */
165 static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
166 {
167 iterator_t *ifaces, *addrs;
168 iface_entry_t *iface;
169 addr_entry_t *addr;
170 int refcount = 0;
171
172 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
173 while (ifaces->iterate(ifaces, (void**)&iface))
174 {
175 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
176 while (addrs->iterate(addrs, (void**)&addr))
177 {
178 if (addr->virtual && (iface->flags & IFF_UP) &&
179 ip->ip_equals(ip, addr->ip))
180 {
181 refcount = addr->refcount;
182 break;
183 }
184 }
185 addrs->destroy(addrs);
186 if (refcount)
187 {
188 break;
189 }
190 }
191 ifaces->destroy(ifaces);
192
193 return refcount;
194 }
195
196 /**
197 * start a roaming job. We delay it for a second and fire only one job
198 * for multiple events. Otherwise we would create two many jobs.
199 */
200 static void fire_roam_job(private_kernel_netlink_net_t *this, bool address)
201 {
202 timeval_t now;
203
204 time_monotonic(&now);
205 if (timercmp(&now, &this->last_roam, >))
206 {
207 now.tv_usec += ROAM_DELAY * 1000;
208 while (now.tv_usec > 1000000)
209 {
210 now.tv_sec++;
211 now.tv_usec -= 1000000;
212 }
213 this->last_roam = now;
214 charon->scheduler->schedule_job_ms(charon->scheduler,
215 (job_t*)roam_job_create(address), ROAM_DELAY);
216 }
217 }
218
219 /**
220 * process RTM_NEWLINK/RTM_DELLINK from kernel
221 */
222 static void process_link(private_kernel_netlink_net_t *this,
223 struct nlmsghdr *hdr, bool event)
224 {
225 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
226 struct rtattr *rta = IFLA_RTA(msg);
227 size_t rtasize = IFLA_PAYLOAD (hdr);
228 enumerator_t *enumerator;
229 iface_entry_t *current, *entry = NULL;
230 char *name = NULL;
231 bool update = FALSE;
232
233 while(RTA_OK(rta, rtasize))
234 {
235 switch (rta->rta_type)
236 {
237 case IFLA_IFNAME:
238 name = RTA_DATA(rta);
239 break;
240 }
241 rta = RTA_NEXT(rta, rtasize);
242 }
243 if (!name)
244 {
245 name = "(unknown)";
246 }
247
248 this->mutex->lock(this->mutex);
249 switch (hdr->nlmsg_type)
250 {
251 case RTM_NEWLINK:
252 {
253 if (msg->ifi_flags & IFF_LOOPBACK)
254 { /* ignore loopback interfaces */
255 break;
256 }
257 enumerator = this->ifaces->create_enumerator(this->ifaces);
258 while (enumerator->enumerate(enumerator, &current))
259 {
260 if (current->ifindex == msg->ifi_index)
261 {
262 entry = current;
263 break;
264 }
265 }
266 enumerator->destroy(enumerator);
267 if (!entry)
268 {
269 entry = malloc_thing(iface_entry_t);
270 entry->ifindex = msg->ifi_index;
271 entry->flags = 0;
272 entry->addrs = linked_list_create();
273 this->ifaces->insert_last(this->ifaces, entry);
274 }
275 memcpy(entry->ifname, name, IFNAMSIZ);
276 entry->ifname[IFNAMSIZ-1] = '\0';
277 if (event)
278 {
279 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
280 {
281 update = TRUE;
282 DBG1(DBG_KNL, "interface %s activated", name);
283 }
284 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
285 {
286 update = TRUE;
287 DBG1(DBG_KNL, "interface %s deactivated", name);
288 }
289 }
290 entry->flags = msg->ifi_flags;
291 break;
292 }
293 case RTM_DELLINK:
294 {
295 enumerator = this->ifaces->create_enumerator(this->ifaces);
296 while (enumerator->enumerate(enumerator, &current))
297 {
298 if (current->ifindex == msg->ifi_index)
299 {
300 /* we do not remove it, as an address may be added to a
301 * "down" interface and we wan't to know that. */
302 current->flags = msg->ifi_flags;
303 break;
304 }
305 }
306 enumerator->destroy(enumerator);
307 break;
308 }
309 }
310 this->mutex->unlock(this->mutex);
311
312 /* send an update to all IKE_SAs */
313 if (update && event)
314 {
315 fire_roam_job(this, TRUE);
316 }
317 }
318
319 /**
320 * process RTM_NEWADDR/RTM_DELADDR from kernel
321 */
322 static void process_addr(private_kernel_netlink_net_t *this,
323 struct nlmsghdr *hdr, bool event)
324 {
325 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
326 struct rtattr *rta = IFA_RTA(msg);
327 size_t rtasize = IFA_PAYLOAD (hdr);
328 host_t *host = NULL;
329 enumerator_t *ifaces, *addrs;
330 iface_entry_t *iface;
331 addr_entry_t *addr;
332 chunk_t local = chunk_empty, address = chunk_empty;
333 bool update = FALSE, found = FALSE, changed = FALSE;
334
335 while(RTA_OK(rta, rtasize))
336 {
337 switch (rta->rta_type)
338 {
339 case IFA_LOCAL:
340 local.ptr = RTA_DATA(rta);
341 local.len = RTA_PAYLOAD(rta);
342 break;
343 case IFA_ADDRESS:
344 address.ptr = RTA_DATA(rta);
345 address.len = RTA_PAYLOAD(rta);
346 break;
347 }
348 rta = RTA_NEXT(rta, rtasize);
349 }
350
351 /* For PPP interfaces, we need the IFA_LOCAL address,
352 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
353 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
354 if (local.ptr)
355 {
356 host = host_create_from_chunk(msg->ifa_family, local, 0);
357 }
358 else if (address.ptr)
359 {
360 host = host_create_from_chunk(msg->ifa_family, address, 0);
361 }
362
363 if (host == NULL)
364 { /* bad family? */
365 return;
366 }
367
368 this->mutex->lock(this->mutex);
369 ifaces = this->ifaces->create_enumerator(this->ifaces);
370 while (ifaces->enumerate(ifaces, &iface))
371 {
372 if (iface->ifindex == msg->ifa_index)
373 {
374 addrs = iface->addrs->create_enumerator(iface->addrs);
375 while (addrs->enumerate(addrs, &addr))
376 {
377 if (host->ip_equals(host, addr->ip))
378 {
379 found = TRUE;
380 if (hdr->nlmsg_type == RTM_DELADDR)
381 {
382 iface->addrs->remove_at(iface->addrs, addrs);
383 if (!addr->virtual)
384 {
385 changed = TRUE;
386 DBG1(DBG_KNL, "%H disappeared from %s",
387 host, iface->ifname);
388 }
389 addr_entry_destroy(addr);
390 }
391 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
392 {
393 addr->refcount = 1;
394 }
395 }
396 }
397 addrs->destroy(addrs);
398
399 if (hdr->nlmsg_type == RTM_NEWADDR)
400 {
401 if (!found)
402 {
403 found = TRUE;
404 changed = TRUE;
405 addr = malloc_thing(addr_entry_t);
406 addr->ip = host->clone(host);
407 addr->virtual = FALSE;
408 addr->refcount = 1;
409 addr->scope = msg->ifa_scope;
410
411 iface->addrs->insert_last(iface->addrs, addr);
412 if (event)
413 {
414 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
415 }
416 }
417 }
418 if (found && (iface->flags & IFF_UP))
419 {
420 update = TRUE;
421 }
422 break;
423 }
424 }
425 ifaces->destroy(ifaces);
426 this->mutex->unlock(this->mutex);
427 host->destroy(host);
428
429 /* send an update to all IKE_SAs */
430 if (update && event && changed)
431 {
432 fire_roam_job(this, TRUE);
433 }
434 }
435
436 /**
437 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
438 */
439 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
440 {
441 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
442 struct rtattr *rta = RTM_RTA(msg);
443 size_t rtasize = RTM_PAYLOAD(hdr);
444 host_t *host = NULL;
445
446 /* ignore routes added by us */
447 if (msg->rtm_table && msg->rtm_table == this->routing_table)
448 {
449 return;
450 }
451
452 while (RTA_OK(rta, rtasize))
453 {
454 switch (rta->rta_type)
455 {
456 case RTA_PREFSRC:
457 host = host_create_from_chunk(msg->rtm_family,
458 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
459 break;
460 }
461 rta = RTA_NEXT(rta, rtasize);
462 }
463 if (host)
464 {
465 this->mutex->lock(this->mutex);
466 if (!get_vip_refcount(this, host))
467 { /* ignore routes added for virtual IPs */
468 fire_roam_job(this, FALSE);
469 }
470 this->mutex->unlock(this->mutex);
471 host->destroy(host);
472 }
473 }
474
475 /**
476 * Receives events from kernel
477 */
478 static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
479 {
480 char response[1024];
481 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
482 struct sockaddr_nl addr;
483 socklen_t addr_len = sizeof(addr);
484 int len, oldstate;
485
486 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
487 len = recvfrom(this->socket_events, response, sizeof(response), 0,
488 (struct sockaddr*)&addr, &addr_len);
489 pthread_setcancelstate(oldstate, NULL);
490
491 if (len < 0)
492 {
493 switch (errno)
494 {
495 case EINTR:
496 /* interrupted, try again */
497 return JOB_REQUEUE_DIRECT;
498 case EAGAIN:
499 /* no data ready, select again */
500 return JOB_REQUEUE_DIRECT;
501 default:
502 DBG1(DBG_KNL, "unable to receive from rt event socket");
503 sleep(1);
504 return JOB_REQUEUE_FAIR;
505 }
506 }
507
508 if (addr.nl_pid != 0)
509 { /* not from kernel. not interested, try another one */
510 return JOB_REQUEUE_DIRECT;
511 }
512
513 while (NLMSG_OK(hdr, len))
514 {
515 /* looks good so far, dispatch netlink message */
516 switch (hdr->nlmsg_type)
517 {
518 case RTM_NEWADDR:
519 case RTM_DELADDR:
520 process_addr(this, hdr, TRUE);
521 this->condvar->broadcast(this->condvar);
522 break;
523 case RTM_NEWLINK:
524 case RTM_DELLINK:
525 process_link(this, hdr, TRUE);
526 this->condvar->broadcast(this->condvar);
527 break;
528 case RTM_NEWROUTE:
529 case RTM_DELROUTE:
530 if (this->process_route)
531 {
532 process_route(this, hdr);
533 }
534 break;
535 default:
536 break;
537 }
538 hdr = NLMSG_NEXT(hdr, len);
539 }
540 return JOB_REQUEUE_DIRECT;
541 }
542
543 /** enumerator over addresses */
544 typedef struct {
545 private_kernel_netlink_net_t* this;
546 /** whether to enumerate down interfaces */
547 bool include_down_ifaces;
548 /** whether to enumerate virtual ip addresses */
549 bool include_virtual_ips;
550 } address_enumerator_t;
551
552 /**
553 * cleanup function for address enumerator
554 */
555 static void address_enumerator_destroy(address_enumerator_t *data)
556 {
557 data->this->mutex->unlock(data->this->mutex);
558 free(data);
559 }
560
561 /**
562 * filter for addresses
563 */
564 static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
565 {
566 if (!data->include_virtual_ips && (*in)->virtual)
567 { /* skip virtual interfaces added by us */
568 return FALSE;
569 }
570 if ((*in)->scope >= RT_SCOPE_LINK)
571 { /* skip addresses with a unusable scope */
572 return FALSE;
573 }
574 *out = (*in)->ip;
575 return TRUE;
576 }
577
578 /**
579 * enumerator constructor for interfaces
580 */
581 static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
582 {
583 return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
584 (void*)filter_addresses, data, NULL);
585 }
586
587 /**
588 * filter for interfaces
589 */
590 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
591 {
592 if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
593 { /* skip interfaces not up */
594 return FALSE;
595 }
596 *out = *in;
597 return TRUE;
598 }
599
600 /**
601 * implementation of kernel_net_t.create_address_enumerator
602 */
603 static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
604 bool include_down_ifaces, bool include_virtual_ips)
605 {
606 address_enumerator_t *data = malloc_thing(address_enumerator_t);
607 data->this = this;
608 data->include_down_ifaces = include_down_ifaces;
609 data->include_virtual_ips = include_virtual_ips;
610
611 this->mutex->lock(this->mutex);
612 return enumerator_create_nested(
613 enumerator_create_filter(this->ifaces->create_enumerator(this->ifaces),
614 (void*)filter_interfaces, data, NULL),
615 (void*)create_iface_enumerator, data, (void*)address_enumerator_destroy);
616 }
617
618 /**
619 * implementation of kernel_net_t.get_interface_name
620 */
621 static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
622 {
623 enumerator_t *ifaces, *addrs;
624 iface_entry_t *iface;
625 addr_entry_t *addr;
626 char *name = NULL;
627
628 DBG2(DBG_KNL, "getting interface name for %H", ip);
629
630 this->mutex->lock(this->mutex);
631 ifaces = this->ifaces->create_enumerator(this->ifaces);
632 while (ifaces->enumerate(ifaces, &iface))
633 {
634 addrs = iface->addrs->create_enumerator(iface->addrs);
635 while (addrs->enumerate(addrs, &addr))
636 {
637 if (ip->ip_equals(ip, addr->ip))
638 {
639 name = strdup(iface->ifname);
640 break;
641 }
642 }
643 addrs->destroy(addrs);
644 if (name)
645 {
646 break;
647 }
648 }
649 ifaces->destroy(ifaces);
650 this->mutex->unlock(this->mutex);
651
652 if (name)
653 {
654 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
655 }
656 else
657 {
658 DBG2(DBG_KNL, "%H is not a local address", ip);
659 }
660 return name;
661 }
662
663 /**
664 * get the index of an interface by name
665 */
666 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
667 {
668 enumerator_t *ifaces;
669 iface_entry_t *iface;
670 int ifindex = 0;
671
672 DBG2(DBG_KNL, "getting iface index for %s", name);
673
674 this->mutex->lock(this->mutex);
675 ifaces = this->ifaces->create_enumerator(this->ifaces);
676 while (ifaces->enumerate(ifaces, &iface))
677 {
678 if (streq(name, iface->ifname))
679 {
680 ifindex = iface->ifindex;
681 break;
682 }
683 }
684 ifaces->destroy(ifaces);
685 this->mutex->unlock(this->mutex);
686
687 if (ifindex == 0)
688 {
689 DBG1(DBG_KNL, "unable to get interface index for %s", name);
690 }
691 return ifindex;
692 }
693
694 /**
695 * Check if an interface with a given index is up
696 */
697 static bool is_interface_up(private_kernel_netlink_net_t *this, int index)
698 {
699 enumerator_t *ifaces;
700 iface_entry_t *iface;
701 /* default to TRUE for interface we do not monitor (e.g. lo) */
702 bool up = TRUE;
703
704 ifaces = this->ifaces->create_enumerator(this->ifaces);
705 while (ifaces->enumerate(ifaces, &iface))
706 {
707 if (iface->ifindex == index)
708 {
709 up = iface->flags & IFF_UP;
710 break;
711 }
712 }
713 ifaces->destroy(ifaces);
714 return up;
715 }
716
717 /**
718 * check if an address (chunk) addr is in subnet (net with net_len net bits)
719 */
720 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
721 {
722 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
723 int byte = 0;
724
725 if (net_len == 0)
726 { /* any address matches a /0 network */
727 return TRUE;
728 }
729 if (addr.len != net.len || net_len > 8 * net.len )
730 {
731 return FALSE;
732 }
733 /* scan through all bytes in network order */
734 while (net_len > 0)
735 {
736 if (net_len < 8)
737 {
738 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
739 }
740 else
741 {
742 if (addr.ptr[byte] != net.ptr[byte])
743 {
744 return FALSE;
745 }
746 byte++;
747 net_len -= 8;
748 }
749 }
750 return TRUE;
751 }
752
753 /**
754 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
755 */
756 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
757 bool nexthop, host_t *candidate)
758 {
759 netlink_buf_t request;
760 struct nlmsghdr *hdr, *out, *current;
761 struct rtmsg *msg;
762 chunk_t chunk;
763 size_t len;
764 int best = -1;
765 host_t *src = NULL, *gtw = NULL;
766
767 DBG2(DBG_KNL, "getting address to reach %H", dest);
768
769 memset(&request, 0, sizeof(request));
770
771 hdr = (struct nlmsghdr*)request;
772 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
773 hdr->nlmsg_type = RTM_GETROUTE;
774 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
775
776 msg = (struct rtmsg*)NLMSG_DATA(hdr);
777 msg->rtm_family = dest->get_family(dest);
778 if (candidate)
779 {
780 chunk = candidate->get_address(candidate);
781 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
782 }
783 chunk = dest->get_address(dest);
784 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
785
786 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
787 {
788 DBG1(DBG_KNL, "getting address to %H failed", dest);
789 return NULL;
790 }
791 this->mutex->lock(this->mutex);
792
793 for (current = out; NLMSG_OK(current, len);
794 current = NLMSG_NEXT(current, len))
795 {
796 switch (current->nlmsg_type)
797 {
798 case NLMSG_DONE:
799 break;
800 case RTM_NEWROUTE:
801 {
802 struct rtattr *rta;
803 size_t rtasize;
804 chunk_t rta_gtw, rta_src, rta_dst;
805 u_int32_t rta_oif = 0;
806 host_t *new_src, *new_gtw;
807
808 rta_gtw = rta_src = rta_dst = chunk_empty;
809 msg = (struct rtmsg*)(NLMSG_DATA(current));
810 rta = RTM_RTA(msg);
811 rtasize = RTM_PAYLOAD(current);
812 while (RTA_OK(rta, rtasize))
813 {
814 switch (rta->rta_type)
815 {
816 case RTA_PREFSRC:
817 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
818 break;
819 case RTA_GATEWAY:
820 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
821 break;
822 case RTA_DST:
823 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
824 break;
825 case RTA_OIF:
826 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
827 {
828 rta_oif = *(u_int32_t*)RTA_DATA(rta);
829 }
830 break;
831 }
832 rta = RTA_NEXT(rta, rtasize);
833 }
834 if (msg->rtm_dst_len <= best)
835 { /* not better than a previous one */
836 continue;
837 }
838 if (this->routing_table != 0 &&
839 msg->rtm_table == this->routing_table)
840 { /* route is from our own ipsec routing table */
841 continue;
842 }
843 if (rta_oif && !is_interface_up(this, rta_oif))
844 { /* interface is down */
845 continue;
846 }
847 if (!addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))
848 { /* route destination does not contain dest */
849 continue;
850 }
851
852 if (nexthop)
853 {
854 /* nexthop lookup, return gateway if any */
855 DESTROY_IF(gtw);
856 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
857 best = msg->rtm_dst_len;
858 continue;
859 }
860 if (rta_src.ptr)
861 {
862 /* got a source address */
863 new_src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
864 if (new_src)
865 {
866 if (get_vip_refcount(this, new_src))
867 { /* skip source address if it is installed by us */
868 new_src->destroy(new_src);
869 }
870 else
871 {
872 DESTROY_IF(src);
873 src = new_src;
874 best = msg->rtm_dst_len;
875 }
876 }
877 continue;
878 }
879 if (rta_gtw.ptr)
880 { /* no source, but a gateway. Lookup source to reach gtw. */
881 new_gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
882 new_src = get_route(this, new_gtw, FALSE, candidate);
883 new_gtw->destroy(new_gtw);
884 if (new_src)
885 {
886 DESTROY_IF(src);
887 src = new_src;
888 best = msg->rtm_dst_len;
889 }
890 continue;
891 }
892 continue;
893 }
894 default:
895 continue;
896 }
897 break;
898 }
899 free(out);
900 this->mutex->unlock(this->mutex);
901
902 if (nexthop)
903 {
904 if (gtw)
905 {
906 return gtw;
907 }
908 return dest->clone(dest);
909 }
910 return src;
911 }
912
913 /**
914 * Implementation of kernel_net_t.get_source_addr.
915 */
916 static host_t* get_source_addr(private_kernel_netlink_net_t *this,
917 host_t *dest, host_t *src)
918 {
919 return get_route(this, dest, FALSE, src);
920 }
921
922 /**
923 * Implementation of kernel_net_t.get_nexthop.
924 */
925 static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
926 {
927 return get_route(this, dest, TRUE, NULL);
928 }
929
930 /**
931 * Manages the creation and deletion of ip addresses on an interface.
932 * By setting the appropriate nlmsg_type, the ip will be set or unset.
933 */
934 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
935 int flags, int if_index, host_t *ip)
936 {
937 netlink_buf_t request;
938 struct nlmsghdr *hdr;
939 struct ifaddrmsg *msg;
940 chunk_t chunk;
941
942 memset(&request, 0, sizeof(request));
943
944 chunk = ip->get_address(ip);
945
946 hdr = (struct nlmsghdr*)request;
947 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
948 hdr->nlmsg_type = nlmsg_type;
949 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
950
951 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
952 msg->ifa_family = ip->get_family(ip);
953 msg->ifa_flags = 0;
954 msg->ifa_prefixlen = 8 * chunk.len;
955 msg->ifa_scope = RT_SCOPE_UNIVERSE;
956 msg->ifa_index = if_index;
957
958 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
959
960 return this->socket->send_ack(this->socket, hdr);
961 }
962
963 /**
964 * Implementation of kernel_net_t.add_ip.
965 */
966 static status_t add_ip(private_kernel_netlink_net_t *this,
967 host_t *virtual_ip, host_t *iface_ip)
968 {
969 iface_entry_t *iface;
970 addr_entry_t *addr;
971 enumerator_t *addrs, *ifaces;
972 int ifindex;
973
974 if (!this->install_virtual_ip)
975 { /* disabled by config */
976 return SUCCESS;
977 }
978
979 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
980
981 this->mutex->lock(this->mutex);
982 ifaces = this->ifaces->create_enumerator(this->ifaces);
983 while (ifaces->enumerate(ifaces, &iface))
984 {
985 bool iface_found = FALSE;
986
987 addrs = iface->addrs->create_enumerator(iface->addrs);
988 while (addrs->enumerate(addrs, &addr))
989 {
990 if (iface_ip->ip_equals(iface_ip, addr->ip))
991 {
992 iface_found = TRUE;
993 }
994 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
995 {
996 addr->refcount++;
997 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
998 virtual_ip, iface->ifname);
999 addrs->destroy(addrs);
1000 ifaces->destroy(ifaces);
1001 this->mutex->unlock(this->mutex);
1002 return SUCCESS;
1003 }
1004 }
1005 addrs->destroy(addrs);
1006
1007 if (iface_found)
1008 {
1009 ifindex = iface->ifindex;
1010 addr = malloc_thing(addr_entry_t);
1011 addr->ip = virtual_ip->clone(virtual_ip);
1012 addr->refcount = 0;
1013 addr->virtual = TRUE;
1014 addr->scope = RT_SCOPE_UNIVERSE;
1015 iface->addrs->insert_last(iface->addrs, addr);
1016
1017 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1018 ifindex, virtual_ip) == SUCCESS)
1019 {
1020 while (get_vip_refcount(this, virtual_ip) == 0)
1021 { /* wait until address appears */
1022 this->condvar->wait(this->condvar, this->mutex);
1023 }
1024 ifaces->destroy(ifaces);
1025 this->mutex->unlock(this->mutex);
1026 return SUCCESS;
1027 }
1028 ifaces->destroy(ifaces);
1029 this->mutex->unlock(this->mutex);
1030 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1031 return FAILED;
1032 }
1033 }
1034 ifaces->destroy(ifaces);
1035 this->mutex->unlock(this->mutex);
1036
1037 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1038 "virtual IP %H", iface_ip, virtual_ip);
1039 return FAILED;
1040 }
1041
1042 /**
1043 * Implementation of kernel_net_t.del_ip.
1044 */
1045 static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
1046 {
1047 iface_entry_t *iface;
1048 addr_entry_t *addr;
1049 enumerator_t *addrs, *ifaces;
1050 status_t status;
1051 int ifindex;
1052
1053 if (!this->install_virtual_ip)
1054 { /* disabled by config */
1055 return SUCCESS;
1056 }
1057
1058 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1059
1060 this->mutex->lock(this->mutex);
1061 ifaces = this->ifaces->create_enumerator(this->ifaces);
1062 while (ifaces->enumerate(ifaces, &iface))
1063 {
1064 addrs = iface->addrs->create_enumerator(iface->addrs);
1065 while (addrs->enumerate(addrs, &addr))
1066 {
1067 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1068 {
1069 ifindex = iface->ifindex;
1070 if (addr->refcount == 1)
1071 {
1072 status = manage_ipaddr(this, RTM_DELADDR, 0,
1073 ifindex, virtual_ip);
1074 if (status == SUCCESS)
1075 { /* wait until the address is really gone */
1076 while (get_vip_refcount(this, virtual_ip) > 0)
1077 {
1078 this->condvar->wait(this->condvar, this->mutex);
1079 }
1080 }
1081 addrs->destroy(addrs);
1082 ifaces->destroy(ifaces);
1083 this->mutex->unlock(this->mutex);
1084 return status;
1085 }
1086 else
1087 {
1088 addr->refcount--;
1089 }
1090 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1091 virtual_ip);
1092 addrs->destroy(addrs);
1093 ifaces->destroy(ifaces);
1094 this->mutex->unlock(this->mutex);
1095 return SUCCESS;
1096 }
1097 }
1098 addrs->destroy(addrs);
1099 }
1100 ifaces->destroy(ifaces);
1101 this->mutex->unlock(this->mutex);
1102
1103 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1104 return FAILED;
1105 }
1106
1107 /**
1108 * Manages source routes in the routing table.
1109 * By setting the appropriate nlmsg_type, the route gets added or removed.
1110 */
1111 static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
1112 int flags, chunk_t dst_net, u_int8_t prefixlen,
1113 host_t *gateway, host_t *src_ip, char *if_name)
1114 {
1115 netlink_buf_t request;
1116 struct nlmsghdr *hdr;
1117 struct rtmsg *msg;
1118 int ifindex;
1119 chunk_t chunk;
1120
1121 /* if route is 0.0.0.0/0, we can't install it, as it would
1122 * overwrite the default route. Instead, we add two routes:
1123 * 0.0.0.0/1 and 128.0.0.0/1 */
1124 if (this->routing_table == 0 && prefixlen == 0)
1125 {
1126 chunk_t half_net;
1127 u_int8_t half_prefixlen;
1128 status_t status;
1129
1130 half_net = chunk_alloca(dst_net.len);
1131 memset(half_net.ptr, 0, half_net.len);
1132 half_prefixlen = 1;
1133
1134 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1135 gateway, src_ip, if_name);
1136 half_net.ptr[0] |= 0x80;
1137 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1138 gateway, src_ip, if_name);
1139 return status;
1140 }
1141
1142 memset(&request, 0, sizeof(request));
1143
1144 hdr = (struct nlmsghdr*)request;
1145 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1146 hdr->nlmsg_type = nlmsg_type;
1147 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1148
1149 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1150 msg->rtm_family = src_ip->get_family(src_ip);
1151 msg->rtm_dst_len = prefixlen;
1152 msg->rtm_table = this->routing_table;
1153 msg->rtm_protocol = RTPROT_STATIC;
1154 msg->rtm_type = RTN_UNICAST;
1155 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1156
1157 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
1158 chunk = src_ip->get_address(src_ip);
1159 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1160 chunk = gateway->get_address(gateway);
1161 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1162 ifindex = get_interface_index(this, if_name);
1163 chunk.ptr = (char*)&ifindex;
1164 chunk.len = sizeof(ifindex);
1165 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1166
1167 return this->socket->send_ack(this->socket, hdr);
1168 }
1169
1170 /**
1171 * Implementation of kernel_net_t.add_route.
1172 */
1173 static status_t add_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1174 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1175 {
1176 return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
1177 dst_net, prefixlen, gateway, src_ip, if_name);
1178 }
1179
1180 /**
1181 * Implementation of kernel_net_t.del_route.
1182 */
1183 static status_t del_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1184 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1185 {
1186 return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
1187 gateway, src_ip, if_name);
1188 }
1189
1190 /**
1191 * Initialize a list of local addresses.
1192 */
1193 static status_t init_address_list(private_kernel_netlink_net_t *this)
1194 {
1195 netlink_buf_t request;
1196 struct nlmsghdr *out, *current, *in;
1197 struct rtgenmsg *msg;
1198 size_t len;
1199 enumerator_t *ifaces, *addrs;
1200 iface_entry_t *iface;
1201 addr_entry_t *addr;
1202
1203 DBG1(DBG_KNL, "listening on interfaces:");
1204
1205 memset(&request, 0, sizeof(request));
1206
1207 in = (struct nlmsghdr*)&request;
1208 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1209 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1210 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1211 msg->rtgen_family = AF_UNSPEC;
1212
1213 /* get all links */
1214 in->nlmsg_type = RTM_GETLINK;
1215 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1216 {
1217 return FAILED;
1218 }
1219 current = out;
1220 while (NLMSG_OK(current, len))
1221 {
1222 switch (current->nlmsg_type)
1223 {
1224 case NLMSG_DONE:
1225 break;
1226 case RTM_NEWLINK:
1227 process_link(this, current, FALSE);
1228 /* fall through */
1229 default:
1230 current = NLMSG_NEXT(current, len);
1231 continue;
1232 }
1233 break;
1234 }
1235 free(out);
1236
1237 /* get all interface addresses */
1238 in->nlmsg_type = RTM_GETADDR;
1239 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1240 {
1241 return FAILED;
1242 }
1243 current = out;
1244 while (NLMSG_OK(current, len))
1245 {
1246 switch (current->nlmsg_type)
1247 {
1248 case NLMSG_DONE:
1249 break;
1250 case RTM_NEWADDR:
1251 process_addr(this, current, FALSE);
1252 /* fall through */
1253 default:
1254 current = NLMSG_NEXT(current, len);
1255 continue;
1256 }
1257 break;
1258 }
1259 free(out);
1260
1261 this->mutex->lock(this->mutex);
1262 ifaces = this->ifaces->create_enumerator(this->ifaces);
1263 while (ifaces->enumerate(ifaces, &iface))
1264 {
1265 if (iface->flags & IFF_UP)
1266 {
1267 DBG1(DBG_KNL, " %s", iface->ifname);
1268 addrs = iface->addrs->create_enumerator(iface->addrs);
1269 while (addrs->enumerate(addrs, (void**)&addr))
1270 {
1271 DBG1(DBG_KNL, " %H", addr->ip);
1272 }
1273 addrs->destroy(addrs);
1274 }
1275 }
1276 ifaces->destroy(ifaces);
1277 this->mutex->unlock(this->mutex);
1278 return SUCCESS;
1279 }
1280
1281 /**
1282 * create or delete a rule to use our routing table
1283 */
1284 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
1285 u_int32_t table, u_int32_t prio)
1286 {
1287 netlink_buf_t request;
1288 struct nlmsghdr *hdr;
1289 struct rtmsg *msg;
1290 chunk_t chunk;
1291
1292 memset(&request, 0, sizeof(request));
1293 hdr = (struct nlmsghdr*)request;
1294 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1295 hdr->nlmsg_type = nlmsg_type;
1296 if (nlmsg_type == RTM_NEWRULE)
1297 {
1298 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1299 }
1300 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1301
1302 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1303 msg->rtm_table = table;
1304 msg->rtm_family = AF_INET;
1305 msg->rtm_protocol = RTPROT_BOOT;
1306 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1307 msg->rtm_type = RTN_UNICAST;
1308
1309 chunk = chunk_from_thing(prio);
1310 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1311
1312 return this->socket->send_ack(this->socket, hdr);
1313 }
1314
1315 /**
1316 * Implementation of kernel_netlink_net_t.destroy.
1317 */
1318 static void destroy(private_kernel_netlink_net_t *this)
1319 {
1320 if (this->routing_table)
1321 {
1322 manage_rule(this, RTM_DELRULE, this->routing_table,
1323 this->routing_table_prio);
1324 }
1325
1326 this->job->cancel(this->job);
1327 close(this->socket_events);
1328 this->socket->destroy(this->socket);
1329 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
1330 this->condvar->destroy(this->condvar);
1331 this->mutex->destroy(this->mutex);
1332 free(this);
1333 }
1334
1335 /*
1336 * Described in header.
1337 */
1338 kernel_netlink_net_t *kernel_netlink_net_create()
1339 {
1340 private_kernel_netlink_net_t *this = malloc_thing(private_kernel_netlink_net_t);
1341 struct sockaddr_nl addr;
1342
1343 /* public functions */
1344 this->public.interface.get_interface = (char*(*)(kernel_net_t*,host_t*))get_interface_name;
1345 this->public.interface.create_address_enumerator = (enumerator_t*(*)(kernel_net_t*,bool,bool))create_address_enumerator;
1346 this->public.interface.get_source_addr = (host_t*(*)(kernel_net_t*, host_t *dest, host_t *src))get_source_addr;
1347 this->public.interface.get_nexthop = (host_t*(*)(kernel_net_t*, host_t *dest))get_nexthop;
1348 this->public.interface.add_ip = (status_t(*)(kernel_net_t*,host_t*,host_t*)) add_ip;
1349 this->public.interface.del_ip = (status_t(*)(kernel_net_t*,host_t*)) del_ip;
1350 this->public.interface.add_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
1351 this->public.interface.del_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
1352 this->public.interface.destroy = (void(*)(kernel_net_t*)) destroy;
1353
1354 /* private members */
1355 this->ifaces = linked_list_create();
1356 this->mutex = mutex_create(MUTEX_TYPE_RECURSIVE);
1357 this->condvar = condvar_create(CONDVAR_TYPE_DEFAULT);
1358 timerclear(&this->last_roam);
1359 this->routing_table = lib->settings->get_int(lib->settings,
1360 "charon.routing_table", ROUTING_TABLE);
1361 this->routing_table_prio = lib->settings->get_int(lib->settings,
1362 "charon.routing_table_prio", ROUTING_TABLE_PRIO);
1363 this->process_route = lib->settings->get_bool(lib->settings,
1364 "charon.process_route", TRUE);
1365 this->install_virtual_ip = lib->settings->get_bool(lib->settings,
1366 "charon.install_virtual_ip", TRUE);
1367
1368 this->socket = netlink_socket_create(NETLINK_ROUTE);
1369
1370 memset(&addr, 0, sizeof(addr));
1371 addr.nl_family = AF_NETLINK;
1372
1373 /* create and bind RT socket for events (address/interface/route changes) */
1374 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1375 if (this->socket_events <= 0)
1376 {
1377 charon->kill(charon, "unable to create RT event socket");
1378 }
1379 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1380 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
1381 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
1382 {
1383 charon->kill(charon, "unable to bind RT event socket");
1384 }
1385
1386 this->job = callback_job_create((callback_job_cb_t)receive_events,
1387 this, NULL, NULL);
1388 charon->processor->queue_job(charon->processor, (job_t*)this->job);
1389
1390 if (init_address_list(this) != SUCCESS)
1391 {
1392 charon->kill(charon, "unable to get interface list");
1393 }
1394
1395 if (this->routing_table)
1396 {
1397 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
1398 this->routing_table_prio) != SUCCESS)
1399 {
1400 DBG1(DBG_KNL, "unable to create routing table rule");
1401 }
1402 }
1403
1404 return &this->public;
1405 }