d8b05e1e24b9d228def19ac9b5dbb4d21bb6860a
[strongswan.git] / src / charon / plugins / kernel_netlink / kernel_netlink_net.c
1 /*
2 * Copyright (C) 2008 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 *
16 * $Id$
17 */
18
19 #include <sys/socket.h>
20 #include <linux/netlink.h>
21 #include <linux/rtnetlink.h>
22 #include <sys/time.h>
23 #include <pthread.h>
24 #include <unistd.h>
25 #include <errno.h>
26 #include <net/if.h>
27
28 #include "kernel_netlink_net.h"
29 #include "kernel_netlink_shared.h"
30
31 #include <daemon.h>
32 #include <utils/mutex.h>
33 #include <utils/linked_list.h>
34 #include <processing/jobs/callback_job.h>
35 #include <processing/jobs/roam_job.h>
36
/** time to wait, in milliseconds, before a scheduled roam job is fired */
#define ROAM_DELAY 100

/**
 * routing table used for routes installed by us, and the priority value
 * that goes with it; both may be predefined at build time
 */
#if !defined(IPSEC_ROUTING_TABLE)
#define IPSEC_ROUTING_TABLE 100
#endif
#if !defined(IPSEC_ROUTING_TABLE_PRIO)
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif
47
48 typedef struct addr_entry_t addr_entry_t;
49
50 /**
51 * IP address in an inface_entry_t
52 */
53 struct addr_entry_t {
54
55 /** The ip address */
56 host_t *ip;
57
58 /** virtual IP managed by us */
59 bool virtual;
60
61 /** scope of the address */
62 u_char scope;
63
64 /** Number of times this IP is used, if virtual */
65 u_int refcount;
66 };
67
68 /**
69 * destroy a addr_entry_t object
70 */
71 static void addr_entry_destroy(addr_entry_t *this)
72 {
73 this->ip->destroy(this->ip);
74 free(this);
75 }
76
77 typedef struct iface_entry_t iface_entry_t;
78
79 /**
80 * A network interface on this system, containing addr_entry_t's
81 */
82 struct iface_entry_t {
83
84 /** interface index */
85 int ifindex;
86
87 /** name of the interface */
88 char ifname[IFNAMSIZ];
89
90 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
91 u_int flags;
92
93 /** list of addresses as host_t */
94 linked_list_t *addrs;
95 };
96
97 /**
98 * destroy an interface entry
99 */
100 static void iface_entry_destroy(iface_entry_t *this)
101 {
102 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
103 free(this);
104 }
105
106 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
107
108 /**
109 * Private variables and functions of kernel_netlink_net class.
110 */
111 struct private_kernel_netlink_net_t {
112 /**
113 * Public part of the kernel_netlink_net_t object.
114 */
115 kernel_netlink_net_t public;
116
117 /**
118 * mutex to lock access to various lists
119 */
120 mutex_t *mutex;
121
122 /**
123 * condition variable to signal virtual IP add/removal
124 */
125 condvar_t *condvar;
126
127 /**
128 * Cached list of interfaces and its addresses (iface_entry_t)
129 */
130 linked_list_t *ifaces;
131
132 /**
133 * job receiving netlink events
134 */
135 callback_job_t *job;
136
137 /**
138 * netlink rt socket (routing)
139 */
140 netlink_socket_t *socket;
141
142 /**
143 * Netlink rt socket to receive address change events
144 */
145 int socket_events;
146
147 /**
148 * time of the last roam_job
149 */
150 struct timeval last_roam;
151
152 /**
153 * routing table to install routes
154 */
155 int routing_table;
156
157 /**
158 * priority of used routing table
159 */
160 int routing_table_prio;
161
162 /**
163 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
164 */
165 bool process_route;
166
167 };
168
169 /**
170 * get the refcount of a virtual ip
171 */
172 static int get_vip_refcount(private_kernel_netlink_net_t *this, host_t* ip)
173 {
174 iterator_t *ifaces, *addrs;
175 iface_entry_t *iface;
176 addr_entry_t *addr;
177 int refcount = 0;
178
179 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
180 while (ifaces->iterate(ifaces, (void**)&iface))
181 {
182 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
183 while (addrs->iterate(addrs, (void**)&addr))
184 {
185 if (addr->virtual && (iface->flags & IFF_UP) &&
186 ip->ip_equals(ip, addr->ip))
187 {
188 refcount = addr->refcount;
189 break;
190 }
191 }
192 addrs->destroy(addrs);
193 if (refcount)
194 {
195 break;
196 }
197 }
198 ifaces->destroy(ifaces);
199
200 return refcount;
201 }
202
203 /**
204 * start a roaming job. We delay it for a second and fire only one job
205 * for multiple events. Otherwise we would create two many jobs.
206 */
207 static void fire_roam_job(private_kernel_netlink_net_t *this, bool address)
208 {
209 struct timeval now;
210
211 if (gettimeofday(&now, NULL) == 0)
212 {
213 if (timercmp(&now, &this->last_roam, >))
214 {
215 now.tv_usec += ROAM_DELAY * 1000;
216 while (now.tv_usec > 1000000)
217 {
218 now.tv_sec++;
219 now.tv_usec -= 1000000;
220 }
221 this->last_roam = now;
222 charon->scheduler->schedule_job(charon->scheduler,
223 (job_t*)roam_job_create(address), ROAM_DELAY);
224 }
225 }
226 }
227
228 /**
229 * process RTM_NEWLINK/RTM_DELLINK from kernel
230 */
231 static void process_link(private_kernel_netlink_net_t *this,
232 struct nlmsghdr *hdr, bool event)
233 {
234 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
235 struct rtattr *rta = IFLA_RTA(msg);
236 size_t rtasize = IFLA_PAYLOAD (hdr);
237 enumerator_t *enumerator;
238 iface_entry_t *current, *entry = NULL;
239 char *name = NULL;
240 bool update = FALSE;
241
242 while(RTA_OK(rta, rtasize))
243 {
244 switch (rta->rta_type)
245 {
246 case IFLA_IFNAME:
247 name = RTA_DATA(rta);
248 break;
249 }
250 rta = RTA_NEXT(rta, rtasize);
251 }
252 if (!name)
253 {
254 name = "(unknown)";
255 }
256
257 this->mutex->lock(this->mutex);
258 switch (hdr->nlmsg_type)
259 {
260 case RTM_NEWLINK:
261 {
262 if (msg->ifi_flags & IFF_LOOPBACK)
263 { /* ignore loopback interfaces */
264 break;
265 }
266 enumerator = this->ifaces->create_enumerator(this->ifaces);
267 while (enumerator->enumerate(enumerator, &current))
268 {
269 if (current->ifindex == msg->ifi_index)
270 {
271 entry = current;
272 break;
273 }
274 }
275 enumerator->destroy(enumerator);
276 if (!entry)
277 {
278 entry = malloc_thing(iface_entry_t);
279 entry->ifindex = msg->ifi_index;
280 entry->flags = 0;
281 entry->addrs = linked_list_create();
282 this->ifaces->insert_last(this->ifaces, entry);
283 }
284 memcpy(entry->ifname, name, IFNAMSIZ);
285 entry->ifname[IFNAMSIZ-1] = '\0';
286 if (event)
287 {
288 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
289 {
290 update = TRUE;
291 DBG1(DBG_KNL, "interface %s activated", name);
292 }
293 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
294 {
295 update = TRUE;
296 DBG1(DBG_KNL, "interface %s deactivated", name);
297 }
298 }
299 entry->flags = msg->ifi_flags;
300 break;
301 }
302 case RTM_DELLINK:
303 {
304 enumerator = this->ifaces->create_enumerator(this->ifaces);
305 while (enumerator->enumerate(enumerator, &current))
306 {
307 if (current->ifindex == msg->ifi_index)
308 {
309 /* we do not remove it, as an address may be added to a
310 * "down" interface and we wan't to know that. */
311 current->flags = msg->ifi_flags;
312 break;
313 }
314 }
315 enumerator->destroy(enumerator);
316 break;
317 }
318 }
319 this->mutex->unlock(this->mutex);
320
321 /* send an update to all IKE_SAs */
322 if (update && event)
323 {
324 fire_roam_job(this, TRUE);
325 }
326 }
327
328 /**
329 * process RTM_NEWADDR/RTM_DELADDR from kernel
330 */
331 static void process_addr(private_kernel_netlink_net_t *this,
332 struct nlmsghdr *hdr, bool event)
333 {
334 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
335 struct rtattr *rta = IFA_RTA(msg);
336 size_t rtasize = IFA_PAYLOAD (hdr);
337 host_t *host = NULL;
338 enumerator_t *ifaces, *addrs;
339 iface_entry_t *iface;
340 addr_entry_t *addr;
341 chunk_t local = chunk_empty, address = chunk_empty;
342 bool update = FALSE, found = FALSE, changed = FALSE;
343
344 while(RTA_OK(rta, rtasize))
345 {
346 switch (rta->rta_type)
347 {
348 case IFA_LOCAL:
349 local.ptr = RTA_DATA(rta);
350 local.len = RTA_PAYLOAD(rta);
351 break;
352 case IFA_ADDRESS:
353 address.ptr = RTA_DATA(rta);
354 address.len = RTA_PAYLOAD(rta);
355 break;
356 }
357 rta = RTA_NEXT(rta, rtasize);
358 }
359
360 /* For PPP interfaces, we need the IFA_LOCAL address,
361 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
362 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
363 if (local.ptr)
364 {
365 host = host_create_from_chunk(msg->ifa_family, local, 0);
366 }
367 else if (address.ptr)
368 {
369 host = host_create_from_chunk(msg->ifa_family, address, 0);
370 }
371
372 if (host == NULL)
373 { /* bad family? */
374 return;
375 }
376
377 this->mutex->lock(this->mutex);
378 ifaces = this->ifaces->create_enumerator(this->ifaces);
379 while (ifaces->enumerate(ifaces, &iface))
380 {
381 if (iface->ifindex == msg->ifa_index)
382 {
383 addrs = iface->addrs->create_enumerator(iface->addrs);
384 while (addrs->enumerate(addrs, &addr))
385 {
386 if (host->ip_equals(host, addr->ip))
387 {
388 found = TRUE;
389 if (hdr->nlmsg_type == RTM_DELADDR)
390 {
391 iface->addrs->remove_at(iface->addrs, addrs);
392 if (!addr->virtual)
393 {
394 changed = TRUE;
395 DBG1(DBG_KNL, "%H disappeared from %s",
396 host, iface->ifname);
397 }
398 addr_entry_destroy(addr);
399 }
400 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
401 {
402 addr->refcount = 1;
403 }
404 }
405 }
406 addrs->destroy(addrs);
407
408 if (hdr->nlmsg_type == RTM_NEWADDR)
409 {
410 if (!found)
411 {
412 found = TRUE;
413 changed = TRUE;
414 addr = malloc_thing(addr_entry_t);
415 addr->ip = host->clone(host);
416 addr->virtual = FALSE;
417 addr->refcount = 1;
418 addr->scope = msg->ifa_scope;
419
420 iface->addrs->insert_last(iface->addrs, addr);
421 if (event)
422 {
423 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
424 }
425 }
426 }
427 if (found && (iface->flags & IFF_UP))
428 {
429 update = TRUE;
430 }
431 break;
432 }
433 }
434 ifaces->destroy(ifaces);
435 this->mutex->unlock(this->mutex);
436 host->destroy(host);
437
438 /* send an update to all IKE_SAs */
439 if (update && event && changed)
440 {
441 fire_roam_job(this, TRUE);
442 }
443 }
444
445 /**
446 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
447 */
448 static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
449 {
450 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
451 struct rtattr *rta = RTM_RTA(msg);
452 size_t rtasize = RTM_PAYLOAD(hdr);
453 host_t *host = NULL;
454
455 /* ignore routes added by us */
456 if (msg->rtm_table && msg->rtm_table == this->routing_table)
457 {
458 return;
459 }
460
461 while (RTA_OK(rta, rtasize))
462 {
463 switch (rta->rta_type)
464 {
465 case RTA_PREFSRC:
466 host = host_create_from_chunk(msg->rtm_family,
467 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
468 break;
469 }
470 rta = RTA_NEXT(rta, rtasize);
471 }
472 if (host)
473 {
474 this->mutex->lock(this->mutex);
475 if (!get_vip_refcount(this, host))
476 { /* ignore routes added for virtual IPs */
477 fire_roam_job(this, FALSE);
478 }
479 this->mutex->unlock(this->mutex);
480 host->destroy(host);
481 }
482 }
483
484 /**
485 * Receives events from kernel
486 */
487 static job_requeue_t receive_events(private_kernel_netlink_net_t *this)
488 {
489 char response[1024];
490 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
491 struct sockaddr_nl addr;
492 socklen_t addr_len = sizeof(addr);
493 int len, oldstate;
494
495 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
496 len = recvfrom(this->socket_events, response, sizeof(response), 0,
497 (struct sockaddr*)&addr, &addr_len);
498 pthread_setcancelstate(oldstate, NULL);
499
500 if (len < 0)
501 {
502 switch (errno)
503 {
504 case EINTR:
505 /* interrupted, try again */
506 return JOB_REQUEUE_DIRECT;
507 case EAGAIN:
508 /* no data ready, select again */
509 return JOB_REQUEUE_DIRECT;
510 default:
511 DBG1(DBG_KNL, "unable to receive from rt event socket");
512 sleep(1);
513 return JOB_REQUEUE_FAIR;
514 }
515 }
516
517 if (addr.nl_pid != 0)
518 { /* not from kernel. not interested, try another one */
519 return JOB_REQUEUE_DIRECT;
520 }
521
522 while (NLMSG_OK(hdr, len))
523 {
524 /* looks good so far, dispatch netlink message */
525 switch (hdr->nlmsg_type)
526 {
527 case RTM_NEWADDR:
528 case RTM_DELADDR:
529 process_addr(this, hdr, TRUE);
530 this->condvar->broadcast(this->condvar);
531 break;
532 case RTM_NEWLINK:
533 case RTM_DELLINK:
534 process_link(this, hdr, TRUE);
535 this->condvar->broadcast(this->condvar);
536 break;
537 case RTM_NEWROUTE:
538 case RTM_DELROUTE:
539 if (this->process_route)
540 {
541 process_route(this, hdr);
542 }
543 break;
544 default:
545 break;
546 }
547 hdr = NLMSG_NEXT(hdr, len);
548 }
549 return JOB_REQUEUE_DIRECT;
550 }
551
552 /** enumerator over addresses */
553 typedef struct {
554 private_kernel_netlink_net_t* this;
555 /** whether to enumerate down interfaces */
556 bool include_down_ifaces;
557 /** whether to enumerate virtual ip addresses */
558 bool include_virtual_ips;
559 } address_enumerator_t;
560
561 /**
562 * cleanup function for address enumerator
563 */
564 static void address_enumerator_destroy(address_enumerator_t *data)
565 {
566 data->this->mutex->unlock(data->this->mutex);
567 free(data);
568 }
569
570 /**
571 * filter for addresses
572 */
573 static bool filter_addresses(address_enumerator_t *data, addr_entry_t** in, host_t** out)
574 {
575 if (!data->include_virtual_ips && (*in)->virtual)
576 { /* skip virtual interfaces added by us */
577 return FALSE;
578 }
579 if ((*in)->scope >= RT_SCOPE_LINK)
580 { /* skip addresses with a unusable scope */
581 return FALSE;
582 }
583 *out = (*in)->ip;
584 return TRUE;
585 }
586
587 /**
588 * enumerator constructor for interfaces
589 */
590 static enumerator_t *create_iface_enumerator(iface_entry_t *iface, address_enumerator_t *data)
591 {
592 return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
593 (void*)filter_addresses, data, NULL);
594 }
595
596 /**
597 * filter for interfaces
598 */
599 static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in, iface_entry_t** out)
600 {
601 if (!data->include_down_ifaces && !((*in)->flags & IFF_UP))
602 { /* skip interfaces not up */
603 return FALSE;
604 }
605 *out = *in;
606 return TRUE;
607 }
608
609 /**
610 * implementation of kernel_net_t.create_address_enumerator
611 */
612 static enumerator_t *create_address_enumerator(private_kernel_netlink_net_t *this,
613 bool include_down_ifaces, bool include_virtual_ips)
614 {
615 address_enumerator_t *data = malloc_thing(address_enumerator_t);
616 data->this = this;
617 data->include_down_ifaces = include_down_ifaces;
618 data->include_virtual_ips = include_virtual_ips;
619
620 this->mutex->lock(this->mutex);
621 return enumerator_create_nested(
622 enumerator_create_filter(this->ifaces->create_enumerator(this->ifaces),
623 (void*)filter_interfaces, data, NULL),
624 (void*)create_iface_enumerator, data, (void*)address_enumerator_destroy);
625 }
626
627 /**
628 * implementation of kernel_net_t.get_interface_name
629 */
630 static char *get_interface_name(private_kernel_netlink_net_t *this, host_t* ip)
631 {
632 enumerator_t *ifaces, *addrs;
633 iface_entry_t *iface;
634 addr_entry_t *addr;
635 char *name = NULL;
636
637 DBG2(DBG_KNL, "getting interface name for %H", ip);
638
639 this->mutex->lock(this->mutex);
640 ifaces = this->ifaces->create_enumerator(this->ifaces);
641 while (ifaces->enumerate(ifaces, &iface))
642 {
643 addrs = iface->addrs->create_enumerator(iface->addrs);
644 while (addrs->enumerate(addrs, &addr))
645 {
646 if (ip->ip_equals(ip, addr->ip))
647 {
648 name = strdup(iface->ifname);
649 break;
650 }
651 }
652 addrs->destroy(addrs);
653 if (name)
654 {
655 break;
656 }
657 }
658 ifaces->destroy(ifaces);
659 this->mutex->unlock(this->mutex);
660
661 if (name)
662 {
663 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
664 }
665 else
666 {
667 DBG2(DBG_KNL, "%H is not a local address", ip);
668 }
669 return name;
670 }
671
672 /**
673 * get the index of an interface by name
674 */
675 static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
676 {
677 enumerator_t *ifaces;
678 iface_entry_t *iface;
679 int ifindex = 0;
680
681 DBG2(DBG_KNL, "getting iface index for %s", name);
682
683 this->mutex->lock(this->mutex);
684 ifaces = this->ifaces->create_enumerator(this->ifaces);
685 while (ifaces->enumerate(ifaces, &iface))
686 {
687 if (streq(name, iface->ifname))
688 {
689 ifindex = iface->ifindex;
690 break;
691 }
692 }
693 ifaces->destroy(ifaces);
694 this->mutex->unlock(this->mutex);
695
696 if (ifindex == 0)
697 {
698 DBG1(DBG_KNL, "unable to get interface index for %s", name);
699 }
700 return ifindex;
701 }
702
703 /**
704 * Check if an interface with a given index is up
705 */
706 static bool is_interface_up(private_kernel_netlink_net_t *this, int index)
707 {
708 enumerator_t *ifaces;
709 iface_entry_t *iface;
710 /* default to TRUE for interface we do not monitor (e.g. lo) */
711 bool up = TRUE;
712
713 ifaces = this->ifaces->create_enumerator(this->ifaces);
714 while (ifaces->enumerate(ifaces, &iface))
715 {
716 if (iface->ifindex == index)
717 {
718 up = iface->flags & IFF_UP;
719 break;
720 }
721 }
722 ifaces->destroy(ifaces);
723 return up;
724 }
725
726 /**
727 * check if an address (chunk) addr is in subnet (net with net_len net bits)
728 */
729 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
730 {
731 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
732 int byte = 0;
733
734 if (addr.len != net.len || net_len > 8 * net.len )
735 {
736 return FALSE;
737 }
738
739 /* scan through all bytes in network order */
740 while (net_len > 0)
741 {
742 if (net_len < 8)
743 {
744 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
745 }
746 else
747 {
748 if (addr.ptr[byte] != net.ptr[byte])
749 {
750 return FALSE;
751 }
752 byte++;
753 net_len -= 8;
754 }
755 }
756 return TRUE;
757 }
758
759 /**
760 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
761 */
762 static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
763 bool nexthop, host_t *candidate)
764 {
765 netlink_buf_t request;
766 struct nlmsghdr *hdr, *out, *current;
767 struct rtmsg *msg;
768 chunk_t chunk;
769 size_t len;
770 int best = -1;
771 host_t *src = NULL, *gtw = NULL;
772
773 DBG2(DBG_KNL, "getting address to reach %H", dest);
774
775 memset(&request, 0, sizeof(request));
776
777 hdr = (struct nlmsghdr*)request;
778 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
779 hdr->nlmsg_type = RTM_GETROUTE;
780 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
781
782 msg = (struct rtmsg*)NLMSG_DATA(hdr);
783 msg->rtm_family = dest->get_family(dest);
784
785 chunk = dest->get_address(dest);
786 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
787 if (candidate)
788 {
789 chunk = candidate->get_address(candidate);
790 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
791 }
792
793 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
794 {
795 DBG1(DBG_KNL, "getting address to %H failed", dest);
796 return NULL;
797 }
798 this->mutex->lock(this->mutex);
799 current = out;
800 while (NLMSG_OK(current, len))
801 {
802 switch (current->nlmsg_type)
803 {
804 case NLMSG_DONE:
805 break;
806 case RTM_NEWROUTE:
807 {
808 struct rtattr *rta;
809 size_t rtasize;
810 chunk_t rta_gtw, rta_src, rta_dst;
811 u_int32_t rta_oif = 0;
812 enumerator_t *ifaces, *addrs;
813 iface_entry_t *iface;
814 addr_entry_t *addr;
815
816 rta_gtw = rta_src = rta_dst = chunk_empty;
817 msg = (struct rtmsg*)(NLMSG_DATA(current));
818 rta = RTM_RTA(msg);
819 rtasize = RTM_PAYLOAD(current);
820 while (RTA_OK(rta, rtasize))
821 {
822 switch (rta->rta_type)
823 {
824 case RTA_PREFSRC:
825 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
826 break;
827 case RTA_GATEWAY:
828 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
829 break;
830 case RTA_DST:
831 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
832 break;
833 case RTA_OIF:
834 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
835 {
836 rta_oif = *(u_int32_t*)RTA_DATA(rta);
837 }
838 break;
839 }
840 rta = RTA_NEXT(rta, rtasize);
841 }
842 if (rta_oif && !is_interface_up(this, rta_oif))
843 { /* interface is down */
844 goto next;
845 }
846 if (this->routing_table != 0 &&
847 msg->rtm_table == this->routing_table)
848 { /* route is from our own ipsec routing table */
849 goto next;
850 }
851 if (msg->rtm_dst_len <= best)
852 { /* not better than a previous one */
853 goto next;
854 }
855 if (msg->rtm_dst_len != 0 &&
856 (!rta_dst.ptr ||
857 !addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len)))
858 { /* is not the default route and not contained in our dst */
859 goto next;
860 }
861
862 best = msg->rtm_dst_len;
863 if (nexthop)
864 {
865 DESTROY_IF(gtw);
866 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
867 goto next;
868 }
869 if (rta_src.ptr)
870 {
871 DESTROY_IF(src);
872 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
873 if (get_vip_refcount(this, src))
874 { /* skip source address if it is installed by us */
875 DESTROY_IF(src);
876 src = NULL;
877 }
878 goto next;
879 }
880 /* no source addr, get one from the interfaces */
881 ifaces = this->ifaces->create_enumerator(this->ifaces);
882 while (ifaces->enumerate(ifaces, &iface))
883 {
884 if (iface->ifindex == rta_oif &&
885 iface->flags & IFF_UP)
886 {
887 addrs = iface->addrs->create_enumerator(iface->addrs);
888 while (addrs->enumerate(addrs, &addr))
889 {
890 chunk_t ip = addr->ip->get_address(addr->ip);
891 if ((msg->rtm_dst_len == 0 &&
892 addr->ip->get_family(addr->ip) ==
893 dest->get_family(dest)) ||
894 addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
895 {
896 DESTROY_IF(src);
897 src = addr->ip->clone(addr->ip);
898 break;
899 }
900 }
901 addrs->destroy(addrs);
902 }
903 }
904 ifaces->destroy(ifaces);
905 goto next;
906 }
907 default:
908 next:
909 current = NLMSG_NEXT(current, len);
910 continue;
911 }
912 break;
913 }
914 free(out);
915 this->mutex->unlock(this->mutex);
916
917 if (nexthop)
918 {
919 if (gtw)
920 {
921 return gtw;
922 }
923 return dest->clone(dest);
924 }
925 return src;
926 }
927
928 /**
929 * Implementation of kernel_net_t.get_source_addr.
930 */
931 static host_t* get_source_addr(private_kernel_netlink_net_t *this,
932 host_t *dest, host_t *src)
933 {
934 return get_route(this, dest, FALSE, src);
935 }
936
937 /**
938 * Implementation of kernel_net_t.get_nexthop.
939 */
940 static host_t* get_nexthop(private_kernel_netlink_net_t *this, host_t *dest)
941 {
942 return get_route(this, dest, TRUE, NULL);
943 }
944
945 /**
946 * Manages the creation and deletion of ip addresses on an interface.
947 * By setting the appropriate nlmsg_type, the ip will be set or unset.
948 */
949 static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
950 int flags, int if_index, host_t *ip)
951 {
952 netlink_buf_t request;
953 struct nlmsghdr *hdr;
954 struct ifaddrmsg *msg;
955 chunk_t chunk;
956
957 memset(&request, 0, sizeof(request));
958
959 chunk = ip->get_address(ip);
960
961 hdr = (struct nlmsghdr*)request;
962 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
963 hdr->nlmsg_type = nlmsg_type;
964 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
965
966 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
967 msg->ifa_family = ip->get_family(ip);
968 msg->ifa_flags = 0;
969 msg->ifa_prefixlen = 8 * chunk.len;
970 msg->ifa_scope = RT_SCOPE_UNIVERSE;
971 msg->ifa_index = if_index;
972
973 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
974
975 return this->socket->send_ack(this->socket, hdr);
976 }
977
978 /**
979 * Implementation of kernel_net_t.add_ip.
980 */
981 static status_t add_ip(private_kernel_netlink_net_t *this,
982 host_t *virtual_ip, host_t *iface_ip)
983 {
984 iface_entry_t *iface;
985 addr_entry_t *addr;
986 enumerator_t *addrs, *ifaces;
987 int ifindex;
988
989 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
990
991 this->mutex->lock(this->mutex);
992 ifaces = this->ifaces->create_enumerator(this->ifaces);
993 while (ifaces->enumerate(ifaces, &iface))
994 {
995 bool iface_found = FALSE;
996
997 addrs = iface->addrs->create_enumerator(iface->addrs);
998 while (addrs->enumerate(addrs, &addr))
999 {
1000 if (iface_ip->ip_equals(iface_ip, addr->ip))
1001 {
1002 iface_found = TRUE;
1003 }
1004 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1005 {
1006 addr->refcount++;
1007 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1008 virtual_ip, iface->ifname);
1009 addrs->destroy(addrs);
1010 ifaces->destroy(ifaces);
1011 this->mutex->unlock(this->mutex);
1012 return SUCCESS;
1013 }
1014 }
1015 addrs->destroy(addrs);
1016
1017 if (iface_found)
1018 {
1019 ifindex = iface->ifindex;
1020 addr = malloc_thing(addr_entry_t);
1021 addr->ip = virtual_ip->clone(virtual_ip);
1022 addr->refcount = 0;
1023 addr->virtual = TRUE;
1024 addr->scope = RT_SCOPE_UNIVERSE;
1025 iface->addrs->insert_last(iface->addrs, addr);
1026
1027 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1028 ifindex, virtual_ip) == SUCCESS)
1029 {
1030 while (get_vip_refcount(this, virtual_ip) == 0)
1031 { /* wait until address appears */
1032 this->condvar->wait(this->condvar, this->mutex);
1033 }
1034 ifaces->destroy(ifaces);
1035 this->mutex->unlock(this->mutex);
1036 return SUCCESS;
1037 }
1038 ifaces->destroy(ifaces);
1039 this->mutex->unlock(this->mutex);
1040 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1041 return FAILED;
1042 }
1043 }
1044 ifaces->destroy(ifaces);
1045 this->mutex->unlock(this->mutex);
1046
1047 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1048 "virtual IP %H", iface_ip, virtual_ip);
1049 return FAILED;
1050 }
1051
1052 /**
1053 * Implementation of kernel_net_t.del_ip.
1054 */
1055 static status_t del_ip(private_kernel_netlink_net_t *this, host_t *virtual_ip)
1056 {
1057 iface_entry_t *iface;
1058 addr_entry_t *addr;
1059 enumerator_t *addrs, *ifaces;
1060 status_t status;
1061 int ifindex;
1062
1063 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1064
1065 this->mutex->lock(this->mutex);
1066 ifaces = this->ifaces->create_enumerator(this->ifaces);
1067 while (ifaces->enumerate(ifaces, &iface))
1068 {
1069 addrs = iface->addrs->create_enumerator(iface->addrs);
1070 while (addrs->enumerate(addrs, &addr))
1071 {
1072 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1073 {
1074 ifindex = iface->ifindex;
1075 if (addr->refcount == 1)
1076 {
1077 status = manage_ipaddr(this, RTM_DELADDR, 0,
1078 ifindex, virtual_ip);
1079 if (status == SUCCESS)
1080 { /* wait until the address is really gone */
1081 while (get_vip_refcount(this, virtual_ip) > 0)
1082 {
1083 this->condvar->wait(this->condvar, this->mutex);
1084 }
1085 }
1086 addrs->destroy(addrs);
1087 ifaces->destroy(ifaces);
1088 this->mutex->unlock(this->mutex);
1089 return status;
1090 }
1091 else
1092 {
1093 addr->refcount--;
1094 }
1095 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1096 virtual_ip);
1097 addrs->destroy(addrs);
1098 ifaces->destroy(ifaces);
1099 this->mutex->unlock(this->mutex);
1100 return SUCCESS;
1101 }
1102 }
1103 addrs->destroy(addrs);
1104 }
1105 ifaces->destroy(ifaces);
1106 this->mutex->unlock(this->mutex);
1107
1108 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1109 return FAILED;
1110 }
1111
1112 /**
1113 * Manages source routes in the routing table.
1114 * By setting the appropriate nlmsg_type, the route gets added or removed.
1115 */
1116 static status_t manage_srcroute(private_kernel_netlink_net_t *this, int nlmsg_type,
1117 int flags, chunk_t dst_net, u_int8_t prefixlen,
1118 host_t *gateway, host_t *src_ip, char *if_name)
1119 {
1120 netlink_buf_t request;
1121 struct nlmsghdr *hdr;
1122 struct rtmsg *msg;
1123 int ifindex;
1124 chunk_t chunk;
1125
1126 /* if route is 0.0.0.0/0, we can't install it, as it would
1127 * overwrite the default route. Instead, we add two routes:
1128 * 0.0.0.0/1 and 128.0.0.0/1 */
1129 if (this->routing_table == 0 && prefixlen == 0)
1130 {
1131 chunk_t half_net;
1132 u_int8_t half_prefixlen;
1133 status_t status;
1134
1135 half_net = chunk_alloca(dst_net.len);
1136 memset(half_net.ptr, 0, half_net.len);
1137 half_prefixlen = 1;
1138
1139 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1140 gateway, src_ip, if_name);
1141 half_net.ptr[0] |= 0x80;
1142 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
1143 gateway, src_ip, if_name);
1144 return status;
1145 }
1146
1147 memset(&request, 0, sizeof(request));
1148
1149 hdr = (struct nlmsghdr*)request;
1150 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1151 hdr->nlmsg_type = nlmsg_type;
1152 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1153
1154 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1155 msg->rtm_family = src_ip->get_family(src_ip);
1156 msg->rtm_dst_len = prefixlen;
1157 msg->rtm_table = this->routing_table;
1158 msg->rtm_protocol = RTPROT_STATIC;
1159 msg->rtm_type = RTN_UNICAST;
1160 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1161
1162 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
1163 chunk = src_ip->get_address(src_ip);
1164 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1165 chunk = gateway->get_address(gateway);
1166 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1167 ifindex = get_interface_index(this, if_name);
1168 chunk.ptr = (char*)&ifindex;
1169 chunk.len = sizeof(ifindex);
1170 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1171
1172 return this->socket->send_ack(this->socket, hdr);
1173 }
1174
1175 /**
1176 * Implementation of kernel_net_t.add_route.
1177 */
1178 status_t add_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1179 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1180 {
1181 return manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
1182 dst_net, prefixlen, gateway, src_ip, if_name);
1183 }
1184
1185 /**
1186 * Implementation of kernel_net_t.del_route.
1187 */
1188 status_t del_route(private_kernel_netlink_net_t *this, chunk_t dst_net,
1189 u_int8_t prefixlen, host_t *gateway, host_t *src_ip, char *if_name)
1190 {
1191 return manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
1192 gateway, src_ip, if_name);
1193 }
1194
1195 /**
1196 * Initialize a list of local addresses.
1197 */
1198 static status_t init_address_list(private_kernel_netlink_net_t *this)
1199 {
1200 netlink_buf_t request;
1201 struct nlmsghdr *out, *current, *in;
1202 struct rtgenmsg *msg;
1203 size_t len;
1204 enumerator_t *ifaces, *addrs;
1205 iface_entry_t *iface;
1206 addr_entry_t *addr;
1207
1208 DBG1(DBG_KNL, "listening on interfaces:");
1209
1210 memset(&request, 0, sizeof(request));
1211
1212 in = (struct nlmsghdr*)&request;
1213 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1214 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1215 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1216 msg->rtgen_family = AF_UNSPEC;
1217
1218 /* get all links */
1219 in->nlmsg_type = RTM_GETLINK;
1220 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1221 {
1222 return FAILED;
1223 }
1224 current = out;
1225 while (NLMSG_OK(current, len))
1226 {
1227 switch (current->nlmsg_type)
1228 {
1229 case NLMSG_DONE:
1230 break;
1231 case RTM_NEWLINK:
1232 process_link(this, current, FALSE);
1233 /* fall through */
1234 default:
1235 current = NLMSG_NEXT(current, len);
1236 continue;
1237 }
1238 break;
1239 }
1240 free(out);
1241
1242 /* get all interface addresses */
1243 in->nlmsg_type = RTM_GETADDR;
1244 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
1245 {
1246 return FAILED;
1247 }
1248 current = out;
1249 while (NLMSG_OK(current, len))
1250 {
1251 switch (current->nlmsg_type)
1252 {
1253 case NLMSG_DONE:
1254 break;
1255 case RTM_NEWADDR:
1256 process_addr(this, current, FALSE);
1257 /* fall through */
1258 default:
1259 current = NLMSG_NEXT(current, len);
1260 continue;
1261 }
1262 break;
1263 }
1264 free(out);
1265
1266 this->mutex->lock(this->mutex);
1267 ifaces = this->ifaces->create_enumerator(this->ifaces);
1268 while (ifaces->enumerate(ifaces, &iface))
1269 {
1270 if (iface->flags & IFF_UP)
1271 {
1272 DBG1(DBG_KNL, " %s", iface->ifname);
1273 addrs = iface->addrs->create_enumerator(iface->addrs);
1274 while (addrs->enumerate(addrs, (void**)&addr))
1275 {
1276 DBG1(DBG_KNL, " %H", addr->ip);
1277 }
1278 addrs->destroy(addrs);
1279 }
1280 }
1281 ifaces->destroy(ifaces);
1282 this->mutex->unlock(this->mutex);
1283 return SUCCESS;
1284 }
1285
1286 /**
1287 * create or delete a rule to use our routing table
1288 */
1289 static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
1290 u_int32_t table, u_int32_t prio)
1291 {
1292 netlink_buf_t request;
1293 struct nlmsghdr *hdr;
1294 struct rtmsg *msg;
1295 chunk_t chunk;
1296
1297 memset(&request, 0, sizeof(request));
1298 hdr = (struct nlmsghdr*)request;
1299 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1300 hdr->nlmsg_type = nlmsg_type;
1301 if (nlmsg_type == RTM_NEWRULE)
1302 {
1303 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1304 }
1305 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1306
1307 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1308 msg->rtm_table = table;
1309 msg->rtm_family = AF_INET;
1310 msg->rtm_protocol = RTPROT_BOOT;
1311 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1312 msg->rtm_type = RTN_UNICAST;
1313
1314 chunk = chunk_from_thing(prio);
1315 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1316
1317 return this->socket->send_ack(this->socket, hdr);
1318 }
1319
1320 /**
1321 * Implementation of kernel_netlink_net_t.destroy.
1322 */
1323 static void destroy(private_kernel_netlink_net_t *this)
1324 {
1325 if (this->routing_table)
1326 {
1327 manage_rule(this, RTM_DELRULE, this->routing_table,
1328 this->routing_table_prio);
1329 }
1330
1331 this->job->cancel(this->job);
1332 close(this->socket_events);
1333 this->socket->destroy(this->socket);
1334 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
1335 this->condvar->destroy(this->condvar);
1336 this->mutex->destroy(this->mutex);
1337 free(this);
1338 }
1339
1340 /*
1341 * Described in header.
1342 */
1343 kernel_netlink_net_t *kernel_netlink_net_create()
1344 {
1345 private_kernel_netlink_net_t *this = malloc_thing(private_kernel_netlink_net_t);
1346 struct sockaddr_nl addr;
1347
1348 /* public functions */
1349 this->public.interface.get_interface = (char*(*)(kernel_net_t*,host_t*))get_interface_name;
1350 this->public.interface.create_address_enumerator = (enumerator_t*(*)(kernel_net_t*,bool,bool))create_address_enumerator;
1351 this->public.interface.get_source_addr = (host_t*(*)(kernel_net_t*, host_t *dest, host_t *src))get_source_addr;
1352 this->public.interface.get_nexthop = (host_t*(*)(kernel_net_t*, host_t *dest))get_nexthop;
1353 this->public.interface.add_ip = (status_t(*)(kernel_net_t*,host_t*,host_t*)) add_ip;
1354 this->public.interface.del_ip = (status_t(*)(kernel_net_t*,host_t*)) del_ip;
1355 this->public.interface.add_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) add_route;
1356 this->public.interface.del_route = (status_t(*)(kernel_net_t*,chunk_t,u_int8_t,host_t*,host_t*,char*)) del_route;
1357 this->public.interface.destroy = (void(*)(kernel_net_t*)) destroy;
1358
1359 /* private members */
1360 this->ifaces = linked_list_create();
1361 this->mutex = mutex_create(MUTEX_DEFAULT);
1362 this->condvar = condvar_create(CONDVAR_DEFAULT);
1363 timerclear(&this->last_roam);
1364 this->routing_table = lib->settings->get_int(lib->settings,
1365 "charon.routing_table", IPSEC_ROUTING_TABLE);
1366 this->routing_table_prio = lib->settings->get_int(lib->settings,
1367 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
1368 this->process_route = lib->settings->get_bool(lib->settings,
1369 "charon.process_route", TRUE);
1370
1371 this->socket = netlink_socket_create(NETLINK_ROUTE);
1372
1373 memset(&addr, 0, sizeof(addr));
1374 addr.nl_family = AF_NETLINK;
1375
1376 /* create and bind RT socket for events (address/interface/route changes) */
1377 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1378 if (this->socket_events <= 0)
1379 {
1380 charon->kill(charon, "unable to create RT event socket");
1381 }
1382 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1383 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
1384 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
1385 {
1386 charon->kill(charon, "unable to bind RT event socket");
1387 }
1388
1389 this->job = callback_job_create((callback_job_cb_t)receive_events,
1390 this, NULL, NULL);
1391 charon->processor->queue_job(charon->processor, (job_t*)this->job);
1392
1393 if (init_address_list(this) != SUCCESS)
1394 {
1395 charon->kill(charon, "unable to get interface list");
1396 }
1397
1398 if (this->routing_table)
1399 {
1400 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
1401 this->routing_table_prio) != SUCCESS)
1402 {
1403 DBG1(DBG_KNL, "unable to create routing table rule");
1404 }
1405 }
1406
1407 return &this->public;
1408 }