doing route lookup in userspace to ignore routes installed by us
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /**
2 * @file kernel_interface.c
3 *
4 * @brief Implementation of kernel_interface_t.
5 *
6 */
7
8 /*
9 * Copyright (C) 2005-2007 Martin Willi
10 * Copyright (C) 2006-2007 Tobias Brunner
11 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
12 * Copyright (C) 2006 Daniel Roethlisberger
13 * Copyright (C) 2005 Jan Hutter
14 * Hochschule fuer Technik Rapperswil
15 * Copyright (C) 2003 Herbert Xu.
16 *
17 * Based on xfrm code from pluto.
18 *
19 * This program is free software; you can redistribute it and/or modify it
20 * under the terms of the GNU General Public License as published by the
21 * Free Software Foundation; either version 2 of the License, or (at your
22 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 * for more details.
28 */
29
30 #include <sys/types.h>
31 #include <sys/socket.h>
32 #include <linux/netlink.h>
33 #include <linux/rtnetlink.h>
34 #include <linux/xfrm.h>
35 #include <linux/udp.h>
36 #include <pthread.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <net/if.h>
42 #include <sys/ioctl.h>
43
44 #include "kernel_interface.h"
45
46 #include <daemon.h>
47 #include <utils/linked_list.h>
48 #include <processing/jobs/delete_child_sa_job.h>
49 #include <processing/jobs/rekey_child_sa_job.h>
50 #include <processing/jobs/acquire_job.h>
51 #include <processing/jobs/callback_job.h>
52 #include <processing/jobs/roam_job.h>
53
/** routing table for routes installed by us */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif
/** priority value paired with IPSEC_ROUTING_TABLE */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** kernel level protocol identifiers */
#define KERNEL_ESP 50
#define KERNEL_AH 51

/** default priority of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** size of the stack buffers used to build netlink requests */
#define BUFFER_SIZE 1024

/**
 * returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'
 *
 * NLMSG_DATA() yields a void pointer; it is cast to char* before the offset
 * is added, as arithmetic on void* is a compiler extension only.
 */
#define XFRM_RTA(nlh, x) ((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + \
										   NLMSG_ALIGN(sizeof(x))))
/**
 * returns a pointer to the next rtattr following rta.
 * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
 */
#define XFRM_RTA_NEXT(rta) ((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
/**
 * returns the total size of attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
87
88 typedef struct kernel_algorithm_t kernel_algorithm_t;
89
90 /**
91 * Mapping from the algorithms defined in IKEv2 to
92 * kernel level algorithm names and their key length
93 */
94 struct kernel_algorithm_t {
95 /**
96 * Identifier specified in IKEv2
97 */
98 int ikev2_id;
99
100 /**
101 * Name of the algorithm, as used as kernel identifier
102 */
103 char *name;
104
105 /**
106 * Key length in bits, if fixed size
107 */
108 u_int key_size;
109 };
110 #define END_OF_LIST -1
111
112 /**
113 * Algorithms for encryption
114 */
115 kernel_algorithm_t encryption_algs[] = {
116 /* {ENCR_DES_IV64, "***", 0}, */
117 {ENCR_DES, "des", 64},
118 {ENCR_3DES, "des3_ede", 192},
119 /* {ENCR_RC5, "***", 0}, */
120 /* {ENCR_IDEA, "***", 0}, */
121 {ENCR_CAST, "cast128", 0},
122 {ENCR_BLOWFISH, "blowfish", 0},
123 /* {ENCR_3IDEA, "***", 0}, */
124 /* {ENCR_DES_IV32, "***", 0}, */
125 {ENCR_NULL, "cipher_null", 0},
126 {ENCR_AES_CBC, "aes", 0},
127 /* {ENCR_AES_CTR, "***", 0}, */
128 {END_OF_LIST, NULL, 0},
129 };
130
131 /**
132 * Algorithms for integrity protection
133 */
134 kernel_algorithm_t integrity_algs[] = {
135 {AUTH_HMAC_MD5_96, "md5", 128},
136 {AUTH_HMAC_SHA1_96, "sha1", 160},
137 {AUTH_HMAC_SHA2_256_128, "sha256", 256},
138 {AUTH_HMAC_SHA2_384_192, "sha384", 384},
139 {AUTH_HMAC_SHA2_512_256, "sha512", 512},
140 /* {AUTH_DES_MAC, "***", 0}, */
141 /* {AUTH_KPDK_MD5, "***", 0}, */
142 {AUTH_AES_XCBC_96, "xcbc(aes)", 128},
143 {END_OF_LIST, NULL, 0},
144 };
145
146 /**
147 * Look up a kernel algorithm name and its key size
148 */
149 char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
150 algorithm_t *ikev2_algo, u_int *key_size)
151 {
152 while (kernel_algo->ikev2_id != END_OF_LIST)
153 {
154 if (ikev2_algo->algorithm == kernel_algo->ikev2_id)
155 {
156 /* match, evaluate key length */
157 if (ikev2_algo->key_size)
158 { /* variable length */
159 *key_size = ikev2_algo->key_size;
160 }
161 else
162 { /* fixed length */
163 *key_size = kernel_algo->key_size;
164 }
165 return kernel_algo->name;
166 }
167 kernel_algo++;
168 }
169 return NULL;
170 }
171
172 typedef struct route_entry_t route_entry_t;
173
174 /**
175 * installed routing entry
176 */
177 struct route_entry_t {
178
179 /** Index of the interface the route is bound to */
180 int if_index;
181
182 /** Source ip of the route */
183 host_t *src_ip;
184
185 /** gateway for this route */
186 host_t *gateway;
187
188 /** Destination net */
189 chunk_t dst_net;
190
191 /** Destination net prefixlen */
192 u_int8_t prefixlen;
193 };
194
195 /**
196 * destroy an route_entry_t object
197 */
198 static void route_entry_destroy(route_entry_t *this)
199 {
200 this->src_ip->destroy(this->src_ip);
201 this->gateway->destroy(this->gateway);
202 chunk_free(&this->dst_net);
203 free(this);
204 }
205
206 typedef struct policy_entry_t policy_entry_t;
207
208 /**
209 * installed kernel policy.
210 */
211 struct policy_entry_t {
212
213 /** direction of this policy: in, out, forward */
214 u_int8_t direction;
215
216 /** reqid of the policy */
217 u_int32_t reqid;
218
219 /** parameters of installed policy */
220 struct xfrm_selector sel;
221
222 /** associated route installed for this policy */
223 route_entry_t *route;
224
225 /** by how many CHILD_SA's this policy is used */
226 u_int refcount;
227 };
228
229 typedef struct addr_entry_t addr_entry_t;
230
231 /**
232 * IP address in an inface_entry_t
233 */
234 struct addr_entry_t {
235
236 /** The ip address */
237 host_t *ip;
238
239 /** virtual IP managed by us */
240 bool virtual;
241
242 /** scope of the address */
243 u_char scope;
244
245 /** Number of times this IP is used, if virtual */
246 u_int refcount;
247 };
248
249 /**
250 * destroy a addr_entry_t object
251 */
252 static void addr_entry_destroy(addr_entry_t *this)
253 {
254 this->ip->destroy(this->ip);
255 free(this);
256 }
257
258 typedef struct iface_entry_t iface_entry_t;
259
260 /**
261 * A network interface on this system, containing addr_entry_t's
262 */
263 struct iface_entry_t {
264
265 /** interface index */
266 int ifindex;
267
268 /** name of the interface */
269 char ifname[IFNAMSIZ];
270
271 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
272 u_int flags;
273
274 /** list of addresses as host_t */
275 linked_list_t *addrs;
276 };
277
278 /**
279 * destroy an interface entry
280 */
281 static void iface_entry_destroy(iface_entry_t *this)
282 {
283 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
284 free(this);
285 }
286
287 typedef struct private_kernel_interface_t private_kernel_interface_t;
288
289 /**
290 * Private variables and functions of kernel_interface class.
291 */
292 struct private_kernel_interface_t {
293 /**
294 * Public part of the kernel_interface_t object.
295 */
296 kernel_interface_t public;
297
298 /**
299 * mutex to lock access to the various lists
300 */
301 pthread_mutex_t mutex;
302
303 /**
304 * List of installed policies (policy_entry_t)
305 */
306 linked_list_t *policies;
307
308 /**
309 * Cached list of interfaces and its adresses (iface_entry_t)
310 */
311 linked_list_t *ifaces;
312
313 /**
314 * iterator used in hook()
315 */
316 iterator_t *hiter;
317
318 /**
319 * job receiving netlink events
320 */
321 callback_job_t *job;
322
323 /**
324 * current sequence number for netlink request
325 */
326 int seq;
327
328 /**
329 * Netlink xfrm socket (IPsec)
330 */
331 int socket_xfrm;
332
333 /**
334 * netlink xfrm socket to receive acquire and expire events
335 */
336 int socket_xfrm_events;
337
338 /**
339 * Netlink rt socket (routing)
340 */
341 int socket_rt;
342
343 /**
344 * Netlink rt socket to receive address change events
345 */
346 int socket_rt_events;
347 };
348
349 /**
350 * convert a host_t to a struct xfrm_address
351 */
352 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
353 {
354 chunk_t chunk = host->get_address(host);
355 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
356 }
357
358 /**
359 * convert a traffic selector address range to subnet and its mask.
360 */
361 static void ts2subnet(traffic_selector_t* ts,
362 xfrm_address_t *net, u_int8_t *mask)
363 {
364 /* there is no way to do this cleanly, as the address range may
365 * be anything else but a subnet. We use from_addr as subnet
366 * and try to calculate a usable subnet mask.
367 */
368 int byte, bit;
369 bool found = FALSE;
370 chunk_t from, to;
371 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
372
373 from = ts->get_from_address(ts);
374 to = ts->get_to_address(ts);
375
376 *mask = (size * 8);
377 /* go trough all bits of the addresses, beginning in the front.
378 * as long as they are equal, the subnet gets larger
379 */
380 for (byte = 0; byte < size; byte++)
381 {
382 for (bit = 7; bit >= 0; bit--)
383 {
384 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
385 {
386 *mask = ((7 - bit) + (byte * 8));
387 found = TRUE;
388 break;
389 }
390 }
391 if (found)
392 {
393 break;
394 }
395 }
396 memcpy(net, from.ptr, from.len);
397 chunk_free(&from);
398 chunk_free(&to);
399 }
400
401 /**
402 * convert a traffic selector port range to port/portmask
403 */
404 static void ts2ports(traffic_selector_t* ts,
405 u_int16_t *port, u_int16_t *mask)
406 {
407 /* linux does not seem to accept complex portmasks. Only
408 * any or a specific port is allowed. We set to any, if we have
409 * a port range, or to a specific, if we have one port only.
410 */
411 u_int16_t from, to;
412
413 from = ts->get_from_port(ts);
414 to = ts->get_to_port(ts);
415
416 if (from == to)
417 {
418 *port = htons(from);
419 *mask = ~0;
420 }
421 else
422 {
423 *port = 0;
424 *mask = 0;
425 }
426 }
427
428 /**
429 * convert a pair of traffic_selectors to a xfrm_selector
430 */
431 static struct xfrm_selector ts2selector(traffic_selector_t *src,
432 traffic_selector_t *dst)
433 {
434 struct xfrm_selector sel;
435
436 memset(&sel, 0, sizeof(sel));
437 sel.family = src->get_type(src) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
438 /* src or dest proto may be "any" (0), use more restrictive one */
439 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
440 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
441 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
442 ts2ports(dst, &sel.dport, &sel.dport_mask);
443 ts2ports(src, &sel.sport, &sel.sport_mask);
444 sel.ifindex = 0;
445 sel.user = 0;
446
447 return sel;
448 }
449
450 /**
451 * Creates an rtattr and adds it to the netlink message
452 */
453 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
454 size_t buflen)
455 {
456 struct rtattr *rta;
457
458 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
459 {
460 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
461 return;
462 }
463
464 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
465 rta->rta_type = rta_type;
466 rta->rta_len = RTA_LENGTH(data.len);
467 memcpy(RTA_DATA(rta), data.ptr, data.len);
468 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
469 }
470
471 /**
472 * process a XFRM_MSG_ACQUIRE from kernel
473 */
474 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
475 {
476 u_int32_t reqid = 0;
477 job_t *job;
478 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
479 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
480
481 if (RTA_OK(rtattr, rtsize))
482 {
483 if (rtattr->rta_type == XFRMA_TMPL)
484 {
485 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
486 reqid = tmpl->reqid;
487 }
488 }
489 if (reqid == 0)
490 {
491 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
492 return;
493 }
494 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
495 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid %d", reqid);
496 job = (job_t*)acquire_job_create(reqid);
497 charon->processor->queue_job(charon->processor, job);
498 }
499
500 /**
501 * process a XFRM_MSG_EXPIRE from kernel
502 */
503 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
504 {
505 job_t *job;
506 protocol_id_t protocol;
507 u_int32_t spi, reqid;
508 struct xfrm_user_expire *expire;
509
510 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
511 protocol = expire->state.id.proto == KERNEL_ESP ? PROTO_ESP : PROTO_AH;
512 spi = expire->state.id.spi;
513 reqid = expire->state.reqid;
514
515 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
516 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA 0x%x (reqid %d)",
517 expire->hard ? "delete" : "rekey", protocol_id_names,
518 protocol, ntohl(spi), reqid);
519 if (expire->hard)
520 {
521 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
522 }
523 else
524 {
525 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
526 }
527 charon->processor->queue_job(charon->processor, job);
528 }
529
530 /**
531 * process RTM_NEWLINK/RTM_DELLINK from kernel
532 */
533 static void process_link(private_kernel_interface_t *this,
534 struct nlmsghdr *hdr, bool event)
535 {
536 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
537 struct rtattr *rta = IFLA_RTA(msg);
538 size_t rtasize = IFLA_PAYLOAD (hdr);
539 iterator_t *iterator;
540 iface_entry_t *current, *entry = NULL;
541 char *name = NULL;
542 bool update = FALSE;
543
544 while(RTA_OK(rta, rtasize))
545 {
546 switch (rta->rta_type)
547 {
548 case IFLA_IFNAME:
549 name = RTA_DATA(rta);
550 break;
551 }
552 rta = RTA_NEXT(rta, rtasize);
553 }
554 if (!name)
555 {
556 name = "(unknown)";
557 }
558
559 switch (hdr->nlmsg_type)
560 {
561 case RTM_NEWLINK:
562 {
563 if (msg->ifi_flags & IFF_LOOPBACK)
564 { /* ignore loopback interfaces */
565 break;
566 }
567 iterator = this->ifaces->create_iterator_locked(this->ifaces,
568 &this->mutex);
569 while (iterator->iterate(iterator, (void**)&current))
570 {
571 if (current->ifindex == msg->ifi_index)
572 {
573 entry = current;
574 break;
575 }
576 }
577 if (!entry)
578 {
579 entry = malloc_thing(iface_entry_t);
580 entry->ifindex = msg->ifi_index;
581 entry->flags = 0;
582 entry->addrs = linked_list_create();
583 this->ifaces->insert_last(this->ifaces, entry);
584 }
585 memcpy(entry->ifname, name, IFNAMSIZ);
586 entry->ifname[IFNAMSIZ-1] = '\0';
587 if (event)
588 {
589 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
590 {
591 update = TRUE;
592 DBG1(DBG_KNL, "interface %s activated", name);
593 }
594 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
595 {
596 update = TRUE;
597 DBG1(DBG_KNL, "interface %s deactivated", name);
598 }
599 }
600 entry->flags = msg->ifi_flags;
601 iterator->destroy(iterator);
602 break;
603 }
604 case RTM_DELLINK:
605 {
606 iterator = this->ifaces->create_iterator_locked(this->ifaces,
607 &this->mutex);
608 while (iterator->iterate(iterator, (void**)&current))
609 {
610 if (current->ifindex == msg->ifi_index)
611 {
612 /* we do not remove it, as an address may be added to a
613 * "down" interface and we wan't to know that. */
614 current->flags = msg->ifi_flags;
615 break;
616 }
617 }
618 iterator->destroy(iterator);
619 break;
620 }
621 }
622
623 /* send an update to all IKE_SAs */
624 if (update && event)
625 {
626 charon->processor->queue_job(charon->processor,
627 (job_t*)roam_job_create(TRUE));
628 }
629 }
630
631 /**
632 * process RTM_NEWADDR/RTM_DELADDR from kernel
633 */
634 static void process_addr(private_kernel_interface_t *this,
635 struct nlmsghdr *hdr, bool event)
636 {
637 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
638 struct rtattr *rta = IFA_RTA(msg);
639 size_t rtasize = IFA_PAYLOAD (hdr);
640 host_t *host = NULL;
641 iterator_t *ifaces, *addrs;
642 iface_entry_t *iface;
643 addr_entry_t *addr;
644 chunk_t local = chunk_empty, address = chunk_empty;
645 bool update = FALSE, found = FALSE, changed = FALSE;
646
647 while(RTA_OK(rta, rtasize))
648 {
649 switch (rta->rta_type)
650 {
651 case IFA_LOCAL:
652 local.ptr = RTA_DATA(rta);
653 local.len = RTA_PAYLOAD(rta);
654 break;
655 case IFA_ADDRESS:
656 address.ptr = RTA_DATA(rta);
657 address.len = RTA_PAYLOAD(rta);
658 break;
659 }
660 rta = RTA_NEXT(rta, rtasize);
661 }
662
663 /* For PPP interfaces, we need the IFA_LOCAL address,
664 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
665 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
666 if (local.ptr)
667 {
668 host = host_create_from_chunk(msg->ifa_family, local, 0);
669 }
670 else if (address.ptr)
671 {
672 host = host_create_from_chunk(msg->ifa_family, address, 0);
673 }
674
675 if (host == NULL)
676 { /* bad family? */
677 return;
678 }
679
680 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
681 while (ifaces->iterate(ifaces, (void**)&iface))
682 {
683 if (iface->ifindex == msg->ifa_index)
684 {
685 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
686 while (addrs->iterate(addrs, (void**)&addr))
687 {
688 if (host->ip_equals(host, addr->ip))
689 {
690 found = TRUE;
691 if (hdr->nlmsg_type == RTM_DELADDR)
692 {
693 changed = TRUE;
694 addrs->remove(addrs);
695 addr_entry_destroy(addr);
696 DBG1(DBG_KNL, "%H disappeared from %s", host, iface->ifname);
697 }
698 }
699 }
700 addrs->destroy(addrs);
701
702 if (hdr->nlmsg_type == RTM_NEWADDR)
703 {
704 if (!found)
705 {
706 found = TRUE;
707 changed = TRUE;
708 addr = malloc_thing(addr_entry_t);
709 addr->ip = host->clone(host);
710 addr->virtual = FALSE;
711 addr->refcount = 1;
712 addr->scope = msg->ifa_scope;
713
714 iface->addrs->insert_last(iface->addrs, addr);
715 if (event)
716 {
717 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
718 }
719 }
720 }
721 if (found && (iface->flags & IFF_UP))
722 {
723 update = TRUE;
724 }
725 break;
726 }
727 }
728 ifaces->destroy(ifaces);
729 host->destroy(host);
730
731 /* send an update to all IKE_SAs */
732 if (update && event && changed)
733 {
734 charon->processor->queue_job(charon->processor,
735 (job_t*)roam_job_create(TRUE));
736 }
737 }
738
739 /**
740 * Receives events from kernel
741 */
742 static job_requeue_t receive_events(private_kernel_interface_t *this)
743 {
744 char response[1024];
745 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
746 struct sockaddr_nl addr;
747 socklen_t addr_len = sizeof(addr);
748 int len, oldstate, maxfd, selected;
749 fd_set rfds;
750
751 FD_ZERO(&rfds);
752 FD_SET(this->socket_xfrm_events, &rfds);
753 FD_SET(this->socket_rt_events, &rfds);
754 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
755
756 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
757 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
758 pthread_setcancelstate(oldstate, NULL);
759 if (selected <= 0)
760 {
761 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
762 return JOB_REQUEUE_FAIR;
763 }
764 if (FD_ISSET(this->socket_xfrm_events, &rfds))
765 {
766 selected = this->socket_xfrm_events;
767 }
768 else if (FD_ISSET(this->socket_rt_events, &rfds))
769 {
770 selected = this->socket_rt_events;
771 }
772 else
773 {
774 return JOB_REQUEUE_DIRECT;
775 }
776
777 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
778 (struct sockaddr*)&addr, &addr_len);
779 if (len < 0)
780 {
781 switch (errno)
782 {
783 case EINTR:
784 /* interrupted, try again */
785 return JOB_REQUEUE_DIRECT;
786 case EAGAIN:
787 /* no data ready, select again */
788 return JOB_REQUEUE_DIRECT;
789 default:
790 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
791 sleep(1);
792 return JOB_REQUEUE_FAIR;
793 }
794 }
795 if (addr.nl_pid != 0)
796 { /* not from kernel. not interested, try another one */
797 return JOB_REQUEUE_DIRECT;
798 }
799
800 while (NLMSG_OK(hdr, len))
801 {
802 /* looks good so far, dispatch netlink message */
803 if (selected == this->socket_xfrm_events)
804 {
805 switch (hdr->nlmsg_type)
806 {
807 case XFRM_MSG_ACQUIRE:
808 process_acquire(this, hdr);
809 break;
810 case XFRM_MSG_EXPIRE:
811 process_expire(this, hdr);
812 break;
813 default:
814 break;
815 }
816 }
817 else if (selected == this->socket_rt_events)
818 {
819 switch (hdr->nlmsg_type)
820 {
821 case RTM_NEWADDR:
822 case RTM_DELADDR:
823 process_addr(this, hdr, TRUE);
824 break;
825 case RTM_NEWLINK:
826 case RTM_DELLINK:
827 process_link(this, hdr, TRUE);
828 break;
829 case RTM_NEWROUTE:
830 case RTM_DELROUTE:
831 charon->processor->queue_job(charon->processor,
832 (job_t*)roam_job_create(FALSE));
833 break;
834 default:
835 break;
836 }
837 }
838 hdr = NLMSG_NEXT(hdr, len);
839 }
840 return JOB_REQUEUE_DIRECT;
841 }
842
843 /**
844 * send a netlink message and wait for a reply
845 */
846 static status_t netlink_send(private_kernel_interface_t *this,
847 int socket, struct nlmsghdr *in,
848 struct nlmsghdr **out, size_t *out_len)
849 {
850 int len, addr_len;
851 struct sockaddr_nl addr;
852 chunk_t result = chunk_empty, tmp;
853 struct nlmsghdr *msg, peek;
854
855 pthread_mutex_lock(&this->mutex);
856
857 in->nlmsg_seq = ++this->seq;
858 in->nlmsg_pid = getpid();
859
860 memset(&addr, 0, sizeof(addr));
861 addr.nl_family = AF_NETLINK;
862 addr.nl_pid = 0;
863 addr.nl_groups = 0;
864
865 while (TRUE)
866 {
867 len = sendto(socket, in, in->nlmsg_len, 0,
868 (struct sockaddr*)&addr, sizeof(addr));
869
870 if (len != in->nlmsg_len)
871 {
872 if (errno == EINTR)
873 {
874 /* interrupted, try again */
875 continue;
876 }
877 pthread_mutex_unlock(&this->mutex);
878 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
879 return FAILED;
880 }
881 break;
882 }
883
884 while (TRUE)
885 {
886 char buf[4096];
887 tmp.len = sizeof(buf);
888 tmp.ptr = buf;
889 msg = (struct nlmsghdr*)tmp.ptr;
890
891 memset(&addr, 0, sizeof(addr));
892 addr.nl_family = AF_NETLINK;
893 addr.nl_pid = getpid();
894 addr.nl_groups = 0;
895 addr_len = sizeof(addr);
896
897 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
898 (struct sockaddr*)&addr, &addr_len);
899
900 if (len < 0)
901 {
902 if (errno == EINTR)
903 {
904 DBG1(DBG_KNL, "got interrupted");
905 /* interrupted, try again */
906 continue;
907 }
908 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
909 pthread_mutex_unlock(&this->mutex);
910 return FAILED;
911 }
912 if (!NLMSG_OK(msg, len))
913 {
914 DBG1(DBG_KNL, "received corrupted netlink message");
915 pthread_mutex_unlock(&this->mutex);
916 return FAILED;
917 }
918 if (msg->nlmsg_seq != this->seq)
919 {
920 DBG1(DBG_KNL, "received invalid netlink sequence number");
921 if (msg->nlmsg_seq < this->seq)
922 {
923 continue;
924 }
925 pthread_mutex_unlock(&this->mutex);
926 return FAILED;
927 }
928
929 tmp.len = len;
930 result = chunk_cata("cc", result, tmp);
931
932 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
933 * numbers to detect multi header messages */
934 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
935 (struct sockaddr*)&addr, &addr_len);
936
937 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
938 {
939 /* seems to be multipart */
940 continue;
941 }
942 break;
943 }
944
945 *out_len = result.len;
946 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
947
948 pthread_mutex_unlock(&this->mutex);
949
950 return SUCCESS;
951 }
952
953 /**
954 * send a netlink message and wait for its acknowlegde
955 */
956 static status_t netlink_send_ack(private_kernel_interface_t *this,
957 int socket, struct nlmsghdr *in)
958 {
959 struct nlmsghdr *out, *hdr;
960 size_t len;
961
962 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
963 {
964 return FAILED;
965 }
966 hdr = out;
967 while (NLMSG_OK(hdr, len))
968 {
969 switch (hdr->nlmsg_type)
970 {
971 case NLMSG_ERROR:
972 {
973 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
974
975 if (err->error)
976 {
977 DBG1(DBG_KNL, "received netlink error: %s (%d)",
978 strerror(-err->error), -err->error);
979 free(out);
980 return FAILED;
981 }
982 free(out);
983 return SUCCESS;
984 }
985 default:
986 hdr = NLMSG_NEXT(hdr, len);
987 continue;
988 case NLMSG_DONE:
989 break;
990 }
991 break;
992 }
993 DBG1(DBG_KNL, "netlink request not acknowlegded");
994 free(out);
995 return FAILED;
996 }
997
998 /**
999 * Initialize a list of local addresses.
1000 */
1001 static status_t init_address_list(private_kernel_interface_t *this)
1002 {
1003 char request[BUFFER_SIZE];
1004 struct nlmsghdr *out, *current, *in;
1005 struct rtgenmsg *msg;
1006 size_t len;
1007 iterator_t *ifaces, *addrs;
1008 iface_entry_t *iface;
1009 addr_entry_t *addr;
1010
1011 DBG1(DBG_KNL, "listening on interfaces:");
1012
1013 memset(&request, 0, sizeof(request));
1014
1015 in = (struct nlmsghdr*)&request;
1016 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1017 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1018 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1019 msg->rtgen_family = AF_UNSPEC;
1020
1021 /* get all links */
1022 in->nlmsg_type = RTM_GETLINK;
1023 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1024 {
1025 return FAILED;
1026 }
1027 current = out;
1028 while (NLMSG_OK(current, len))
1029 {
1030 switch (current->nlmsg_type)
1031 {
1032 case NLMSG_DONE:
1033 break;
1034 case RTM_NEWLINK:
1035 process_link(this, current, FALSE);
1036 /* fall through */
1037 default:
1038 current = NLMSG_NEXT(current, len);
1039 continue;
1040 }
1041 break;
1042 }
1043 free(out);
1044
1045 /* get all interface addresses */
1046 in->nlmsg_type = RTM_GETADDR;
1047 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1048 {
1049 return FAILED;
1050 }
1051 current = out;
1052 while (NLMSG_OK(current, len))
1053 {
1054 switch (current->nlmsg_type)
1055 {
1056 case NLMSG_DONE:
1057 break;
1058 case RTM_NEWADDR:
1059 process_addr(this, current, FALSE);
1060 /* fall through */
1061 default:
1062 current = NLMSG_NEXT(current, len);
1063 continue;
1064 }
1065 break;
1066 }
1067 free(out);
1068
1069 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1070 while (ifaces->iterate(ifaces, (void**)&iface))
1071 {
1072 if (iface->flags & IFF_UP)
1073 {
1074 DBG1(DBG_KNL, " %s", iface->ifname);
1075 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1076 while (addrs->iterate(addrs, (void**)&addr))
1077 {
1078 DBG1(DBG_KNL, " %H", addr->ip);
1079 }
1080 addrs->destroy(addrs);
1081 }
1082 }
1083 ifaces->destroy(ifaces);
1084 return SUCCESS;
1085 }
1086
1087 /**
1088 * iterator hook to iterate over addrs
1089 */
1090 static hook_result_t addr_hook(private_kernel_interface_t *this,
1091 addr_entry_t *in, host_t **out)
1092 {
1093 if (in->virtual)
1094 { /* skip virtual interfaces added by us */
1095 return HOOK_SKIP;
1096 }
1097 if (in->scope >= RT_SCOPE_LINK)
1098 { /* skip addresses with a unusable scope */
1099 return HOOK_SKIP;
1100 }
1101 *out = in->ip;
1102 return HOOK_NEXT;
1103 }
1104
1105 /**
1106 * iterator hook to iterate over ifaces
1107 */
1108 static hook_result_t iface_hook(private_kernel_interface_t *this,
1109 iface_entry_t *in, host_t **out)
1110 {
1111 if (!(in->flags & IFF_UP))
1112 { /* skip interfaces not up */
1113 return HOOK_SKIP;
1114 }
1115
1116 if (this->hiter == NULL)
1117 {
1118 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1119 this->hiter->set_iterator_hook(this->hiter,
1120 (iterator_hook_t*)addr_hook, this);
1121 }
1122 while (this->hiter->iterate(this->hiter, (void**)out))
1123 {
1124 return HOOK_AGAIN;
1125 }
1126 this->hiter->destroy(this->hiter);
1127 this->hiter = NULL;
1128 return HOOK_SKIP;
1129 }
1130
1131 /**
1132 * Implements kernel_interface_t.create_address_iterator.
1133 */
1134 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1135 {
1136 iterator_t *iterator;
1137
1138 /* This iterator is not only hooked, is is double-hooked. As we have stored
1139 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1140 * entries in each interface we iterate. This does the iface_hook. The
1141 * addr_hook returns the ip instead of the addr_entry. */
1142
1143 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1144 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1145 return iterator;
1146 }
1147
1148 /**
1149 * implementation of kernel_interface_t.get_interface_name
1150 */
1151 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1152 {
1153 iterator_t *ifaces, *addrs;
1154 iface_entry_t *iface;
1155 addr_entry_t *addr;
1156 char *name = NULL;
1157
1158 DBG2(DBG_KNL, "getting interface name for %H", ip);
1159
1160 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1161 while (ifaces->iterate(ifaces, (void**)&iface))
1162 {
1163 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1164 while (addrs->iterate(addrs, (void**)&addr))
1165 {
1166 if (ip->ip_equals(ip, addr->ip))
1167 {
1168 name = strdup(iface->ifname);
1169 break;
1170 }
1171 }
1172 addrs->destroy(addrs);
1173 if (name)
1174 {
1175 break;
1176 }
1177 }
1178 ifaces->destroy(ifaces);
1179
1180 if (name)
1181 {
1182 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1183 }
1184 else
1185 {
1186 DBG2(DBG_KNL, "%H is not a local address", ip);
1187 }
1188 return name;
1189 }
1190
1191 /**
1192 * Tries to find an ip address of a local interface that is included in the
1193 * supplied traffic selector.
1194 */
1195 static status_t get_address_by_ts(private_kernel_interface_t *this,
1196 traffic_selector_t *ts, host_t **ip)
1197 {
1198 iterator_t *ifaces, *addrs;
1199 iface_entry_t *iface;
1200 addr_entry_t *addr;
1201 host_t *host;
1202 int family;
1203 bool found = FALSE;
1204
1205 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1206
1207 /* if we have a family which includes localhost, we do not
1208 * search for an IP, we use the default */
1209 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1210
1211 if (family == AF_INET)
1212 {
1213 host = host_create_from_string("127.0.0.1", 0);
1214 }
1215 else
1216 {
1217 host = host_create_from_string("::1", 0);
1218 }
1219
1220 if (ts->includes(ts, host))
1221 {
1222 *ip = host_create_any(family);
1223 host->destroy(host);
1224 DBG2(DBG_KNL, "using host %H", *ip);
1225 return SUCCESS;
1226 }
1227 host->destroy(host);
1228
1229 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1230 while (ifaces->iterate(ifaces, (void**)&iface))
1231 {
1232 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1233 while (addrs->iterate(addrs, (void**)&addr))
1234 {
1235 if (ts->includes(ts, addr->ip))
1236 {
1237 found = TRUE;
1238 *ip = addr->ip->clone(addr->ip);
1239 break;
1240 }
1241 }
1242 addrs->destroy(addrs);
1243 if (found)
1244 {
1245 break;
1246 }
1247 }
1248 ifaces->destroy(ifaces);
1249
1250 if (!found)
1251 {
1252 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1253 return FAILED;
1254 }
1255 DBG2(DBG_KNL, "using host %H", *ip);
1256 return SUCCESS;
1257 }
1258
1259 /**
1260 * get the interface of a local address
1261 */
1262 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1263 {
1264 iterator_t *ifaces, *addrs;
1265 iface_entry_t *iface;
1266 addr_entry_t *addr;
1267 int ifindex = 0;
1268
1269 DBG2(DBG_KNL, "getting iface for %H", ip);
1270
1271 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1272 while (ifaces->iterate(ifaces, (void**)&iface))
1273 {
1274 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1275 while (addrs->iterate(addrs, (void**)&addr))
1276 {
1277 if (ip->ip_equals(ip, addr->ip))
1278 {
1279 ifindex = iface->ifindex;
1280 break;
1281 }
1282 }
1283 addrs->destroy(addrs);
1284 if (ifindex)
1285 {
1286 break;
1287 }
1288 }
1289 ifaces->destroy(ifaces);
1290
1291 if (ifindex == 0)
1292 {
1293 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1294 }
1295 return ifindex;
1296 }
1297
1298 /**
1299 * Manages the creation and deletion of ip addresses on an interface.
1300 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1301 */
1302 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1303 int flags, int if_index, host_t *ip)
1304 {
1305 unsigned char request[BUFFER_SIZE];
1306 struct nlmsghdr *hdr;
1307 struct ifaddrmsg *msg;
1308 chunk_t chunk;
1309
1310 memset(&request, 0, sizeof(request));
1311
1312 chunk = ip->get_address(ip);
1313
1314 hdr = (struct nlmsghdr*)request;
1315 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1316 hdr->nlmsg_type = nlmsg_type;
1317 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1318
1319 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1320 msg->ifa_family = ip->get_family(ip);
1321 msg->ifa_flags = 0;
1322 msg->ifa_prefixlen = 8 * chunk.len;
1323 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1324 msg->ifa_index = if_index;
1325
1326 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1327
1328 return netlink_send_ack(this, this->socket_rt, hdr);
1329 }
1330
1331 /**
1332 * Manages source routes in the routing table.
1333 * By setting the appropriate nlmsg_type, the route added or r.
1334 */
1335 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1336 int flags, route_entry_t *route)
1337 {
1338 unsigned char request[BUFFER_SIZE];
1339 struct nlmsghdr *hdr;
1340 struct rtmsg *msg;
1341 chunk_t chunk;
1342
1343 /* if route is 0.0.0.0/0, we can't install it, as it would
1344 * overwrite the default route. Instead, we add two routes:
1345 * 0.0.0.0/1 and 128.0.0.0/1
1346 * TODO: use metrics instead */
1347 if (route->prefixlen == 0)
1348 {
1349 route_entry_t half;
1350 status_t status;
1351
1352 half.dst_net = chunk_alloca(route->dst_net.len);
1353 memset(half.dst_net.ptr, 0, half.dst_net.len);
1354 half.src_ip = route->src_ip;
1355 half.gateway = route->gateway;
1356 half.if_index = route->if_index;
1357 half.prefixlen = 1;
1358
1359 status = manage_srcroute(this, nlmsg_type, flags, &half);
1360 half.dst_net.ptr[0] |= 0x80;
1361 status = manage_srcroute(this, nlmsg_type, flags, &half);
1362 return status;
1363 }
1364
1365 memset(&request, 0, sizeof(request));
1366
1367 hdr = (struct nlmsghdr*)request;
1368 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1369 hdr->nlmsg_type = nlmsg_type;
1370 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1371
1372 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1373 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1374 msg->rtm_dst_len = route->prefixlen;
1375 msg->rtm_table = IPSEC_ROUTING_TABLE;
1376 msg->rtm_protocol = RTPROT_STATIC;
1377 msg->rtm_type = RTN_UNICAST;
1378 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1379
1380 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1381 chunk = route->src_ip->get_address(route->src_ip);
1382 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1383 chunk = route->gateway->get_address(route->gateway);
1384 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1385 chunk.ptr = (char*)&route->if_index;
1386 chunk.len = sizeof(route->if_index);
1387 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1388
1389 return netlink_send_ack(this, this->socket_rt, hdr);
1390 }
1391
1392 /**
1393 * create or delete an rule to use our routing table
1394 */
1395 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1396 u_int32_t table, u_int32_t prio)
1397 {
1398 unsigned char request[BUFFER_SIZE];
1399 struct nlmsghdr *hdr;
1400 struct rtmsg *msg;
1401 chunk_t chunk;
1402
1403 memset(&request, 0, sizeof(request));
1404 hdr = (struct nlmsghdr*)request;
1405 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1406 hdr->nlmsg_type = nlmsg_type;
1407 if (nlmsg_type == RTM_NEWRULE)
1408 {
1409 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1410 }
1411 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1412
1413 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1414 msg->rtm_table = table;
1415 msg->rtm_family = AF_INET;
1416 msg->rtm_protocol = RTPROT_BOOT;
1417 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1418 msg->rtm_type = RTN_UNICAST;
1419
1420 chunk = chunk_from_thing(prio);
1421 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1422
1423 return netlink_send_ack(this, this->socket_rt, hdr);
1424 }
1425
1426 /**
1427 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1428 */
1429 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1430 {
1431 int bit, byte;
1432
1433 if (addr.len != net.len)
1434 {
1435 return FALSE;
1436 }
1437 /* scan through all bits, beginning in the front */
1438 for (byte = 0; byte < addr.len; byte++)
1439 {
1440 for (bit = 7; bit >= 0; bit--)
1441 {
1442 /* check if bits are equal (or we reached the end of the net) */
1443 if (bit + byte * 8 > net_len)
1444 {
1445 return TRUE;
1446 }
1447 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
1448 {
1449 return FALSE;
1450 }
1451 }
1452 }
1453 return TRUE;
1454 }
1455
1456 /**
1457 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1458 */
1459 static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
1460 bool nexthop)
1461 {
1462 unsigned char request[BUFFER_SIZE];
1463 struct nlmsghdr *hdr, *out, *current;
1464 struct rtmsg *msg;
1465 chunk_t chunk;
1466 size_t len;
1467 int best = -1;
1468 host_t *src = NULL, *gtw = NULL;
1469
1470 DBG2(DBG_KNL, "getting address to reach %H", dest);
1471
1472 memset(&request, 0, sizeof(request));
1473
1474 hdr = (struct nlmsghdr*)request;
1475 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
1476 hdr->nlmsg_type = RTM_GETROUTE;
1477 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1478
1479 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1480 msg->rtm_family = dest->get_family(dest);
1481
1482 chunk = dest->get_address(dest);
1483 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1484
1485 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1486 {
1487 DBG1(DBG_KNL, "getting address to %H failed", dest);
1488 return NULL;
1489 }
1490 current = out;
1491 while (NLMSG_OK(current, len))
1492 {
1493 switch (current->nlmsg_type)
1494 {
1495 case NLMSG_DONE:
1496 break;
1497 case RTM_NEWROUTE:
1498 {
1499 struct rtattr *rta;
1500 size_t rtasize;
1501 chunk_t rta_gtw, rta_src, rta_dst;
1502 u_int32_t rta_oif = 0;
1503
1504 rta_gtw = rta_src = rta_dst = chunk_empty;
1505 msg = (struct rtmsg*)(NLMSG_DATA(current));
1506 rta = RTM_RTA(msg);
1507 rtasize = RTM_PAYLOAD(current);
1508 while(RTA_OK(rta, rtasize))
1509 {
1510 switch (rta->rta_type)
1511 {
1512 case RTA_PREFSRC:
1513 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1514 break;
1515 case RTA_GATEWAY:
1516 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1517 break;
1518 case RTA_DST:
1519 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1520 break;
1521 case RTA_OIF:
1522 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1523 {
1524 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1525 }
1526 break;
1527 }
1528 rta = RTA_NEXT(rta, rtasize);
1529 }
1530
1531 /* apply the route if:
1532 * - it is not from our own ipsec routing table
1533 * - its destination net contains our destination
1534 * - is better than a previous one
1535 */
1536 if (msg->rtm_table != IPSEC_ROUTING_TABLE && rta_dst.ptr &&
1537 addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len) &&
1538 msg->rtm_dst_len > best)
1539 {
1540 iterator_t *ifaces, *addrs;
1541 iface_entry_t *iface;
1542 addr_entry_t *addr;
1543
1544 best = msg->rtm_dst_len;
1545 if (nexthop)
1546 {
1547 DESTROY_IF(gtw);
1548 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
1549 }
1550 else if (rta_src.ptr)
1551 {
1552 DESTROY_IF(src);
1553 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
1554 }
1555 else
1556 {
1557 /* no source addr, get one from the interfaces */
1558 ifaces = this->ifaces->create_iterator_locked(
1559 this->ifaces, &this->mutex);
1560 while (ifaces->iterate(ifaces, (void**)&iface))
1561 {
1562 if (iface->ifindex == rta_oif)
1563 {
1564 addrs = iface->addrs->create_iterator(
1565 iface->addrs, TRUE);
1566 while (addrs->iterate(addrs, (void**)&addr))
1567 {
1568 chunk_t ip = addr->ip->get_address(addr->ip);
1569 if (addr_in_subnet(ip, rta_dst,
1570 msg->rtm_dst_len))
1571 {
1572 DESTROY_IF(src);
1573 src = addr->ip->clone(addr->ip);
1574 best = msg->rtm_dst_len;
1575 break;
1576 }
1577 }
1578 addrs->destroy(addrs);
1579 }
1580 }
1581 ifaces->destroy(ifaces);
1582 }
1583 }
1584 /* FALL through */
1585 }
1586 default:
1587 current = NLMSG_NEXT(current, len);
1588 continue;
1589 }
1590 break;
1591 }
1592 free(out);
1593
1594 if (nexthop)
1595 {
1596 if (gtw)
1597 {
1598 return gtw;
1599 }
1600 return dest->clone(dest);
1601 }
1602 return src;
1603 }
1604
1605 /**
1606 * Implementation of kernel_interface_t.get_source_addr.
1607 */
1608 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1609 {
1610 return get_route(this, dest, FALSE);
1611 }
1612
1613 /**
1614 * Implementation of kernel_interface_t.add_ip.
1615 */
1616 static status_t add_ip(private_kernel_interface_t *this,
1617 host_t *virtual_ip, host_t *iface_ip)
1618 {
1619 iface_entry_t *iface;
1620 addr_entry_t *addr;
1621 iterator_t *addrs, *ifaces;
1622
1623 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1624
1625 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1626 while (ifaces->iterate(ifaces, (void**)&iface))
1627 {
1628 bool iface_found = FALSE;
1629
1630 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1631 while (addrs->iterate(addrs, (void**)&addr))
1632 {
1633 if (iface_ip->ip_equals(iface_ip, addr->ip))
1634 {
1635 iface_found = TRUE;
1636 }
1637 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1638 {
1639 addr->refcount++;
1640 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1641 virtual_ip, iface->ifname);
1642 addrs->destroy(addrs);
1643 ifaces->destroy(ifaces);
1644 return SUCCESS;
1645 }
1646 }
1647 addrs->destroy(addrs);
1648
1649 if (iface_found)
1650 {
1651 int ifindex = iface->ifindex;
1652 ifaces->destroy(ifaces);
1653 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1654 ifindex, virtual_ip) == SUCCESS)
1655 {
1656 addr = malloc_thing(addr_entry_t);
1657 addr->ip = virtual_ip->clone(virtual_ip);
1658 addr->refcount = 1;
1659 addr->virtual = TRUE;
1660 addr->scope = RT_SCOPE_UNIVERSE;
1661 pthread_mutex_lock(&this->mutex);
1662 iface->addrs->insert_last(iface->addrs, addr);
1663 pthread_mutex_unlock(&this->mutex);
1664 return SUCCESS;
1665 }
1666 DBG2(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1667 return FAILED;
1668
1669 }
1670
1671 }
1672 ifaces->destroy(ifaces);
1673
1674 DBG2(DBG_KNL, "interface address %H not found, unable to install"
1675 "virtual IP %H", iface_ip, virtual_ip);
1676 return FAILED;
1677 }
1678
1679 /**
1680 * Implementation of kernel_interface_t.del_ip.
1681 */
1682 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1683 {
1684 iface_entry_t *iface;
1685 addr_entry_t *addr;
1686 iterator_t *addrs, *ifaces;
1687
1688 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1689
1690 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1691 while (ifaces->iterate(ifaces, (void**)&iface))
1692 {
1693 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1694 while (addrs->iterate(addrs, (void**)&addr))
1695 {
1696 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1697 {
1698 int ifindex = iface->ifindex;
1699 addr->refcount--;
1700 if (addr->refcount == 0)
1701 {
1702 addrs->remove(addrs);
1703 addrs->destroy(addrs);
1704 ifaces->destroy(ifaces);
1705 addr_entry_destroy(addr);
1706 return manage_ipaddr(this, RTM_DELADDR, 0,
1707 ifindex, virtual_ip);
1708 }
1709 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1710 virtual_ip);
1711 addrs->destroy(addrs);
1712 ifaces->destroy(ifaces);
1713 return SUCCESS;
1714 }
1715 }
1716 addrs->destroy(addrs);
1717 }
1718 ifaces->destroy(ifaces);
1719
1720 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1721 return FAILED;
1722 }
1723
1724 /**
1725 * Implementation of kernel_interface_t.get_spi.
1726 */
1727 static status_t get_spi(private_kernel_interface_t *this,
1728 host_t *src, host_t *dst,
1729 protocol_id_t protocol, u_int32_t reqid,
1730 u_int32_t *spi)
1731 {
1732 unsigned char request[BUFFER_SIZE];
1733 struct nlmsghdr *hdr, *out;
1734 struct xfrm_userspi_info *userspi;
1735 u_int32_t received_spi = 0;
1736 size_t len;
1737
1738 memset(&request, 0, sizeof(request));
1739
1740 DBG2(DBG_KNL, "getting SPI for reqid %d", reqid);
1741
1742 hdr = (struct nlmsghdr*)request;
1743 hdr->nlmsg_flags = NLM_F_REQUEST;
1744 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1745 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1746
1747 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1748 host2xfrm(src, &userspi->info.saddr);
1749 host2xfrm(dst, &userspi->info.id.daddr);
1750 userspi->info.id.proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1751 userspi->info.mode = TRUE; /* tunnel mode */
1752 userspi->info.reqid = reqid;
1753 userspi->info.family = src->get_family(src);
1754 userspi->min = 0xc0000000;
1755 userspi->max = 0xcFFFFFFF;
1756
1757 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1758 {
1759 hdr = out;
1760 while (NLMSG_OK(hdr, len))
1761 {
1762 switch (hdr->nlmsg_type)
1763 {
1764 case XFRM_MSG_NEWSA:
1765 {
1766 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1767 received_spi = usersa->id.spi;
1768 break;
1769 }
1770 case NLMSG_ERROR:
1771 {
1772 struct nlmsgerr *err = NLMSG_DATA(hdr);
1773
1774 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1775 strerror(-err->error), -err->error);
1776 break;
1777 }
1778 default:
1779 hdr = NLMSG_NEXT(hdr, len);
1780 continue;
1781 case NLMSG_DONE:
1782 break;
1783 }
1784 break;
1785 }
1786 free(out);
1787 }
1788
1789 if (received_spi == 0)
1790 {
1791 DBG1(DBG_KNL, "unable to get SPI for reqid %d", reqid);
1792 return FAILED;
1793 }
1794
1795 DBG2(DBG_KNL, "got SPI 0x%x for reqid %d", received_spi, reqid);
1796
1797 *spi = received_spi;
1798 return SUCCESS;
1799 }
1800
1801 /**
1802 * Implementation of kernel_interface_t.add_sa.
1803 */
1804 static status_t add_sa(private_kernel_interface_t *this,
1805 host_t *src, host_t *dst, u_int32_t spi,
1806 protocol_id_t protocol, u_int32_t reqid,
1807 u_int64_t expire_soft, u_int64_t expire_hard,
1808 algorithm_t *enc_alg, algorithm_t *int_alg,
1809 prf_plus_t *prf_plus, mode_t mode, bool encap,
1810 bool replace)
1811 {
1812 unsigned char request[BUFFER_SIZE];
1813 char *alg_name;
1814 u_int key_size;
1815 struct nlmsghdr *hdr;
1816 struct xfrm_usersa_info *sa;
1817
1818 memset(&request, 0, sizeof(request));
1819
1820 DBG2(DBG_KNL, "adding SAD entry with SPI 0x%x", spi);
1821
1822 hdr = (struct nlmsghdr*)request;
1823 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1824 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
1825 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1826
1827 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
1828 host2xfrm(src, &sa->saddr);
1829 host2xfrm(dst, &sa->id.daddr);
1830 sa->id.spi = spi;
1831 sa->id.proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1832 sa->family = src->get_family(src);
1833 sa->mode = mode;
1834 sa->replay_window = 32;
1835 sa->reqid = reqid;
1836 /* we currently do not expire SAs by volume/packet count */
1837 sa->lft.soft_byte_limit = XFRM_INF;
1838 sa->lft.hard_byte_limit = XFRM_INF;
1839 sa->lft.soft_packet_limit = XFRM_INF;
1840 sa->lft.hard_packet_limit = XFRM_INF;
1841 /* we use lifetimes since added, not since used */
1842 sa->lft.soft_add_expires_seconds = expire_soft;
1843 sa->lft.hard_add_expires_seconds = expire_hard;
1844 sa->lft.soft_use_expires_seconds = 0;
1845 sa->lft.hard_use_expires_seconds = 0;
1846
1847 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
1848
1849 if (enc_alg->algorithm != ENCR_UNDEFINED)
1850 {
1851 rthdr->rta_type = XFRMA_ALG_CRYPT;
1852 alg_name = lookup_algorithm(encryption_algs, enc_alg, &key_size);
1853 if (alg_name == NULL)
1854 {
1855 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1856 encryption_algorithm_names, enc_alg->algorithm);
1857 return FAILED;
1858 }
1859 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1860 encryption_algorithm_names, enc_alg->algorithm, key_size);
1861
1862 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + key_size);
1863 hdr->nlmsg_len += rthdr->rta_len;
1864 if (hdr->nlmsg_len > sizeof(request))
1865 {
1866 return FAILED;
1867 }
1868
1869 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
1870 algo->alg_key_len = key_size;
1871 strcpy(algo->alg_name, alg_name);
1872 prf_plus->get_bytes(prf_plus, key_size / 8, algo->alg_key);
1873
1874 rthdr = XFRM_RTA_NEXT(rthdr);
1875 }
1876
1877 if (int_alg->algorithm != AUTH_UNDEFINED)
1878 {
1879 rthdr->rta_type = XFRMA_ALG_AUTH;
1880 alg_name = lookup_algorithm(integrity_algs, int_alg, &key_size);
1881 if (alg_name == NULL)
1882 {
1883 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1884 integrity_algorithm_names, int_alg->algorithm);
1885 return FAILED;
1886 }
1887 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
1888 integrity_algorithm_names, int_alg->algorithm, key_size);
1889
1890 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + key_size);
1891 hdr->nlmsg_len += rthdr->rta_len;
1892 if (hdr->nlmsg_len > sizeof(request))
1893 {
1894 return FAILED;
1895 }
1896
1897 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
1898 algo->alg_key_len = key_size;
1899 strcpy(algo->alg_name, alg_name);
1900 prf_plus->get_bytes(prf_plus, key_size / 8, algo->alg_key);
1901
1902 rthdr = XFRM_RTA_NEXT(rthdr);
1903 }
1904
1905 /* TODO: add IPComp here */
1906
1907 if (encap)
1908 {
1909 rthdr->rta_type = XFRMA_ENCAP;
1910 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
1911
1912 hdr->nlmsg_len += rthdr->rta_len;
1913 if (hdr->nlmsg_len > sizeof(request))
1914 {
1915 return FAILED;
1916 }
1917
1918 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
1919 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
1920 tmpl->encap_sport = htons(src->get_port(src));
1921 tmpl->encap_dport = htons(dst->get_port(dst));
1922 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
1923 /* encap_oa could probably be derived from the
1924 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
1925 * pluto does the same as we do here but it uses encap_oa in the
1926 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
1927 * the kernel ignores it anyway
1928 * -> does that mean that NAT-T encap doesn't work in transport mode?
1929 * No. The reason the kernel ignores NAT-OA is that it recomputes
1930 * (or, rather, just ignores) the checksum. If packets pass
1931 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
1932 rthdr = XFRM_RTA_NEXT(rthdr);
1933 }
1934
1935 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
1936 {
1937 DBG1(DBG_KNL, "unable to add SAD entry with SPI 0x%x", spi);
1938 return FAILED;
1939 }
1940 return SUCCESS;
1941 }
1942
1943 /**
1944 * Implementation of kernel_interface_t.update_sa.
1945 */
1946 static status_t update_sa(private_kernel_interface_t *this,
1947 u_int32_t spi, protocol_id_t protocol,
1948 host_t *src, host_t *dst,
1949 host_t *new_src, host_t *new_dst, bool encap)
1950 {
1951 unsigned char request[BUFFER_SIZE], *pos;
1952 struct nlmsghdr *hdr, *out = NULL;
1953 struct xfrm_usersa_id *sa_id;
1954 struct xfrm_usersa_info *out_sa = NULL, *sa;
1955 size_t len;
1956 struct rtattr *rta;
1957 size_t rtasize;
1958 struct xfrm_encap_tmpl* tmpl = NULL;
1959
1960 memset(&request, 0, sizeof(request));
1961
1962 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x for update", spi);
1963
1964 /* query the exisiting SA first */
1965 hdr = (struct nlmsghdr*)request;
1966 hdr->nlmsg_flags = NLM_F_REQUEST;
1967 hdr->nlmsg_type = XFRM_MSG_GETSA;
1968 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
1969
1970 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
1971 host2xfrm(dst, &sa_id->daddr);
1972 sa_id->spi = spi;
1973 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1974 sa_id->family = dst->get_family(dst);
1975
1976 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1977 {
1978 hdr = out;
1979 while (NLMSG_OK(hdr, len))
1980 {
1981 switch (hdr->nlmsg_type)
1982 {
1983 case XFRM_MSG_NEWSA:
1984 {
1985 out_sa = NLMSG_DATA(hdr);
1986 break;
1987 }
1988 case NLMSG_ERROR:
1989 {
1990 struct nlmsgerr *err = NLMSG_DATA(hdr);
1991 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
1992 strerror(-err->error), -err->error);
1993 break;
1994 }
1995 default:
1996 hdr = NLMSG_NEXT(hdr, len);
1997 continue;
1998 case NLMSG_DONE:
1999 break;
2000 }
2001 break;
2002 }
2003 }
2004 if (out_sa == NULL ||
2005 this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
2006 {
2007 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2008 free(out);
2009 return FAILED;
2010 }
2011
2012 DBG2(DBG_KNL, "updating SAD entry with SPI 0x%x from %#H..%#H to %#H..%#H",
2013 spi, src, dst, new_src, new_dst);
2014
2015 /* copy over the SA from out to request */
2016 hdr = (struct nlmsghdr*)request;
2017 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
2018 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2019 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2020 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2021 sa = NLMSG_DATA(hdr);
2022 sa->family = new_dst->get_family(new_dst);
2023
2024 if (!src->ip_equals(src, new_src))
2025 {
2026 host2xfrm(new_src, &sa->saddr);
2027 }
2028 if (!dst->ip_equals(dst, new_dst))
2029 {
2030 host2xfrm(new_dst, &sa->id.daddr);
2031 }
2032
2033 rta = XFRM_RTA(out, struct xfrm_usersa_info);
2034 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
2035 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
2036 while(RTA_OK(rta, rtasize))
2037 {
2038 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2039 if (rta->rta_type != XFRMA_ENCAP || encap)
2040 {
2041 if (rta->rta_type == XFRMA_ENCAP)
2042 { /* update encap tmpl */
2043 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2044 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2045 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2046 }
2047 memcpy(pos, rta, rta->rta_len);
2048 pos += rta->rta_len;
2049 hdr->nlmsg_len += rta->rta_len;
2050 }
2051 rta = RTA_NEXT(rta, rtasize);
2052 }
2053 if (tmpl == NULL && encap)
2054 { /* add tmpl if we are enabling it */
2055 rta = (struct rtattr*)pos;
2056 rta->rta_type = XFRMA_ENCAP;
2057 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2058 hdr->nlmsg_len += rta->rta_len;
2059 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2060 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2061 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2062 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2063 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2064 }
2065
2066 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2067 {
2068 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2069 free(out);
2070 return FAILED;
2071 }
2072 free(out);
2073
2074 return SUCCESS;
2075 }
2076
2077 /**
2078 * Implementation of kernel_interface_t.query_sa.
2079 */
2080 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
2081 u_int32_t spi, protocol_id_t protocol,
2082 u_int32_t *use_time)
2083 {
2084 unsigned char request[BUFFER_SIZE];
2085 struct nlmsghdr *out = NULL, *hdr;
2086 struct xfrm_usersa_id *sa_id;
2087 struct xfrm_usersa_info *sa = NULL;
2088 size_t len;
2089
2090 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x", spi);
2091 memset(&request, 0, sizeof(request));
2092
2093 hdr = (struct nlmsghdr*)request;
2094 hdr->nlmsg_flags = NLM_F_REQUEST;
2095 hdr->nlmsg_type = XFRM_MSG_GETSA;
2096 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2097
2098 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2099 host2xfrm(dst, &sa_id->daddr);
2100 sa_id->spi = spi;
2101 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2102 sa_id->family = dst->get_family(dst);
2103
2104 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2105 {
2106 hdr = out;
2107 while (NLMSG_OK(hdr, len))
2108 {
2109 switch (hdr->nlmsg_type)
2110 {
2111 case XFRM_MSG_NEWSA:
2112 {
2113 sa = NLMSG_DATA(hdr);
2114 break;
2115 }
2116 case NLMSG_ERROR:
2117 {
2118 struct nlmsgerr *err = NLMSG_DATA(hdr);
2119 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2120 strerror(-err->error), -err->error);
2121 break;
2122 }
2123 default:
2124 hdr = NLMSG_NEXT(hdr, len);
2125 continue;
2126 case NLMSG_DONE:
2127 break;
2128 }
2129 break;
2130 }
2131 }
2132
2133 if (sa == NULL)
2134 {
2135 DBG1(DBG_KNL, "unable to query SAD entry with SPI 0x%x", spi);
2136 free(out);
2137 return FAILED;
2138 }
2139
2140 *use_time = sa->curlft.use_time;
2141 free (out);
2142 return SUCCESS;
2143 }
2144
2145 /**
2146 * Implementation of kernel_interface_t.del_sa.
2147 */
2148 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2149 u_int32_t spi, protocol_id_t protocol)
2150 {
2151 unsigned char request[BUFFER_SIZE];
2152 struct nlmsghdr *hdr;
2153 struct xfrm_usersa_id *sa_id;
2154
2155 memset(&request, 0, sizeof(request));
2156
2157 DBG2(DBG_KNL, "deleting SAD entry with SPI 0x%x", spi);
2158
2159 hdr = (struct nlmsghdr*)request;
2160 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2161 hdr->nlmsg_type = XFRM_MSG_DELSA;
2162 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2163
2164 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2165 host2xfrm(dst, &sa_id->daddr);
2166 sa_id->spi = spi;
2167 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2168 sa_id->family = dst->get_family(dst);
2169
2170 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2171 {
2172 DBG1(DBG_KNL, "unable to delete SAD entry with SPI 0x%x", spi);
2173 return FAILED;
2174 }
2175 DBG2(DBG_KNL, "deleted SAD entry with SPI 0x%x", spi);
2176 return SUCCESS;
2177 }
2178
2179 /**
2180 * Implementation of kernel_interface_t.add_policy.
2181 */
2182 static status_t add_policy(private_kernel_interface_t *this,
2183 host_t *src, host_t *dst,
2184 traffic_selector_t *src_ts,
2185 traffic_selector_t *dst_ts,
2186 policy_dir_t direction, protocol_id_t protocol,
2187 u_int32_t reqid, bool high_prio, mode_t mode)
2188 {
2189 iterator_t *iterator;
2190 policy_entry_t *current, *policy;
2191 bool found = FALSE;
2192 unsigned char request[BUFFER_SIZE];
2193 struct xfrm_userpolicy_info *policy_info;
2194 struct nlmsghdr *hdr;
2195
2196 /* create a policy */
2197 policy = malloc_thing(policy_entry_t);
2198 memset(policy, 0, sizeof(policy_entry_t));
2199 policy->sel = ts2selector(src_ts, dst_ts);
2200 policy->direction = direction;
2201
2202 /* find the policy, which matches EXACTLY */
2203 pthread_mutex_lock(&this->mutex);
2204 iterator = this->policies->create_iterator(this->policies, TRUE);
2205 while (iterator->iterate(iterator, (void**)&current))
2206 {
2207 if (memcmp(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) == 0 &&
2208 policy->direction == current->direction)
2209 {
2210 /* use existing policy */
2211 current->refcount++;
2212 DBG2(DBG_KNL, "policy %R===%R already exists, increasing ",
2213 "refcount", src_ts, dst_ts);
2214 free(policy);
2215 policy = current;
2216 found = TRUE;
2217 break;
2218 }
2219 }
2220 iterator->destroy(iterator);
2221 if (!found)
2222 { /* apply the new one, if we have no such policy */
2223 this->policies->insert_last(this->policies, policy);
2224 policy->refcount = 1;
2225 }
2226
2227 DBG2(DBG_KNL, "adding policy %R===%R", src_ts, dst_ts);
2228
2229 memset(&request, 0, sizeof(request));
2230 hdr = (struct nlmsghdr*)request;
2231 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2232 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2233 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2234
2235 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2236 policy_info->sel = policy->sel;
2237 policy_info->dir = policy->direction;
2238 /* calculate priority based on source selector size, small size = high prio */
2239 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2240 policy_info->priority -= policy->sel.prefixlen_s * 10;
2241 policy_info->priority -= policy->sel.proto ? 2 : 0;
2242 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2243 policy_info->action = XFRM_POLICY_ALLOW;
2244 policy_info->share = XFRM_SHARE_ANY;
2245 pthread_mutex_unlock(&this->mutex);
2246
2247 /* policies don't expire */
2248 policy_info->lft.soft_byte_limit = XFRM_INF;
2249 policy_info->lft.soft_packet_limit = XFRM_INF;
2250 policy_info->lft.hard_byte_limit = XFRM_INF;
2251 policy_info->lft.hard_packet_limit = XFRM_INF;
2252 policy_info->lft.soft_add_expires_seconds = 0;
2253 policy_info->lft.hard_add_expires_seconds = 0;
2254 policy_info->lft.soft_use_expires_seconds = 0;
2255 policy_info->lft.hard_use_expires_seconds = 0;
2256
2257 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2258 rthdr->rta_type = XFRMA_TMPL;
2259
2260 rthdr->rta_len = sizeof(struct xfrm_user_tmpl);
2261 rthdr->rta_len = RTA_LENGTH(rthdr->rta_len);
2262
2263 hdr->nlmsg_len += rthdr->rta_len;
2264 if (hdr->nlmsg_len > sizeof(request))
2265 {
2266 return FAILED;
2267 }
2268
2269 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2270 tmpl->reqid = reqid;
2271 tmpl->id.proto = (protocol == PROTO_AH) ? KERNEL_AH : KERNEL_ESP;
2272 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2273 tmpl->mode = mode;
2274 tmpl->family = src->get_family(src);
2275
2276 host2xfrm(src, &tmpl->saddr);
2277 host2xfrm(dst, &tmpl->id.daddr);
2278
2279 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2280 {
2281 DBG1(DBG_KNL, "unable to add policy %R===%R", src_ts, dst_ts);
2282 return FAILED;
2283 }
2284
2285 /* install a route, if:
2286 * - we are NOT updating a policy
2287 * - this is a forward policy (to just get one for each child)
2288 * - we are in tunnel mode
2289 * - we are not using IPv6 (does not work correctly yet!)
2290 */
2291 if (policy->route == NULL && direction == POLICY_FWD &&
2292 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6)
2293 {
2294 policy->route = malloc_thing(route_entry_t);
2295 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2296 {
2297 /* get the nexthop to src (src as we are in POLICY_FWD).*/
2298 policy->route->gateway = get_route(this, src, TRUE);
2299 policy->route->if_index = get_interface_index(this, dst);
2300 policy->route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2301 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr, policy->route->dst_net.len);
2302 policy->route->prefixlen = policy->sel.prefixlen_s;
2303
2304 if (manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2305 policy->route) != SUCCESS)
2306 {
2307 DBG1(DBG_KNL, "unable to install source route for %H",
2308 policy->route->src_ip);
2309 route_entry_destroy(policy->route);
2310 policy->route = NULL;
2311 }
2312 }
2313 else
2314 {
2315 free(policy->route);
2316 policy->route = NULL;
2317 }
2318 }
2319
2320 return SUCCESS;
2321 }
2322
2323 /**
2324 * Implementation of kernel_interface_t.query_policy.
2325 */
2326 static status_t query_policy(private_kernel_interface_t *this,
2327 traffic_selector_t *src_ts,
2328 traffic_selector_t *dst_ts,
2329 policy_dir_t direction, u_int32_t *use_time)
2330 {
2331 unsigned char request[BUFFER_SIZE];
2332 struct nlmsghdr *out = NULL, *hdr;
2333 struct xfrm_userpolicy_id *policy_id;
2334 struct xfrm_userpolicy_info *policy = NULL;
2335 size_t len;
2336
2337 memset(&request, 0, sizeof(request));
2338
2339 DBG2(DBG_KNL, "querying policy %R===%R", src_ts, dst_ts);
2340
2341 hdr = (struct nlmsghdr*)request;
2342 hdr->nlmsg_flags = NLM_F_REQUEST;
2343 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2344 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2345
2346 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2347 policy_id->sel = ts2selector(src_ts, dst_ts);
2348 policy_id->dir = direction;
2349
2350 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2351 {
2352 hdr = out;
2353 while (NLMSG_OK(hdr, len))
2354 {
2355 switch (hdr->nlmsg_type)
2356 {
2357 case XFRM_MSG_NEWPOLICY:
2358 {
2359 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2360 break;
2361 }
2362 case NLMSG_ERROR:
2363 {
2364 struct nlmsgerr *err = NLMSG_DATA(hdr);
2365 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2366 strerror(-err->error), -err->error);
2367 break;
2368 }
2369 default:
2370 hdr = NLMSG_NEXT(hdr, len);
2371 continue;
2372 case NLMSG_DONE:
2373 break;
2374 }
2375 break;
2376 }
2377 }
2378
2379 if (policy == NULL)
2380 {
2381 DBG2(DBG_KNL, "unable to query policy %R===%R", src_ts, dst_ts);
2382 free(out);
2383 return FAILED;
2384 }
2385 *use_time = (time_t)policy->curlft.use_time;
2386
2387 free(out);
2388 return SUCCESS;
2389 }
2390
2391 /**
2392 * Implementation of kernel_interface_t.del_policy.
2393 */
2394 static status_t del_policy(private_kernel_interface_t *this,
2395 traffic_selector_t *src_ts,
2396 traffic_selector_t *dst_ts,
2397 policy_dir_t direction)
2398 {
2399 policy_entry_t *current, policy, *to_delete = NULL;
2400 route_entry_t *route;
2401 unsigned char request[BUFFER_SIZE];
2402 struct nlmsghdr *hdr;
2403 struct xfrm_userpolicy_id *policy_id;
2404 iterator_t *iterator;
2405
2406 DBG2(DBG_KNL, "deleting policy %R===%R", src_ts, dst_ts);
2407
2408 /* create a policy */
2409 memset(&policy, 0, sizeof(policy_entry_t));
2410 policy.sel = ts2selector(src_ts, dst_ts);
2411 policy.direction = direction;
2412
2413 /* find the policy */
2414 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2415 while (iterator->iterate(iterator, (void**)&current))
2416 {
2417 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2418 policy.direction == current->direction)
2419 {
2420 to_delete = current;
2421 if (--to_delete->refcount > 0)
2422 {
2423 /* is used by more SAs, keep in kernel */
2424 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2425 iterator->destroy(iterator);
2426 return SUCCESS;
2427 }
2428 /* remove if last reference */
2429 iterator->remove(iterator);
2430 break;
2431 }
2432 }
2433 iterator->destroy(iterator);
2434 if (!to_delete)
2435 {
2436 DBG1(DBG_KNL, "deleting policy %R===%R failed, not found", src_ts, dst_ts);
2437 return NOT_FOUND;
2438 }
2439
2440 memset(&request, 0, sizeof(request));
2441
2442 hdr = (struct nlmsghdr*)request;
2443 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2444 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2445 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2446
2447 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2448 policy_id->sel = to_delete->sel;
2449 policy_id->dir = direction;
2450
2451 route = to_delete->route;
2452 free(to_delete);
2453
2454 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2455 {
2456 DBG1(DBG_KNL, "unable to delete policy %R===%R", src_ts, dst_ts);
2457 return FAILED;
2458 }
2459
2460 if (route)
2461 {
2462 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2463 {
2464 DBG1(DBG_KNL, "error uninstalling route installed with "
2465 "policy %R===%R", src_ts, dst_ts);
2466 }
2467 route_entry_destroy(route);
2468 }
2469 return SUCCESS;
2470 }
2471
2472 /**
2473 * Implementation of kernel_interface_t.destroy.
2474 */
2475 static void destroy(private_kernel_interface_t *this)
2476 {
2477 manage_rule(this, RTM_DELRULE, IPSEC_ROUTING_TABLE, IPSEC_ROUTING_TABLE_PRIO);
2478
2479 this->job->cancel(this->job);
2480 close(this->socket_xfrm_events);
2481 close(this->socket_xfrm);
2482 close(this->socket_rt_events);
2483 close(this->socket_rt);
2484 this->policies->destroy(this->policies);
2485 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2486 free(this);
2487 }
2488
2489 /*
2490 * Described in header.
2491 */
2492 kernel_interface_t *kernel_interface_create()
2493 {
2494 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
2495 struct sockaddr_nl addr;
2496
2497 /* public functions */
2498 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
2499 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,algorithm_t*,algorithm_t*,prf_plus_t*,mode_t,bool,bool))add_sa;
2500 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
2501 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
2502 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
2503 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t))add_policy;
2504 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
2505 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
2506 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
2507 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
2508 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
2509 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
2510 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
2511 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
2512
2513 /* private members */
2514 this->policies = linked_list_create();
2515 this->ifaces = linked_list_create();
2516 this->hiter = NULL;
2517 this->seq = 200;
2518 pthread_mutex_init(&this->mutex,NULL);
2519
2520 memset(&addr, 0, sizeof(addr));
2521 addr.nl_family = AF_NETLINK;
2522
2523 /* create and bind RT socket */
2524 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2525 if (this->socket_rt <= 0)
2526 {
2527 charon->kill(charon, "unable to create RT netlink socket");
2528 }
2529 addr.nl_groups = 0;
2530 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
2531 {
2532 charon->kill(charon, "unable to bind RT netlink socket");
2533 }
2534
2535 /* create and bind RT socket for events (address/interface/route changes) */
2536 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2537 if (this->socket_rt_events <= 0)
2538 {
2539 charon->kill(charon, "unable to create RT event socket");
2540 }
2541 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2542 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
2543 if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
2544 {
2545 charon->kill(charon, "unable to bind RT event socket");
2546 }
2547
2548 /* create and bind XFRM socket */
2549 this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2550 if (this->socket_xfrm <= 0)
2551 {
2552 charon->kill(charon, "unable to create XFRM netlink socket");
2553 }
2554 addr.nl_groups = 0;
2555 if (bind(this->socket_xfrm, (struct sockaddr*)&addr, sizeof(addr)))
2556 {
2557 charon->kill(charon, "unable to bind XFRM netlink socket");
2558 }
2559
2560 /* create and bind XFRM socket for ACQUIRE & EXPIRE */
2561 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2562 if (this->socket_xfrm_events <= 0)
2563 {
2564 charon->kill(charon, "unable to create XFRM event socket");
2565 }
2566 addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
2567 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
2568 {
2569 charon->kill(charon, "unable to bind XFRM event socket");
2570 }
2571
2572 this->job = callback_job_create((callback_job_cb_t)receive_events,
2573 this, NULL, NULL);
2574 charon->processor->queue_job(charon->processor, (job_t*)this->job);
2575
2576 if (init_address_list(this) != SUCCESS)
2577 {
2578 charon->kill(charon, "unable to get interface list");
2579 }
2580
2581 if (manage_rule(this, RTM_NEWRULE, IPSEC_ROUTING_TABLE,
2582 IPSEC_ROUTING_TABLE_PRIO) != SUCCESS)
2583 {
2584 DBG1(DBG_KNL, "unable to create routing table rule");
2585 }
2586
2587 return &this->public;
2588 }
2589