5d487be36288c49ddcf0a50796fdbf5b0cf00b79
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /**
2 * @file kernel_interface.c
3 *
4 * @brief Implementation of kernel_interface_t.
5 *
6 */
7
8 /*
9 * Copyright (C) 2005-2007 Martin Willi
10 * Copyright (C) 2006-2007 Tobias Brunner
11 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
12 * Copyright (C) 2006 Daniel Roethlisberger
13 * Copyright (C) 2005 Jan Hutter
14 * Hochschule fuer Technik Rapperswil
15 * Copyright (C) 2003 Herbert Xu.
16 *
17 * Based on xfrm code from pluto.
18 *
19 * This program is free software; you can redistribute it and/or modify it
20 * under the terms of the GNU General Public License as published by the
21 * Free Software Foundation; either version 2 of the License, or (at your
22 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 * for more details.
28 */
29
30 #include <sys/types.h>
31 #include <sys/socket.h>
32 #include <sys/time.h>
33 #include <linux/netlink.h>
34 #include <linux/rtnetlink.h>
35 #include <linux/xfrm.h>
36 #include <linux/udp.h>
37 #include <pthread.h>
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <errno.h>
41 #include <string.h>
42 #include <net/if.h>
43 #include <sys/ioctl.h>
44
45 #include "kernel_interface.h"
46
47 #include <daemon.h>
48 #include <utils/linked_list.h>
49 #include <processing/jobs/delete_child_sa_job.h>
50 #include <processing/jobs/rekey_child_sa_job.h>
51 #include <processing/jobs/acquire_job.h>
52 #include <processing/jobs/callback_job.h>
53 #include <processing/jobs/roam_job.h>
54
/**
 * Routing table used for routes installed by us, and its priority.
 * Both can be overridden at build time.
 */
#ifndef IPSEC_ROUTING_TABLE
# define IPSEC_ROUTING_TABLE 100
#endif
#ifndef IPSEC_ROUTING_TABLE_PRIO
# define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** protocol identifiers as used on kernel level */
#define KERNEL_ESP 50
#define KERNEL_AH 51

/** default priorities of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** time to wait before a roam job gets fired, in milliseconds */
#define ROAM_DELAY 100

/** size of the stack buffers used to build netlink requests */
#define BUFFER_SIZE 1024
75
/**
 * Returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'.
 *
 * NLMSG_DATA() yields a void pointer; it is cast to char* before the offset
 * is added, as arithmetic on void pointers is a compiler extension.
 */
#define XFRM_RTA(nlh, x) \
	((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + NLMSG_ALIGN(sizeof(x))))
/**
 * Returns a pointer to the next rtattr following rta.
 * !!! Do not use this to parse messages. Use RTA_NEXT and RTA_OK instead !!!
 */
#define XFRM_RTA_NEXT(rta) \
	((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
/**
 * Returns the total size of the attached rta data
 * (after the 'usual' netlink data x like 'struct xfrm_usersa_info').
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
91
92 typedef struct kernel_algorithm_t kernel_algorithm_t;
93
94 /**
95 * Mapping from the algorithms defined in IKEv2 to
96 * kernel level algorithm names and their key length
97 */
98 struct kernel_algorithm_t {
99 /**
100 * Identifier specified in IKEv2
101 */
102 int ikev2_id;
103
104 /**
105 * Name of the algorithm, as used as kernel identifier
106 */
107 char *name;
108
109 /**
110 * Key length in bits, if fixed size
111 */
112 u_int key_size;
113 };
114 #define END_OF_LIST -1
115
116 /**
117 * Algorithms for encryption
118 */
119 kernel_algorithm_t encryption_algs[] = {
120 /* {ENCR_DES_IV64, "***", 0}, */
121 {ENCR_DES, "des", 64},
122 {ENCR_3DES, "des3_ede", 192},
123 /* {ENCR_RC5, "***", 0}, */
124 /* {ENCR_IDEA, "***", 0}, */
125 {ENCR_CAST, "cast128", 0},
126 {ENCR_BLOWFISH, "blowfish", 0},
127 /* {ENCR_3IDEA, "***", 0}, */
128 /* {ENCR_DES_IV32, "***", 0}, */
129 {ENCR_NULL, "cipher_null", 0},
130 {ENCR_AES_CBC, "aes", 0},
131 /* {ENCR_AES_CTR, "***", 0}, */
132 {END_OF_LIST, NULL, 0},
133 };
134
135 /**
136 * Algorithms for integrity protection
137 */
138 kernel_algorithm_t integrity_algs[] = {
139 {AUTH_HMAC_MD5_96, "md5", 128},
140 {AUTH_HMAC_SHA1_96, "sha1", 160},
141 {AUTH_HMAC_SHA2_256_128, "sha256", 256},
142 {AUTH_HMAC_SHA2_384_192, "sha384", 384},
143 {AUTH_HMAC_SHA2_512_256, "sha512", 512},
144 /* {AUTH_DES_MAC, "***", 0}, */
145 /* {AUTH_KPDK_MD5, "***", 0}, */
146 {AUTH_AES_XCBC_96, "xcbc(aes)", 128},
147 {END_OF_LIST, NULL, 0},
148 };
149
150 /**
151 * Look up a kernel algorithm name and its key size
152 */
153 char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
154 algorithm_t *ikev2_algo, u_int *key_size)
155 {
156 while (kernel_algo->ikev2_id != END_OF_LIST)
157 {
158 if (ikev2_algo->algorithm == kernel_algo->ikev2_id)
159 {
160 /* match, evaluate key length */
161 if (ikev2_algo->key_size)
162 { /* variable length */
163 *key_size = ikev2_algo->key_size;
164 }
165 else
166 { /* fixed length */
167 *key_size = kernel_algo->key_size;
168 }
169 return kernel_algo->name;
170 }
171 kernel_algo++;
172 }
173 return NULL;
174 }
175
176 typedef struct route_entry_t route_entry_t;
177
178 /**
179 * installed routing entry
180 */
181 struct route_entry_t {
182
183 /** Index of the interface the route is bound to */
184 int if_index;
185
186 /** Source ip of the route */
187 host_t *src_ip;
188
189 /** gateway for this route */
190 host_t *gateway;
191
192 /** Destination net */
193 chunk_t dst_net;
194
195 /** Destination net prefixlen */
196 u_int8_t prefixlen;
197 };
198
199 /**
200 * destroy an route_entry_t object
201 */
202 static void route_entry_destroy(route_entry_t *this)
203 {
204 this->src_ip->destroy(this->src_ip);
205 this->gateway->destroy(this->gateway);
206 chunk_free(&this->dst_net);
207 free(this);
208 }
209
210 typedef struct policy_entry_t policy_entry_t;
211
212 /**
213 * installed kernel policy.
214 */
215 struct policy_entry_t {
216
217 /** direction of this policy: in, out, forward */
218 u_int8_t direction;
219
220 /** reqid of the policy */
221 u_int32_t reqid;
222
223 /** parameters of installed policy */
224 struct xfrm_selector sel;
225
226 /** associated route installed for this policy */
227 route_entry_t *route;
228
229 /** by how many CHILD_SA's this policy is used */
230 u_int refcount;
231 };
232
233 typedef struct addr_entry_t addr_entry_t;
234
235 /**
236 * IP address in an inface_entry_t
237 */
238 struct addr_entry_t {
239
240 /** The ip address */
241 host_t *ip;
242
243 /** virtual IP managed by us */
244 bool virtual;
245
246 /** scope of the address */
247 u_char scope;
248
249 /** Number of times this IP is used, if virtual */
250 u_int refcount;
251 };
252
253 /**
254 * destroy a addr_entry_t object
255 */
256 static void addr_entry_destroy(addr_entry_t *this)
257 {
258 this->ip->destroy(this->ip);
259 free(this);
260 }
261
262 typedef struct iface_entry_t iface_entry_t;
263
264 /**
265 * A network interface on this system, containing addr_entry_t's
266 */
267 struct iface_entry_t {
268
269 /** interface index */
270 int ifindex;
271
272 /** name of the interface */
273 char ifname[IFNAMSIZ];
274
275 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
276 u_int flags;
277
278 /** list of addresses as host_t */
279 linked_list_t *addrs;
280 };
281
282 /**
283 * destroy an interface entry
284 */
285 static void iface_entry_destroy(iface_entry_t *this)
286 {
287 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
288 free(this);
289 }
290
291 typedef struct private_kernel_interface_t private_kernel_interface_t;
292
293 /**
294 * Private variables and functions of kernel_interface class.
295 */
296 struct private_kernel_interface_t {
297 /**
298 * Public part of the kernel_interface_t object.
299 */
300 kernel_interface_t public;
301
302 /**
303 * mutex to lock access to the various lists
304 */
305 pthread_mutex_t mutex;
306
307 /**
308 * List of installed policies (policy_entry_t)
309 */
310 linked_list_t *policies;
311
312 /**
313 * Cached list of interfaces and its adresses (iface_entry_t)
314 */
315 linked_list_t *ifaces;
316
317 /**
318 * iterator used in hook()
319 */
320 iterator_t *hiter;
321
322 /**
323 * job receiving netlink events
324 */
325 callback_job_t *job;
326
327 /**
328 * current sequence number for netlink request
329 */
330 int seq;
331
332 /**
333 * Netlink xfrm socket (IPsec)
334 */
335 int socket_xfrm;
336
337 /**
338 * netlink xfrm socket to receive acquire and expire events
339 */
340 int socket_xfrm_events;
341
342 /**
343 * Netlink rt socket (routing)
344 */
345 int socket_rt;
346
347 /**
348 * Netlink rt socket to receive address change events
349 */
350 int socket_rt_events;
351
352 /**
353 * time of the last roam_job
354 */
355 struct timeval last_roam;
356 };
357
358 /**
359 * convert a host_t to a struct xfrm_address
360 */
361 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
362 {
363 chunk_t chunk = host->get_address(host);
364 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
365 }
366
367 /**
368 * convert a traffic selector address range to subnet and its mask.
369 */
370 static void ts2subnet(traffic_selector_t* ts,
371 xfrm_address_t *net, u_int8_t *mask)
372 {
373 /* there is no way to do this cleanly, as the address range may
374 * be anything else but a subnet. We use from_addr as subnet
375 * and try to calculate a usable subnet mask.
376 */
377 int byte, bit;
378 bool found = FALSE;
379 chunk_t from, to;
380 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
381
382 from = ts->get_from_address(ts);
383 to = ts->get_to_address(ts);
384
385 *mask = (size * 8);
386 /* go trough all bits of the addresses, beginning in the front.
387 * as long as they are equal, the subnet gets larger
388 */
389 for (byte = 0; byte < size; byte++)
390 {
391 for (bit = 7; bit >= 0; bit--)
392 {
393 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
394 {
395 *mask = ((7 - bit) + (byte * 8));
396 found = TRUE;
397 break;
398 }
399 }
400 if (found)
401 {
402 break;
403 }
404 }
405 memcpy(net, from.ptr, from.len);
406 chunk_free(&from);
407 chunk_free(&to);
408 }
409
410 /**
411 * convert a traffic selector port range to port/portmask
412 */
413 static void ts2ports(traffic_selector_t* ts,
414 u_int16_t *port, u_int16_t *mask)
415 {
416 /* linux does not seem to accept complex portmasks. Only
417 * any or a specific port is allowed. We set to any, if we have
418 * a port range, or to a specific, if we have one port only.
419 */
420 u_int16_t from, to;
421
422 from = ts->get_from_port(ts);
423 to = ts->get_to_port(ts);
424
425 if (from == to)
426 {
427 *port = htons(from);
428 *mask = ~0;
429 }
430 else
431 {
432 *port = 0;
433 *mask = 0;
434 }
435 }
436
437 /**
438 * convert a pair of traffic_selectors to a xfrm_selector
439 */
440 static struct xfrm_selector ts2selector(traffic_selector_t *src,
441 traffic_selector_t *dst)
442 {
443 struct xfrm_selector sel;
444
445 memset(&sel, 0, sizeof(sel));
446 sel.family = src->get_type(src) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
447 /* src or dest proto may be "any" (0), use more restrictive one */
448 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
449 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
450 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
451 ts2ports(dst, &sel.dport, &sel.dport_mask);
452 ts2ports(src, &sel.sport, &sel.sport_mask);
453 sel.ifindex = 0;
454 sel.user = 0;
455
456 return sel;
457 }
458
459 /**
460 * Creates an rtattr and adds it to the netlink message
461 */
462 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
463 size_t buflen)
464 {
465 struct rtattr *rta;
466
467 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
468 {
469 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
470 return;
471 }
472
473 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
474 rta->rta_type = rta_type;
475 rta->rta_len = RTA_LENGTH(data.len);
476 memcpy(RTA_DATA(rta), data.ptr, data.len);
477 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
478 }
479
480 /**
481 * process a XFRM_MSG_ACQUIRE from kernel
482 */
483 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
484 {
485 u_int32_t reqid = 0;
486 job_t *job;
487 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
488 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
489
490 if (RTA_OK(rtattr, rtsize))
491 {
492 if (rtattr->rta_type == XFRMA_TMPL)
493 {
494 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
495 reqid = tmpl->reqid;
496 }
497 }
498 if (reqid == 0)
499 {
500 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
501 return;
502 }
503 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
504 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid %d", reqid);
505 job = (job_t*)acquire_job_create(reqid);
506 charon->processor->queue_job(charon->processor, job);
507 }
508
509 /**
510 * process a XFRM_MSG_EXPIRE from kernel
511 */
512 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
513 {
514 job_t *job;
515 protocol_id_t protocol;
516 u_int32_t spi, reqid;
517 struct xfrm_user_expire *expire;
518
519 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
520 protocol = expire->state.id.proto == KERNEL_ESP ? PROTO_ESP : PROTO_AH;
521 spi = expire->state.id.spi;
522 reqid = expire->state.reqid;
523
524 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
525 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA 0x%x (reqid %d)",
526 expire->hard ? "delete" : "rekey", protocol_id_names,
527 protocol, ntohl(spi), reqid);
528 if (expire->hard)
529 {
530 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
531 }
532 else
533 {
534 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
535 }
536 charon->processor->queue_job(charon->processor, job);
537 }
538
539 /**
540 * start a roaming job. We delay it for a second and fire only one job
541 * for multiple events. Otherwise we would create two many jobs.
542 */
543 static void fire_roam_job(private_kernel_interface_t *this, bool address)
544 {
545 struct timeval now;
546
547 if (gettimeofday(&now, NULL) == 0)
548 {
549 if (timercmp(&now, &this->last_roam, >))
550 {
551 now.tv_usec += ROAM_DELAY * 1000;
552 while (now.tv_usec > 1000000)
553 {
554 now.tv_sec++;
555 now.tv_usec -= 1000000;
556 }
557 this->last_roam = now;
558 charon->scheduler->schedule_job(charon->scheduler,
559 (job_t*)roam_job_create(address), ROAM_DELAY);
560 }
561 }
562 }
563
564 /**
565 * process RTM_NEWLINK/RTM_DELLINK from kernel
566 */
567 static void process_link(private_kernel_interface_t *this,
568 struct nlmsghdr *hdr, bool event)
569 {
570 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
571 struct rtattr *rta = IFLA_RTA(msg);
572 size_t rtasize = IFLA_PAYLOAD (hdr);
573 iterator_t *iterator;
574 iface_entry_t *current, *entry = NULL;
575 char *name = NULL;
576 bool update = FALSE;
577
578 while(RTA_OK(rta, rtasize))
579 {
580 switch (rta->rta_type)
581 {
582 case IFLA_IFNAME:
583 name = RTA_DATA(rta);
584 break;
585 }
586 rta = RTA_NEXT(rta, rtasize);
587 }
588 if (!name)
589 {
590 name = "(unknown)";
591 }
592
593 switch (hdr->nlmsg_type)
594 {
595 case RTM_NEWLINK:
596 {
597 if (msg->ifi_flags & IFF_LOOPBACK)
598 { /* ignore loopback interfaces */
599 break;
600 }
601 iterator = this->ifaces->create_iterator_locked(this->ifaces,
602 &this->mutex);
603 while (iterator->iterate(iterator, (void**)&current))
604 {
605 if (current->ifindex == msg->ifi_index)
606 {
607 entry = current;
608 break;
609 }
610 }
611 if (!entry)
612 {
613 entry = malloc_thing(iface_entry_t);
614 entry->ifindex = msg->ifi_index;
615 entry->flags = 0;
616 entry->addrs = linked_list_create();
617 this->ifaces->insert_last(this->ifaces, entry);
618 }
619 memcpy(entry->ifname, name, IFNAMSIZ);
620 entry->ifname[IFNAMSIZ-1] = '\0';
621 if (event)
622 {
623 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
624 {
625 update = TRUE;
626 DBG1(DBG_KNL, "interface %s activated", name);
627 }
628 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
629 {
630 update = TRUE;
631 DBG1(DBG_KNL, "interface %s deactivated", name);
632 }
633 }
634 entry->flags = msg->ifi_flags;
635 iterator->destroy(iterator);
636 break;
637 }
638 case RTM_DELLINK:
639 {
640 iterator = this->ifaces->create_iterator_locked(this->ifaces,
641 &this->mutex);
642 while (iterator->iterate(iterator, (void**)&current))
643 {
644 if (current->ifindex == msg->ifi_index)
645 {
646 /* we do not remove it, as an address may be added to a
647 * "down" interface and we wan't to know that. */
648 current->flags = msg->ifi_flags;
649 break;
650 }
651 }
652 iterator->destroy(iterator);
653 break;
654 }
655 }
656
657 /* send an update to all IKE_SAs */
658 if (update && event)
659 {
660 fire_roam_job(this, TRUE);
661 }
662 }
663
664 /**
665 * process RTM_NEWADDR/RTM_DELADDR from kernel
666 */
667 static void process_addr(private_kernel_interface_t *this,
668 struct nlmsghdr *hdr, bool event)
669 {
670 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
671 struct rtattr *rta = IFA_RTA(msg);
672 size_t rtasize = IFA_PAYLOAD (hdr);
673 host_t *host = NULL;
674 iterator_t *ifaces, *addrs;
675 iface_entry_t *iface;
676 addr_entry_t *addr;
677 chunk_t local = chunk_empty, address = chunk_empty;
678 bool update = FALSE, found = FALSE, changed = FALSE;
679
680 while(RTA_OK(rta, rtasize))
681 {
682 switch (rta->rta_type)
683 {
684 case IFA_LOCAL:
685 local.ptr = RTA_DATA(rta);
686 local.len = RTA_PAYLOAD(rta);
687 break;
688 case IFA_ADDRESS:
689 address.ptr = RTA_DATA(rta);
690 address.len = RTA_PAYLOAD(rta);
691 break;
692 }
693 rta = RTA_NEXT(rta, rtasize);
694 }
695
696 /* For PPP interfaces, we need the IFA_LOCAL address,
697 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
698 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
699 if (local.ptr)
700 {
701 host = host_create_from_chunk(msg->ifa_family, local, 0);
702 }
703 else if (address.ptr)
704 {
705 host = host_create_from_chunk(msg->ifa_family, address, 0);
706 }
707
708 if (host == NULL)
709 { /* bad family? */
710 return;
711 }
712
713 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
714 while (ifaces->iterate(ifaces, (void**)&iface))
715 {
716 if (iface->ifindex == msg->ifa_index)
717 {
718 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
719 while (addrs->iterate(addrs, (void**)&addr))
720 {
721 if (host->ip_equals(host, addr->ip))
722 {
723 found = TRUE;
724 if (hdr->nlmsg_type == RTM_DELADDR)
725 {
726 changed = TRUE;
727 addrs->remove(addrs);
728 addr_entry_destroy(addr);
729 DBG1(DBG_KNL, "%H disappeared from %s", host, iface->ifname);
730 }
731 }
732 }
733 addrs->destroy(addrs);
734
735 if (hdr->nlmsg_type == RTM_NEWADDR)
736 {
737 if (!found)
738 {
739 found = TRUE;
740 changed = TRUE;
741 addr = malloc_thing(addr_entry_t);
742 addr->ip = host->clone(host);
743 addr->virtual = FALSE;
744 addr->refcount = 1;
745 addr->scope = msg->ifa_scope;
746
747 iface->addrs->insert_last(iface->addrs, addr);
748 if (event)
749 {
750 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
751 }
752 }
753 }
754 if (found && (iface->flags & IFF_UP))
755 {
756 update = TRUE;
757 }
758 break;
759 }
760 }
761 ifaces->destroy(ifaces);
762 host->destroy(host);
763
764 /* send an update to all IKE_SAs */
765 if (update && event && changed)
766 {
767 fire_roam_job(this, TRUE);
768 }
769 }
770
771 /**
772 * Receives events from kernel
773 */
774 static job_requeue_t receive_events(private_kernel_interface_t *this)
775 {
776 char response[1024];
777 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
778 struct sockaddr_nl addr;
779 socklen_t addr_len = sizeof(addr);
780 int len, oldstate, maxfd, selected;
781 fd_set rfds;
782
783 FD_ZERO(&rfds);
784 FD_SET(this->socket_xfrm_events, &rfds);
785 FD_SET(this->socket_rt_events, &rfds);
786 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
787
788 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
789 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
790 pthread_setcancelstate(oldstate, NULL);
791 if (selected <= 0)
792 {
793 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
794 return JOB_REQUEUE_FAIR;
795 }
796 if (FD_ISSET(this->socket_xfrm_events, &rfds))
797 {
798 selected = this->socket_xfrm_events;
799 }
800 else if (FD_ISSET(this->socket_rt_events, &rfds))
801 {
802 selected = this->socket_rt_events;
803 }
804 else
805 {
806 return JOB_REQUEUE_DIRECT;
807 }
808
809 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
810 (struct sockaddr*)&addr, &addr_len);
811 if (len < 0)
812 {
813 switch (errno)
814 {
815 case EINTR:
816 /* interrupted, try again */
817 return JOB_REQUEUE_DIRECT;
818 case EAGAIN:
819 /* no data ready, select again */
820 return JOB_REQUEUE_DIRECT;
821 default:
822 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
823 sleep(1);
824 return JOB_REQUEUE_FAIR;
825 }
826 }
827 if (addr.nl_pid != 0)
828 { /* not from kernel. not interested, try another one */
829 return JOB_REQUEUE_DIRECT;
830 }
831
832 while (NLMSG_OK(hdr, len))
833 {
834 /* looks good so far, dispatch netlink message */
835 if (selected == this->socket_xfrm_events)
836 {
837 switch (hdr->nlmsg_type)
838 {
839 case XFRM_MSG_ACQUIRE:
840 process_acquire(this, hdr);
841 break;
842 case XFRM_MSG_EXPIRE:
843 process_expire(this, hdr);
844 break;
845 default:
846 break;
847 }
848 }
849 else if (selected == this->socket_rt_events)
850 {
851 switch (hdr->nlmsg_type)
852 {
853 case RTM_NEWADDR:
854 case RTM_DELADDR:
855 process_addr(this, hdr, TRUE);
856 break;
857 case RTM_NEWLINK:
858 case RTM_DELLINK:
859 process_link(this, hdr, TRUE);
860 break;
861 case RTM_NEWROUTE:
862 case RTM_DELROUTE:
863 fire_roam_job(this, FALSE);
864 break;
865 default:
866 break;
867 }
868 }
869 hdr = NLMSG_NEXT(hdr, len);
870 }
871 return JOB_REQUEUE_DIRECT;
872 }
873
874 /**
875 * send a netlink message and wait for a reply
876 */
877 static status_t netlink_send(private_kernel_interface_t *this,
878 int socket, struct nlmsghdr *in,
879 struct nlmsghdr **out, size_t *out_len)
880 {
881 int len, addr_len;
882 struct sockaddr_nl addr;
883 chunk_t result = chunk_empty, tmp;
884 struct nlmsghdr *msg, peek;
885
886 pthread_mutex_lock(&this->mutex);
887
888 in->nlmsg_seq = ++this->seq;
889 in->nlmsg_pid = getpid();
890
891 memset(&addr, 0, sizeof(addr));
892 addr.nl_family = AF_NETLINK;
893 addr.nl_pid = 0;
894 addr.nl_groups = 0;
895
896 while (TRUE)
897 {
898 len = sendto(socket, in, in->nlmsg_len, 0,
899 (struct sockaddr*)&addr, sizeof(addr));
900
901 if (len != in->nlmsg_len)
902 {
903 if (errno == EINTR)
904 {
905 /* interrupted, try again */
906 continue;
907 }
908 pthread_mutex_unlock(&this->mutex);
909 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
910 return FAILED;
911 }
912 break;
913 }
914
915 while (TRUE)
916 {
917 char buf[4096];
918 tmp.len = sizeof(buf);
919 tmp.ptr = buf;
920 msg = (struct nlmsghdr*)tmp.ptr;
921
922 memset(&addr, 0, sizeof(addr));
923 addr.nl_family = AF_NETLINK;
924 addr.nl_pid = getpid();
925 addr.nl_groups = 0;
926 addr_len = sizeof(addr);
927
928 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
929 (struct sockaddr*)&addr, &addr_len);
930
931 if (len < 0)
932 {
933 if (errno == EINTR)
934 {
935 DBG1(DBG_KNL, "got interrupted");
936 /* interrupted, try again */
937 continue;
938 }
939 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
940 pthread_mutex_unlock(&this->mutex);
941 return FAILED;
942 }
943 if (!NLMSG_OK(msg, len))
944 {
945 DBG1(DBG_KNL, "received corrupted netlink message");
946 pthread_mutex_unlock(&this->mutex);
947 return FAILED;
948 }
949 if (msg->nlmsg_seq != this->seq)
950 {
951 DBG1(DBG_KNL, "received invalid netlink sequence number");
952 if (msg->nlmsg_seq < this->seq)
953 {
954 continue;
955 }
956 pthread_mutex_unlock(&this->mutex);
957 return FAILED;
958 }
959
960 tmp.len = len;
961 result = chunk_cata("cc", result, tmp);
962
963 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
964 * numbers to detect multi header messages */
965 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
966 (struct sockaddr*)&addr, &addr_len);
967
968 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
969 {
970 /* seems to be multipart */
971 continue;
972 }
973 break;
974 }
975
976 *out_len = result.len;
977 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
978
979 pthread_mutex_unlock(&this->mutex);
980
981 return SUCCESS;
982 }
983
984 /**
985 * send a netlink message and wait for its acknowlegde
986 */
987 static status_t netlink_send_ack(private_kernel_interface_t *this,
988 int socket, struct nlmsghdr *in)
989 {
990 struct nlmsghdr *out, *hdr;
991 size_t len;
992
993 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
994 {
995 return FAILED;
996 }
997 hdr = out;
998 while (NLMSG_OK(hdr, len))
999 {
1000 switch (hdr->nlmsg_type)
1001 {
1002 case NLMSG_ERROR:
1003 {
1004 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
1005
1006 if (err->error)
1007 {
1008 DBG1(DBG_KNL, "received netlink error: %s (%d)",
1009 strerror(-err->error), -err->error);
1010 free(out);
1011 return FAILED;
1012 }
1013 free(out);
1014 return SUCCESS;
1015 }
1016 default:
1017 hdr = NLMSG_NEXT(hdr, len);
1018 continue;
1019 case NLMSG_DONE:
1020 break;
1021 }
1022 break;
1023 }
1024 DBG1(DBG_KNL, "netlink request not acknowlegded");
1025 free(out);
1026 return FAILED;
1027 }
1028
1029 /**
1030 * Initialize a list of local addresses.
1031 */
1032 static status_t init_address_list(private_kernel_interface_t *this)
1033 {
1034 char request[BUFFER_SIZE];
1035 struct nlmsghdr *out, *current, *in;
1036 struct rtgenmsg *msg;
1037 size_t len;
1038 iterator_t *ifaces, *addrs;
1039 iface_entry_t *iface;
1040 addr_entry_t *addr;
1041
1042 DBG1(DBG_KNL, "listening on interfaces:");
1043
1044 memset(&request, 0, sizeof(request));
1045
1046 in = (struct nlmsghdr*)&request;
1047 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1048 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1049 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1050 msg->rtgen_family = AF_UNSPEC;
1051
1052 /* get all links */
1053 in->nlmsg_type = RTM_GETLINK;
1054 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1055 {
1056 return FAILED;
1057 }
1058 current = out;
1059 while (NLMSG_OK(current, len))
1060 {
1061 switch (current->nlmsg_type)
1062 {
1063 case NLMSG_DONE:
1064 break;
1065 case RTM_NEWLINK:
1066 process_link(this, current, FALSE);
1067 /* fall through */
1068 default:
1069 current = NLMSG_NEXT(current, len);
1070 continue;
1071 }
1072 break;
1073 }
1074 free(out);
1075
1076 /* get all interface addresses */
1077 in->nlmsg_type = RTM_GETADDR;
1078 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1079 {
1080 return FAILED;
1081 }
1082 current = out;
1083 while (NLMSG_OK(current, len))
1084 {
1085 switch (current->nlmsg_type)
1086 {
1087 case NLMSG_DONE:
1088 break;
1089 case RTM_NEWADDR:
1090 process_addr(this, current, FALSE);
1091 /* fall through */
1092 default:
1093 current = NLMSG_NEXT(current, len);
1094 continue;
1095 }
1096 break;
1097 }
1098 free(out);
1099
1100 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1101 while (ifaces->iterate(ifaces, (void**)&iface))
1102 {
1103 if (iface->flags & IFF_UP)
1104 {
1105 DBG1(DBG_KNL, " %s", iface->ifname);
1106 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1107 while (addrs->iterate(addrs, (void**)&addr))
1108 {
1109 DBG1(DBG_KNL, " %H", addr->ip);
1110 }
1111 addrs->destroy(addrs);
1112 }
1113 }
1114 ifaces->destroy(ifaces);
1115 return SUCCESS;
1116 }
1117
1118 /**
1119 * iterator hook to iterate over addrs
1120 */
1121 static hook_result_t addr_hook(private_kernel_interface_t *this,
1122 addr_entry_t *in, host_t **out)
1123 {
1124 if (in->virtual)
1125 { /* skip virtual interfaces added by us */
1126 return HOOK_SKIP;
1127 }
1128 if (in->scope >= RT_SCOPE_LINK)
1129 { /* skip addresses with a unusable scope */
1130 return HOOK_SKIP;
1131 }
1132 *out = in->ip;
1133 return HOOK_NEXT;
1134 }
1135
1136 /**
1137 * iterator hook to iterate over ifaces
1138 */
1139 static hook_result_t iface_hook(private_kernel_interface_t *this,
1140 iface_entry_t *in, host_t **out)
1141 {
1142 if (!(in->flags & IFF_UP))
1143 { /* skip interfaces not up */
1144 return HOOK_SKIP;
1145 }
1146
1147 if (this->hiter == NULL)
1148 {
1149 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1150 this->hiter->set_iterator_hook(this->hiter,
1151 (iterator_hook_t*)addr_hook, this);
1152 }
1153 while (this->hiter->iterate(this->hiter, (void**)out))
1154 {
1155 return HOOK_AGAIN;
1156 }
1157 this->hiter->destroy(this->hiter);
1158 this->hiter = NULL;
1159 return HOOK_SKIP;
1160 }
1161
1162 /**
1163 * Implements kernel_interface_t.create_address_iterator.
1164 */
1165 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1166 {
1167 iterator_t *iterator;
1168
1169 /* This iterator is not only hooked, is is double-hooked. As we have stored
1170 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1171 * entries in each interface we iterate. This does the iface_hook. The
1172 * addr_hook returns the ip instead of the addr_entry. */
1173
1174 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1175 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1176 return iterator;
1177 }
1178
1179 /**
1180 * implementation of kernel_interface_t.get_interface_name
1181 */
1182 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1183 {
1184 iterator_t *ifaces, *addrs;
1185 iface_entry_t *iface;
1186 addr_entry_t *addr;
1187 char *name = NULL;
1188
1189 DBG2(DBG_KNL, "getting interface name for %H", ip);
1190
1191 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1192 while (ifaces->iterate(ifaces, (void**)&iface))
1193 {
1194 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1195 while (addrs->iterate(addrs, (void**)&addr))
1196 {
1197 if (ip->ip_equals(ip, addr->ip))
1198 {
1199 name = strdup(iface->ifname);
1200 break;
1201 }
1202 }
1203 addrs->destroy(addrs);
1204 if (name)
1205 {
1206 break;
1207 }
1208 }
1209 ifaces->destroy(ifaces);
1210
1211 if (name)
1212 {
1213 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1214 }
1215 else
1216 {
1217 DBG2(DBG_KNL, "%H is not a local address", ip);
1218 }
1219 return name;
1220 }
1221
1222 /**
1223 * Tries to find an ip address of a local interface that is included in the
1224 * supplied traffic selector.
1225 */
1226 static status_t get_address_by_ts(private_kernel_interface_t *this,
1227 traffic_selector_t *ts, host_t **ip)
1228 {
1229 iterator_t *ifaces, *addrs;
1230 iface_entry_t *iface;
1231 addr_entry_t *addr;
1232 host_t *host;
1233 int family;
1234 bool found = FALSE;
1235
1236 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1237
1238 /* if we have a family which includes localhost, we do not
1239 * search for an IP, we use the default */
1240 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1241
1242 if (family == AF_INET)
1243 {
1244 host = host_create_from_string("127.0.0.1", 0);
1245 }
1246 else
1247 {
1248 host = host_create_from_string("::1", 0);
1249 }
1250
1251 if (ts->includes(ts, host))
1252 {
1253 *ip = host_create_any(family);
1254 host->destroy(host);
1255 DBG2(DBG_KNL, "using host %H", *ip);
1256 return SUCCESS;
1257 }
1258 host->destroy(host);
1259
1260 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1261 while (ifaces->iterate(ifaces, (void**)&iface))
1262 {
1263 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1264 while (addrs->iterate(addrs, (void**)&addr))
1265 {
1266 if (ts->includes(ts, addr->ip))
1267 {
1268 found = TRUE;
1269 *ip = addr->ip->clone(addr->ip);
1270 break;
1271 }
1272 }
1273 addrs->destroy(addrs);
1274 if (found)
1275 {
1276 break;
1277 }
1278 }
1279 ifaces->destroy(ifaces);
1280
1281 if (!found)
1282 {
1283 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1284 return FAILED;
1285 }
1286 DBG2(DBG_KNL, "using host %H", *ip);
1287 return SUCCESS;
1288 }
1289
1290 /**
1291 * get the interface of a local address
1292 */
1293 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1294 {
1295 iterator_t *ifaces, *addrs;
1296 iface_entry_t *iface;
1297 addr_entry_t *addr;
1298 int ifindex = 0;
1299
1300 DBG2(DBG_KNL, "getting iface for %H", ip);
1301
1302 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1303 while (ifaces->iterate(ifaces, (void**)&iface))
1304 {
1305 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1306 while (addrs->iterate(addrs, (void**)&addr))
1307 {
1308 if (ip->ip_equals(ip, addr->ip))
1309 {
1310 ifindex = iface->ifindex;
1311 break;
1312 }
1313 }
1314 addrs->destroy(addrs);
1315 if (ifindex)
1316 {
1317 break;
1318 }
1319 }
1320 ifaces->destroy(ifaces);
1321
1322 if (ifindex == 0)
1323 {
1324 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1325 }
1326 return ifindex;
1327 }
1328
1329 /**
1330 * Manages the creation and deletion of ip addresses on an interface.
1331 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1332 */
1333 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1334 int flags, int if_index, host_t *ip)
1335 {
1336 unsigned char request[BUFFER_SIZE];
1337 struct nlmsghdr *hdr;
1338 struct ifaddrmsg *msg;
1339 chunk_t chunk;
1340
1341 memset(&request, 0, sizeof(request));
1342
1343 chunk = ip->get_address(ip);
1344
1345 hdr = (struct nlmsghdr*)request;
1346 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1347 hdr->nlmsg_type = nlmsg_type;
1348 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1349
1350 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1351 msg->ifa_family = ip->get_family(ip);
1352 msg->ifa_flags = 0;
1353 msg->ifa_prefixlen = 8 * chunk.len;
1354 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1355 msg->ifa_index = if_index;
1356
1357 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1358
1359 return netlink_send_ack(this, this->socket_rt, hdr);
1360 }
1361
1362 /**
1363 * Manages source routes in the routing table.
1364 * By setting the appropriate nlmsg_type, the route added or r.
1365 */
1366 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1367 int flags, route_entry_t *route)
1368 {
1369 unsigned char request[BUFFER_SIZE];
1370 struct nlmsghdr *hdr;
1371 struct rtmsg *msg;
1372 chunk_t chunk;
1373
1374 #if IPSEC_ROUTING_TABLE == 0
1375 /* if route is 0.0.0.0/0, we can't install it, as it would
1376 * overwrite the default route. Instead, we add two routes:
1377 * 0.0.0.0/1 and 128.0.0.0/1 */
1378 if (route->prefixlen == 0)
1379 {
1380 route_entry_t half;
1381 status_t status;
1382
1383 half.dst_net = chunk_alloca(route->dst_net.len);
1384 memset(half.dst_net.ptr, 0, half.dst_net.len);
1385 half.src_ip = route->src_ip;
1386 half.gateway = route->gateway;
1387 half.if_index = route->if_index;
1388 half.prefixlen = 1;
1389
1390 status = manage_srcroute(this, nlmsg_type, flags, &half);
1391 half.dst_net.ptr[0] |= 0x80;
1392 status = manage_srcroute(this, nlmsg_type, flags, &half);
1393 return status;
1394 }
1395 #endif
1396
1397 memset(&request, 0, sizeof(request));
1398
1399 hdr = (struct nlmsghdr*)request;
1400 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1401 hdr->nlmsg_type = nlmsg_type;
1402 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1403
1404 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1405 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1406 msg->rtm_dst_len = route->prefixlen;
1407 msg->rtm_table = IPSEC_ROUTING_TABLE;
1408 msg->rtm_protocol = RTPROT_STATIC;
1409 msg->rtm_type = RTN_UNICAST;
1410 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1411
1412 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1413 chunk = route->src_ip->get_address(route->src_ip);
1414 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1415 chunk = route->gateway->get_address(route->gateway);
1416 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1417 chunk.ptr = (char*)&route->if_index;
1418 chunk.len = sizeof(route->if_index);
1419 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1420
1421 return netlink_send_ack(this, this->socket_rt, hdr);
1422 }
1423
1424 /**
1425 * create or delete an rule to use our routing table
1426 */
1427 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1428 u_int32_t table, u_int32_t prio)
1429 {
1430 unsigned char request[BUFFER_SIZE];
1431 struct nlmsghdr *hdr;
1432 struct rtmsg *msg;
1433 chunk_t chunk;
1434
1435 memset(&request, 0, sizeof(request));
1436 hdr = (struct nlmsghdr*)request;
1437 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1438 hdr->nlmsg_type = nlmsg_type;
1439 if (nlmsg_type == RTM_NEWRULE)
1440 {
1441 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1442 }
1443 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1444
1445 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1446 msg->rtm_table = table;
1447 msg->rtm_family = AF_INET;
1448 msg->rtm_protocol = RTPROT_BOOT;
1449 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1450 msg->rtm_type = RTN_UNICAST;
1451
1452 chunk = chunk_from_thing(prio);
1453 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1454
1455 return netlink_send_ack(this, this->socket_rt, hdr);
1456 }
1457
1458 /**
1459 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1460 */
1461 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1462 {
1463 int bit, byte;
1464
1465 if (addr.len != net.len)
1466 {
1467 return FALSE;
1468 }
1469 /* scan through all bits, beginning in the front */
1470 for (byte = 0; byte < addr.len; byte++)
1471 {
1472 for (bit = 7; bit >= 0; bit--)
1473 {
1474 /* check if bits are equal (or we reached the end of the net) */
1475 if (bit + byte * 8 > net_len)
1476 {
1477 return TRUE;
1478 }
1479 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
1480 {
1481 return FALSE;
1482 }
1483 }
1484 }
1485 return TRUE;
1486 }
1487
1488 /**
1489 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1490 */
1491 static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
1492 bool nexthop)
1493 {
1494 unsigned char request[BUFFER_SIZE];
1495 struct nlmsghdr *hdr, *out, *current;
1496 struct rtmsg *msg;
1497 chunk_t chunk;
1498 size_t len;
1499 int best = -1;
1500 host_t *src = NULL, *gtw = NULL;
1501
1502 DBG2(DBG_KNL, "getting address to reach %H", dest);
1503
1504 memset(&request, 0, sizeof(request));
1505
1506 hdr = (struct nlmsghdr*)request;
1507 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
1508 hdr->nlmsg_type = RTM_GETROUTE;
1509 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1510
1511 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1512 msg->rtm_family = dest->get_family(dest);
1513
1514 chunk = dest->get_address(dest);
1515 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1516
1517 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1518 {
1519 DBG1(DBG_KNL, "getting address to %H failed", dest);
1520 return NULL;
1521 }
1522 current = out;
1523 while (NLMSG_OK(current, len))
1524 {
1525 switch (current->nlmsg_type)
1526 {
1527 case NLMSG_DONE:
1528 break;
1529 case RTM_NEWROUTE:
1530 {
1531 struct rtattr *rta;
1532 size_t rtasize;
1533 chunk_t rta_gtw, rta_src, rta_dst;
1534 u_int32_t rta_oif = 0;
1535
1536 rta_gtw = rta_src = rta_dst = chunk_empty;
1537 msg = (struct rtmsg*)(NLMSG_DATA(current));
1538 rta = RTM_RTA(msg);
1539 rtasize = RTM_PAYLOAD(current);
1540 while (RTA_OK(rta, rtasize))
1541 {
1542 switch (rta->rta_type)
1543 {
1544 case RTA_PREFSRC:
1545 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1546 break;
1547 case RTA_GATEWAY:
1548 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1549 break;
1550 case RTA_DST:
1551 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1552 break;
1553 case RTA_OIF:
1554 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1555 {
1556 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1557 }
1558 break;
1559 }
1560 rta = RTA_NEXT(rta, rtasize);
1561 }
1562
1563 /* apply the route if:
1564 * - it is not from our own ipsec routing table
1565 * - is better than a previous one
1566 * - is the default route or
1567 * - its destination net contains our destination
1568 */
1569 if (msg->rtm_table != IPSEC_ROUTING_TABLE
1570 && msg->rtm_dst_len > best
1571 && (msg->rtm_dst_len == 0 || /* default route */
1572 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
1573 {
1574 iterator_t *ifaces, *addrs;
1575 iface_entry_t *iface;
1576 addr_entry_t *addr;
1577
1578 best = msg->rtm_dst_len;
1579 if (nexthop)
1580 {
1581 DESTROY_IF(gtw);
1582 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
1583 }
1584 else if (rta_src.ptr)
1585 {
1586 DESTROY_IF(src);
1587 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
1588 }
1589 else
1590 {
1591 /* no source addr, get one from the interfaces */
1592 ifaces = this->ifaces->create_iterator_locked(
1593 this->ifaces, &this->mutex);
1594 while (ifaces->iterate(ifaces, (void**)&iface))
1595 {
1596 if (iface->ifindex == rta_oif)
1597 {
1598 addrs = iface->addrs->create_iterator(
1599 iface->addrs, TRUE);
1600 while (addrs->iterate(addrs, (void**)&addr))
1601 {
1602 chunk_t ip = addr->ip->get_address(addr->ip);
1603 if (msg->rtm_dst_len == 0
1604 || addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
1605 {
1606 DESTROY_IF(src);
1607 src = addr->ip->clone(addr->ip);
1608 break;
1609 }
1610 }
1611 addrs->destroy(addrs);
1612 }
1613 }
1614 ifaces->destroy(ifaces);
1615 }
1616 }
1617 /* FALL through */
1618 }
1619 default:
1620 current = NLMSG_NEXT(current, len);
1621 continue;
1622 }
1623 break;
1624 }
1625 free(out);
1626
1627 if (nexthop)
1628 {
1629 if (gtw)
1630 {
1631 return gtw;
1632 }
1633 return dest->clone(dest);
1634 }
1635 return src;
1636 }
1637
1638 /**
1639 * Implementation of kernel_interface_t.get_source_addr.
1640 */
1641 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1642 {
1643 return get_route(this, dest, FALSE);
1644 }
1645
1646 /**
1647 * Implementation of kernel_interface_t.add_ip.
1648 */
1649 static status_t add_ip(private_kernel_interface_t *this,
1650 host_t *virtual_ip, host_t *iface_ip)
1651 {
1652 iface_entry_t *iface;
1653 addr_entry_t *addr;
1654 iterator_t *addrs, *ifaces;
1655
1656 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1657
1658 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1659 while (ifaces->iterate(ifaces, (void**)&iface))
1660 {
1661 bool iface_found = FALSE;
1662
1663 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1664 while (addrs->iterate(addrs, (void**)&addr))
1665 {
1666 if (iface_ip->ip_equals(iface_ip, addr->ip))
1667 {
1668 iface_found = TRUE;
1669 }
1670 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1671 {
1672 addr->refcount++;
1673 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1674 virtual_ip, iface->ifname);
1675 addrs->destroy(addrs);
1676 ifaces->destroy(ifaces);
1677 return SUCCESS;
1678 }
1679 }
1680 addrs->destroy(addrs);
1681
1682 if (iface_found)
1683 {
1684 int ifindex = iface->ifindex;
1685 ifaces->destroy(ifaces);
1686 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1687 ifindex, virtual_ip) == SUCCESS)
1688 {
1689 addr = malloc_thing(addr_entry_t);
1690 addr->ip = virtual_ip->clone(virtual_ip);
1691 addr->refcount = 1;
1692 addr->virtual = TRUE;
1693 addr->scope = RT_SCOPE_UNIVERSE;
1694 pthread_mutex_lock(&this->mutex);
1695 iface->addrs->insert_last(iface->addrs, addr);
1696 pthread_mutex_unlock(&this->mutex);
1697 return SUCCESS;
1698 }
1699 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1700 return FAILED;
1701
1702 }
1703
1704 }
1705 ifaces->destroy(ifaces);
1706
1707 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1708 "virtual IP %H", iface_ip, virtual_ip);
1709 return FAILED;
1710 }
1711
1712 /**
1713 * Implementation of kernel_interface_t.del_ip.
1714 */
1715 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1716 {
1717 iface_entry_t *iface;
1718 addr_entry_t *addr;
1719 iterator_t *addrs, *ifaces;
1720
1721 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1722
1723 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1724 while (ifaces->iterate(ifaces, (void**)&iface))
1725 {
1726 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1727 while (addrs->iterate(addrs, (void**)&addr))
1728 {
1729 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1730 {
1731 int ifindex = iface->ifindex;
1732 addr->refcount--;
1733 if (addr->refcount == 0)
1734 {
1735 addrs->remove(addrs);
1736 addrs->destroy(addrs);
1737 ifaces->destroy(ifaces);
1738 addr_entry_destroy(addr);
1739 return manage_ipaddr(this, RTM_DELADDR, 0,
1740 ifindex, virtual_ip);
1741 }
1742 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1743 virtual_ip);
1744 addrs->destroy(addrs);
1745 ifaces->destroy(ifaces);
1746 return SUCCESS;
1747 }
1748 }
1749 addrs->destroy(addrs);
1750 }
1751 ifaces->destroy(ifaces);
1752
1753 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1754 return FAILED;
1755 }
1756
1757 /**
1758 * Implementation of kernel_interface_t.get_spi.
1759 */
1760 static status_t get_spi(private_kernel_interface_t *this,
1761 host_t *src, host_t *dst,
1762 protocol_id_t protocol, u_int32_t reqid,
1763 u_int32_t *spi)
1764 {
1765 unsigned char request[BUFFER_SIZE];
1766 struct nlmsghdr *hdr, *out;
1767 struct xfrm_userspi_info *userspi;
1768 u_int32_t received_spi = 0;
1769 size_t len;
1770
1771 memset(&request, 0, sizeof(request));
1772
1773 DBG2(DBG_KNL, "getting SPI for reqid %d", reqid);
1774
1775 hdr = (struct nlmsghdr*)request;
1776 hdr->nlmsg_flags = NLM_F_REQUEST;
1777 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1778 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1779
1780 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1781 host2xfrm(src, &userspi->info.saddr);
1782 host2xfrm(dst, &userspi->info.id.daddr);
1783 userspi->info.id.proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1784 userspi->info.mode = TRUE; /* tunnel mode */
1785 userspi->info.reqid = reqid;
1786 userspi->info.family = src->get_family(src);
1787 userspi->min = 0xc0000000;
1788 userspi->max = 0xcFFFFFFF;
1789
1790 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1791 {
1792 hdr = out;
1793 while (NLMSG_OK(hdr, len))
1794 {
1795 switch (hdr->nlmsg_type)
1796 {
1797 case XFRM_MSG_NEWSA:
1798 {
1799 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1800 received_spi = usersa->id.spi;
1801 break;
1802 }
1803 case NLMSG_ERROR:
1804 {
1805 struct nlmsgerr *err = NLMSG_DATA(hdr);
1806
1807 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1808 strerror(-err->error), -err->error);
1809 break;
1810 }
1811 default:
1812 hdr = NLMSG_NEXT(hdr, len);
1813 continue;
1814 case NLMSG_DONE:
1815 break;
1816 }
1817 break;
1818 }
1819 free(out);
1820 }
1821
1822 if (received_spi == 0)
1823 {
1824 DBG1(DBG_KNL, "unable to get SPI for reqid %d", reqid);
1825 return FAILED;
1826 }
1827
1828 DBG2(DBG_KNL, "got SPI 0x%x for reqid %d", received_spi, reqid);
1829
1830 *spi = received_spi;
1831 return SUCCESS;
1832 }
1833
1834 /**
1835 * Implementation of kernel_interface_t.add_sa.
1836 */
1837 static status_t add_sa(private_kernel_interface_t *this,
1838 host_t *src, host_t *dst, u_int32_t spi,
1839 protocol_id_t protocol, u_int32_t reqid,
1840 u_int64_t expire_soft, u_int64_t expire_hard,
1841 algorithm_t *enc_alg, algorithm_t *int_alg,
1842 prf_plus_t *prf_plus, mode_t mode, bool encap,
1843 bool replace)
1844 {
1845 unsigned char request[BUFFER_SIZE];
1846 char *alg_name;
1847 u_int key_size;
1848 struct nlmsghdr *hdr;
1849 struct xfrm_usersa_info *sa;
1850
1851 memset(&request, 0, sizeof(request));
1852
1853 DBG2(DBG_KNL, "adding SAD entry with SPI 0x%x", spi);
1854
1855 hdr = (struct nlmsghdr*)request;
1856 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1857 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
1858 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1859
1860 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
1861 host2xfrm(src, &sa->saddr);
1862 host2xfrm(dst, &sa->id.daddr);
1863 sa->id.spi = spi;
1864 sa->id.proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1865 sa->family = src->get_family(src);
1866 sa->mode = mode;
1867 sa->replay_window = 32;
1868 sa->reqid = reqid;
1869 /* we currently do not expire SAs by volume/packet count */
1870 sa->lft.soft_byte_limit = XFRM_INF;
1871 sa->lft.hard_byte_limit = XFRM_INF;
1872 sa->lft.soft_packet_limit = XFRM_INF;
1873 sa->lft.hard_packet_limit = XFRM_INF;
1874 /* we use lifetimes since added, not since used */
1875 sa->lft.soft_add_expires_seconds = expire_soft;
1876 sa->lft.hard_add_expires_seconds = expire_hard;
1877 sa->lft.soft_use_expires_seconds = 0;
1878 sa->lft.hard_use_expires_seconds = 0;
1879
1880 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
1881
1882 if (enc_alg->algorithm != ENCR_UNDEFINED)
1883 {
1884 rthdr->rta_type = XFRMA_ALG_CRYPT;
1885 alg_name = lookup_algorithm(encryption_algs, enc_alg, &key_size);
1886 if (alg_name == NULL)
1887 {
1888 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1889 encryption_algorithm_names, enc_alg->algorithm);
1890 return FAILED;
1891 }
1892 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1893 encryption_algorithm_names, enc_alg->algorithm, key_size);
1894
1895 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + key_size);
1896 hdr->nlmsg_len += rthdr->rta_len;
1897 if (hdr->nlmsg_len > sizeof(request))
1898 {
1899 return FAILED;
1900 }
1901
1902 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
1903 algo->alg_key_len = key_size;
1904 strcpy(algo->alg_name, alg_name);
1905 prf_plus->get_bytes(prf_plus, key_size / 8, algo->alg_key);
1906
1907 rthdr = XFRM_RTA_NEXT(rthdr);
1908 }
1909
1910 if (int_alg->algorithm != AUTH_UNDEFINED)
1911 {
1912 rthdr->rta_type = XFRMA_ALG_AUTH;
1913 alg_name = lookup_algorithm(integrity_algs, int_alg, &key_size);
1914 if (alg_name == NULL)
1915 {
1916 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1917 integrity_algorithm_names, int_alg->algorithm);
1918 return FAILED;
1919 }
1920 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
1921 integrity_algorithm_names, int_alg->algorithm, key_size);
1922
1923 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + key_size);
1924 hdr->nlmsg_len += rthdr->rta_len;
1925 if (hdr->nlmsg_len > sizeof(request))
1926 {
1927 return FAILED;
1928 }
1929
1930 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
1931 algo->alg_key_len = key_size;
1932 strcpy(algo->alg_name, alg_name);
1933 prf_plus->get_bytes(prf_plus, key_size / 8, algo->alg_key);
1934
1935 rthdr = XFRM_RTA_NEXT(rthdr);
1936 }
1937
1938 /* TODO: add IPComp here */
1939
1940 if (encap)
1941 {
1942 rthdr->rta_type = XFRMA_ENCAP;
1943 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
1944
1945 hdr->nlmsg_len += rthdr->rta_len;
1946 if (hdr->nlmsg_len > sizeof(request))
1947 {
1948 return FAILED;
1949 }
1950
1951 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
1952 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
1953 tmpl->encap_sport = htons(src->get_port(src));
1954 tmpl->encap_dport = htons(dst->get_port(dst));
1955 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
1956 /* encap_oa could probably be derived from the
1957 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
1958 * pluto does the same as we do here but it uses encap_oa in the
1959 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
1960 * the kernel ignores it anyway
1961 * -> does that mean that NAT-T encap doesn't work in transport mode?
1962 * No. The reason the kernel ignores NAT-OA is that it recomputes
1963 * (or, rather, just ignores) the checksum. If packets pass
1964 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
1965 rthdr = XFRM_RTA_NEXT(rthdr);
1966 }
1967
1968 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
1969 {
1970 DBG1(DBG_KNL, "unable to add SAD entry with SPI 0x%x", spi);
1971 return FAILED;
1972 }
1973 return SUCCESS;
1974 }
1975
1976 /**
1977 * Implementation of kernel_interface_t.update_sa.
1978 */
1979 static status_t update_sa(private_kernel_interface_t *this,
1980 u_int32_t spi, protocol_id_t protocol,
1981 host_t *src, host_t *dst,
1982 host_t *new_src, host_t *new_dst, bool encap)
1983 {
1984 unsigned char request[BUFFER_SIZE], *pos;
1985 struct nlmsghdr *hdr, *out = NULL;
1986 struct xfrm_usersa_id *sa_id;
1987 struct xfrm_usersa_info *out_sa = NULL, *sa;
1988 size_t len;
1989 struct rtattr *rta;
1990 size_t rtasize;
1991 struct xfrm_encap_tmpl* tmpl = NULL;
1992
1993 memset(&request, 0, sizeof(request));
1994
1995 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x for update", spi);
1996
1997 /* query the exisiting SA first */
1998 hdr = (struct nlmsghdr*)request;
1999 hdr->nlmsg_flags = NLM_F_REQUEST;
2000 hdr->nlmsg_type = XFRM_MSG_GETSA;
2001 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2002
2003 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2004 host2xfrm(dst, &sa_id->daddr);
2005 sa_id->spi = spi;
2006 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2007 sa_id->family = dst->get_family(dst);
2008
2009 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2010 {
2011 hdr = out;
2012 while (NLMSG_OK(hdr, len))
2013 {
2014 switch (hdr->nlmsg_type)
2015 {
2016 case XFRM_MSG_NEWSA:
2017 {
2018 out_sa = NLMSG_DATA(hdr);
2019 break;
2020 }
2021 case NLMSG_ERROR:
2022 {
2023 struct nlmsgerr *err = NLMSG_DATA(hdr);
2024 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2025 strerror(-err->error), -err->error);
2026 break;
2027 }
2028 default:
2029 hdr = NLMSG_NEXT(hdr, len);
2030 continue;
2031 case NLMSG_DONE:
2032 break;
2033 }
2034 break;
2035 }
2036 }
2037 if (out_sa == NULL ||
2038 this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
2039 {
2040 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2041 free(out);
2042 return FAILED;
2043 }
2044
2045 DBG2(DBG_KNL, "updating SAD entry with SPI 0x%x from %#H..%#H to %#H..%#H",
2046 spi, src, dst, new_src, new_dst);
2047
2048 /* copy over the SA from out to request */
2049 hdr = (struct nlmsghdr*)request;
2050 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
2051 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2052 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2053 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2054 sa = NLMSG_DATA(hdr);
2055 sa->family = new_dst->get_family(new_dst);
2056
2057 if (!src->ip_equals(src, new_src))
2058 {
2059 host2xfrm(new_src, &sa->saddr);
2060 }
2061 if (!dst->ip_equals(dst, new_dst))
2062 {
2063 host2xfrm(new_dst, &sa->id.daddr);
2064 }
2065
2066 rta = XFRM_RTA(out, struct xfrm_usersa_info);
2067 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
2068 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
2069 while(RTA_OK(rta, rtasize))
2070 {
2071 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2072 if (rta->rta_type != XFRMA_ENCAP || encap)
2073 {
2074 if (rta->rta_type == XFRMA_ENCAP)
2075 { /* update encap tmpl */
2076 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2077 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2078 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2079 }
2080 memcpy(pos, rta, rta->rta_len);
2081 pos += rta->rta_len;
2082 hdr->nlmsg_len += rta->rta_len;
2083 }
2084 rta = RTA_NEXT(rta, rtasize);
2085 }
2086 if (tmpl == NULL && encap)
2087 { /* add tmpl if we are enabling it */
2088 rta = (struct rtattr*)pos;
2089 rta->rta_type = XFRMA_ENCAP;
2090 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2091 hdr->nlmsg_len += rta->rta_len;
2092 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2093 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2094 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2095 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2096 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2097 }
2098
2099 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2100 {
2101 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2102 free(out);
2103 return FAILED;
2104 }
2105 free(out);
2106
2107 return SUCCESS;
2108 }
2109
2110 /**
2111 * Implementation of kernel_interface_t.query_sa.
2112 */
2113 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
2114 u_int32_t spi, protocol_id_t protocol,
2115 u_int32_t *use_time)
2116 {
2117 unsigned char request[BUFFER_SIZE];
2118 struct nlmsghdr *out = NULL, *hdr;
2119 struct xfrm_usersa_id *sa_id;
2120 struct xfrm_usersa_info *sa = NULL;
2121 size_t len;
2122
2123 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x", spi);
2124 memset(&request, 0, sizeof(request));
2125
2126 hdr = (struct nlmsghdr*)request;
2127 hdr->nlmsg_flags = NLM_F_REQUEST;
2128 hdr->nlmsg_type = XFRM_MSG_GETSA;
2129 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2130
2131 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2132 host2xfrm(dst, &sa_id->daddr);
2133 sa_id->spi = spi;
2134 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2135 sa_id->family = dst->get_family(dst);
2136
2137 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2138 {
2139 hdr = out;
2140 while (NLMSG_OK(hdr, len))
2141 {
2142 switch (hdr->nlmsg_type)
2143 {
2144 case XFRM_MSG_NEWSA:
2145 {
2146 sa = NLMSG_DATA(hdr);
2147 break;
2148 }
2149 case NLMSG_ERROR:
2150 {
2151 struct nlmsgerr *err = NLMSG_DATA(hdr);
2152 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2153 strerror(-err->error), -err->error);
2154 break;
2155 }
2156 default:
2157 hdr = NLMSG_NEXT(hdr, len);
2158 continue;
2159 case NLMSG_DONE:
2160 break;
2161 }
2162 break;
2163 }
2164 }
2165
2166 if (sa == NULL)
2167 {
2168 DBG1(DBG_KNL, "unable to query SAD entry with SPI 0x%x", spi);
2169 free(out);
2170 return FAILED;
2171 }
2172
2173 *use_time = sa->curlft.use_time;
2174 free (out);
2175 return SUCCESS;
2176 }
2177
2178 /**
2179 * Implementation of kernel_interface_t.del_sa.
2180 */
2181 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2182 u_int32_t spi, protocol_id_t protocol)
2183 {
2184 unsigned char request[BUFFER_SIZE];
2185 struct nlmsghdr *hdr;
2186 struct xfrm_usersa_id *sa_id;
2187
2188 memset(&request, 0, sizeof(request));
2189
2190 DBG2(DBG_KNL, "deleting SAD entry with SPI 0x%x", spi);
2191
2192 hdr = (struct nlmsghdr*)request;
2193 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2194 hdr->nlmsg_type = XFRM_MSG_DELSA;
2195 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2196
2197 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2198 host2xfrm(dst, &sa_id->daddr);
2199 sa_id->spi = spi;
2200 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2201 sa_id->family = dst->get_family(dst);
2202
2203 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2204 {
2205 DBG1(DBG_KNL, "unable to delete SAD entry with SPI 0x%x", spi);
2206 return FAILED;
2207 }
2208 DBG2(DBG_KNL, "deleted SAD entry with SPI 0x%x", spi);
2209 return SUCCESS;
2210 }
2211
2212 /**
2213 * Implementation of kernel_interface_t.add_policy.
2214 */
2215 static status_t add_policy(private_kernel_interface_t *this,
2216 host_t *src, host_t *dst,
2217 traffic_selector_t *src_ts,
2218 traffic_selector_t *dst_ts,
2219 policy_dir_t direction, protocol_id_t protocol,
2220 u_int32_t reqid, bool high_prio, mode_t mode)
2221 {
2222 iterator_t *iterator;
2223 policy_entry_t *current, *policy;
2224 bool found = FALSE;
2225 unsigned char request[BUFFER_SIZE];
2226 struct xfrm_userpolicy_info *policy_info;
2227 struct nlmsghdr *hdr;
2228
2229 /* create a policy */
2230 policy = malloc_thing(policy_entry_t);
2231 memset(policy, 0, sizeof(policy_entry_t));
2232 policy->sel = ts2selector(src_ts, dst_ts);
2233 policy->direction = direction;
2234
2235 /* find the policy, which matches EXACTLY */
2236 pthread_mutex_lock(&this->mutex);
2237 iterator = this->policies->create_iterator(this->policies, TRUE);
2238 while (iterator->iterate(iterator, (void**)&current))
2239 {
2240 if (memcmp(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) == 0 &&
2241 policy->direction == current->direction)
2242 {
2243 /* use existing policy */
2244 current->refcount++;
2245 DBG2(DBG_KNL, "policy %R===%R already exists, increasing ",
2246 "refcount", src_ts, dst_ts);
2247 free(policy);
2248 policy = current;
2249 found = TRUE;
2250 break;
2251 }
2252 }
2253 iterator->destroy(iterator);
2254 if (!found)
2255 { /* apply the new one, if we have no such policy */
2256 this->policies->insert_last(this->policies, policy);
2257 policy->refcount = 1;
2258 }
2259
2260 DBG2(DBG_KNL, "adding policy %R===%R", src_ts, dst_ts);
2261
2262 memset(&request, 0, sizeof(request));
2263 hdr = (struct nlmsghdr*)request;
2264 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2265 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2266 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2267
2268 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2269 policy_info->sel = policy->sel;
2270 policy_info->dir = policy->direction;
2271 /* calculate priority based on source selector size, small size = high prio */
2272 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2273 policy_info->priority -= policy->sel.prefixlen_s * 10;
2274 policy_info->priority -= policy->sel.proto ? 2 : 0;
2275 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2276 policy_info->action = XFRM_POLICY_ALLOW;
2277 policy_info->share = XFRM_SHARE_ANY;
2278 pthread_mutex_unlock(&this->mutex);
2279
2280 /* policies don't expire */
2281 policy_info->lft.soft_byte_limit = XFRM_INF;
2282 policy_info->lft.soft_packet_limit = XFRM_INF;
2283 policy_info->lft.hard_byte_limit = XFRM_INF;
2284 policy_info->lft.hard_packet_limit = XFRM_INF;
2285 policy_info->lft.soft_add_expires_seconds = 0;
2286 policy_info->lft.hard_add_expires_seconds = 0;
2287 policy_info->lft.soft_use_expires_seconds = 0;
2288 policy_info->lft.hard_use_expires_seconds = 0;
2289
2290 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2291 rthdr->rta_type = XFRMA_TMPL;
2292
2293 rthdr->rta_len = sizeof(struct xfrm_user_tmpl);
2294 rthdr->rta_len = RTA_LENGTH(rthdr->rta_len);
2295
2296 hdr->nlmsg_len += rthdr->rta_len;
2297 if (hdr->nlmsg_len > sizeof(request))
2298 {
2299 return FAILED;
2300 }
2301
2302 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2303 tmpl->reqid = reqid;
2304 tmpl->id.proto = (protocol == PROTO_AH) ? KERNEL_AH : KERNEL_ESP;
2305 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2306 tmpl->mode = mode;
2307 tmpl->family = src->get_family(src);
2308
2309 host2xfrm(src, &tmpl->saddr);
2310 host2xfrm(dst, &tmpl->id.daddr);
2311
2312 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2313 {
2314 DBG1(DBG_KNL, "unable to add policy %R===%R", src_ts, dst_ts);
2315 return FAILED;
2316 }
2317
2318 /* install a route, if:
2319 * - we are NOT updating a policy
2320 * - this is a forward policy (to just get one for each child)
2321 * - we are in tunnel mode
2322 * - we are not using IPv6 (does not work correctly yet!)
2323 */
2324 if (policy->route == NULL && direction == POLICY_FWD &&
2325 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6)
2326 {
2327 policy->route = malloc_thing(route_entry_t);
2328 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2329 {
2330 /* get the nexthop to src (src as we are in POLICY_FWD).*/
2331 policy->route->gateway = get_route(this, src, TRUE);
2332 policy->route->if_index = get_interface_index(this, dst);
2333 policy->route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2334 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr, policy->route->dst_net.len);
2335 policy->route->prefixlen = policy->sel.prefixlen_s;
2336
2337 if (manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2338 policy->route) != SUCCESS)
2339 {
2340 DBG1(DBG_KNL, "unable to install source route for %H",
2341 policy->route->src_ip);
2342 route_entry_destroy(policy->route);
2343 policy->route = NULL;
2344 }
2345 }
2346 else
2347 {
2348 free(policy->route);
2349 policy->route = NULL;
2350 }
2351 }
2352
2353 return SUCCESS;
2354 }
2355
2356 /**
2357 * Implementation of kernel_interface_t.query_policy.
2358 */
2359 static status_t query_policy(private_kernel_interface_t *this,
2360 traffic_selector_t *src_ts,
2361 traffic_selector_t *dst_ts,
2362 policy_dir_t direction, u_int32_t *use_time)
2363 {
2364 unsigned char request[BUFFER_SIZE];
2365 struct nlmsghdr *out = NULL, *hdr;
2366 struct xfrm_userpolicy_id *policy_id;
2367 struct xfrm_userpolicy_info *policy = NULL;
2368 size_t len;
2369
2370 memset(&request, 0, sizeof(request));
2371
2372 DBG2(DBG_KNL, "querying policy %R===%R", src_ts, dst_ts);
2373
2374 hdr = (struct nlmsghdr*)request;
2375 hdr->nlmsg_flags = NLM_F_REQUEST;
2376 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2377 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2378
2379 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2380 policy_id->sel = ts2selector(src_ts, dst_ts);
2381 policy_id->dir = direction;
2382
2383 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2384 {
2385 hdr = out;
2386 while (NLMSG_OK(hdr, len))
2387 {
2388 switch (hdr->nlmsg_type)
2389 {
2390 case XFRM_MSG_NEWPOLICY:
2391 {
2392 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2393 break;
2394 }
2395 case NLMSG_ERROR:
2396 {
2397 struct nlmsgerr *err = NLMSG_DATA(hdr);
2398 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2399 strerror(-err->error), -err->error);
2400 break;
2401 }
2402 default:
2403 hdr = NLMSG_NEXT(hdr, len);
2404 continue;
2405 case NLMSG_DONE:
2406 break;
2407 }
2408 break;
2409 }
2410 }
2411
2412 if (policy == NULL)
2413 {
2414 DBG2(DBG_KNL, "unable to query policy %R===%R", src_ts, dst_ts);
2415 free(out);
2416 return FAILED;
2417 }
2418 *use_time = (time_t)policy->curlft.use_time;
2419
2420 free(out);
2421 return SUCCESS;
2422 }
2423
2424 /**
2425 * Implementation of kernel_interface_t.del_policy.
2426 */
2427 static status_t del_policy(private_kernel_interface_t *this,
2428 traffic_selector_t *src_ts,
2429 traffic_selector_t *dst_ts,
2430 policy_dir_t direction)
2431 {
2432 policy_entry_t *current, policy, *to_delete = NULL;
2433 route_entry_t *route;
2434 unsigned char request[BUFFER_SIZE];
2435 struct nlmsghdr *hdr;
2436 struct xfrm_userpolicy_id *policy_id;
2437 iterator_t *iterator;
2438
2439 DBG2(DBG_KNL, "deleting policy %R===%R", src_ts, dst_ts);
2440
2441 /* create a policy */
2442 memset(&policy, 0, sizeof(policy_entry_t));
2443 policy.sel = ts2selector(src_ts, dst_ts);
2444 policy.direction = direction;
2445
2446 /* find the policy */
2447 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2448 while (iterator->iterate(iterator, (void**)&current))
2449 {
2450 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2451 policy.direction == current->direction)
2452 {
2453 to_delete = current;
2454 if (--to_delete->refcount > 0)
2455 {
2456 /* is used by more SAs, keep in kernel */
2457 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2458 iterator->destroy(iterator);
2459 return SUCCESS;
2460 }
2461 /* remove if last reference */
2462 iterator->remove(iterator);
2463 break;
2464 }
2465 }
2466 iterator->destroy(iterator);
2467 if (!to_delete)
2468 {
2469 DBG1(DBG_KNL, "deleting policy %R===%R failed, not found", src_ts, dst_ts);
2470 return NOT_FOUND;
2471 }
2472
2473 memset(&request, 0, sizeof(request));
2474
2475 hdr = (struct nlmsghdr*)request;
2476 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2477 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2478 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2479
2480 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2481 policy_id->sel = to_delete->sel;
2482 policy_id->dir = direction;
2483
2484 route = to_delete->route;
2485 free(to_delete);
2486
2487 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2488 {
2489 DBG1(DBG_KNL, "unable to delete policy %R===%R", src_ts, dst_ts);
2490 return FAILED;
2491 }
2492
2493 if (route)
2494 {
2495 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2496 {
2497 DBG1(DBG_KNL, "error uninstalling route installed with "
2498 "policy %R===%R", src_ts, dst_ts);
2499 }
2500 route_entry_destroy(route);
2501 }
2502 return SUCCESS;
2503 }
2504
2505 /**
2506 * Implementation of kernel_interface_t.destroy.
2507 */
2508 static void destroy(private_kernel_interface_t *this)
2509 {
2510 manage_rule(this, RTM_DELRULE, IPSEC_ROUTING_TABLE, IPSEC_ROUTING_TABLE_PRIO);
2511
2512 this->job->cancel(this->job);
2513 close(this->socket_xfrm_events);
2514 close(this->socket_xfrm);
2515 close(this->socket_rt_events);
2516 close(this->socket_rt);
2517 this->policies->destroy(this->policies);
2518 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2519 free(this);
2520 }
2521
2522 /*
2523 * Described in header.
2524 */
2525 kernel_interface_t *kernel_interface_create()
2526 {
2527 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
2528 struct sockaddr_nl addr;
2529
2530 /* public functions */
2531 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
2532 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,algorithm_t*,algorithm_t*,prf_plus_t*,mode_t,bool,bool))add_sa;
2533 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
2534 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
2535 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
2536 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t))add_policy;
2537 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
2538 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
2539 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
2540 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
2541 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
2542 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
2543 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
2544 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
2545
2546 /* private members */
2547 this->policies = linked_list_create();
2548 this->ifaces = linked_list_create();
2549 this->hiter = NULL;
2550 this->seq = 200;
2551 pthread_mutex_init(&this->mutex,NULL);
2552 timerclear(&this->last_roam);
2553
2554 memset(&addr, 0, sizeof(addr));
2555 addr.nl_family = AF_NETLINK;
2556
2557 /* create and bind RT socket */
2558 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2559 if (this->socket_rt <= 0)
2560 {
2561 charon->kill(charon, "unable to create RT netlink socket");
2562 }
2563 addr.nl_groups = 0;
2564 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
2565 {
2566 charon->kill(charon, "unable to bind RT netlink socket");
2567 }
2568
2569 /* create and bind RT socket for events (address/interface/route changes) */
2570 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2571 if (this->socket_rt_events <= 0)
2572 {
2573 charon->kill(charon, "unable to create RT event socket");
2574 }
2575 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2576 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
2577 if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
2578 {
2579 charon->kill(charon, "unable to bind RT event socket");
2580 }
2581
2582 /* create and bind XFRM socket */
2583 this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2584 if (this->socket_xfrm <= 0)
2585 {
2586 charon->kill(charon, "unable to create XFRM netlink socket");
2587 }
2588 addr.nl_groups = 0;
2589 if (bind(this->socket_xfrm, (struct sockaddr*)&addr, sizeof(addr)))
2590 {
2591 charon->kill(charon, "unable to bind XFRM netlink socket");
2592 }
2593
2594 /* create and bind XFRM socket for ACQUIRE & EXPIRE */
2595 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2596 if (this->socket_xfrm_events <= 0)
2597 {
2598 charon->kill(charon, "unable to create XFRM event socket");
2599 }
2600 addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
2601 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
2602 {
2603 charon->kill(charon, "unable to bind XFRM event socket");
2604 }
2605
2606 this->job = callback_job_create((callback_job_cb_t)receive_events,
2607 this, NULL, NULL);
2608 charon->processor->queue_job(charon->processor, (job_t*)this->job);
2609
2610 if (init_address_list(this) != SUCCESS)
2611 {
2612 charon->kill(charon, "unable to get interface list");
2613 }
2614
2615 if (manage_rule(this, RTM_NEWRULE, IPSEC_ROUTING_TABLE,
2616 IPSEC_ROUTING_TABLE_PRIO) != SUCCESS)
2617 {
2618 DBG1(DBG_KNL, "unable to create routing table rule");
2619 }
2620
2621 return &this->public;
2622 }
2623