do not complain about existing routes
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /*
2 * Copyright (C) 2006-2008 Tobias Brunner
3 * Copyright (C) 2005-2007 Martin Willi
4 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
5 * Copyright (C) 2006 Daniel Roethlisberger
6 * Copyright (C) 2005 Jan Hutter
7 * Hochschule fuer Technik Rapperswil
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2 of the License, or (at your
12 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 * for more details.
18 *
19 * $Id$
20 */
21
22 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <linux/netlink.h>
26 #include <linux/rtnetlink.h>
27 #include <linux/xfrm.h>
28 #include <linux/udp.h>
29 #include <netinet/in.h>
30 #include <pthread.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <errno.h>
34 #include <string.h>
35 #include <net/if.h>
36 #include <sys/ioctl.h>
37
38 #include "kernel_interface.h"
39
40 #include <daemon.h>
41 #include <utils/linked_list.h>
42 #include <processing/jobs/delete_child_sa_job.h>
43 #include <processing/jobs/rekey_child_sa_job.h>
44 #include <processing/jobs/acquire_job.h>
45 #include <processing/jobs/callback_job.h>
46 #include <processing/jobs/roam_job.h>
47
/** XFRM state flag, defined here for headers older than Linux 2.6.26 */
#ifndef XFRM_STATE_AF_UNSPEC
#define XFRM_STATE_AF_UNSPEC 32
#endif

/** default routing table that receives the routes installed by us */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif
/** default priority of that routing table */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** default priorities of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** time to wait before a roam job is fired, in milliseconds */
#define ROAM_DELAY 100

/** size of the buffers used to assemble netlink requests, in bytes */
#define BUFFER_SIZE 1024
69
/**
 * Returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'.
 *
 * NLMSG_DATA() yields a void pointer; it is cast to char* before the offset
 * is added, as arithmetic on void* is a GNU extension and not valid ISO C.
 */
#define XFRM_RTA(nlh, x) \
	((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + NLMSG_ALIGN(sizeof(x))))
/**
 * Returns a pointer to the next rtattr following rta.
 * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
 */
#define XFRM_RTA_NEXT(rta) \
	((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
/**
 * Returns the total size of the attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info').
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
85
86 typedef struct kernel_algorithm_t kernel_algorithm_t;
87
88 /**
89 * Mapping from the algorithms defined in IKEv2 to
90 * kernel level algorithm names and their key length
91 */
92 struct kernel_algorithm_t {
93 /**
94 * Identifier specified in IKEv2
95 */
96 int ikev2_id;
97
98 /**
99 * Name of the algorithm, as used as kernel identifier
100 */
101 char *name;
102
103 /**
104 * Key length in bits, if fixed size
105 */
106 u_int key_size;
107 };
108 #define END_OF_LIST -1
109
110 /**
111 * Algorithms for encryption
112 */
113 static kernel_algorithm_t encryption_algs[] = {
114 /* {ENCR_DES_IV64, "***", 0}, */
115 {ENCR_DES, "des", 64},
116 {ENCR_3DES, "des3_ede", 192},
117 /* {ENCR_RC5, "***", 0}, */
118 /* {ENCR_IDEA, "***", 0}, */
119 {ENCR_CAST, "cast128", 0},
120 {ENCR_BLOWFISH, "blowfish", 0},
121 /* {ENCR_3IDEA, "***", 0}, */
122 /* {ENCR_DES_IV32, "***", 0}, */
123 {ENCR_NULL, "cipher_null", 0},
124 {ENCR_AES_CBC, "aes", 0},
125 /* {ENCR_AES_CTR, "***", 0}, */
126 {ENCR_AES_CCM_ICV8, "rfc4309(ccm(aes))", 64}, /* key_size = ICV size */
127 {ENCR_AES_CCM_ICV12, "rfc4309(ccm(aes))", 96}, /* key_size = ICV size */
128 {ENCR_AES_CCM_ICV16, "rfc4309(ccm(aes))", 128}, /* key_size = ICV size */
129 {ENCR_AES_GCM_ICV8, "rfc4106(gcm(aes))", 64}, /* key_size = ICV size */
130 {ENCR_AES_GCM_ICV12, "rfc4106(gcm(aes))", 96}, /* key_size = ICV size */
131 {ENCR_AES_GCM_ICV16, "rfc4106(gcm(aes))", 128}, /* key_size = ICV size */
132 {END_OF_LIST, NULL, 0},
133 };
134
135 /**
136 * Algorithms for integrity protection
137 */
138 static kernel_algorithm_t integrity_algs[] = {
139 {AUTH_HMAC_MD5_96, "md5", 128},
140 {AUTH_HMAC_SHA1_96, "sha1", 160},
141 {AUTH_HMAC_SHA2_256_128, "sha256", 256},
142 {AUTH_HMAC_SHA2_384_192, "sha384", 384},
143 {AUTH_HMAC_SHA2_512_256, "sha512", 512},
144 /* {AUTH_DES_MAC, "***", 0}, */
145 /* {AUTH_KPDK_MD5, "***", 0}, */
146 {AUTH_AES_XCBC_96, "xcbc(aes)", 128},
147 {END_OF_LIST, NULL, 0},
148 };
149
150 /**
151 * Algorithms for IPComp
152 */
153 static kernel_algorithm_t compression_algs[] = {
154 /* {IPCOMP_OUI, "***", 0}, */
155 {IPCOMP_DEFLATE, "deflate", 0},
156 {IPCOMP_LZS, "lzs", 0},
157 {IPCOMP_LZJH, "lzjh", 0},
158 {END_OF_LIST, NULL, 0},
159 };
160
161 /**
162 * Look up a kernel algorithm name and its key size
163 */
164 static char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
165 u_int16_t ikev2_algo, u_int16_t *key_size)
166 {
167 while (kernel_algo->ikev2_id != END_OF_LIST)
168 {
169 if (ikev2_algo == kernel_algo->ikev2_id)
170 {
171 /* match, evaluate key length */
172 if (key_size && *key_size == 0)
173 { /* update key size if not set */
174 *key_size = kernel_algo->key_size;
175 }
176 return kernel_algo->name;
177 }
178 kernel_algo++;
179 }
180 return NULL;
181 }
182
183 typedef struct route_entry_t route_entry_t;
184
185 /**
186 * installed routing entry
187 */
188 struct route_entry_t {
189
190 /** Index of the interface the route is bound to */
191 int if_index;
192
193 /** Source ip of the route */
194 host_t *src_ip;
195
196 /** gateway for this route */
197 host_t *gateway;
198
199 /** Destination net */
200 chunk_t dst_net;
201
202 /** Destination net prefixlen */
203 u_int8_t prefixlen;
204 };
205
206 /**
207 * destroy an route_entry_t object
208 */
209 static void route_entry_destroy(route_entry_t *this)
210 {
211 this->src_ip->destroy(this->src_ip);
212 this->gateway->destroy(this->gateway);
213 chunk_free(&this->dst_net);
214 free(this);
215 }
216
217 typedef struct policy_entry_t policy_entry_t;
218
219 /**
220 * installed kernel policy.
221 */
222 struct policy_entry_t {
223
224 /** direction of this policy: in, out, forward */
225 u_int8_t direction;
226
227 /** protocol ID: ESP/AH */
228 protocol_id_t proto;
229
230 /** reqid of the policy */
231 u_int32_t reqid;
232
233 /** parameters of installed policy */
234 struct xfrm_selector sel;
235
236 /** associated route installed for this policy */
237 route_entry_t *route;
238
239 /** by how many CHILD_SA's this policy is used */
240 u_int refcount;
241 };
242
243 typedef struct addr_entry_t addr_entry_t;
244
245 /**
246 * IP address in an inface_entry_t
247 */
248 struct addr_entry_t {
249
250 /** The ip address */
251 host_t *ip;
252
253 /** virtual IP managed by us */
254 bool virtual;
255
256 /** scope of the address */
257 u_char scope;
258
259 /** Number of times this IP is used, if virtual */
260 u_int refcount;
261 };
262
263 /**
264 * destroy a addr_entry_t object
265 */
266 static void addr_entry_destroy(addr_entry_t *this)
267 {
268 this->ip->destroy(this->ip);
269 free(this);
270 }
271
272 typedef struct iface_entry_t iface_entry_t;
273
274 /**
275 * A network interface on this system, containing addr_entry_t's
276 */
277 struct iface_entry_t {
278
279 /** interface index */
280 int ifindex;
281
282 /** name of the interface */
283 char ifname[IFNAMSIZ];
284
285 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
286 u_int flags;
287
288 /** list of addresses as host_t */
289 linked_list_t *addrs;
290 };
291
292 /**
293 * destroy an interface entry
294 */
295 static void iface_entry_destroy(iface_entry_t *this)
296 {
297 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
298 free(this);
299 }
300
301 typedef struct private_kernel_interface_t private_kernel_interface_t;
302
303 /**
304 * Private variables and functions of kernel_interface class.
305 */
306 struct private_kernel_interface_t {
307 /**
308 * Public part of the kernel_interface_t object.
309 */
310 kernel_interface_t public;
311
312 /**
313 * mutex to lock access to netlink socket
314 */
315 pthread_mutex_t nl_mutex;
316
317 /**
318 * mutex to lock access to various lists
319 */
320 pthread_mutex_t mutex;
321
322 /**
323 * condition variable to signal virtual IP add/removal
324 */
325 pthread_cond_t cond;
326
327 /**
328 * List of installed policies (policy_entry_t)
329 */
330 linked_list_t *policies;
331
332 /**
333 * Cached list of interfaces and its adresses (iface_entry_t)
334 */
335 linked_list_t *ifaces;
336
337 /**
338 * iterator used in hook()
339 */
340 iterator_t *hiter;
341
342 /**
343 * job receiving netlink events
344 */
345 callback_job_t *job;
346
347 /**
348 * current sequence number for netlink request
349 */
350 int seq;
351
352 /**
353 * Netlink xfrm socket (IPsec)
354 */
355 int socket_xfrm;
356
357 /**
358 * netlink xfrm socket to receive acquire and expire events
359 */
360 int socket_xfrm_events;
361
362 /**
363 * Netlink rt socket (routing)
364 */
365 int socket_rt;
366
367 /**
368 * Netlink rt socket to receive address change events
369 */
370 int socket_rt_events;
371
372 /**
373 * time of the last roam_job
374 */
375 struct timeval last_roam;
376
377 /**
378 * whether to install routes along policies
379 */
380 bool install_routes;
381
382 /**
383 * routing table to install routes
384 */
385 int routing_table;
386
387 /**
388 * priority of used routing table
389 */
390 int routing_table_prio;
391 };
392
393 /**
394 * convert a IKEv2 specific protocol identifier to the kernel one
395 */
396 static u_int8_t proto_ike2kernel(protocol_id_t proto)
397 {
398 switch (proto)
399 {
400 case PROTO_ESP:
401 return IPPROTO_ESP;
402 case PROTO_AH:
403 return IPPROTO_AH;
404 default:
405 return proto;
406 }
407 }
408
409 /**
410 * reverse of ike2kernel
411 */
412 static protocol_id_t proto_kernel2ike(u_int8_t proto)
413 {
414 switch (proto)
415 {
416 case IPPROTO_ESP:
417 return PROTO_ESP;
418 case IPPROTO_AH:
419 return PROTO_AH;
420 default:
421 return proto;
422 }
423 }
424
425 /**
426 * convert a host_t to a struct xfrm_address
427 */
428 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
429 {
430 chunk_t chunk = host->get_address(host);
431 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
432 }
433
434 /**
435 * convert a traffic selector address range to subnet and its mask.
436 */
437 static void ts2subnet(traffic_selector_t* ts,
438 xfrm_address_t *net, u_int8_t *mask)
439 {
440 /* there is no way to do this cleanly, as the address range may
441 * be anything else but a subnet. We use from_addr as subnet
442 * and try to calculate a usable subnet mask.
443 */
444 int byte, bit;
445 bool found = FALSE;
446 chunk_t from, to;
447 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
448
449 from = ts->get_from_address(ts);
450 to = ts->get_to_address(ts);
451
452 *mask = (size * 8);
453 /* go trough all bits of the addresses, beginning in the front.
454 * as long as they are equal, the subnet gets larger
455 */
456 for (byte = 0; byte < size; byte++)
457 {
458 for (bit = 7; bit >= 0; bit--)
459 {
460 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
461 {
462 *mask = ((7 - bit) + (byte * 8));
463 found = TRUE;
464 break;
465 }
466 }
467 if (found)
468 {
469 break;
470 }
471 }
472 memcpy(net, from.ptr, from.len);
473 chunk_free(&from);
474 chunk_free(&to);
475 }
476
477 /**
478 * convert a traffic selector port range to port/portmask
479 */
480 static void ts2ports(traffic_selector_t* ts,
481 u_int16_t *port, u_int16_t *mask)
482 {
483 /* linux does not seem to accept complex portmasks. Only
484 * any or a specific port is allowed. We set to any, if we have
485 * a port range, or to a specific, if we have one port only.
486 */
487 u_int16_t from, to;
488
489 from = ts->get_from_port(ts);
490 to = ts->get_to_port(ts);
491
492 if (from == to)
493 {
494 *port = htons(from);
495 *mask = ~0;
496 }
497 else
498 {
499 *port = 0;
500 *mask = 0;
501 }
502 }
503
504 /**
505 * convert a pair of traffic_selectors to a xfrm_selector
506 */
507 static struct xfrm_selector ts2selector(traffic_selector_t *src,
508 traffic_selector_t *dst)
509 {
510 struct xfrm_selector sel;
511
512 memset(&sel, 0, sizeof(sel));
513 sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
514 /* src or dest proto may be "any" (0), use more restrictive one */
515 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
516 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
517 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
518 ts2ports(dst, &sel.dport, &sel.dport_mask);
519 ts2ports(src, &sel.sport, &sel.sport_mask);
520 sel.ifindex = 0;
521 sel.user = 0;
522
523 return sel;
524 }
525
526 /**
527 * Creates an rtattr and adds it to the netlink message
528 */
529 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
530 size_t buflen)
531 {
532 struct rtattr *rta;
533
534 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
535 {
536 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
537 return;
538 }
539
540 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
541 rta->rta_type = rta_type;
542 rta->rta_len = RTA_LENGTH(data.len);
543 memcpy(RTA_DATA(rta), data.ptr, data.len);
544 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
545 }
546
547 /**
548 * process a XFRM_MSG_ACQUIRE from kernel
549 */
550 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
551 {
552 u_int32_t reqid = 0;
553 job_t *job;
554 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
555 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
556
557 if (RTA_OK(rtattr, rtsize))
558 {
559 if (rtattr->rta_type == XFRMA_TMPL)
560 {
561 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
562 reqid = tmpl->reqid;
563 }
564 }
565 if (reqid == 0)
566 {
567 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
568 return;
569 }
570 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
571 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid %d", reqid);
572 job = (job_t*)acquire_job_create(reqid);
573 charon->processor->queue_job(charon->processor, job);
574 }
575
576 /**
577 * process a XFRM_MSG_EXPIRE from kernel
578 */
579 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
580 {
581 job_t *job;
582 protocol_id_t protocol;
583 u_int32_t spi, reqid;
584 struct xfrm_user_expire *expire;
585
586 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
587 protocol = proto_kernel2ike(expire->state.id.proto);
588 spi = expire->state.id.spi;
589 reqid = expire->state.reqid;
590
591 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
592
593 if (protocol != PROTO_ESP && protocol != PROTO_AH)
594 {
595 DBG2(DBG_KNL, "ignoring XFRM_MSG_EXPIRE for SA 0x%x (reqid %d) which is "
596 "not a CHILD_SA", ntohl(spi), reqid);
597 return;
598 }
599
600 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA 0x%x (reqid %d)",
601 expire->hard ? "delete" : "rekey", protocol_id_names,
602 protocol, ntohl(spi), reqid);
603 if (expire->hard)
604 {
605 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
606 }
607 else
608 {
609 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
610 }
611 charon->processor->queue_job(charon->processor, job);
612 }
613
614 /**
615 * start a roaming job. We delay it for a second and fire only one job
616 * for multiple events. Otherwise we would create two many jobs.
617 */
618 static void fire_roam_job(private_kernel_interface_t *this, bool address)
619 {
620 struct timeval now;
621
622 if (gettimeofday(&now, NULL) == 0)
623 {
624 if (timercmp(&now, &this->last_roam, >))
625 {
626 now.tv_usec += ROAM_DELAY * 1000;
627 while (now.tv_usec > 1000000)
628 {
629 now.tv_sec++;
630 now.tv_usec -= 1000000;
631 }
632 this->last_roam = now;
633 charon->scheduler->schedule_job(charon->scheduler,
634 (job_t*)roam_job_create(address), ROAM_DELAY);
635 }
636 }
637 }
638
639 /**
640 * process RTM_NEWLINK/RTM_DELLINK from kernel
641 */
642 static void process_link(private_kernel_interface_t *this,
643 struct nlmsghdr *hdr, bool event)
644 {
645 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
646 struct rtattr *rta = IFLA_RTA(msg);
647 size_t rtasize = IFLA_PAYLOAD (hdr);
648 iterator_t *iterator;
649 iface_entry_t *current, *entry = NULL;
650 char *name = NULL;
651 bool update = FALSE;
652
653 while(RTA_OK(rta, rtasize))
654 {
655 switch (rta->rta_type)
656 {
657 case IFLA_IFNAME:
658 name = RTA_DATA(rta);
659 break;
660 }
661 rta = RTA_NEXT(rta, rtasize);
662 }
663 if (!name)
664 {
665 name = "(unknown)";
666 }
667
668 switch (hdr->nlmsg_type)
669 {
670 case RTM_NEWLINK:
671 {
672 if (msg->ifi_flags & IFF_LOOPBACK)
673 { /* ignore loopback interfaces */
674 break;
675 }
676 iterator = this->ifaces->create_iterator_locked(this->ifaces,
677 &this->mutex);
678 while (iterator->iterate(iterator, (void**)&current))
679 {
680 if (current->ifindex == msg->ifi_index)
681 {
682 entry = current;
683 break;
684 }
685 }
686 if (!entry)
687 {
688 entry = malloc_thing(iface_entry_t);
689 entry->ifindex = msg->ifi_index;
690 entry->flags = 0;
691 entry->addrs = linked_list_create();
692 this->ifaces->insert_last(this->ifaces, entry);
693 }
694 memcpy(entry->ifname, name, IFNAMSIZ);
695 entry->ifname[IFNAMSIZ-1] = '\0';
696 if (event)
697 {
698 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
699 {
700 update = TRUE;
701 DBG1(DBG_KNL, "interface %s activated", name);
702 }
703 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
704 {
705 update = TRUE;
706 DBG1(DBG_KNL, "interface %s deactivated", name);
707 }
708 }
709 entry->flags = msg->ifi_flags;
710 iterator->destroy(iterator);
711 break;
712 }
713 case RTM_DELLINK:
714 {
715 iterator = this->ifaces->create_iterator_locked(this->ifaces,
716 &this->mutex);
717 while (iterator->iterate(iterator, (void**)&current))
718 {
719 if (current->ifindex == msg->ifi_index)
720 {
721 /* we do not remove it, as an address may be added to a
722 * "down" interface and we wan't to know that. */
723 current->flags = msg->ifi_flags;
724 break;
725 }
726 }
727 iterator->destroy(iterator);
728 break;
729 }
730 }
731
732 /* send an update to all IKE_SAs */
733 if (update && event)
734 {
735 fire_roam_job(this, TRUE);
736 }
737 }
738
739 /**
740 * process RTM_NEWADDR/RTM_DELADDR from kernel
741 */
742 static void process_addr(private_kernel_interface_t *this,
743 struct nlmsghdr *hdr, bool event)
744 {
745 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
746 struct rtattr *rta = IFA_RTA(msg);
747 size_t rtasize = IFA_PAYLOAD (hdr);
748 host_t *host = NULL;
749 iterator_t *ifaces, *addrs;
750 iface_entry_t *iface;
751 addr_entry_t *addr;
752 chunk_t local = chunk_empty, address = chunk_empty;
753 bool update = FALSE, found = FALSE, changed = FALSE;
754
755 while(RTA_OK(rta, rtasize))
756 {
757 switch (rta->rta_type)
758 {
759 case IFA_LOCAL:
760 local.ptr = RTA_DATA(rta);
761 local.len = RTA_PAYLOAD(rta);
762 break;
763 case IFA_ADDRESS:
764 address.ptr = RTA_DATA(rta);
765 address.len = RTA_PAYLOAD(rta);
766 break;
767 }
768 rta = RTA_NEXT(rta, rtasize);
769 }
770
771 /* For PPP interfaces, we need the IFA_LOCAL address,
772 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
773 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
774 if (local.ptr)
775 {
776 host = host_create_from_chunk(msg->ifa_family, local, 0);
777 }
778 else if (address.ptr)
779 {
780 host = host_create_from_chunk(msg->ifa_family, address, 0);
781 }
782
783 if (host == NULL)
784 { /* bad family? */
785 return;
786 }
787
788 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
789 while (ifaces->iterate(ifaces, (void**)&iface))
790 {
791 if (iface->ifindex == msg->ifa_index)
792 {
793 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
794 while (addrs->iterate(addrs, (void**)&addr))
795 {
796 if (host->ip_equals(host, addr->ip))
797 {
798 found = TRUE;
799 if (hdr->nlmsg_type == RTM_DELADDR)
800 {
801 changed = TRUE;
802 addrs->remove(addrs);
803 if (!addr->virtual)
804 {
805 DBG1(DBG_KNL, "%H disappeared from %s",
806 host, iface->ifname);
807 }
808 addr_entry_destroy(addr);
809 }
810 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
811 {
812 addr->refcount = 1;
813 }
814 }
815 }
816 addrs->destroy(addrs);
817
818 if (hdr->nlmsg_type == RTM_NEWADDR)
819 {
820 if (!found)
821 {
822 found = TRUE;
823 changed = TRUE;
824 addr = malloc_thing(addr_entry_t);
825 addr->ip = host->clone(host);
826 addr->virtual = FALSE;
827 addr->refcount = 1;
828 addr->scope = msg->ifa_scope;
829
830 iface->addrs->insert_last(iface->addrs, addr);
831 if (event)
832 {
833 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
834 }
835 }
836 }
837 if (found && (iface->flags & IFF_UP))
838 {
839 update = TRUE;
840 }
841 break;
842 }
843 }
844 ifaces->destroy(ifaces);
845 host->destroy(host);
846
847 /* send an update to all IKE_SAs */
848 if (update && event && changed)
849 {
850 fire_roam_job(this, TRUE);
851 }
852 }
853
854 /**
855 * Receives events from kernel
856 */
857 static job_requeue_t receive_events(private_kernel_interface_t *this)
858 {
859 char response[1024];
860 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
861 struct sockaddr_nl addr;
862 socklen_t addr_len = sizeof(addr);
863 int len, oldstate, maxfd, selected;
864 fd_set rfds;
865
866 FD_ZERO(&rfds);
867 FD_SET(this->socket_xfrm_events, &rfds);
868 FD_SET(this->socket_rt_events, &rfds);
869 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
870
871 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
872 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
873 pthread_setcancelstate(oldstate, NULL);
874 if (selected <= 0)
875 {
876 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
877 return JOB_REQUEUE_FAIR;
878 }
879 if (FD_ISSET(this->socket_xfrm_events, &rfds))
880 {
881 selected = this->socket_xfrm_events;
882 }
883 else if (FD_ISSET(this->socket_rt_events, &rfds))
884 {
885 selected = this->socket_rt_events;
886 }
887 else
888 {
889 return JOB_REQUEUE_DIRECT;
890 }
891
892 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
893 (struct sockaddr*)&addr, &addr_len);
894 if (len < 0)
895 {
896 switch (errno)
897 {
898 case EINTR:
899 /* interrupted, try again */
900 return JOB_REQUEUE_DIRECT;
901 case EAGAIN:
902 /* no data ready, select again */
903 return JOB_REQUEUE_DIRECT;
904 default:
905 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
906 sleep(1);
907 return JOB_REQUEUE_FAIR;
908 }
909 }
910 if (addr.nl_pid != 0)
911 { /* not from kernel. not interested, try another one */
912 return JOB_REQUEUE_DIRECT;
913 }
914
915 while (NLMSG_OK(hdr, len))
916 {
917 /* looks good so far, dispatch netlink message */
918 if (selected == this->socket_xfrm_events)
919 {
920 switch (hdr->nlmsg_type)
921 {
922 case XFRM_MSG_ACQUIRE:
923 process_acquire(this, hdr);
924 break;
925 case XFRM_MSG_EXPIRE:
926 process_expire(this, hdr);
927 break;
928 default:
929 break;
930 }
931 }
932 else if (selected == this->socket_rt_events)
933 {
934 switch (hdr->nlmsg_type)
935 {
936 case RTM_NEWADDR:
937 case RTM_DELADDR:
938 process_addr(this, hdr, TRUE);
939 pthread_cond_signal(&this->cond);
940 break;
941 case RTM_NEWLINK:
942 case RTM_DELLINK:
943 process_link(this, hdr, TRUE);
944 pthread_cond_signal(&this->cond);
945 break;
946 case RTM_NEWROUTE:
947 case RTM_DELROUTE:
948 fire_roam_job(this, FALSE);
949 break;
950 default:
951 break;
952 }
953 }
954 hdr = NLMSG_NEXT(hdr, len);
955 }
956 return JOB_REQUEUE_DIRECT;
957 }
958
959 /**
960 * send a netlink message and wait for a reply
961 */
962 static status_t netlink_send(private_kernel_interface_t *this,
963 int socket, struct nlmsghdr *in,
964 struct nlmsghdr **out, size_t *out_len)
965 {
966 int len, addr_len;
967 struct sockaddr_nl addr;
968 chunk_t result = chunk_empty, tmp;
969 struct nlmsghdr *msg, peek;
970
971 pthread_mutex_lock(&this->nl_mutex);
972
973 in->nlmsg_seq = ++this->seq;
974 in->nlmsg_pid = getpid();
975
976 memset(&addr, 0, sizeof(addr));
977 addr.nl_family = AF_NETLINK;
978 addr.nl_pid = 0;
979 addr.nl_groups = 0;
980
981 while (TRUE)
982 {
983 len = sendto(socket, in, in->nlmsg_len, 0,
984 (struct sockaddr*)&addr, sizeof(addr));
985
986 if (len != in->nlmsg_len)
987 {
988 if (errno == EINTR)
989 {
990 /* interrupted, try again */
991 continue;
992 }
993 pthread_mutex_unlock(&this->nl_mutex);
994 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
995 return FAILED;
996 }
997 break;
998 }
999
1000 while (TRUE)
1001 {
1002 char buf[4096];
1003 tmp.len = sizeof(buf);
1004 tmp.ptr = buf;
1005 msg = (struct nlmsghdr*)tmp.ptr;
1006
1007 memset(&addr, 0, sizeof(addr));
1008 addr.nl_family = AF_NETLINK;
1009 addr.nl_pid = getpid();
1010 addr.nl_groups = 0;
1011 addr_len = sizeof(addr);
1012
1013 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
1014 (struct sockaddr*)&addr, &addr_len);
1015
1016 if (len < 0)
1017 {
1018 if (errno == EINTR)
1019 {
1020 DBG1(DBG_KNL, "got interrupted");
1021 /* interrupted, try again */
1022 continue;
1023 }
1024 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
1025 pthread_mutex_unlock(&this->nl_mutex);
1026 return FAILED;
1027 }
1028 if (!NLMSG_OK(msg, len))
1029 {
1030 DBG1(DBG_KNL, "received corrupted netlink message");
1031 pthread_mutex_unlock(&this->nl_mutex);
1032 return FAILED;
1033 }
1034 if (msg->nlmsg_seq != this->seq)
1035 {
1036 DBG1(DBG_KNL, "received invalid netlink sequence number");
1037 if (msg->nlmsg_seq < this->seq)
1038 {
1039 continue;
1040 }
1041 pthread_mutex_unlock(&this->nl_mutex);
1042 return FAILED;
1043 }
1044
1045 tmp.len = len;
1046 result = chunk_cata("cc", result, tmp);
1047
1048 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
1049 * numbers to detect multi header messages */
1050 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
1051 (struct sockaddr*)&addr, &addr_len);
1052
1053 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
1054 {
1055 /* seems to be multipart */
1056 continue;
1057 }
1058 break;
1059 }
1060
1061 *out_len = result.len;
1062 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
1063
1064 pthread_mutex_unlock(&this->nl_mutex);
1065
1066 return SUCCESS;
1067 }
1068
1069 /**
1070 * send a netlink message and wait for its acknowlegde
1071 */
1072 static status_t netlink_send_ack(private_kernel_interface_t *this,
1073 int socket, struct nlmsghdr *in)
1074 {
1075 struct nlmsghdr *out, *hdr;
1076 size_t len;
1077
1078 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
1079 {
1080 return FAILED;
1081 }
1082 hdr = out;
1083 while (NLMSG_OK(hdr, len))
1084 {
1085 switch (hdr->nlmsg_type)
1086 {
1087 case NLMSG_ERROR:
1088 {
1089 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
1090
1091 if (err->error)
1092 {
1093 if (-err->error == EEXIST)
1094 { /* do not report existing routes */
1095 free(out);
1096 return ALREADY_DONE;
1097 }
1098 DBG1(DBG_KNL, "received netlink error: %s (%d)",
1099 strerror(-err->error), -err->error);
1100 free(out);
1101 return FAILED;
1102 }
1103 free(out);
1104 return SUCCESS;
1105 }
1106 default:
1107 hdr = NLMSG_NEXT(hdr, len);
1108 continue;
1109 case NLMSG_DONE:
1110 break;
1111 }
1112 break;
1113 }
1114 DBG1(DBG_KNL, "netlink request not acknowlegded");
1115 free(out);
1116 return FAILED;
1117 }
1118
1119 /**
1120 * Initialize a list of local addresses.
1121 */
1122 static status_t init_address_list(private_kernel_interface_t *this)
1123 {
1124 char request[BUFFER_SIZE];
1125 struct nlmsghdr *out, *current, *in;
1126 struct rtgenmsg *msg;
1127 size_t len;
1128 iterator_t *ifaces, *addrs;
1129 iface_entry_t *iface;
1130 addr_entry_t *addr;
1131
1132 DBG1(DBG_KNL, "listening on interfaces:");
1133
1134 memset(&request, 0, sizeof(request));
1135
1136 in = (struct nlmsghdr*)&request;
1137 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1138 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1139 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1140 msg->rtgen_family = AF_UNSPEC;
1141
1142 /* get all links */
1143 in->nlmsg_type = RTM_GETLINK;
1144 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1145 {
1146 return FAILED;
1147 }
1148 current = out;
1149 while (NLMSG_OK(current, len))
1150 {
1151 switch (current->nlmsg_type)
1152 {
1153 case NLMSG_DONE:
1154 break;
1155 case RTM_NEWLINK:
1156 process_link(this, current, FALSE);
1157 /* fall through */
1158 default:
1159 current = NLMSG_NEXT(current, len);
1160 continue;
1161 }
1162 break;
1163 }
1164 free(out);
1165
1166 /* get all interface addresses */
1167 in->nlmsg_type = RTM_GETADDR;
1168 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1169 {
1170 return FAILED;
1171 }
1172 current = out;
1173 while (NLMSG_OK(current, len))
1174 {
1175 switch (current->nlmsg_type)
1176 {
1177 case NLMSG_DONE:
1178 break;
1179 case RTM_NEWADDR:
1180 process_addr(this, current, FALSE);
1181 /* fall through */
1182 default:
1183 current = NLMSG_NEXT(current, len);
1184 continue;
1185 }
1186 break;
1187 }
1188 free(out);
1189
1190 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1191 while (ifaces->iterate(ifaces, (void**)&iface))
1192 {
1193 if (iface->flags & IFF_UP)
1194 {
1195 DBG1(DBG_KNL, " %s", iface->ifname);
1196 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1197 while (addrs->iterate(addrs, (void**)&addr))
1198 {
1199 DBG1(DBG_KNL, " %H", addr->ip);
1200 }
1201 addrs->destroy(addrs);
1202 }
1203 }
1204 ifaces->destroy(ifaces);
1205 return SUCCESS;
1206 }
1207
1208 /**
1209 * iterator hook to iterate over addrs
1210 */
1211 static hook_result_t addr_hook(private_kernel_interface_t *this,
1212 addr_entry_t *in, host_t **out)
1213 {
1214 if (in->virtual)
1215 { /* skip virtual interfaces added by us */
1216 return HOOK_SKIP;
1217 }
1218 if (in->scope >= RT_SCOPE_LINK)
1219 { /* skip addresses with a unusable scope */
1220 return HOOK_SKIP;
1221 }
1222 *out = in->ip;
1223 return HOOK_NEXT;
1224 }
1225
1226 /**
1227 * iterator hook to iterate over ifaces
1228 */
1229 static hook_result_t iface_hook(private_kernel_interface_t *this,
1230 iface_entry_t *in, host_t **out)
1231 {
1232 if (!(in->flags & IFF_UP))
1233 { /* skip interfaces not up */
1234 return HOOK_SKIP;
1235 }
1236
1237 if (this->hiter == NULL)
1238 {
1239 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1240 this->hiter->set_iterator_hook(this->hiter,
1241 (iterator_hook_t*)addr_hook, this);
1242 }
1243 while (this->hiter->iterate(this->hiter, (void**)out))
1244 {
1245 return HOOK_AGAIN;
1246 }
1247 this->hiter->destroy(this->hiter);
1248 this->hiter = NULL;
1249 return HOOK_SKIP;
1250 }
1251
1252 /**
1253 * Implements kernel_interface_t.create_address_iterator.
1254 */
1255 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1256 {
1257 iterator_t *iterator;
1258
1259 /* This iterator is not only hooked, is is double-hooked. As we have stored
1260 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1261 * entries in each interface we iterate. This does the iface_hook. The
1262 * addr_hook returns the ip instead of the addr_entry. */
1263
1264 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1265 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1266 return iterator;
1267 }
1268
1269 /**
1270 * implementation of kernel_interface_t.get_interface_name
1271 */
1272 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1273 {
1274 iterator_t *ifaces, *addrs;
1275 iface_entry_t *iface;
1276 addr_entry_t *addr;
1277 char *name = NULL;
1278
1279 DBG2(DBG_KNL, "getting interface name for %H", ip);
1280
1281 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1282 while (ifaces->iterate(ifaces, (void**)&iface))
1283 {
1284 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1285 while (addrs->iterate(addrs, (void**)&addr))
1286 {
1287 if (ip->ip_equals(ip, addr->ip))
1288 {
1289 name = strdup(iface->ifname);
1290 break;
1291 }
1292 }
1293 addrs->destroy(addrs);
1294 if (name)
1295 {
1296 break;
1297 }
1298 }
1299 ifaces->destroy(ifaces);
1300
1301 if (name)
1302 {
1303 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1304 }
1305 else
1306 {
1307 DBG2(DBG_KNL, "%H is not a local address", ip);
1308 }
1309 return name;
1310 }
1311
1312 /**
1313 * Tries to find an ip address of a local interface that is included in the
1314 * supplied traffic selector.
1315 */
1316 static status_t get_address_by_ts(private_kernel_interface_t *this,
1317 traffic_selector_t *ts, host_t **ip)
1318 {
1319 iterator_t *ifaces, *addrs;
1320 iface_entry_t *iface;
1321 addr_entry_t *addr;
1322 host_t *host;
1323 int family;
1324 bool found = FALSE;
1325
1326 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1327
1328 /* if we have a family which includes localhost, we do not
1329 * search for an IP, we use the default */
1330 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1331
1332 if (family == AF_INET)
1333 {
1334 host = host_create_from_string("127.0.0.1", 0);
1335 }
1336 else
1337 {
1338 host = host_create_from_string("::1", 0);
1339 }
1340
1341 if (ts->includes(ts, host))
1342 {
1343 *ip = host_create_any(family);
1344 host->destroy(host);
1345 DBG2(DBG_KNL, "using host %H", *ip);
1346 return SUCCESS;
1347 }
1348 host->destroy(host);
1349
1350 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1351 while (ifaces->iterate(ifaces, (void**)&iface))
1352 {
1353 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1354 while (addrs->iterate(addrs, (void**)&addr))
1355 {
1356 if (ts->includes(ts, addr->ip))
1357 {
1358 found = TRUE;
1359 *ip = addr->ip->clone(addr->ip);
1360 break;
1361 }
1362 }
1363 addrs->destroy(addrs);
1364 if (found)
1365 {
1366 break;
1367 }
1368 }
1369 ifaces->destroy(ifaces);
1370
1371 if (!found)
1372 {
1373 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1374 return FAILED;
1375 }
1376 DBG2(DBG_KNL, "using host %H", *ip);
1377 return SUCCESS;
1378 }
1379
1380 /**
1381 * get the interface of a local address
1382 */
1383 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1384 {
1385 iterator_t *ifaces, *addrs;
1386 iface_entry_t *iface;
1387 addr_entry_t *addr;
1388 int ifindex = 0;
1389
1390 DBG2(DBG_KNL, "getting iface for %H", ip);
1391
1392 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1393 while (ifaces->iterate(ifaces, (void**)&iface))
1394 {
1395 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1396 while (addrs->iterate(addrs, (void**)&addr))
1397 {
1398 if (ip->ip_equals(ip, addr->ip))
1399 {
1400 ifindex = iface->ifindex;
1401 break;
1402 }
1403 }
1404 addrs->destroy(addrs);
1405 if (ifindex)
1406 {
1407 break;
1408 }
1409 }
1410 ifaces->destroy(ifaces);
1411
1412 if (ifindex == 0)
1413 {
1414 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1415 }
1416 return ifindex;
1417 }
1418
1419 /**
1420 * get the refcount of a virtual ip
1421 */
1422 static int get_vip_refcount(private_kernel_interface_t *this, host_t* ip)
1423 {
1424 iterator_t *ifaces, *addrs;
1425 iface_entry_t *iface;
1426 addr_entry_t *addr;
1427 int refcount = 0;
1428
1429 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
1430 while (ifaces->iterate(ifaces, (void**)&iface))
1431 {
1432 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1433 while (addrs->iterate(addrs, (void**)&addr))
1434 {
1435 if (addr->virtual && (iface->flags & IFF_UP) &&
1436 ip->ip_equals(ip, addr->ip))
1437 {
1438 refcount = addr->refcount;
1439 break;
1440 }
1441 }
1442 addrs->destroy(addrs);
1443 if (refcount)
1444 {
1445 break;
1446 }
1447 }
1448 ifaces->destroy(ifaces);
1449
1450 return refcount;
1451 }
1452
1453 /**
1454 * Manages the creation and deletion of ip addresses on an interface.
1455 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1456 */
1457 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1458 int flags, int if_index, host_t *ip)
1459 {
1460 unsigned char request[BUFFER_SIZE];
1461 struct nlmsghdr *hdr;
1462 struct ifaddrmsg *msg;
1463 chunk_t chunk;
1464
1465 memset(&request, 0, sizeof(request));
1466
1467 chunk = ip->get_address(ip);
1468
1469 hdr = (struct nlmsghdr*)request;
1470 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1471 hdr->nlmsg_type = nlmsg_type;
1472 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1473
1474 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1475 msg->ifa_family = ip->get_family(ip);
1476 msg->ifa_flags = 0;
1477 msg->ifa_prefixlen = 8 * chunk.len;
1478 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1479 msg->ifa_index = if_index;
1480
1481 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1482
1483 return netlink_send_ack(this, this->socket_rt, hdr);
1484 }
1485
1486 /**
1487 * Manages source routes in the routing table.
1488 * By setting the appropriate nlmsg_type, the route added or r.
1489 */
1490 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1491 int flags, route_entry_t *route)
1492 {
1493 unsigned char request[BUFFER_SIZE];
1494 struct nlmsghdr *hdr;
1495 struct rtmsg *msg;
1496 chunk_t chunk;
1497
1498 /* if route is 0.0.0.0/0, we can't install it, as it would
1499 * overwrite the default route. Instead, we add two routes:
1500 * 0.0.0.0/1 and 128.0.0.0/1 */
1501 if (this->routing_table == 0 && route->prefixlen == 0)
1502 {
1503 route_entry_t half;
1504 status_t status;
1505
1506 half.dst_net = chunk_alloca(route->dst_net.len);
1507 memset(half.dst_net.ptr, 0, half.dst_net.len);
1508 half.src_ip = route->src_ip;
1509 half.gateway = route->gateway;
1510 half.if_index = route->if_index;
1511 half.prefixlen = 1;
1512
1513 status = manage_srcroute(this, nlmsg_type, flags, &half);
1514 half.dst_net.ptr[0] |= 0x80;
1515 status = manage_srcroute(this, nlmsg_type, flags, &half);
1516 return status;
1517 }
1518
1519 memset(&request, 0, sizeof(request));
1520
1521 hdr = (struct nlmsghdr*)request;
1522 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1523 hdr->nlmsg_type = nlmsg_type;
1524 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1525
1526 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1527 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1528 msg->rtm_dst_len = route->prefixlen;
1529 msg->rtm_table = this->routing_table;
1530 msg->rtm_protocol = RTPROT_STATIC;
1531 msg->rtm_type = RTN_UNICAST;
1532 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1533
1534 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1535 chunk = route->src_ip->get_address(route->src_ip);
1536 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1537 chunk = route->gateway->get_address(route->gateway);
1538 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1539 chunk.ptr = (char*)&route->if_index;
1540 chunk.len = sizeof(route->if_index);
1541 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1542
1543 return netlink_send_ack(this, this->socket_rt, hdr);
1544 }
1545
1546 /**
1547 * create or delete an rule to use our routing table
1548 */
1549 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1550 u_int32_t table, u_int32_t prio)
1551 {
1552 unsigned char request[BUFFER_SIZE];
1553 struct nlmsghdr *hdr;
1554 struct rtmsg *msg;
1555 chunk_t chunk;
1556
1557 memset(&request, 0, sizeof(request));
1558 hdr = (struct nlmsghdr*)request;
1559 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1560 hdr->nlmsg_type = nlmsg_type;
1561 if (nlmsg_type == RTM_NEWRULE)
1562 {
1563 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1564 }
1565 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1566
1567 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1568 msg->rtm_table = table;
1569 msg->rtm_family = AF_INET;
1570 msg->rtm_protocol = RTPROT_BOOT;
1571 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1572 msg->rtm_type = RTN_UNICAST;
1573
1574 chunk = chunk_from_thing(prio);
1575 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1576
1577 return netlink_send_ack(this, this->socket_rt, hdr);
1578 }
1579
1580 /**
1581 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1582 */
1583 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1584 {
1585 int bit, byte;
1586
1587 if (addr.len != net.len)
1588 {
1589 return FALSE;
1590 }
1591 /* scan through all bits, beginning in the front */
1592 for (byte = 0; byte < addr.len; byte++)
1593 {
1594 for (bit = 7; bit >= 0; bit--)
1595 {
1596 /* check if bits are equal (or we reached the end of the net) */
1597 if (bit + byte * 8 > net_len)
1598 {
1599 return TRUE;
1600 }
1601 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
1602 {
1603 return FALSE;
1604 }
1605 }
1606 }
1607 return TRUE;
1608 }
1609
1610 /**
1611 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1612 */
1613 static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
1614 bool nexthop)
1615 {
1616 unsigned char request[BUFFER_SIZE];
1617 struct nlmsghdr *hdr, *out, *current;
1618 struct rtmsg *msg;
1619 chunk_t chunk;
1620 size_t len;
1621 int best = -1;
1622 host_t *src = NULL, *gtw = NULL;
1623
1624 DBG2(DBG_KNL, "getting address to reach %H", dest);
1625
1626 memset(&request, 0, sizeof(request));
1627
1628 hdr = (struct nlmsghdr*)request;
1629 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
1630 hdr->nlmsg_type = RTM_GETROUTE;
1631 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1632
1633 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1634 msg->rtm_family = dest->get_family(dest);
1635
1636 chunk = dest->get_address(dest);
1637 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1638
1639 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1640 {
1641 DBG1(DBG_KNL, "getting address to %H failed", dest);
1642 return NULL;
1643 }
1644 current = out;
1645 while (NLMSG_OK(current, len))
1646 {
1647 switch (current->nlmsg_type)
1648 {
1649 case NLMSG_DONE:
1650 break;
1651 case RTM_NEWROUTE:
1652 {
1653 struct rtattr *rta;
1654 size_t rtasize;
1655 chunk_t rta_gtw, rta_src, rta_dst;
1656 u_int32_t rta_oif = 0;
1657
1658 rta_gtw = rta_src = rta_dst = chunk_empty;
1659 msg = (struct rtmsg*)(NLMSG_DATA(current));
1660 rta = RTM_RTA(msg);
1661 rtasize = RTM_PAYLOAD(current);
1662 while (RTA_OK(rta, rtasize))
1663 {
1664 switch (rta->rta_type)
1665 {
1666 case RTA_PREFSRC:
1667 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1668 break;
1669 case RTA_GATEWAY:
1670 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1671 break;
1672 case RTA_DST:
1673 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1674 break;
1675 case RTA_OIF:
1676 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1677 {
1678 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1679 }
1680 break;
1681 }
1682 rta = RTA_NEXT(rta, rtasize);
1683 }
1684
1685 /* apply the route if:
1686 * - it is not from our own ipsec routing table
1687 * - is better than a previous one
1688 * - is the default route or
1689 * - its destination net contains our destination
1690 */
1691 if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
1692 && msg->rtm_dst_len > best
1693 && (msg->rtm_dst_len == 0 || /* default route */
1694 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
1695 {
1696 iterator_t *ifaces, *addrs;
1697 iface_entry_t *iface;
1698 addr_entry_t *addr;
1699
1700 best = msg->rtm_dst_len;
1701 if (nexthop)
1702 {
1703 DESTROY_IF(gtw);
1704 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
1705 }
1706 else if (rta_src.ptr)
1707 {
1708 DESTROY_IF(src);
1709 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
1710 if (get_vip_refcount(this, src))
1711 { /* skip source address if it is installed by us */
1712 DESTROY_IF(src);
1713 src = NULL;
1714 current = NLMSG_NEXT(current, len);
1715 continue;
1716 }
1717 }
1718 else
1719 {
1720 /* no source addr, get one from the interfaces */
1721 ifaces = this->ifaces->create_iterator_locked(
1722 this->ifaces, &this->mutex);
1723 while (ifaces->iterate(ifaces, (void**)&iface))
1724 {
1725 if (iface->ifindex == rta_oif)
1726 {
1727 addrs = iface->addrs->create_iterator(
1728 iface->addrs, TRUE);
1729 while (addrs->iterate(addrs, (void**)&addr))
1730 {
1731 chunk_t ip = addr->ip->get_address(addr->ip);
1732 if (msg->rtm_dst_len == 0
1733 || addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
1734 {
1735 DESTROY_IF(src);
1736 src = addr->ip->clone(addr->ip);
1737 break;
1738 }
1739 }
1740 addrs->destroy(addrs);
1741 }
1742 }
1743 ifaces->destroy(ifaces);
1744 }
1745 }
1746 /* FALL through */
1747 }
1748 default:
1749 current = NLMSG_NEXT(current, len);
1750 continue;
1751 }
1752 break;
1753 }
1754 free(out);
1755
1756 if (nexthop)
1757 {
1758 if (gtw)
1759 {
1760 return gtw;
1761 }
1762 return dest->clone(dest);
1763 }
1764 return src;
1765 }
1766
1767 /**
1768 * Implementation of kernel_interface_t.get_source_addr.
1769 */
1770 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1771 {
1772 return get_route(this, dest, FALSE);
1773 }
1774
1775 /**
1776 * Implementation of kernel_interface_t.add_ip.
1777 */
1778 static status_t add_ip(private_kernel_interface_t *this,
1779 host_t *virtual_ip, host_t *iface_ip)
1780 {
1781 iface_entry_t *iface;
1782 addr_entry_t *addr;
1783 iterator_t *addrs, *ifaces;
1784 int ifindex;
1785
1786 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1787
1788 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1789 while (ifaces->iterate(ifaces, (void**)&iface))
1790 {
1791 bool iface_found = FALSE;
1792
1793 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1794 while (addrs->iterate(addrs, (void**)&addr))
1795 {
1796 if (iface_ip->ip_equals(iface_ip, addr->ip))
1797 {
1798 iface_found = TRUE;
1799 }
1800 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1801 {
1802 addr->refcount++;
1803 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1804 virtual_ip, iface->ifname);
1805 addrs->destroy(addrs);
1806 ifaces->destroy(ifaces);
1807 return SUCCESS;
1808 }
1809 }
1810 addrs->destroy(addrs);
1811
1812 if (iface_found)
1813 {
1814 ifindex = iface->ifindex;
1815 addr = malloc_thing(addr_entry_t);
1816 addr->ip = virtual_ip->clone(virtual_ip);
1817 addr->refcount = 0;
1818 addr->virtual = TRUE;
1819 addr->scope = RT_SCOPE_UNIVERSE;
1820 iface->addrs->insert_last(iface->addrs, addr);
1821
1822 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1823 ifindex, virtual_ip) == SUCCESS)
1824 {
1825 while (get_vip_refcount(this, virtual_ip) == 0)
1826 { /* wait until address appears */
1827 pthread_cond_wait(&this->cond, &this->mutex);
1828 }
1829 ifaces->destroy(ifaces);
1830 return SUCCESS;
1831 }
1832 ifaces->destroy(ifaces);
1833 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1834 return FAILED;
1835 }
1836 }
1837 ifaces->destroy(ifaces);
1838
1839 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1840 "virtual IP %H", iface_ip, virtual_ip);
1841 return FAILED;
1842 }
1843
1844 /**
1845 * Implementation of kernel_interface_t.del_ip.
1846 */
1847 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1848 {
1849 iface_entry_t *iface;
1850 addr_entry_t *addr;
1851 iterator_t *addrs, *ifaces;
1852 status_t status;
1853 int ifindex;
1854
1855 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1856
1857 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1858 while (ifaces->iterate(ifaces, (void**)&iface))
1859 {
1860 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1861 while (addrs->iterate(addrs, (void**)&addr))
1862 {
1863 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1864 {
1865 ifindex = iface->ifindex;
1866 if (addr->refcount == 1)
1867 {
1868 status = manage_ipaddr(this, RTM_DELADDR, 0,
1869 ifindex, virtual_ip);
1870 if (status == SUCCESS)
1871 { /* wait until the address is really gone */
1872 while (get_vip_refcount(this, virtual_ip) > 0)
1873 {
1874 pthread_cond_wait(&this->cond, &this->mutex);
1875 }
1876 }
1877 addrs->destroy(addrs);
1878 ifaces->destroy(ifaces);
1879 return status;
1880 }
1881 else
1882 {
1883 addr->refcount--;
1884 }
1885 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1886 virtual_ip);
1887 addrs->destroy(addrs);
1888 ifaces->destroy(ifaces);
1889 return SUCCESS;
1890 }
1891 }
1892 addrs->destroy(addrs);
1893 }
1894 ifaces->destroy(ifaces);
1895
1896 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1897 return FAILED;
1898 }
1899
1900 /**
1901 * Get an SPI for a specific protocol from the kernel.
1902 */
1903 static status_t get_spi_internal(private_kernel_interface_t *this,
1904 host_t *src, host_t *dst, u_int8_t proto, u_int32_t min, u_int32_t max,
1905 u_int32_t reqid, u_int32_t *spi)
1906 {
1907 unsigned char request[BUFFER_SIZE];
1908 struct nlmsghdr *hdr, *out;
1909 struct xfrm_userspi_info *userspi;
1910 u_int32_t received_spi = 0;
1911 size_t len;
1912
1913 memset(&request, 0, sizeof(request));
1914
1915 hdr = (struct nlmsghdr*)request;
1916 hdr->nlmsg_flags = NLM_F_REQUEST;
1917 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1918 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1919
1920 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1921 host2xfrm(src, &userspi->info.saddr);
1922 host2xfrm(dst, &userspi->info.id.daddr);
1923 userspi->info.id.proto = proto;
1924 userspi->info.mode = TRUE; /* tunnel mode */
1925 userspi->info.reqid = reqid;
1926 userspi->info.family = src->get_family(src);
1927 userspi->min = min;
1928 userspi->max = max;
1929
1930 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1931 {
1932 hdr = out;
1933 while (NLMSG_OK(hdr, len))
1934 {
1935 switch (hdr->nlmsg_type)
1936 {
1937 case XFRM_MSG_NEWSA:
1938 {
1939 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1940 received_spi = usersa->id.spi;
1941 break;
1942 }
1943 case NLMSG_ERROR:
1944 {
1945 struct nlmsgerr *err = NLMSG_DATA(hdr);
1946
1947 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1948 strerror(-err->error), -err->error);
1949 break;
1950 }
1951 default:
1952 hdr = NLMSG_NEXT(hdr, len);
1953 continue;
1954 case NLMSG_DONE:
1955 break;
1956 }
1957 break;
1958 }
1959 free(out);
1960 }
1961
1962 if (received_spi == 0)
1963 {
1964 return FAILED;
1965 }
1966
1967 *spi = received_spi;
1968 return SUCCESS;
1969 }
1970
1971 /**
1972 * Implementation of kernel_interface_t.get_spi.
1973 */
1974 static status_t get_spi(private_kernel_interface_t *this,
1975 host_t *src, host_t *dst,
1976 protocol_id_t protocol, u_int32_t reqid,
1977 u_int32_t *spi)
1978 {
1979 DBG2(DBG_KNL, "getting SPI for reqid %d", reqid);
1980
1981 if (get_spi_internal(this, src, dst, proto_ike2kernel(protocol),
1982 0xc0000000, 0xcFFFFFFF, reqid, spi) != SUCCESS)
1983 {
1984 DBG1(DBG_KNL, "unable to get SPI for reqid %d", reqid);
1985 return FAILED;
1986 }
1987
1988 DBG2(DBG_KNL, "got SPI 0x%x for reqid %d", *spi, reqid);
1989
1990 return SUCCESS;
1991 }
1992
1993 /**
1994 * Implementation of kernel_interface_t.get_cpi.
1995 */
1996 static status_t get_cpi(private_kernel_interface_t *this,
1997 host_t *src, host_t *dst,
1998 u_int32_t reqid, u_int16_t *cpi)
1999 {
2000 u_int32_t received_spi = 0;
2001 DBG2(DBG_KNL, "getting CPI for reqid %d", reqid);
2002
2003 if (get_spi_internal(this, src, dst,
2004 IPPROTO_COMP, 0x100, 0xEFFF, reqid, &received_spi) != SUCCESS)
2005 {
2006 DBG1(DBG_KNL, "unable to get CPI for reqid %d", reqid);
2007 return FAILED;
2008 }
2009
2010 *cpi = htons((u_int16_t)ntohl(received_spi));
2011
2012 DBG2(DBG_KNL, "got CPI 0x%x for reqid %d", *cpi, reqid);
2013
2014 return SUCCESS;
2015 }
2016
2017 /**
2018 * Implementation of kernel_interface_t.add_sa.
2019 */
2020 static status_t add_sa(private_kernel_interface_t *this,
2021 host_t *src, host_t *dst, u_int32_t spi,
2022 protocol_id_t protocol, u_int32_t reqid,
2023 u_int64_t expire_soft, u_int64_t expire_hard,
2024 u_int16_t enc_alg, u_int16_t enc_size,
2025 u_int16_t int_alg, u_int16_t int_size,
2026 prf_plus_t *prf_plus, mode_t mode,
2027 u_int16_t ipcomp, bool encap,
2028 bool replace)
2029 {
2030 unsigned char request[BUFFER_SIZE];
2031 char *alg_name;
2032 /* additional 4 octets KEYMAT required for AES-GCM as of RFC4106 8.1. */
2033 u_int16_t add_keymat = 32;
2034 struct nlmsghdr *hdr;
2035 struct xfrm_usersa_info *sa;
2036
2037 memset(&request, 0, sizeof(request));
2038
2039 DBG2(DBG_KNL, "adding SAD entry with SPI 0x%x and reqid %d", spi, reqid);
2040
2041 hdr = (struct nlmsghdr*)request;
2042 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2043 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
2044 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2045
2046 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
2047 host2xfrm(src, &sa->saddr);
2048 host2xfrm(dst, &sa->id.daddr);
2049 sa->id.spi = spi;
2050 sa->id.proto = proto_ike2kernel(protocol);
2051 sa->family = src->get_family(src);
2052 sa->mode = mode;
2053 if (mode == MODE_TUNNEL)
2054 {
2055 sa->flags |= XFRM_STATE_AF_UNSPEC;
2056 }
2057 sa->replay_window = (protocol == IPPROTO_COMP) ? 0 : 32;
2058 sa->reqid = reqid;
2059 /* we currently do not expire SAs by volume/packet count */
2060 sa->lft.soft_byte_limit = XFRM_INF;
2061 sa->lft.hard_byte_limit = XFRM_INF;
2062 sa->lft.soft_packet_limit = XFRM_INF;
2063 sa->lft.hard_packet_limit = XFRM_INF;
2064 /* we use lifetimes since added, not since used */
2065 sa->lft.soft_add_expires_seconds = expire_soft;
2066 sa->lft.hard_add_expires_seconds = expire_hard;
2067 sa->lft.soft_use_expires_seconds = 0;
2068 sa->lft.hard_use_expires_seconds = 0;
2069
2070 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
2071
2072 switch (enc_alg)
2073 {
2074 case ENCR_UNDEFINED:
2075 /* no encryption */
2076 break;
2077 case ENCR_AES_CCM_ICV8:
2078 case ENCR_AES_CCM_ICV12:
2079 case ENCR_AES_CCM_ICV16:
2080 /* AES-CCM needs only 3 additional octets KEYMAT as of RFC 4309 7.1. */
2081 add_keymat = 24;
2082 /* fall-through */
2083 case ENCR_AES_GCM_ICV8:
2084 case ENCR_AES_GCM_ICV12:
2085 case ENCR_AES_GCM_ICV16:
2086 {
2087 u_int16_t icv_size = 0;
2088 rthdr->rta_type = XFRMA_ALG_AEAD;
2089 alg_name = lookup_algorithm(encryption_algs, enc_alg, &icv_size);
2090 if (alg_name == NULL)
2091 {
2092 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2093 encryption_algorithm_names, enc_alg);
2094 return FAILED;
2095 }
2096 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2097 encryption_algorithm_names, enc_alg, enc_size);
2098
2099 /* additional KEYMAT required */
2100 enc_size += add_keymat;
2101
2102 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo_aead) + enc_size / 8);
2103 hdr->nlmsg_len += rthdr->rta_len;
2104 if (hdr->nlmsg_len > sizeof(request))
2105 {
2106 return FAILED;
2107 }
2108
2109 struct xfrm_algo_aead* algo = (struct xfrm_algo_aead*)RTA_DATA(rthdr);
2110 algo->alg_key_len = enc_size;
2111 algo->alg_icv_len = icv_size;
2112 strcpy(algo->alg_name, alg_name);
2113 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2114
2115 rthdr = XFRM_RTA_NEXT(rthdr);
2116 break;
2117 }
2118 default:
2119 {
2120 rthdr->rta_type = XFRMA_ALG_CRYPT;
2121 alg_name = lookup_algorithm(encryption_algs, enc_alg, &enc_size);
2122 if (alg_name == NULL)
2123 {
2124 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2125 encryption_algorithm_names, enc_alg);
2126 return FAILED;
2127 }
2128 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2129 encryption_algorithm_names, enc_alg, enc_size);
2130
2131 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + enc_size / 8);
2132 hdr->nlmsg_len += rthdr->rta_len;
2133 if (hdr->nlmsg_len > sizeof(request))
2134 {
2135 return FAILED;
2136 }
2137
2138 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2139 algo->alg_key_len = enc_size;
2140 strcpy(algo->alg_name, alg_name);
2141 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2142
2143 rthdr = XFRM_RTA_NEXT(rthdr);
2144 break;
2145 }
2146 }
2147
2148 if (int_alg != AUTH_UNDEFINED)
2149 {
2150 rthdr->rta_type = XFRMA_ALG_AUTH;
2151 alg_name = lookup_algorithm(integrity_algs, int_alg, &int_size);
2152 if (alg_name == NULL)
2153 {
2154 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2155 integrity_algorithm_names, int_alg);
2156 return FAILED;
2157 }
2158 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
2159 integrity_algorithm_names, int_alg, int_size);
2160
2161 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + int_size / 8);
2162 hdr->nlmsg_len += rthdr->rta_len;
2163 if (hdr->nlmsg_len > sizeof(request))
2164 {
2165 return FAILED;
2166 }
2167
2168 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2169 algo->alg_key_len = int_size;
2170 strcpy(algo->alg_name, alg_name);
2171 prf_plus->get_bytes(prf_plus, int_size / 8, algo->alg_key);
2172
2173 rthdr = XFRM_RTA_NEXT(rthdr);
2174 }
2175
2176 if (ipcomp != IPCOMP_NONE)
2177 {
2178 rthdr->rta_type = XFRMA_ALG_COMP;
2179 alg_name = lookup_algorithm(compression_algs, ipcomp, NULL);
2180 if (alg_name == NULL)
2181 {
2182 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2183 ipcomp_transform_names, ipcomp);
2184 return FAILED;
2185 }
2186 DBG2(DBG_KNL, " using compression algorithm %N",
2187 ipcomp_transform_names, ipcomp);
2188
2189 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo));
2190 hdr->nlmsg_len += rthdr->rta_len;
2191 if (hdr->nlmsg_len > sizeof(request))
2192 {
2193 return FAILED;
2194 }
2195
2196 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2197 algo->alg_key_len = 0;
2198 strcpy(algo->alg_name, alg_name);
2199
2200 rthdr = XFRM_RTA_NEXT(rthdr);
2201 }
2202
2203 if (encap)
2204 {
2205 rthdr->rta_type = XFRMA_ENCAP;
2206 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2207
2208 hdr->nlmsg_len += rthdr->rta_len;
2209 if (hdr->nlmsg_len > sizeof(request))
2210 {
2211 return FAILED;
2212 }
2213
2214 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
2215 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2216 tmpl->encap_sport = htons(src->get_port(src));
2217 tmpl->encap_dport = htons(dst->get_port(dst));
2218 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2219 /* encap_oa could probably be derived from the
2220 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
2221 * pluto does the same as we do here but it uses encap_oa in the
2222 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
2223 * the kernel ignores it anyway
2224 * -> does that mean that NAT-T encap doesn't work in transport mode?
2225 * No. The reason the kernel ignores NAT-OA is that it recomputes
2226 * (or, rather, just ignores) the checksum. If packets pass
2227 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
2228 rthdr = XFRM_RTA_NEXT(rthdr);
2229 }
2230
2231 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2232 {
2233 DBG1(DBG_KNL, "unable to add SAD entry with SPI 0x%x", spi);
2234 return FAILED;
2235 }
2236 return SUCCESS;
2237 }
2238
2239 /**
2240 * Get the replay state (i.e. sequence numbers) of an SA.
2241 */
2242 static status_t get_replay_state(private_kernel_interface_t *this,
2243 u_int32_t spi, protocol_id_t protocol, host_t *dst,
2244 struct xfrm_replay_state *replay)
2245 {
2246 unsigned char request[BUFFER_SIZE];
2247 struct nlmsghdr *hdr, *out = NULL;
2248 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
2249 size_t len;
2250 struct rtattr *rta;
2251 size_t rtasize;
2252
2253 memset(&request, 0, sizeof(request));
2254
2255 DBG2(DBG_KNL, "querying replay state from SAD entry with SPI 0x%x", spi);
2256
2257 hdr = (struct nlmsghdr*)request;
2258 hdr->nlmsg_flags = NLM_F_REQUEST;
2259 hdr->nlmsg_type = XFRM_MSG_GETAE;
2260 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
2261
2262 aevent_id = (struct xfrm_aevent_id*)NLMSG_DATA(hdr);
2263 aevent_id->flags = XFRM_AE_RVAL;
2264
2265 host2xfrm(dst, &aevent_id->sa_id.daddr);
2266 aevent_id->sa_id.spi = spi;
2267 aevent_id->sa_id.proto = proto_ike2kernel(protocol);
2268 aevent_id->sa_id.family = dst->get_family(dst);
2269
2270 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2271 {
2272 hdr = out;
2273 while (NLMSG_OK(hdr, len))
2274 {
2275 switch (hdr->nlmsg_type)
2276 {
2277 case XFRM_MSG_NEWAE:
2278 {
2279 out_aevent = NLMSG_DATA(hdr);
2280 break;
2281 }
2282 case NLMSG_ERROR:
2283 {
2284 struct nlmsgerr *err = NLMSG_DATA(hdr);
2285 DBG1(DBG_KNL, "querying replay state from SAD entry failed: %s (%d)",
2286 strerror(-err->error), -err->error);
2287 break;
2288 }
2289 default:
2290 hdr = NLMSG_NEXT(hdr, len);
2291 continue;
2292 case NLMSG_DONE:
2293 break;
2294 }
2295 break;
2296 }
2297 }
2298
2299 if (out_aevent == NULL)
2300 {
2301 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI 0x%x", spi);
2302 free(out);
2303 return FAILED;
2304 }
2305
2306 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2307 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2308 while(RTA_OK(rta, rtasize))
2309 {
2310 if (rta->rta_type == XFRMA_REPLAY_VAL)
2311 {
2312 memcpy(replay, RTA_DATA(rta), rta->rta_len);
2313 free(out);
2314 return SUCCESS;
2315 }
2316 rta = RTA_NEXT(rta, rtasize);
2317 }
2318
2319 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI 0x%x", spi);
2320 free(out);
2321 return FAILED;
2322 }
2323
2324 /**
2325 * Implementation of kernel_interface_t.update_sa.
2326 */
2327 static status_t update_sa(private_kernel_interface_t *this,
2328 u_int32_t spi, protocol_id_t protocol,
2329 host_t *src, host_t *dst,
2330 host_t *new_src, host_t *new_dst, bool encap)
2331 {
2332 unsigned char request[BUFFER_SIZE], *pos;
2333 struct nlmsghdr *hdr, *out = NULL;
2334 struct xfrm_usersa_id *sa_id;
2335 struct xfrm_usersa_info *out_sa = NULL, *sa;
2336 size_t len;
2337 struct rtattr *rta;
2338 size_t rtasize;
2339 struct xfrm_encap_tmpl* tmpl = NULL;
2340 bool got_replay_state;
2341 struct xfrm_replay_state replay;
2342
2343 memset(&request, 0, sizeof(request));
2344
2345 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x for update", spi);
2346
2347 /* query the exisiting SA first */
2348 hdr = (struct nlmsghdr*)request;
2349 hdr->nlmsg_flags = NLM_F_REQUEST;
2350 hdr->nlmsg_type = XFRM_MSG_GETSA;
2351 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2352
2353 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2354 host2xfrm(dst, &sa_id->daddr);
2355 sa_id->spi = spi;
2356 sa_id->proto = proto_ike2kernel(protocol);
2357 sa_id->family = dst->get_family(dst);
2358
2359 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2360 {
2361 hdr = out;
2362 while (NLMSG_OK(hdr, len))
2363 {
2364 switch (hdr->nlmsg_type)
2365 {
2366 case XFRM_MSG_NEWSA:
2367 {
2368 out_sa = NLMSG_DATA(hdr);
2369 break;
2370 }
2371 case NLMSG_ERROR:
2372 {
2373 struct nlmsgerr *err = NLMSG_DATA(hdr);
2374 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2375 strerror(-err->error), -err->error);
2376 break;
2377 }
2378 default:
2379 hdr = NLMSG_NEXT(hdr, len);
2380 continue;
2381 case NLMSG_DONE:
2382 break;
2383 }
2384 break;
2385 }
2386 }
2387 if (out_sa == NULL)
2388 {
2389 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2390 free(out);
2391 return FAILED;
2392 }
2393
2394 /* try to get the replay state */
2395 got_replay_state = (get_replay_state(
2396 this, spi, protocol, dst, &replay) == SUCCESS);
2397
2398 /* delete the old SA */
2399 if (this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
2400 {
2401 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI 0x%x", spi);
2402 free(out);
2403 return FAILED;
2404 }
2405
2406 DBG2(DBG_KNL, "updating SAD entry with SPI 0x%x from %#H..%#H to %#H..%#H",
2407 spi, src, dst, new_src, new_dst);
2408
2409 /* copy over the SA from out to request */
2410 hdr = (struct nlmsghdr*)request;
2411 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
2412 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2413 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2414 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2415 sa = NLMSG_DATA(hdr);
2416 sa->family = new_dst->get_family(new_dst);
2417
2418 if (!src->ip_equals(src, new_src))
2419 {
2420 host2xfrm(new_src, &sa->saddr);
2421 }
2422 if (!dst->ip_equals(dst, new_dst))
2423 {
2424 host2xfrm(new_dst, &sa->id.daddr);
2425 }
2426
2427 rta = XFRM_RTA(out, struct xfrm_usersa_info);
2428 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
2429 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
2430 while(RTA_OK(rta, rtasize))
2431 {
2432 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2433 if (rta->rta_type != XFRMA_ENCAP || encap)
2434 {
2435 if (rta->rta_type == XFRMA_ENCAP)
2436 { /* update encap tmpl */
2437 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2438 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2439 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2440 }
2441 memcpy(pos, rta, rta->rta_len);
2442 pos += RTA_ALIGN(rta->rta_len);
2443 hdr->nlmsg_len += RTA_ALIGN(rta->rta_len);
2444 }
2445 rta = RTA_NEXT(rta, rtasize);
2446 }
2447
2448 rta = (struct rtattr*)pos;
2449 if (tmpl == NULL && encap)
2450 { /* add tmpl if we are enabling it */
2451 rta->rta_type = XFRMA_ENCAP;
2452 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2453
2454 hdr->nlmsg_len += rta->rta_len;
2455 if (hdr->nlmsg_len > sizeof(request))
2456 {
2457 return FAILED;
2458 }
2459
2460 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2461 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2462 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2463 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2464 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2465
2466 rta = XFRM_RTA_NEXT(rta);
2467 }
2468
2469 if (got_replay_state)
2470 { /* copy the replay data if available */
2471 rta->rta_type = XFRMA_REPLAY_VAL;
2472 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_replay_state));
2473
2474 hdr->nlmsg_len += rta->rta_len;
2475 if (hdr->nlmsg_len > sizeof(request))
2476 {
2477 return FAILED;
2478 }
2479 memcpy(RTA_DATA(rta), &replay, sizeof(replay));
2480
2481 rta = XFRM_RTA_NEXT(rta);
2482 }
2483
2484 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2485 {
2486 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2487 free(out);
2488 return FAILED;
2489 }
2490 free(out);
2491
2492 return SUCCESS;
2493 }
2494
2495 /**
2496 * Implementation of kernel_interface_t.query_sa.
2497 */
2498 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
2499 u_int32_t spi, protocol_id_t protocol,
2500 u_int32_t *use_time)
2501 {
2502 unsigned char request[BUFFER_SIZE];
2503 struct nlmsghdr *out = NULL, *hdr;
2504 struct xfrm_usersa_id *sa_id;
2505 struct xfrm_usersa_info *sa = NULL;
2506 size_t len;
2507
2508 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x", spi);
2509 memset(&request, 0, sizeof(request));
2510
2511 hdr = (struct nlmsghdr*)request;
2512 hdr->nlmsg_flags = NLM_F_REQUEST;
2513 hdr->nlmsg_type = XFRM_MSG_GETSA;
2514 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2515
2516 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2517 host2xfrm(dst, &sa_id->daddr);
2518 sa_id->spi = spi;
2519 sa_id->proto = proto_ike2kernel(protocol);
2520 sa_id->family = dst->get_family(dst);
2521
2522 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2523 {
2524 hdr = out;
2525 while (NLMSG_OK(hdr, len))
2526 {
2527 switch (hdr->nlmsg_type)
2528 {
2529 case XFRM_MSG_NEWSA:
2530 {
2531 sa = NLMSG_DATA(hdr);
2532 break;
2533 }
2534 case NLMSG_ERROR:
2535 {
2536 struct nlmsgerr *err = NLMSG_DATA(hdr);
2537 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2538 strerror(-err->error), -err->error);
2539 break;
2540 }
2541 default:
2542 hdr = NLMSG_NEXT(hdr, len);
2543 continue;
2544 case NLMSG_DONE:
2545 break;
2546 }
2547 break;
2548 }
2549 }
2550
2551 if (sa == NULL)
2552 {
2553 DBG1(DBG_KNL, "unable to query SAD entry with SPI 0x%x", spi);
2554 free(out);
2555 return FAILED;
2556 }
2557
2558 *use_time = sa->curlft.use_time;
2559 free (out);
2560 return SUCCESS;
2561 }
2562
2563 /**
2564 * Implementation of kernel_interface_t.del_sa.
2565 */
2566 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2567 u_int32_t spi, protocol_id_t protocol)
2568 {
2569 unsigned char request[BUFFER_SIZE];
2570 struct nlmsghdr *hdr;
2571 struct xfrm_usersa_id *sa_id;
2572
2573 memset(&request, 0, sizeof(request));
2574
2575 DBG2(DBG_KNL, "deleting SAD entry with SPI 0x%x", spi);
2576
2577 hdr = (struct nlmsghdr*)request;
2578 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2579 hdr->nlmsg_type = XFRM_MSG_DELSA;
2580 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2581
2582 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2583 host2xfrm(dst, &sa_id->daddr);
2584 sa_id->spi = spi;
2585 sa_id->proto = proto_ike2kernel(protocol);
2586 sa_id->family = dst->get_family(dst);
2587
2588 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2589 {
2590 DBG1(DBG_KNL, "unable to delete SAD entry with SPI 0x%x", spi);
2591 return FAILED;
2592 }
2593 DBG2(DBG_KNL, "deleted SAD entry with SPI 0x%x", spi);
2594 return SUCCESS;
2595 }
2596
2597 /**
2598 * Implementation of kernel_interface_t.add_policy.
2599 */
2600 static status_t add_policy(private_kernel_interface_t *this,
2601 host_t *src, host_t *dst,
2602 traffic_selector_t *src_ts,
2603 traffic_selector_t *dst_ts,
2604 policy_dir_t direction, protocol_id_t protocol,
2605 u_int32_t reqid, bool high_prio, mode_t mode,
2606 u_int16_t ipcomp)
2607 {
2608 iterator_t *iterator;
2609 policy_entry_t *current, *policy;
2610 bool found = FALSE;
2611 unsigned char request[BUFFER_SIZE];
2612 struct xfrm_userpolicy_info *policy_info;
2613 struct nlmsghdr *hdr;
2614
2615 /* create a policy */
2616 policy = malloc_thing(policy_entry_t);
2617 memset(policy, 0, sizeof(policy_entry_t));
2618 policy->sel = ts2selector(src_ts, dst_ts);
2619 policy->direction = direction;
2620 policy->proto = protocol;
2621
2622 /* find the policy, which matches EXACTLY */
2623 pthread_mutex_lock(&this->mutex);
2624 iterator = this->policies->create_iterator(this->policies, TRUE);
2625 while (iterator->iterate(iterator, (void**)&current))
2626 {
2627 if (memeq(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) &&
2628 policy->direction == current->direction &&
2629 policy->proto == current->proto)
2630 {
2631 /* use existing policy */
2632 current->refcount++;
2633 DBG2(DBG_KNL, "policy %R===%R already exists, increasing "
2634 "refcount", src_ts, dst_ts);
2635 free(policy);
2636 policy = current;
2637 found = TRUE;
2638 break;
2639 }
2640 }
2641 iterator->destroy(iterator);
2642 if (!found)
2643 { /* apply the new one, if we have no such policy */
2644 this->policies->insert_last(this->policies, policy);
2645 policy->refcount = 1;
2646 }
2647
2648 DBG2(DBG_KNL, "adding policy %R===%R", src_ts, dst_ts);
2649
2650 memset(&request, 0, sizeof(request));
2651 hdr = (struct nlmsghdr*)request;
2652 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2653 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2654 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2655
2656 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2657 policy_info->sel = policy->sel;
2658 policy_info->dir = policy->direction;
2659 /* calculate priority based on source selector size, small size = high prio */
2660 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2661 policy_info->priority -= policy->sel.prefixlen_s * 10;
2662 policy_info->priority -= policy->sel.proto ? 2 : 0;
2663 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2664 policy_info->action = XFRM_POLICY_ALLOW;
2665 policy_info->share = XFRM_SHARE_ANY;
2666 pthread_mutex_unlock(&this->mutex);
2667
2668 /* policies don't expire */
2669 policy_info->lft.soft_byte_limit = XFRM_INF;
2670 policy_info->lft.soft_packet_limit = XFRM_INF;
2671 policy_info->lft.hard_byte_limit = XFRM_INF;
2672 policy_info->lft.hard_packet_limit = XFRM_INF;
2673 policy_info->lft.soft_add_expires_seconds = 0;
2674 policy_info->lft.hard_add_expires_seconds = 0;
2675 policy_info->lft.soft_use_expires_seconds = 0;
2676 policy_info->lft.hard_use_expires_seconds = 0;
2677
2678 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2679 rthdr->rta_type = XFRMA_TMPL;
2680 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2681
2682 hdr->nlmsg_len += rthdr->rta_len;
2683 if (hdr->nlmsg_len > sizeof(request))
2684 {
2685 return FAILED;
2686 }
2687
2688 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2689
2690 if (ipcomp != IPCOMP_NONE)
2691 {
2692 tmpl->reqid = reqid;
2693 tmpl->id.proto = IPPROTO_COMP;
2694 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2695 tmpl->mode = mode;
2696 tmpl->optional = direction != POLICY_OUT;
2697 tmpl->family = src->get_family(src);
2698
2699 host2xfrm(src, &tmpl->saddr);
2700 host2xfrm(dst, &tmpl->id.daddr);
2701
2702 /* add an additional xfrm_user_tmpl */
2703 rthdr->rta_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2704 hdr->nlmsg_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2705 if (hdr->nlmsg_len > sizeof(request))
2706 {
2707 return FAILED;
2708 }
2709
2710 tmpl++;
2711 }
2712
2713 tmpl->reqid = reqid;
2714 tmpl->id.proto = proto_ike2kernel(protocol);
2715 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2716 tmpl->mode = mode;
2717 tmpl->family = src->get_family(src);
2718
2719 host2xfrm(src, &tmpl->saddr);
2720 host2xfrm(dst, &tmpl->id.daddr);
2721
2722 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2723 {
2724 DBG1(DBG_KNL, "unable to add policy %R===%R", src_ts, dst_ts);
2725 return FAILED;
2726 }
2727
2728 /* install a route, if:
2729 * - we are NOT updating a policy
2730 * - this is a forward policy (to just get one for each child)
2731 * - we are in tunnel mode
2732 * - we are not using IPv6 (does not work correctly yet!)
2733 * - routing is not disabled via strongswan.conf
2734 */
2735 if (policy->route == NULL && direction == POLICY_FWD &&
2736 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6 &&
2737 this->install_routes)
2738 {
2739 policy->route = malloc_thing(route_entry_t);
2740 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2741 {
2742 /* get the nexthop to src (src as we are in POLICY_FWD).*/
2743 policy->route->gateway = get_route(this, src, TRUE);
2744 policy->route->if_index = get_interface_index(this, dst);
2745 policy->route->dst_net = chunk_alloc(
2746 policy->sel.family == AF_INET ? 4 : 16);
2747 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr,
2748 policy->route->dst_net.len);
2749 policy->route->prefixlen = policy->sel.prefixlen_s;
2750
2751 switch (manage_srcroute(this, RTM_NEWROUTE,
2752 NLM_F_CREATE | NLM_F_EXCL, policy->route))
2753 {
2754 default:
2755 DBG1(DBG_KNL, "unable to install source route for %H",
2756 policy->route->src_ip);
2757 /* FALL */
2758 case ALREADY_DONE:
2759 /* route exists, do not uninstall */
2760 route_entry_destroy(policy->route);
2761 policy->route = NULL;
2762 break;
2763 case SUCCESS:
2764 break;
2765 }
2766 }
2767 else
2768 {
2769 free(policy->route);
2770 policy->route = NULL;
2771 }
2772 }
2773
2774 return SUCCESS;
2775 }
2776
2777 /**
2778 * Implementation of kernel_interface_t.query_policy.
2779 */
2780 static status_t query_policy(private_kernel_interface_t *this,
2781 traffic_selector_t *src_ts,
2782 traffic_selector_t *dst_ts,
2783 policy_dir_t direction, u_int32_t *use_time)
2784 {
2785 unsigned char request[BUFFER_SIZE];
2786 struct nlmsghdr *out = NULL, *hdr;
2787 struct xfrm_userpolicy_id *policy_id;
2788 struct xfrm_userpolicy_info *policy = NULL;
2789 size_t len;
2790
2791 memset(&request, 0, sizeof(request));
2792
2793 DBG2(DBG_KNL, "querying policy %R===%R", src_ts, dst_ts);
2794
2795 hdr = (struct nlmsghdr*)request;
2796 hdr->nlmsg_flags = NLM_F_REQUEST;
2797 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2798 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2799
2800 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2801 policy_id->sel = ts2selector(src_ts, dst_ts);
2802 policy_id->dir = direction;
2803
2804 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2805 {
2806 hdr = out;
2807 while (NLMSG_OK(hdr, len))
2808 {
2809 switch (hdr->nlmsg_type)
2810 {
2811 case XFRM_MSG_NEWPOLICY:
2812 {
2813 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2814 break;
2815 }
2816 case NLMSG_ERROR:
2817 {
2818 struct nlmsgerr *err = NLMSG_DATA(hdr);
2819 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2820 strerror(-err->error), -err->error);
2821 break;
2822 }
2823 default:
2824 hdr = NLMSG_NEXT(hdr, len);
2825 continue;
2826 case NLMSG_DONE:
2827 break;
2828 }
2829 break;
2830 }
2831 }
2832
2833 if (policy == NULL)
2834 {
2835 DBG2(DBG_KNL, "unable to query policy %R===%R", src_ts, dst_ts);
2836 free(out);
2837 return FAILED;
2838 }
2839 *use_time = (time_t)policy->curlft.use_time;
2840
2841 free(out);
2842 return SUCCESS;
2843 }
2844
2845 /**
2846 * Implementation of kernel_interface_t.del_policy.
2847 */
2848 static status_t del_policy(private_kernel_interface_t *this,
2849 traffic_selector_t *src_ts,
2850 traffic_selector_t *dst_ts,
2851 policy_dir_t direction)
2852 {
2853 policy_entry_t *current, policy, *to_delete = NULL;
2854 route_entry_t *route;
2855 unsigned char request[BUFFER_SIZE];
2856 struct nlmsghdr *hdr;
2857 struct xfrm_userpolicy_id *policy_id;
2858 iterator_t *iterator;
2859
2860 DBG2(DBG_KNL, "deleting policy %R===%R", src_ts, dst_ts);
2861
2862 /* create a policy */
2863 memset(&policy, 0, sizeof(policy_entry_t));
2864 policy.sel = ts2selector(src_ts, dst_ts);
2865 policy.direction = direction;
2866
2867 /* find the policy */
2868 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2869 while (iterator->iterate(iterator, (void**)&current))
2870 {
2871 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2872 policy.direction == current->direction)
2873 {
2874 to_delete = current;
2875 if (--to_delete->refcount > 0)
2876 {
2877 /* is used by more SAs, keep in kernel */
2878 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2879 iterator->destroy(iterator);
2880 return SUCCESS;
2881 }
2882 /* remove if last reference */
2883 iterator->remove(iterator);
2884 break;
2885 }
2886 }
2887 iterator->destroy(iterator);
2888 if (!to_delete)
2889 {
2890 DBG1(DBG_KNL, "deleting policy %R===%R failed, not found", src_ts, dst_ts);
2891 return NOT_FOUND;
2892 }
2893
2894 memset(&request, 0, sizeof(request));
2895
2896 hdr = (struct nlmsghdr*)request;
2897 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2898 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2899 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2900
2901 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2902 policy_id->sel = to_delete->sel;
2903 policy_id->dir = direction;
2904
2905 route = to_delete->route;
2906 free(to_delete);
2907
2908 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2909 {
2910 DBG1(DBG_KNL, "unable to delete policy %R===%R", src_ts, dst_ts);
2911 return FAILED;
2912 }
2913
2914 if (route)
2915 {
2916 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2917 {
2918 DBG1(DBG_KNL, "error uninstalling route installed with "
2919 "policy %R===%R", src_ts, dst_ts);
2920 }
2921 route_entry_destroy(route);
2922 }
2923 return SUCCESS;
2924 }
2925
2926 /**
2927 * Implementation of kernel_interface_t.destroy.
2928 */
2929 static void destroy(private_kernel_interface_t *this)
2930 {
2931 if (this->routing_table)
2932 {
2933 manage_rule(this, RTM_DELRULE, this->routing_table,
2934 this->routing_table_prio);
2935 }
2936
2937 this->job->cancel(this->job);
2938 close(this->socket_xfrm_events);
2939 close(this->socket_xfrm);
2940 close(this->socket_rt_events);
2941 close(this->socket_rt);
2942 this->policies->destroy(this->policies);
2943 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2944 free(this);
2945 }
2946
2947 /*
2948 * Described in header.
2949 */
2950 kernel_interface_t *kernel_interface_create()
2951 {
2952 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
2953 struct sockaddr_nl addr;
2954
2955 /* public functions */
2956 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
2957 this->public.get_cpi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,u_int32_t,u_int16_t*))get_cpi;
2958 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,u_int16_t,u_int16_t,u_int16_t,u_int16_t,prf_plus_t*,mode_t,u_int16_t,bool,bool))add_sa;
2959 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
2960 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
2961 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
2962 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t,u_int16_t))add_policy;
2963 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
2964 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
2965 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
2966 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
2967 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
2968 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
2969 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
2970 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
2971
2972 /* private members */
2973 this->policies = linked_list_create();
2974 this->ifaces = linked_list_create();
2975 this->hiter = NULL;
2976 this->seq = 200;
2977 pthread_mutex_init(&this->mutex, NULL);
2978 pthread_mutex_init(&this->nl_mutex, NULL);
2979 pthread_cond_init(&this->cond, NULL);
2980 timerclear(&this->last_roam);
2981 this->install_routes = lib->settings->get_bool(lib->settings,
2982 "charon.install_routes", TRUE);
2983 this->routing_table = lib->settings->get_int(lib->settings,
2984 "charon.routing_table", IPSEC_ROUTING_TABLE);
2985 this->routing_table_prio = lib->settings->get_int(lib->settings,
2986 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
2987 memset(&addr, 0, sizeof(addr));
2988 addr.nl_family = AF_NETLINK;
2989
2990 /* create and bind RT socket */
2991 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2992 if (this->socket_rt <= 0)
2993 {
2994 charon->kill(charon, "unable to create RT netlink socket");
2995 }
2996 addr.nl_groups = 0;
2997 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
2998 {
2999 charon->kill(charon, "unable to bind RT netlink socket");
3000 }
3001
3002 /* create and bind RT socket for events (address/interface/route changes) */
3003 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
3004 if (this->socket_rt_events <= 0)
3005 {
3006 charon->kill(charon, "unable to create RT event socket");
3007 }