ike/kernel protocol identifier conversion functions
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /*
2 * Copyright (C) 2006-2008 Tobias Brunner
3 * Copyright (C) 2005-2007 Martin Willi
4 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
5 * Copyright (C) 2006 Daniel Roethlisberger
6 * Copyright (C) 2005 Jan Hutter
7 * Hochschule fuer Technik Rapperswil
8 * Copyright (C) 2003 Herbert Xu.
9 *
10 * Based on xfrm code from pluto.
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 2 of the License, or (at your
15 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * for more details.
21 *
22 * $Id$
23 */
24
25 #include <sys/types.h>
26 #include <sys/socket.h>
27 #include <sys/time.h>
28 #include <linux/netlink.h>
29 #include <linux/rtnetlink.h>
30 #include <linux/xfrm.h>
31 #include <linux/udp.h>
32 #include <netinet/in.h>
33 #include <pthread.h>
34 #include <unistd.h>
35 #include <fcntl.h>
36 #include <errno.h>
37 #include <string.h>
38 #include <net/if.h>
39 #include <sys/ioctl.h>
40
41 #include "kernel_interface.h"
42
43 #include <daemon.h>
44 #include <utils/linked_list.h>
45 #include <processing/jobs/delete_child_sa_job.h>
46 #include <processing/jobs/rekey_child_sa_job.h>
47 #include <processing/jobs/acquire_job.h>
48 #include <processing/jobs/callback_job.h>
49 #include <processing/jobs/roam_job.h>
50
/**
 * routing table for routes installed by us.
 * May be overridden at build time by predefining IPSEC_ROUTING_TABLE.
 */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif
/**
 * priority value paired with the routing table above.
 * May be overridden at build time by predefining IPSEC_ROUTING_TABLE_PRIO.
 */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** default priority of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** delay before firing roam jobs (ms) */
#define ROAM_DELAY 100

/** size in bytes of the stack buffers used to build netlink requests */
#define BUFFER_SIZE 1024
67
/**
 * returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'.
 *
 * The payload pointer is cast to char* before the offset is added, as
 * arithmetic on the void* yielded by NLMSG_DATA is not standard C.
 */
#define XFRM_RTA(nlh, x) \
	((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + NLMSG_ALIGN(sizeof(x))))
/**
 * returns a pointer to the next rtattr following rta.
 * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
 * (no remaining-length bookkeeping is done here)
 */
#define XFRM_RTA_NEXT(rta) \
	((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
/**
 * returns the total size of attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) (NLMSG_PAYLOAD(nlh, sizeof(x)))
83
typedef struct kernel_algorithm_t kernel_algorithm_t;

/**
 * Mapping from the algorithms defined in IKEv2 to
 * kernel level algorithm names and their key length.
 *
 * Tables of this type are terminated by an entry whose ikev2_id
 * is END_OF_LIST.
 */
struct kernel_algorithm_t {
	/**
	 * Identifier specified in IKEv2
	 */
	int ikev2_id;

	/**
	 * Name of the algorithm, as used as kernel identifier
	 */
	char *name;

	/**
	 * Key length in bits, if fixed size (0 otherwise)
	 */
	u_int key_size;
};
/** ikev2_id value marking the last (sentinel) entry of an algorithm table */
#define END_OF_LIST -1
107
/**
 * Algorithms for encryption.
 *
 * A key_size of 0 means no fixed size is recorded for the algorithm. For the
 * CCM/GCM entries the key_size column carries the ICV size instead.
 * Entries that are commented out have no kernel name assigned here.
 */
static kernel_algorithm_t encryption_algs[] = {
/*	{ENCR_DES_IV64, "***", 0}, */
	{ENCR_DES, "des", 64},
	{ENCR_3DES, "des3_ede", 192},
/*	{ENCR_RC5, "***", 0}, */
/*	{ENCR_IDEA, "***", 0}, */
	{ENCR_CAST, "cast128", 0},
	{ENCR_BLOWFISH, "blowfish", 0},
/*	{ENCR_3IDEA, "***", 0}, */
/*	{ENCR_DES_IV32, "***", 0}, */
	{ENCR_NULL, "cipher_null", 0},
	{ENCR_AES_CBC, "aes", 0},
/*	{ENCR_AES_CTR, "***", 0}, */
	{ENCR_AES_CCM_ICV8, "rfc4309(ccm(aes))", 64}, /* key_size = ICV size */
	{ENCR_AES_CCM_ICV12, "rfc4309(ccm(aes))", 96}, /* key_size = ICV size */
	{ENCR_AES_CCM_ICV16, "rfc4309(ccm(aes))", 128}, /* key_size = ICV size */
	{ENCR_AES_GCM_ICV8, "rfc4106(gcm(aes))", 64}, /* key_size = ICV size */
	{ENCR_AES_GCM_ICV12, "rfc4106(gcm(aes))", 96}, /* key_size = ICV size */
	{ENCR_AES_GCM_ICV16, "rfc4106(gcm(aes))", 128}, /* key_size = ICV size */
	{END_OF_LIST, NULL, 0},
};
132
/**
 * Algorithms for integrity protection.
 *
 * Maps IKEv2 integrity algorithm identifiers to the kernel algorithm name
 * and the key size (in bits) recorded for it. Terminated by END_OF_LIST.
 */
static kernel_algorithm_t integrity_algs[] = {
	{AUTH_HMAC_MD5_96, "md5", 128},
	{AUTH_HMAC_SHA1_96, "sha1", 160},
	{AUTH_HMAC_SHA2_256_128, "sha256", 256},
	{AUTH_HMAC_SHA2_384_192, "sha384", 384},
	{AUTH_HMAC_SHA2_512_256, "sha512", 512},
/*	{AUTH_DES_MAC, "***", 0}, */
/*	{AUTH_KPDK_MD5, "***", 0}, */
	{AUTH_AES_XCBC_96, "xcbc(aes)", 128},
	{END_OF_LIST, NULL, 0},
};
147
/**
 * Algorithms for IPComp.
 *
 * Maps IPComp transform identifiers to kernel algorithm names; no key size
 * is recorded for these (always 0). Terminated by END_OF_LIST.
 */
static kernel_algorithm_t compression_algs[] = {
/*	{IPCOMP_OUI, "***", 0}, */
	{IPCOMP_DEFLATE, "deflate", 0},
	{IPCOMP_LZS, "lzs", 0},
	{IPCOMP_LZJH, "lzjh", 0},
	{END_OF_LIST, NULL, 0},
};
158
159 /**
160 * Look up a kernel algorithm name and its key size
161 */
162 static char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
163 u_int16_t ikev2_algo, u_int16_t *key_size)
164 {
165 while (kernel_algo->ikev2_id != END_OF_LIST)
166 {
167 if (ikev2_algo == kernel_algo->ikev2_id)
168 {
169 /* match, evaluate key length */
170 if (key_size && *key_size == 0)
171 { /* update key size if not set */
172 *key_size = kernel_algo->key_size;
173 }
174 return kernel_algo->name;
175 }
176 kernel_algo++;
177 }
178 return NULL;
179 }
180
181 typedef struct route_entry_t route_entry_t;
182
183 /**
184 * installed routing entry
185 */
186 struct route_entry_t {
187
188 /** Index of the interface the route is bound to */
189 int if_index;
190
191 /** Source ip of the route */
192 host_t *src_ip;
193
194 /** gateway for this route */
195 host_t *gateway;
196
197 /** Destination net */
198 chunk_t dst_net;
199
200 /** Destination net prefixlen */
201 u_int8_t prefixlen;
202 };
203
204 /**
205 * destroy an route_entry_t object
206 */
207 static void route_entry_destroy(route_entry_t *this)
208 {
209 this->src_ip->destroy(this->src_ip);
210 this->gateway->destroy(this->gateway);
211 chunk_free(&this->dst_net);
212 free(this);
213 }
214
typedef struct policy_entry_t policy_entry_t;

/**
 * installed kernel policy.
 *
 * One entry describes a single policy together with the optional route
 * that belongs to it.
 */
struct policy_entry_t {

	/** direction of this policy: in, out, forward */
	u_int8_t direction;

	/** reqid of the policy */
	u_int32_t reqid;

	/** parameters of installed policy */
	struct xfrm_selector sel;

	/** associated route installed for this policy */
	route_entry_t *route;

	/** by how many CHILD_SA's this policy is used */
	u_int refcount;
};
237
238 typedef struct addr_entry_t addr_entry_t;
239
240 /**
241 * IP address in an inface_entry_t
242 */
243 struct addr_entry_t {
244
245 /** The ip address */
246 host_t *ip;
247
248 /** virtual IP managed by us */
249 bool virtual;
250
251 /** scope of the address */
252 u_char scope;
253
254 /** Number of times this IP is used, if virtual */
255 u_int refcount;
256 };
257
258 /**
259 * destroy a addr_entry_t object
260 */
261 static void addr_entry_destroy(addr_entry_t *this)
262 {
263 this->ip->destroy(this->ip);
264 free(this);
265 }
266
267 typedef struct iface_entry_t iface_entry_t;
268
269 /**
270 * A network interface on this system, containing addr_entry_t's
271 */
272 struct iface_entry_t {
273
274 /** interface index */
275 int ifindex;
276
277 /** name of the interface */
278 char ifname[IFNAMSIZ];
279
280 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
281 u_int flags;
282
283 /** list of addresses as host_t */
284 linked_list_t *addrs;
285 };
286
287 /**
288 * destroy an interface entry
289 */
290 static void iface_entry_destroy(iface_entry_t *this)
291 {
292 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
293 free(this);
294 }
295
typedef struct private_kernel_interface_t private_kernel_interface_t;

/**
 * Private variables and functions of kernel_interface class.
 */
struct private_kernel_interface_t {
	/**
	 * Public part of the kernel_interface_t object.
	 */
	kernel_interface_t public;

	/**
	 * mutex to lock access to netlink socket
	 * (held for a whole request/response exchange)
	 */
	pthread_mutex_t nl_mutex;

	/**
	 * mutex to lock access to various lists
	 */
	pthread_mutex_t mutex;

	/**
	 * condition variable to signal virtual IP add/removal
	 */
	pthread_cond_t cond;

	/**
	 * List of installed policies (policy_entry_t)
	 */
	linked_list_t *policies;

	/**
	 * Cached list of interfaces and their addresses (iface_entry_t)
	 */
	linked_list_t *ifaces;

	/**
	 * address iterator of the interface currently visited by the
	 * interface iterator hook; NULL while no interface is being visited
	 */
	iterator_t *hiter;

	/**
	 * job receiving netlink events
	 */
	callback_job_t *job;

	/**
	 * current sequence number for netlink request
	 */
	int seq;

	/**
	 * Netlink xfrm socket (IPsec)
	 */
	int socket_xfrm;

	/**
	 * netlink xfrm socket to receive acquire and expire events
	 */
	int socket_xfrm_events;

	/**
	 * Netlink rt socket (routing)
	 */
	int socket_rt;

	/**
	 * Netlink rt socket to receive address change events
	 */
	int socket_rt_events;

	/**
	 * time of the last roam_job
	 * (stored as the point in time the job was scheduled to fire)
	 */
	struct timeval last_roam;

	/**
	 * whether to install routes along policies
	 */
	bool install_routes;

	/**
	 * routing table to install routes
	 */
	int routing_table;

	/**
	 * priority of used routing table
	 */
	int routing_table_prio;
};
387
388 /**
389 * convert a IKEv2 specific protocol identifier to the kernel one
390 */
391 static u_int8_t proto_ike2kernel(protocol_id_t proto)
392 {
393 switch (proto)
394 {
395 case PROTO_ESP:
396 return IPPROTO_ESP;
397 case PROTO_AH:
398 return IPPROTO_AH;
399 default:
400 return proto;
401 }
402 }
403
404 /**
405 * reverse of ike2kernel
406 */
407 static protocol_id_t proto_kernel2ike(u_int8_t proto)
408 {
409 switch (proto)
410 {
411 case IPPROTO_ESP:
412 return PROTO_ESP;
413 case IPPROTO_AH:
414 return PROTO_AH;
415 default:
416 return proto;
417 }
418 }
419
420 /**
421 * convert a host_t to a struct xfrm_address
422 */
423 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
424 {
425 chunk_t chunk = host->get_address(host);
426 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
427 }
428
429 /**
430 * convert a traffic selector address range to subnet and its mask.
431 */
432 static void ts2subnet(traffic_selector_t* ts,
433 xfrm_address_t *net, u_int8_t *mask)
434 {
435 /* there is no way to do this cleanly, as the address range may
436 * be anything else but a subnet. We use from_addr as subnet
437 * and try to calculate a usable subnet mask.
438 */
439 int byte, bit;
440 bool found = FALSE;
441 chunk_t from, to;
442 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
443
444 from = ts->get_from_address(ts);
445 to = ts->get_to_address(ts);
446
447 *mask = (size * 8);
448 /* go trough all bits of the addresses, beginning in the front.
449 * as long as they are equal, the subnet gets larger
450 */
451 for (byte = 0; byte < size; byte++)
452 {
453 for (bit = 7; bit >= 0; bit--)
454 {
455 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
456 {
457 *mask = ((7 - bit) + (byte * 8));
458 found = TRUE;
459 break;
460 }
461 }
462 if (found)
463 {
464 break;
465 }
466 }
467 memcpy(net, from.ptr, from.len);
468 chunk_free(&from);
469 chunk_free(&to);
470 }
471
472 /**
473 * convert a traffic selector port range to port/portmask
474 */
475 static void ts2ports(traffic_selector_t* ts,
476 u_int16_t *port, u_int16_t *mask)
477 {
478 /* linux does not seem to accept complex portmasks. Only
479 * any or a specific port is allowed. We set to any, if we have
480 * a port range, or to a specific, if we have one port only.
481 */
482 u_int16_t from, to;
483
484 from = ts->get_from_port(ts);
485 to = ts->get_to_port(ts);
486
487 if (from == to)
488 {
489 *port = htons(from);
490 *mask = ~0;
491 }
492 else
493 {
494 *port = 0;
495 *mask = 0;
496 }
497 }
498
499 /**
500 * convert a pair of traffic_selectors to a xfrm_selector
501 */
502 static struct xfrm_selector ts2selector(traffic_selector_t *src,
503 traffic_selector_t *dst)
504 {
505 struct xfrm_selector sel;
506
507 memset(&sel, 0, sizeof(sel));
508 sel.family = src->get_type(src) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
509 /* src or dest proto may be "any" (0), use more restrictive one */
510 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
511 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
512 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
513 ts2ports(dst, &sel.dport, &sel.dport_mask);
514 ts2ports(src, &sel.sport, &sel.sport_mask);
515 sel.ifindex = 0;
516 sel.user = 0;
517
518 return sel;
519 }
520
521 /**
522 * Creates an rtattr and adds it to the netlink message
523 */
524 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
525 size_t buflen)
526 {
527 struct rtattr *rta;
528
529 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
530 {
531 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
532 return;
533 }
534
535 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
536 rta->rta_type = rta_type;
537 rta->rta_len = RTA_LENGTH(data.len);
538 memcpy(RTA_DATA(rta), data.ptr, data.len);
539 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
540 }
541
542 /**
543 * process a XFRM_MSG_ACQUIRE from kernel
544 */
545 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
546 {
547 u_int32_t reqid = 0;
548 job_t *job;
549 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
550 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
551
552 if (RTA_OK(rtattr, rtsize))
553 {
554 if (rtattr->rta_type == XFRMA_TMPL)
555 {
556 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
557 reqid = tmpl->reqid;
558 }
559 }
560 if (reqid == 0)
561 {
562 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
563 return;
564 }
565 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
566 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid %d", reqid);
567 job = (job_t*)acquire_job_create(reqid);
568 charon->processor->queue_job(charon->processor, job);
569 }
570
571 /**
572 * process a XFRM_MSG_EXPIRE from kernel
573 */
574 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
575 {
576 job_t *job;
577 protocol_id_t protocol;
578 u_int32_t spi, reqid;
579 struct xfrm_user_expire *expire;
580
581 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
582 protocol = proto_kernel2ike(expire->state.id.proto);
583 spi = expire->state.id.spi;
584 reqid = expire->state.reqid;
585
586 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
587
588 if (protocol != PROTO_ESP && protocol != PROTO_AH)
589 {
590 DBG2(DBG_KNL, "ignoring XFRM_MSG_EXPIRE for SA 0x%x (reqid %d) which is "
591 "not a CHILD_SA", ntohl(spi), reqid);
592 return;
593 }
594
595 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA 0x%x (reqid %d)",
596 expire->hard ? "delete" : "rekey", protocol_id_names,
597 protocol, ntohl(spi), reqid);
598 if (expire->hard)
599 {
600 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
601 }
602 else
603 {
604 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
605 }
606 charon->processor->queue_job(charon->processor, job);
607 }
608
609 /**
610 * start a roaming job. We delay it for a second and fire only one job
611 * for multiple events. Otherwise we would create two many jobs.
612 */
613 static void fire_roam_job(private_kernel_interface_t *this, bool address)
614 {
615 struct timeval now;
616
617 if (gettimeofday(&now, NULL) == 0)
618 {
619 if (timercmp(&now, &this->last_roam, >))
620 {
621 now.tv_usec += ROAM_DELAY * 1000;
622 while (now.tv_usec > 1000000)
623 {
624 now.tv_sec++;
625 now.tv_usec -= 1000000;
626 }
627 this->last_roam = now;
628 charon->scheduler->schedule_job(charon->scheduler,
629 (job_t*)roam_job_create(address), ROAM_DELAY);
630 }
631 }
632 }
633
634 /**
635 * process RTM_NEWLINK/RTM_DELLINK from kernel
636 */
637 static void process_link(private_kernel_interface_t *this,
638 struct nlmsghdr *hdr, bool event)
639 {
640 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
641 struct rtattr *rta = IFLA_RTA(msg);
642 size_t rtasize = IFLA_PAYLOAD (hdr);
643 iterator_t *iterator;
644 iface_entry_t *current, *entry = NULL;
645 char *name = NULL;
646 bool update = FALSE;
647
648 while(RTA_OK(rta, rtasize))
649 {
650 switch (rta->rta_type)
651 {
652 case IFLA_IFNAME:
653 name = RTA_DATA(rta);
654 break;
655 }
656 rta = RTA_NEXT(rta, rtasize);
657 }
658 if (!name)
659 {
660 name = "(unknown)";
661 }
662
663 switch (hdr->nlmsg_type)
664 {
665 case RTM_NEWLINK:
666 {
667 if (msg->ifi_flags & IFF_LOOPBACK)
668 { /* ignore loopback interfaces */
669 break;
670 }
671 iterator = this->ifaces->create_iterator_locked(this->ifaces,
672 &this->mutex);
673 while (iterator->iterate(iterator, (void**)&current))
674 {
675 if (current->ifindex == msg->ifi_index)
676 {
677 entry = current;
678 break;
679 }
680 }
681 if (!entry)
682 {
683 entry = malloc_thing(iface_entry_t);
684 entry->ifindex = msg->ifi_index;
685 entry->flags = 0;
686 entry->addrs = linked_list_create();
687 this->ifaces->insert_last(this->ifaces, entry);
688 }
689 memcpy(entry->ifname, name, IFNAMSIZ);
690 entry->ifname[IFNAMSIZ-1] = '\0';
691 if (event)
692 {
693 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
694 {
695 update = TRUE;
696 DBG1(DBG_KNL, "interface %s activated", name);
697 }
698 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
699 {
700 update = TRUE;
701 DBG1(DBG_KNL, "interface %s deactivated", name);
702 }
703 }
704 entry->flags = msg->ifi_flags;
705 iterator->destroy(iterator);
706 break;
707 }
708 case RTM_DELLINK:
709 {
710 iterator = this->ifaces->create_iterator_locked(this->ifaces,
711 &this->mutex);
712 while (iterator->iterate(iterator, (void**)&current))
713 {
714 if (current->ifindex == msg->ifi_index)
715 {
716 /* we do not remove it, as an address may be added to a
717 * "down" interface and we wan't to know that. */
718 current->flags = msg->ifi_flags;
719 break;
720 }
721 }
722 iterator->destroy(iterator);
723 break;
724 }
725 }
726
727 /* send an update to all IKE_SAs */
728 if (update && event)
729 {
730 fire_roam_job(this, TRUE);
731 }
732 }
733
734 /**
735 * process RTM_NEWADDR/RTM_DELADDR from kernel
736 */
737 static void process_addr(private_kernel_interface_t *this,
738 struct nlmsghdr *hdr, bool event)
739 {
740 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
741 struct rtattr *rta = IFA_RTA(msg);
742 size_t rtasize = IFA_PAYLOAD (hdr);
743 host_t *host = NULL;
744 iterator_t *ifaces, *addrs;
745 iface_entry_t *iface;
746 addr_entry_t *addr;
747 chunk_t local = chunk_empty, address = chunk_empty;
748 bool update = FALSE, found = FALSE, changed = FALSE;
749
750 while(RTA_OK(rta, rtasize))
751 {
752 switch (rta->rta_type)
753 {
754 case IFA_LOCAL:
755 local.ptr = RTA_DATA(rta);
756 local.len = RTA_PAYLOAD(rta);
757 break;
758 case IFA_ADDRESS:
759 address.ptr = RTA_DATA(rta);
760 address.len = RTA_PAYLOAD(rta);
761 break;
762 }
763 rta = RTA_NEXT(rta, rtasize);
764 }
765
766 /* For PPP interfaces, we need the IFA_LOCAL address,
767 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
768 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
769 if (local.ptr)
770 {
771 host = host_create_from_chunk(msg->ifa_family, local, 0);
772 }
773 else if (address.ptr)
774 {
775 host = host_create_from_chunk(msg->ifa_family, address, 0);
776 }
777
778 if (host == NULL)
779 { /* bad family? */
780 return;
781 }
782
783 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
784 while (ifaces->iterate(ifaces, (void**)&iface))
785 {
786 if (iface->ifindex == msg->ifa_index)
787 {
788 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
789 while (addrs->iterate(addrs, (void**)&addr))
790 {
791 if (host->ip_equals(host, addr->ip))
792 {
793 found = TRUE;
794 if (hdr->nlmsg_type == RTM_DELADDR)
795 {
796 changed = TRUE;
797 addrs->remove(addrs);
798 if (!addr->virtual)
799 {
800 DBG1(DBG_KNL, "%H disappeared from %s",
801 host, iface->ifname);
802 }
803 addr_entry_destroy(addr);
804 }
805 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
806 {
807 addr->refcount = 1;
808 }
809 }
810 }
811 addrs->destroy(addrs);
812
813 if (hdr->nlmsg_type == RTM_NEWADDR)
814 {
815 if (!found)
816 {
817 found = TRUE;
818 changed = TRUE;
819 addr = malloc_thing(addr_entry_t);
820 addr->ip = host->clone(host);
821 addr->virtual = FALSE;
822 addr->refcount = 1;
823 addr->scope = msg->ifa_scope;
824
825 iface->addrs->insert_last(iface->addrs, addr);
826 if (event)
827 {
828 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
829 }
830 }
831 }
832 if (found && (iface->flags & IFF_UP))
833 {
834 update = TRUE;
835 }
836 break;
837 }
838 }
839 ifaces->destroy(ifaces);
840 host->destroy(host);
841
842 /* send an update to all IKE_SAs */
843 if (update && event && changed)
844 {
845 fire_roam_job(this, TRUE);
846 }
847 }
848
849 /**
850 * Receives events from kernel
851 */
852 static job_requeue_t receive_events(private_kernel_interface_t *this)
853 {
854 char response[1024];
855 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
856 struct sockaddr_nl addr;
857 socklen_t addr_len = sizeof(addr);
858 int len, oldstate, maxfd, selected;
859 fd_set rfds;
860
861 FD_ZERO(&rfds);
862 FD_SET(this->socket_xfrm_events, &rfds);
863 FD_SET(this->socket_rt_events, &rfds);
864 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
865
866 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
867 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
868 pthread_setcancelstate(oldstate, NULL);
869 if (selected <= 0)
870 {
871 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
872 return JOB_REQUEUE_FAIR;
873 }
874 if (FD_ISSET(this->socket_xfrm_events, &rfds))
875 {
876 selected = this->socket_xfrm_events;
877 }
878 else if (FD_ISSET(this->socket_rt_events, &rfds))
879 {
880 selected = this->socket_rt_events;
881 }
882 else
883 {
884 return JOB_REQUEUE_DIRECT;
885 }
886
887 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
888 (struct sockaddr*)&addr, &addr_len);
889 if (len < 0)
890 {
891 switch (errno)
892 {
893 case EINTR:
894 /* interrupted, try again */
895 return JOB_REQUEUE_DIRECT;
896 case EAGAIN:
897 /* no data ready, select again */
898 return JOB_REQUEUE_DIRECT;
899 default:
900 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
901 sleep(1);
902 return JOB_REQUEUE_FAIR;
903 }
904 }
905 if (addr.nl_pid != 0)
906 { /* not from kernel. not interested, try another one */
907 return JOB_REQUEUE_DIRECT;
908 }
909
910 while (NLMSG_OK(hdr, len))
911 {
912 /* looks good so far, dispatch netlink message */
913 if (selected == this->socket_xfrm_events)
914 {
915 switch (hdr->nlmsg_type)
916 {
917 case XFRM_MSG_ACQUIRE:
918 process_acquire(this, hdr);
919 break;
920 case XFRM_MSG_EXPIRE:
921 process_expire(this, hdr);
922 break;
923 default:
924 break;
925 }
926 }
927 else if (selected == this->socket_rt_events)
928 {
929 switch (hdr->nlmsg_type)
930 {
931 case RTM_NEWADDR:
932 case RTM_DELADDR:
933 process_addr(this, hdr, TRUE);
934 pthread_cond_signal(&this->cond);
935 break;
936 case RTM_NEWLINK:
937 case RTM_DELLINK:
938 process_link(this, hdr, TRUE);
939 pthread_cond_signal(&this->cond);
940 break;
941 case RTM_NEWROUTE:
942 case RTM_DELROUTE:
943 fire_roam_job(this, FALSE);
944 break;
945 default:
946 break;
947 }
948 }
949 hdr = NLMSG_NEXT(hdr, len);
950 }
951 return JOB_REQUEUE_DIRECT;
952 }
953
954 /**
955 * send a netlink message and wait for a reply
956 */
957 static status_t netlink_send(private_kernel_interface_t *this,
958 int socket, struct nlmsghdr *in,
959 struct nlmsghdr **out, size_t *out_len)
960 {
961 int len, addr_len;
962 struct sockaddr_nl addr;
963 chunk_t result = chunk_empty, tmp;
964 struct nlmsghdr *msg, peek;
965
966 pthread_mutex_lock(&this->nl_mutex);
967
968 in->nlmsg_seq = ++this->seq;
969 in->nlmsg_pid = getpid();
970
971 memset(&addr, 0, sizeof(addr));
972 addr.nl_family = AF_NETLINK;
973 addr.nl_pid = 0;
974 addr.nl_groups = 0;
975
976 while (TRUE)
977 {
978 len = sendto(socket, in, in->nlmsg_len, 0,
979 (struct sockaddr*)&addr, sizeof(addr));
980
981 if (len != in->nlmsg_len)
982 {
983 if (errno == EINTR)
984 {
985 /* interrupted, try again */
986 continue;
987 }
988 pthread_mutex_unlock(&this->nl_mutex);
989 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
990 return FAILED;
991 }
992 break;
993 }
994
995 while (TRUE)
996 {
997 char buf[4096];
998 tmp.len = sizeof(buf);
999 tmp.ptr = buf;
1000 msg = (struct nlmsghdr*)tmp.ptr;
1001
1002 memset(&addr, 0, sizeof(addr));
1003 addr.nl_family = AF_NETLINK;
1004 addr.nl_pid = getpid();
1005 addr.nl_groups = 0;
1006 addr_len = sizeof(addr);
1007
1008 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
1009 (struct sockaddr*)&addr, &addr_len);
1010
1011 if (len < 0)
1012 {
1013 if (errno == EINTR)
1014 {
1015 DBG1(DBG_KNL, "got interrupted");
1016 /* interrupted, try again */
1017 continue;
1018 }
1019 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
1020 pthread_mutex_unlock(&this->nl_mutex);
1021 return FAILED;
1022 }
1023 if (!NLMSG_OK(msg, len))
1024 {
1025 DBG1(DBG_KNL, "received corrupted netlink message");
1026 pthread_mutex_unlock(&this->nl_mutex);
1027 return FAILED;
1028 }
1029 if (msg->nlmsg_seq != this->seq)
1030 {
1031 DBG1(DBG_KNL, "received invalid netlink sequence number");
1032 if (msg->nlmsg_seq < this->seq)
1033 {
1034 continue;
1035 }
1036 pthread_mutex_unlock(&this->nl_mutex);
1037 return FAILED;
1038 }
1039
1040 tmp.len = len;
1041 result = chunk_cata("cc", result, tmp);
1042
1043 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
1044 * numbers to detect multi header messages */
1045 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
1046 (struct sockaddr*)&addr, &addr_len);
1047
1048 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
1049 {
1050 /* seems to be multipart */
1051 continue;
1052 }
1053 break;
1054 }
1055
1056 *out_len = result.len;
1057 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
1058
1059 pthread_mutex_unlock(&this->nl_mutex);
1060
1061 return SUCCESS;
1062 }
1063
1064 /**
1065 * send a netlink message and wait for its acknowlegde
1066 */
1067 static status_t netlink_send_ack(private_kernel_interface_t *this,
1068 int socket, struct nlmsghdr *in)
1069 {
1070 struct nlmsghdr *out, *hdr;
1071 size_t len;
1072
1073 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
1074 {
1075 return FAILED;
1076 }
1077 hdr = out;
1078 while (NLMSG_OK(hdr, len))
1079 {
1080 switch (hdr->nlmsg_type)
1081 {
1082 case NLMSG_ERROR:
1083 {
1084 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
1085
1086 if (err->error)
1087 {
1088 DBG1(DBG_KNL, "received netlink error: %s (%d)",
1089 strerror(-err->error), -err->error);
1090 free(out);
1091 return FAILED;
1092 }
1093 free(out);
1094 return SUCCESS;
1095 }
1096 default:
1097 hdr = NLMSG_NEXT(hdr, len);
1098 continue;
1099 case NLMSG_DONE:
1100 break;
1101 }
1102 break;
1103 }
1104 DBG1(DBG_KNL, "netlink request not acknowlegded");
1105 free(out);
1106 return FAILED;
1107 }
1108
1109 /**
1110 * Initialize a list of local addresses.
1111 */
1112 static status_t init_address_list(private_kernel_interface_t *this)
1113 {
1114 char request[BUFFER_SIZE];
1115 struct nlmsghdr *out, *current, *in;
1116 struct rtgenmsg *msg;
1117 size_t len;
1118 iterator_t *ifaces, *addrs;
1119 iface_entry_t *iface;
1120 addr_entry_t *addr;
1121
1122 DBG1(DBG_KNL, "listening on interfaces:");
1123
1124 memset(&request, 0, sizeof(request));
1125
1126 in = (struct nlmsghdr*)&request;
1127 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1128 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1129 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1130 msg->rtgen_family = AF_UNSPEC;
1131
1132 /* get all links */
1133 in->nlmsg_type = RTM_GETLINK;
1134 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1135 {
1136 return FAILED;
1137 }
1138 current = out;
1139 while (NLMSG_OK(current, len))
1140 {
1141 switch (current->nlmsg_type)
1142 {
1143 case NLMSG_DONE:
1144 break;
1145 case RTM_NEWLINK:
1146 process_link(this, current, FALSE);
1147 /* fall through */
1148 default:
1149 current = NLMSG_NEXT(current, len);
1150 continue;
1151 }
1152 break;
1153 }
1154 free(out);
1155
1156 /* get all interface addresses */
1157 in->nlmsg_type = RTM_GETADDR;
1158 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1159 {
1160 return FAILED;
1161 }
1162 current = out;
1163 while (NLMSG_OK(current, len))
1164 {
1165 switch (current->nlmsg_type)
1166 {
1167 case NLMSG_DONE:
1168 break;
1169 case RTM_NEWADDR:
1170 process_addr(this, current, FALSE);
1171 /* fall through */
1172 default:
1173 current = NLMSG_NEXT(current, len);
1174 continue;
1175 }
1176 break;
1177 }
1178 free(out);
1179
1180 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1181 while (ifaces->iterate(ifaces, (void**)&iface))
1182 {
1183 if (iface->flags & IFF_UP)
1184 {
1185 DBG1(DBG_KNL, " %s", iface->ifname);
1186 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1187 while (addrs->iterate(addrs, (void**)&addr))
1188 {
1189 DBG1(DBG_KNL, " %H", addr->ip);
1190 }
1191 addrs->destroy(addrs);
1192 }
1193 }
1194 ifaces->destroy(ifaces);
1195 return SUCCESS;
1196 }
1197
1198 /**
1199 * iterator hook to iterate over addrs
1200 */
1201 static hook_result_t addr_hook(private_kernel_interface_t *this,
1202 addr_entry_t *in, host_t **out)
1203 {
1204 if (in->virtual)
1205 { /* skip virtual interfaces added by us */
1206 return HOOK_SKIP;
1207 }
1208 if (in->scope >= RT_SCOPE_LINK)
1209 { /* skip addresses with a unusable scope */
1210 return HOOK_SKIP;
1211 }
1212 *out = in->ip;
1213 return HOOK_NEXT;
1214 }
1215
1216 /**
1217 * iterator hook to iterate over ifaces
1218 */
1219 static hook_result_t iface_hook(private_kernel_interface_t *this,
1220 iface_entry_t *in, host_t **out)
1221 {
1222 if (!(in->flags & IFF_UP))
1223 { /* skip interfaces not up */
1224 return HOOK_SKIP;
1225 }
1226
1227 if (this->hiter == NULL)
1228 {
1229 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1230 this->hiter->set_iterator_hook(this->hiter,
1231 (iterator_hook_t*)addr_hook, this);
1232 }
1233 while (this->hiter->iterate(this->hiter, (void**)out))
1234 {
1235 return HOOK_AGAIN;
1236 }
1237 this->hiter->destroy(this->hiter);
1238 this->hiter = NULL;
1239 return HOOK_SKIP;
1240 }
1241
1242 /**
1243 * Implements kernel_interface_t.create_address_iterator.
1244 */
1245 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1246 {
1247 iterator_t *iterator;
1248
1249 /* This iterator is not only hooked, is is double-hooked. As we have stored
1250 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1251 * entries in each interface we iterate. This does the iface_hook. The
1252 * addr_hook returns the ip instead of the addr_entry. */
1253
1254 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1255 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1256 return iterator;
1257 }
1258
1259 /**
1260 * implementation of kernel_interface_t.get_interface_name
1261 */
1262 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1263 {
1264 iterator_t *ifaces, *addrs;
1265 iface_entry_t *iface;
1266 addr_entry_t *addr;
1267 char *name = NULL;
1268
1269 DBG2(DBG_KNL, "getting interface name for %H", ip);
1270
1271 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1272 while (ifaces->iterate(ifaces, (void**)&iface))
1273 {
1274 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1275 while (addrs->iterate(addrs, (void**)&addr))
1276 {
1277 if (ip->ip_equals(ip, addr->ip))
1278 {
1279 name = strdup(iface->ifname);
1280 break;
1281 }
1282 }
1283 addrs->destroy(addrs);
1284 if (name)
1285 {
1286 break;
1287 }
1288 }
1289 ifaces->destroy(ifaces);
1290
1291 if (name)
1292 {
1293 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1294 }
1295 else
1296 {
1297 DBG2(DBG_KNL, "%H is not a local address", ip);
1298 }
1299 return name;
1300 }
1301
1302 /**
1303 * Tries to find an ip address of a local interface that is included in the
1304 * supplied traffic selector.
1305 */
1306 static status_t get_address_by_ts(private_kernel_interface_t *this,
1307 traffic_selector_t *ts, host_t **ip)
1308 {
1309 iterator_t *ifaces, *addrs;
1310 iface_entry_t *iface;
1311 addr_entry_t *addr;
1312 host_t *host;
1313 int family;
1314 bool found = FALSE;
1315
1316 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1317
1318 /* if we have a family which includes localhost, we do not
1319 * search for an IP, we use the default */
1320 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1321
1322 if (family == AF_INET)
1323 {
1324 host = host_create_from_string("127.0.0.1", 0);
1325 }
1326 else
1327 {
1328 host = host_create_from_string("::1", 0);
1329 }
1330
1331 if (ts->includes(ts, host))
1332 {
1333 *ip = host_create_any(family);
1334 host->destroy(host);
1335 DBG2(DBG_KNL, "using host %H", *ip);
1336 return SUCCESS;
1337 }
1338 host->destroy(host);
1339
1340 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1341 while (ifaces->iterate(ifaces, (void**)&iface))
1342 {
1343 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1344 while (addrs->iterate(addrs, (void**)&addr))
1345 {
1346 if (ts->includes(ts, addr->ip))
1347 {
1348 found = TRUE;
1349 *ip = addr->ip->clone(addr->ip);
1350 break;
1351 }
1352 }
1353 addrs->destroy(addrs);
1354 if (found)
1355 {
1356 break;
1357 }
1358 }
1359 ifaces->destroy(ifaces);
1360
1361 if (!found)
1362 {
1363 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1364 return FAILED;
1365 }
1366 DBG2(DBG_KNL, "using host %H", *ip);
1367 return SUCCESS;
1368 }
1369
1370 /**
1371 * get the interface of a local address
1372 */
1373 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1374 {
1375 iterator_t *ifaces, *addrs;
1376 iface_entry_t *iface;
1377 addr_entry_t *addr;
1378 int ifindex = 0;
1379
1380 DBG2(DBG_KNL, "getting iface for %H", ip);
1381
1382 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1383 while (ifaces->iterate(ifaces, (void**)&iface))
1384 {
1385 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1386 while (addrs->iterate(addrs, (void**)&addr))
1387 {
1388 if (ip->ip_equals(ip, addr->ip))
1389 {
1390 ifindex = iface->ifindex;
1391 break;
1392 }
1393 }
1394 addrs->destroy(addrs);
1395 if (ifindex)
1396 {
1397 break;
1398 }
1399 }
1400 ifaces->destroy(ifaces);
1401
1402 if (ifindex == 0)
1403 {
1404 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1405 }
1406 return ifindex;
1407 }
1408
1409 /**
1410 * get the refcount of a virtual ip
1411 */
1412 static int get_vip_refcount(private_kernel_interface_t *this, host_t* ip)
1413 {
1414 iterator_t *ifaces, *addrs;
1415 iface_entry_t *iface;
1416 addr_entry_t *addr;
1417 int refcount = 0;
1418
1419 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
1420 while (ifaces->iterate(ifaces, (void**)&iface))
1421 {
1422 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1423 while (addrs->iterate(addrs, (void**)&addr))
1424 {
1425 if (addr->virtual && (iface->flags & IFF_UP) &&
1426 ip->ip_equals(ip, addr->ip))
1427 {
1428 refcount = addr->refcount;
1429 break;
1430 }
1431 }
1432 addrs->destroy(addrs);
1433 if (refcount)
1434 {
1435 break;
1436 }
1437 }
1438 ifaces->destroy(ifaces);
1439
1440 return refcount;
1441 }
1442
1443 /**
1444 * Manages the creation and deletion of ip addresses on an interface.
1445 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1446 */
1447 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1448 int flags, int if_index, host_t *ip)
1449 {
1450 unsigned char request[BUFFER_SIZE];
1451 struct nlmsghdr *hdr;
1452 struct ifaddrmsg *msg;
1453 chunk_t chunk;
1454
1455 memset(&request, 0, sizeof(request));
1456
1457 chunk = ip->get_address(ip);
1458
1459 hdr = (struct nlmsghdr*)request;
1460 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1461 hdr->nlmsg_type = nlmsg_type;
1462 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1463
1464 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1465 msg->ifa_family = ip->get_family(ip);
1466 msg->ifa_flags = 0;
1467 msg->ifa_prefixlen = 8 * chunk.len;
1468 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1469 msg->ifa_index = if_index;
1470
1471 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1472
1473 return netlink_send_ack(this, this->socket_rt, hdr);
1474 }
1475
1476 /**
1477 * Manages source routes in the routing table.
1478 * By setting the appropriate nlmsg_type, the route added or r.
1479 */
1480 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1481 int flags, route_entry_t *route)
1482 {
1483 unsigned char request[BUFFER_SIZE];
1484 struct nlmsghdr *hdr;
1485 struct rtmsg *msg;
1486 chunk_t chunk;
1487
1488 /* if route is 0.0.0.0/0, we can't install it, as it would
1489 * overwrite the default route. Instead, we add two routes:
1490 * 0.0.0.0/1 and 128.0.0.0/1 */
1491 if (this->routing_table == 0 && route->prefixlen == 0)
1492 {
1493 route_entry_t half;
1494 status_t status;
1495
1496 half.dst_net = chunk_alloca(route->dst_net.len);
1497 memset(half.dst_net.ptr, 0, half.dst_net.len);
1498 half.src_ip = route->src_ip;
1499 half.gateway = route->gateway;
1500 half.if_index = route->if_index;
1501 half.prefixlen = 1;
1502
1503 status = manage_srcroute(this, nlmsg_type, flags, &half);
1504 half.dst_net.ptr[0] |= 0x80;
1505 status = manage_srcroute(this, nlmsg_type, flags, &half);
1506 return status;
1507 }
1508
1509 memset(&request, 0, sizeof(request));
1510
1511 hdr = (struct nlmsghdr*)request;
1512 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1513 hdr->nlmsg_type = nlmsg_type;
1514 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1515
1516 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1517 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1518 msg->rtm_dst_len = route->prefixlen;
1519 msg->rtm_table = this->routing_table;
1520 msg->rtm_protocol = RTPROT_STATIC;
1521 msg->rtm_type = RTN_UNICAST;
1522 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1523
1524 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1525 chunk = route->src_ip->get_address(route->src_ip);
1526 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1527 chunk = route->gateway->get_address(route->gateway);
1528 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1529 chunk.ptr = (char*)&route->if_index;
1530 chunk.len = sizeof(route->if_index);
1531 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1532
1533 return netlink_send_ack(this, this->socket_rt, hdr);
1534 }
1535
1536 /**
1537 * create or delete an rule to use our routing table
1538 */
1539 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1540 u_int32_t table, u_int32_t prio)
1541 {
1542 unsigned char request[BUFFER_SIZE];
1543 struct nlmsghdr *hdr;
1544 struct rtmsg *msg;
1545 chunk_t chunk;
1546
1547 memset(&request, 0, sizeof(request));
1548 hdr = (struct nlmsghdr*)request;
1549 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1550 hdr->nlmsg_type = nlmsg_type;
1551 if (nlmsg_type == RTM_NEWRULE)
1552 {
1553 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1554 }
1555 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1556
1557 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1558 msg->rtm_table = table;
1559 msg->rtm_family = AF_INET;
1560 msg->rtm_protocol = RTPROT_BOOT;
1561 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1562 msg->rtm_type = RTN_UNICAST;
1563
1564 chunk = chunk_from_thing(prio);
1565 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1566
1567 return netlink_send_ack(this, this->socket_rt, hdr);
1568 }
1569
1570 /**
1571 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1572 */
1573 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1574 {
1575 int bit, byte;
1576
1577 if (addr.len != net.len)
1578 {
1579 return FALSE;
1580 }
1581 /* scan through all bits, beginning in the front */
1582 for (byte = 0; byte < addr.len; byte++)
1583 {
1584 for (bit = 7; bit >= 0; bit--)
1585 {
1586 /* check if bits are equal (or we reached the end of the net) */
1587 if (bit + byte * 8 > net_len)
1588 {
1589 return TRUE;
1590 }
1591 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
1592 {
1593 return FALSE;
1594 }
1595 }
1596 }
1597 return TRUE;
1598 }
1599
1600 /**
1601 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1602 */
1603 static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
1604 bool nexthop)
1605 {
1606 unsigned char request[BUFFER_SIZE];
1607 struct nlmsghdr *hdr, *out, *current;
1608 struct rtmsg *msg;
1609 chunk_t chunk;
1610 size_t len;
1611 int best = -1;
1612 host_t *src = NULL, *gtw = NULL;
1613
1614 DBG2(DBG_KNL, "getting address to reach %H", dest);
1615
1616 memset(&request, 0, sizeof(request));
1617
1618 hdr = (struct nlmsghdr*)request;
1619 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
1620 hdr->nlmsg_type = RTM_GETROUTE;
1621 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1622
1623 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1624 msg->rtm_family = dest->get_family(dest);
1625
1626 chunk = dest->get_address(dest);
1627 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1628
1629 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1630 {
1631 DBG1(DBG_KNL, "getting address to %H failed", dest);
1632 return NULL;
1633 }
1634 current = out;
1635 while (NLMSG_OK(current, len))
1636 {
1637 switch (current->nlmsg_type)
1638 {
1639 case NLMSG_DONE:
1640 break;
1641 case RTM_NEWROUTE:
1642 {
1643 struct rtattr *rta;
1644 size_t rtasize;
1645 chunk_t rta_gtw, rta_src, rta_dst;
1646 u_int32_t rta_oif = 0;
1647
1648 rta_gtw = rta_src = rta_dst = chunk_empty;
1649 msg = (struct rtmsg*)(NLMSG_DATA(current));
1650 rta = RTM_RTA(msg);
1651 rtasize = RTM_PAYLOAD(current);
1652 while (RTA_OK(rta, rtasize))
1653 {
1654 switch (rta->rta_type)
1655 {
1656 case RTA_PREFSRC:
1657 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1658 break;
1659 case RTA_GATEWAY:
1660 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1661 break;
1662 case RTA_DST:
1663 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1664 break;
1665 case RTA_OIF:
1666 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1667 {
1668 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1669 }
1670 break;
1671 }
1672 rta = RTA_NEXT(rta, rtasize);
1673 }
1674
1675 /* apply the route if:
1676 * - it is not from our own ipsec routing table
1677 * - is better than a previous one
1678 * - is the default route or
1679 * - its destination net contains our destination
1680 */
1681 if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
1682 && msg->rtm_dst_len > best
1683 && (msg->rtm_dst_len == 0 || /* default route */
1684 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
1685 {
1686 iterator_t *ifaces, *addrs;
1687 iface_entry_t *iface;
1688 addr_entry_t *addr;
1689
1690 best = msg->rtm_dst_len;
1691 if (nexthop)
1692 {
1693 DESTROY_IF(gtw);
1694 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
1695 }
1696 else if (rta_src.ptr)
1697 {
1698 DESTROY_IF(src);
1699 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
1700 if (get_vip_refcount(this, src))
1701 { /* skip source address if it is installed by us */
1702 DESTROY_IF(src);
1703 src = NULL;
1704 current = NLMSG_NEXT(current, len);
1705 continue;
1706 }
1707 }
1708 else
1709 {
1710 /* no source addr, get one from the interfaces */
1711 ifaces = this->ifaces->create_iterator_locked(
1712 this->ifaces, &this->mutex);
1713 while (ifaces->iterate(ifaces, (void**)&iface))
1714 {
1715 if (iface->ifindex == rta_oif)
1716 {
1717 addrs = iface->addrs->create_iterator(
1718 iface->addrs, TRUE);
1719 while (addrs->iterate(addrs, (void**)&addr))
1720 {
1721 chunk_t ip = addr->ip->get_address(addr->ip);
1722 if (msg->rtm_dst_len == 0
1723 || addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
1724 {
1725 DESTROY_IF(src);
1726 src = addr->ip->clone(addr->ip);
1727 break;
1728 }
1729 }
1730 addrs->destroy(addrs);
1731 }
1732 }
1733 ifaces->destroy(ifaces);
1734 }
1735 }
1736 /* FALL through */
1737 }
1738 default:
1739 current = NLMSG_NEXT(current, len);
1740 continue;
1741 }
1742 break;
1743 }
1744 free(out);
1745
1746 if (nexthop)
1747 {
1748 if (gtw)
1749 {
1750 return gtw;
1751 }
1752 return dest->clone(dest);
1753 }
1754 return src;
1755 }
1756
1757 /**
1758 * Implementation of kernel_interface_t.get_source_addr.
1759 */
1760 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1761 {
1762 return get_route(this, dest, FALSE);
1763 }
1764
1765 /**
1766 * Implementation of kernel_interface_t.add_ip.
1767 */
1768 static status_t add_ip(private_kernel_interface_t *this,
1769 host_t *virtual_ip, host_t *iface_ip)
1770 {
1771 iface_entry_t *iface;
1772 addr_entry_t *addr;
1773 iterator_t *addrs, *ifaces;
1774 int ifindex;
1775
1776 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1777
1778 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1779 while (ifaces->iterate(ifaces, (void**)&iface))
1780 {
1781 bool iface_found = FALSE;
1782
1783 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1784 while (addrs->iterate(addrs, (void**)&addr))
1785 {
1786 if (iface_ip->ip_equals(iface_ip, addr->ip))
1787 {
1788 iface_found = TRUE;
1789 }
1790 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1791 {
1792 addr->refcount++;
1793 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1794 virtual_ip, iface->ifname);
1795 addrs->destroy(addrs);
1796 ifaces->destroy(ifaces);
1797 return SUCCESS;
1798 }
1799 }
1800 addrs->destroy(addrs);
1801
1802 if (iface_found)
1803 {
1804 ifindex = iface->ifindex;
1805 addr = malloc_thing(addr_entry_t);
1806 addr->ip = virtual_ip->clone(virtual_ip);
1807 addr->refcount = 0;
1808 addr->virtual = TRUE;
1809 addr->scope = RT_SCOPE_UNIVERSE;
1810 iface->addrs->insert_last(iface->addrs, addr);
1811
1812 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1813 ifindex, virtual_ip) == SUCCESS)
1814 {
1815 while (get_vip_refcount(this, virtual_ip) == 0)
1816 { /* wait until address appears */
1817 pthread_cond_wait(&this->cond, &this->mutex);
1818 }
1819 ifaces->destroy(ifaces);
1820 return SUCCESS;
1821 }
1822 ifaces->destroy(ifaces);
1823 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1824 return FAILED;
1825 }
1826 }
1827 ifaces->destroy(ifaces);
1828
1829 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1830 "virtual IP %H", iface_ip, virtual_ip);
1831 return FAILED;
1832 }
1833
1834 /**
1835 * Implementation of kernel_interface_t.del_ip.
1836 */
1837 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1838 {
1839 iface_entry_t *iface;
1840 addr_entry_t *addr;
1841 iterator_t *addrs, *ifaces;
1842 status_t status;
1843 int ifindex;
1844
1845 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1846
1847 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1848 while (ifaces->iterate(ifaces, (void**)&iface))
1849 {
1850 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1851 while (addrs->iterate(addrs, (void**)&addr))
1852 {
1853 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1854 {
1855 ifindex = iface->ifindex;
1856 if (addr->refcount == 1)
1857 {
1858 status = manage_ipaddr(this, RTM_DELADDR, 0,
1859 ifindex, virtual_ip);
1860 if (status == SUCCESS)
1861 { /* wait until the address is really gone */
1862 while (get_vip_refcount(this, virtual_ip) > 0)
1863 {
1864 pthread_cond_wait(&this->cond, &this->mutex);
1865 }
1866 }
1867 addrs->destroy(addrs);
1868 ifaces->destroy(ifaces);
1869 return status;
1870 }
1871 else
1872 {
1873 addr->refcount--;
1874 }
1875 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1876 virtual_ip);
1877 addrs->destroy(addrs);
1878 ifaces->destroy(ifaces);
1879 return SUCCESS;
1880 }
1881 }
1882 addrs->destroy(addrs);
1883 }
1884 ifaces->destroy(ifaces);
1885
1886 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1887 return FAILED;
1888 }
1889
1890 /**
1891 * Get an SPI for a specific protocol from the kernel.
1892 */
1893 static status_t get_spi_internal(private_kernel_interface_t *this,
1894 host_t *src, host_t *dst, u_int8_t proto, u_int32_t min, u_int32_t max,
1895 u_int32_t reqid, u_int32_t *spi)
1896 {
1897 unsigned char request[BUFFER_SIZE];
1898 struct nlmsghdr *hdr, *out;
1899 struct xfrm_userspi_info *userspi;
1900 u_int32_t received_spi = 0;
1901 size_t len;
1902
1903 memset(&request, 0, sizeof(request));
1904
1905 hdr = (struct nlmsghdr*)request;
1906 hdr->nlmsg_flags = NLM_F_REQUEST;
1907 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1908 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1909
1910 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1911 host2xfrm(src, &userspi->info.saddr);
1912 host2xfrm(dst, &userspi->info.id.daddr);
1913 userspi->info.id.proto = proto;
1914 userspi->info.mode = TRUE; /* tunnel mode */
1915 userspi->info.reqid = reqid;
1916 userspi->info.family = src->get_family(src);
1917 userspi->min = min;
1918 userspi->max = max;
1919
1920 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1921 {
1922 hdr = out;
1923 while (NLMSG_OK(hdr, len))
1924 {
1925 switch (hdr->nlmsg_type)
1926 {
1927 case XFRM_MSG_NEWSA:
1928 {
1929 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1930 received_spi = usersa->id.spi;
1931 break;
1932 }
1933 case NLMSG_ERROR:
1934 {
1935 struct nlmsgerr *err = NLMSG_DATA(hdr);
1936
1937 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1938 strerror(-err->error), -err->error);
1939 break;
1940 }
1941 default:
1942 hdr = NLMSG_NEXT(hdr, len);
1943 continue;
1944 case NLMSG_DONE:
1945 break;
1946 }
1947 break;
1948 }
1949 free(out);
1950 }
1951
1952 if (received_spi == 0)
1953 {
1954 return FAILED;
1955 }
1956
1957 *spi = received_spi;
1958 return SUCCESS;
1959 }
1960
1961 /**
1962 * Implementation of kernel_interface_t.get_spi.
1963 */
1964 static status_t get_spi(private_kernel_interface_t *this,
1965 host_t *src, host_t *dst,
1966 protocol_id_t protocol, u_int32_t reqid,
1967 u_int32_t *spi)
1968 {
1969 DBG2(DBG_KNL, "getting SPI for reqid %d", reqid);
1970
1971 if (get_spi_internal(this, src, dst, proto_ike2kernel(protocol),
1972 0xc0000000, 0xcFFFFFFF, reqid, spi) != SUCCESS)
1973 {
1974 DBG1(DBG_KNL, "unable to get SPI for reqid %d", reqid);
1975 return FAILED;
1976 }
1977
1978 DBG2(DBG_KNL, "got SPI 0x%x for reqid %d", *spi, reqid);
1979
1980 return SUCCESS;
1981 }
1982
1983 /**
1984 * Implementation of kernel_interface_t.get_cpi.
1985 */
1986 static status_t get_cpi(private_kernel_interface_t *this,
1987 host_t *src, host_t *dst,
1988 u_int32_t reqid, u_int16_t *cpi)
1989 {
1990 u_int32_t received_spi = 0;
1991 DBG2(DBG_KNL, "getting CPI for reqid %d", reqid);
1992
1993 if (get_spi_internal(this, src, dst,
1994 IPPROTO_COMP, 0x100, 0xEFFF, reqid, &received_spi) != SUCCESS)
1995 {
1996 DBG1(DBG_KNL, "unable to get CPI for reqid %d", reqid);
1997 return FAILED;
1998 }
1999
2000 *cpi = htons((u_int16_t)ntohl(received_spi));
2001
2002 DBG2(DBG_KNL, "got CPI 0x%x for reqid %d", *cpi, reqid);
2003
2004 return SUCCESS;
2005 }
2006
2007 /**
2008 * Implementation of kernel_interface_t.add_sa.
2009 */
2010 static status_t add_sa(private_kernel_interface_t *this,
2011 host_t *src, host_t *dst, u_int32_t spi,
2012 protocol_id_t protocol, u_int32_t reqid,
2013 u_int64_t expire_soft, u_int64_t expire_hard,
2014 u_int16_t enc_alg, u_int16_t enc_size,
2015 u_int16_t int_alg, u_int16_t int_size,
2016 prf_plus_t *prf_plus, mode_t mode,
2017 u_int16_t ipcomp, bool encap,
2018 bool replace)
2019 {
2020 unsigned char request[BUFFER_SIZE];
2021 char *alg_name;
2022 /* additional 4 octets KEYMAT required for AES-GCM as of RFC4106 8.1. */
2023 u_int16_t add_keymat = 32;
2024 struct nlmsghdr *hdr;
2025 struct xfrm_usersa_info *sa;
2026
2027 memset(&request, 0, sizeof(request));
2028
2029 DBG2(DBG_KNL, "adding SAD entry with SPI 0x%x and reqid %d", spi, reqid);
2030
2031 hdr = (struct nlmsghdr*)request;
2032 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2033 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
2034 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2035
2036 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
2037 host2xfrm(src, &sa->saddr);
2038 host2xfrm(dst, &sa->id.daddr);
2039 sa->id.spi = spi;
2040 sa->id.proto = proto_ike2kernel(protocol);
2041 sa->family = src->get_family(src);
2042 sa->mode = mode;
2043 sa->replay_window = (protocol == IPPROTO_COMP) ? 0 : 32;
2044 sa->reqid = reqid;
2045 /* we currently do not expire SAs by volume/packet count */
2046 sa->lft.soft_byte_limit = XFRM_INF;
2047 sa->lft.hard_byte_limit = XFRM_INF;
2048 sa->lft.soft_packet_limit = XFRM_INF;
2049 sa->lft.hard_packet_limit = XFRM_INF;
2050 /* we use lifetimes since added, not since used */
2051 sa->lft.soft_add_expires_seconds = expire_soft;
2052 sa->lft.hard_add_expires_seconds = expire_hard;
2053 sa->lft.soft_use_expires_seconds = 0;
2054 sa->lft.hard_use_expires_seconds = 0;
2055
2056 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
2057
2058 switch (enc_alg)
2059 {
2060 case ENCR_UNDEFINED:
2061 /* no encryption */
2062 break;
2063 case ENCR_AES_CCM_ICV8:
2064 case ENCR_AES_CCM_ICV12:
2065 case ENCR_AES_CCM_ICV16:
2066 /* AES-CCM needs only 3 additional octets KEYMAT as of RFC 4309 7.1. */
2067 add_keymat = 24;
2068 /* fall-through */
2069 case ENCR_AES_GCM_ICV8:
2070 case ENCR_AES_GCM_ICV12:
2071 case ENCR_AES_GCM_ICV16:
2072 {
2073 u_int16_t icv_size = 0;
2074 rthdr->rta_type = XFRMA_ALG_AEAD;
2075 alg_name = lookup_algorithm(encryption_algs, enc_alg, &icv_size);
2076 if (alg_name == NULL)
2077 {
2078 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2079 encryption_algorithm_names, enc_alg);
2080 return FAILED;
2081 }
2082 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2083 encryption_algorithm_names, enc_alg, enc_size);
2084
2085 /* additional KEYMAT required */
2086 enc_size += add_keymat;
2087
2088 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo_aead) + enc_size / 8);
2089 hdr->nlmsg_len += rthdr->rta_len;
2090 if (hdr->nlmsg_len > sizeof(request))
2091 {
2092 return FAILED;
2093 }
2094
2095 struct xfrm_algo_aead* algo = (struct xfrm_algo_aead*)RTA_DATA(rthdr);
2096 algo->alg_key_len = enc_size;
2097 algo->alg_icv_len = icv_size;
2098 strcpy(algo->alg_name, alg_name);
2099 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2100
2101 rthdr = XFRM_RTA_NEXT(rthdr);
2102 break;
2103 }
2104 default:
2105 {
2106 rthdr->rta_type = XFRMA_ALG_CRYPT;
2107 alg_name = lookup_algorithm(encryption_algs, enc_alg, &enc_size);
2108 if (alg_name == NULL)
2109 {
2110 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2111 encryption_algorithm_names, enc_alg);
2112 return FAILED;
2113 }
2114 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2115 encryption_algorithm_names, enc_alg, enc_size);
2116
2117 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + enc_size / 8);
2118 hdr->nlmsg_len += rthdr->rta_len;
2119 if (hdr->nlmsg_len > sizeof(request))
2120 {
2121 return FAILED;
2122 }
2123
2124 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2125 algo->alg_key_len = enc_size;
2126 strcpy(algo->alg_name, alg_name);
2127 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2128
2129 rthdr = XFRM_RTA_NEXT(rthdr);
2130 break;
2131 }
2132 }
2133
2134 if (int_alg != AUTH_UNDEFINED)
2135 {
2136 rthdr->rta_type = XFRMA_ALG_AUTH;
2137 alg_name = lookup_algorithm(integrity_algs, int_alg, &int_size);
2138 if (alg_name == NULL)
2139 {
2140 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2141 integrity_algorithm_names, int_alg);
2142 return FAILED;
2143 }
2144 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
2145 integrity_algorithm_names, int_alg, int_size);
2146
2147 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + int_size / 8);
2148 hdr->nlmsg_len += rthdr->rta_len;
2149 if (hdr->nlmsg_len > sizeof(request))
2150 {
2151 return FAILED;
2152 }
2153
2154 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2155 algo->alg_key_len = int_size;
2156 strcpy(algo->alg_name, alg_name);
2157 prf_plus->get_bytes(prf_plus, int_size / 8, algo->alg_key);
2158
2159 rthdr = XFRM_RTA_NEXT(rthdr);
2160 }
2161
2162 if (ipcomp != IPCOMP_NONE)
2163 {
2164 rthdr->rta_type = XFRMA_ALG_COMP;
2165 alg_name = lookup_algorithm(compression_algs, ipcomp, NULL);
2166 if (alg_name == NULL)
2167 {
2168 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2169 ipcomp_transform_names, ipcomp);
2170 return FAILED;
2171 }
2172 DBG2(DBG_KNL, " using compression algorithm %N",
2173 ipcomp_transform_names, ipcomp);
2174
2175 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo));
2176 hdr->nlmsg_len += rthdr->rta_len;
2177 if (hdr->nlmsg_len > sizeof(request))
2178 {
2179 return FAILED;
2180 }
2181
2182 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2183 algo->alg_key_len = 0;
2184 strcpy(algo->alg_name, alg_name);
2185
2186 rthdr = XFRM_RTA_NEXT(rthdr);
2187 }
2188
2189 if (encap)
2190 {
2191 rthdr->rta_type = XFRMA_ENCAP;
2192 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2193
2194 hdr->nlmsg_len += rthdr->rta_len;
2195 if (hdr->nlmsg_len > sizeof(request))
2196 {
2197 return FAILED;
2198 }
2199
2200 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
2201 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2202 tmpl->encap_sport = htons(src->get_port(src));
2203 tmpl->encap_dport = htons(dst->get_port(dst));
2204 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2205 /* encap_oa could probably be derived from the
2206 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
2207 * pluto does the same as we do here but it uses encap_oa in the
2208 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
2209 * the kernel ignores it anyway
2210 * -> does that mean that NAT-T encap doesn't work in transport mode?
2211 * No. The reason the kernel ignores NAT-OA is that it recomputes
2212 * (or, rather, just ignores) the checksum. If packets pass
2213 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
2214 rthdr = XFRM_RTA_NEXT(rthdr);
2215 }
2216
2217 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2218 {
2219 DBG1(DBG_KNL, "unable to add SAD entry with SPI 0x%x", spi);
2220 return FAILED;
2221 }
2222 return SUCCESS;
2223 }
2224
2225 /**
2226 * Get the replay state (i.e. sequence numbers) of an SA.
2227 */
2228 static status_t get_replay_state(private_kernel_interface_t *this,
2229 u_int32_t spi, protocol_id_t protocol, host_t *dst,
2230 struct xfrm_replay_state *replay)
2231 {
2232 unsigned char request[BUFFER_SIZE];
2233 struct nlmsghdr *hdr, *out = NULL;
2234 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
2235 size_t len;
2236 struct rtattr *rta;
2237 size_t rtasize;
2238
2239 memset(&request, 0, sizeof(request));
2240
2241 DBG2(DBG_KNL, "querying replay state from SAD entry with SPI 0x%x", spi);
2242
2243 hdr = (struct nlmsghdr*)request;
2244 hdr->nlmsg_flags = NLM_F_REQUEST;
2245 hdr->nlmsg_type = XFRM_MSG_GETAE;
2246 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
2247
2248 aevent_id = (struct xfrm_aevent_id*)NLMSG_DATA(hdr);
2249 aevent_id->flags = XFRM_AE_RVAL;
2250
2251 host2xfrm(dst, &aevent_id->sa_id.daddr);
2252 aevent_id->sa_id.spi = spi;
2253 aevent_id->sa_id.proto = proto_ike2kernel(protocol);
2254 aevent_id->sa_id.family = dst->get_family(dst);
2255
2256 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2257 {
2258 hdr = out;
2259 while (NLMSG_OK(hdr, len))
2260 {
2261 switch (hdr->nlmsg_type)
2262 {
2263 case XFRM_MSG_NEWAE:
2264 {
2265 out_aevent = NLMSG_DATA(hdr);
2266 break;
2267 }
2268 case NLMSG_ERROR:
2269 {
2270 struct nlmsgerr *err = NLMSG_DATA(hdr);
2271 DBG1(DBG_KNL, "querying replay state from SAD entry failed: %s (%d)",
2272 strerror(-err->error), -err->error);
2273 break;
2274 }
2275 default:
2276 hdr = NLMSG_NEXT(hdr, len);
2277 continue;
2278 case NLMSG_DONE:
2279 break;
2280 }
2281 break;
2282 }
2283 }
2284
2285 if (out_aevent == NULL)
2286 {
2287 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI 0x%x", spi);
2288 free(out);
2289 return FAILED;
2290 }
2291
2292 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2293 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2294 while(RTA_OK(rta, rtasize))
2295 {
2296 if (rta->rta_type == XFRMA_REPLAY_VAL)
2297 {
2298 memcpy(replay, RTA_DATA(rta), rta->rta_len);
2299 free(out);
2300 return SUCCESS;
2301 }
2302 rta = RTA_NEXT(rta, rtasize);
2303 }
2304
2305 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI 0x%x", spi);
2306 free(out);
2307 return FAILED;
2308 }
2309
2310 /**
2311 * Implementation of kernel_interface_t.update_sa.
2312 */
2313 static status_t update_sa(private_kernel_interface_t *this,
2314 u_int32_t spi, protocol_id_t protocol,
2315 host_t *src, host_t *dst,
2316 host_t *new_src, host_t *new_dst, bool encap)
2317 {
2318 unsigned char request[BUFFER_SIZE], *pos;
2319 struct nlmsghdr *hdr, *out = NULL;
2320 struct xfrm_usersa_id *sa_id;
2321 struct xfrm_usersa_info *out_sa = NULL, *sa;
2322 size_t len;
2323 struct rtattr *rta;
2324 size_t rtasize;
2325 struct xfrm_encap_tmpl* tmpl = NULL;
2326 bool got_replay_state;
2327 struct xfrm_replay_state replay;
2328
2329 memset(&request, 0, sizeof(request));
2330
2331 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x for update", spi);
2332
2333 /* query the exisiting SA first */
2334 hdr = (struct nlmsghdr*)request;
2335 hdr->nlmsg_flags = NLM_F_REQUEST;
2336 hdr->nlmsg_type = XFRM_MSG_GETSA;
2337 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2338
2339 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2340 host2xfrm(dst, &sa_id->daddr);
2341 sa_id->spi = spi;
2342 sa_id->proto = proto_ike2kernel(protocol);
2343 sa_id->family = dst->get_family(dst);
2344
2345 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2346 {
2347 hdr = out;
2348 while (NLMSG_OK(hdr, len))
2349 {
2350 switch (hdr->nlmsg_type)
2351 {
2352 case XFRM_MSG_NEWSA:
2353 {
2354 out_sa = NLMSG_DATA(hdr);
2355 break;
2356 }
2357 case NLMSG_ERROR:
2358 {
2359 struct nlmsgerr *err = NLMSG_DATA(hdr);
2360 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2361 strerror(-err->error), -err->error);
2362 break;
2363 }
2364 default:
2365 hdr = NLMSG_NEXT(hdr, len);
2366 continue;
2367 case NLMSG_DONE:
2368 break;
2369 }
2370 break;
2371 }
2372 }
2373 if (out_sa == NULL)
2374 {
2375 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2376 free(out);
2377 return FAILED;
2378 }
2379
2380 /* try to get the replay state */
2381 got_replay_state = (get_replay_state(
2382 this, spi, protocol, dst, &replay) == SUCCESS);
2383
2384 /* delete the old SA */
2385 if (this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
2386 {
2387 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI 0x%x", spi);
2388 free(out);
2389 return FAILED;
2390 }
2391
2392 DBG2(DBG_KNL, "updating SAD entry with SPI 0x%x from %#H..%#H to %#H..%#H",
2393 spi, src, dst, new_src, new_dst);
2394
2395 /* copy over the SA from out to request */
2396 hdr = (struct nlmsghdr*)request;
2397 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
2398 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2399 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2400 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2401 sa = NLMSG_DATA(hdr);
2402 sa->family = new_dst->get_family(new_dst);
2403
2404 if (!src->ip_equals(src, new_src))
2405 {
2406 host2xfrm(new_src, &sa->saddr);
2407 }
2408 if (!dst->ip_equals(dst, new_dst))
2409 {
2410 host2xfrm(new_dst, &sa->id.daddr);
2411 }
2412
2413 rta = XFRM_RTA(out, struct xfrm_usersa_info);
2414 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
2415 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
2416 while(RTA_OK(rta, rtasize))
2417 {
2418 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2419 if (rta->rta_type != XFRMA_ENCAP || encap)
2420 {
2421 if (rta->rta_type == XFRMA_ENCAP)
2422 { /* update encap tmpl */
2423 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2424 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2425 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2426 }
2427 memcpy(pos, rta, rta->rta_len);
2428 pos += RTA_ALIGN(rta->rta_len);
2429 hdr->nlmsg_len += RTA_ALIGN(rta->rta_len);
2430 }
2431 rta = RTA_NEXT(rta, rtasize);
2432 }
2433
2434 rta = (struct rtattr*)pos;
2435 if (tmpl == NULL && encap)
2436 { /* add tmpl if we are enabling it */
2437 rta->rta_type = XFRMA_ENCAP;
2438 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2439
2440 hdr->nlmsg_len += rta->rta_len;
2441 if (hdr->nlmsg_len > sizeof(request))
2442 {
2443 return FAILED;
2444 }
2445
2446 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2447 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2448 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2449 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2450 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2451
2452 rta = XFRM_RTA_NEXT(rta);
2453 }
2454
2455 if (got_replay_state)
2456 { /* copy the replay data if available */
2457 rta->rta_type = XFRMA_REPLAY_VAL;
2458 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_replay_state));
2459
2460 hdr->nlmsg_len += rta->rta_len;
2461 if (hdr->nlmsg_len > sizeof(request))
2462 {
2463 return FAILED;
2464 }
2465 memcpy(RTA_DATA(rta), &replay, sizeof(replay));
2466
2467 rta = XFRM_RTA_NEXT(rta);
2468 }
2469
2470 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2471 {
2472 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2473 free(out);
2474 return FAILED;
2475 }
2476 free(out);
2477
2478 return SUCCESS;
2479 }
2480
2481 /**
2482 * Implementation of kernel_interface_t.query_sa.
2483 */
2484 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
2485 u_int32_t spi, protocol_id_t protocol,
2486 u_int32_t *use_time)
2487 {
2488 unsigned char request[BUFFER_SIZE];
2489 struct nlmsghdr *out = NULL, *hdr;
2490 struct xfrm_usersa_id *sa_id;
2491 struct xfrm_usersa_info *sa = NULL;
2492 size_t len;
2493
2494 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x", spi);
2495 memset(&request, 0, sizeof(request));
2496
2497 hdr = (struct nlmsghdr*)request;
2498 hdr->nlmsg_flags = NLM_F_REQUEST;
2499 hdr->nlmsg_type = XFRM_MSG_GETSA;
2500 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2501
2502 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2503 host2xfrm(dst, &sa_id->daddr);
2504 sa_id->spi = spi;
2505 sa_id->proto = proto_ike2kernel(protocol);
2506 sa_id->family = dst->get_family(dst);
2507
2508 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2509 {
2510 hdr = out;
2511 while (NLMSG_OK(hdr, len))
2512 {
2513 switch (hdr->nlmsg_type)
2514 {
2515 case XFRM_MSG_NEWSA:
2516 {
2517 sa = NLMSG_DATA(hdr);
2518 break;
2519 }
2520 case NLMSG_ERROR:
2521 {
2522 struct nlmsgerr *err = NLMSG_DATA(hdr);
2523 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2524 strerror(-err->error), -err->error);
2525 break;
2526 }
2527 default:
2528 hdr = NLMSG_NEXT(hdr, len);
2529 continue;
2530 case NLMSG_DONE:
2531 break;
2532 }
2533 break;
2534 }
2535 }
2536
2537 if (sa == NULL)
2538 {
2539 DBG1(DBG_KNL, "unable to query SAD entry with SPI 0x%x", spi);
2540 free(out);
2541 return FAILED;
2542 }
2543
2544 *use_time = sa->curlft.use_time;
2545 free (out);
2546 return SUCCESS;
2547 }
2548
2549 /**
2550 * Implementation of kernel_interface_t.del_sa.
2551 */
2552 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2553 u_int32_t spi, protocol_id_t protocol)
2554 {
2555 unsigned char request[BUFFER_SIZE];
2556 struct nlmsghdr *hdr;
2557 struct xfrm_usersa_id *sa_id;
2558
2559 memset(&request, 0, sizeof(request));
2560
2561 DBG2(DBG_KNL, "deleting SAD entry with SPI 0x%x", spi);
2562
2563 hdr = (struct nlmsghdr*)request;
2564 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2565 hdr->nlmsg_type = XFRM_MSG_DELSA;
2566 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2567
2568 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2569 host2xfrm(dst, &sa_id->daddr);
2570 sa_id->spi = spi;
2571 sa_id->proto = proto_ike2kernel(protocol);
2572 sa_id->family = dst->get_family(dst);
2573
2574 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2575 {
2576 DBG1(DBG_KNL, "unable to delete SAD entry with SPI 0x%x", spi);
2577 return FAILED;
2578 }
2579 DBG2(DBG_KNL, "deleted SAD entry with SPI 0x%x", spi);
2580 return SUCCESS;
2581 }
2582
2583 /**
2584 * Implementation of kernel_interface_t.add_policy.
2585 */
2586 static status_t add_policy(private_kernel_interface_t *this,
2587 host_t *src, host_t *dst,
2588 traffic_selector_t *src_ts,
2589 traffic_selector_t *dst_ts,
2590 policy_dir_t direction, protocol_id_t protocol,
2591 u_int32_t reqid, bool high_prio, mode_t mode,
2592 u_int16_t ipcomp)
2593 {
2594 iterator_t *iterator;
2595 policy_entry_t *current, *policy;
2596 bool found = FALSE;
2597 unsigned char request[BUFFER_SIZE];
2598 struct xfrm_userpolicy_info *policy_info;
2599 struct nlmsghdr *hdr;
2600
2601 /* create a policy */
2602 policy = malloc_thing(policy_entry_t);
2603 memset(policy, 0, sizeof(policy_entry_t));
2604 policy->sel = ts2selector(src_ts, dst_ts);
2605 policy->direction = direction;
2606
2607 /* find the policy, which matches EXACTLY */
2608 pthread_mutex_lock(&this->mutex);
2609 iterator = this->policies->create_iterator(this->policies, TRUE);
2610 while (iterator->iterate(iterator, (void**)&current))
2611 {
2612 if (memcmp(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) == 0 &&
2613 policy->direction == current->direction)
2614 {
2615 /* use existing policy */
2616 current->refcount++;
2617 DBG2(DBG_KNL, "policy %R===%R already exists, increasing "
2618 "refcount", src_ts, dst_ts);
2619 free(policy);
2620 policy = current;
2621 found = TRUE;
2622 break;
2623 }
2624 }
2625 iterator->destroy(iterator);
2626 if (!found)
2627 { /* apply the new one, if we have no such policy */
2628 this->policies->insert_last(this->policies, policy);
2629 policy->refcount = 1;
2630 }
2631
2632 DBG2(DBG_KNL, "adding policy %R===%R", src_ts, dst_ts);
2633
2634 memset(&request, 0, sizeof(request));
2635 hdr = (struct nlmsghdr*)request;
2636 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2637 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2638 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2639
2640 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2641 policy_info->sel = policy->sel;
2642 policy_info->dir = policy->direction;
2643 /* calculate priority based on source selector size, small size = high prio */
2644 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2645 policy_info->priority -= policy->sel.prefixlen_s * 10;
2646 policy_info->priority -= policy->sel.proto ? 2 : 0;
2647 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2648 policy_info->action = XFRM_POLICY_ALLOW;
2649 policy_info->share = XFRM_SHARE_ANY;
2650 pthread_mutex_unlock(&this->mutex);
2651
2652 /* policies don't expire */
2653 policy_info->lft.soft_byte_limit = XFRM_INF;
2654 policy_info->lft.soft_packet_limit = XFRM_INF;
2655 policy_info->lft.hard_byte_limit = XFRM_INF;
2656 policy_info->lft.hard_packet_limit = XFRM_INF;
2657 policy_info->lft.soft_add_expires_seconds = 0;
2658 policy_info->lft.hard_add_expires_seconds = 0;
2659 policy_info->lft.soft_use_expires_seconds = 0;
2660 policy_info->lft.hard_use_expires_seconds = 0;
2661
2662 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2663 rthdr->rta_type = XFRMA_TMPL;
2664 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2665
2666 hdr->nlmsg_len += rthdr->rta_len;
2667 if (hdr->nlmsg_len > sizeof(request))
2668 {
2669 return FAILED;
2670 }
2671
2672 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2673
2674 if (ipcomp != IPCOMP_NONE)
2675 {
2676 tmpl->reqid = reqid;
2677 tmpl->id.proto = IPPROTO_COMP;
2678 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2679 tmpl->mode = mode;
2680 tmpl->optional = direction != POLICY_OUT;
2681 tmpl->family = src->get_family(src);
2682
2683 host2xfrm(src, &tmpl->saddr);
2684 host2xfrm(dst, &tmpl->id.daddr);
2685
2686 /* add an additional xfrm_user_tmpl */
2687 rthdr->rta_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2688 hdr->nlmsg_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2689 if (hdr->nlmsg_len > sizeof(request))
2690 {
2691 return FAILED;
2692 }
2693
2694 tmpl++;
2695 }
2696
2697 tmpl->reqid = reqid;
2698 tmpl->id.proto = proto_ike2kernel(protocol);
2699 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2700 tmpl->mode = mode;
2701 tmpl->family = src->get_family(src);
2702
2703 host2xfrm(src, &tmpl->saddr);
2704 host2xfrm(dst, &tmpl->id.daddr);
2705
2706 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2707 {
2708 DBG1(DBG_KNL, "unable to add policy %R===%R", src_ts, dst_ts);
2709 return FAILED;
2710 }
2711
2712 /* install a route, if:
2713 * - we are NOT updating a policy
2714 * - this is a forward policy (to just get one for each child)
2715 * - we are in tunnel mode
2716 * - we are not using IPv6 (does not work correctly yet!)
2717 * - routing is not disabled via strongswan.conf
2718 */
2719 if (policy->route == NULL && direction == POLICY_FWD &&
2720 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6 &&
2721 this->install_routes)
2722 {
2723 policy->route = malloc_thing(route_entry_t);
2724 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2725 {
2726 /* get the nexthop to src (src as we are in POLICY_FWD).*/
2727 policy->route->gateway = get_route(this, src, TRUE);
2728 policy->route->if_index = get_interface_index(this, dst);
2729 policy->route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2730 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr, policy->route->dst_net.len);
2731 policy->route->prefixlen = policy->sel.prefixlen_s;
2732
2733 if (manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2734 policy->route) != SUCCESS)
2735 {
2736 DBG1(DBG_KNL, "unable to install source route for %H",
2737 policy->route->src_ip);
2738 route_entry_destroy(policy->route);
2739 policy->route = NULL;
2740 }
2741 }
2742 else
2743 {
2744 free(policy->route);
2745 policy->route = NULL;
2746 }
2747 }
2748
2749 return SUCCESS;
2750 }
2751
2752 /**
2753 * Implementation of kernel_interface_t.query_policy.
2754 */
2755 static status_t query_policy(private_kernel_interface_t *this,
2756 traffic_selector_t *src_ts,
2757 traffic_selector_t *dst_ts,
2758 policy_dir_t direction, u_int32_t *use_time)
2759 {
2760 unsigned char request[BUFFER_SIZE];
2761 struct nlmsghdr *out = NULL, *hdr;
2762 struct xfrm_userpolicy_id *policy_id;
2763 struct xfrm_userpolicy_info *policy = NULL;
2764 size_t len;
2765
2766 memset(&request, 0, sizeof(request));
2767
2768 DBG2(DBG_KNL, "querying policy %R===%R", src_ts, dst_ts);
2769
2770 hdr = (struct nlmsghdr*)request;
2771 hdr->nlmsg_flags = NLM_F_REQUEST;
2772 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2773 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2774
2775 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2776 policy_id->sel = ts2selector(src_ts, dst_ts);
2777 policy_id->dir = direction;
2778
2779 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2780 {
2781 hdr = out;
2782 while (NLMSG_OK(hdr, len))
2783 {
2784 switch (hdr->nlmsg_type)
2785 {
2786 case XFRM_MSG_NEWPOLICY:
2787 {
2788 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2789 break;
2790 }
2791 case NLMSG_ERROR:
2792 {
2793 struct nlmsgerr *err = NLMSG_DATA(hdr);
2794 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2795 strerror(-err->error), -err->error);
2796 break;
2797 }
2798 default:
2799 hdr = NLMSG_NEXT(hdr, len);
2800 continue;
2801 case NLMSG_DONE:
2802 break;
2803 }
2804 break;
2805 }
2806 }
2807
2808 if (policy == NULL)
2809 {
2810 DBG2(DBG_KNL, "unable to query policy %R===%R", src_ts, dst_ts);
2811 free(out);
2812 return FAILED;
2813 }
2814 *use_time = (time_t)policy->curlft.use_time;
2815
2816 free(out);
2817 return SUCCESS;
2818 }
2819
2820 /**
2821 * Implementation of kernel_interface_t.del_policy.
2822 */
2823 static status_t del_policy(private_kernel_interface_t *this,
2824 traffic_selector_t *src_ts,
2825 traffic_selector_t *dst_ts,
2826 policy_dir_t direction)
2827 {
2828 policy_entry_t *current, policy, *to_delete = NULL;
2829 route_entry_t *route;
2830 unsigned char request[BUFFER_SIZE];
2831 struct nlmsghdr *hdr;
2832 struct xfrm_userpolicy_id *policy_id;
2833 iterator_t *iterator;
2834
2835 DBG2(DBG_KNL, "deleting policy %R===%R", src_ts, dst_ts);
2836
2837 /* create a policy */
2838 memset(&policy, 0, sizeof(policy_entry_t));
2839 policy.sel = ts2selector(src_ts, dst_ts);
2840 policy.direction = direction;
2841
2842 /* find the policy */
2843 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2844 while (iterator->iterate(iterator, (void**)&current))
2845 {
2846 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2847 policy.direction == current->direction)
2848 {
2849 to_delete = current;
2850 if (--to_delete->refcount > 0)
2851 {
2852 /* is used by more SAs, keep in kernel */
2853 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2854 iterator->destroy(iterator);
2855 return SUCCESS;
2856 }
2857 /* remove if last reference */
2858 iterator->remove(iterator);
2859 break;
2860 }
2861 }
2862 iterator->destroy(iterator);
2863 if (!to_delete)
2864 {
2865 DBG1(DBG_KNL, "deleting policy %R===%R failed, not found", src_ts, dst_ts);
2866 return NOT_FOUND;
2867 }
2868
2869 memset(&request, 0, sizeof(request));
2870
2871 hdr = (struct nlmsghdr*)request;
2872 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2873 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2874 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2875
2876 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2877 policy_id->sel = to_delete->sel;
2878 policy_id->dir = direction;
2879
2880 route = to_delete->route;
2881 free(to_delete);
2882
2883 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2884 {
2885 DBG1(DBG_KNL, "unable to delete policy %R===%R", src_ts, dst_ts);
2886 return FAILED;
2887 }
2888
2889 if (route)
2890 {
2891 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2892 {
2893 DBG1(DBG_KNL, "error uninstalling route installed with "
2894 "policy %R===%R", src_ts, dst_ts);
2895 }
2896 route_entry_destroy(route);
2897 }
2898 return SUCCESS;
2899 }
2900
2901 /**
2902 * Implementation of kernel_interface_t.destroy.
2903 */
2904 static void destroy(private_kernel_interface_t *this)
2905 {
2906 if (this->routing_table)
2907 {
2908 manage_rule(this, RTM_DELRULE, this->routing_table,
2909 this->routing_table_prio);
2910 }
2911
2912 this->job->cancel(this->job);
2913 close(this->socket_xfrm_events);
2914 close(this->socket_xfrm);
2915 close(this->socket_rt_events);
2916 close(this->socket_rt);
2917 this->policies->destroy(this->policies);
2918 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2919 free(this);
2920 }
2921
2922 /*
2923 * Described in header.
2924 */
2925 kernel_interface_t *kernel_interface_create()
2926 {
2927 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
2928 struct sockaddr_nl addr;
2929
2930 /* public functions */
2931 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
2932 this->public.get_cpi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,u_int32_t,u_int16_t*))get_cpi;
2933 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,u_int16_t,u_int16_t,u_int16_t,u_int16_t,prf_plus_t*,mode_t,u_int16_t,bool,bool))add_sa;
2934 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
2935 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
2936 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
2937 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t,u_int16_t))add_policy;
2938 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
2939 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
2940 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
2941 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
2942 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
2943 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
2944 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
2945 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
2946
2947 /* private members */
2948 this->policies = linked_list_create();
2949 this->ifaces = linked_list_create();
2950 this->hiter = NULL;
2951 this->seq = 200;
2952 pthread_mutex_init(&this->mutex, NULL);
2953 pthread_mutex_init(&this->nl_mutex, NULL);
2954 pthread_cond_init(&this->cond, NULL);
2955 timerclear(&this->last_roam);
2956 this->install_routes = lib->settings->get_bool(lib->settings,
2957 "charon.install_routes", TRUE);
2958 this->routing_table = lib->settings->get_int(lib->settings,
2959 "charon.routing_table", IPSEC_ROUTING_TABLE);
2960 this->routing_table_prio = lib->settings->get_int(lib->settings,
2961 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
2962 memset(&addr, 0, sizeof(addr));
2963 addr.nl_family = AF_NETLINK;
2964
2965 /* create and bind RT socket */
2966 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2967 if (this->socket_rt <= 0)
2968 {
2969 charon->kill(charon, "unable to create RT netlink socket");
2970 }
2971 addr.nl_groups = 0;
2972 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
2973 {
2974 charon->kill(charon, "unable to bind RT netlink socket");
2975 }
2976
2977 /* create and bind RT socket for events (address/interface/route changes) */
2978 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2979 if (this->socket_rt_events <= 0)
2980 {
2981 charon->kill(charon, "unable to create RT event socket");
2982 }
2983 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2984 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
2985 if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
2986 {
2987 charon->kill(charon, "unable to bind RT event socket");
2988 }
2989
2990 /* create and bind XFRM socket */
2991 this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2992 if (this->socket_xfrm <= 0)
2993 {
2994 charon->kill(charon, "unable to create XFRM netlink socket");
2995 }
2996 addr.nl_groups = 0;
2997 if (bind(this->socket_xfrm, (struct sockaddr*)&addr, sizeof(addr)))
2998 {
2999 charon->kill(charon, "unable to bind XFRM netlink socket");
3000 }
3001
3002 /* create and bind XFRM socket for ACQUIRE & EXPIRE */
3003 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
3004 if (this->socket_xfrm_events <= 0)
3005 {
3006 charon->kill(charon, "unable to create XFRM event socket");
3007 }
3008 addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
3009 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
3010 {
3011 charon->kill(charon, "unable to bind XFRM event socket");
3012 }
3013
3014 this->job = callback_job_create((callback_job_cb_t)receive_events,
3015 this, NULL, NULL);
3016 charon->processor->queue_job(charon->processor, (job_t*)this->job);
3017
3018 if (init_address_list(this) != SUCCESS)
3019 {
3020 charon->kill(charon, "unable to get interface list");
3021 }
3022
3023 if (this->routing_table)
3024 {
3025 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
3026 this->routing_table_prio) != SUCCESS)
3027 {
3028 DBG1(DBG_KNL, "unable to create routing table rule");
3029 }
3030 }
3031
3032 return &this->public;
3033 }
3034