set XFRM_STATE_AF_UNSPEC flag only in IPsec tunnel mode
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /*
2 * Copyright (C) 2006-2008 Tobias Brunner
3 * Copyright (C) 2005-2007 Martin Willi
4 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
5 * Copyright (C) 2006 Daniel Roethlisberger
6 * Copyright (C) 2005 Jan Hutter
7 * Hochschule fuer Technik Rapperswil
8 * Copyright (C) 2003 Herbert Xu.
9 *
10 * Based on xfrm code from pluto.
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 2 of the License, or (at your
15 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * for more details.
21 *
22 * $Id$
23 */
24
25 #include <sys/types.h>
26 #include <sys/socket.h>
27 #include <sys/time.h>
28 #include <linux/netlink.h>
29 #include <linux/rtnetlink.h>
30 #include <linux/xfrm.h>
31 #include <linux/udp.h>
32 #include <netinet/in.h>
33 #include <pthread.h>
34 #include <unistd.h>
35 #include <fcntl.h>
36 #include <errno.h>
37 #include <string.h>
38 #include <net/if.h>
39 #include <sys/ioctl.h>
40
41 #include "kernel_interface.h"
42
43 #include <daemon.h>
44 #include <utils/linked_list.h>
45 #include <processing/jobs/delete_child_sa_job.h>
46 #include <processing/jobs/rekey_child_sa_job.h>
47 #include <processing/jobs/acquire_job.h>
48 #include <processing/jobs/callback_job.h>
49 #include <processing/jobs/roam_job.h>
50
/** XFRM state flag required for Linux 2.6.26 kernels and later; defined here
 *  in case the installed kernel headers do not provide it */
#ifndef XFRM_STATE_AF_UNSPEC
#define XFRM_STATE_AF_UNSPEC 32
#endif

/** number of the routing table that receives the routes installed by us */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif
/** priority value associated with that routing table */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** default priorities of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** delay before firing roam jobs, in milliseconds */
#define ROAM_DELAY 100

/** size in bytes of the buffers used to build netlink requests */
#define BUFFER_SIZE 1024
72
/**
 * returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'.
 *
 * NLMSG_DATA yields a void pointer; it is cast to char* before the offset is
 * added, because arithmetic on void pointers is a compiler extension and not
 * part of standard C.
 */
#define XFRM_RTA(nlh, x) ((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + NLMSG_ALIGN(sizeof(x))))
/**
 * returns a pointer to the next rtattr following rta.
 * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
 * (no bounds are checked here; the result depends only on rta->rta_len)
 */
#define XFRM_RTA_NEXT(rta) ((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
/**
 * returns the total size of attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
88
89 typedef struct kernel_algorithm_t kernel_algorithm_t;
90
91 /**
92 * Mapping from the algorithms defined in IKEv2 to
93 * kernel level algorithm names and their key length
94 */
95 struct kernel_algorithm_t {
96 /**
97 * Identifier specified in IKEv2
98 */
99 int ikev2_id;
100
101 /**
102 * Name of the algorithm, as used as kernel identifier
103 */
104 char *name;
105
106 /**
107 * Key length in bits, if fixed size
108 */
109 u_int key_size;
110 };
111 #define END_OF_LIST -1
112
113 /**
114 * Algorithms for encryption
115 */
116 static kernel_algorithm_t encryption_algs[] = {
117 /* {ENCR_DES_IV64, "***", 0}, */
118 {ENCR_DES, "des", 64},
119 {ENCR_3DES, "des3_ede", 192},
120 /* {ENCR_RC5, "***", 0}, */
121 /* {ENCR_IDEA, "***", 0}, */
122 {ENCR_CAST, "cast128", 0},
123 {ENCR_BLOWFISH, "blowfish", 0},
124 /* {ENCR_3IDEA, "***", 0}, */
125 /* {ENCR_DES_IV32, "***", 0}, */
126 {ENCR_NULL, "cipher_null", 0},
127 {ENCR_AES_CBC, "aes", 0},
128 /* {ENCR_AES_CTR, "***", 0}, */
129 {ENCR_AES_CCM_ICV8, "rfc4309(ccm(aes))", 64}, /* key_size = ICV size */
130 {ENCR_AES_CCM_ICV12, "rfc4309(ccm(aes))", 96}, /* key_size = ICV size */
131 {ENCR_AES_CCM_ICV16, "rfc4309(ccm(aes))", 128}, /* key_size = ICV size */
132 {ENCR_AES_GCM_ICV8, "rfc4106(gcm(aes))", 64}, /* key_size = ICV size */
133 {ENCR_AES_GCM_ICV12, "rfc4106(gcm(aes))", 96}, /* key_size = ICV size */
134 {ENCR_AES_GCM_ICV16, "rfc4106(gcm(aes))", 128}, /* key_size = ICV size */
135 {END_OF_LIST, NULL, 0},
136 };
137
138 /**
139 * Algorithms for integrity protection
140 */
141 static kernel_algorithm_t integrity_algs[] = {
142 {AUTH_HMAC_MD5_96, "md5", 128},
143 {AUTH_HMAC_SHA1_96, "sha1", 160},
144 {AUTH_HMAC_SHA2_256_128, "sha256", 256},
145 {AUTH_HMAC_SHA2_384_192, "sha384", 384},
146 {AUTH_HMAC_SHA2_512_256, "sha512", 512},
147 /* {AUTH_DES_MAC, "***", 0}, */
148 /* {AUTH_KPDK_MD5, "***", 0}, */
149 {AUTH_AES_XCBC_96, "xcbc(aes)", 128},
150 {END_OF_LIST, NULL, 0},
151 };
152
153 /**
154 * Algorithms for IPComp
155 */
156 static kernel_algorithm_t compression_algs[] = {
157 /* {IPCOMP_OUI, "***", 0}, */
158 {IPCOMP_DEFLATE, "deflate", 0},
159 {IPCOMP_LZS, "lzs", 0},
160 {IPCOMP_LZJH, "lzjh", 0},
161 {END_OF_LIST, NULL, 0},
162 };
163
164 /**
165 * Look up a kernel algorithm name and its key size
166 */
167 static char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
168 u_int16_t ikev2_algo, u_int16_t *key_size)
169 {
170 while (kernel_algo->ikev2_id != END_OF_LIST)
171 {
172 if (ikev2_algo == kernel_algo->ikev2_id)
173 {
174 /* match, evaluate key length */
175 if (key_size && *key_size == 0)
176 { /* update key size if not set */
177 *key_size = kernel_algo->key_size;
178 }
179 return kernel_algo->name;
180 }
181 kernel_algo++;
182 }
183 return NULL;
184 }
185
186 typedef struct route_entry_t route_entry_t;
187
188 /**
189 * installed routing entry
190 */
191 struct route_entry_t {
192
193 /** Index of the interface the route is bound to */
194 int if_index;
195
196 /** Source ip of the route */
197 host_t *src_ip;
198
199 /** gateway for this route */
200 host_t *gateway;
201
202 /** Destination net */
203 chunk_t dst_net;
204
205 /** Destination net prefixlen */
206 u_int8_t prefixlen;
207 };
208
209 /**
210 * destroy an route_entry_t object
211 */
212 static void route_entry_destroy(route_entry_t *this)
213 {
214 this->src_ip->destroy(this->src_ip);
215 this->gateway->destroy(this->gateway);
216 chunk_free(&this->dst_net);
217 free(this);
218 }
219
220 typedef struct policy_entry_t policy_entry_t;
221
222 /**
223 * installed kernel policy.
224 */
225 struct policy_entry_t {
226
227 /** direction of this policy: in, out, forward */
228 u_int8_t direction;
229
230 /** reqid of the policy */
231 u_int32_t reqid;
232
233 /** parameters of installed policy */
234 struct xfrm_selector sel;
235
236 /** associated route installed for this policy */
237 route_entry_t *route;
238
239 /** by how many CHILD_SA's this policy is used */
240 u_int refcount;
241 };
242
243 typedef struct addr_entry_t addr_entry_t;
244
245 /**
246 * IP address in an inface_entry_t
247 */
248 struct addr_entry_t {
249
250 /** The ip address */
251 host_t *ip;
252
253 /** virtual IP managed by us */
254 bool virtual;
255
256 /** scope of the address */
257 u_char scope;
258
259 /** Number of times this IP is used, if virtual */
260 u_int refcount;
261 };
262
263 /**
264 * destroy a addr_entry_t object
265 */
266 static void addr_entry_destroy(addr_entry_t *this)
267 {
268 this->ip->destroy(this->ip);
269 free(this);
270 }
271
272 typedef struct iface_entry_t iface_entry_t;
273
274 /**
275 * A network interface on this system, containing addr_entry_t's
276 */
277 struct iface_entry_t {
278
279 /** interface index */
280 int ifindex;
281
282 /** name of the interface */
283 char ifname[IFNAMSIZ];
284
285 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
286 u_int flags;
287
288 /** list of addresses as host_t */
289 linked_list_t *addrs;
290 };
291
292 /**
293 * destroy an interface entry
294 */
295 static void iface_entry_destroy(iface_entry_t *this)
296 {
297 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
298 free(this);
299 }
300
301 typedef struct private_kernel_interface_t private_kernel_interface_t;
302
303 /**
304 * Private variables and functions of kernel_interface class.
305 */
306 struct private_kernel_interface_t {
307 /**
308 * Public part of the kernel_interface_t object.
309 */
310 kernel_interface_t public;
311
312 /**
313 * mutex to lock access to netlink socket
314 */
315 pthread_mutex_t nl_mutex;
316
317 /**
318 * mutex to lock access to various lists
319 */
320 pthread_mutex_t mutex;
321
322 /**
323 * condition variable to signal virtual IP add/removal
324 */
325 pthread_cond_t cond;
326
327 /**
328 * List of installed policies (policy_entry_t)
329 */
330 linked_list_t *policies;
331
332 /**
333 * Cached list of interfaces and its adresses (iface_entry_t)
334 */
335 linked_list_t *ifaces;
336
337 /**
338 * iterator used in hook()
339 */
340 iterator_t *hiter;
341
342 /**
343 * job receiving netlink events
344 */
345 callback_job_t *job;
346
347 /**
348 * current sequence number for netlink request
349 */
350 int seq;
351
352 /**
353 * Netlink xfrm socket (IPsec)
354 */
355 int socket_xfrm;
356
357 /**
358 * netlink xfrm socket to receive acquire and expire events
359 */
360 int socket_xfrm_events;
361
362 /**
363 * Netlink rt socket (routing)
364 */
365 int socket_rt;
366
367 /**
368 * Netlink rt socket to receive address change events
369 */
370 int socket_rt_events;
371
372 /**
373 * time of the last roam_job
374 */
375 struct timeval last_roam;
376
377 /**
378 * whether to install routes along policies
379 */
380 bool install_routes;
381
382 /**
383 * routing table to install routes
384 */
385 int routing_table;
386
387 /**
388 * priority of used routing table
389 */
390 int routing_table_prio;
391 };
392
393 /**
394 * convert a IKEv2 specific protocol identifier to the kernel one
395 */
396 static u_int8_t proto_ike2kernel(protocol_id_t proto)
397 {
398 switch (proto)
399 {
400 case PROTO_ESP:
401 return IPPROTO_ESP;
402 case PROTO_AH:
403 return IPPROTO_AH;
404 default:
405 return proto;
406 }
407 }
408
409 /**
410 * reverse of ike2kernel
411 */
412 static protocol_id_t proto_kernel2ike(u_int8_t proto)
413 {
414 switch (proto)
415 {
416 case IPPROTO_ESP:
417 return PROTO_ESP;
418 case IPPROTO_AH:
419 return PROTO_AH;
420 default:
421 return proto;
422 }
423 }
424
425 /**
426 * convert a host_t to a struct xfrm_address
427 */
428 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
429 {
430 chunk_t chunk = host->get_address(host);
431 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
432 }
433
434 /**
435 * convert a traffic selector address range to subnet and its mask.
436 */
437 static void ts2subnet(traffic_selector_t* ts,
438 xfrm_address_t *net, u_int8_t *mask)
439 {
440 /* there is no way to do this cleanly, as the address range may
441 * be anything else but a subnet. We use from_addr as subnet
442 * and try to calculate a usable subnet mask.
443 */
444 int byte, bit;
445 bool found = FALSE;
446 chunk_t from, to;
447 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
448
449 from = ts->get_from_address(ts);
450 to = ts->get_to_address(ts);
451
452 *mask = (size * 8);
453 /* go trough all bits of the addresses, beginning in the front.
454 * as long as they are equal, the subnet gets larger
455 */
456 for (byte = 0; byte < size; byte++)
457 {
458 for (bit = 7; bit >= 0; bit--)
459 {
460 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
461 {
462 *mask = ((7 - bit) + (byte * 8));
463 found = TRUE;
464 break;
465 }
466 }
467 if (found)
468 {
469 break;
470 }
471 }
472 memcpy(net, from.ptr, from.len);
473 chunk_free(&from);
474 chunk_free(&to);
475 }
476
477 /**
478 * convert a traffic selector port range to port/portmask
479 */
480 static void ts2ports(traffic_selector_t* ts,
481 u_int16_t *port, u_int16_t *mask)
482 {
483 /* linux does not seem to accept complex portmasks. Only
484 * any or a specific port is allowed. We set to any, if we have
485 * a port range, or to a specific, if we have one port only.
486 */
487 u_int16_t from, to;
488
489 from = ts->get_from_port(ts);
490 to = ts->get_to_port(ts);
491
492 if (from == to)
493 {
494 *port = htons(from);
495 *mask = ~0;
496 }
497 else
498 {
499 *port = 0;
500 *mask = 0;
501 }
502 }
503
504 /**
505 * convert a pair of traffic_selectors to a xfrm_selector
506 */
507 static struct xfrm_selector ts2selector(traffic_selector_t *src,
508 traffic_selector_t *dst)
509 {
510 struct xfrm_selector sel;
511
512 memset(&sel, 0, sizeof(sel));
513 sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
514 /* src or dest proto may be "any" (0), use more restrictive one */
515 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
516 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
517 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
518 ts2ports(dst, &sel.dport, &sel.dport_mask);
519 ts2ports(src, &sel.sport, &sel.sport_mask);
520 sel.ifindex = 0;
521 sel.user = 0;
522
523 return sel;
524 }
525
526 /**
527 * Creates an rtattr and adds it to the netlink message
528 */
529 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
530 size_t buflen)
531 {
532 struct rtattr *rta;
533
534 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
535 {
536 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
537 return;
538 }
539
540 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
541 rta->rta_type = rta_type;
542 rta->rta_len = RTA_LENGTH(data.len);
543 memcpy(RTA_DATA(rta), data.ptr, data.len);
544 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
545 }
546
547 /**
548 * process a XFRM_MSG_ACQUIRE from kernel
549 */
550 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
551 {
552 u_int32_t reqid = 0;
553 job_t *job;
554 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
555 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
556
557 if (RTA_OK(rtattr, rtsize))
558 {
559 if (rtattr->rta_type == XFRMA_TMPL)
560 {
561 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
562 reqid = tmpl->reqid;
563 }
564 }
565 if (reqid == 0)
566 {
567 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
568 return;
569 }
570 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
571 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid %d", reqid);
572 job = (job_t*)acquire_job_create(reqid);
573 charon->processor->queue_job(charon->processor, job);
574 }
575
576 /**
577 * process a XFRM_MSG_EXPIRE from kernel
578 */
579 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
580 {
581 job_t *job;
582 protocol_id_t protocol;
583 u_int32_t spi, reqid;
584 struct xfrm_user_expire *expire;
585
586 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
587 protocol = proto_kernel2ike(expire->state.id.proto);
588 spi = expire->state.id.spi;
589 reqid = expire->state.reqid;
590
591 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
592
593 if (protocol != PROTO_ESP && protocol != PROTO_AH)
594 {
595 DBG2(DBG_KNL, "ignoring XFRM_MSG_EXPIRE for SA 0x%x (reqid %d) which is "
596 "not a CHILD_SA", ntohl(spi), reqid);
597 return;
598 }
599
600 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA 0x%x (reqid %d)",
601 expire->hard ? "delete" : "rekey", protocol_id_names,
602 protocol, ntohl(spi), reqid);
603 if (expire->hard)
604 {
605 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
606 }
607 else
608 {
609 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
610 }
611 charon->processor->queue_job(charon->processor, job);
612 }
613
614 /**
615 * start a roaming job. We delay it for a second and fire only one job
616 * for multiple events. Otherwise we would create two many jobs.
617 */
618 static void fire_roam_job(private_kernel_interface_t *this, bool address)
619 {
620 struct timeval now;
621
622 if (gettimeofday(&now, NULL) == 0)
623 {
624 if (timercmp(&now, &this->last_roam, >))
625 {
626 now.tv_usec += ROAM_DELAY * 1000;
627 while (now.tv_usec > 1000000)
628 {
629 now.tv_sec++;
630 now.tv_usec -= 1000000;
631 }
632 this->last_roam = now;
633 charon->scheduler->schedule_job(charon->scheduler,
634 (job_t*)roam_job_create(address), ROAM_DELAY);
635 }
636 }
637 }
638
639 /**
640 * process RTM_NEWLINK/RTM_DELLINK from kernel
641 */
642 static void process_link(private_kernel_interface_t *this,
643 struct nlmsghdr *hdr, bool event)
644 {
645 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
646 struct rtattr *rta = IFLA_RTA(msg);
647 size_t rtasize = IFLA_PAYLOAD (hdr);
648 iterator_t *iterator;
649 iface_entry_t *current, *entry = NULL;
650 char *name = NULL;
651 bool update = FALSE;
652
653 while(RTA_OK(rta, rtasize))
654 {
655 switch (rta->rta_type)
656 {
657 case IFLA_IFNAME:
658 name = RTA_DATA(rta);
659 break;
660 }
661 rta = RTA_NEXT(rta, rtasize);
662 }
663 if (!name)
664 {
665 name = "(unknown)";
666 }
667
668 switch (hdr->nlmsg_type)
669 {
670 case RTM_NEWLINK:
671 {
672 if (msg->ifi_flags & IFF_LOOPBACK)
673 { /* ignore loopback interfaces */
674 break;
675 }
676 iterator = this->ifaces->create_iterator_locked(this->ifaces,
677 &this->mutex);
678 while (iterator->iterate(iterator, (void**)&current))
679 {
680 if (current->ifindex == msg->ifi_index)
681 {
682 entry = current;
683 break;
684 }
685 }
686 if (!entry)
687 {
688 entry = malloc_thing(iface_entry_t);
689 entry->ifindex = msg->ifi_index;
690 entry->flags = 0;
691 entry->addrs = linked_list_create();
692 this->ifaces->insert_last(this->ifaces, entry);
693 }
694 memcpy(entry->ifname, name, IFNAMSIZ);
695 entry->ifname[IFNAMSIZ-1] = '\0';
696 if (event)
697 {
698 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
699 {
700 update = TRUE;
701 DBG1(DBG_KNL, "interface %s activated", name);
702 }
703 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
704 {
705 update = TRUE;
706 DBG1(DBG_KNL, "interface %s deactivated", name);
707 }
708 }
709 entry->flags = msg->ifi_flags;
710 iterator->destroy(iterator);
711 break;
712 }
713 case RTM_DELLINK:
714 {
715 iterator = this->ifaces->create_iterator_locked(this->ifaces,
716 &this->mutex);
717 while (iterator->iterate(iterator, (void**)&current))
718 {
719 if (current->ifindex == msg->ifi_index)
720 {
721 /* we do not remove it, as an address may be added to a
722 * "down" interface and we wan't to know that. */
723 current->flags = msg->ifi_flags;
724 break;
725 }
726 }
727 iterator->destroy(iterator);
728 break;
729 }
730 }
731
732 /* send an update to all IKE_SAs */
733 if (update && event)
734 {
735 fire_roam_job(this, TRUE);
736 }
737 }
738
739 /**
740 * process RTM_NEWADDR/RTM_DELADDR from kernel
741 */
742 static void process_addr(private_kernel_interface_t *this,
743 struct nlmsghdr *hdr, bool event)
744 {
745 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
746 struct rtattr *rta = IFA_RTA(msg);
747 size_t rtasize = IFA_PAYLOAD (hdr);
748 host_t *host = NULL;
749 iterator_t *ifaces, *addrs;
750 iface_entry_t *iface;
751 addr_entry_t *addr;
752 chunk_t local = chunk_empty, address = chunk_empty;
753 bool update = FALSE, found = FALSE, changed = FALSE;
754
755 while(RTA_OK(rta, rtasize))
756 {
757 switch (rta->rta_type)
758 {
759 case IFA_LOCAL:
760 local.ptr = RTA_DATA(rta);
761 local.len = RTA_PAYLOAD(rta);
762 break;
763 case IFA_ADDRESS:
764 address.ptr = RTA_DATA(rta);
765 address.len = RTA_PAYLOAD(rta);
766 break;
767 }
768 rta = RTA_NEXT(rta, rtasize);
769 }
770
771 /* For PPP interfaces, we need the IFA_LOCAL address,
772 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
773 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
774 if (local.ptr)
775 {
776 host = host_create_from_chunk(msg->ifa_family, local, 0);
777 }
778 else if (address.ptr)
779 {
780 host = host_create_from_chunk(msg->ifa_family, address, 0);
781 }
782
783 if (host == NULL)
784 { /* bad family? */
785 return;
786 }
787
788 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
789 while (ifaces->iterate(ifaces, (void**)&iface))
790 {
791 if (iface->ifindex == msg->ifa_index)
792 {
793 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
794 while (addrs->iterate(addrs, (void**)&addr))
795 {
796 if (host->ip_equals(host, addr->ip))
797 {
798 found = TRUE;
799 if (hdr->nlmsg_type == RTM_DELADDR)
800 {
801 changed = TRUE;
802 addrs->remove(addrs);
803 if (!addr->virtual)
804 {
805 DBG1(DBG_KNL, "%H disappeared from %s",
806 host, iface->ifname);
807 }
808 addr_entry_destroy(addr);
809 }
810 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
811 {
812 addr->refcount = 1;
813 }
814 }
815 }
816 addrs->destroy(addrs);
817
818 if (hdr->nlmsg_type == RTM_NEWADDR)
819 {
820 if (!found)
821 {
822 found = TRUE;
823 changed = TRUE;
824 addr = malloc_thing(addr_entry_t);
825 addr->ip = host->clone(host);
826 addr->virtual = FALSE;
827 addr->refcount = 1;
828 addr->scope = msg->ifa_scope;
829
830 iface->addrs->insert_last(iface->addrs, addr);
831 if (event)
832 {
833 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
834 }
835 }
836 }
837 if (found && (iface->flags & IFF_UP))
838 {
839 update = TRUE;
840 }
841 break;
842 }
843 }
844 ifaces->destroy(ifaces);
845 host->destroy(host);
846
847 /* send an update to all IKE_SAs */
848 if (update && event && changed)
849 {
850 fire_roam_job(this, TRUE);
851 }
852 }
853
854 /**
855 * Receives events from kernel
856 */
857 static job_requeue_t receive_events(private_kernel_interface_t *this)
858 {
859 char response[1024];
860 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
861 struct sockaddr_nl addr;
862 socklen_t addr_len = sizeof(addr);
863 int len, oldstate, maxfd, selected;
864 fd_set rfds;
865
866 FD_ZERO(&rfds);
867 FD_SET(this->socket_xfrm_events, &rfds);
868 FD_SET(this->socket_rt_events, &rfds);
869 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
870
871 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
872 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
873 pthread_setcancelstate(oldstate, NULL);
874 if (selected <= 0)
875 {
876 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
877 return JOB_REQUEUE_FAIR;
878 }
879 if (FD_ISSET(this->socket_xfrm_events, &rfds))
880 {
881 selected = this->socket_xfrm_events;
882 }
883 else if (FD_ISSET(this->socket_rt_events, &rfds))
884 {
885 selected = this->socket_rt_events;
886 }
887 else
888 {
889 return JOB_REQUEUE_DIRECT;
890 }
891
892 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
893 (struct sockaddr*)&addr, &addr_len);
894 if (len < 0)
895 {
896 switch (errno)
897 {
898 case EINTR:
899 /* interrupted, try again */
900 return JOB_REQUEUE_DIRECT;
901 case EAGAIN:
902 /* no data ready, select again */
903 return JOB_REQUEUE_DIRECT;
904 default:
905 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
906 sleep(1);
907 return JOB_REQUEUE_FAIR;
908 }
909 }
910 if (addr.nl_pid != 0)
911 { /* not from kernel. not interested, try another one */
912 return JOB_REQUEUE_DIRECT;
913 }
914
915 while (NLMSG_OK(hdr, len))
916 {
917 /* looks good so far, dispatch netlink message */
918 if (selected == this->socket_xfrm_events)
919 {
920 switch (hdr->nlmsg_type)
921 {
922 case XFRM_MSG_ACQUIRE:
923 process_acquire(this, hdr);
924 break;
925 case XFRM_MSG_EXPIRE:
926 process_expire(this, hdr);
927 break;
928 default:
929 break;
930 }
931 }
932 else if (selected == this->socket_rt_events)
933 {
934 switch (hdr->nlmsg_type)
935 {
936 case RTM_NEWADDR:
937 case RTM_DELADDR:
938 process_addr(this, hdr, TRUE);
939 pthread_cond_signal(&this->cond);
940 break;
941 case RTM_NEWLINK:
942 case RTM_DELLINK:
943 process_link(this, hdr, TRUE);
944 pthread_cond_signal(&this->cond);
945 break;
946 case RTM_NEWROUTE:
947 case RTM_DELROUTE:
948 fire_roam_job(this, FALSE);
949 break;
950 default:
951 break;
952 }
953 }
954 hdr = NLMSG_NEXT(hdr, len);
955 }
956 return JOB_REQUEUE_DIRECT;
957 }
958
959 /**
960 * send a netlink message and wait for a reply
961 */
962 static status_t netlink_send(private_kernel_interface_t *this,
963 int socket, struct nlmsghdr *in,
964 struct nlmsghdr **out, size_t *out_len)
965 {
966 int len, addr_len;
967 struct sockaddr_nl addr;
968 chunk_t result = chunk_empty, tmp;
969 struct nlmsghdr *msg, peek;
970
971 pthread_mutex_lock(&this->nl_mutex);
972
973 in->nlmsg_seq = ++this->seq;
974 in->nlmsg_pid = getpid();
975
976 memset(&addr, 0, sizeof(addr));
977 addr.nl_family = AF_NETLINK;
978 addr.nl_pid = 0;
979 addr.nl_groups = 0;
980
981 while (TRUE)
982 {
983 len = sendto(socket, in, in->nlmsg_len, 0,
984 (struct sockaddr*)&addr, sizeof(addr));
985
986 if (len != in->nlmsg_len)
987 {
988 if (errno == EINTR)
989 {
990 /* interrupted, try again */
991 continue;
992 }
993 pthread_mutex_unlock(&this->nl_mutex);
994 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
995 return FAILED;
996 }
997 break;
998 }
999
1000 while (TRUE)
1001 {
1002 char buf[4096];
1003 tmp.len = sizeof(buf);
1004 tmp.ptr = buf;
1005 msg = (struct nlmsghdr*)tmp.ptr;
1006
1007 memset(&addr, 0, sizeof(addr));
1008 addr.nl_family = AF_NETLINK;
1009 addr.nl_pid = getpid();
1010 addr.nl_groups = 0;
1011 addr_len = sizeof(addr);
1012
1013 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
1014 (struct sockaddr*)&addr, &addr_len);
1015
1016 if (len < 0)
1017 {
1018 if (errno == EINTR)
1019 {
1020 DBG1(DBG_KNL, "got interrupted");
1021 /* interrupted, try again */
1022 continue;
1023 }
1024 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
1025 pthread_mutex_unlock(&this->nl_mutex);
1026 return FAILED;
1027 }
1028 if (!NLMSG_OK(msg, len))
1029 {
1030 DBG1(DBG_KNL, "received corrupted netlink message");
1031 pthread_mutex_unlock(&this->nl_mutex);
1032 return FAILED;
1033 }
1034 if (msg->nlmsg_seq != this->seq)
1035 {
1036 DBG1(DBG_KNL, "received invalid netlink sequence number");
1037 if (msg->nlmsg_seq < this->seq)
1038 {
1039 continue;
1040 }
1041 pthread_mutex_unlock(&this->nl_mutex);
1042 return FAILED;
1043 }
1044
1045 tmp.len = len;
1046 result = chunk_cata("cc", result, tmp);
1047
1048 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
1049 * numbers to detect multi header messages */
1050 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
1051 (struct sockaddr*)&addr, &addr_len);
1052
1053 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
1054 {
1055 /* seems to be multipart */
1056 continue;
1057 }
1058 break;
1059 }
1060
1061 *out_len = result.len;
1062 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
1063
1064 pthread_mutex_unlock(&this->nl_mutex);
1065
1066 return SUCCESS;
1067 }
1068
1069 /**
1070 * send a netlink message and wait for its acknowlegde
1071 */
1072 static status_t netlink_send_ack(private_kernel_interface_t *this,
1073 int socket, struct nlmsghdr *in)
1074 {
1075 struct nlmsghdr *out, *hdr;
1076 size_t len;
1077
1078 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
1079 {
1080 return FAILED;
1081 }
1082 hdr = out;
1083 while (NLMSG_OK(hdr, len))
1084 {
1085 switch (hdr->nlmsg_type)
1086 {
1087 case NLMSG_ERROR:
1088 {
1089 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
1090
1091 if (err->error)
1092 {
1093 DBG1(DBG_KNL, "received netlink error: %s (%d)",
1094 strerror(-err->error), -err->error);
1095 free(out);
1096 return FAILED;
1097 }
1098 free(out);
1099 return SUCCESS;
1100 }
1101 default:
1102 hdr = NLMSG_NEXT(hdr, len);
1103 continue;
1104 case NLMSG_DONE:
1105 break;
1106 }
1107 break;
1108 }
1109 DBG1(DBG_KNL, "netlink request not acknowlegded");
1110 free(out);
1111 return FAILED;
1112 }
1113
1114 /**
1115 * Initialize a list of local addresses.
1116 */
1117 static status_t init_address_list(private_kernel_interface_t *this)
1118 {
1119 char request[BUFFER_SIZE];
1120 struct nlmsghdr *out, *current, *in;
1121 struct rtgenmsg *msg;
1122 size_t len;
1123 iterator_t *ifaces, *addrs;
1124 iface_entry_t *iface;
1125 addr_entry_t *addr;
1126
1127 DBG1(DBG_KNL, "listening on interfaces:");
1128
1129 memset(&request, 0, sizeof(request));
1130
1131 in = (struct nlmsghdr*)&request;
1132 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1133 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1134 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1135 msg->rtgen_family = AF_UNSPEC;
1136
1137 /* get all links */
1138 in->nlmsg_type = RTM_GETLINK;
1139 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1140 {
1141 return FAILED;
1142 }
1143 current = out;
1144 while (NLMSG_OK(current, len))
1145 {
1146 switch (current->nlmsg_type)
1147 {
1148 case NLMSG_DONE:
1149 break;
1150 case RTM_NEWLINK:
1151 process_link(this, current, FALSE);
1152 /* fall through */
1153 default:
1154 current = NLMSG_NEXT(current, len);
1155 continue;
1156 }
1157 break;
1158 }
1159 free(out);
1160
1161 /* get all interface addresses */
1162 in->nlmsg_type = RTM_GETADDR;
1163 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1164 {
1165 return FAILED;
1166 }
1167 current = out;
1168 while (NLMSG_OK(current, len))
1169 {
1170 switch (current->nlmsg_type)
1171 {
1172 case NLMSG_DONE:
1173 break;
1174 case RTM_NEWADDR:
1175 process_addr(this, current, FALSE);
1176 /* fall through */
1177 default:
1178 current = NLMSG_NEXT(current, len);
1179 continue;
1180 }
1181 break;
1182 }
1183 free(out);
1184
1185 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1186 while (ifaces->iterate(ifaces, (void**)&iface))
1187 {
1188 if (iface->flags & IFF_UP)
1189 {
1190 DBG1(DBG_KNL, " %s", iface->ifname);
1191 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1192 while (addrs->iterate(addrs, (void**)&addr))
1193 {
1194 DBG1(DBG_KNL, " %H", addr->ip);
1195 }
1196 addrs->destroy(addrs);
1197 }
1198 }
1199 ifaces->destroy(ifaces);
1200 return SUCCESS;
1201 }
1202
1203 /**
1204 * iterator hook to iterate over addrs
1205 */
1206 static hook_result_t addr_hook(private_kernel_interface_t *this,
1207 addr_entry_t *in, host_t **out)
1208 {
1209 if (in->virtual)
1210 { /* skip virtual interfaces added by us */
1211 return HOOK_SKIP;
1212 }
1213 if (in->scope >= RT_SCOPE_LINK)
1214 { /* skip addresses with a unusable scope */
1215 return HOOK_SKIP;
1216 }
1217 *out = in->ip;
1218 return HOOK_NEXT;
1219 }
1220
1221 /**
1222 * iterator hook to iterate over ifaces
1223 */
1224 static hook_result_t iface_hook(private_kernel_interface_t *this,
1225 iface_entry_t *in, host_t **out)
1226 {
1227 if (!(in->flags & IFF_UP))
1228 { /* skip interfaces not up */
1229 return HOOK_SKIP;
1230 }
1231
1232 if (this->hiter == NULL)
1233 {
1234 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1235 this->hiter->set_iterator_hook(this->hiter,
1236 (iterator_hook_t*)addr_hook, this);
1237 }
1238 while (this->hiter->iterate(this->hiter, (void**)out))
1239 {
1240 return HOOK_AGAIN;
1241 }
1242 this->hiter->destroy(this->hiter);
1243 this->hiter = NULL;
1244 return HOOK_SKIP;
1245 }
1246
1247 /**
1248 * Implements kernel_interface_t.create_address_iterator.
1249 */
1250 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1251 {
1252 iterator_t *iterator;
1253
1254 /* This iterator is not only hooked, is is double-hooked. As we have stored
1255 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1256 * entries in each interface we iterate. This does the iface_hook. The
1257 * addr_hook returns the ip instead of the addr_entry. */
1258
1259 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1260 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1261 return iterator;
1262 }
1263
1264 /**
1265 * implementation of kernel_interface_t.get_interface_name
1266 */
1267 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1268 {
1269 iterator_t *ifaces, *addrs;
1270 iface_entry_t *iface;
1271 addr_entry_t *addr;
1272 char *name = NULL;
1273
1274 DBG2(DBG_KNL, "getting interface name for %H", ip);
1275
1276 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1277 while (ifaces->iterate(ifaces, (void**)&iface))
1278 {
1279 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1280 while (addrs->iterate(addrs, (void**)&addr))
1281 {
1282 if (ip->ip_equals(ip, addr->ip))
1283 {
1284 name = strdup(iface->ifname);
1285 break;
1286 }
1287 }
1288 addrs->destroy(addrs);
1289 if (name)
1290 {
1291 break;
1292 }
1293 }
1294 ifaces->destroy(ifaces);
1295
1296 if (name)
1297 {
1298 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1299 }
1300 else
1301 {
1302 DBG2(DBG_KNL, "%H is not a local address", ip);
1303 }
1304 return name;
1305 }
1306
1307 /**
1308 * Tries to find an ip address of a local interface that is included in the
1309 * supplied traffic selector.
1310 */
1311 static status_t get_address_by_ts(private_kernel_interface_t *this,
1312 traffic_selector_t *ts, host_t **ip)
1313 {
1314 iterator_t *ifaces, *addrs;
1315 iface_entry_t *iface;
1316 addr_entry_t *addr;
1317 host_t *host;
1318 int family;
1319 bool found = FALSE;
1320
1321 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1322
1323 /* if we have a family which includes localhost, we do not
1324 * search for an IP, we use the default */
1325 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1326
1327 if (family == AF_INET)
1328 {
1329 host = host_create_from_string("127.0.0.1", 0);
1330 }
1331 else
1332 {
1333 host = host_create_from_string("::1", 0);
1334 }
1335
1336 if (ts->includes(ts, host))
1337 {
1338 *ip = host_create_any(family);
1339 host->destroy(host);
1340 DBG2(DBG_KNL, "using host %H", *ip);
1341 return SUCCESS;
1342 }
1343 host->destroy(host);
1344
1345 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1346 while (ifaces->iterate(ifaces, (void**)&iface))
1347 {
1348 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1349 while (addrs->iterate(addrs, (void**)&addr))
1350 {
1351 if (ts->includes(ts, addr->ip))
1352 {
1353 found = TRUE;
1354 *ip = addr->ip->clone(addr->ip);
1355 break;
1356 }
1357 }
1358 addrs->destroy(addrs);
1359 if (found)
1360 {
1361 break;
1362 }
1363 }
1364 ifaces->destroy(ifaces);
1365
1366 if (!found)
1367 {
1368 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1369 return FAILED;
1370 }
1371 DBG2(DBG_KNL, "using host %H", *ip);
1372 return SUCCESS;
1373 }
1374
1375 /**
1376 * get the interface of a local address
1377 */
1378 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1379 {
1380 iterator_t *ifaces, *addrs;
1381 iface_entry_t *iface;
1382 addr_entry_t *addr;
1383 int ifindex = 0;
1384
1385 DBG2(DBG_KNL, "getting iface for %H", ip);
1386
1387 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1388 while (ifaces->iterate(ifaces, (void**)&iface))
1389 {
1390 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1391 while (addrs->iterate(addrs, (void**)&addr))
1392 {
1393 if (ip->ip_equals(ip, addr->ip))
1394 {
1395 ifindex = iface->ifindex;
1396 break;
1397 }
1398 }
1399 addrs->destroy(addrs);
1400 if (ifindex)
1401 {
1402 break;
1403 }
1404 }
1405 ifaces->destroy(ifaces);
1406
1407 if (ifindex == 0)
1408 {
1409 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1410 }
1411 return ifindex;
1412 }
1413
1414 /**
1415 * get the refcount of a virtual ip
1416 */
1417 static int get_vip_refcount(private_kernel_interface_t *this, host_t* ip)
1418 {
1419 iterator_t *ifaces, *addrs;
1420 iface_entry_t *iface;
1421 addr_entry_t *addr;
1422 int refcount = 0;
1423
1424 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
1425 while (ifaces->iterate(ifaces, (void**)&iface))
1426 {
1427 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1428 while (addrs->iterate(addrs, (void**)&addr))
1429 {
1430 if (addr->virtual && (iface->flags & IFF_UP) &&
1431 ip->ip_equals(ip, addr->ip))
1432 {
1433 refcount = addr->refcount;
1434 break;
1435 }
1436 }
1437 addrs->destroy(addrs);
1438 if (refcount)
1439 {
1440 break;
1441 }
1442 }
1443 ifaces->destroy(ifaces);
1444
1445 return refcount;
1446 }
1447
1448 /**
1449 * Manages the creation and deletion of ip addresses on an interface.
1450 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1451 */
1452 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1453 int flags, int if_index, host_t *ip)
1454 {
1455 unsigned char request[BUFFER_SIZE];
1456 struct nlmsghdr *hdr;
1457 struct ifaddrmsg *msg;
1458 chunk_t chunk;
1459
1460 memset(&request, 0, sizeof(request));
1461
1462 chunk = ip->get_address(ip);
1463
1464 hdr = (struct nlmsghdr*)request;
1465 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1466 hdr->nlmsg_type = nlmsg_type;
1467 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1468
1469 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1470 msg->ifa_family = ip->get_family(ip);
1471 msg->ifa_flags = 0;
1472 msg->ifa_prefixlen = 8 * chunk.len;
1473 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1474 msg->ifa_index = if_index;
1475
1476 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1477
1478 return netlink_send_ack(this, this->socket_rt, hdr);
1479 }
1480
1481 /**
1482 * Manages source routes in the routing table.
1483 * By setting the appropriate nlmsg_type, the route added or r.
1484 */
1485 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1486 int flags, route_entry_t *route)
1487 {
1488 unsigned char request[BUFFER_SIZE];
1489 struct nlmsghdr *hdr;
1490 struct rtmsg *msg;
1491 chunk_t chunk;
1492
1493 /* if route is 0.0.0.0/0, we can't install it, as it would
1494 * overwrite the default route. Instead, we add two routes:
1495 * 0.0.0.0/1 and 128.0.0.0/1 */
1496 if (this->routing_table == 0 && route->prefixlen == 0)
1497 {
1498 route_entry_t half;
1499 status_t status;
1500
1501 half.dst_net = chunk_alloca(route->dst_net.len);
1502 memset(half.dst_net.ptr, 0, half.dst_net.len);
1503 half.src_ip = route->src_ip;
1504 half.gateway = route->gateway;
1505 half.if_index = route->if_index;
1506 half.prefixlen = 1;
1507
1508 status = manage_srcroute(this, nlmsg_type, flags, &half);
1509 half.dst_net.ptr[0] |= 0x80;
1510 status = manage_srcroute(this, nlmsg_type, flags, &half);
1511 return status;
1512 }
1513
1514 memset(&request, 0, sizeof(request));
1515
1516 hdr = (struct nlmsghdr*)request;
1517 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1518 hdr->nlmsg_type = nlmsg_type;
1519 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1520
1521 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1522 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1523 msg->rtm_dst_len = route->prefixlen;
1524 msg->rtm_table = this->routing_table;
1525 msg->rtm_protocol = RTPROT_STATIC;
1526 msg->rtm_type = RTN_UNICAST;
1527 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1528
1529 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1530 chunk = route->src_ip->get_address(route->src_ip);
1531 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1532 chunk = route->gateway->get_address(route->gateway);
1533 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1534 chunk.ptr = (char*)&route->if_index;
1535 chunk.len = sizeof(route->if_index);
1536 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1537
1538 return netlink_send_ack(this, this->socket_rt, hdr);
1539 }
1540
1541 /**
1542 * create or delete an rule to use our routing table
1543 */
1544 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1545 u_int32_t table, u_int32_t prio)
1546 {
1547 unsigned char request[BUFFER_SIZE];
1548 struct nlmsghdr *hdr;
1549 struct rtmsg *msg;
1550 chunk_t chunk;
1551
1552 memset(&request, 0, sizeof(request));
1553 hdr = (struct nlmsghdr*)request;
1554 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1555 hdr->nlmsg_type = nlmsg_type;
1556 if (nlmsg_type == RTM_NEWRULE)
1557 {
1558 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1559 }
1560 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1561
1562 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1563 msg->rtm_table = table;
1564 msg->rtm_family = AF_INET;
1565 msg->rtm_protocol = RTPROT_BOOT;
1566 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1567 msg->rtm_type = RTN_UNICAST;
1568
1569 chunk = chunk_from_thing(prio);
1570 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1571
1572 return netlink_send_ack(this, this->socket_rt, hdr);
1573 }
1574
1575 /**
1576 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1577 */
1578 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1579 {
1580 int bit, byte;
1581
1582 if (addr.len != net.len)
1583 {
1584 return FALSE;
1585 }
1586 /* scan through all bits, beginning in the front */
1587 for (byte = 0; byte < addr.len; byte++)
1588 {
1589 for (bit = 7; bit >= 0; bit--)
1590 {
1591 /* check if bits are equal (or we reached the end of the net) */
1592 if (bit + byte * 8 > net_len)
1593 {
1594 return TRUE;
1595 }
1596 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
1597 {
1598 return FALSE;
1599 }
1600 }
1601 }
1602 return TRUE;
1603 }
1604
1605 /**
1606 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1607 */
1608 static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
1609 bool nexthop)
1610 {
1611 unsigned char request[BUFFER_SIZE];
1612 struct nlmsghdr *hdr, *out, *current;
1613 struct rtmsg *msg;
1614 chunk_t chunk;
1615 size_t len;
1616 int best = -1;
1617 host_t *src = NULL, *gtw = NULL;
1618
1619 DBG2(DBG_KNL, "getting address to reach %H", dest);
1620
1621 memset(&request, 0, sizeof(request));
1622
1623 hdr = (struct nlmsghdr*)request;
1624 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
1625 hdr->nlmsg_type = RTM_GETROUTE;
1626 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1627
1628 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1629 msg->rtm_family = dest->get_family(dest);
1630
1631 chunk = dest->get_address(dest);
1632 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1633
1634 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1635 {
1636 DBG1(DBG_KNL, "getting address to %H failed", dest);
1637 return NULL;
1638 }
1639 current = out;
1640 while (NLMSG_OK(current, len))
1641 {
1642 switch (current->nlmsg_type)
1643 {
1644 case NLMSG_DONE:
1645 break;
1646 case RTM_NEWROUTE:
1647 {
1648 struct rtattr *rta;
1649 size_t rtasize;
1650 chunk_t rta_gtw, rta_src, rta_dst;
1651 u_int32_t rta_oif = 0;
1652
1653 rta_gtw = rta_src = rta_dst = chunk_empty;
1654 msg = (struct rtmsg*)(NLMSG_DATA(current));
1655 rta = RTM_RTA(msg);
1656 rtasize = RTM_PAYLOAD(current);
1657 while (RTA_OK(rta, rtasize))
1658 {
1659 switch (rta->rta_type)
1660 {
1661 case RTA_PREFSRC:
1662 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1663 break;
1664 case RTA_GATEWAY:
1665 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1666 break;
1667 case RTA_DST:
1668 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1669 break;
1670 case RTA_OIF:
1671 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1672 {
1673 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1674 }
1675 break;
1676 }
1677 rta = RTA_NEXT(rta, rtasize);
1678 }
1679
1680 /* apply the route if:
1681 * - it is not from our own ipsec routing table
1682 * - is better than a previous one
1683 * - is the default route or
1684 * - its destination net contains our destination
1685 */
1686 if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
1687 && msg->rtm_dst_len > best
1688 && (msg->rtm_dst_len == 0 || /* default route */
1689 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
1690 {
1691 iterator_t *ifaces, *addrs;
1692 iface_entry_t *iface;
1693 addr_entry_t *addr;
1694
1695 best = msg->rtm_dst_len;
1696 if (nexthop)
1697 {
1698 DESTROY_IF(gtw);
1699 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
1700 }
1701 else if (rta_src.ptr)
1702 {
1703 DESTROY_IF(src);
1704 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
1705 if (get_vip_refcount(this, src))
1706 { /* skip source address if it is installed by us */
1707 DESTROY_IF(src);
1708 src = NULL;
1709 current = NLMSG_NEXT(current, len);
1710 continue;
1711 }
1712 }
1713 else
1714 {
1715 /* no source addr, get one from the interfaces */
1716 ifaces = this->ifaces->create_iterator_locked(
1717 this->ifaces, &this->mutex);
1718 while (ifaces->iterate(ifaces, (void**)&iface))
1719 {
1720 if (iface->ifindex == rta_oif)
1721 {
1722 addrs = iface->addrs->create_iterator(
1723 iface->addrs, TRUE);
1724 while (addrs->iterate(addrs, (void**)&addr))
1725 {
1726 chunk_t ip = addr->ip->get_address(addr->ip);
1727 if (msg->rtm_dst_len == 0
1728 || addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
1729 {
1730 DESTROY_IF(src);
1731 src = addr->ip->clone(addr->ip);
1732 break;
1733 }
1734 }
1735 addrs->destroy(addrs);
1736 }
1737 }
1738 ifaces->destroy(ifaces);
1739 }
1740 }
1741 /* FALL through */
1742 }
1743 default:
1744 current = NLMSG_NEXT(current, len);
1745 continue;
1746 }
1747 break;
1748 }
1749 free(out);
1750
1751 if (nexthop)
1752 {
1753 if (gtw)
1754 {
1755 return gtw;
1756 }
1757 return dest->clone(dest);
1758 }
1759 return src;
1760 }
1761
1762 /**
1763 * Implementation of kernel_interface_t.get_source_addr.
1764 */
1765 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1766 {
1767 return get_route(this, dest, FALSE);
1768 }
1769
1770 /**
1771 * Implementation of kernel_interface_t.add_ip.
1772 */
1773 static status_t add_ip(private_kernel_interface_t *this,
1774 host_t *virtual_ip, host_t *iface_ip)
1775 {
1776 iface_entry_t *iface;
1777 addr_entry_t *addr;
1778 iterator_t *addrs, *ifaces;
1779 int ifindex;
1780
1781 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1782
1783 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1784 while (ifaces->iterate(ifaces, (void**)&iface))
1785 {
1786 bool iface_found = FALSE;
1787
1788 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1789 while (addrs->iterate(addrs, (void**)&addr))
1790 {
1791 if (iface_ip->ip_equals(iface_ip, addr->ip))
1792 {
1793 iface_found = TRUE;
1794 }
1795 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1796 {
1797 addr->refcount++;
1798 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1799 virtual_ip, iface->ifname);
1800 addrs->destroy(addrs);
1801 ifaces->destroy(ifaces);
1802 return SUCCESS;
1803 }
1804 }
1805 addrs->destroy(addrs);
1806
1807 if (iface_found)
1808 {
1809 ifindex = iface->ifindex;
1810 addr = malloc_thing(addr_entry_t);
1811 addr->ip = virtual_ip->clone(virtual_ip);
1812 addr->refcount = 0;
1813 addr->virtual = TRUE;
1814 addr->scope = RT_SCOPE_UNIVERSE;
1815 iface->addrs->insert_last(iface->addrs, addr);
1816
1817 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1818 ifindex, virtual_ip) == SUCCESS)
1819 {
1820 while (get_vip_refcount(this, virtual_ip) == 0)
1821 { /* wait until address appears */
1822 pthread_cond_wait(&this->cond, &this->mutex);
1823 }
1824 ifaces->destroy(ifaces);
1825 return SUCCESS;
1826 }
1827 ifaces->destroy(ifaces);
1828 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1829 return FAILED;
1830 }
1831 }
1832 ifaces->destroy(ifaces);
1833
1834 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1835 "virtual IP %H", iface_ip, virtual_ip);
1836 return FAILED;
1837 }
1838
1839 /**
1840 * Implementation of kernel_interface_t.del_ip.
1841 */
1842 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1843 {
1844 iface_entry_t *iface;
1845 addr_entry_t *addr;
1846 iterator_t *addrs, *ifaces;
1847 status_t status;
1848 int ifindex;
1849
1850 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1851
1852 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1853 while (ifaces->iterate(ifaces, (void**)&iface))
1854 {
1855 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1856 while (addrs->iterate(addrs, (void**)&addr))
1857 {
1858 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1859 {
1860 ifindex = iface->ifindex;
1861 if (addr->refcount == 1)
1862 {
1863 status = manage_ipaddr(this, RTM_DELADDR, 0,
1864 ifindex, virtual_ip);
1865 if (status == SUCCESS)
1866 { /* wait until the address is really gone */
1867 while (get_vip_refcount(this, virtual_ip) > 0)
1868 {
1869 pthread_cond_wait(&this->cond, &this->mutex);
1870 }
1871 }
1872 addrs->destroy(addrs);
1873 ifaces->destroy(ifaces);
1874 return status;
1875 }
1876 else
1877 {
1878 addr->refcount--;
1879 }
1880 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1881 virtual_ip);
1882 addrs->destroy(addrs);
1883 ifaces->destroy(ifaces);
1884 return SUCCESS;
1885 }
1886 }
1887 addrs->destroy(addrs);
1888 }
1889 ifaces->destroy(ifaces);
1890
1891 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1892 return FAILED;
1893 }
1894
1895 /**
1896 * Get an SPI for a specific protocol from the kernel.
1897 */
1898 static status_t get_spi_internal(private_kernel_interface_t *this,
1899 host_t *src, host_t *dst, u_int8_t proto, u_int32_t min, u_int32_t max,
1900 u_int32_t reqid, u_int32_t *spi)
1901 {
1902 unsigned char request[BUFFER_SIZE];
1903 struct nlmsghdr *hdr, *out;
1904 struct xfrm_userspi_info *userspi;
1905 u_int32_t received_spi = 0;
1906 size_t len;
1907
1908 memset(&request, 0, sizeof(request));
1909
1910 hdr = (struct nlmsghdr*)request;
1911 hdr->nlmsg_flags = NLM_F_REQUEST;
1912 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1913 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1914
1915 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1916 host2xfrm(src, &userspi->info.saddr);
1917 host2xfrm(dst, &userspi->info.id.daddr);
1918 userspi->info.id.proto = proto;
1919 userspi->info.mode = TRUE; /* tunnel mode */
1920 userspi->info.reqid = reqid;
1921 userspi->info.family = src->get_family(src);
1922 userspi->min = min;
1923 userspi->max = max;
1924
1925 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1926 {
1927 hdr = out;
1928 while (NLMSG_OK(hdr, len))
1929 {
1930 switch (hdr->nlmsg_type)
1931 {
1932 case XFRM_MSG_NEWSA:
1933 {
1934 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1935 received_spi = usersa->id.spi;
1936 break;
1937 }
1938 case NLMSG_ERROR:
1939 {
1940 struct nlmsgerr *err = NLMSG_DATA(hdr);
1941
1942 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1943 strerror(-err->error), -err->error);
1944 break;
1945 }
1946 default:
1947 hdr = NLMSG_NEXT(hdr, len);
1948 continue;
1949 case NLMSG_DONE:
1950 break;
1951 }
1952 break;
1953 }
1954 free(out);
1955 }
1956
1957 if (received_spi == 0)
1958 {
1959 return FAILED;
1960 }
1961
1962 *spi = received_spi;
1963 return SUCCESS;
1964 }
1965
1966 /**
1967 * Implementation of kernel_interface_t.get_spi.
1968 */
1969 static status_t get_spi(private_kernel_interface_t *this,
1970 host_t *src, host_t *dst,
1971 protocol_id_t protocol, u_int32_t reqid,
1972 u_int32_t *spi)
1973 {
1974 DBG2(DBG_KNL, "getting SPI for reqid %d", reqid);
1975
1976 if (get_spi_internal(this, src, dst, proto_ike2kernel(protocol),
1977 0xc0000000, 0xcFFFFFFF, reqid, spi) != SUCCESS)
1978 {
1979 DBG1(DBG_KNL, "unable to get SPI for reqid %d", reqid);
1980 return FAILED;
1981 }
1982
1983 DBG2(DBG_KNL, "got SPI 0x%x for reqid %d", *spi, reqid);
1984
1985 return SUCCESS;
1986 }
1987
1988 /**
1989 * Implementation of kernel_interface_t.get_cpi.
1990 */
1991 static status_t get_cpi(private_kernel_interface_t *this,
1992 host_t *src, host_t *dst,
1993 u_int32_t reqid, u_int16_t *cpi)
1994 {
1995 u_int32_t received_spi = 0;
1996 DBG2(DBG_KNL, "getting CPI for reqid %d", reqid);
1997
1998 if (get_spi_internal(this, src, dst,
1999 IPPROTO_COMP, 0x100, 0xEFFF, reqid, &received_spi) != SUCCESS)
2000 {
2001 DBG1(DBG_KNL, "unable to get CPI for reqid %d", reqid);
2002 return FAILED;
2003 }
2004
2005 *cpi = htons((u_int16_t)ntohl(received_spi));
2006
2007 DBG2(DBG_KNL, "got CPI 0x%x for reqid %d", *cpi, reqid);
2008
2009 return SUCCESS;
2010 }
2011
2012 /**
2013 * Implementation of kernel_interface_t.add_sa.
2014 */
2015 static status_t add_sa(private_kernel_interface_t *this,
2016 host_t *src, host_t *dst, u_int32_t spi,
2017 protocol_id_t protocol, u_int32_t reqid,
2018 u_int64_t expire_soft, u_int64_t expire_hard,
2019 u_int16_t enc_alg, u_int16_t enc_size,
2020 u_int16_t int_alg, u_int16_t int_size,
2021 prf_plus_t *prf_plus, mode_t mode,
2022 u_int16_t ipcomp, bool encap,
2023 bool replace)
2024 {
2025 unsigned char request[BUFFER_SIZE];
2026 char *alg_name;
2027 /* additional 4 octets KEYMAT required for AES-GCM as of RFC4106 8.1. */
2028 u_int16_t add_keymat = 32;
2029 struct nlmsghdr *hdr;
2030 struct xfrm_usersa_info *sa;
2031
2032 memset(&request, 0, sizeof(request));
2033
2034 DBG2(DBG_KNL, "adding SAD entry with SPI 0x%x and reqid %d", spi, reqid);
2035
2036 hdr = (struct nlmsghdr*)request;
2037 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2038 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
2039 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2040
2041 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
2042 host2xfrm(src, &sa->saddr);
2043 host2xfrm(dst, &sa->id.daddr);
2044 sa->id.spi = spi;
2045 sa->id.proto = proto_ike2kernel(protocol);
2046 sa->family = src->get_family(src);
2047 sa->mode = mode;
2048 if (mode == MODE_TUNNEL)
2049 {
2050 sa->flags |= XFRM_STATE_AF_UNSPEC;
2051 }
2052 sa->replay_window = (protocol == IPPROTO_COMP) ? 0 : 32;
2053 sa->reqid = reqid;
2054 /* we currently do not expire SAs by volume/packet count */
2055 sa->lft.soft_byte_limit = XFRM_INF;
2056 sa->lft.hard_byte_limit = XFRM_INF;
2057 sa->lft.soft_packet_limit = XFRM_INF;
2058 sa->lft.hard_packet_limit = XFRM_INF;
2059 /* we use lifetimes since added, not since used */
2060 sa->lft.soft_add_expires_seconds = expire_soft;
2061 sa->lft.hard_add_expires_seconds = expire_hard;
2062 sa->lft.soft_use_expires_seconds = 0;
2063 sa->lft.hard_use_expires_seconds = 0;
2064
2065 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
2066
2067 switch (enc_alg)
2068 {
2069 case ENCR_UNDEFINED:
2070 /* no encryption */
2071 break;
2072 case ENCR_AES_CCM_ICV8:
2073 case ENCR_AES_CCM_ICV12:
2074 case ENCR_AES_CCM_ICV16:
2075 /* AES-CCM needs only 3 additional octets KEYMAT as of RFC 4309 7.1. */
2076 add_keymat = 24;
2077 /* fall-through */
2078 case ENCR_AES_GCM_ICV8:
2079 case ENCR_AES_GCM_ICV12:
2080 case ENCR_AES_GCM_ICV16:
2081 {
2082 u_int16_t icv_size = 0;
2083 rthdr->rta_type = XFRMA_ALG_AEAD;
2084 alg_name = lookup_algorithm(encryption_algs, enc_alg, &icv_size);
2085 if (alg_name == NULL)
2086 {
2087 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2088 encryption_algorithm_names, enc_alg);
2089 return FAILED;
2090 }
2091 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2092 encryption_algorithm_names, enc_alg, enc_size);
2093
2094 /* additional KEYMAT required */
2095 enc_size += add_keymat;
2096
2097 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo_aead) + enc_size / 8);
2098 hdr->nlmsg_len += rthdr->rta_len;
2099 if (hdr->nlmsg_len > sizeof(request))
2100 {
2101 return FAILED;
2102 }
2103
2104 struct xfrm_algo_aead* algo = (struct xfrm_algo_aead*)RTA_DATA(rthdr);
2105 algo->alg_key_len = enc_size;
2106 algo->alg_icv_len = icv_size;
2107 strcpy(algo->alg_name, alg_name);
2108 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2109
2110 rthdr = XFRM_RTA_NEXT(rthdr);
2111 break;
2112 }
2113 default:
2114 {
2115 rthdr->rta_type = XFRMA_ALG_CRYPT;
2116 alg_name = lookup_algorithm(encryption_algs, enc_alg, &enc_size);
2117 if (alg_name == NULL)
2118 {
2119 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2120 encryption_algorithm_names, enc_alg);
2121 return FAILED;
2122 }
2123 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2124 encryption_algorithm_names, enc_alg, enc_size);
2125
2126 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + enc_size / 8);
2127 hdr->nlmsg_len += rthdr->rta_len;
2128 if (hdr->nlmsg_len > sizeof(request))
2129 {
2130 return FAILED;
2131 }
2132
2133 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2134 algo->alg_key_len = enc_size;
2135 strcpy(algo->alg_name, alg_name);
2136 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2137
2138 rthdr = XFRM_RTA_NEXT(rthdr);
2139 break;
2140 }
2141 }
2142
2143 if (int_alg != AUTH_UNDEFINED)
2144 {
2145 rthdr->rta_type = XFRMA_ALG_AUTH;
2146 alg_name = lookup_algorithm(integrity_algs, int_alg, &int_size);
2147 if (alg_name == NULL)
2148 {
2149 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2150 integrity_algorithm_names, int_alg);
2151 return FAILED;
2152 }
2153 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
2154 integrity_algorithm_names, int_alg, int_size);
2155
2156 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + int_size / 8);
2157 hdr->nlmsg_len += rthdr->rta_len;
2158 if (hdr->nlmsg_len > sizeof(request))
2159 {
2160 return FAILED;
2161 }
2162
2163 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2164 algo->alg_key_len = int_size;
2165 strcpy(algo->alg_name, alg_name);
2166 prf_plus->get_bytes(prf_plus, int_size / 8, algo->alg_key);
2167
2168 rthdr = XFRM_RTA_NEXT(rthdr);
2169 }
2170
2171 if (ipcomp != IPCOMP_NONE)
2172 {
2173 rthdr->rta_type = XFRMA_ALG_COMP;
2174 alg_name = lookup_algorithm(compression_algs, ipcomp, NULL);
2175 if (alg_name == NULL)
2176 {
2177 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2178 ipcomp_transform_names, ipcomp);
2179 return FAILED;
2180 }
2181 DBG2(DBG_KNL, " using compression algorithm %N",
2182 ipcomp_transform_names, ipcomp);
2183
2184 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo));
2185 hdr->nlmsg_len += rthdr->rta_len;
2186 if (hdr->nlmsg_len > sizeof(request))
2187 {
2188 return FAILED;
2189 }
2190
2191 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2192 algo->alg_key_len = 0;
2193 strcpy(algo->alg_name, alg_name);
2194
2195 rthdr = XFRM_RTA_NEXT(rthdr);
2196 }
2197
2198 if (encap)
2199 {
2200 rthdr->rta_type = XFRMA_ENCAP;
2201 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2202
2203 hdr->nlmsg_len += rthdr->rta_len;
2204 if (hdr->nlmsg_len > sizeof(request))
2205 {
2206 return FAILED;
2207 }
2208
2209 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
2210 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2211 tmpl->encap_sport = htons(src->get_port(src));
2212 tmpl->encap_dport = htons(dst->get_port(dst));
2213 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2214 /* encap_oa could probably be derived from the
2215 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
2216 * pluto does the same as we do here but it uses encap_oa in the
2217 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
2218 * the kernel ignores it anyway
2219 * -> does that mean that NAT-T encap doesn't work in transport mode?
2220 * No. The reason the kernel ignores NAT-OA is that it recomputes
2221 * (or, rather, just ignores) the checksum. If packets pass
2222 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
2223 rthdr = XFRM_RTA_NEXT(rthdr);
2224 }
2225
2226 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2227 {
2228 DBG1(DBG_KNL, "unable to add SAD entry with SPI 0x%x", spi);
2229 return FAILED;
2230 }
2231 return SUCCESS;
2232 }
2233
2234 /**
2235 * Get the replay state (i.e. sequence numbers) of an SA.
2236 */
2237 static status_t get_replay_state(private_kernel_interface_t *this,
2238 u_int32_t spi, protocol_id_t protocol, host_t *dst,
2239 struct xfrm_replay_state *replay)
2240 {
2241 unsigned char request[BUFFER_SIZE];
2242 struct nlmsghdr *hdr, *out = NULL;
2243 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
2244 size_t len;
2245 struct rtattr *rta;
2246 size_t rtasize;
2247
2248 memset(&request, 0, sizeof(request));
2249
2250 DBG2(DBG_KNL, "querying replay state from SAD entry with SPI 0x%x", spi);
2251
2252 hdr = (struct nlmsghdr*)request;
2253 hdr->nlmsg_flags = NLM_F_REQUEST;
2254 hdr->nlmsg_type = XFRM_MSG_GETAE;
2255 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
2256
2257 aevent_id = (struct xfrm_aevent_id*)NLMSG_DATA(hdr);
2258 aevent_id->flags = XFRM_AE_RVAL;
2259
2260 host2xfrm(dst, &aevent_id->sa_id.daddr);
2261 aevent_id->sa_id.spi = spi;
2262 aevent_id->sa_id.proto = proto_ike2kernel(protocol);
2263 aevent_id->sa_id.family = dst->get_family(dst);
2264
2265 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2266 {
2267 hdr = out;
2268 while (NLMSG_OK(hdr, len))
2269 {
2270 switch (hdr->nlmsg_type)
2271 {
2272 case XFRM_MSG_NEWAE:
2273 {
2274 out_aevent = NLMSG_DATA(hdr);
2275 break;
2276 }
2277 case NLMSG_ERROR:
2278 {
2279 struct nlmsgerr *err = NLMSG_DATA(hdr);
2280 DBG1(DBG_KNL, "querying replay state from SAD entry failed: %s (%d)",
2281 strerror(-err->error), -err->error);
2282 break;
2283 }
2284 default:
2285 hdr = NLMSG_NEXT(hdr, len);
2286 continue;
2287 case NLMSG_DONE:
2288 break;
2289 }
2290 break;
2291 }
2292 }
2293
2294 if (out_aevent == NULL)
2295 {
2296 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI 0x%x", spi);
2297 free(out);
2298 return FAILED;
2299 }
2300
2301 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2302 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2303 while(RTA_OK(rta, rtasize))
2304 {
2305 if (rta->rta_type == XFRMA_REPLAY_VAL)
2306 {
2307 memcpy(replay, RTA_DATA(rta), rta->rta_len);
2308 free(out);
2309 return SUCCESS;
2310 }
2311 rta = RTA_NEXT(rta, rtasize);
2312 }
2313
2314 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI 0x%x", spi);
2315 free(out);
2316 return FAILED;
2317 }
2318
2319 /**
2320 * Implementation of kernel_interface_t.update_sa.
2321 */
2322 static status_t update_sa(private_kernel_interface_t *this,
2323 u_int32_t spi, protocol_id_t protocol,
2324 host_t *src, host_t *dst,
2325 host_t *new_src, host_t *new_dst, bool encap)
2326 {
2327 unsigned char request[BUFFER_SIZE], *pos;
2328 struct nlmsghdr *hdr, *out = NULL;
2329 struct xfrm_usersa_id *sa_id;
2330 struct xfrm_usersa_info *out_sa = NULL, *sa;
2331 size_t len;
2332 struct rtattr *rta;
2333 size_t rtasize;
2334 struct xfrm_encap_tmpl* tmpl = NULL;
2335 bool got_replay_state;
2336 struct xfrm_replay_state replay;
2337
2338 memset(&request, 0, sizeof(request));
2339
2340 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x for update", spi);
2341
2342 /* query the exisiting SA first */
2343 hdr = (struct nlmsghdr*)request;
2344 hdr->nlmsg_flags = NLM_F_REQUEST;
2345 hdr->nlmsg_type = XFRM_MSG_GETSA;
2346 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2347
2348 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2349 host2xfrm(dst, &sa_id->daddr);
2350 sa_id->spi = spi;
2351 sa_id->proto = proto_ike2kernel(protocol);
2352 sa_id->family = dst->get_family(dst);
2353
2354 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2355 {
2356 hdr = out;
2357 while (NLMSG_OK(hdr, len))
2358 {
2359 switch (hdr->nlmsg_type)
2360 {
2361 case XFRM_MSG_NEWSA:
2362 {
2363 out_sa = NLMSG_DATA(hdr);
2364 break;
2365 }
2366 case NLMSG_ERROR:
2367 {
2368 struct nlmsgerr *err = NLMSG_DATA(hdr);
2369 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2370 strerror(-err->error), -err->error);
2371 break;
2372 }
2373 default:
2374 hdr = NLMSG_NEXT(hdr, len);
2375 continue;
2376 case NLMSG_DONE:
2377 break;
2378 }
2379 break;
2380 }
2381 }
2382 if (out_sa == NULL)
2383 {
2384 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2385 free(out);
2386 return FAILED;
2387 }
2388
2389 /* try to get the replay state */
2390 got_replay_state = (get_replay_state(
2391 this, spi, protocol, dst, &replay) == SUCCESS);
2392
2393 /* delete the old SA */
2394 if (this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
2395 {
2396 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI 0x%x", spi);
2397 free(out);
2398 return FAILED;
2399 }
2400
2401 DBG2(DBG_KNL, "updating SAD entry with SPI 0x%x from %#H..%#H to %#H..%#H",
2402 spi, src, dst, new_src, new_dst);
2403
2404 /* copy over the SA from out to request */
2405 hdr = (struct nlmsghdr*)request;
2406 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
2407 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2408 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2409 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2410 sa = NLMSG_DATA(hdr);
2411 sa->family = new_dst->get_family(new_dst);
2412
2413 if (!src->ip_equals(src, new_src))
2414 {
2415 host2xfrm(new_src, &sa->saddr);
2416 }
2417 if (!dst->ip_equals(dst, new_dst))
2418 {
2419 host2xfrm(new_dst, &sa->id.daddr);
2420 }
2421
2422 rta = XFRM_RTA(out, struct xfrm_usersa_info);
2423 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
2424 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
2425 while(RTA_OK(rta, rtasize))
2426 {
2427 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2428 if (rta->rta_type != XFRMA_ENCAP || encap)
2429 {
2430 if (rta->rta_type == XFRMA_ENCAP)
2431 { /* update encap tmpl */
2432 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2433 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2434 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2435 }
2436 memcpy(pos, rta, rta->rta_len);
2437 pos += RTA_ALIGN(rta->rta_len);
2438 hdr->nlmsg_len += RTA_ALIGN(rta->rta_len);
2439 }
2440 rta = RTA_NEXT(rta, rtasize);
2441 }
2442
2443 rta = (struct rtattr*)pos;
2444 if (tmpl == NULL && encap)
2445 { /* add tmpl if we are enabling it */
2446 rta->rta_type = XFRMA_ENCAP;
2447 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2448
2449 hdr->nlmsg_len += rta->rta_len;
2450 if (hdr->nlmsg_len > sizeof(request))
2451 {
2452 return FAILED;
2453 }
2454
2455 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2456 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2457 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2458 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2459 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2460
2461 rta = XFRM_RTA_NEXT(rta);
2462 }
2463
2464 if (got_replay_state)
2465 { /* copy the replay data if available */
2466 rta->rta_type = XFRMA_REPLAY_VAL;
2467 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_replay_state));
2468
2469 hdr->nlmsg_len += rta->rta_len;
2470 if (hdr->nlmsg_len > sizeof(request))
2471 {
2472 return FAILED;
2473 }
2474 memcpy(RTA_DATA(rta), &replay, sizeof(replay));
2475
2476 rta = XFRM_RTA_NEXT(rta);
2477 }
2478
2479 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2480 {
2481 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2482 free(out);
2483 return FAILED;
2484 }
2485 free(out);
2486
2487 return SUCCESS;
2488 }
2489
2490 /**
2491 * Implementation of kernel_interface_t.query_sa.
2492 */
2493 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
2494 u_int32_t spi, protocol_id_t protocol,
2495 u_int32_t *use_time)
2496 {
2497 unsigned char request[BUFFER_SIZE];
2498 struct nlmsghdr *out = NULL, *hdr;
2499 struct xfrm_usersa_id *sa_id;
2500 struct xfrm_usersa_info *sa = NULL;
2501 size_t len;
2502
2503 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x", spi);
2504 memset(&request, 0, sizeof(request));
2505
2506 hdr = (struct nlmsghdr*)request;
2507 hdr->nlmsg_flags = NLM_F_REQUEST;
2508 hdr->nlmsg_type = XFRM_MSG_GETSA;
2509 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2510
2511 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2512 host2xfrm(dst, &sa_id->daddr);
2513 sa_id->spi = spi;
2514 sa_id->proto = proto_ike2kernel(protocol);
2515 sa_id->family = dst->get_family(dst);
2516
2517 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2518 {
2519 hdr = out;
2520 while (NLMSG_OK(hdr, len))
2521 {
2522 switch (hdr->nlmsg_type)
2523 {
2524 case XFRM_MSG_NEWSA:
2525 {
2526 sa = NLMSG_DATA(hdr);
2527 break;
2528 }
2529 case NLMSG_ERROR:
2530 {
2531 struct nlmsgerr *err = NLMSG_DATA(hdr);
2532 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2533 strerror(-err->error), -err->error);
2534 break;
2535 }
2536 default:
2537 hdr = NLMSG_NEXT(hdr, len);
2538 continue;
2539 case NLMSG_DONE:
2540 break;
2541 }
2542 break;
2543 }
2544 }
2545
2546 if (sa == NULL)
2547 {
2548 DBG1(DBG_KNL, "unable to query SAD entry with SPI 0x%x", spi);
2549 free(out);
2550 return FAILED;
2551 }
2552
2553 *use_time = sa->curlft.use_time;
2554 free (out);
2555 return SUCCESS;
2556 }
2557
2558 /**
2559 * Implementation of kernel_interface_t.del_sa.
2560 */
2561 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2562 u_int32_t spi, protocol_id_t protocol)
2563 {
2564 unsigned char request[BUFFER_SIZE];
2565 struct nlmsghdr *hdr;
2566 struct xfrm_usersa_id *sa_id;
2567
2568 memset(&request, 0, sizeof(request));
2569
2570 DBG2(DBG_KNL, "deleting SAD entry with SPI 0x%x", spi);
2571
2572 hdr = (struct nlmsghdr*)request;
2573 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2574 hdr->nlmsg_type = XFRM_MSG_DELSA;
2575 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2576
2577 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2578 host2xfrm(dst, &sa_id->daddr);
2579 sa_id->spi = spi;
2580 sa_id->proto = proto_ike2kernel(protocol);
2581 sa_id->family = dst->get_family(dst);
2582
2583 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2584 {
2585 DBG1(DBG_KNL, "unable to delete SAD entry with SPI 0x%x", spi);
2586 return FAILED;
2587 }
2588 DBG2(DBG_KNL, "deleted SAD entry with SPI 0x%x", spi);
2589 return SUCCESS;
2590 }
2591
2592 /**
2593 * Implementation of kernel_interface_t.add_policy.
2594 */
2595 static status_t add_policy(private_kernel_interface_t *this,
2596 host_t *src, host_t *dst,
2597 traffic_selector_t *src_ts,
2598 traffic_selector_t *dst_ts,
2599 policy_dir_t direction, protocol_id_t protocol,
2600 u_int32_t reqid, bool high_prio, mode_t mode,
2601 u_int16_t ipcomp)
2602 {
2603 iterator_t *iterator;
2604 policy_entry_t *current, *policy;
2605 bool found = FALSE;
2606 unsigned char request[BUFFER_SIZE];
2607 struct xfrm_userpolicy_info *policy_info;
2608 struct nlmsghdr *hdr;
2609
2610 /* create a policy */
2611 policy = malloc_thing(policy_entry_t);
2612 memset(policy, 0, sizeof(policy_entry_t));
2613 policy->sel = ts2selector(src_ts, dst_ts);
2614 policy->direction = direction;
2615
2616 /* find the policy, which matches EXACTLY */
2617 pthread_mutex_lock(&this->mutex);
2618 iterator = this->policies->create_iterator(this->policies, TRUE);
2619 while (iterator->iterate(iterator, (void**)&current))
2620 {
2621 if (memcmp(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) == 0 &&
2622 policy->direction == current->direction)
2623 {
2624 /* use existing policy */
2625 current->refcount++;
2626 DBG2(DBG_KNL, "policy %R===%R already exists, increasing "
2627 "refcount", src_ts, dst_ts);
2628 free(policy);
2629 policy = current;
2630 found = TRUE;
2631 break;
2632 }
2633 }
2634 iterator->destroy(iterator);
2635 if (!found)
2636 { /* apply the new one, if we have no such policy */
2637 this->policies->insert_last(this->policies, policy);
2638 policy->refcount = 1;
2639 }
2640
2641 DBG2(DBG_KNL, "adding policy %R===%R", src_ts, dst_ts);
2642
2643 memset(&request, 0, sizeof(request));
2644 hdr = (struct nlmsghdr*)request;
2645 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2646 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2647 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2648
2649 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2650 policy_info->sel = policy->sel;
2651 policy_info->dir = policy->direction;
2652 /* calculate priority based on source selector size, small size = high prio */
2653 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2654 policy_info->priority -= policy->sel.prefixlen_s * 10;
2655 policy_info->priority -= policy->sel.proto ? 2 : 0;
2656 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2657 policy_info->action = XFRM_POLICY_ALLOW;
2658 policy_info->share = XFRM_SHARE_ANY;
2659 pthread_mutex_unlock(&this->mutex);
2660
2661 /* policies don't expire */
2662 policy_info->lft.soft_byte_limit = XFRM_INF;
2663 policy_info->lft.soft_packet_limit = XFRM_INF;
2664 policy_info->lft.hard_byte_limit = XFRM_INF;
2665 policy_info->lft.hard_packet_limit = XFRM_INF;
2666 policy_info->lft.soft_add_expires_seconds = 0;
2667 policy_info->lft.hard_add_expires_seconds = 0;
2668 policy_info->lft.soft_use_expires_seconds = 0;
2669 policy_info->lft.hard_use_expires_seconds = 0;
2670
2671 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2672 rthdr->rta_type = XFRMA_TMPL;
2673 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2674
2675 hdr->nlmsg_len += rthdr->rta_len;
2676 if (hdr->nlmsg_len > sizeof(request))
2677 {
2678 return FAILED;
2679 }
2680
2681 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2682
2683 if (ipcomp != IPCOMP_NONE)
2684 {
2685 tmpl->reqid = reqid;
2686 tmpl->id.proto = IPPROTO_COMP;
2687 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2688 tmpl->mode = mode;
2689 tmpl->optional = direction != POLICY_OUT;
2690 tmpl->family = src->get_family(src);
2691
2692 host2xfrm(src, &tmpl->saddr);
2693 host2xfrm(dst, &tmpl->id.daddr);
2694
2695 /* add an additional xfrm_user_tmpl */
2696 rthdr->rta_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2697 hdr->nlmsg_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2698 if (hdr->nlmsg_len > sizeof(request))
2699 {
2700 return FAILED;
2701 }
2702
2703 tmpl++;
2704 }
2705
2706 tmpl->reqid = reqid;
2707 tmpl->id.proto = proto_ike2kernel(protocol);
2708 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2709 tmpl->mode = mode;
2710 tmpl->family = src->get_family(src);
2711
2712 host2xfrm(src, &tmpl->saddr);
2713 host2xfrm(dst, &tmpl->id.daddr);
2714
2715 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2716 {
2717 DBG1(DBG_KNL, "unable to add policy %R===%R", src_ts, dst_ts);
2718 return FAILED;
2719 }
2720
2721 /* install a route, if:
2722 * - we are NOT updating a policy
2723 * - this is a forward policy (to just get one for each child)
2724 * - we are in tunnel mode
2725 * - we are not using IPv6 (does not work correctly yet!)
2726 * - routing is not disabled via strongswan.conf
2727 */
2728 if (policy->route == NULL && direction == POLICY_FWD &&
2729 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6 &&
2730 this->install_routes)
2731 {
2732 policy->route = malloc_thing(route_entry_t);
2733 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2734 {
2735 /* get the nexthop to src (src as we are in POLICY_FWD).*/
2736 policy->route->gateway = get_route(this, src, TRUE);
2737 policy->route->if_index = get_interface_index(this, dst);
2738 policy->route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2739 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr, policy->route->dst_net.len);
2740 policy->route->prefixlen = policy->sel.prefixlen_s;
2741
2742 if (manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2743 policy->route) != SUCCESS)
2744 {
2745 DBG1(DBG_KNL, "unable to install source route for %H",
2746 policy->route->src_ip);
2747 route_entry_destroy(policy->route);
2748 policy->route = NULL;
2749 }
2750 }
2751 else
2752 {
2753 free(policy->route);
2754 policy->route = NULL;
2755 }
2756 }
2757
2758 return SUCCESS;
2759 }
2760
2761 /**
2762 * Implementation of kernel_interface_t.query_policy.
2763 */
2764 static status_t query_policy(private_kernel_interface_t *this,
2765 traffic_selector_t *src_ts,
2766 traffic_selector_t *dst_ts,
2767 policy_dir_t direction, u_int32_t *use_time)
2768 {
2769 unsigned char request[BUFFER_SIZE];
2770 struct nlmsghdr *out = NULL, *hdr;
2771 struct xfrm_userpolicy_id *policy_id;
2772 struct xfrm_userpolicy_info *policy = NULL;
2773 size_t len;
2774
2775 memset(&request, 0, sizeof(request));
2776
2777 DBG2(DBG_KNL, "querying policy %R===%R", src_ts, dst_ts);
2778
2779 hdr = (struct nlmsghdr*)request;
2780 hdr->nlmsg_flags = NLM_F_REQUEST;
2781 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2782 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2783
2784 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2785 policy_id->sel = ts2selector(src_ts, dst_ts);
2786 policy_id->dir = direction;
2787
2788 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2789 {
2790 hdr = out;
2791 while (NLMSG_OK(hdr, len))
2792 {
2793 switch (hdr->nlmsg_type)
2794 {
2795 case XFRM_MSG_NEWPOLICY:
2796 {
2797 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2798 break;
2799 }
2800 case NLMSG_ERROR:
2801 {
2802 struct nlmsgerr *err = NLMSG_DATA(hdr);
2803 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2804 strerror(-err->error), -err->error);
2805 break;
2806 }
2807 default:
2808 hdr = NLMSG_NEXT(hdr, len);
2809 continue;
2810 case NLMSG_DONE:
2811 break;
2812 }
2813 break;
2814 }
2815 }
2816
2817 if (policy == NULL)
2818 {
2819 DBG2(DBG_KNL, "unable to query policy %R===%R", src_ts, dst_ts);
2820 free(out);
2821 return FAILED;
2822 }
2823 *use_time = (time_t)policy->curlft.use_time;
2824
2825 free(out);
2826 return SUCCESS;
2827 }
2828
2829 /**
2830 * Implementation of kernel_interface_t.del_policy.
2831 */
2832 static status_t del_policy(private_kernel_interface_t *this,
2833 traffic_selector_t *src_ts,
2834 traffic_selector_t *dst_ts,
2835 policy_dir_t direction)
2836 {
2837 policy_entry_t *current, policy, *to_delete = NULL;
2838 route_entry_t *route;
2839 unsigned char request[BUFFER_SIZE];
2840 struct nlmsghdr *hdr;
2841 struct xfrm_userpolicy_id *policy_id;
2842 iterator_t *iterator;
2843
2844 DBG2(DBG_KNL, "deleting policy %R===%R", src_ts, dst_ts);
2845
2846 /* create a policy */
2847 memset(&policy, 0, sizeof(policy_entry_t));
2848 policy.sel = ts2selector(src_ts, dst_ts);
2849 policy.direction = direction;
2850
2851 /* find the policy */
2852 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2853 while (iterator->iterate(iterator, (void**)&current))
2854 {
2855 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2856 policy.direction == current->direction)
2857 {
2858 to_delete = current;
2859 if (--to_delete->refcount > 0)
2860 {
2861 /* is used by more SAs, keep in kernel */
2862 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2863 iterator->destroy(iterator);
2864 return SUCCESS;
2865 }
2866 /* remove if last reference */
2867 iterator->remove(iterator);
2868 break;
2869 }
2870 }
2871 iterator->destroy(iterator);
2872 if (!to_delete)
2873 {
2874 DBG1(DBG_KNL, "deleting policy %R===%R failed, not found", src_ts, dst_ts);
2875 return NOT_FOUND;
2876 }
2877
2878 memset(&request, 0, sizeof(request));
2879
2880 hdr = (struct nlmsghdr*)request;
2881 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2882 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2883 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2884
2885 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2886 policy_id->sel = to_delete->sel;
2887 policy_id->dir = direction;
2888
2889 route = to_delete->route;
2890 free(to_delete);
2891
2892 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2893 {
2894 DBG1(DBG_KNL, "unable to delete policy %R===%R", src_ts, dst_ts);
2895 return FAILED;
2896 }
2897
2898 if (route)
2899 {
2900 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2901 {
2902 DBG1(DBG_KNL, "error uninstalling route installed with "
2903 "policy %R===%R", src_ts, dst_ts);
2904 }
2905 route_entry_destroy(route);
2906 }
2907 return SUCCESS;
2908 }
2909
2910 /**
2911 * Implementation of kernel_interface_t.destroy.
2912 */
2913 static void destroy(private_kernel_interface_t *this)
2914 {
2915 if (this->routing_table)
2916 {
2917 manage_rule(this, RTM_DELRULE, this->routing_table,
2918 this->routing_table_prio);
2919 }
2920
2921 this->job->cancel(this->job);
2922 close(this->socket_xfrm_events);
2923 close(this->socket_xfrm);
2924 close(this->socket_rt_events);
2925 close(this->socket_rt);
2926 this->policies->destroy(this->policies);
2927 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2928 free(this);
2929 }
2930
2931 /*
2932 * Described in header.
2933 */
2934 kernel_interface_t *kernel_interface_create()
2935 {
2936 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
2937 struct sockaddr_nl addr;
2938
2939 /* public functions */
2940 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
2941 this->public.get_cpi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,u_int32_t,u_int16_t*))get_cpi;
2942 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,u_int16_t,u_int16_t,u_int16_t,u_int16_t,prf_plus_t*,mode_t,u_int16_t,bool,bool))add_sa;
2943 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
2944 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
2945 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
2946 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t,u_int16_t))add_policy;
2947 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
2948 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
2949 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
2950 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
2951 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
2952 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
2953 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
2954 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
2955
2956 /* private members */
2957 this->policies = linked_list_create();
2958 this->ifaces = linked_list_create();
2959 this->hiter = NULL;
2960 this->seq = 200;
2961 pthread_mutex_init(&this->mutex, NULL);
2962 pthread_mutex_init(&this->nl_mutex, NULL);
2963 pthread_cond_init(&this->cond, NULL);
2964 timerclear(&this->last_roam);
2965 this->install_routes = lib->settings->get_bool(lib->settings,
2966 "charon.install_routes", TRUE);
2967 this->routing_table = lib->settings->get_int(lib->settings,
2968 "charon.routing_table", IPSEC_ROUTING_TABLE);
2969 this->routing_table_prio = lib->settings->get_int(lib->settings,
2970 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
2971 memset(&addr, 0, sizeof(addr));
2972 addr.nl_family = AF_NETLINK;
2973
2974 /* create and bind RT socket */
2975 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2976 if (this->socket_rt <= 0)
2977 {
2978 charon->kill(charon, "unable to create RT netlink socket");
2979 }
2980 addr.nl_groups = 0;
2981 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
2982 {
2983 charon->kill(charon, "unable to bind RT netlink socket");
2984 }
2985
2986 /* create and bind RT socket for events (address/interface/route changes) */
2987 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2988 if (this->socket_rt_events <= 0)
2989 {
2990 charon->kill(charon, "unable to create RT event socket");
2991 }
2992 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2993 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
2994 if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
2995 {
2996 charon->kill(charon, "unable to bind RT event socket");
2997 }
2998
2999 /* create and bind XFRM socket */
3000 this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
3001 if (this->socket_xfrm <= 0)
3002 {
3003 charon->kill(charon, "unable to create XFRM netlink socket");
3004 }
3005 addr.nl_groups = 0;