0b9d44806b37ee16a281ec8df6d3d5eb421cab80
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /*
2 * Copyright (C) 2006-2008 Tobias Brunner
3 * Copyright (C) 2005-2007 Martin Willi
4 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
5 * Copyright (C) 2006 Daniel Roethlisberger
6 * Copyright (C) 2005 Jan Hutter
7 * Hochschule fuer Technik Rapperswil
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2 of the License, or (at your
12 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 * for more details.
18 *
19 * $Id$
20 */
21
22 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <linux/netlink.h>
26 #include <linux/rtnetlink.h>
27 #include <linux/xfrm.h>
28 #include <linux/udp.h>
29 #include <netinet/in.h>
30 #include <pthread.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <errno.h>
34 #include <string.h>
35 #include <net/if.h>
36 #include <sys/ioctl.h>
37
38 #include "kernel_interface.h"
39
40 #include <daemon.h>
41 #include <utils/linked_list.h>
42 #include <processing/jobs/delete_child_sa_job.h>
43 #include <processing/jobs/rekey_child_sa_job.h>
44 #include <processing/jobs/acquire_job.h>
45 #include <processing/jobs/callback_job.h>
46 #include <processing/jobs/roam_job.h>
47
/** required for Linux 2.6.26 kernel and later */
#ifndef XFRM_STATE_AF_UNSPEC
#define XFRM_STATE_AF_UNSPEC 32
#endif

/** routing table for routes installed by us */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif
/** priority used for that routing table */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** default priority of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** delay before firing roam jobs (ms) */
#define ROAM_DELAY 100

/** size of the buffers used to build netlink requests */
#define BUFFER_SIZE 1024

/**
 * returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'.
 *
 * NLMSG_DATA() yields a void*, so the offset is applied to a char* to avoid
 * relying on the GNU void-pointer arithmetic extension.
 */
#define XFRM_RTA(nlh, x) ((struct rtattr*)((char*)NLMSG_DATA(nlh) + \
										   NLMSG_ALIGN(sizeof(x))))
/**
 * returns a pointer to the next rtattr following rta.
 * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
 */
#define XFRM_RTA_NEXT(rta) ((struct rtattr*)(((char*)(rta)) + \
											 RTA_ALIGN((rta)->rta_len)))
/**
 * returns the total size of attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
85
86 typedef struct kernel_algorithm_t kernel_algorithm_t;
87
88 /**
89 * Mapping from the algorithms defined in IKEv2 to
90 * kernel level algorithm names and their key length
91 */
92 struct kernel_algorithm_t {
93 /**
94 * Identifier specified in IKEv2
95 */
96 int ikev2_id;
97
98 /**
99 * Name of the algorithm, as used as kernel identifier
100 */
101 char *name;
102
103 /**
104 * Key length in bits, if fixed size
105 */
106 u_int key_size;
107 };
108
109 ENUM(policy_dir_names, POLICY_IN, POLICY_FWD,
110 "in",
111 "out",
112 "fwd"
113 );
114
115 #define END_OF_LIST -1
116
117 /**
118 * Algorithms for encryption
119 */
120 static kernel_algorithm_t encryption_algs[] = {
121 /* {ENCR_DES_IV64, "***", 0}, */
122 {ENCR_DES, "des", 64},
123 {ENCR_3DES, "des3_ede", 192},
124 /* {ENCR_RC5, "***", 0}, */
125 /* {ENCR_IDEA, "***", 0}, */
126 {ENCR_CAST, "cast128", 0},
127 {ENCR_BLOWFISH, "blowfish", 0},
128 /* {ENCR_3IDEA, "***", 0}, */
129 /* {ENCR_DES_IV32, "***", 0}, */
130 {ENCR_NULL, "cipher_null", 0},
131 {ENCR_AES_CBC, "aes", 0},
132 /* {ENCR_AES_CTR, "***", 0}, */
133 {ENCR_AES_CCM_ICV8, "rfc4309(ccm(aes))", 64}, /* key_size = ICV size */
134 {ENCR_AES_CCM_ICV12, "rfc4309(ccm(aes))", 96}, /* key_size = ICV size */
135 {ENCR_AES_CCM_ICV16, "rfc4309(ccm(aes))", 128}, /* key_size = ICV size */
136 {ENCR_AES_GCM_ICV8, "rfc4106(gcm(aes))", 64}, /* key_size = ICV size */
137 {ENCR_AES_GCM_ICV12, "rfc4106(gcm(aes))", 96}, /* key_size = ICV size */
138 {ENCR_AES_GCM_ICV16, "rfc4106(gcm(aes))", 128}, /* key_size = ICV size */
139 {END_OF_LIST, NULL, 0},
140 };
141
142 /**
143 * Algorithms for integrity protection
144 */
145 static kernel_algorithm_t integrity_algs[] = {
146 {AUTH_HMAC_MD5_96, "md5", 128},
147 {AUTH_HMAC_SHA1_96, "sha1", 160},
148 {AUTH_HMAC_SHA2_256_128, "sha256", 256},
149 {AUTH_HMAC_SHA2_384_192, "sha384", 384},
150 {AUTH_HMAC_SHA2_512_256, "sha512", 512},
151 /* {AUTH_DES_MAC, "***", 0}, */
152 /* {AUTH_KPDK_MD5, "***", 0}, */
153 {AUTH_AES_XCBC_96, "xcbc(aes)", 128},
154 {END_OF_LIST, NULL, 0},
155 };
156
157 /**
158 * Algorithms for IPComp
159 */
160 static kernel_algorithm_t compression_algs[] = {
161 /* {IPCOMP_OUI, "***", 0}, */
162 {IPCOMP_DEFLATE, "deflate", 0},
163 {IPCOMP_LZS, "lzs", 0},
164 {IPCOMP_LZJH, "lzjh", 0},
165 {END_OF_LIST, NULL, 0},
166 };
167
168 /**
169 * Look up a kernel algorithm name and its key size
170 */
171 static char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
172 u_int16_t ikev2_algo, u_int16_t *key_size)
173 {
174 while (kernel_algo->ikev2_id != END_OF_LIST)
175 {
176 if (ikev2_algo == kernel_algo->ikev2_id)
177 {
178 /* match, evaluate key length */
179 if (key_size && *key_size == 0)
180 { /* update key size if not set */
181 *key_size = kernel_algo->key_size;
182 }
183 return kernel_algo->name;
184 }
185 kernel_algo++;
186 }
187 return NULL;
188 }
189
190 typedef struct route_entry_t route_entry_t;
191
192 /**
193 * installed routing entry
194 */
195 struct route_entry_t {
196
197 /** Index of the interface the route is bound to */
198 int if_index;
199
200 /** Source ip of the route */
201 host_t *src_ip;
202
203 /** gateway for this route */
204 host_t *gateway;
205
206 /** Destination net */
207 chunk_t dst_net;
208
209 /** Destination net prefixlen */
210 u_int8_t prefixlen;
211 };
212
213 /**
214 * destroy an route_entry_t object
215 */
216 static void route_entry_destroy(route_entry_t *this)
217 {
218 this->src_ip->destroy(this->src_ip);
219 this->gateway->destroy(this->gateway);
220 chunk_free(&this->dst_net);
221 free(this);
222 }
223
224 typedef struct policy_entry_t policy_entry_t;
225
226 /**
227 * installed kernel policy.
228 */
229 struct policy_entry_t {
230
231 /** direction of this policy: in, out, forward */
232 u_int8_t direction;
233
234 /** reqid of the policy */
235 u_int32_t reqid;
236
237 /** parameters of installed policy */
238 struct xfrm_selector sel;
239
240 /** associated route installed for this policy */
241 route_entry_t *route;
242
243 /** by how many CHILD_SA's this policy is used */
244 u_int refcount;
245 };
246
247 typedef struct addr_entry_t addr_entry_t;
248
249 /**
250 * IP address in an inface_entry_t
251 */
252 struct addr_entry_t {
253
254 /** The ip address */
255 host_t *ip;
256
257 /** virtual IP managed by us */
258 bool virtual;
259
260 /** scope of the address */
261 u_char scope;
262
263 /** Number of times this IP is used, if virtual */
264 u_int refcount;
265 };
266
267 /**
268 * destroy a addr_entry_t object
269 */
270 static void addr_entry_destroy(addr_entry_t *this)
271 {
272 this->ip->destroy(this->ip);
273 free(this);
274 }
275
276 typedef struct iface_entry_t iface_entry_t;
277
278 /**
279 * A network interface on this system, containing addr_entry_t's
280 */
281 struct iface_entry_t {
282
283 /** interface index */
284 int ifindex;
285
286 /** name of the interface */
287 char ifname[IFNAMSIZ];
288
289 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
290 u_int flags;
291
292 /** list of addresses as host_t */
293 linked_list_t *addrs;
294 };
295
296 /**
297 * destroy an interface entry
298 */
299 static void iface_entry_destroy(iface_entry_t *this)
300 {
301 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
302 free(this);
303 }
304
305 typedef struct private_kernel_interface_t private_kernel_interface_t;
306
307 /**
308 * Private variables and functions of kernel_interface class.
309 */
310 struct private_kernel_interface_t {
311 /**
312 * Public part of the kernel_interface_t object.
313 */
314 kernel_interface_t public;
315
316 /**
317 * mutex to lock access to netlink socket
318 */
319 pthread_mutex_t nl_mutex;
320
321 /**
322 * mutex to lock access to various lists
323 */
324 pthread_mutex_t mutex;
325
326 /**
327 * condition variable to signal virtual IP add/removal
328 */
329 pthread_cond_t cond;
330
331 /**
332 * List of installed policies (policy_entry_t)
333 */
334 linked_list_t *policies;
335
336 /**
337 * Cached list of interfaces and its adresses (iface_entry_t)
338 */
339 linked_list_t *ifaces;
340
341 /**
342 * iterator used in hook()
343 */
344 iterator_t *hiter;
345
346 /**
347 * job receiving netlink events
348 */
349 callback_job_t *job;
350
351 /**
352 * current sequence number for netlink request
353 */
354 int seq;
355
356 /**
357 * Netlink xfrm socket (IPsec)
358 */
359 int socket_xfrm;
360
361 /**
362 * netlink xfrm socket to receive acquire and expire events
363 */
364 int socket_xfrm_events;
365
366 /**
367 * Netlink rt socket (routing)
368 */
369 int socket_rt;
370
371 /**
372 * Netlink rt socket to receive address change events
373 */
374 int socket_rt_events;
375
376 /**
377 * time of the last roam_job
378 */
379 struct timeval last_roam;
380
381 /**
382 * whether to install routes along policies
383 */
384 bool install_routes;
385
386 /**
387 * routing table to install routes
388 */
389 int routing_table;
390
391 /**
392 * priority of used routing table
393 */
394 int routing_table_prio;
395 };
396
397 /**
398 * convert a IKEv2 specific protocol identifier to the kernel one
399 */
400 static u_int8_t proto_ike2kernel(protocol_id_t proto)
401 {
402 switch (proto)
403 {
404 case PROTO_ESP:
405 return IPPROTO_ESP;
406 case PROTO_AH:
407 return IPPROTO_AH;
408 default:
409 return proto;
410 }
411 }
412
413 /**
414 * reverse of ike2kernel
415 */
416 static protocol_id_t proto_kernel2ike(u_int8_t proto)
417 {
418 switch (proto)
419 {
420 case IPPROTO_ESP:
421 return PROTO_ESP;
422 case IPPROTO_AH:
423 return PROTO_AH;
424 default:
425 return proto;
426 }
427 }
428
429 /**
430 * convert a host_t to a struct xfrm_address
431 */
432 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
433 {
434 chunk_t chunk = host->get_address(host);
435 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
436 }
437
438 /**
439 * convert a traffic selector address range to subnet and its mask.
440 */
441 static void ts2subnet(traffic_selector_t* ts,
442 xfrm_address_t *net, u_int8_t *mask)
443 {
444 /* there is no way to do this cleanly, as the address range may
445 * be anything else but a subnet. We use from_addr as subnet
446 * and try to calculate a usable subnet mask.
447 */
448 int byte, bit;
449 bool found = FALSE;
450 chunk_t from, to;
451 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
452
453 from = ts->get_from_address(ts);
454 to = ts->get_to_address(ts);
455
456 *mask = (size * 8);
457 /* go trough all bits of the addresses, beginning in the front.
458 * as long as they are equal, the subnet gets larger
459 */
460 for (byte = 0; byte < size; byte++)
461 {
462 for (bit = 7; bit >= 0; bit--)
463 {
464 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
465 {
466 *mask = ((7 - bit) + (byte * 8));
467 found = TRUE;
468 break;
469 }
470 }
471 if (found)
472 {
473 break;
474 }
475 }
476 memcpy(net, from.ptr, from.len);
477 chunk_free(&from);
478 chunk_free(&to);
479 }
480
481 /**
482 * convert a traffic selector port range to port/portmask
483 */
484 static void ts2ports(traffic_selector_t* ts,
485 u_int16_t *port, u_int16_t *mask)
486 {
487 /* linux does not seem to accept complex portmasks. Only
488 * any or a specific port is allowed. We set to any, if we have
489 * a port range, or to a specific, if we have one port only.
490 */
491 u_int16_t from, to;
492
493 from = ts->get_from_port(ts);
494 to = ts->get_to_port(ts);
495
496 if (from == to)
497 {
498 *port = htons(from);
499 *mask = ~0;
500 }
501 else
502 {
503 *port = 0;
504 *mask = 0;
505 }
506 }
507
508 /**
509 * convert a pair of traffic_selectors to a xfrm_selector
510 */
511 static struct xfrm_selector ts2selector(traffic_selector_t *src,
512 traffic_selector_t *dst)
513 {
514 struct xfrm_selector sel;
515
516 memset(&sel, 0, sizeof(sel));
517 sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
518 /* src or dest proto may be "any" (0), use more restrictive one */
519 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
520 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
521 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
522 ts2ports(dst, &sel.dport, &sel.dport_mask);
523 ts2ports(src, &sel.sport, &sel.sport_mask);
524 sel.ifindex = 0;
525 sel.user = 0;
526
527 return sel;
528 }
529
530 /**
531 * Creates an rtattr and adds it to the netlink message
532 */
533 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
534 size_t buflen)
535 {
536 struct rtattr *rta;
537
538 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
539 {
540 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
541 return;
542 }
543
544 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
545 rta->rta_type = rta_type;
546 rta->rta_len = RTA_LENGTH(data.len);
547 memcpy(RTA_DATA(rta), data.ptr, data.len);
548 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
549 }
550
551 /**
552 * process a XFRM_MSG_ACQUIRE from kernel
553 */
554 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
555 {
556 u_int32_t reqid = 0;
557 int proto = 0;
558 job_t *job;
559 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
560 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
561
562 if (RTA_OK(rtattr, rtsize))
563 {
564 if (rtattr->rta_type == XFRMA_TMPL)
565 {
566 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
567 reqid = tmpl->reqid;
568 proto = tmpl->id.proto;
569 }
570 }
571 switch (proto)
572 {
573 case 0:
574 case IPPROTO_ESP:
575 case IPPROTO_AH:
576 break;
577 default:
578 /* acquire for AH/ESP only, not for IPCOMP */
579 return;
580 }
581 if (reqid == 0)
582 {
583 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
584 return;
585 }
586 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
587 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid {%d}", reqid);
588 job = (job_t*)acquire_job_create(reqid);
589 charon->processor->queue_job(charon->processor, job);
590 }
591
592 /**
593 * process a XFRM_MSG_EXPIRE from kernel
594 */
595 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
596 {
597 job_t *job;
598 protocol_id_t protocol;
599 u_int32_t spi, reqid;
600 struct xfrm_user_expire *expire;
601
602 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
603 protocol = proto_kernel2ike(expire->state.id.proto);
604 spi = expire->state.id.spi;
605 reqid = expire->state.reqid;
606
607 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
608
609 if (protocol != PROTO_ESP && protocol != PROTO_AH)
610 {
611 DBG2(DBG_KNL, "ignoring XFRM_MSG_EXPIRE for SA with SPI %.8x and reqid {%d} "
612 "which is not a CHILD_SA", ntohl(spi), reqid);
613 return;
614 }
615
616 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA with SPI %.8x and reqid {%d}",
617 expire->hard ? "delete" : "rekey", protocol_id_names,
618 protocol, ntohl(spi), reqid);
619 if (expire->hard)
620 {
621 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
622 }
623 else
624 {
625 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
626 }
627 charon->processor->queue_job(charon->processor, job);
628 }
629
630 /**
631 * start a roaming job. We delay it for a second and fire only one job
632 * for multiple events. Otherwise we would create two many jobs.
633 */
634 static void fire_roam_job(private_kernel_interface_t *this, bool address)
635 {
636 struct timeval now;
637
638 if (gettimeofday(&now, NULL) == 0)
639 {
640 if (timercmp(&now, &this->last_roam, >))
641 {
642 now.tv_usec += ROAM_DELAY * 1000;
643 while (now.tv_usec > 1000000)
644 {
645 now.tv_sec++;
646 now.tv_usec -= 1000000;
647 }
648 this->last_roam = now;
649 charon->scheduler->schedule_job(charon->scheduler,
650 (job_t*)roam_job_create(address), ROAM_DELAY);
651 }
652 }
653 }
654
655 /**
656 * get the refcount of a virtual ip
657 */
658 static int get_vip_refcount(private_kernel_interface_t *this, host_t* ip)
659 {
660 iterator_t *ifaces, *addrs;
661 iface_entry_t *iface;
662 addr_entry_t *addr;
663 int refcount = 0;
664
665 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
666 while (ifaces->iterate(ifaces, (void**)&iface))
667 {
668 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
669 while (addrs->iterate(addrs, (void**)&addr))
670 {
671 if (addr->virtual && (iface->flags & IFF_UP) &&
672 ip->ip_equals(ip, addr->ip))
673 {
674 refcount = addr->refcount;
675 break;
676 }
677 }
678 addrs->destroy(addrs);
679 if (refcount)
680 {
681 break;
682 }
683 }
684 ifaces->destroy(ifaces);
685
686 return refcount;
687 }
688
689 /**
690 * process RTM_NEWLINK/RTM_DELLINK from kernel
691 */
692 static void process_link(private_kernel_interface_t *this,
693 struct nlmsghdr *hdr, bool event)
694 {
695 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
696 struct rtattr *rta = IFLA_RTA(msg);
697 size_t rtasize = IFLA_PAYLOAD (hdr);
698 iterator_t *iterator;
699 iface_entry_t *current, *entry = NULL;
700 char *name = NULL;
701 bool update = FALSE;
702
703 while(RTA_OK(rta, rtasize))
704 {
705 switch (rta->rta_type)
706 {
707 case IFLA_IFNAME:
708 name = RTA_DATA(rta);
709 break;
710 }
711 rta = RTA_NEXT(rta, rtasize);
712 }
713 if (!name)
714 {
715 name = "(unknown)";
716 }
717
718 switch (hdr->nlmsg_type)
719 {
720 case RTM_NEWLINK:
721 {
722 if (msg->ifi_flags & IFF_LOOPBACK)
723 { /* ignore loopback interfaces */
724 break;
725 }
726 iterator = this->ifaces->create_iterator_locked(this->ifaces,
727 &this->mutex);
728 while (iterator->iterate(iterator, (void**)&current))
729 {
730 if (current->ifindex == msg->ifi_index)
731 {
732 entry = current;
733 break;
734 }
735 }
736 if (!entry)
737 {
738 entry = malloc_thing(iface_entry_t);
739 entry->ifindex = msg->ifi_index;
740 entry->flags = 0;
741 entry->addrs = linked_list_create();
742 this->ifaces->insert_last(this->ifaces, entry);
743 }
744 memcpy(entry->ifname, name, IFNAMSIZ);
745 entry->ifname[IFNAMSIZ-1] = '\0';
746 if (event)
747 {
748 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
749 {
750 update = TRUE;
751 DBG1(DBG_KNL, "interface %s activated", name);
752 }
753 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
754 {
755 update = TRUE;
756 DBG1(DBG_KNL, "interface %s deactivated", name);
757 }
758 }
759 entry->flags = msg->ifi_flags;
760 iterator->destroy(iterator);
761 break;
762 }
763 case RTM_DELLINK:
764 {
765 iterator = this->ifaces->create_iterator_locked(this->ifaces,
766 &this->mutex);
767 while (iterator->iterate(iterator, (void**)&current))
768 {
769 if (current->ifindex == msg->ifi_index)
770 {
771 /* we do not remove it, as an address may be added to a
772 * "down" interface and we wan't to know that. */
773 current->flags = msg->ifi_flags;
774 break;
775 }
776 }
777 iterator->destroy(iterator);
778 break;
779 }
780 }
781
782 /* send an update to all IKE_SAs */
783 if (update && event)
784 {
785 fire_roam_job(this, TRUE);
786 }
787 }
788
789 /**
790 * process RTM_NEWADDR/RTM_DELADDR from kernel
791 */
792 static void process_addr(private_kernel_interface_t *this,
793 struct nlmsghdr *hdr, bool event)
794 {
795 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
796 struct rtattr *rta = IFA_RTA(msg);
797 size_t rtasize = IFA_PAYLOAD (hdr);
798 host_t *host = NULL;
799 iterator_t *ifaces, *addrs;
800 iface_entry_t *iface;
801 addr_entry_t *addr;
802 chunk_t local = chunk_empty, address = chunk_empty;
803 bool update = FALSE, found = FALSE, changed = FALSE;
804
805 while(RTA_OK(rta, rtasize))
806 {
807 switch (rta->rta_type)
808 {
809 case IFA_LOCAL:
810 local.ptr = RTA_DATA(rta);
811 local.len = RTA_PAYLOAD(rta);
812 break;
813 case IFA_ADDRESS:
814 address.ptr = RTA_DATA(rta);
815 address.len = RTA_PAYLOAD(rta);
816 break;
817 }
818 rta = RTA_NEXT(rta, rtasize);
819 }
820
821 /* For PPP interfaces, we need the IFA_LOCAL address,
822 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
823 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
824 if (local.ptr)
825 {
826 host = host_create_from_chunk(msg->ifa_family, local, 0);
827 }
828 else if (address.ptr)
829 {
830 host = host_create_from_chunk(msg->ifa_family, address, 0);
831 }
832
833 if (host == NULL)
834 { /* bad family? */
835 return;
836 }
837
838 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
839 while (ifaces->iterate(ifaces, (void**)&iface))
840 {
841 if (iface->ifindex == msg->ifa_index)
842 {
843 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
844 while (addrs->iterate(addrs, (void**)&addr))
845 {
846 if (host->ip_equals(host, addr->ip))
847 {
848 found = TRUE;
849 if (hdr->nlmsg_type == RTM_DELADDR)
850 {
851 addrs->remove(addrs);
852 if (!addr->virtual)
853 {
854 changed = TRUE;
855 DBG1(DBG_KNL, "%H disappeared from %s",
856 host, iface->ifname);
857 }
858 addr_entry_destroy(addr);
859 }
860 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
861 {
862 addr->refcount = 1;
863 }
864 }
865 }
866 addrs->destroy(addrs);
867
868 if (hdr->nlmsg_type == RTM_NEWADDR)
869 {
870 if (!found)
871 {
872 found = TRUE;
873 changed = TRUE;
874 addr = malloc_thing(addr_entry_t);
875 addr->ip = host->clone(host);
876 addr->virtual = FALSE;
877 addr->refcount = 1;
878 addr->scope = msg->ifa_scope;
879
880 iface->addrs->insert_last(iface->addrs, addr);
881 if (event)
882 {
883 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
884 }
885 }
886 }
887 if (found && (iface->flags & IFF_UP))
888 {
889 update = TRUE;
890 }
891 break;
892 }
893 }
894 ifaces->destroy(ifaces);
895 host->destroy(host);
896
897 /* send an update to all IKE_SAs */
898 if (update && event && changed)
899 {
900 fire_roam_job(this, TRUE);
901 }
902 }
903
904 /**
905 * process RTM_NEWROUTE from kernel
906 */
907 static void process_route(private_kernel_interface_t *this, struct nlmsghdr *hdr)
908 {
909 struct rtmsg* msg = (struct rtmsg*)(NLMSG_DATA(hdr));
910 struct rtattr *rta = RTM_RTA(msg);
911 size_t rtasize = RTM_PAYLOAD(hdr);
912 host_t *host = NULL;
913
914 while(RTA_OK(rta, rtasize))
915 {
916 switch (rta->rta_type)
917 {
918 case RTA_PREFSRC:
919 host = host_create_from_chunk(msg->rtm_family,
920 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
921 break;
922 }
923 rta = RTA_NEXT(rta, rtasize);
924 }
925 if (host)
926 {
927 if (!get_vip_refcount(this, host))
928 { /* ignore routes added for virtual IPs */
929 fire_roam_job(this, FALSE);
930 }
931 host->destroy(host);
932 }
933 }
934
935 /**
936 * Receives events from kernel
937 */
938 static job_requeue_t receive_events(private_kernel_interface_t *this)
939 {
940 char response[1024];
941 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
942 struct sockaddr_nl addr;
943 socklen_t addr_len = sizeof(addr);
944 int len, oldstate, maxfd, selected;
945 fd_set rfds;
946
947 FD_ZERO(&rfds);
948 FD_SET(this->socket_xfrm_events, &rfds);
949 FD_SET(this->socket_rt_events, &rfds);
950 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
951
952 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
953 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
954 pthread_setcancelstate(oldstate, NULL);
955 if (selected <= 0)
956 {
957 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
958 return JOB_REQUEUE_FAIR;
959 }
960 if (FD_ISSET(this->socket_xfrm_events, &rfds))
961 {
962 selected = this->socket_xfrm_events;
963 }
964 else if (FD_ISSET(this->socket_rt_events, &rfds))
965 {
966 selected = this->socket_rt_events;
967 }
968 else
969 {
970 return JOB_REQUEUE_DIRECT;
971 }
972
973 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
974 (struct sockaddr*)&addr, &addr_len);
975 if (len < 0)
976 {
977 switch (errno)
978 {
979 case EINTR:
980 /* interrupted, try again */
981 return JOB_REQUEUE_DIRECT;
982 case EAGAIN:
983 /* no data ready, select again */
984 return JOB_REQUEUE_DIRECT;
985 default:
986 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
987 sleep(1);
988 return JOB_REQUEUE_FAIR;
989 }
990 }
991 if (addr.nl_pid != 0)
992 { /* not from kernel. not interested, try another one */
993 return JOB_REQUEUE_DIRECT;
994 }
995
996 while (NLMSG_OK(hdr, len))
997 {
998 /* looks good so far, dispatch netlink message */
999 if (selected == this->socket_xfrm_events)
1000 {
1001 switch (hdr->nlmsg_type)
1002 {
1003 case XFRM_MSG_ACQUIRE:
1004 process_acquire(this, hdr);
1005 break;
1006 case XFRM_MSG_EXPIRE:
1007 process_expire(this, hdr);
1008 break;
1009 default:
1010 break;
1011 }
1012 }
1013 else if (selected == this->socket_rt_events)
1014 {
1015 switch (hdr->nlmsg_type)
1016 {
1017 case RTM_NEWADDR:
1018 case RTM_DELADDR:
1019 process_addr(this, hdr, TRUE);
1020 pthread_cond_broadcast(&this->cond);
1021 break;
1022 case RTM_NEWLINK:
1023 case RTM_DELLINK:
1024 process_link(this, hdr, TRUE);
1025 pthread_cond_broadcast(&this->cond);
1026 break;
1027 case RTM_NEWROUTE:
1028 case RTM_DELROUTE:
1029 process_route(this, hdr);
1030 break;
1031 default:
1032 break;
1033 }
1034 }
1035 hdr = NLMSG_NEXT(hdr, len);
1036 }
1037 return JOB_REQUEUE_DIRECT;
1038 }
1039
1040 /**
1041 * send a netlink message and wait for a reply
1042 */
1043 static status_t netlink_send(private_kernel_interface_t *this,
1044 int socket, struct nlmsghdr *in,
1045 struct nlmsghdr **out, size_t *out_len)
1046 {
1047 int len, addr_len;
1048 struct sockaddr_nl addr;
1049 chunk_t result = chunk_empty, tmp;
1050 struct nlmsghdr *msg, peek;
1051
1052 pthread_mutex_lock(&this->nl_mutex);
1053
1054 in->nlmsg_seq = ++this->seq;
1055 in->nlmsg_pid = getpid();
1056
1057 memset(&addr, 0, sizeof(addr));
1058 addr.nl_family = AF_NETLINK;
1059 addr.nl_pid = 0;
1060 addr.nl_groups = 0;
1061
1062 while (TRUE)
1063 {
1064 len = sendto(socket, in, in->nlmsg_len, 0,
1065 (struct sockaddr*)&addr, sizeof(addr));
1066
1067 if (len != in->nlmsg_len)
1068 {
1069 if (errno == EINTR)
1070 {
1071 /* interrupted, try again */
1072 continue;
1073 }
1074 pthread_mutex_unlock(&this->nl_mutex);
1075 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
1076 return FAILED;
1077 }
1078 break;
1079 }
1080
1081 while (TRUE)
1082 {
1083 char buf[4096];
1084 tmp.len = sizeof(buf);
1085 tmp.ptr = buf;
1086 msg = (struct nlmsghdr*)tmp.ptr;
1087
1088 memset(&addr, 0, sizeof(addr));
1089 addr.nl_family = AF_NETLINK;
1090 addr.nl_pid = getpid();
1091 addr.nl_groups = 0;
1092 addr_len = sizeof(addr);
1093
1094 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
1095 (struct sockaddr*)&addr, &addr_len);
1096
1097 if (len < 0)
1098 {
1099 if (errno == EINTR)
1100 {
1101 DBG1(DBG_KNL, "got interrupted");
1102 /* interrupted, try again */
1103 continue;
1104 }
1105 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
1106 pthread_mutex_unlock(&this->nl_mutex);
1107 return FAILED;
1108 }
1109 if (!NLMSG_OK(msg, len))
1110 {
1111 DBG1(DBG_KNL, "received corrupted netlink message");
1112 pthread_mutex_unlock(&this->nl_mutex);
1113 return FAILED;
1114 }
1115 if (msg->nlmsg_seq != this->seq)
1116 {
1117 DBG1(DBG_KNL, "received invalid netlink sequence number");
1118 if (msg->nlmsg_seq < this->seq)
1119 {
1120 continue;
1121 }
1122 pthread_mutex_unlock(&this->nl_mutex);
1123 return FAILED;
1124 }
1125
1126 tmp.len = len;
1127 result = chunk_cata("cc", result, tmp);
1128
1129 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
1130 * numbers to detect multi header messages */
1131 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
1132 (struct sockaddr*)&addr, &addr_len);
1133
1134 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
1135 {
1136 /* seems to be multipart */
1137 continue;
1138 }
1139 break;
1140 }
1141
1142 *out_len = result.len;
1143 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
1144
1145 pthread_mutex_unlock(&this->nl_mutex);
1146
1147 return SUCCESS;
1148 }
1149
1150 /**
1151 * send a netlink message and wait for its acknowlegde
1152 */
1153 static status_t netlink_send_ack(private_kernel_interface_t *this,
1154 int socket, struct nlmsghdr *in)
1155 {
1156 struct nlmsghdr *out, *hdr;
1157 size_t len;
1158
1159 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
1160 {
1161 return FAILED;
1162 }
1163 hdr = out;
1164 while (NLMSG_OK(hdr, len))
1165 {
1166 switch (hdr->nlmsg_type)
1167 {
1168 case NLMSG_ERROR:
1169 {
1170 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
1171
1172 if (err->error)
1173 {
1174 if (-err->error == EEXIST)
1175 { /* do not report existing routes */
1176 free(out);
1177 return ALREADY_DONE;
1178 }
1179 DBG1(DBG_KNL, "received netlink error: %s (%d)",
1180 strerror(-err->error), -err->error);
1181 free(out);
1182 return FAILED;
1183 }
1184 free(out);
1185 return SUCCESS;
1186 }
1187 default:
1188 hdr = NLMSG_NEXT(hdr, len);
1189 continue;
1190 case NLMSG_DONE:
1191 break;
1192 }
1193 break;
1194 }
1195 DBG1(DBG_KNL, "netlink request not acknowlegded");
1196 free(out);
1197 return FAILED;
1198 }
1199
1200 /**
1201 * Initialize a list of local addresses.
1202 */
1203 static status_t init_address_list(private_kernel_interface_t *this)
1204 {
1205 char request[BUFFER_SIZE];
1206 struct nlmsghdr *out, *current, *in;
1207 struct rtgenmsg *msg;
1208 size_t len;
1209 iterator_t *ifaces, *addrs;
1210 iface_entry_t *iface;
1211 addr_entry_t *addr;
1212
1213 DBG1(DBG_KNL, "listening on interfaces:");
1214
1215 memset(&request, 0, sizeof(request));
1216
1217 in = (struct nlmsghdr*)&request;
1218 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1219 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1220 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1221 msg->rtgen_family = AF_UNSPEC;
1222
1223 /* get all links */
1224 in->nlmsg_type = RTM_GETLINK;
1225 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1226 {
1227 return FAILED;
1228 }
1229 current = out;
1230 while (NLMSG_OK(current, len))
1231 {
1232 switch (current->nlmsg_type)
1233 {
1234 case NLMSG_DONE:
1235 break;
1236 case RTM_NEWLINK:
1237 process_link(this, current, FALSE);
1238 /* fall through */
1239 default:
1240 current = NLMSG_NEXT(current, len);
1241 continue;
1242 }
1243 break;
1244 }
1245 free(out);
1246
1247 /* get all interface addresses */
1248 in->nlmsg_type = RTM_GETADDR;
1249 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1250 {
1251 return FAILED;
1252 }
1253 current = out;
1254 while (NLMSG_OK(current, len))
1255 {
1256 switch (current->nlmsg_type)
1257 {
1258 case NLMSG_DONE:
1259 break;
1260 case RTM_NEWADDR:
1261 process_addr(this, current, FALSE);
1262 /* fall through */
1263 default:
1264 current = NLMSG_NEXT(current, len);
1265 continue;
1266 }
1267 break;
1268 }
1269 free(out);
1270
1271 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1272 while (ifaces->iterate(ifaces, (void**)&iface))
1273 {
1274 if (iface->flags & IFF_UP)
1275 {
1276 DBG1(DBG_KNL, " %s", iface->ifname);
1277 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1278 while (addrs->iterate(addrs, (void**)&addr))
1279 {
1280 DBG1(DBG_KNL, " %H", addr->ip);
1281 }
1282 addrs->destroy(addrs);
1283 }
1284 }
1285 ifaces->destroy(ifaces);
1286 return SUCCESS;
1287 }
1288
1289 /**
1290 * iterator hook to iterate over addrs
1291 */
1292 static hook_result_t addr_hook(private_kernel_interface_t *this,
1293 addr_entry_t *in, host_t **out)
1294 {
1295 if (in->virtual)
1296 { /* skip virtual interfaces added by us */
1297 return HOOK_SKIP;
1298 }
1299 if (in->scope >= RT_SCOPE_LINK)
1300 { /* skip addresses with a unusable scope */
1301 return HOOK_SKIP;
1302 }
1303 *out = in->ip;
1304 return HOOK_NEXT;
1305 }
1306
1307 /**
1308 * iterator hook to iterate over ifaces
1309 */
1310 static hook_result_t iface_hook(private_kernel_interface_t *this,
1311 iface_entry_t *in, host_t **out)
1312 {
1313 if (!(in->flags & IFF_UP))
1314 { /* skip interfaces not up */
1315 return HOOK_SKIP;
1316 }
1317
1318 if (this->hiter == NULL)
1319 {
1320 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1321 this->hiter->set_iterator_hook(this->hiter,
1322 (iterator_hook_t*)addr_hook, this);
1323 }
1324 while (this->hiter->iterate(this->hiter, (void**)out))
1325 {
1326 return HOOK_AGAIN;
1327 }
1328 this->hiter->destroy(this->hiter);
1329 this->hiter = NULL;
1330 return HOOK_SKIP;
1331 }
1332
1333 /**
1334 * Implements kernel_interface_t.create_address_iterator.
1335 */
1336 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1337 {
1338 iterator_t *iterator;
1339
1340 /* This iterator is not only hooked, is is double-hooked. As we have stored
1341 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1342 * entries in each interface we iterate. This does the iface_hook. The
1343 * addr_hook returns the ip instead of the addr_entry. */
1344
1345 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1346 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1347 return iterator;
1348 }
1349
1350 /**
1351 * implementation of kernel_interface_t.get_interface_name
1352 */
1353 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1354 {
1355 iterator_t *ifaces, *addrs;
1356 iface_entry_t *iface;
1357 addr_entry_t *addr;
1358 char *name = NULL;
1359
1360 DBG2(DBG_KNL, "getting interface name for %H", ip);
1361
1362 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1363 while (ifaces->iterate(ifaces, (void**)&iface))
1364 {
1365 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1366 while (addrs->iterate(addrs, (void**)&addr))
1367 {
1368 if (ip->ip_equals(ip, addr->ip))
1369 {
1370 name = strdup(iface->ifname);
1371 break;
1372 }
1373 }
1374 addrs->destroy(addrs);
1375 if (name)
1376 {
1377 break;
1378 }
1379 }
1380 ifaces->destroy(ifaces);
1381
1382 if (name)
1383 {
1384 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1385 }
1386 else
1387 {
1388 DBG2(DBG_KNL, "%H is not a local address", ip);
1389 }
1390 return name;
1391 }
1392
1393 /**
1394 * Tries to find an ip address of a local interface that is included in the
1395 * supplied traffic selector.
1396 */
1397 static status_t get_address_by_ts(private_kernel_interface_t *this,
1398 traffic_selector_t *ts, host_t **ip)
1399 {
1400 iterator_t *ifaces, *addrs;
1401 iface_entry_t *iface;
1402 addr_entry_t *addr;
1403 host_t *host;
1404 int family;
1405 bool found = FALSE;
1406
1407 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1408
1409 /* if we have a family which includes localhost, we do not
1410 * search for an IP, we use the default */
1411 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1412
1413 if (family == AF_INET)
1414 {
1415 host = host_create_from_string("127.0.0.1", 0);
1416 }
1417 else
1418 {
1419 host = host_create_from_string("::1", 0);
1420 }
1421
1422 if (ts->includes(ts, host))
1423 {
1424 *ip = host_create_any(family);
1425 host->destroy(host);
1426 DBG2(DBG_KNL, "using host %H", *ip);
1427 return SUCCESS;
1428 }
1429 host->destroy(host);
1430
1431 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1432 while (ifaces->iterate(ifaces, (void**)&iface))
1433 {
1434 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1435 while (addrs->iterate(addrs, (void**)&addr))
1436 {
1437 if (ts->includes(ts, addr->ip))
1438 {
1439 found = TRUE;
1440 *ip = addr->ip->clone(addr->ip);
1441 break;
1442 }
1443 }
1444 addrs->destroy(addrs);
1445 if (found)
1446 {
1447 break;
1448 }
1449 }
1450 ifaces->destroy(ifaces);
1451
1452 if (!found)
1453 {
1454 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1455 return FAILED;
1456 }
1457 DBG2(DBG_KNL, "using host %H", *ip);
1458 return SUCCESS;
1459 }
1460
1461 /**
1462 * get the interface of a local address
1463 */
1464 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1465 {
1466 iterator_t *ifaces, *addrs;
1467 iface_entry_t *iface;
1468 addr_entry_t *addr;
1469 int ifindex = 0;
1470
1471 DBG2(DBG_KNL, "getting iface for %H", ip);
1472
1473 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1474 while (ifaces->iterate(ifaces, (void**)&iface))
1475 {
1476 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1477 while (addrs->iterate(addrs, (void**)&addr))
1478 {
1479 if (ip->ip_equals(ip, addr->ip))
1480 {
1481 ifindex = iface->ifindex;
1482 break;
1483 }
1484 }
1485 addrs->destroy(addrs);
1486 if (ifindex)
1487 {
1488 break;
1489 }
1490 }
1491 ifaces->destroy(ifaces);
1492
1493 if (ifindex == 0)
1494 {
1495 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1496 }
1497 return ifindex;
1498 }
1499
1500 /**
1501 * Manages the creation and deletion of ip addresses on an interface.
1502 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1503 */
1504 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1505 int flags, int if_index, host_t *ip)
1506 {
1507 unsigned char request[BUFFER_SIZE];
1508 struct nlmsghdr *hdr;
1509 struct ifaddrmsg *msg;
1510 chunk_t chunk;
1511
1512 memset(&request, 0, sizeof(request));
1513
1514 chunk = ip->get_address(ip);
1515
1516 hdr = (struct nlmsghdr*)request;
1517 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1518 hdr->nlmsg_type = nlmsg_type;
1519 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1520
1521 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1522 msg->ifa_family = ip->get_family(ip);
1523 msg->ifa_flags = 0;
1524 msg->ifa_prefixlen = 8 * chunk.len;
1525 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1526 msg->ifa_index = if_index;
1527
1528 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1529
1530 return netlink_send_ack(this, this->socket_rt, hdr);
1531 }
1532
1533 /**
1534 * Manages source routes in the routing table.
1535 * By setting the appropriate nlmsg_type, the route added or r.
1536 */
1537 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1538 int flags, route_entry_t *route)
1539 {
1540 unsigned char request[BUFFER_SIZE];
1541 struct nlmsghdr *hdr;
1542 struct rtmsg *msg;
1543 chunk_t chunk;
1544
1545 /* if route is 0.0.0.0/0, we can't install it, as it would
1546 * overwrite the default route. Instead, we add two routes:
1547 * 0.0.0.0/1 and 128.0.0.0/1 */
1548 if (this->routing_table == 0 && route->prefixlen == 0)
1549 {
1550 route_entry_t half;
1551 status_t status;
1552
1553 half.dst_net = chunk_alloca(route->dst_net.len);
1554 memset(half.dst_net.ptr, 0, half.dst_net.len);
1555 half.src_ip = route->src_ip;
1556 half.gateway = route->gateway;
1557 half.if_index = route->if_index;
1558 half.prefixlen = 1;
1559
1560 status = manage_srcroute(this, nlmsg_type, flags, &half);
1561 half.dst_net.ptr[0] |= 0x80;
1562 status = manage_srcroute(this, nlmsg_type, flags, &half);
1563 return status;
1564 }
1565
1566 memset(&request, 0, sizeof(request));
1567
1568 hdr = (struct nlmsghdr*)request;
1569 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1570 hdr->nlmsg_type = nlmsg_type;
1571 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1572
1573 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1574 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1575 msg->rtm_dst_len = route->prefixlen;
1576 msg->rtm_table = this->routing_table;
1577 msg->rtm_protocol = RTPROT_STATIC;
1578 msg->rtm_type = RTN_UNICAST;
1579 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1580
1581 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1582 chunk = route->src_ip->get_address(route->src_ip);
1583 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1584 chunk = route->gateway->get_address(route->gateway);
1585 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1586 chunk.ptr = (char*)&route->if_index;
1587 chunk.len = sizeof(route->if_index);
1588 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1589
1590 return netlink_send_ack(this, this->socket_rt, hdr);
1591 }
1592
1593 /**
1594 * create or delete an rule to use our routing table
1595 */
1596 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1597 u_int32_t table, u_int32_t prio)
1598 {
1599 unsigned char request[BUFFER_SIZE];
1600 struct nlmsghdr *hdr;
1601 struct rtmsg *msg;
1602 chunk_t chunk;
1603
1604 memset(&request, 0, sizeof(request));
1605 hdr = (struct nlmsghdr*)request;
1606 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1607 hdr->nlmsg_type = nlmsg_type;
1608 if (nlmsg_type == RTM_NEWRULE)
1609 {
1610 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1611 }
1612 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1613
1614 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1615 msg->rtm_table = table;
1616 msg->rtm_family = AF_INET;
1617 msg->rtm_protocol = RTPROT_BOOT;
1618 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1619 msg->rtm_type = RTN_UNICAST;
1620
1621 chunk = chunk_from_thing(prio);
1622 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1623
1624 return netlink_send_ack(this, this->socket_rt, hdr);
1625 }
1626
1627 /**
1628 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1629 */
1630 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1631 {
1632 int bit, byte;
1633
1634 if (addr.len != net.len)
1635 {
1636 return FALSE;
1637 }
1638 /* scan through all bits, beginning in the front */
1639 for (byte = 0; byte < addr.len; byte++)
1640 {
1641 for (bit = 7; bit >= 0; bit--)
1642 {
1643 /* check if bits are equal (or we reached the end of the net) */
1644 if (bit + byte * 8 > net_len)
1645 {
1646 return TRUE;
1647 }
1648 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
1649 {
1650 return FALSE;
1651 }
1652 }
1653 }
1654 return TRUE;
1655 }
1656
1657 /**
1658 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1659 */
1660 static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
1661 bool nexthop)
1662 {
1663 unsigned char request[BUFFER_SIZE];
1664 struct nlmsghdr *hdr, *out, *current;
1665 struct rtmsg *msg;
1666 chunk_t chunk;
1667 size_t len;
1668 int best = -1;
1669 host_t *src = NULL, *gtw = NULL;
1670
1671 DBG2(DBG_KNL, "getting address to reach %H", dest);
1672
1673 memset(&request, 0, sizeof(request));
1674
1675 hdr = (struct nlmsghdr*)request;
1676 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
1677 hdr->nlmsg_type = RTM_GETROUTE;
1678 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1679
1680 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1681 msg->rtm_family = dest->get_family(dest);
1682
1683 chunk = dest->get_address(dest);
1684 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1685
1686 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1687 {
1688 DBG1(DBG_KNL, "getting address to %H failed", dest);
1689 return NULL;
1690 }
1691 current = out;
1692 while (NLMSG_OK(current, len))
1693 {
1694 switch (current->nlmsg_type)
1695 {
1696 case NLMSG_DONE:
1697 break;
1698 case RTM_NEWROUTE:
1699 {
1700 struct rtattr *rta;
1701 size_t rtasize;
1702 chunk_t rta_gtw, rta_src, rta_dst;
1703 u_int32_t rta_oif = 0;
1704
1705 rta_gtw = rta_src = rta_dst = chunk_empty;
1706 msg = (struct rtmsg*)(NLMSG_DATA(current));
1707 rta = RTM_RTA(msg);
1708 rtasize = RTM_PAYLOAD(current);
1709 while (RTA_OK(rta, rtasize))
1710 {
1711 switch (rta->rta_type)
1712 {
1713 case RTA_PREFSRC:
1714 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1715 break;
1716 case RTA_GATEWAY:
1717 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1718 break;
1719 case RTA_DST:
1720 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1721 break;
1722 case RTA_OIF:
1723 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1724 {
1725 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1726 }
1727 break;
1728 }
1729 rta = RTA_NEXT(rta, rtasize);
1730 }
1731
1732 /* apply the route if:
1733 * - it is not from our own ipsec routing table
1734 * - is better than a previous one
1735 * - is the default route or
1736 * - its destination net contains our destination
1737 */
1738 if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
1739 && msg->rtm_dst_len > best
1740 && (msg->rtm_dst_len == 0 || /* default route */
1741 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
1742 {
1743 iterator_t *ifaces, *addrs;
1744 iface_entry_t *iface;
1745 addr_entry_t *addr;
1746
1747 best = msg->rtm_dst_len;
1748 if (nexthop)
1749 {
1750 DESTROY_IF(gtw);
1751 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
1752 }
1753 else if (rta_src.ptr)
1754 {
1755 DESTROY_IF(src);
1756 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
1757 if (get_vip_refcount(this, src))
1758 { /* skip source address if it is installed by us */
1759 DESTROY_IF(src);
1760 src = NULL;
1761 current = NLMSG_NEXT(current, len);
1762 continue;
1763 }
1764 }
1765 else
1766 {
1767 /* no source addr, get one from the interfaces */
1768 ifaces = this->ifaces->create_iterator_locked(
1769 this->ifaces, &this->mutex);
1770 while (ifaces->iterate(ifaces, (void**)&iface))
1771 {
1772 if (iface->ifindex == rta_oif)
1773 {
1774 addrs = iface->addrs->create_iterator(
1775 iface->addrs, TRUE);
1776 while (addrs->iterate(addrs, (void**)&addr))
1777 {
1778 chunk_t ip = addr->ip->get_address(addr->ip);
1779 if (msg->rtm_dst_len == 0
1780 || addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
1781 {
1782 DESTROY_IF(src);
1783 src = addr->ip->clone(addr->ip);
1784 break;
1785 }
1786 }
1787 addrs->destroy(addrs);
1788 }
1789 }
1790 ifaces->destroy(ifaces);
1791 }
1792 }
1793 /* FALL through */
1794 }
1795 default:
1796 current = NLMSG_NEXT(current, len);
1797 continue;
1798 }
1799 break;
1800 }
1801 free(out);
1802
1803 if (nexthop)
1804 {
1805 if (gtw)
1806 {
1807 return gtw;
1808 }
1809 return dest->clone(dest);
1810 }
1811 return src;
1812 }
1813
1814 /**
1815 * Implementation of kernel_interface_t.get_source_addr.
1816 */
1817 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1818 {
1819 return get_route(this, dest, FALSE);
1820 }
1821
1822 /**
1823 * Implementation of kernel_interface_t.add_ip.
1824 */
1825 static status_t add_ip(private_kernel_interface_t *this,
1826 host_t *virtual_ip, host_t *iface_ip)
1827 {
1828 iface_entry_t *iface;
1829 addr_entry_t *addr;
1830 iterator_t *addrs, *ifaces;
1831 int ifindex;
1832
1833 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1834
1835 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1836 while (ifaces->iterate(ifaces, (void**)&iface))
1837 {
1838 bool iface_found = FALSE;
1839
1840 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1841 while (addrs->iterate(addrs, (void**)&addr))
1842 {
1843 if (iface_ip->ip_equals(iface_ip, addr->ip))
1844 {
1845 iface_found = TRUE;
1846 }
1847 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1848 {
1849 addr->refcount++;
1850 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1851 virtual_ip, iface->ifname);
1852 addrs->destroy(addrs);
1853 ifaces->destroy(ifaces);
1854 return SUCCESS;
1855 }
1856 }
1857 addrs->destroy(addrs);
1858
1859 if (iface_found)
1860 {
1861 ifindex = iface->ifindex;
1862 addr = malloc_thing(addr_entry_t);
1863 addr->ip = virtual_ip->clone(virtual_ip);
1864 addr->refcount = 0;
1865 addr->virtual = TRUE;
1866 addr->scope = RT_SCOPE_UNIVERSE;
1867 iface->addrs->insert_last(iface->addrs, addr);
1868
1869 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1870 ifindex, virtual_ip) == SUCCESS)
1871 {
1872 while (get_vip_refcount(this, virtual_ip) == 0)
1873 { /* wait until address appears */
1874 pthread_cond_wait(&this->cond, &this->mutex);
1875 }
1876 ifaces->destroy(ifaces);
1877 return SUCCESS;
1878 }
1879 ifaces->destroy(ifaces);
1880 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1881 return FAILED;
1882 }
1883 }
1884 ifaces->destroy(ifaces);
1885
1886 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1887 "virtual IP %H", iface_ip, virtual_ip);
1888 return FAILED;
1889 }
1890
1891 /**
1892 * Implementation of kernel_interface_t.del_ip.
1893 */
1894 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1895 {
1896 iface_entry_t *iface;
1897 addr_entry_t *addr;
1898 iterator_t *addrs, *ifaces;
1899 status_t status;
1900 int ifindex;
1901
1902 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1903
1904 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1905 while (ifaces->iterate(ifaces, (void**)&iface))
1906 {
1907 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1908 while (addrs->iterate(addrs, (void**)&addr))
1909 {
1910 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1911 {
1912 ifindex = iface->ifindex;
1913 if (addr->refcount == 1)
1914 {
1915 status = manage_ipaddr(this, RTM_DELADDR, 0,
1916 ifindex, virtual_ip);
1917 if (status == SUCCESS)
1918 { /* wait until the address is really gone */
1919 while (get_vip_refcount(this, virtual_ip) > 0)
1920 {
1921 pthread_cond_wait(&this->cond, &this->mutex);
1922 }
1923 }
1924 addrs->destroy(addrs);
1925 ifaces->destroy(ifaces);
1926 return status;
1927 }
1928 else
1929 {
1930 addr->refcount--;
1931 }
1932 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1933 virtual_ip);
1934 addrs->destroy(addrs);
1935 ifaces->destroy(ifaces);
1936 return SUCCESS;
1937 }
1938 }
1939 addrs->destroy(addrs);
1940 }
1941 ifaces->destroy(ifaces);
1942
1943 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1944 return FAILED;
1945 }
1946
1947 /**
1948 * Get an SPI for a specific protocol from the kernel.
1949 */
1950 static status_t get_spi_internal(private_kernel_interface_t *this,
1951 host_t *src, host_t *dst, u_int8_t proto, u_int32_t min, u_int32_t max,
1952 u_int32_t reqid, u_int32_t *spi)
1953 {
1954 unsigned char request[BUFFER_SIZE];
1955 struct nlmsghdr *hdr, *out;
1956 struct xfrm_userspi_info *userspi;
1957 u_int32_t received_spi = 0;
1958 size_t len;
1959
1960 memset(&request, 0, sizeof(request));
1961
1962 hdr = (struct nlmsghdr*)request;
1963 hdr->nlmsg_flags = NLM_F_REQUEST;
1964 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1965 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1966
1967 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1968 host2xfrm(src, &userspi->info.saddr);
1969 host2xfrm(dst, &userspi->info.id.daddr);
1970 userspi->info.id.proto = proto;
1971 userspi->info.mode = TRUE; /* tunnel mode */
1972 userspi->info.reqid = reqid;
1973 userspi->info.family = src->get_family(src);
1974 userspi->min = min;
1975 userspi->max = max;
1976
1977 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1978 {
1979 hdr = out;
1980 while (NLMSG_OK(hdr, len))
1981 {
1982 switch (hdr->nlmsg_type)
1983 {
1984 case XFRM_MSG_NEWSA:
1985 {
1986 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1987 received_spi = usersa->id.spi;
1988 break;
1989 }
1990 case NLMSG_ERROR:
1991 {
1992 struct nlmsgerr *err = NLMSG_DATA(hdr);
1993
1994 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1995 strerror(-err->error), -err->error);
1996 break;
1997 }
1998 default:
1999 hdr = NLMSG_NEXT(hdr, len);
2000 continue;
2001 case NLMSG_DONE:
2002 break;
2003 }
2004 break;
2005 }
2006 free(out);
2007 }
2008
2009 if (received_spi == 0)
2010 {
2011 return FAILED;
2012 }
2013
2014 *spi = received_spi;
2015 return SUCCESS;
2016 }
2017
2018 /**
2019 * Implementation of kernel_interface_t.get_spi.
2020 */
2021 static status_t get_spi(private_kernel_interface_t *this,
2022 host_t *src, host_t *dst,
2023 protocol_id_t protocol, u_int32_t reqid,
2024 u_int32_t *spi)
2025 {
2026 DBG2(DBG_KNL, "getting SPI for reqid {%d}", reqid);
2027
2028 if (get_spi_internal(this, src, dst, proto_ike2kernel(protocol),
2029 0xc0000000, 0xcFFFFFFF, reqid, spi) != SUCCESS)
2030 {
2031 DBG1(DBG_KNL, "unable to get SPI for reqid {%d}", reqid);
2032 return FAILED;
2033 }
2034
2035 DBG2(DBG_KNL, "got SPI %.8x for reqid {%d}", ntohl(*spi), reqid);
2036
2037 return SUCCESS;
2038 }
2039
2040 /**
2041 * Implementation of kernel_interface_t.get_cpi.
2042 */
2043 static status_t get_cpi(private_kernel_interface_t *this,
2044 host_t *src, host_t *dst,
2045 u_int32_t reqid, u_int16_t *cpi)
2046 {
2047 u_int32_t received_spi = 0;
2048
2049 DBG2(DBG_KNL, "getting CPI for reqid {%d}", reqid);
2050
2051 if (get_spi_internal(this, src, dst,
2052 IPPROTO_COMP, 0x100, 0xEFFF, reqid, &received_spi) != SUCCESS)
2053 {
2054 DBG1(DBG_KNL, "unable to get CPI for reqid {%d}", reqid);
2055 return FAILED;
2056 }
2057
2058 *cpi = htons((u_int16_t)ntohl(received_spi));
2059
2060 DBG2(DBG_KNL, "got CPI %.4x for reqid {%d}", ntohs(*cpi), reqid);
2061
2062 return SUCCESS;
2063 }
2064
2065 /**
2066 * Implementation of kernel_interface_t.add_sa.
2067 */
2068 static status_t add_sa(private_kernel_interface_t *this,
2069 host_t *src, host_t *dst, u_int32_t spi,
2070 protocol_id_t protocol, u_int32_t reqid,
2071 u_int64_t expire_soft, u_int64_t expire_hard,
2072 u_int16_t enc_alg, u_int16_t enc_size,
2073 u_int16_t int_alg, u_int16_t int_size,
2074 prf_plus_t *prf_plus, mode_t mode,
2075 u_int16_t ipcomp, bool encap,
2076 bool replace)
2077 {
2078 unsigned char request[BUFFER_SIZE];
2079 char *alg_name;
2080 /* additional 4 octets KEYMAT required for AES-GCM as of RFC4106 8.1. */
2081 u_int16_t add_keymat = 32;
2082 struct nlmsghdr *hdr;
2083 struct xfrm_usersa_info *sa;
2084
2085 memset(&request, 0, sizeof(request));
2086
2087 DBG2(DBG_KNL, "adding SAD entry with SPI %.8x and reqid {%d}", ntohl(spi), reqid);
2088
2089 hdr = (struct nlmsghdr*)request;
2090 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2091 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
2092 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2093
2094 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
2095 host2xfrm(src, &sa->saddr);
2096 host2xfrm(dst, &sa->id.daddr);
2097 sa->id.spi = spi;
2098 sa->id.proto = proto_ike2kernel(protocol);
2099 sa->family = src->get_family(src);
2100 sa->mode = mode;
2101 if (mode == MODE_TUNNEL)
2102 {
2103 sa->flags |= XFRM_STATE_AF_UNSPEC;
2104 }
2105 sa->replay_window = (protocol == IPPROTO_COMP) ? 0 : 32;
2106 sa->reqid = reqid;
2107 /* we currently do not expire SAs by volume/packet count */
2108 sa->lft.soft_byte_limit = XFRM_INF;
2109 sa->lft.hard_byte_limit = XFRM_INF;
2110 sa->lft.soft_packet_limit = XFRM_INF;
2111 sa->lft.hard_packet_limit = XFRM_INF;
2112 /* we use lifetimes since added, not since used */
2113 sa->lft.soft_add_expires_seconds = expire_soft;
2114 sa->lft.hard_add_expires_seconds = expire_hard;
2115 sa->lft.soft_use_expires_seconds = 0;
2116 sa->lft.hard_use_expires_seconds = 0;
2117
2118 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
2119
2120 switch (enc_alg)
2121 {
2122 case ENCR_UNDEFINED:
2123 /* no encryption */
2124 break;
2125 case ENCR_AES_CCM_ICV8:
2126 case ENCR_AES_CCM_ICV12:
2127 case ENCR_AES_CCM_ICV16:
2128 /* AES-CCM needs only 3 additional octets KEYMAT as of RFC 4309 7.1. */
2129 add_keymat = 24;
2130 /* fall-through */
2131 case ENCR_AES_GCM_ICV8:
2132 case ENCR_AES_GCM_ICV12:
2133 case ENCR_AES_GCM_ICV16:
2134 {
2135 u_int16_t icv_size = 0;
2136 rthdr->rta_type = XFRMA_ALG_AEAD;
2137 alg_name = lookup_algorithm(encryption_algs, enc_alg, &icv_size);
2138 if (alg_name == NULL)
2139 {
2140 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2141 encryption_algorithm_names, enc_alg);
2142 return FAILED;
2143 }
2144 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2145 encryption_algorithm_names, enc_alg, enc_size);
2146
2147 /* additional KEYMAT required */
2148 enc_size += add_keymat;
2149
2150 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo_aead) + enc_size / 8);
2151 hdr->nlmsg_len += rthdr->rta_len;
2152 if (hdr->nlmsg_len > sizeof(request))
2153 {
2154 return FAILED;
2155 }
2156
2157 struct xfrm_algo_aead* algo = (struct xfrm_algo_aead*)RTA_DATA(rthdr);
2158 algo->alg_key_len = enc_size;
2159 algo->alg_icv_len = icv_size;
2160 strcpy(algo->alg_name, alg_name);
2161 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2162
2163 rthdr = XFRM_RTA_NEXT(rthdr);
2164 break;
2165 }
2166 default:
2167 {
2168 rthdr->rta_type = XFRMA_ALG_CRYPT;
2169 alg_name = lookup_algorithm(encryption_algs, enc_alg, &enc_size);
2170 if (alg_name == NULL)
2171 {
2172 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2173 encryption_algorithm_names, enc_alg);
2174 return FAILED;
2175 }
2176 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2177 encryption_algorithm_names, enc_alg, enc_size);
2178
2179 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + enc_size / 8);
2180 hdr->nlmsg_len += rthdr->rta_len;
2181 if (hdr->nlmsg_len > sizeof(request))
2182 {
2183 return FAILED;
2184 }
2185
2186 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2187 algo->alg_key_len = enc_size;
2188 strcpy(algo->alg_name, alg_name);
2189 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2190
2191 rthdr = XFRM_RTA_NEXT(rthdr);
2192 break;
2193 }
2194 }
2195
2196 if (int_alg != AUTH_UNDEFINED)
2197 {
2198 rthdr->rta_type = XFRMA_ALG_AUTH;
2199 alg_name = lookup_algorithm(integrity_algs, int_alg, &int_size);
2200 if (alg_name == NULL)
2201 {
2202 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2203 integrity_algorithm_names, int_alg);
2204 return FAILED;
2205 }
2206 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
2207 integrity_algorithm_names, int_alg, int_size);
2208
2209 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + int_size / 8);
2210 hdr->nlmsg_len += rthdr->rta_len;
2211 if (hdr->nlmsg_len > sizeof(request))
2212 {
2213 return FAILED;
2214 }
2215
2216 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2217 algo->alg_key_len = int_size;
2218 strcpy(algo->alg_name, alg_name);
2219 prf_plus->get_bytes(prf_plus, int_size / 8, algo->alg_key);
2220
2221 rthdr = XFRM_RTA_NEXT(rthdr);
2222 }
2223
2224 if (ipcomp != IPCOMP_NONE)
2225 {
2226 rthdr->rta_type = XFRMA_ALG_COMP;
2227 alg_name = lookup_algorithm(compression_algs, ipcomp, NULL);
2228 if (alg_name == NULL)
2229 {
2230 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2231 ipcomp_transform_names, ipcomp);
2232 return FAILED;
2233 }
2234 DBG2(DBG_KNL, " using compression algorithm %N",
2235 ipcomp_transform_names, ipcomp);
2236
2237 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo));
2238 hdr->nlmsg_len += rthdr->rta_len;
2239 if (hdr->nlmsg_len > sizeof(request))
2240 {
2241 return FAILED;
2242 }
2243
2244 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2245 algo->alg_key_len = 0;
2246 strcpy(algo->alg_name, alg_name);
2247
2248 rthdr = XFRM_RTA_NEXT(rthdr);
2249 }
2250
2251 if (encap)
2252 {
2253 rthdr->rta_type = XFRMA_ENCAP;
2254 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2255
2256 hdr->nlmsg_len += rthdr->rta_len;
2257 if (hdr->nlmsg_len > sizeof(request))
2258 {
2259 return FAILED;
2260 }
2261
2262 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
2263 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2264 tmpl->encap_sport = htons(src->get_port(src));
2265 tmpl->encap_dport = htons(dst->get_port(dst));
2266 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2267 /* encap_oa could probably be derived from the
2268 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
2269 * pluto does the same as we do here but it uses encap_oa in the
2270 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
2271 * the kernel ignores it anyway
2272 * -> does that mean that NAT-T encap doesn't work in transport mode?
2273 * No. The reason the kernel ignores NAT-OA is that it recomputes
2274 * (or, rather, just ignores) the checksum. If packets pass
2275 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
2276 rthdr = XFRM_RTA_NEXT(rthdr);
2277 }
2278
2279 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2280 {
2281 DBG1(DBG_KNL, "unable to add SAD entry with SPI %.8x", ntohl(spi));
2282 return FAILED;
2283 }
2284 return SUCCESS;
2285 }
2286
2287 /**
2288 * Get the replay state (i.e. sequence numbers) of an SA.
2289 */
2290 static status_t get_replay_state(private_kernel_interface_t *this,
2291 u_int32_t spi, protocol_id_t protocol, host_t *dst,
2292 struct xfrm_replay_state *replay)
2293 {
2294 unsigned char request[BUFFER_SIZE];
2295 struct nlmsghdr *hdr, *out = NULL;
2296 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
2297 size_t len;
2298 struct rtattr *rta;
2299 size_t rtasize;
2300
2301 memset(&request, 0, sizeof(request));
2302
2303 DBG2(DBG_KNL, "querying replay state from SAD entry with SPI %.8x", ntohl(spi));
2304
2305 hdr = (struct nlmsghdr*)request;
2306 hdr->nlmsg_flags = NLM_F_REQUEST;
2307 hdr->nlmsg_type = XFRM_MSG_GETAE;
2308 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
2309
2310 aevent_id = (struct xfrm_aevent_id*)NLMSG_DATA(hdr);
2311 aevent_id->flags = XFRM_AE_RVAL;
2312
2313 host2xfrm(dst, &aevent_id->sa_id.daddr);
2314 aevent_id->sa_id.spi = spi;
2315 aevent_id->sa_id.proto = proto_ike2kernel(protocol);
2316 aevent_id->sa_id.family = dst->get_family(dst);
2317
2318 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2319 {
2320 hdr = out;
2321 while (NLMSG_OK(hdr, len))
2322 {
2323 switch (hdr->nlmsg_type)
2324 {
2325 case XFRM_MSG_NEWAE:
2326 {
2327 out_aevent = NLMSG_DATA(hdr);
2328 break;
2329 }
2330 case NLMSG_ERROR:
2331 {
2332 struct nlmsgerr *err = NLMSG_DATA(hdr);
2333 DBG1(DBG_KNL, "querying replay state from SAD entry failed: %s (%d)",
2334 strerror(-err->error), -err->error);
2335 break;
2336 }
2337 default:
2338 hdr = NLMSG_NEXT(hdr, len);
2339 continue;
2340 case NLMSG_DONE:
2341 break;
2342 }
2343 break;
2344 }
2345 }
2346
2347 if (out_aevent == NULL)
2348 {
2349 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI %.8x",
2350 ntohl(spi));
2351 free(out);
2352 return FAILED;
2353 }
2354
2355 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2356 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2357 while(RTA_OK(rta, rtasize))
2358 {
2359 if (rta->rta_type == XFRMA_REPLAY_VAL)
2360 {
2361 memcpy(replay, RTA_DATA(rta), rta->rta_len);
2362 free(out);
2363 return SUCCESS;
2364 }
2365 rta = RTA_NEXT(rta, rtasize);
2366 }
2367
2368 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI %.8x",
2369 ntohl(spi));
2370 free(out);
2371 return FAILED;
2372 }
2373
2374 /**
2375 * Implementation of kernel_interface_t.update_sa.
2376 */
2377 static status_t update_sa(private_kernel_interface_t *this,
2378 u_int32_t spi, protocol_id_t protocol,
2379 host_t *src, host_t *dst,
2380 host_t *new_src, host_t *new_dst, bool encap)
2381 {
2382 unsigned char request[BUFFER_SIZE], *pos;
2383 struct nlmsghdr *hdr, *out = NULL;
2384 struct xfrm_usersa_id *sa_id;
2385 struct xfrm_usersa_info *out_sa = NULL, *sa;
2386 size_t len;
2387 struct rtattr *rta;
2388 size_t rtasize;
2389 struct xfrm_encap_tmpl* tmpl = NULL;
2390 bool got_replay_state;
2391 struct xfrm_replay_state replay;
2392
2393 memset(&request, 0, sizeof(request));
2394
2395 DBG2(DBG_KNL, "querying SAD entry with SPI %.8x for update", ntohl(spi));
2396
2397 /* query the exisiting SA first */
2398 hdr = (struct nlmsghdr*)request;
2399 hdr->nlmsg_flags = NLM_F_REQUEST;
2400 hdr->nlmsg_type = XFRM_MSG_GETSA;
2401 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2402
2403 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2404 host2xfrm(dst, &sa_id->daddr);
2405 sa_id->spi = spi;
2406 sa_id->proto = proto_ike2kernel(protocol);
2407 sa_id->family = dst->get_family(dst);
2408
2409 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2410 {
2411 hdr = out;
2412 while (NLMSG_OK(hdr, len))
2413 {
2414 switch (hdr->nlmsg_type)
2415 {
2416 case XFRM_MSG_NEWSA:
2417 {
2418 out_sa = NLMSG_DATA(hdr);
2419 break;
2420 }
2421 case NLMSG_ERROR:
2422 {
2423 struct nlmsgerr *err = NLMSG_DATA(hdr);
2424 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2425 strerror(-err->error), -err->error);
2426 break;
2427 }
2428 default:
2429 hdr = NLMSG_NEXT(hdr, len);
2430 continue;
2431 case NLMSG_DONE:
2432 break;
2433 }
2434 break;
2435 }
2436 }
2437 if (out_sa == NULL)
2438 {
2439 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x", ntohl(spi));
2440 free(out);
2441 return FAILED;
2442 }
2443
2444 /* try to get the replay state */
2445 got_replay_state = (get_replay_state(
2446 this, spi, protocol, dst, &replay) == SUCCESS);
2447
2448 /* delete the old SA */
2449 if (this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
2450 {
2451 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI %.8x", ntohl(spi));
2452 free(out);
2453 return FAILED;
2454 }
2455
2456 DBG2(DBG_KNL, "updating SAD entry with SPI %.8x from %#H..%#H to %#H..%#H",
2457 ntohl(spi), src, dst, new_src, new_dst);
2458
2459 /* copy over the SA from out to request */
2460 hdr = (struct nlmsghdr*)request;
2461 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
2462 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2463 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2464 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2465 sa = NLMSG_DATA(hdr);
2466 sa->family = new_dst->get_family(new_dst);
2467
2468 if (!src->ip_equals(src, new_src))
2469 {
2470 host2xfrm(new_src, &sa->saddr);
2471 }
2472 if (!dst->ip_equals(dst, new_dst))
2473 {
2474 host2xfrm(new_dst, &sa->id.daddr);
2475 }
2476
2477 rta = XFRM_RTA(out, struct xfrm_usersa_info);
2478 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
2479 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
2480 while(RTA_OK(rta, rtasize))
2481 {
2482 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2483 if (rta->rta_type != XFRMA_ENCAP || encap)
2484 {
2485 if (rta->rta_type == XFRMA_ENCAP)
2486 { /* update encap tmpl */
2487 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2488 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2489 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2490 }
2491 memcpy(pos, rta, rta->rta_len);
2492 pos += RTA_ALIGN(rta->rta_len);
2493 hdr->nlmsg_len += RTA_ALIGN(rta->rta_len);
2494 }
2495 rta = RTA_NEXT(rta, rtasize);
2496 }
2497
2498 rta = (struct rtattr*)pos;
2499 if (tmpl == NULL && encap)
2500 { /* add tmpl if we are enabling it */
2501 rta->rta_type = XFRMA_ENCAP;
2502 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2503
2504 hdr->nlmsg_len += rta->rta_len;
2505 if (hdr->nlmsg_len > sizeof(request))
2506 {
2507 return FAILED;
2508 }
2509
2510 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2511 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2512 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2513 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2514 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2515
2516 rta = XFRM_RTA_NEXT(rta);
2517 }
2518
2519 if (got_replay_state)
2520 { /* copy the replay data if available */
2521 rta->rta_type = XFRMA_REPLAY_VAL;
2522 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_replay_state));
2523
2524 hdr->nlmsg_len += rta->rta_len;
2525 if (hdr->nlmsg_len > sizeof(request))
2526 {
2527 return FAILED;
2528 }
2529 memcpy(RTA_DATA(rta), &replay, sizeof(replay));
2530
2531 rta = XFRM_RTA_NEXT(rta);
2532 }
2533
2534 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2535 {
2536 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x", ntohl(spi));
2537 free(out);
2538 return FAILED;
2539 }
2540 free(out);
2541
2542 return SUCCESS;
2543 }
2544
2545 /**
2546 * Implementation of kernel_interface_t.query_sa.
2547 */
2548 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
2549 u_int32_t spi, protocol_id_t protocol,
2550 u_int32_t *use_time)
2551 {
2552 unsigned char request[BUFFER_SIZE];
2553 struct nlmsghdr *out = NULL, *hdr;
2554 struct xfrm_usersa_id *sa_id;
2555 struct xfrm_usersa_info *sa = NULL;
2556 size_t len;
2557
2558 DBG2(DBG_KNL, "querying SAD entry with SPI %.8x", ntohl(spi));
2559 memset(&request, 0, sizeof(request));
2560
2561 hdr = (struct nlmsghdr*)request;
2562 hdr->nlmsg_flags = NLM_F_REQUEST;
2563 hdr->nlmsg_type = XFRM_MSG_GETSA;
2564 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2565
2566 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2567 host2xfrm(dst, &sa_id->daddr);
2568 sa_id->spi = spi;
2569 sa_id->proto = proto_ike2kernel(protocol);
2570 sa_id->family = dst->get_family(dst);
2571
2572 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2573 {
2574 hdr = out;
2575 while (NLMSG_OK(hdr, len))
2576 {
2577 switch (hdr->nlmsg_type)
2578 {
2579 case XFRM_MSG_NEWSA:
2580 {
2581 sa = NLMSG_DATA(hdr);
2582 break;
2583 }
2584 case NLMSG_ERROR:
2585 {
2586 struct nlmsgerr *err = NLMSG_DATA(hdr);
2587 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2588 strerror(-err->error), -err->error);
2589 break;
2590 }
2591 default:
2592 hdr = NLMSG_NEXT(hdr, len);
2593 continue;
2594 case NLMSG_DONE:
2595 break;
2596 }
2597 break;
2598 }
2599 }
2600
2601 if (sa == NULL)
2602 {
2603 DBG1(DBG_KNL, "unable to query SAD entry with SPI %.8x", ntohl(spi));
2604 free(out);
2605 return FAILED;
2606 }
2607
2608 *use_time = sa->curlft.use_time;
2609 free (out);
2610 return SUCCESS;
2611 }
2612
2613 /**
2614 * Implementation of kernel_interface_t.del_sa.
2615 */
2616 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2617 u_int32_t spi, protocol_id_t protocol)
2618 {
2619 unsigned char request[BUFFER_SIZE];
2620 struct nlmsghdr *hdr;
2621 struct xfrm_usersa_id *sa_id;
2622
2623 memset(&request, 0, sizeof(request));
2624
2625 DBG2(DBG_KNL, "deleting SAD entry with SPI %.8x", ntohl(spi));
2626
2627 hdr = (struct nlmsghdr*)request;
2628 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2629 hdr->nlmsg_type = XFRM_MSG_DELSA;
2630 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2631
2632 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2633 host2xfrm(dst, &sa_id->daddr);
2634 sa_id->spi = spi;
2635 sa_id->proto = proto_ike2kernel(protocol);
2636 sa_id->family = dst->get_family(dst);
2637
2638 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2639 {
2640 DBG1(DBG_KNL, "unable to delete SAD entry with SPI %.8x", ntohl(spi));
2641 return FAILED;
2642 }
2643 DBG2(DBG_KNL, "deleted SAD entry with SPI %.8x", ntohl(spi));
2644 return SUCCESS;
2645 }
2646
2647 /**
2648 * Implementation of kernel_interface_t.add_policy.
2649 */
2650 static status_t add_policy(private_kernel_interface_t *this,
2651 host_t *src, host_t *dst,
2652 traffic_selector_t *src_ts,
2653 traffic_selector_t *dst_ts,
2654 policy_dir_t direction, protocol_id_t protocol,
2655 u_int32_t reqid, bool high_prio, mode_t mode,
2656 u_int16_t ipcomp)
2657 {
2658 iterator_t *iterator;
2659 policy_entry_t *current, *policy;
2660 bool found = FALSE;
2661 unsigned char request[BUFFER_SIZE];
2662 struct xfrm_userpolicy_info *policy_info;
2663 struct nlmsghdr *hdr;
2664
2665 /* create a policy */
2666 policy = malloc_thing(policy_entry_t);
2667 memset(policy, 0, sizeof(policy_entry_t));
2668 policy->sel = ts2selector(src_ts, dst_ts);
2669 policy->direction = direction;
2670
2671 /* find the policy, which matches EXACTLY */
2672 pthread_mutex_lock(&this->mutex);
2673 iterator = this->policies->create_iterator(this->policies, TRUE);
2674 while (iterator->iterate(iterator, (void**)&current))
2675 {
2676 if (memeq(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) &&
2677 policy->direction == current->direction)
2678 {
2679 /* use existing policy */
2680 current->refcount++;
2681 DBG2(DBG_KNL, "policy %R === %R %N already exists, increasing "
2682 "refcount", src_ts, dst_ts,
2683 policy_dir_names, direction);
2684 free(policy);
2685 policy = current;
2686 found = TRUE;
2687 break;
2688 }
2689 }
2690 iterator->destroy(iterator);
2691 if (!found)
2692 { /* apply the new one, if we have no such policy */
2693 this->policies->insert_last(this->policies, policy);
2694 policy->refcount = 1;
2695 }
2696
2697 DBG2(DBG_KNL, "adding policy %R === %R %N", src_ts, dst_ts,
2698 policy_dir_names, direction);
2699
2700 memset(&request, 0, sizeof(request));
2701 hdr = (struct nlmsghdr*)request;
2702 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2703 hdr->nlmsg_type = found ? XFRM_MSG_UPDPOLICY : XFRM_MSG_NEWPOLICY;
2704 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2705
2706 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2707 policy_info->sel = policy->sel;
2708 policy_info->dir = policy->direction;
2709 /* calculate priority based on source selector size, small size = high prio */
2710 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2711 policy_info->priority -= policy->sel.prefixlen_s * 10;
2712 policy_info->priority -= policy->sel.proto ? 2 : 0;
2713 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2714 policy_info->action = XFRM_POLICY_ALLOW;
2715 policy_info->share = XFRM_SHARE_ANY;
2716 pthread_mutex_unlock(&this->mutex);
2717
2718 /* policies don't expire */
2719 policy_info->lft.soft_byte_limit = XFRM_INF;
2720 policy_info->lft.soft_packet_limit = XFRM_INF;
2721 policy_info->lft.hard_byte_limit = XFRM_INF;
2722 policy_info->lft.hard_packet_limit = XFRM_INF;
2723 policy_info->lft.soft_add_expires_seconds = 0;
2724 policy_info->lft.hard_add_expires_seconds = 0;
2725 policy_info->lft.soft_use_expires_seconds = 0;
2726 policy_info->lft.hard_use_expires_seconds = 0;
2727
2728 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2729 rthdr->rta_type = XFRMA_TMPL;
2730 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2731
2732 hdr->nlmsg_len += rthdr->rta_len;
2733 if (hdr->nlmsg_len > sizeof(request))
2734 {
2735 return FAILED;
2736 }
2737
2738 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2739
2740 if (ipcomp != IPCOMP_NONE)
2741 {
2742 tmpl->reqid = reqid;
2743 tmpl->id.proto = IPPROTO_COMP;
2744 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2745 tmpl->mode = mode;
2746 tmpl->optional = direction != POLICY_OUT;
2747 tmpl->family = src->get_family(src);
2748
2749 host2xfrm(src, &tmpl->saddr);
2750 host2xfrm(dst, &tmpl->id.daddr);
2751
2752 /* add an additional xfrm_user_tmpl */
2753 rthdr->rta_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2754 hdr->nlmsg_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2755 if (hdr->nlmsg_len > sizeof(request))
2756 {
2757 return FAILED;
2758 }
2759
2760 tmpl++;
2761 }
2762
2763 tmpl->reqid = reqid;
2764 tmpl->id.proto = proto_ike2kernel(protocol);
2765 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2766 tmpl->mode = mode;
2767 tmpl->family = src->get_family(src);
2768
2769 host2xfrm(src, &tmpl->saddr);
2770 host2xfrm(dst, &tmpl->id.daddr);
2771
2772 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2773 {
2774 DBG1(DBG_KNL, "unable to add policy %R === %R %N", src_ts, dst_ts,
2775 policy_dir_names, direction);
2776 return FAILED;
2777 }
2778
2779 /* install a route, if:
2780 * - we are NOT updating a policy
2781 * - this is a forward policy (to just get one for each child)
2782 * - we are in tunnel mode
2783 * - we are not using IPv6 (does not work correctly yet!)
2784 * - routing is not disabled via strongswan.conf
2785 */
2786 if (policy->route == NULL && direction == POLICY_FWD &&
2787 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6 &&
2788 this->install_routes)
2789 {
2790 policy->route = malloc_thing(route_entry_t);
2791 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2792 {
2793 /* get the nexthop to src (src as we are in POLICY_FWD).*/
2794 policy->route->gateway = get_route(this, src, TRUE);
2795 policy->route->if_index = get_interface_index(this, dst);
2796 policy->route->dst_net = chunk_alloc(
2797 policy->sel.family == AF_INET ? 4 : 16);
2798 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr,
2799 policy->route->dst_net.len);
2800 policy->route->prefixlen = policy->sel.prefixlen_s;
2801
2802 switch (manage_srcroute(this, RTM_NEWROUTE,
2803 NLM_F_CREATE | NLM_F_EXCL, policy->route))
2804 {
2805 default:
2806 DBG1(DBG_KNL, "unable to install source route for %H",
2807 policy->route->src_ip);
2808 /* FALL */
2809 case ALREADY_DONE:
2810 /* route exists, do not uninstall */
2811 route_entry_destroy(policy->route);
2812 policy->route = NULL;
2813 break;
2814 case SUCCESS:
2815 break;
2816 }
2817 }
2818 else
2819 {
2820 free(policy->route);
2821 policy->route = NULL;
2822 }
2823 }
2824
2825 return SUCCESS;
2826 }
2827
2828 /**
2829 * Implementation of kernel_interface_t.query_policy.
2830 */
2831 static status_t query_policy(private_kernel_interface_t *this,
2832 traffic_selector_t *src_ts,
2833 traffic_selector_t *dst_ts,
2834 policy_dir_t direction, u_int32_t *use_time)
2835 {
2836 unsigned char request[BUFFER_SIZE];
2837 struct nlmsghdr *out = NULL, *hdr;
2838 struct xfrm_userpolicy_id *policy_id;
2839 struct xfrm_userpolicy_info *policy = NULL;
2840 size_t len;
2841
2842 memset(&request, 0, sizeof(request));
2843
2844 DBG2(DBG_KNL, "querying policy %R === %R %N", src_ts, dst_ts,
2845 policy_dir_names, direction);
2846
2847 hdr = (struct nlmsghdr*)request;
2848 hdr->nlmsg_flags = NLM_F_REQUEST;
2849 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2850 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2851
2852 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2853 policy_id->sel = ts2selector(src_ts, dst_ts);
2854 policy_id->dir = direction;
2855
2856 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2857 {
2858 hdr = out;
2859 while (NLMSG_OK(hdr, len))
2860 {
2861 switch (hdr->nlmsg_type)
2862 {
2863 case XFRM_MSG_NEWPOLICY:
2864 {
2865 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2866 break;
2867 }
2868 case NLMSG_ERROR:
2869 {
2870 struct nlmsgerr *err = NLMSG_DATA(hdr);
2871 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2872 strerror(-err->error), -err->error);
2873 break;
2874 }
2875 default:
2876 hdr = NLMSG_NEXT(hdr, len);
2877 continue;
2878 case NLMSG_DONE:
2879 break;
2880 }
2881 break;
2882 }
2883 }
2884
2885 if (policy == NULL)
2886 {
2887 DBG2(DBG_KNL, "unable to query policy %R === %R %N", src_ts, dst_ts,
2888 policy_dir_names, direction);
2889 free(out);
2890 return FAILED;
2891 }
2892 *use_time = (time_t)policy->curlft.use_time;
2893
2894 free(out);
2895 return SUCCESS;
2896 }
2897
2898 /**
2899 * Implementation of kernel_interface_t.del_policy.
2900 */
2901 static status_t del_policy(private_kernel_interface_t *this,
2902 traffic_selector_t *src_ts,
2903 traffic_selector_t *dst_ts,
2904 policy_dir_t direction)
2905 {
2906 policy_entry_t *current, policy, *to_delete = NULL;
2907 route_entry_t *route;
2908 unsigned char request[BUFFER_SIZE];
2909 struct nlmsghdr *hdr;
2910 struct xfrm_userpolicy_id *policy_id;
2911 iterator_t *iterator;
2912
2913 DBG2(DBG_KNL, "deleting policy %R === %R %N", src_ts, dst_ts,
2914 policy_dir_names, direction);
2915
2916 /* create a policy */
2917 memset(&policy, 0, sizeof(policy_entry_t));
2918 policy.sel = ts2selector(src_ts, dst_ts);
2919 policy.direction = direction;
2920
2921 /* find the policy */
2922 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2923 while (iterator->iterate(iterator, (void**)&current))
2924 {
2925 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2926 policy.direction == current->direction)
2927 {
2928 to_delete = current;
2929 if (--to_delete->refcount > 0)
2930 {
2931 /* is used by more SAs, keep in kernel */
2932 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2933 iterator->destroy(iterator);
2934 return SUCCESS;
2935 }
2936 /* remove if last reference */
2937 iterator->remove(iterator);
2938 break;
2939 }
2940 }
2941 iterator->destroy(iterator);
2942 if (!to_delete)
2943 {
2944 DBG1(DBG_KNL, "deleting policy %R === %R %N failed, not found", src_ts,
2945 dst_ts, policy_dir_names, direction);
2946 return NOT_FOUND;
2947 }
2948
2949 memset(&request, 0, sizeof(request));
2950
2951 hdr = (struct nlmsghdr*)request;
2952 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2953 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2954 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2955
2956 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2957 policy_id->sel = to_delete->sel;
2958 policy_id->dir = direction;
2959
2960 route = to_delete->route;
2961 free(to_delete);
2962
2963 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2964 {
2965 DBG1(DBG_KNL, "unable to delete policy %R === %R %N", src_ts, dst_ts,
2966 policy_dir_names, direction);
2967 return FAILED;
2968 }
2969
2970 if (route)
2971 {
2972 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2973 {
2974 DBG1(DBG_KNL, "error uninstalling route installed with "
2975 "policy %R === %R %N", src_ts, dst_ts,
2976 policy_dir_names, direction);
2977 }
2978 route_entry_destroy(route);
2979 }
2980 return SUCCESS;
2981 }
2982
2983 /**
2984 * Implementation of kernel_interface_t.destroy.
2985 */
2986 static void destroy(private_kernel_interface_t *this)
2987 {
2988 if (this->routing_table)
2989 {
2990 manage_rule(this, RTM_DELRULE, this->routing_table,
2991 this->routing_table_prio);
2992 }
2993
2994 this->job->cancel(this->job);
2995 close(this->socket_xfrm_events);
2996 close(this->socket_xfrm);
2997 close(this->socket_rt_events);
2998 close(this->socket_rt);
2999 this->policies->destroy(this->policies);
3000 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
3001 free(this);
3002 }
3003
3004 /*
3005 * Described in header.
3006 */
3007 kernel_interface_t *kernel_interface_create()
3008 {
3009 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
3010 struct sockaddr_nl addr;
3011
3012 /* public functions */
3013 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
3014 this->public.get_cpi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,u_int32_t,u_int16_t*))get_cpi;
3015 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,u_int16_t,u_int16_t,u_int16_t,u_int16_t,prf_plus_t*,mode_t,u_int16_t,bool,bool))add_sa;
3016 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
3017 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
3018 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
3019 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t,u_int16_t))add_policy;
3020 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
3021 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
3022 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
3023 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
3024 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
3025 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
3026 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
3027 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
3028
3029 /* private members */
3030 this->policies = linked_list_create();
3031 this->ifaces = linked_list_create();
3032 this->hiter = NULL;
3033 this->seq = 200;
3034 pthread_mutex_init(&this->mutex, NULL);
3035 pthread_mutex_init(&this->nl_mutex, NULL);
3036 pthread_cond_init(&this->cond, NULL);
3037 timerclear(&this->last_roam);
3038 this->install_routes = lib->settings->get_bool(lib->settings,
3039 "charon.install_routes", TRUE);
3040 this->routing_table = lib->settings->get_int(lib->settings,
3041 "charon.routing_table", IPSEC_ROUTING_TABLE);
3042 this->routing_table_prio = lib->settings->get_int(lib->settings,
3043 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
3044 memset(&addr, 0, sizeof(addr));
3045 addr.nl_family = AF_NETLINK;
3046
3047 /* create and bind RT socket */
3048 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
3049 if (this->socket_rt <= 0)
3050 {
3051 charon->kill(charon, "unable to create RT netlink socket");
3052 }
3053 addr.nl_groups = 0;
3054 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
3055 {
3056 charon->kill(charon, "unable to bind RT netlink socket");
3057 }
3058
3059 /* create and bind RT socket for events (address/interface/route changes) */
3060 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
3061 if (this->socket_rt_events <= 0)
3062 {
3063 charon->kill(charon, "unable to create RT event socket");
3064 }
3065 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
3066 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
3067 if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
3068 {
3069 charon->kill(charon, "unable to bind RT event socket");
3070 }
3071
3072 /* create and bind XFRM socket */
3073 this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
3074 if (this->socket_xfrm <= 0)
3075 {
3076 charon->kill(charon, "unable to create XFRM netlink socket");
3077 }
3078 addr.nl_groups = 0;
3079 if (bind(this->socket_xfrm, (struct sockaddr*)&addr, sizeof(addr)))
3080 {
3081 charon->kill(charon, "unable to bind XFRM netlink socket");
3082 }
3083
3084 /* create and bind XFRM socket for ACQUIRE & EXPIRE */
3085 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
3086 if (this->socket_xfrm_events <= 0)
3087 {
3088 charon->kill(charon, "unable to create XFRM event socket");
3089 }
3090 addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
3091 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
3092 {
3093 charon->kill(charon, "unable to bind XFRM event socket");
3094 }
3095
3096 this->job = callback_job_create((callback_job_cb_t)receive_events,
3097 this, NULL, NULL);
3098 charon->processor->queue_job(charon->processor, (job_t*)this->job);
3099
3100 if (init_address_list(this) != SUCCESS)
3101 {
3102 charon->kill(charon, "unable to get interface list");
3103 }
3104
3105 if (this->routing_table)
3106 {
3107 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
3108 this->routing_table_prio) != SUCCESS)
3109 {
3110 DBG1(DBG_KNL, "unable to create routing table rule");
3111 }
3112 }
3113
3114 return &this->public;
3115 }
3116