c5fba5238b096188cdaa755880400ccb7fa868ab
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /*
2 * Copyright (C) 2006-2008 Tobias Brunner
3 * Copyright (C) 2005-2007 Martin Willi
4 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
5 * Copyright (C) 2006 Daniel Roethlisberger
6 * Copyright (C) 2005 Jan Hutter
7 * Hochschule fuer Technik Rapperswil
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2 of the License, or (at your
12 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 * for more details.
18 *
19 * $Id$
20 */
21
22 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <linux/netlink.h>
26 #include <linux/rtnetlink.h>
27 #include <linux/xfrm.h>
28 #include <linux/udp.h>
29 #include <netinet/in.h>
30 #include <pthread.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <errno.h>
34 #include <string.h>
35 #include <net/if.h>
36 #include <sys/ioctl.h>
37
38 #include "kernel_interface.h"
39
40 #include <daemon.h>
41 #include <utils/linked_list.h>
42 #include <processing/jobs/delete_child_sa_job.h>
43 #include <processing/jobs/rekey_child_sa_job.h>
44 #include <processing/jobs/acquire_job.h>
45 #include <processing/jobs/callback_job.h>
46 #include <processing/jobs/roam_job.h>
47
/** required for Linux 2.6.26 kernel and later */
#ifndef XFRM_STATE_AF_UNSPEC
#define XFRM_STATE_AF_UNSPEC 32
#endif

/** routing table for routes installed by us, unless defined at build time */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif
/** priority of the routing table above, unless defined at build time */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** default priority of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** delay before firing roam jobs (ms) */
#define ROAM_DELAY 100

/** size in bytes of the buffers used to assemble netlink requests */
#define BUFFER_SIZE 1024
69
/**
 * returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'.
 * NLMSG_DATA() yields a void*, so it is cast to char* before the offset is
 * added; arithmetic on void* is a compiler extension only.
 */
#define XFRM_RTA(nlh, x) ((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + NLMSG_ALIGN(sizeof(x))))
/**
 * returns a pointer to the next rtattr following rta.
 * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
 */
#define XFRM_RTA_NEXT(rta) ((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
/**
 * returns the total size of attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
85
86 typedef struct kernel_algorithm_t kernel_algorithm_t;
87
88 /**
89 * Mapping from the algorithms defined in IKEv2 to
90 * kernel level algorithm names and their key length
91 */
92 struct kernel_algorithm_t {
93 /**
94 * Identifier specified in IKEv2
95 */
96 int ikev2_id;
97
98 /**
99 * Name of the algorithm, as used as kernel identifier
100 */
101 char *name;
102
103 /**
104 * Key length in bits, if fixed size
105 */
106 u_int key_size;
107 };
108 #define END_OF_LIST -1
109
110 /**
111 * Algorithms for encryption
112 */
113 static kernel_algorithm_t encryption_algs[] = {
114 /* {ENCR_DES_IV64, "***", 0}, */
115 {ENCR_DES, "des", 64},
116 {ENCR_3DES, "des3_ede", 192},
117 /* {ENCR_RC5, "***", 0}, */
118 /* {ENCR_IDEA, "***", 0}, */
119 {ENCR_CAST, "cast128", 0},
120 {ENCR_BLOWFISH, "blowfish", 0},
121 /* {ENCR_3IDEA, "***", 0}, */
122 /* {ENCR_DES_IV32, "***", 0}, */
123 {ENCR_NULL, "cipher_null", 0},
124 {ENCR_AES_CBC, "aes", 0},
125 /* {ENCR_AES_CTR, "***", 0}, */
126 {ENCR_AES_CCM_ICV8, "rfc4309(ccm(aes))", 64}, /* key_size = ICV size */
127 {ENCR_AES_CCM_ICV12, "rfc4309(ccm(aes))", 96}, /* key_size = ICV size */
128 {ENCR_AES_CCM_ICV16, "rfc4309(ccm(aes))", 128}, /* key_size = ICV size */
129 {ENCR_AES_GCM_ICV8, "rfc4106(gcm(aes))", 64}, /* key_size = ICV size */
130 {ENCR_AES_GCM_ICV12, "rfc4106(gcm(aes))", 96}, /* key_size = ICV size */
131 {ENCR_AES_GCM_ICV16, "rfc4106(gcm(aes))", 128}, /* key_size = ICV size */
132 {END_OF_LIST, NULL, 0},
133 };
134
135 /**
136 * Algorithms for integrity protection
137 */
138 static kernel_algorithm_t integrity_algs[] = {
139 {AUTH_HMAC_MD5_96, "md5", 128},
140 {AUTH_HMAC_SHA1_96, "sha1", 160},
141 {AUTH_HMAC_SHA2_256_128, "sha256", 256},
142 {AUTH_HMAC_SHA2_384_192, "sha384", 384},
143 {AUTH_HMAC_SHA2_512_256, "sha512", 512},
144 /* {AUTH_DES_MAC, "***", 0}, */
145 /* {AUTH_KPDK_MD5, "***", 0}, */
146 {AUTH_AES_XCBC_96, "xcbc(aes)", 128},
147 {END_OF_LIST, NULL, 0},
148 };
149
150 /**
151 * Algorithms for IPComp
152 */
153 static kernel_algorithm_t compression_algs[] = {
154 /* {IPCOMP_OUI, "***", 0}, */
155 {IPCOMP_DEFLATE, "deflate", 0},
156 {IPCOMP_LZS, "lzs", 0},
157 {IPCOMP_LZJH, "lzjh", 0},
158 {END_OF_LIST, NULL, 0},
159 };
160
161 /**
162 * Look up a kernel algorithm name and its key size
163 */
164 static char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
165 u_int16_t ikev2_algo, u_int16_t *key_size)
166 {
167 while (kernel_algo->ikev2_id != END_OF_LIST)
168 {
169 if (ikev2_algo == kernel_algo->ikev2_id)
170 {
171 /* match, evaluate key length */
172 if (key_size && *key_size == 0)
173 { /* update key size if not set */
174 *key_size = kernel_algo->key_size;
175 }
176 return kernel_algo->name;
177 }
178 kernel_algo++;
179 }
180 return NULL;
181 }
182
183 typedef struct route_entry_t route_entry_t;
184
185 /**
186 * installed routing entry
187 */
188 struct route_entry_t {
189
190 /** Index of the interface the route is bound to */
191 int if_index;
192
193 /** Source ip of the route */
194 host_t *src_ip;
195
196 /** gateway for this route */
197 host_t *gateway;
198
199 /** Destination net */
200 chunk_t dst_net;
201
202 /** Destination net prefixlen */
203 u_int8_t prefixlen;
204 };
205
206 /**
207 * destroy an route_entry_t object
208 */
209 static void route_entry_destroy(route_entry_t *this)
210 {
211 this->src_ip->destroy(this->src_ip);
212 this->gateway->destroy(this->gateway);
213 chunk_free(&this->dst_net);
214 free(this);
215 }
216
217 typedef struct policy_entry_t policy_entry_t;
218
219 /**
220 * installed kernel policy.
221 */
222 struct policy_entry_t {
223
224 /** direction of this policy: in, out, forward */
225 u_int8_t direction;
226
227 /** reqid of the policy */
228 u_int32_t reqid;
229
230 /** parameters of installed policy */
231 struct xfrm_selector sel;
232
233 /** associated route installed for this policy */
234 route_entry_t *route;
235
236 /** by how many CHILD_SA's this policy is used */
237 u_int refcount;
238 };
239
240 typedef struct addr_entry_t addr_entry_t;
241
242 /**
243 * IP address in an inface_entry_t
244 */
245 struct addr_entry_t {
246
247 /** The ip address */
248 host_t *ip;
249
250 /** virtual IP managed by us */
251 bool virtual;
252
253 /** scope of the address */
254 u_char scope;
255
256 /** Number of times this IP is used, if virtual */
257 u_int refcount;
258 };
259
260 /**
261 * destroy a addr_entry_t object
262 */
263 static void addr_entry_destroy(addr_entry_t *this)
264 {
265 this->ip->destroy(this->ip);
266 free(this);
267 }
268
269 typedef struct iface_entry_t iface_entry_t;
270
271 /**
272 * A network interface on this system, containing addr_entry_t's
273 */
274 struct iface_entry_t {
275
276 /** interface index */
277 int ifindex;
278
279 /** name of the interface */
280 char ifname[IFNAMSIZ];
281
282 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
283 u_int flags;
284
285 /** list of addresses as host_t */
286 linked_list_t *addrs;
287 };
288
289 /**
290 * destroy an interface entry
291 */
292 static void iface_entry_destroy(iface_entry_t *this)
293 {
294 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
295 free(this);
296 }
297
298 typedef struct private_kernel_interface_t private_kernel_interface_t;
299
300 /**
301 * Private variables and functions of kernel_interface class.
302 */
303 struct private_kernel_interface_t {
304 /**
305 * Public part of the kernel_interface_t object.
306 */
307 kernel_interface_t public;
308
309 /**
310 * mutex to lock access to netlink socket
311 */
312 pthread_mutex_t nl_mutex;
313
314 /**
315 * mutex to lock access to various lists
316 */
317 pthread_mutex_t mutex;
318
319 /**
320 * condition variable to signal virtual IP add/removal
321 */
322 pthread_cond_t cond;
323
324 /**
325 * List of installed policies (policy_entry_t)
326 */
327 linked_list_t *policies;
328
329 /**
330 * Cached list of interfaces and its adresses (iface_entry_t)
331 */
332 linked_list_t *ifaces;
333
334 /**
335 * iterator used in hook()
336 */
337 iterator_t *hiter;
338
339 /**
340 * job receiving netlink events
341 */
342 callback_job_t *job;
343
344 /**
345 * current sequence number for netlink request
346 */
347 int seq;
348
349 /**
350 * Netlink xfrm socket (IPsec)
351 */
352 int socket_xfrm;
353
354 /**
355 * netlink xfrm socket to receive acquire and expire events
356 */
357 int socket_xfrm_events;
358
359 /**
360 * Netlink rt socket (routing)
361 */
362 int socket_rt;
363
364 /**
365 * Netlink rt socket to receive address change events
366 */
367 int socket_rt_events;
368
369 /**
370 * time of the last roam_job
371 */
372 struct timeval last_roam;
373
374 /**
375 * whether to install routes along policies
376 */
377 bool install_routes;
378
379 /**
380 * routing table to install routes
381 */
382 int routing_table;
383
384 /**
385 * priority of used routing table
386 */
387 int routing_table_prio;
388 };
389
390 /**
391 * convert a IKEv2 specific protocol identifier to the kernel one
392 */
393 static u_int8_t proto_ike2kernel(protocol_id_t proto)
394 {
395 switch (proto)
396 {
397 case PROTO_ESP:
398 return IPPROTO_ESP;
399 case PROTO_AH:
400 return IPPROTO_AH;
401 default:
402 return proto;
403 }
404 }
405
406 /**
407 * reverse of ike2kernel
408 */
409 static protocol_id_t proto_kernel2ike(u_int8_t proto)
410 {
411 switch (proto)
412 {
413 case IPPROTO_ESP:
414 return PROTO_ESP;
415 case IPPROTO_AH:
416 return PROTO_AH;
417 default:
418 return proto;
419 }
420 }
421
422 /**
423 * convert a host_t to a struct xfrm_address
424 */
425 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
426 {
427 chunk_t chunk = host->get_address(host);
428 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
429 }
430
431 /**
432 * convert a traffic selector address range to subnet and its mask.
433 */
434 static void ts2subnet(traffic_selector_t* ts,
435 xfrm_address_t *net, u_int8_t *mask)
436 {
437 /* there is no way to do this cleanly, as the address range may
438 * be anything else but a subnet. We use from_addr as subnet
439 * and try to calculate a usable subnet mask.
440 */
441 int byte, bit;
442 bool found = FALSE;
443 chunk_t from, to;
444 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
445
446 from = ts->get_from_address(ts);
447 to = ts->get_to_address(ts);
448
449 *mask = (size * 8);
450 /* go trough all bits of the addresses, beginning in the front.
451 * as long as they are equal, the subnet gets larger
452 */
453 for (byte = 0; byte < size; byte++)
454 {
455 for (bit = 7; bit >= 0; bit--)
456 {
457 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
458 {
459 *mask = ((7 - bit) + (byte * 8));
460 found = TRUE;
461 break;
462 }
463 }
464 if (found)
465 {
466 break;
467 }
468 }
469 memcpy(net, from.ptr, from.len);
470 chunk_free(&from);
471 chunk_free(&to);
472 }
473
474 /**
475 * convert a traffic selector port range to port/portmask
476 */
477 static void ts2ports(traffic_selector_t* ts,
478 u_int16_t *port, u_int16_t *mask)
479 {
480 /* linux does not seem to accept complex portmasks. Only
481 * any or a specific port is allowed. We set to any, if we have
482 * a port range, or to a specific, if we have one port only.
483 */
484 u_int16_t from, to;
485
486 from = ts->get_from_port(ts);
487 to = ts->get_to_port(ts);
488
489 if (from == to)
490 {
491 *port = htons(from);
492 *mask = ~0;
493 }
494 else
495 {
496 *port = 0;
497 *mask = 0;
498 }
499 }
500
501 /**
502 * convert a pair of traffic_selectors to a xfrm_selector
503 */
504 static struct xfrm_selector ts2selector(traffic_selector_t *src,
505 traffic_selector_t *dst)
506 {
507 struct xfrm_selector sel;
508
509 memset(&sel, 0, sizeof(sel));
510 sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
511 /* src or dest proto may be "any" (0), use more restrictive one */
512 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
513 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
514 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
515 ts2ports(dst, &sel.dport, &sel.dport_mask);
516 ts2ports(src, &sel.sport, &sel.sport_mask);
517 sel.ifindex = 0;
518 sel.user = 0;
519
520 return sel;
521 }
522
523 /**
524 * Creates an rtattr and adds it to the netlink message
525 */
526 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
527 size_t buflen)
528 {
529 struct rtattr *rta;
530
531 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
532 {
533 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
534 return;
535 }
536
537 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
538 rta->rta_type = rta_type;
539 rta->rta_len = RTA_LENGTH(data.len);
540 memcpy(RTA_DATA(rta), data.ptr, data.len);
541 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
542 }
543
544 /**
545 * process a XFRM_MSG_ACQUIRE from kernel
546 */
547 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
548 {
549 u_int32_t reqid = 0;
550 job_t *job;
551 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
552 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
553
554 if (RTA_OK(rtattr, rtsize))
555 {
556 if (rtattr->rta_type == XFRMA_TMPL)
557 {
558 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
559 reqid = tmpl->reqid;
560 }
561 }
562 if (reqid == 0)
563 {
564 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
565 return;
566 }
567 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
568 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid %d", reqid);
569 job = (job_t*)acquire_job_create(reqid);
570 charon->processor->queue_job(charon->processor, job);
571 }
572
573 /**
574 * process a XFRM_MSG_EXPIRE from kernel
575 */
576 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
577 {
578 job_t *job;
579 protocol_id_t protocol;
580 u_int32_t spi, reqid;
581 struct xfrm_user_expire *expire;
582
583 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
584 protocol = proto_kernel2ike(expire->state.id.proto);
585 spi = expire->state.id.spi;
586 reqid = expire->state.reqid;
587
588 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
589
590 if (protocol != PROTO_ESP && protocol != PROTO_AH)
591 {
592 DBG2(DBG_KNL, "ignoring XFRM_MSG_EXPIRE for SA 0x%x (reqid %d) which is "
593 "not a CHILD_SA", ntohl(spi), reqid);
594 return;
595 }
596
597 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA 0x%x (reqid %d)",
598 expire->hard ? "delete" : "rekey", protocol_id_names,
599 protocol, ntohl(spi), reqid);
600 if (expire->hard)
601 {
602 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
603 }
604 else
605 {
606 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
607 }
608 charon->processor->queue_job(charon->processor, job);
609 }
610
611 /**
612 * start a roaming job. We delay it for a second and fire only one job
613 * for multiple events. Otherwise we would create two many jobs.
614 */
615 static void fire_roam_job(private_kernel_interface_t *this, bool address)
616 {
617 struct timeval now;
618
619 if (gettimeofday(&now, NULL) == 0)
620 {
621 if (timercmp(&now, &this->last_roam, >))
622 {
623 now.tv_usec += ROAM_DELAY * 1000;
624 while (now.tv_usec > 1000000)
625 {
626 now.tv_sec++;
627 now.tv_usec -= 1000000;
628 }
629 this->last_roam = now;
630 charon->scheduler->schedule_job(charon->scheduler,
631 (job_t*)roam_job_create(address), ROAM_DELAY);
632 }
633 }
634 }
635
636 /**
637 * process RTM_NEWLINK/RTM_DELLINK from kernel
638 */
639 static void process_link(private_kernel_interface_t *this,
640 struct nlmsghdr *hdr, bool event)
641 {
642 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
643 struct rtattr *rta = IFLA_RTA(msg);
644 size_t rtasize = IFLA_PAYLOAD (hdr);
645 iterator_t *iterator;
646 iface_entry_t *current, *entry = NULL;
647 char *name = NULL;
648 bool update = FALSE;
649
650 while(RTA_OK(rta, rtasize))
651 {
652 switch (rta->rta_type)
653 {
654 case IFLA_IFNAME:
655 name = RTA_DATA(rta);
656 break;
657 }
658 rta = RTA_NEXT(rta, rtasize);
659 }
660 if (!name)
661 {
662 name = "(unknown)";
663 }
664
665 switch (hdr->nlmsg_type)
666 {
667 case RTM_NEWLINK:
668 {
669 if (msg->ifi_flags & IFF_LOOPBACK)
670 { /* ignore loopback interfaces */
671 break;
672 }
673 iterator = this->ifaces->create_iterator_locked(this->ifaces,
674 &this->mutex);
675 while (iterator->iterate(iterator, (void**)&current))
676 {
677 if (current->ifindex == msg->ifi_index)
678 {
679 entry = current;
680 break;
681 }
682 }
683 if (!entry)
684 {
685 entry = malloc_thing(iface_entry_t);
686 entry->ifindex = msg->ifi_index;
687 entry->flags = 0;
688 entry->addrs = linked_list_create();
689 this->ifaces->insert_last(this->ifaces, entry);
690 }
691 memcpy(entry->ifname, name, IFNAMSIZ);
692 entry->ifname[IFNAMSIZ-1] = '\0';
693 if (event)
694 {
695 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
696 {
697 update = TRUE;
698 DBG1(DBG_KNL, "interface %s activated", name);
699 }
700 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
701 {
702 update = TRUE;
703 DBG1(DBG_KNL, "interface %s deactivated", name);
704 }
705 }
706 entry->flags = msg->ifi_flags;
707 iterator->destroy(iterator);
708 break;
709 }
710 case RTM_DELLINK:
711 {
712 iterator = this->ifaces->create_iterator_locked(this->ifaces,
713 &this->mutex);
714 while (iterator->iterate(iterator, (void**)&current))
715 {
716 if (current->ifindex == msg->ifi_index)
717 {
718 /* we do not remove it, as an address may be added to a
719 * "down" interface and we wan't to know that. */
720 current->flags = msg->ifi_flags;
721 break;
722 }
723 }
724 iterator->destroy(iterator);
725 break;
726 }
727 }
728
729 /* send an update to all IKE_SAs */
730 if (update && event)
731 {
732 fire_roam_job(this, TRUE);
733 }
734 }
735
736 /**
737 * process RTM_NEWADDR/RTM_DELADDR from kernel
738 */
739 static void process_addr(private_kernel_interface_t *this,
740 struct nlmsghdr *hdr, bool event)
741 {
742 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
743 struct rtattr *rta = IFA_RTA(msg);
744 size_t rtasize = IFA_PAYLOAD (hdr);
745 host_t *host = NULL;
746 iterator_t *ifaces, *addrs;
747 iface_entry_t *iface;
748 addr_entry_t *addr;
749 chunk_t local = chunk_empty, address = chunk_empty;
750 bool update = FALSE, found = FALSE, changed = FALSE;
751
752 while(RTA_OK(rta, rtasize))
753 {
754 switch (rta->rta_type)
755 {
756 case IFA_LOCAL:
757 local.ptr = RTA_DATA(rta);
758 local.len = RTA_PAYLOAD(rta);
759 break;
760 case IFA_ADDRESS:
761 address.ptr = RTA_DATA(rta);
762 address.len = RTA_PAYLOAD(rta);
763 break;
764 }
765 rta = RTA_NEXT(rta, rtasize);
766 }
767
768 /* For PPP interfaces, we need the IFA_LOCAL address,
769 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
770 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
771 if (local.ptr)
772 {
773 host = host_create_from_chunk(msg->ifa_family, local, 0);
774 }
775 else if (address.ptr)
776 {
777 host = host_create_from_chunk(msg->ifa_family, address, 0);
778 }
779
780 if (host == NULL)
781 { /* bad family? */
782 return;
783 }
784
785 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
786 while (ifaces->iterate(ifaces, (void**)&iface))
787 {
788 if (iface->ifindex == msg->ifa_index)
789 {
790 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
791 while (addrs->iterate(addrs, (void**)&addr))
792 {
793 if (host->ip_equals(host, addr->ip))
794 {
795 found = TRUE;
796 if (hdr->nlmsg_type == RTM_DELADDR)
797 {
798 changed = TRUE;
799 addrs->remove(addrs);
800 if (!addr->virtual)
801 {
802 DBG1(DBG_KNL, "%H disappeared from %s",
803 host, iface->ifname);
804 }
805 addr_entry_destroy(addr);
806 }
807 else if (hdr->nlmsg_type == RTM_NEWADDR && addr->virtual)
808 {
809 addr->refcount = 1;
810 }
811 }
812 }
813 addrs->destroy(addrs);
814
815 if (hdr->nlmsg_type == RTM_NEWADDR)
816 {
817 if (!found)
818 {
819 found = TRUE;
820 changed = TRUE;
821 addr = malloc_thing(addr_entry_t);
822 addr->ip = host->clone(host);
823 addr->virtual = FALSE;
824 addr->refcount = 1;
825 addr->scope = msg->ifa_scope;
826
827 iface->addrs->insert_last(iface->addrs, addr);
828 if (event)
829 {
830 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
831 }
832 }
833 }
834 if (found && (iface->flags & IFF_UP))
835 {
836 update = TRUE;
837 }
838 break;
839 }
840 }
841 ifaces->destroy(ifaces);
842 host->destroy(host);
843
844 /* send an update to all IKE_SAs */
845 if (update && event && changed)
846 {
847 fire_roam_job(this, TRUE);
848 }
849 }
850
851 /**
852 * Receives events from kernel
853 */
854 static job_requeue_t receive_events(private_kernel_interface_t *this)
855 {
856 char response[1024];
857 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
858 struct sockaddr_nl addr;
859 socklen_t addr_len = sizeof(addr);
860 int len, oldstate, maxfd, selected;
861 fd_set rfds;
862
863 FD_ZERO(&rfds);
864 FD_SET(this->socket_xfrm_events, &rfds);
865 FD_SET(this->socket_rt_events, &rfds);
866 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
867
868 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
869 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
870 pthread_setcancelstate(oldstate, NULL);
871 if (selected <= 0)
872 {
873 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
874 return JOB_REQUEUE_FAIR;
875 }
876 if (FD_ISSET(this->socket_xfrm_events, &rfds))
877 {
878 selected = this->socket_xfrm_events;
879 }
880 else if (FD_ISSET(this->socket_rt_events, &rfds))
881 {
882 selected = this->socket_rt_events;
883 }
884 else
885 {
886 return JOB_REQUEUE_DIRECT;
887 }
888
889 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
890 (struct sockaddr*)&addr, &addr_len);
891 if (len < 0)
892 {
893 switch (errno)
894 {
895 case EINTR:
896 /* interrupted, try again */
897 return JOB_REQUEUE_DIRECT;
898 case EAGAIN:
899 /* no data ready, select again */
900 return JOB_REQUEUE_DIRECT;
901 default:
902 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
903 sleep(1);
904 return JOB_REQUEUE_FAIR;
905 }
906 }
907 if (addr.nl_pid != 0)
908 { /* not from kernel. not interested, try another one */
909 return JOB_REQUEUE_DIRECT;
910 }
911
912 while (NLMSG_OK(hdr, len))
913 {
914 /* looks good so far, dispatch netlink message */
915 if (selected == this->socket_xfrm_events)
916 {
917 switch (hdr->nlmsg_type)
918 {
919 case XFRM_MSG_ACQUIRE:
920 process_acquire(this, hdr);
921 break;
922 case XFRM_MSG_EXPIRE:
923 process_expire(this, hdr);
924 break;
925 default:
926 break;
927 }
928 }
929 else if (selected == this->socket_rt_events)
930 {
931 switch (hdr->nlmsg_type)
932 {
933 case RTM_NEWADDR:
934 case RTM_DELADDR:
935 process_addr(this, hdr, TRUE);
936 pthread_cond_signal(&this->cond);
937 break;
938 case RTM_NEWLINK:
939 case RTM_DELLINK:
940 process_link(this, hdr, TRUE);
941 pthread_cond_signal(&this->cond);
942 break;
943 case RTM_NEWROUTE:
944 case RTM_DELROUTE:
945 fire_roam_job(this, FALSE);
946 break;
947 default:
948 break;
949 }
950 }
951 hdr = NLMSG_NEXT(hdr, len);
952 }
953 return JOB_REQUEUE_DIRECT;
954 }
955
956 /**
957 * send a netlink message and wait for a reply
958 */
959 static status_t netlink_send(private_kernel_interface_t *this,
960 int socket, struct nlmsghdr *in,
961 struct nlmsghdr **out, size_t *out_len)
962 {
963 int len, addr_len;
964 struct sockaddr_nl addr;
965 chunk_t result = chunk_empty, tmp;
966 struct nlmsghdr *msg, peek;
967
968 pthread_mutex_lock(&this->nl_mutex);
969
970 in->nlmsg_seq = ++this->seq;
971 in->nlmsg_pid = getpid();
972
973 memset(&addr, 0, sizeof(addr));
974 addr.nl_family = AF_NETLINK;
975 addr.nl_pid = 0;
976 addr.nl_groups = 0;
977
978 while (TRUE)
979 {
980 len = sendto(socket, in, in->nlmsg_len, 0,
981 (struct sockaddr*)&addr, sizeof(addr));
982
983 if (len != in->nlmsg_len)
984 {
985 if (errno == EINTR)
986 {
987 /* interrupted, try again */
988 continue;
989 }
990 pthread_mutex_unlock(&this->nl_mutex);
991 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
992 return FAILED;
993 }
994 break;
995 }
996
997 while (TRUE)
998 {
999 char buf[4096];
1000 tmp.len = sizeof(buf);
1001 tmp.ptr = buf;
1002 msg = (struct nlmsghdr*)tmp.ptr;
1003
1004 memset(&addr, 0, sizeof(addr));
1005 addr.nl_family = AF_NETLINK;
1006 addr.nl_pid = getpid();
1007 addr.nl_groups = 0;
1008 addr_len = sizeof(addr);
1009
1010 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
1011 (struct sockaddr*)&addr, &addr_len);
1012
1013 if (len < 0)
1014 {
1015 if (errno == EINTR)
1016 {
1017 DBG1(DBG_KNL, "got interrupted");
1018 /* interrupted, try again */
1019 continue;
1020 }
1021 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
1022 pthread_mutex_unlock(&this->nl_mutex);
1023 return FAILED;
1024 }
1025 if (!NLMSG_OK(msg, len))
1026 {
1027 DBG1(DBG_KNL, "received corrupted netlink message");
1028 pthread_mutex_unlock(&this->nl_mutex);
1029 return FAILED;
1030 }
1031 if (msg->nlmsg_seq != this->seq)
1032 {
1033 DBG1(DBG_KNL, "received invalid netlink sequence number");
1034 if (msg->nlmsg_seq < this->seq)
1035 {
1036 continue;
1037 }
1038 pthread_mutex_unlock(&this->nl_mutex);
1039 return FAILED;
1040 }
1041
1042 tmp.len = len;
1043 result = chunk_cata("cc", result, tmp);
1044
1045 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
1046 * numbers to detect multi header messages */
1047 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
1048 (struct sockaddr*)&addr, &addr_len);
1049
1050 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
1051 {
1052 /* seems to be multipart */
1053 continue;
1054 }
1055 break;
1056 }
1057
1058 *out_len = result.len;
1059 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
1060
1061 pthread_mutex_unlock(&this->nl_mutex);
1062
1063 return SUCCESS;
1064 }
1065
1066 /**
1067 * send a netlink message and wait for its acknowlegde
1068 */
1069 static status_t netlink_send_ack(private_kernel_interface_t *this,
1070 int socket, struct nlmsghdr *in)
1071 {
1072 struct nlmsghdr *out, *hdr;
1073 size_t len;
1074
1075 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
1076 {
1077 return FAILED;
1078 }
1079 hdr = out;
1080 while (NLMSG_OK(hdr, len))
1081 {
1082 switch (hdr->nlmsg_type)
1083 {
1084 case NLMSG_ERROR:
1085 {
1086 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
1087
1088 if (err->error)
1089 {
1090 if (-err->error == EEXIST)
1091 { /* do not report existing routes */
1092 free(out);
1093 return ALREADY_DONE;
1094 }
1095 DBG1(DBG_KNL, "received netlink error: %s (%d)",
1096 strerror(-err->error), -err->error);
1097 free(out);
1098 return FAILED;
1099 }
1100 free(out);
1101 return SUCCESS;
1102 }
1103 default:
1104 hdr = NLMSG_NEXT(hdr, len);
1105 continue;
1106 case NLMSG_DONE:
1107 break;
1108 }
1109 break;
1110 }
1111 DBG1(DBG_KNL, "netlink request not acknowlegded");
1112 free(out);
1113 return FAILED;
1114 }
1115
1116 /**
1117 * Initialize a list of local addresses.
1118 */
1119 static status_t init_address_list(private_kernel_interface_t *this)
1120 {
1121 char request[BUFFER_SIZE];
1122 struct nlmsghdr *out, *current, *in;
1123 struct rtgenmsg *msg;
1124 size_t len;
1125 iterator_t *ifaces, *addrs;
1126 iface_entry_t *iface;
1127 addr_entry_t *addr;
1128
1129 DBG1(DBG_KNL, "listening on interfaces:");
1130
1131 memset(&request, 0, sizeof(request));
1132
1133 in = (struct nlmsghdr*)&request;
1134 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1135 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1136 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1137 msg->rtgen_family = AF_UNSPEC;
1138
1139 /* get all links */
1140 in->nlmsg_type = RTM_GETLINK;
1141 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1142 {
1143 return FAILED;
1144 }
1145 current = out;
1146 while (NLMSG_OK(current, len))
1147 {
1148 switch (current->nlmsg_type)
1149 {
1150 case NLMSG_DONE:
1151 break;
1152 case RTM_NEWLINK:
1153 process_link(this, current, FALSE);
1154 /* fall through */
1155 default:
1156 current = NLMSG_NEXT(current, len);
1157 continue;
1158 }
1159 break;
1160 }
1161 free(out);
1162
1163 /* get all interface addresses */
1164 in->nlmsg_type = RTM_GETADDR;
1165 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1166 {
1167 return FAILED;
1168 }
1169 current = out;
1170 while (NLMSG_OK(current, len))
1171 {
1172 switch (current->nlmsg_type)
1173 {
1174 case NLMSG_DONE:
1175 break;
1176 case RTM_NEWADDR:
1177 process_addr(this, current, FALSE);
1178 /* fall through */
1179 default:
1180 current = NLMSG_NEXT(current, len);
1181 continue;
1182 }
1183 break;
1184 }
1185 free(out);
1186
1187 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1188 while (ifaces->iterate(ifaces, (void**)&iface))
1189 {
1190 if (iface->flags & IFF_UP)
1191 {
1192 DBG1(DBG_KNL, " %s", iface->ifname);
1193 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1194 while (addrs->iterate(addrs, (void**)&addr))
1195 {
1196 DBG1(DBG_KNL, " %H", addr->ip);
1197 }
1198 addrs->destroy(addrs);
1199 }
1200 }
1201 ifaces->destroy(ifaces);
1202 return SUCCESS;
1203 }
1204
1205 /**
1206 * iterator hook to iterate over addrs
1207 */
1208 static hook_result_t addr_hook(private_kernel_interface_t *this,
1209 addr_entry_t *in, host_t **out)
1210 {
1211 if (in->virtual)
1212 { /* skip virtual interfaces added by us */
1213 return HOOK_SKIP;
1214 }
1215 if (in->scope >= RT_SCOPE_LINK)
1216 { /* skip addresses with a unusable scope */
1217 return HOOK_SKIP;
1218 }
1219 *out = in->ip;
1220 return HOOK_NEXT;
1221 }
1222
1223 /**
1224 * iterator hook to iterate over ifaces
1225 */
1226 static hook_result_t iface_hook(private_kernel_interface_t *this,
1227 iface_entry_t *in, host_t **out)
1228 {
1229 if (!(in->flags & IFF_UP))
1230 { /* skip interfaces not up */
1231 return HOOK_SKIP;
1232 }
1233
1234 if (this->hiter == NULL)
1235 {
1236 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1237 this->hiter->set_iterator_hook(this->hiter,
1238 (iterator_hook_t*)addr_hook, this);
1239 }
1240 while (this->hiter->iterate(this->hiter, (void**)out))
1241 {
1242 return HOOK_AGAIN;
1243 }
1244 this->hiter->destroy(this->hiter);
1245 this->hiter = NULL;
1246 return HOOK_SKIP;
1247 }
1248
1249 /**
1250 * Implements kernel_interface_t.create_address_iterator.
1251 */
1252 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1253 {
1254 iterator_t *iterator;
1255
1256 /* This iterator is not only hooked, is is double-hooked. As we have stored
1257 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1258 * entries in each interface we iterate. This does the iface_hook. The
1259 * addr_hook returns the ip instead of the addr_entry. */
1260
1261 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1262 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1263 return iterator;
1264 }
1265
1266 /**
1267 * implementation of kernel_interface_t.get_interface_name
1268 */
1269 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1270 {
1271 iterator_t *ifaces, *addrs;
1272 iface_entry_t *iface;
1273 addr_entry_t *addr;
1274 char *name = NULL;
1275
1276 DBG2(DBG_KNL, "getting interface name for %H", ip);
1277
1278 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1279 while (ifaces->iterate(ifaces, (void**)&iface))
1280 {
1281 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1282 while (addrs->iterate(addrs, (void**)&addr))
1283 {
1284 if (ip->ip_equals(ip, addr->ip))
1285 {
1286 name = strdup(iface->ifname);
1287 break;
1288 }
1289 }
1290 addrs->destroy(addrs);
1291 if (name)
1292 {
1293 break;
1294 }
1295 }
1296 ifaces->destroy(ifaces);
1297
1298 if (name)
1299 {
1300 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1301 }
1302 else
1303 {
1304 DBG2(DBG_KNL, "%H is not a local address", ip);
1305 }
1306 return name;
1307 }
1308
1309 /**
1310 * Tries to find an ip address of a local interface that is included in the
1311 * supplied traffic selector.
1312 */
1313 static status_t get_address_by_ts(private_kernel_interface_t *this,
1314 traffic_selector_t *ts, host_t **ip)
1315 {
1316 iterator_t *ifaces, *addrs;
1317 iface_entry_t *iface;
1318 addr_entry_t *addr;
1319 host_t *host;
1320 int family;
1321 bool found = FALSE;
1322
1323 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1324
1325 /* if we have a family which includes localhost, we do not
1326 * search for an IP, we use the default */
1327 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1328
1329 if (family == AF_INET)
1330 {
1331 host = host_create_from_string("127.0.0.1", 0);
1332 }
1333 else
1334 {
1335 host = host_create_from_string("::1", 0);
1336 }
1337
1338 if (ts->includes(ts, host))
1339 {
1340 *ip = host_create_any(family);
1341 host->destroy(host);
1342 DBG2(DBG_KNL, "using host %H", *ip);
1343 return SUCCESS;
1344 }
1345 host->destroy(host);
1346
1347 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1348 while (ifaces->iterate(ifaces, (void**)&iface))
1349 {
1350 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1351 while (addrs->iterate(addrs, (void**)&addr))
1352 {
1353 if (ts->includes(ts, addr->ip))
1354 {
1355 found = TRUE;
1356 *ip = addr->ip->clone(addr->ip);
1357 break;
1358 }
1359 }
1360 addrs->destroy(addrs);
1361 if (found)
1362 {
1363 break;
1364 }
1365 }
1366 ifaces->destroy(ifaces);
1367
1368 if (!found)
1369 {
1370 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1371 return FAILED;
1372 }
1373 DBG2(DBG_KNL, "using host %H", *ip);
1374 return SUCCESS;
1375 }
1376
1377 /**
1378 * get the interface of a local address
1379 */
1380 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1381 {
1382 iterator_t *ifaces, *addrs;
1383 iface_entry_t *iface;
1384 addr_entry_t *addr;
1385 int ifindex = 0;
1386
1387 DBG2(DBG_KNL, "getting iface for %H", ip);
1388
1389 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1390 while (ifaces->iterate(ifaces, (void**)&iface))
1391 {
1392 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1393 while (addrs->iterate(addrs, (void**)&addr))
1394 {
1395 if (ip->ip_equals(ip, addr->ip))
1396 {
1397 ifindex = iface->ifindex;
1398 break;
1399 }
1400 }
1401 addrs->destroy(addrs);
1402 if (ifindex)
1403 {
1404 break;
1405 }
1406 }
1407 ifaces->destroy(ifaces);
1408
1409 if (ifindex == 0)
1410 {
1411 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1412 }
1413 return ifindex;
1414 }
1415
1416 /**
1417 * get the refcount of a virtual ip
1418 */
1419 static int get_vip_refcount(private_kernel_interface_t *this, host_t* ip)
1420 {
1421 iterator_t *ifaces, *addrs;
1422 iface_entry_t *iface;
1423 addr_entry_t *addr;
1424 int refcount = 0;
1425
1426 ifaces = this->ifaces->create_iterator(this->ifaces, TRUE);
1427 while (ifaces->iterate(ifaces, (void**)&iface))
1428 {
1429 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1430 while (addrs->iterate(addrs, (void**)&addr))
1431 {
1432 if (addr->virtual && (iface->flags & IFF_UP) &&
1433 ip->ip_equals(ip, addr->ip))
1434 {
1435 refcount = addr->refcount;
1436 break;
1437 }
1438 }
1439 addrs->destroy(addrs);
1440 if (refcount)
1441 {
1442 break;
1443 }
1444 }
1445 ifaces->destroy(ifaces);
1446
1447 return refcount;
1448 }
1449
1450 /**
1451 * Manages the creation and deletion of ip addresses on an interface.
1452 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1453 */
1454 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1455 int flags, int if_index, host_t *ip)
1456 {
1457 unsigned char request[BUFFER_SIZE];
1458 struct nlmsghdr *hdr;
1459 struct ifaddrmsg *msg;
1460 chunk_t chunk;
1461
1462 memset(&request, 0, sizeof(request));
1463
1464 chunk = ip->get_address(ip);
1465
1466 hdr = (struct nlmsghdr*)request;
1467 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1468 hdr->nlmsg_type = nlmsg_type;
1469 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1470
1471 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1472 msg->ifa_family = ip->get_family(ip);
1473 msg->ifa_flags = 0;
1474 msg->ifa_prefixlen = 8 * chunk.len;
1475 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1476 msg->ifa_index = if_index;
1477
1478 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1479
1480 return netlink_send_ack(this, this->socket_rt, hdr);
1481 }
1482
1483 /**
1484 * Manages source routes in the routing table.
1485 * By setting the appropriate nlmsg_type, the route added or r.
1486 */
1487 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1488 int flags, route_entry_t *route)
1489 {
1490 unsigned char request[BUFFER_SIZE];
1491 struct nlmsghdr *hdr;
1492 struct rtmsg *msg;
1493 chunk_t chunk;
1494
1495 /* if route is 0.0.0.0/0, we can't install it, as it would
1496 * overwrite the default route. Instead, we add two routes:
1497 * 0.0.0.0/1 and 128.0.0.0/1 */
1498 if (this->routing_table == 0 && route->prefixlen == 0)
1499 {
1500 route_entry_t half;
1501 status_t status;
1502
1503 half.dst_net = chunk_alloca(route->dst_net.len);
1504 memset(half.dst_net.ptr, 0, half.dst_net.len);
1505 half.src_ip = route->src_ip;
1506 half.gateway = route->gateway;
1507 half.if_index = route->if_index;
1508 half.prefixlen = 1;
1509
1510 status = manage_srcroute(this, nlmsg_type, flags, &half);
1511 half.dst_net.ptr[0] |= 0x80;
1512 status = manage_srcroute(this, nlmsg_type, flags, &half);
1513 return status;
1514 }
1515
1516 memset(&request, 0, sizeof(request));
1517
1518 hdr = (struct nlmsghdr*)request;
1519 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1520 hdr->nlmsg_type = nlmsg_type;
1521 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1522
1523 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1524 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1525 msg->rtm_dst_len = route->prefixlen;
1526 msg->rtm_table = this->routing_table;
1527 msg->rtm_protocol = RTPROT_STATIC;
1528 msg->rtm_type = RTN_UNICAST;
1529 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1530
1531 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1532 chunk = route->src_ip->get_address(route->src_ip);
1533 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1534 chunk = route->gateway->get_address(route->gateway);
1535 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1536 chunk.ptr = (char*)&route->if_index;
1537 chunk.len = sizeof(route->if_index);
1538 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1539
1540 return netlink_send_ack(this, this->socket_rt, hdr);
1541 }
1542
1543 /**
1544 * create or delete an rule to use our routing table
1545 */
1546 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1547 u_int32_t table, u_int32_t prio)
1548 {
1549 unsigned char request[BUFFER_SIZE];
1550 struct nlmsghdr *hdr;
1551 struct rtmsg *msg;
1552 chunk_t chunk;
1553
1554 memset(&request, 0, sizeof(request));
1555 hdr = (struct nlmsghdr*)request;
1556 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1557 hdr->nlmsg_type = nlmsg_type;
1558 if (nlmsg_type == RTM_NEWRULE)
1559 {
1560 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1561 }
1562 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1563
1564 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1565 msg->rtm_table = table;
1566 msg->rtm_family = AF_INET;
1567 msg->rtm_protocol = RTPROT_BOOT;
1568 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1569 msg->rtm_type = RTN_UNICAST;
1570
1571 chunk = chunk_from_thing(prio);
1572 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1573
1574 return netlink_send_ack(this, this->socket_rt, hdr);
1575 }
1576
1577 /**
1578 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1579 */
1580 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1581 {
1582 int bit, byte;
1583
1584 if (addr.len != net.len)
1585 {
1586 return FALSE;
1587 }
1588 /* scan through all bits, beginning in the front */
1589 for (byte = 0; byte < addr.len; byte++)
1590 {
1591 for (bit = 7; bit >= 0; bit--)
1592 {
1593 /* check if bits are equal (or we reached the end of the net) */
1594 if (bit + byte * 8 > net_len)
1595 {
1596 return TRUE;
1597 }
1598 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
1599 {
1600 return FALSE;
1601 }
1602 }
1603 }
1604 return TRUE;
1605 }
1606
1607 /**
1608 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1609 */
1610 static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
1611 bool nexthop)
1612 {
1613 unsigned char request[BUFFER_SIZE];
1614 struct nlmsghdr *hdr, *out, *current;
1615 struct rtmsg *msg;
1616 chunk_t chunk;
1617 size_t len;
1618 int best = -1;
1619 host_t *src = NULL, *gtw = NULL;
1620
1621 DBG2(DBG_KNL, "getting address to reach %H", dest);
1622
1623 memset(&request, 0, sizeof(request));
1624
1625 hdr = (struct nlmsghdr*)request;
1626 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
1627 hdr->nlmsg_type = RTM_GETROUTE;
1628 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1629
1630 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1631 msg->rtm_family = dest->get_family(dest);
1632
1633 chunk = dest->get_address(dest);
1634 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1635
1636 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1637 {
1638 DBG1(DBG_KNL, "getting address to %H failed", dest);
1639 return NULL;
1640 }
1641 current = out;
1642 while (NLMSG_OK(current, len))
1643 {
1644 switch (current->nlmsg_type)
1645 {
1646 case NLMSG_DONE:
1647 break;
1648 case RTM_NEWROUTE:
1649 {
1650 struct rtattr *rta;
1651 size_t rtasize;
1652 chunk_t rta_gtw, rta_src, rta_dst;
1653 u_int32_t rta_oif = 0;
1654
1655 rta_gtw = rta_src = rta_dst = chunk_empty;
1656 msg = (struct rtmsg*)(NLMSG_DATA(current));
1657 rta = RTM_RTA(msg);
1658 rtasize = RTM_PAYLOAD(current);
1659 while (RTA_OK(rta, rtasize))
1660 {
1661 switch (rta->rta_type)
1662 {
1663 case RTA_PREFSRC:
1664 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1665 break;
1666 case RTA_GATEWAY:
1667 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1668 break;
1669 case RTA_DST:
1670 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1671 break;
1672 case RTA_OIF:
1673 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1674 {
1675 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1676 }
1677 break;
1678 }
1679 rta = RTA_NEXT(rta, rtasize);
1680 }
1681
1682 /* apply the route if:
1683 * - it is not from our own ipsec routing table
1684 * - is better than a previous one
1685 * - is the default route or
1686 * - its destination net contains our destination
1687 */
1688 if ((this->routing_table == 0 ||msg->rtm_table != this->routing_table)
1689 && msg->rtm_dst_len > best
1690 && (msg->rtm_dst_len == 0 || /* default route */
1691 (rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len))))
1692 {
1693 iterator_t *ifaces, *addrs;
1694 iface_entry_t *iface;
1695 addr_entry_t *addr;
1696
1697 best = msg->rtm_dst_len;
1698 if (nexthop)
1699 {
1700 DESTROY_IF(gtw);
1701 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
1702 }
1703 else if (rta_src.ptr)
1704 {
1705 DESTROY_IF(src);
1706 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
1707 if (get_vip_refcount(this, src))
1708 { /* skip source address if it is installed by us */
1709 DESTROY_IF(src);
1710 src = NULL;
1711 current = NLMSG_NEXT(current, len);
1712 continue;
1713 }
1714 }
1715 else
1716 {
1717 /* no source addr, get one from the interfaces */
1718 ifaces = this->ifaces->create_iterator_locked(
1719 this->ifaces, &this->mutex);
1720 while (ifaces->iterate(ifaces, (void**)&iface))
1721 {
1722 if (iface->ifindex == rta_oif)
1723 {
1724 addrs = iface->addrs->create_iterator(
1725 iface->addrs, TRUE);
1726 while (addrs->iterate(addrs, (void**)&addr))
1727 {
1728 chunk_t ip = addr->ip->get_address(addr->ip);
1729 if (msg->rtm_dst_len == 0
1730 || addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
1731 {
1732 DESTROY_IF(src);
1733 src = addr->ip->clone(addr->ip);
1734 break;
1735 }
1736 }
1737 addrs->destroy(addrs);
1738 }
1739 }
1740 ifaces->destroy(ifaces);
1741 }
1742 }
1743 /* FALL through */
1744 }
1745 default:
1746 current = NLMSG_NEXT(current, len);
1747 continue;
1748 }
1749 break;
1750 }
1751 free(out);
1752
1753 if (nexthop)
1754 {
1755 if (gtw)
1756 {
1757 return gtw;
1758 }
1759 return dest->clone(dest);
1760 }
1761 return src;
1762 }
1763
1764 /**
1765 * Implementation of kernel_interface_t.get_source_addr.
1766 */
1767 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1768 {
1769 return get_route(this, dest, FALSE);
1770 }
1771
1772 /**
1773 * Implementation of kernel_interface_t.add_ip.
1774 */
1775 static status_t add_ip(private_kernel_interface_t *this,
1776 host_t *virtual_ip, host_t *iface_ip)
1777 {
1778 iface_entry_t *iface;
1779 addr_entry_t *addr;
1780 iterator_t *addrs, *ifaces;
1781 int ifindex;
1782
1783 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1784
1785 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1786 while (ifaces->iterate(ifaces, (void**)&iface))
1787 {
1788 bool iface_found = FALSE;
1789
1790 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1791 while (addrs->iterate(addrs, (void**)&addr))
1792 {
1793 if (iface_ip->ip_equals(iface_ip, addr->ip))
1794 {
1795 iface_found = TRUE;
1796 }
1797 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1798 {
1799 addr->refcount++;
1800 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1801 virtual_ip, iface->ifname);
1802 addrs->destroy(addrs);
1803 ifaces->destroy(ifaces);
1804 return SUCCESS;
1805 }
1806 }
1807 addrs->destroy(addrs);
1808
1809 if (iface_found)
1810 {
1811 ifindex = iface->ifindex;
1812 addr = malloc_thing(addr_entry_t);
1813 addr->ip = virtual_ip->clone(virtual_ip);
1814 addr->refcount = 0;
1815 addr->virtual = TRUE;
1816 addr->scope = RT_SCOPE_UNIVERSE;
1817 iface->addrs->insert_last(iface->addrs, addr);
1818
1819 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1820 ifindex, virtual_ip) == SUCCESS)
1821 {
1822 while (get_vip_refcount(this, virtual_ip) == 0)
1823 { /* wait until address appears */
1824 pthread_cond_wait(&this->cond, &this->mutex);
1825 }
1826 ifaces->destroy(ifaces);
1827 return SUCCESS;
1828 }
1829 ifaces->destroy(ifaces);
1830 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1831 return FAILED;
1832 }
1833 }
1834 ifaces->destroy(ifaces);
1835
1836 DBG1(DBG_KNL, "interface address %H not found, unable to install"
1837 "virtual IP %H", iface_ip, virtual_ip);
1838 return FAILED;
1839 }
1840
1841 /**
1842 * Implementation of kernel_interface_t.del_ip.
1843 */
1844 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1845 {
1846 iface_entry_t *iface;
1847 addr_entry_t *addr;
1848 iterator_t *addrs, *ifaces;
1849 status_t status;
1850 int ifindex;
1851
1852 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1853
1854 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1855 while (ifaces->iterate(ifaces, (void**)&iface))
1856 {
1857 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1858 while (addrs->iterate(addrs, (void**)&addr))
1859 {
1860 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1861 {
1862 ifindex = iface->ifindex;
1863 if (addr->refcount == 1)
1864 {
1865 status = manage_ipaddr(this, RTM_DELADDR, 0,
1866 ifindex, virtual_ip);
1867 if (status == SUCCESS)
1868 { /* wait until the address is really gone */
1869 while (get_vip_refcount(this, virtual_ip) > 0)
1870 {
1871 pthread_cond_wait(&this->cond, &this->mutex);
1872 }
1873 }
1874 addrs->destroy(addrs);
1875 ifaces->destroy(ifaces);
1876 return status;
1877 }
1878 else
1879 {
1880 addr->refcount--;
1881 }
1882 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1883 virtual_ip);
1884 addrs->destroy(addrs);
1885 ifaces->destroy(ifaces);
1886 return SUCCESS;
1887 }
1888 }
1889 addrs->destroy(addrs);
1890 }
1891 ifaces->destroy(ifaces);
1892
1893 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1894 return FAILED;
1895 }
1896
1897 /**
1898 * Get an SPI for a specific protocol from the kernel.
1899 */
1900 static status_t get_spi_internal(private_kernel_interface_t *this,
1901 host_t *src, host_t *dst, u_int8_t proto, u_int32_t min, u_int32_t max,
1902 u_int32_t reqid, u_int32_t *spi)
1903 {
1904 unsigned char request[BUFFER_SIZE];
1905 struct nlmsghdr *hdr, *out;
1906 struct xfrm_userspi_info *userspi;
1907 u_int32_t received_spi = 0;
1908 size_t len;
1909
1910 memset(&request, 0, sizeof(request));
1911
1912 hdr = (struct nlmsghdr*)request;
1913 hdr->nlmsg_flags = NLM_F_REQUEST;
1914 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1915 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1916
1917 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1918 host2xfrm(src, &userspi->info.saddr);
1919 host2xfrm(dst, &userspi->info.id.daddr);
1920 userspi->info.id.proto = proto;
1921 userspi->info.mode = TRUE; /* tunnel mode */
1922 userspi->info.reqid = reqid;
1923 userspi->info.family = src->get_family(src);
1924 userspi->min = min;
1925 userspi->max = max;
1926
1927 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1928 {
1929 hdr = out;
1930 while (NLMSG_OK(hdr, len))
1931 {
1932 switch (hdr->nlmsg_type)
1933 {
1934 case XFRM_MSG_NEWSA:
1935 {
1936 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1937 received_spi = usersa->id.spi;
1938 break;
1939 }
1940 case NLMSG_ERROR:
1941 {
1942 struct nlmsgerr *err = NLMSG_DATA(hdr);
1943
1944 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1945 strerror(-err->error), -err->error);
1946 break;
1947 }
1948 default:
1949 hdr = NLMSG_NEXT(hdr, len);
1950 continue;
1951 case NLMSG_DONE:
1952 break;
1953 }
1954 break;
1955 }
1956 free(out);
1957 }
1958
1959 if (received_spi == 0)
1960 {
1961 return FAILED;
1962 }
1963
1964 *spi = received_spi;
1965 return SUCCESS;
1966 }
1967
1968 /**
1969 * Implementation of kernel_interface_t.get_spi.
1970 */
1971 static status_t get_spi(private_kernel_interface_t *this,
1972 host_t *src, host_t *dst,
1973 protocol_id_t protocol, u_int32_t reqid,
1974 u_int32_t *spi)
1975 {
1976 DBG2(DBG_KNL, "getting SPI for reqid %d", reqid);
1977
1978 if (get_spi_internal(this, src, dst, proto_ike2kernel(protocol),
1979 0xc0000000, 0xcFFFFFFF, reqid, spi) != SUCCESS)
1980 {
1981 DBG1(DBG_KNL, "unable to get SPI for reqid %d", reqid);
1982 return FAILED;
1983 }
1984
1985 DBG2(DBG_KNL, "got SPI 0x%x for reqid %d", *spi, reqid);
1986
1987 return SUCCESS;
1988 }
1989
1990 /**
1991 * Implementation of kernel_interface_t.get_cpi.
1992 */
1993 static status_t get_cpi(private_kernel_interface_t *this,
1994 host_t *src, host_t *dst,
1995 u_int32_t reqid, u_int16_t *cpi)
1996 {
1997 u_int32_t received_spi = 0;
1998 DBG2(DBG_KNL, "getting CPI for reqid %d", reqid);
1999
2000 if (get_spi_internal(this, src, dst,
2001 IPPROTO_COMP, 0x100, 0xEFFF, reqid, &received_spi) != SUCCESS)
2002 {
2003 DBG1(DBG_KNL, "unable to get CPI for reqid %d", reqid);
2004 return FAILED;
2005 }
2006
2007 *cpi = htons((u_int16_t)ntohl(received_spi));
2008
2009 DBG2(DBG_KNL, "got CPI 0x%x for reqid %d", *cpi, reqid);
2010
2011 return SUCCESS;
2012 }
2013
2014 /**
2015 * Implementation of kernel_interface_t.add_sa.
2016 */
2017 static status_t add_sa(private_kernel_interface_t *this,
2018 host_t *src, host_t *dst, u_int32_t spi,
2019 protocol_id_t protocol, u_int32_t reqid,
2020 u_int64_t expire_soft, u_int64_t expire_hard,
2021 u_int16_t enc_alg, u_int16_t enc_size,
2022 u_int16_t int_alg, u_int16_t int_size,
2023 prf_plus_t *prf_plus, mode_t mode,
2024 u_int16_t ipcomp, bool encap,
2025 bool replace)
2026 {
2027 unsigned char request[BUFFER_SIZE];
2028 char *alg_name;
2029 /* additional 4 octets KEYMAT required for AES-GCM as of RFC4106 8.1. */
2030 u_int16_t add_keymat = 32;
2031 struct nlmsghdr *hdr;
2032 struct xfrm_usersa_info *sa;
2033
2034 memset(&request, 0, sizeof(request));
2035
2036 DBG2(DBG_KNL, "adding SAD entry with SPI 0x%x and reqid %d", spi, reqid);
2037
2038 hdr = (struct nlmsghdr*)request;
2039 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2040 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
2041 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2042
2043 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
2044 host2xfrm(src, &sa->saddr);
2045 host2xfrm(dst, &sa->id.daddr);
2046 sa->id.spi = spi;
2047 sa->id.proto = proto_ike2kernel(protocol);
2048 sa->family = src->get_family(src);
2049 sa->mode = mode;
2050 if (mode == MODE_TUNNEL)
2051 {
2052 sa->flags |= XFRM_STATE_AF_UNSPEC;
2053 }
2054 sa->replay_window = (protocol == IPPROTO_COMP) ? 0 : 32;
2055 sa->reqid = reqid;
2056 /* we currently do not expire SAs by volume/packet count */
2057 sa->lft.soft_byte_limit = XFRM_INF;
2058 sa->lft.hard_byte_limit = XFRM_INF;
2059 sa->lft.soft_packet_limit = XFRM_INF;
2060 sa->lft.hard_packet_limit = XFRM_INF;
2061 /* we use lifetimes since added, not since used */
2062 sa->lft.soft_add_expires_seconds = expire_soft;
2063 sa->lft.hard_add_expires_seconds = expire_hard;
2064 sa->lft.soft_use_expires_seconds = 0;
2065 sa->lft.hard_use_expires_seconds = 0;
2066
2067 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
2068
2069 switch (enc_alg)
2070 {
2071 case ENCR_UNDEFINED:
2072 /* no encryption */
2073 break;
2074 case ENCR_AES_CCM_ICV8:
2075 case ENCR_AES_CCM_ICV12:
2076 case ENCR_AES_CCM_ICV16:
2077 /* AES-CCM needs only 3 additional octets KEYMAT as of RFC 4309 7.1. */
2078 add_keymat = 24;
2079 /* fall-through */
2080 case ENCR_AES_GCM_ICV8:
2081 case ENCR_AES_GCM_ICV12:
2082 case ENCR_AES_GCM_ICV16:
2083 {
2084 u_int16_t icv_size = 0;
2085 rthdr->rta_type = XFRMA_ALG_AEAD;
2086 alg_name = lookup_algorithm(encryption_algs, enc_alg, &icv_size);
2087 if (alg_name == NULL)
2088 {
2089 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2090 encryption_algorithm_names, enc_alg);
2091 return FAILED;
2092 }
2093 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2094 encryption_algorithm_names, enc_alg, enc_size);
2095
2096 /* additional KEYMAT required */
2097 enc_size += add_keymat;
2098
2099 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo_aead) + enc_size / 8);
2100 hdr->nlmsg_len += rthdr->rta_len;
2101 if (hdr->nlmsg_len > sizeof(request))
2102 {
2103 return FAILED;
2104 }
2105
2106 struct xfrm_algo_aead* algo = (struct xfrm_algo_aead*)RTA_DATA(rthdr);
2107 algo->alg_key_len = enc_size;
2108 algo->alg_icv_len = icv_size;
2109 strcpy(algo->alg_name, alg_name);
2110 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2111
2112 rthdr = XFRM_RTA_NEXT(rthdr);
2113 break;
2114 }
2115 default:
2116 {
2117 rthdr->rta_type = XFRMA_ALG_CRYPT;
2118 alg_name = lookup_algorithm(encryption_algs, enc_alg, &enc_size);
2119 if (alg_name == NULL)
2120 {
2121 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2122 encryption_algorithm_names, enc_alg);
2123 return FAILED;
2124 }
2125 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
2126 encryption_algorithm_names, enc_alg, enc_size);
2127
2128 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + enc_size / 8);
2129 hdr->nlmsg_len += rthdr->rta_len;
2130 if (hdr->nlmsg_len > sizeof(request))
2131 {
2132 return FAILED;
2133 }
2134
2135 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2136 algo->alg_key_len = enc_size;
2137 strcpy(algo->alg_name, alg_name);
2138 prf_plus->get_bytes(prf_plus, enc_size / 8, algo->alg_key);
2139
2140 rthdr = XFRM_RTA_NEXT(rthdr);
2141 break;
2142 }
2143 }
2144
2145 if (int_alg != AUTH_UNDEFINED)
2146 {
2147 rthdr->rta_type = XFRMA_ALG_AUTH;
2148 alg_name = lookup_algorithm(integrity_algs, int_alg, &int_size);
2149 if (alg_name == NULL)
2150 {
2151 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2152 integrity_algorithm_names, int_alg);
2153 return FAILED;
2154 }
2155 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
2156 integrity_algorithm_names, int_alg, int_size);
2157
2158 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + int_size / 8);
2159 hdr->nlmsg_len += rthdr->rta_len;
2160 if (hdr->nlmsg_len > sizeof(request))
2161 {
2162 return FAILED;
2163 }
2164
2165 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2166 algo->alg_key_len = int_size;
2167 strcpy(algo->alg_name, alg_name);
2168 prf_plus->get_bytes(prf_plus, int_size / 8, algo->alg_key);
2169
2170 rthdr = XFRM_RTA_NEXT(rthdr);
2171 }
2172
2173 if (ipcomp != IPCOMP_NONE)
2174 {
2175 rthdr->rta_type = XFRMA_ALG_COMP;
2176 alg_name = lookup_algorithm(compression_algs, ipcomp, NULL);
2177 if (alg_name == NULL)
2178 {
2179 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2180 ipcomp_transform_names, ipcomp);
2181 return FAILED;
2182 }
2183 DBG2(DBG_KNL, " using compression algorithm %N",
2184 ipcomp_transform_names, ipcomp);
2185
2186 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo));
2187 hdr->nlmsg_len += rthdr->rta_len;
2188 if (hdr->nlmsg_len > sizeof(request))
2189 {
2190 return FAILED;
2191 }
2192
2193 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
2194 algo->alg_key_len = 0;
2195 strcpy(algo->alg_name, alg_name);
2196
2197 rthdr = XFRM_RTA_NEXT(rthdr);
2198 }
2199
2200 if (encap)
2201 {
2202 rthdr->rta_type = XFRMA_ENCAP;
2203 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2204
2205 hdr->nlmsg_len += rthdr->rta_len;
2206 if (hdr->nlmsg_len > sizeof(request))
2207 {
2208 return FAILED;
2209 }
2210
2211 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
2212 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2213 tmpl->encap_sport = htons(src->get_port(src));
2214 tmpl->encap_dport = htons(dst->get_port(dst));
2215 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2216 /* encap_oa could probably be derived from the
2217 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
2218 * pluto does the same as we do here but it uses encap_oa in the
2219 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
2220 * the kernel ignores it anyway
2221 * -> does that mean that NAT-T encap doesn't work in transport mode?
2222 * No. The reason the kernel ignores NAT-OA is that it recomputes
2223 * (or, rather, just ignores) the checksum. If packets pass
2224 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
2225 rthdr = XFRM_RTA_NEXT(rthdr);
2226 }
2227
2228 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2229 {
2230 DBG1(DBG_KNL, "unable to add SAD entry with SPI 0x%x", spi);
2231 return FAILED;
2232 }
2233 return SUCCESS;
2234 }
2235
2236 /**
2237 * Get the replay state (i.e. sequence numbers) of an SA.
2238 */
2239 static status_t get_replay_state(private_kernel_interface_t *this,
2240 u_int32_t spi, protocol_id_t protocol, host_t *dst,
2241 struct xfrm_replay_state *replay)
2242 {
2243 unsigned char request[BUFFER_SIZE];
2244 struct nlmsghdr *hdr, *out = NULL;
2245 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
2246 size_t len;
2247 struct rtattr *rta;
2248 size_t rtasize;
2249
2250 memset(&request, 0, sizeof(request));
2251
2252 DBG2(DBG_KNL, "querying replay state from SAD entry with SPI 0x%x", spi);
2253
2254 hdr = (struct nlmsghdr*)request;
2255 hdr->nlmsg_flags = NLM_F_REQUEST;
2256 hdr->nlmsg_type = XFRM_MSG_GETAE;
2257 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
2258
2259 aevent_id = (struct xfrm_aevent_id*)NLMSG_DATA(hdr);
2260 aevent_id->flags = XFRM_AE_RVAL;
2261
2262 host2xfrm(dst, &aevent_id->sa_id.daddr);
2263 aevent_id->sa_id.spi = spi;
2264 aevent_id->sa_id.proto = proto_ike2kernel(protocol);
2265 aevent_id->sa_id.family = dst->get_family(dst);
2266
2267 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2268 {
2269 hdr = out;
2270 while (NLMSG_OK(hdr, len))
2271 {
2272 switch (hdr->nlmsg_type)
2273 {
2274 case XFRM_MSG_NEWAE:
2275 {
2276 out_aevent = NLMSG_DATA(hdr);
2277 break;
2278 }
2279 case NLMSG_ERROR:
2280 {
2281 struct nlmsgerr *err = NLMSG_DATA(hdr);
2282 DBG1(DBG_KNL, "querying replay state from SAD entry failed: %s (%d)",
2283 strerror(-err->error), -err->error);
2284 break;
2285 }
2286 default:
2287 hdr = NLMSG_NEXT(hdr, len);
2288 continue;
2289 case NLMSG_DONE:
2290 break;
2291 }
2292 break;
2293 }
2294 }
2295
2296 if (out_aevent == NULL)
2297 {
2298 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI 0x%x", spi);
2299 free(out);
2300 return FAILED;
2301 }
2302
2303 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2304 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2305 while(RTA_OK(rta, rtasize))
2306 {
2307 if (rta->rta_type == XFRMA_REPLAY_VAL)
2308 {
2309 memcpy(replay, RTA_DATA(rta), rta->rta_len);
2310 free(out);
2311 return SUCCESS;
2312 }
2313 rta = RTA_NEXT(rta, rtasize);
2314 }
2315
2316 DBG1(DBG_KNL, "unable to query replay state from SAD entry with SPI 0x%x", spi);
2317 free(out);
2318 return FAILED;
2319 }
2320
2321 /**
2322 * Implementation of kernel_interface_t.update_sa.
2323 */
2324 static status_t update_sa(private_kernel_interface_t *this,
2325 u_int32_t spi, protocol_id_t protocol,
2326 host_t *src, host_t *dst,
2327 host_t *new_src, host_t *new_dst, bool encap)
2328 {
2329 unsigned char request[BUFFER_SIZE], *pos;
2330 struct nlmsghdr *hdr, *out = NULL;
2331 struct xfrm_usersa_id *sa_id;
2332 struct xfrm_usersa_info *out_sa = NULL, *sa;
2333 size_t len;
2334 struct rtattr *rta;
2335 size_t rtasize;
2336 struct xfrm_encap_tmpl* tmpl = NULL;
2337 bool got_replay_state;
2338 struct xfrm_replay_state replay;
2339
2340 memset(&request, 0, sizeof(request));
2341
2342 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x for update", spi);
2343
2344 /* query the exisiting SA first */
2345 hdr = (struct nlmsghdr*)request;
2346 hdr->nlmsg_flags = NLM_F_REQUEST;
2347 hdr->nlmsg_type = XFRM_MSG_GETSA;
2348 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2349
2350 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2351 host2xfrm(dst, &sa_id->daddr);
2352 sa_id->spi = spi;
2353 sa_id->proto = proto_ike2kernel(protocol);
2354 sa_id->family = dst->get_family(dst);
2355
2356 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2357 {
2358 hdr = out;
2359 while (NLMSG_OK(hdr, len))
2360 {
2361 switch (hdr->nlmsg_type)
2362 {
2363 case XFRM_MSG_NEWSA:
2364 {
2365 out_sa = NLMSG_DATA(hdr);
2366 break;
2367 }
2368 case NLMSG_ERROR:
2369 {
2370 struct nlmsgerr *err = NLMSG_DATA(hdr);
2371 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2372 strerror(-err->error), -err->error);
2373 break;
2374 }
2375 default:
2376 hdr = NLMSG_NEXT(hdr, len);
2377 continue;
2378 case NLMSG_DONE:
2379 break;
2380 }
2381 break;
2382 }
2383 }
2384 if (out_sa == NULL)
2385 {
2386 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2387 free(out);
2388 return FAILED;
2389 }
2390
2391 /* try to get the replay state */
2392 got_replay_state = (get_replay_state(
2393 this, spi, protocol, dst, &replay) == SUCCESS);
2394
2395 /* delete the old SA */
2396 if (this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
2397 {
2398 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI 0x%x", spi);
2399 free(out);
2400 return FAILED;
2401 }
2402
2403 DBG2(DBG_KNL, "updating SAD entry with SPI 0x%x from %#H..%#H to %#H..%#H",
2404 spi, src, dst, new_src, new_dst);
2405
2406 /* copy over the SA from out to request */
2407 hdr = (struct nlmsghdr*)request;
2408 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
2409 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2410 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2411 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2412 sa = NLMSG_DATA(hdr);
2413 sa->family = new_dst->get_family(new_dst);
2414
2415 if (!src->ip_equals(src, new_src))
2416 {
2417 host2xfrm(new_src, &sa->saddr);
2418 }
2419 if (!dst->ip_equals(dst, new_dst))
2420 {
2421 host2xfrm(new_dst, &sa->id.daddr);
2422 }
2423
2424 rta = XFRM_RTA(out, struct xfrm_usersa_info);
2425 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
2426 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
2427 while(RTA_OK(rta, rtasize))
2428 {
2429 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2430 if (rta->rta_type != XFRMA_ENCAP || encap)
2431 {
2432 if (rta->rta_type == XFRMA_ENCAP)
2433 { /* update encap tmpl */
2434 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2435 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2436 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2437 }
2438 memcpy(pos, rta, rta->rta_len);
2439 pos += RTA_ALIGN(rta->rta_len);
2440 hdr->nlmsg_len += RTA_ALIGN(rta->rta_len);
2441 }
2442 rta = RTA_NEXT(rta, rtasize);
2443 }
2444
2445 rta = (struct rtattr*)pos;
2446 if (tmpl == NULL && encap)
2447 { /* add tmpl if we are enabling it */
2448 rta->rta_type = XFRMA_ENCAP;
2449 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2450
2451 hdr->nlmsg_len += rta->rta_len;
2452 if (hdr->nlmsg_len > sizeof(request))
2453 {
2454 return FAILED;
2455 }
2456
2457 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2458 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2459 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2460 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2461 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2462
2463 rta = XFRM_RTA_NEXT(rta);
2464 }
2465
2466 if (got_replay_state)
2467 { /* copy the replay data if available */
2468 rta->rta_type = XFRMA_REPLAY_VAL;
2469 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_replay_state));
2470
2471 hdr->nlmsg_len += rta->rta_len;
2472 if (hdr->nlmsg_len > sizeof(request))
2473 {
2474 return FAILED;
2475 }
2476 memcpy(RTA_DATA(rta), &replay, sizeof(replay));
2477
2478 rta = XFRM_RTA_NEXT(rta);
2479 }
2480
2481 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2482 {
2483 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2484 free(out);
2485 return FAILED;
2486 }
2487 free(out);
2488
2489 return SUCCESS;
2490 }
2491
2492 /**
2493 * Implementation of kernel_interface_t.query_sa.
2494 */
2495 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
2496 u_int32_t spi, protocol_id_t protocol,
2497 u_int32_t *use_time)
2498 {
2499 unsigned char request[BUFFER_SIZE];
2500 struct nlmsghdr *out = NULL, *hdr;
2501 struct xfrm_usersa_id *sa_id;
2502 struct xfrm_usersa_info *sa = NULL;
2503 size_t len;
2504
2505 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x", spi);
2506 memset(&request, 0, sizeof(request));
2507
2508 hdr = (struct nlmsghdr*)request;
2509 hdr->nlmsg_flags = NLM_F_REQUEST;
2510 hdr->nlmsg_type = XFRM_MSG_GETSA;
2511 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2512
2513 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2514 host2xfrm(dst, &sa_id->daddr);
2515 sa_id->spi = spi;
2516 sa_id->proto = proto_ike2kernel(protocol);
2517 sa_id->family = dst->get_family(dst);
2518
2519 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2520 {
2521 hdr = out;
2522 while (NLMSG_OK(hdr, len))
2523 {
2524 switch (hdr->nlmsg_type)
2525 {
2526 case XFRM_MSG_NEWSA:
2527 {
2528 sa = NLMSG_DATA(hdr);
2529 break;
2530 }
2531 case NLMSG_ERROR:
2532 {
2533 struct nlmsgerr *err = NLMSG_DATA(hdr);
2534 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2535 strerror(-err->error), -err->error);
2536 break;
2537 }
2538 default:
2539 hdr = NLMSG_NEXT(hdr, len);
2540 continue;
2541 case NLMSG_DONE:
2542 break;
2543 }
2544 break;
2545 }
2546 }
2547
2548 if (sa == NULL)
2549 {
2550 DBG1(DBG_KNL, "unable to query SAD entry with SPI 0x%x", spi);
2551 free(out);
2552 return FAILED;
2553 }
2554
2555 *use_time = sa->curlft.use_time;
2556 free (out);
2557 return SUCCESS;
2558 }
2559
2560 /**
2561 * Implementation of kernel_interface_t.del_sa.
2562 */
2563 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2564 u_int32_t spi, protocol_id_t protocol)
2565 {
2566 unsigned char request[BUFFER_SIZE];
2567 struct nlmsghdr *hdr;
2568 struct xfrm_usersa_id *sa_id;
2569
2570 memset(&request, 0, sizeof(request));
2571
2572 DBG2(DBG_KNL, "deleting SAD entry with SPI 0x%x", spi);
2573
2574 hdr = (struct nlmsghdr*)request;
2575 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2576 hdr->nlmsg_type = XFRM_MSG_DELSA;
2577 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2578
2579 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2580 host2xfrm(dst, &sa_id->daddr);
2581 sa_id->spi = spi;
2582 sa_id->proto = proto_ike2kernel(protocol);
2583 sa_id->family = dst->get_family(dst);
2584
2585 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2586 {
2587 DBG1(DBG_KNL, "unable to delete SAD entry with SPI 0x%x", spi);
2588 return FAILED;
2589 }
2590 DBG2(DBG_KNL, "deleted SAD entry with SPI 0x%x", spi);
2591 return SUCCESS;
2592 }
2593
2594 /**
2595 * Implementation of kernel_interface_t.add_policy.
2596 */
2597 static status_t add_policy(private_kernel_interface_t *this,
2598 host_t *src, host_t *dst,
2599 traffic_selector_t *src_ts,
2600 traffic_selector_t *dst_ts,
2601 policy_dir_t direction, protocol_id_t protocol,
2602 u_int32_t reqid, bool high_prio, mode_t mode,
2603 u_int16_t ipcomp)
2604 {
2605 iterator_t *iterator;
2606 policy_entry_t *current, *policy;
2607 bool found = FALSE;
2608 unsigned char request[BUFFER_SIZE];
2609 struct xfrm_userpolicy_info *policy_info;
2610 struct nlmsghdr *hdr;
2611
2612 /* create a policy */
2613 policy = malloc_thing(policy_entry_t);
2614 memset(policy, 0, sizeof(policy_entry_t));
2615 policy->sel = ts2selector(src_ts, dst_ts);
2616 policy->direction = direction;
2617
2618 /* find the policy, which matches EXACTLY */
2619 pthread_mutex_lock(&this->mutex);
2620 iterator = this->policies->create_iterator(this->policies, TRUE);
2621 while (iterator->iterate(iterator, (void**)&current))
2622 {
2623 if (memeq(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) &&
2624 policy->direction == current->direction)
2625 {
2626 /* use existing policy */
2627 current->refcount++;
2628 DBG2(DBG_KNL, "policy %R===%R already exists, increasing "
2629 "refcount", src_ts, dst_ts);
2630 free(policy);
2631 policy = current;
2632 found = TRUE;
2633 break;
2634 }
2635 }
2636 iterator->destroy(iterator);
2637 if (!found)
2638 { /* apply the new one, if we have no such policy */
2639 this->policies->insert_last(this->policies, policy);
2640 policy->refcount = 1;
2641 }
2642
2643 DBG2(DBG_KNL, "adding policy %R===%R", src_ts, dst_ts);
2644
2645 memset(&request, 0, sizeof(request));
2646 hdr = (struct nlmsghdr*)request;
2647 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2648 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2649 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2650
2651 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2652 policy_info->sel = policy->sel;
2653 policy_info->dir = policy->direction;
2654 /* calculate priority based on source selector size, small size = high prio */
2655 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2656 policy_info->priority -= policy->sel.prefixlen_s * 10;
2657 policy_info->priority -= policy->sel.proto ? 2 : 0;
2658 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2659 policy_info->action = XFRM_POLICY_ALLOW;
2660 policy_info->share = XFRM_SHARE_ANY;
2661 pthread_mutex_unlock(&this->mutex);
2662
2663 /* policies don't expire */
2664 policy_info->lft.soft_byte_limit = XFRM_INF;
2665 policy_info->lft.soft_packet_limit = XFRM_INF;
2666 policy_info->lft.hard_byte_limit = XFRM_INF;
2667 policy_info->lft.hard_packet_limit = XFRM_INF;
2668 policy_info->lft.soft_add_expires_seconds = 0;
2669 policy_info->lft.hard_add_expires_seconds = 0;
2670 policy_info->lft.soft_use_expires_seconds = 0;
2671 policy_info->lft.hard_use_expires_seconds = 0;
2672
2673 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2674 rthdr->rta_type = XFRMA_TMPL;
2675 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2676
2677 hdr->nlmsg_len += rthdr->rta_len;
2678 if (hdr->nlmsg_len > sizeof(request))
2679 {
2680 return FAILED;
2681 }
2682
2683 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2684
2685 if (ipcomp != IPCOMP_NONE)
2686 {
2687 tmpl->reqid = reqid;
2688 tmpl->id.proto = IPPROTO_COMP;
2689 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2690 tmpl->mode = mode;
2691 tmpl->optional = direction != POLICY_OUT;
2692 tmpl->family = src->get_family(src);
2693
2694 host2xfrm(src, &tmpl->saddr);
2695 host2xfrm(dst, &tmpl->id.daddr);
2696
2697 /* add an additional xfrm_user_tmpl */
2698 rthdr->rta_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2699 hdr->nlmsg_len += RTA_LENGTH(sizeof(struct xfrm_user_tmpl));
2700 if (hdr->nlmsg_len > sizeof(request))
2701 {
2702 return FAILED;
2703 }
2704
2705 tmpl++;
2706 }
2707
2708 tmpl->reqid = reqid;
2709 tmpl->id.proto = proto_ike2kernel(protocol);
2710 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2711 tmpl->mode = mode;
2712 tmpl->family = src->get_family(src);
2713
2714 host2xfrm(src, &tmpl->saddr);
2715 host2xfrm(dst, &tmpl->id.daddr);
2716
2717 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2718 {
2719 DBG1(DBG_KNL, "unable to add policy %R===%R", src_ts, dst_ts);
2720 return FAILED;
2721 }
2722
2723 /* install a route, if:
2724 * - we are NOT updating a policy
2725 * - this is a forward policy (to just get one for each child)
2726 * - we are in tunnel mode
2727 * - we are not using IPv6 (does not work correctly yet!)
2728 * - routing is not disabled via strongswan.conf
2729 */
2730 if (policy->route == NULL && direction == POLICY_FWD &&
2731 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6 &&
2732 this->install_routes)
2733 {
2734 policy->route = malloc_thing(route_entry_t);
2735 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2736 {
2737 /* get the nexthop to src (src as we are in POLICY_FWD).*/
2738 policy->route->gateway = get_route(this, src, TRUE);
2739 policy->route->if_index = get_interface_index(this, dst);
2740 policy->route->dst_net = chunk_alloc(
2741 policy->sel.family == AF_INET ? 4 : 16);
2742 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr,
2743 policy->route->dst_net.len);
2744 policy->route->prefixlen = policy->sel.prefixlen_s;
2745
2746 switch (manage_srcroute(this, RTM_NEWROUTE,
2747 NLM_F_CREATE | NLM_F_EXCL, policy->route))
2748 {
2749 default:
2750 DBG1(DBG_KNL, "unable to install source route for %H",
2751 policy->route->src_ip);
2752 /* FALL */
2753 case ALREADY_DONE:
2754 /* route exists, do not uninstall */
2755 route_entry_destroy(policy->route);
2756 policy->route = NULL;
2757 break;
2758 case SUCCESS:
2759 break;
2760 }
2761 }
2762 else
2763 {
2764 free(policy->route);
2765 policy->route = NULL;
2766 }
2767 }
2768
2769 return SUCCESS;
2770 }
2771
2772 /**
2773 * Implementation of kernel_interface_t.query_policy.
2774 */
2775 static status_t query_policy(private_kernel_interface_t *this,
2776 traffic_selector_t *src_ts,
2777 traffic_selector_t *dst_ts,
2778 policy_dir_t direction, u_int32_t *use_time)
2779 {
2780 unsigned char request[BUFFER_SIZE];
2781 struct nlmsghdr *out = NULL, *hdr;
2782 struct xfrm_userpolicy_id *policy_id;
2783 struct xfrm_userpolicy_info *policy = NULL;
2784 size_t len;
2785
2786 memset(&request, 0, sizeof(request));
2787
2788 DBG2(DBG_KNL, "querying policy %R===%R", src_ts, dst_ts);
2789
2790 hdr = (struct nlmsghdr*)request;
2791 hdr->nlmsg_flags = NLM_F_REQUEST;
2792 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2793 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2794
2795 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2796 policy_id->sel = ts2selector(src_ts, dst_ts);
2797 policy_id->dir = direction;
2798
2799 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2800 {
2801 hdr = out;
2802 while (NLMSG_OK(hdr, len))
2803 {
2804 switch (hdr->nlmsg_type)
2805 {
2806 case XFRM_MSG_NEWPOLICY:
2807 {
2808 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2809 break;
2810 }
2811 case NLMSG_ERROR:
2812 {
2813 struct nlmsgerr *err = NLMSG_DATA(hdr);
2814 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2815 strerror(-err->error), -err->error);
2816 break;
2817 }
2818 default:
2819 hdr = NLMSG_NEXT(hdr, len);
2820 continue;
2821 case NLMSG_DONE:
2822 break;
2823 }
2824 break;
2825 }
2826 }
2827
2828 if (policy == NULL)
2829 {
2830 DBG2(DBG_KNL, "unable to query policy %R===%R", src_ts, dst_ts);
2831 free(out);
2832 return FAILED;
2833 }
2834 *use_time = (time_t)policy->curlft.use_time;
2835
2836 free(out);
2837 return SUCCESS;
2838 }
2839
2840 /**
2841 * Implementation of kernel_interface_t.del_policy.
2842 */
2843 static status_t del_policy(private_kernel_interface_t *this,
2844 traffic_selector_t *src_ts,
2845 traffic_selector_t *dst_ts,
2846 policy_dir_t direction)
2847 {
2848 policy_entry_t *current, policy, *to_delete = NULL;
2849 route_entry_t *route;
2850 unsigned char request[BUFFER_SIZE];
2851 struct nlmsghdr *hdr;
2852 struct xfrm_userpolicy_id *policy_id;
2853 iterator_t *iterator;
2854
2855 DBG2(DBG_KNL, "deleting policy %R===%R", src_ts, dst_ts);
2856
2857 /* create a policy */
2858 memset(&policy, 0, sizeof(policy_entry_t));
2859 policy.sel = ts2selector(src_ts, dst_ts);
2860 policy.direction = direction;
2861
2862 /* find the policy */
2863 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2864 while (iterator->iterate(iterator, (void**)&current))
2865 {
2866 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2867 policy.direction == current->direction)
2868 {
2869 to_delete = current;
2870 if (--to_delete->refcount > 0)
2871 {
2872 /* is used by more SAs, keep in kernel */
2873 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2874 iterator->destroy(iterator);
2875 return SUCCESS;
2876 }
2877 /* remove if last reference */
2878 iterator->remove(iterator);
2879 break;
2880 }
2881 }
2882 iterator->destroy(iterator);
2883 if (!to_delete)
2884 {
2885 DBG1(DBG_KNL, "deleting policy %R===%R failed, not found", src_ts, dst_ts);
2886 return NOT_FOUND;
2887 }
2888
2889 memset(&request, 0, sizeof(request));
2890
2891 hdr = (struct nlmsghdr*)request;
2892 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2893 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2894 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2895
2896 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2897 policy_id->sel = to_delete->sel;
2898 policy_id->dir = direction;
2899
2900 route = to_delete->route;
2901 free(to_delete);
2902
2903 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2904 {
2905 DBG1(DBG_KNL, "unable to delete policy %R===%R", src_ts, dst_ts);
2906 return FAILED;
2907 }
2908
2909 if (route)
2910 {
2911 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2912 {
2913 DBG1(DBG_KNL, "error uninstalling route installed with "
2914 "policy %R===%R", src_ts, dst_ts);
2915 }
2916 route_entry_destroy(route);
2917 }
2918 return SUCCESS;
2919 }
2920
2921 /**
2922 * Implementation of kernel_interface_t.destroy.
2923 */
2924 static void destroy(private_kernel_interface_t *this)
2925 {
2926 if (this->routing_table)
2927 {
2928 manage_rule(this, RTM_DELRULE, this->routing_table,
2929 this->routing_table_prio);
2930 }
2931
2932 this->job->cancel(this->job);
2933 close(this->socket_xfrm_events);
2934 close(this->socket_xfrm);
2935 close(this->socket_rt_events);
2936 close(this->socket_rt);
2937 this->policies->destroy(this->policies);
2938 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2939 free(this);
2940 }
2941
2942 /*
2943 * Described in header.
2944 */
2945 kernel_interface_t *kernel_interface_create()
2946 {
2947 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
2948 struct sockaddr_nl addr;
2949
2950 /* public functions */
2951 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
2952 this->public.get_cpi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,u_int32_t,u_int16_t*))get_cpi;
2953 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,u_int16_t,u_int16_t,u_int16_t,u_int16_t,prf_plus_t*,mode_t,u_int16_t,bool,bool))add_sa;
2954 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
2955 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
2956 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
2957 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t,u_int16_t))add_policy;
2958 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
2959 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
2960 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
2961 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
2962 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
2963 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
2964 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
2965 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
2966
2967 /* private members */
2968 this->policies = linked_list_create();
2969 this->ifaces = linked_list_create();
2970 this->hiter = NULL;
2971 this->seq = 200;
2972 pthread_mutex_init(&this->mutex, NULL);
2973 pthread_mutex_init(&this->nl_mutex, NULL);
2974 pthread_cond_init(&this->cond, NULL);
2975 timerclear(&this->last_roam);
2976 this->install_routes = lib->settings->get_bool(lib->settings,
2977 "charon.install_routes", TRUE);
2978 this->routing_table = lib->settings->get_int(lib->settings,
2979 "charon.routing_table", IPSEC_ROUTING_TABLE);
2980 this->routing_table_prio = lib->settings->get_int(lib->settings,
2981 "charon.routing_table_prio", IPSEC_ROUTING_TABLE_PRIO);
2982 memset(&addr, 0, sizeof(addr));
2983 addr.nl_family = AF_NETLINK;
2984
2985 /* create and bind RT socket */
2986 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2987 if (this->socket_rt <= 0)
2988 {
2989 charon->kill(charon, "unable to create RT netlink socket");
2990 }
2991 addr.nl_groups = 0;
2992 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
2993 {
2994 charon->kill(charon, "unable to bind RT netlink socket");
2995 }
2996
2997 /* create and bind RT socket for events (address/interface/route changes) */
2998 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2999 if (this->socket_rt_events <= 0)
3000 {
3001 charon->kill(charon, "unable to create RT event socket");
3002 }
3003 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
3004 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
3005 if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
3006 {
3007 charon->kill(charon, "unable to bind RT event socket");
3008 }
3009
3010 /* create and bind XFRM socket */
3011 this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
3012 if (this->socket_xfrm <= 0)
3013 {
3014 charon->kill(charon, "unable to create XFRM netlink socket");
3015 }
3016 addr.nl_groups = 0;
3017 if (bind(this->socket_xfrm, (struct sockaddr*)&addr, sizeof(addr)))
3018 {
3019 charon->kill(charon, "unable to bind XFRM netlink socket");
3020 }
3021
3022 /* create and bind XFRM socket for ACQUIRE & EXPIRE */
3023 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
3024 if (this->socket_xfrm_events <= 0)
3025 {
3026 charon->kill(charon, "unable to create XFRM event socket");
3027 }
3028 addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
3029 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
3030 {
3031 charon->kill(charon, "unable to bind XFRM event socket");
3032 }
3033
3034 this->job = callback_job_create((callback_job_cb_t)receive_events,
3035 this, NULL, NULL);
3036 charon->processor->queue_job(charon->processor, (job_t*)this->job);
3037
3038 if (init_address_list(this) != SUCCESS)
3039 {
3040 charon->kill(charon, "unable to get interface list");
3041 }
3042
3043 if (this->routing_table)
3044 {
3045 if (manage_rule(this, RTM_NEWRULE, this->routing_table,
3046 this->routing_table_prio) != SUCCESS)
3047 {
3048 DBG1(DBG_KNL, "unable to create routing table rule");
3049 }
3050 }
3051
3052 return &this->public;
3053 }
3054