include default route with missing dst field into route evaluation
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /**
2 * @file kernel_interface.c
3 *
4 * @brief Implementation of kernel_interface_t.
5 *
6 */
7
8 /*
9 * Copyright (C) 2005-2007 Martin Willi
10 * Copyright (C) 2006-2007 Tobias Brunner
11 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
12 * Copyright (C) 2006 Daniel Roethlisberger
13 * Copyright (C) 2005 Jan Hutter
14 * Hochschule fuer Technik Rapperswil
15 * Copyright (C) 2003 Herbert Xu.
16 *
17 * Based on xfrm code from pluto.
18 *
19 * This program is free software; you can redistribute it and/or modify it
20 * under the terms of the GNU General Public License as published by the
21 * Free Software Foundation; either version 2 of the License, or (at your
22 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 * for more details.
28 */
29
30 #include <sys/types.h>
31 #include <sys/socket.h>
32 #include <linux/netlink.h>
33 #include <linux/rtnetlink.h>
34 #include <linux/xfrm.h>
35 #include <linux/udp.h>
36 #include <pthread.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <net/if.h>
42 #include <sys/ioctl.h>
43
44 #include "kernel_interface.h"
45
46 #include <daemon.h>
47 #include <utils/linked_list.h>
48 #include <processing/jobs/delete_child_sa_job.h>
49 #include <processing/jobs/rekey_child_sa_job.h>
50 #include <processing/jobs/acquire_job.h>
51 #include <processing/jobs/callback_job.h>
52 #include <processing/jobs/roam_job.h>
53
/** routing table for routes installed by us (can be overridden at build time) */
#ifndef IPSEC_ROUTING_TABLE
#define IPSEC_ROUTING_TABLE 100
#endif
/* presumably the priority of the rule pointing to IPSEC_ROUTING_TABLE;
 * overridable at build time -- TODO confirm against its users */
#ifndef IPSEC_ROUTING_TABLE_PRIO
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** kernel level protocol identifiers, compared against xfrm state ids */
#define KERNEL_ESP 50
#define KERNEL_AH 51

/** default priority of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** size in bytes of the stack buffers used to build netlink requests */
#define BUFFER_SIZE 1024
71
/**
 * returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'
 *
 * NLMSG_DATA() yields a void*, so it is cast to char* before the offset is
 * added; arithmetic on void* is a compiler extension, not standard C.
 */
#define XFRM_RTA(nlh, x) ((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + NLMSG_ALIGN(sizeof(x))))
/**
 * returns a pointer to the next rtattr following rta.
 * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
 */
#define XFRM_RTA_NEXT(rta) ((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
/**
 * returns the total size of attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
87
88 typedef struct kernel_algorithm_t kernel_algorithm_t;
89
/**
 * Mapping from the algorithms defined in IKEv2 to
 * kernel level algorithm names and their key length.
 *
 * Tables of this type are initialized positionally, so the field order
 * (ikev2_id, name, key_size) must not change.
 */
struct kernel_algorithm_t {
	/**
	 * Identifier specified in IKEv2
	 */
	int ikev2_id;

	/**
	 * Name of the algorithm, as used as kernel identifier
	 */
	char *name;

	/**
	 * Key length in bits, if fixed size; 0 in the tables for algorithms
	 * whose key length is taken from the IKEv2 algorithm instead
	 */
	u_int key_size;
};
/** value of ikev2_id marking the terminating entry of an algorithm table */
#define END_OF_LIST -1
111
/**
 * Algorithms for encryption.
 *
 * Each entry is {IKEv2 identifier, kernel name, fixed key size in bits};
 * the table is terminated by an END_OF_LIST entry. Commented-out entries
 * have no kernel name assigned here.
 */
kernel_algorithm_t encryption_algs[] = {
/*	{ENCR_DES_IV64, "***", 0}, */
	{ENCR_DES, "des", 64},
	{ENCR_3DES, "des3_ede", 192},
/*	{ENCR_RC5, "***", 0}, */
/*	{ENCR_IDEA, "***", 0}, */
	{ENCR_CAST, "cast128", 0},
	{ENCR_BLOWFISH, "blowfish", 0},
/*	{ENCR_3IDEA, "***", 0}, */
/*	{ENCR_DES_IV32, "***", 0}, */
	{ENCR_NULL, "cipher_null", 0},
	{ENCR_AES_CBC, "aes", 0},
/*	{ENCR_AES_CTR, "***", 0}, */
	{END_OF_LIST, NULL, 0},
};
130
/**
 * Algorithms for integrity protection.
 *
 * Each entry is {IKEv2 identifier, kernel name, key size in bits};
 * the table is terminated by an END_OF_LIST entry. Commented-out entries
 * have no kernel name assigned here.
 */
kernel_algorithm_t integrity_algs[] = {
	{AUTH_HMAC_MD5_96, "md5", 128},
	{AUTH_HMAC_SHA1_96, "sha1", 160},
	{AUTH_HMAC_SHA2_256_128, "sha256", 256},
	{AUTH_HMAC_SHA2_384_192, "sha384", 384},
	{AUTH_HMAC_SHA2_512_256, "sha512", 512},
/*	{AUTH_DES_MAC, "***", 0}, */
/*	{AUTH_KPDK_MD5, "***", 0}, */
	{AUTH_AES_XCBC_96, "xcbc(aes)", 128},
	{END_OF_LIST, NULL, 0},
};
145
146 /**
147 * Look up a kernel algorithm name and its key size
148 */
149 char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
150 algorithm_t *ikev2_algo, u_int *key_size)
151 {
152 while (kernel_algo->ikev2_id != END_OF_LIST)
153 {
154 if (ikev2_algo->algorithm == kernel_algo->ikev2_id)
155 {
156 /* match, evaluate key length */
157 if (ikev2_algo->key_size)
158 { /* variable length */
159 *key_size = ikev2_algo->key_size;
160 }
161 else
162 { /* fixed length */
163 *key_size = kernel_algo->key_size;
164 }
165 return kernel_algo->name;
166 }
167 kernel_algo++;
168 }
169 return NULL;
170 }
171
172 typedef struct route_entry_t route_entry_t;
173
/**
 * installed routing entry.
 *
 * src_ip, gateway and dst_net are released by route_entry_destroy().
 */
struct route_entry_t {

	/** Index of the interface the route is bound to */
	int if_index;

	/** Source ip of the route */
	host_t *src_ip;

	/** gateway for this route */
	host_t *gateway;

	/** Destination net */
	chunk_t dst_net;

	/** Destination net prefixlen */
	u_int8_t prefixlen;
};
194
195 /**
196 * destroy an route_entry_t object
197 */
198 static void route_entry_destroy(route_entry_t *this)
199 {
200 this->src_ip->destroy(this->src_ip);
201 this->gateway->destroy(this->gateway);
202 chunk_free(&this->dst_net);
203 free(this);
204 }
205
206 typedef struct policy_entry_t policy_entry_t;
207
/**
 * installed kernel policy, kept in the policies list of the
 * kernel interface.
 */
struct policy_entry_t {

	/** direction of this policy: in, out, forward */
	u_int8_t direction;

	/** reqid of the policy */
	u_int32_t reqid;

	/** parameters of installed policy */
	struct xfrm_selector sel;

	/** associated route installed for this policy */
	route_entry_t *route;

	/** by how many CHILD_SA's this policy is used */
	u_int refcount;
};
228
229 typedef struct addr_entry_t addr_entry_t;
230
/**
 * IP address in an iface_entry_t
 */
struct addr_entry_t {

	/** The ip address, released by addr_entry_destroy() */
	host_t *ip;

	/** virtual IP managed by us */
	bool virtual;

	/** scope of the address, taken from ifa_scope of the netlink message */
	u_char scope;

	/** Number of times this IP is used, if virtual */
	u_int refcount;
};
248
249 /**
250 * destroy a addr_entry_t object
251 */
252 static void addr_entry_destroy(addr_entry_t *this)
253 {
254 this->ip->destroy(this->ip);
255 free(this);
256 }
257
258 typedef struct iface_entry_t iface_entry_t;
259
/**
 * A network interface on this system, containing addr_entry_t's
 */
struct iface_entry_t {

	/** interface index, as reported in netlink link/address messages */
	int ifindex;

	/** name of the interface, always null terminated */
	char ifname[IFNAMSIZ];

	/** interface flags, as in netdevice(7) SIOCGIFFLAGS */
	u_int flags;

	/** list of addresses (addr_entry_t), released with the interface */
	linked_list_t *addrs;
};
277
278 /**
279 * destroy an interface entry
280 */
281 static void iface_entry_destroy(iface_entry_t *this)
282 {
283 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
284 free(this);
285 }
286
287 typedef struct private_kernel_interface_t private_kernel_interface_t;
288
/**
 * Private variables and functions of kernel_interface class.
 */
struct private_kernel_interface_t {
	/**
	 * Public part of the kernel_interface_t object.
	 */
	kernel_interface_t public;

	/**
	 * mutex to lock access to the various lists; also held by
	 * netlink_send() for the duration of a request/response exchange
	 */
	pthread_mutex_t mutex;

	/**
	 * List of installed policies (policy_entry_t)
	 */
	linked_list_t *policies;

	/**
	 * Cached list of interfaces and their addresses (iface_entry_t)
	 */
	linked_list_t *ifaces;

	/**
	 * address iterator of the interface currently visited by iface_hook(),
	 * NULL while no interface is being walked
	 */
	iterator_t *hiter;

	/**
	 * job receiving netlink events
	 */
	callback_job_t *job;

	/**
	 * current sequence number for netlink request
	 */
	int seq;

	/**
	 * Netlink xfrm socket (IPsec)
	 */
	int socket_xfrm;

	/**
	 * netlink xfrm socket to receive acquire and expire events
	 */
	int socket_xfrm_events;

	/**
	 * Netlink rt socket (routing)
	 */
	int socket_rt;

	/**
	 * Netlink rt socket to receive address change events
	 */
	int socket_rt_events;
};
348
349 /**
350 * convert a host_t to a struct xfrm_address
351 */
352 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
353 {
354 chunk_t chunk = host->get_address(host);
355 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
356 }
357
358 /**
359 * convert a traffic selector address range to subnet and its mask.
360 */
361 static void ts2subnet(traffic_selector_t* ts,
362 xfrm_address_t *net, u_int8_t *mask)
363 {
364 /* there is no way to do this cleanly, as the address range may
365 * be anything else but a subnet. We use from_addr as subnet
366 * and try to calculate a usable subnet mask.
367 */
368 int byte, bit;
369 bool found = FALSE;
370 chunk_t from, to;
371 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
372
373 from = ts->get_from_address(ts);
374 to = ts->get_to_address(ts);
375
376 *mask = (size * 8);
377 /* go trough all bits of the addresses, beginning in the front.
378 * as long as they are equal, the subnet gets larger
379 */
380 for (byte = 0; byte < size; byte++)
381 {
382 for (bit = 7; bit >= 0; bit--)
383 {
384 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
385 {
386 *mask = ((7 - bit) + (byte * 8));
387 found = TRUE;
388 break;
389 }
390 }
391 if (found)
392 {
393 break;
394 }
395 }
396 memcpy(net, from.ptr, from.len);
397 chunk_free(&from);
398 chunk_free(&to);
399 }
400
401 /**
402 * convert a traffic selector port range to port/portmask
403 */
404 static void ts2ports(traffic_selector_t* ts,
405 u_int16_t *port, u_int16_t *mask)
406 {
407 /* linux does not seem to accept complex portmasks. Only
408 * any or a specific port is allowed. We set to any, if we have
409 * a port range, or to a specific, if we have one port only.
410 */
411 u_int16_t from, to;
412
413 from = ts->get_from_port(ts);
414 to = ts->get_to_port(ts);
415
416 if (from == to)
417 {
418 *port = htons(from);
419 *mask = ~0;
420 }
421 else
422 {
423 *port = 0;
424 *mask = 0;
425 }
426 }
427
428 /**
429 * convert a pair of traffic_selectors to a xfrm_selector
430 */
431 static struct xfrm_selector ts2selector(traffic_selector_t *src,
432 traffic_selector_t *dst)
433 {
434 struct xfrm_selector sel;
435
436 memset(&sel, 0, sizeof(sel));
437 sel.family = src->get_type(src) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
438 /* src or dest proto may be "any" (0), use more restrictive one */
439 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
440 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
441 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
442 ts2ports(dst, &sel.dport, &sel.dport_mask);
443 ts2ports(src, &sel.sport, &sel.sport_mask);
444 sel.ifindex = 0;
445 sel.user = 0;
446
447 return sel;
448 }
449
450 /**
451 * Creates an rtattr and adds it to the netlink message
452 */
453 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
454 size_t buflen)
455 {
456 struct rtattr *rta;
457
458 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
459 {
460 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
461 return;
462 }
463
464 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
465 rta->rta_type = rta_type;
466 rta->rta_len = RTA_LENGTH(data.len);
467 memcpy(RTA_DATA(rta), data.ptr, data.len);
468 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
469 }
470
471 /**
472 * process a XFRM_MSG_ACQUIRE from kernel
473 */
474 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
475 {
476 u_int32_t reqid = 0;
477 job_t *job;
478 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
479 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
480
481 if (RTA_OK(rtattr, rtsize))
482 {
483 if (rtattr->rta_type == XFRMA_TMPL)
484 {
485 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
486 reqid = tmpl->reqid;
487 }
488 }
489 if (reqid == 0)
490 {
491 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
492 return;
493 }
494 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
495 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid %d", reqid);
496 job = (job_t*)acquire_job_create(reqid);
497 charon->processor->queue_job(charon->processor, job);
498 }
499
500 /**
501 * process a XFRM_MSG_EXPIRE from kernel
502 */
503 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
504 {
505 job_t *job;
506 protocol_id_t protocol;
507 u_int32_t spi, reqid;
508 struct xfrm_user_expire *expire;
509
510 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
511 protocol = expire->state.id.proto == KERNEL_ESP ? PROTO_ESP : PROTO_AH;
512 spi = expire->state.id.spi;
513 reqid = expire->state.reqid;
514
515 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
516 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA 0x%x (reqid %d)",
517 expire->hard ? "delete" : "rekey", protocol_id_names,
518 protocol, ntohl(spi), reqid);
519 if (expire->hard)
520 {
521 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
522 }
523 else
524 {
525 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
526 }
527 charon->processor->queue_job(charon->processor, job);
528 }
529
530 /**
531 * process RTM_NEWLINK/RTM_DELLINK from kernel
532 */
533 static void process_link(private_kernel_interface_t *this,
534 struct nlmsghdr *hdr, bool event)
535 {
536 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
537 struct rtattr *rta = IFLA_RTA(msg);
538 size_t rtasize = IFLA_PAYLOAD (hdr);
539 iterator_t *iterator;
540 iface_entry_t *current, *entry = NULL;
541 char *name = NULL;
542 bool update = FALSE;
543
544 while(RTA_OK(rta, rtasize))
545 {
546 switch (rta->rta_type)
547 {
548 case IFLA_IFNAME:
549 name = RTA_DATA(rta);
550 break;
551 }
552 rta = RTA_NEXT(rta, rtasize);
553 }
554 if (!name)
555 {
556 name = "(unknown)";
557 }
558
559 switch (hdr->nlmsg_type)
560 {
561 case RTM_NEWLINK:
562 {
563 if (msg->ifi_flags & IFF_LOOPBACK)
564 { /* ignore loopback interfaces */
565 break;
566 }
567 iterator = this->ifaces->create_iterator_locked(this->ifaces,
568 &this->mutex);
569 while (iterator->iterate(iterator, (void**)&current))
570 {
571 if (current->ifindex == msg->ifi_index)
572 {
573 entry = current;
574 break;
575 }
576 }
577 if (!entry)
578 {
579 entry = malloc_thing(iface_entry_t);
580 entry->ifindex = msg->ifi_index;
581 entry->flags = 0;
582 entry->addrs = linked_list_create();
583 this->ifaces->insert_last(this->ifaces, entry);
584 }
585 memcpy(entry->ifname, name, IFNAMSIZ);
586 entry->ifname[IFNAMSIZ-1] = '\0';
587 if (event)
588 {
589 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
590 {
591 update = TRUE;
592 DBG1(DBG_KNL, "interface %s activated", name);
593 }
594 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
595 {
596 update = TRUE;
597 DBG1(DBG_KNL, "interface %s deactivated", name);
598 }
599 }
600 entry->flags = msg->ifi_flags;
601 iterator->destroy(iterator);
602 break;
603 }
604 case RTM_DELLINK:
605 {
606 iterator = this->ifaces->create_iterator_locked(this->ifaces,
607 &this->mutex);
608 while (iterator->iterate(iterator, (void**)&current))
609 {
610 if (current->ifindex == msg->ifi_index)
611 {
612 /* we do not remove it, as an address may be added to a
613 * "down" interface and we wan't to know that. */
614 current->flags = msg->ifi_flags;
615 break;
616 }
617 }
618 iterator->destroy(iterator);
619 break;
620 }
621 }
622
623 /* send an update to all IKE_SAs */
624 if (update && event)
625 {
626 charon->processor->queue_job(charon->processor,
627 (job_t*)roam_job_create(TRUE));
628 }
629 }
630
631 /**
632 * process RTM_NEWADDR/RTM_DELADDR from kernel
633 */
634 static void process_addr(private_kernel_interface_t *this,
635 struct nlmsghdr *hdr, bool event)
636 {
637 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
638 struct rtattr *rta = IFA_RTA(msg);
639 size_t rtasize = IFA_PAYLOAD (hdr);
640 host_t *host = NULL;
641 iterator_t *ifaces, *addrs;
642 iface_entry_t *iface;
643 addr_entry_t *addr;
644 chunk_t local = chunk_empty, address = chunk_empty;
645 bool update = FALSE, found = FALSE, changed = FALSE;
646
647 while(RTA_OK(rta, rtasize))
648 {
649 switch (rta->rta_type)
650 {
651 case IFA_LOCAL:
652 local.ptr = RTA_DATA(rta);
653 local.len = RTA_PAYLOAD(rta);
654 break;
655 case IFA_ADDRESS:
656 address.ptr = RTA_DATA(rta);
657 address.len = RTA_PAYLOAD(rta);
658 break;
659 }
660 rta = RTA_NEXT(rta, rtasize);
661 }
662
663 /* For PPP interfaces, we need the IFA_LOCAL address,
664 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
665 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
666 if (local.ptr)
667 {
668 host = host_create_from_chunk(msg->ifa_family, local, 0);
669 }
670 else if (address.ptr)
671 {
672 host = host_create_from_chunk(msg->ifa_family, address, 0);
673 }
674
675 if (host == NULL)
676 { /* bad family? */
677 return;
678 }
679
680 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
681 while (ifaces->iterate(ifaces, (void**)&iface))
682 {
683 if (iface->ifindex == msg->ifa_index)
684 {
685 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
686 while (addrs->iterate(addrs, (void**)&addr))
687 {
688 if (host->ip_equals(host, addr->ip))
689 {
690 found = TRUE;
691 if (hdr->nlmsg_type == RTM_DELADDR)
692 {
693 changed = TRUE;
694 addrs->remove(addrs);
695 addr_entry_destroy(addr);
696 DBG1(DBG_KNL, "%H disappeared from %s", host, iface->ifname);
697 }
698 }
699 }
700 addrs->destroy(addrs);
701
702 if (hdr->nlmsg_type == RTM_NEWADDR)
703 {
704 if (!found)
705 {
706 found = TRUE;
707 changed = TRUE;
708 addr = malloc_thing(addr_entry_t);
709 addr->ip = host->clone(host);
710 addr->virtual = FALSE;
711 addr->refcount = 1;
712 addr->scope = msg->ifa_scope;
713
714 iface->addrs->insert_last(iface->addrs, addr);
715 if (event)
716 {
717 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
718 }
719 }
720 }
721 if (found && (iface->flags & IFF_UP))
722 {
723 update = TRUE;
724 }
725 break;
726 }
727 }
728 ifaces->destroy(ifaces);
729 host->destroy(host);
730
731 /* send an update to all IKE_SAs */
732 if (update && event && changed)
733 {
734 charon->processor->queue_job(charon->processor,
735 (job_t*)roam_job_create(TRUE));
736 }
737 }
738
739 /**
740 * Receives events from kernel
741 */
742 static job_requeue_t receive_events(private_kernel_interface_t *this)
743 {
744 char response[1024];
745 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
746 struct sockaddr_nl addr;
747 socklen_t addr_len = sizeof(addr);
748 int len, oldstate, maxfd, selected;
749 fd_set rfds;
750
751 FD_ZERO(&rfds);
752 FD_SET(this->socket_xfrm_events, &rfds);
753 FD_SET(this->socket_rt_events, &rfds);
754 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
755
756 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
757 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
758 pthread_setcancelstate(oldstate, NULL);
759 if (selected <= 0)
760 {
761 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
762 return JOB_REQUEUE_FAIR;
763 }
764 if (FD_ISSET(this->socket_xfrm_events, &rfds))
765 {
766 selected = this->socket_xfrm_events;
767 }
768 else if (FD_ISSET(this->socket_rt_events, &rfds))
769 {
770 selected = this->socket_rt_events;
771 }
772 else
773 {
774 return JOB_REQUEUE_DIRECT;
775 }
776
777 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
778 (struct sockaddr*)&addr, &addr_len);
779 if (len < 0)
780 {
781 switch (errno)
782 {
783 case EINTR:
784 /* interrupted, try again */
785 return JOB_REQUEUE_DIRECT;
786 case EAGAIN:
787 /* no data ready, select again */
788 return JOB_REQUEUE_DIRECT;
789 default:
790 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
791 sleep(1);
792 return JOB_REQUEUE_FAIR;
793 }
794 }
795 if (addr.nl_pid != 0)
796 { /* not from kernel. not interested, try another one */
797 return JOB_REQUEUE_DIRECT;
798 }
799
800 while (NLMSG_OK(hdr, len))
801 {
802 /* looks good so far, dispatch netlink message */
803 if (selected == this->socket_xfrm_events)
804 {
805 switch (hdr->nlmsg_type)
806 {
807 case XFRM_MSG_ACQUIRE:
808 process_acquire(this, hdr);
809 break;
810 case XFRM_MSG_EXPIRE:
811 process_expire(this, hdr);
812 break;
813 default:
814 break;
815 }
816 }
817 else if (selected == this->socket_rt_events)
818 {
819 switch (hdr->nlmsg_type)
820 {
821 case RTM_NEWADDR:
822 case RTM_DELADDR:
823 process_addr(this, hdr, TRUE);
824 break;
825 case RTM_NEWLINK:
826 case RTM_DELLINK:
827 process_link(this, hdr, TRUE);
828 break;
829 case RTM_NEWROUTE:
830 case RTM_DELROUTE:
831 charon->processor->queue_job(charon->processor,
832 (job_t*)roam_job_create(FALSE));
833 break;
834 default:
835 break;
836 }
837 }
838 hdr = NLMSG_NEXT(hdr, len);
839 }
840 return JOB_REQUEUE_DIRECT;
841 }
842
843 /**
844 * send a netlink message and wait for a reply
845 */
846 static status_t netlink_send(private_kernel_interface_t *this,
847 int socket, struct nlmsghdr *in,
848 struct nlmsghdr **out, size_t *out_len)
849 {
850 int len, addr_len;
851 struct sockaddr_nl addr;
852 chunk_t result = chunk_empty, tmp;
853 struct nlmsghdr *msg, peek;
854
855 pthread_mutex_lock(&this->mutex);
856
857 in->nlmsg_seq = ++this->seq;
858 in->nlmsg_pid = getpid();
859
860 memset(&addr, 0, sizeof(addr));
861 addr.nl_family = AF_NETLINK;
862 addr.nl_pid = 0;
863 addr.nl_groups = 0;
864
865 while (TRUE)
866 {
867 len = sendto(socket, in, in->nlmsg_len, 0,
868 (struct sockaddr*)&addr, sizeof(addr));
869
870 if (len != in->nlmsg_len)
871 {
872 if (errno == EINTR)
873 {
874 /* interrupted, try again */
875 continue;
876 }
877 pthread_mutex_unlock(&this->mutex);
878 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
879 return FAILED;
880 }
881 break;
882 }
883
884 while (TRUE)
885 {
886 char buf[4096];
887 tmp.len = sizeof(buf);
888 tmp.ptr = buf;
889 msg = (struct nlmsghdr*)tmp.ptr;
890
891 memset(&addr, 0, sizeof(addr));
892 addr.nl_family = AF_NETLINK;
893 addr.nl_pid = getpid();
894 addr.nl_groups = 0;
895 addr_len = sizeof(addr);
896
897 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
898 (struct sockaddr*)&addr, &addr_len);
899
900 if (len < 0)
901 {
902 if (errno == EINTR)
903 {
904 DBG1(DBG_KNL, "got interrupted");
905 /* interrupted, try again */
906 continue;
907 }
908 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
909 pthread_mutex_unlock(&this->mutex);
910 return FAILED;
911 }
912 if (!NLMSG_OK(msg, len))
913 {
914 DBG1(DBG_KNL, "received corrupted netlink message");
915 pthread_mutex_unlock(&this->mutex);
916 return FAILED;
917 }
918 if (msg->nlmsg_seq != this->seq)
919 {
920 DBG1(DBG_KNL, "received invalid netlink sequence number");
921 if (msg->nlmsg_seq < this->seq)
922 {
923 continue;
924 }
925 pthread_mutex_unlock(&this->mutex);
926 return FAILED;
927 }
928
929 tmp.len = len;
930 result = chunk_cata("cc", result, tmp);
931
932 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
933 * numbers to detect multi header messages */
934 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
935 (struct sockaddr*)&addr, &addr_len);
936
937 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
938 {
939 /* seems to be multipart */
940 continue;
941 }
942 break;
943 }
944
945 *out_len = result.len;
946 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
947
948 pthread_mutex_unlock(&this->mutex);
949
950 return SUCCESS;
951 }
952
953 /**
954 * send a netlink message and wait for its acknowlegde
955 */
956 static status_t netlink_send_ack(private_kernel_interface_t *this,
957 int socket, struct nlmsghdr *in)
958 {
959 struct nlmsghdr *out, *hdr;
960 size_t len;
961
962 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
963 {
964 return FAILED;
965 }
966 hdr = out;
967 while (NLMSG_OK(hdr, len))
968 {
969 switch (hdr->nlmsg_type)
970 {
971 case NLMSG_ERROR:
972 {
973 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
974
975 if (err->error)
976 {
977 DBG1(DBG_KNL, "received netlink error: %s (%d)",
978 strerror(-err->error), -err->error);
979 free(out);
980 return FAILED;
981 }
982 free(out);
983 return SUCCESS;
984 }
985 default:
986 hdr = NLMSG_NEXT(hdr, len);
987 continue;
988 case NLMSG_DONE:
989 break;
990 }
991 break;
992 }
993 DBG1(DBG_KNL, "netlink request not acknowlegded");
994 free(out);
995 return FAILED;
996 }
997
998 /**
999 * Initialize a list of local addresses.
1000 */
1001 static status_t init_address_list(private_kernel_interface_t *this)
1002 {
1003 char request[BUFFER_SIZE];
1004 struct nlmsghdr *out, *current, *in;
1005 struct rtgenmsg *msg;
1006 size_t len;
1007 iterator_t *ifaces, *addrs;
1008 iface_entry_t *iface;
1009 addr_entry_t *addr;
1010
1011 DBG1(DBG_KNL, "listening on interfaces:");
1012
1013 memset(&request, 0, sizeof(request));
1014
1015 in = (struct nlmsghdr*)&request;
1016 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1017 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1018 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1019 msg->rtgen_family = AF_UNSPEC;
1020
1021 /* get all links */
1022 in->nlmsg_type = RTM_GETLINK;
1023 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1024 {
1025 return FAILED;
1026 }
1027 current = out;
1028 while (NLMSG_OK(current, len))
1029 {
1030 switch (current->nlmsg_type)
1031 {
1032 case NLMSG_DONE:
1033 break;
1034 case RTM_NEWLINK:
1035 process_link(this, current, FALSE);
1036 /* fall through */
1037 default:
1038 current = NLMSG_NEXT(current, len);
1039 continue;
1040 }
1041 break;
1042 }
1043 free(out);
1044
1045 /* get all interface addresses */
1046 in->nlmsg_type = RTM_GETADDR;
1047 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1048 {
1049 return FAILED;
1050 }
1051 current = out;
1052 while (NLMSG_OK(current, len))
1053 {
1054 switch (current->nlmsg_type)
1055 {
1056 case NLMSG_DONE:
1057 break;
1058 case RTM_NEWADDR:
1059 process_addr(this, current, FALSE);
1060 /* fall through */
1061 default:
1062 current = NLMSG_NEXT(current, len);
1063 continue;
1064 }
1065 break;
1066 }
1067 free(out);
1068
1069 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1070 while (ifaces->iterate(ifaces, (void**)&iface))
1071 {
1072 if (iface->flags & IFF_UP)
1073 {
1074 DBG1(DBG_KNL, " %s", iface->ifname);
1075 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1076 while (addrs->iterate(addrs, (void**)&addr))
1077 {
1078 DBG1(DBG_KNL, " %H", addr->ip);
1079 }
1080 addrs->destroy(addrs);
1081 }
1082 }
1083 ifaces->destroy(ifaces);
1084 return SUCCESS;
1085 }
1086
1087 /**
1088 * iterator hook to iterate over addrs
1089 */
1090 static hook_result_t addr_hook(private_kernel_interface_t *this,
1091 addr_entry_t *in, host_t **out)
1092 {
1093 if (in->virtual)
1094 { /* skip virtual interfaces added by us */
1095 return HOOK_SKIP;
1096 }
1097 if (in->scope >= RT_SCOPE_LINK)
1098 { /* skip addresses with a unusable scope */
1099 return HOOK_SKIP;
1100 }
1101 *out = in->ip;
1102 return HOOK_NEXT;
1103 }
1104
1105 /**
1106 * iterator hook to iterate over ifaces
1107 */
1108 static hook_result_t iface_hook(private_kernel_interface_t *this,
1109 iface_entry_t *in, host_t **out)
1110 {
1111 if (!(in->flags & IFF_UP))
1112 { /* skip interfaces not up */
1113 return HOOK_SKIP;
1114 }
1115
1116 if (this->hiter == NULL)
1117 {
1118 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1119 this->hiter->set_iterator_hook(this->hiter,
1120 (iterator_hook_t*)addr_hook, this);
1121 }
1122 while (this->hiter->iterate(this->hiter, (void**)out))
1123 {
1124 return HOOK_AGAIN;
1125 }
1126 this->hiter->destroy(this->hiter);
1127 this->hiter = NULL;
1128 return HOOK_SKIP;
1129 }
1130
1131 /**
1132 * Implements kernel_interface_t.create_address_iterator.
1133 */
1134 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1135 {
1136 iterator_t *iterator;
1137
1138 /* This iterator is not only hooked, is is double-hooked. As we have stored
1139 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1140 * entries in each interface we iterate. This does the iface_hook. The
1141 * addr_hook returns the ip instead of the addr_entry. */
1142
1143 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1144 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1145 return iterator;
1146 }
1147
1148 /**
1149 * implementation of kernel_interface_t.get_interface_name
1150 */
1151 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1152 {
1153 iterator_t *ifaces, *addrs;
1154 iface_entry_t *iface;
1155 addr_entry_t *addr;
1156 char *name = NULL;
1157
1158 DBG2(DBG_KNL, "getting interface name for %H", ip);
1159
1160 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1161 while (ifaces->iterate(ifaces, (void**)&iface))
1162 {
1163 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1164 while (addrs->iterate(addrs, (void**)&addr))
1165 {
1166 if (ip->ip_equals(ip, addr->ip))
1167 {
1168 name = strdup(iface->ifname);
1169 break;
1170 }
1171 }
1172 addrs->destroy(addrs);
1173 if (name)
1174 {
1175 break;
1176 }
1177 }
1178 ifaces->destroy(ifaces);
1179
1180 if (name)
1181 {
1182 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1183 }
1184 else
1185 {
1186 DBG2(DBG_KNL, "%H is not a local address", ip);
1187 }
1188 return name;
1189 }
1190
1191 /**
1192 * Tries to find an ip address of a local interface that is included in the
1193 * supplied traffic selector.
1194 */
1195 static status_t get_address_by_ts(private_kernel_interface_t *this,
1196 traffic_selector_t *ts, host_t **ip)
1197 {
1198 iterator_t *ifaces, *addrs;
1199 iface_entry_t *iface;
1200 addr_entry_t *addr;
1201 host_t *host;
1202 int family;
1203 bool found = FALSE;
1204
1205 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1206
1207 /* if we have a family which includes localhost, we do not
1208 * search for an IP, we use the default */
1209 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1210
1211 if (family == AF_INET)
1212 {
1213 host = host_create_from_string("127.0.0.1", 0);
1214 }
1215 else
1216 {
1217 host = host_create_from_string("::1", 0);
1218 }
1219
1220 if (ts->includes(ts, host))
1221 {
1222 *ip = host_create_any(family);
1223 host->destroy(host);
1224 DBG2(DBG_KNL, "using host %H", *ip);
1225 return SUCCESS;
1226 }
1227 host->destroy(host);
1228
1229 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1230 while (ifaces->iterate(ifaces, (void**)&iface))
1231 {
1232 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1233 while (addrs->iterate(addrs, (void**)&addr))
1234 {
1235 if (ts->includes(ts, addr->ip))
1236 {
1237 found = TRUE;
1238 *ip = addr->ip->clone(addr->ip);
1239 break;
1240 }
1241 }
1242 addrs->destroy(addrs);
1243 if (found)
1244 {
1245 break;
1246 }
1247 }
1248 ifaces->destroy(ifaces);
1249
1250 if (!found)
1251 {
1252 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1253 return FAILED;
1254 }
1255 DBG2(DBG_KNL, "using host %H", *ip);
1256 return SUCCESS;
1257 }
1258
1259 /**
1260 * get the interface of a local address
1261 */
1262 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1263 {
1264 iterator_t *ifaces, *addrs;
1265 iface_entry_t *iface;
1266 addr_entry_t *addr;
1267 int ifindex = 0;
1268
1269 DBG2(DBG_KNL, "getting iface for %H", ip);
1270
1271 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1272 while (ifaces->iterate(ifaces, (void**)&iface))
1273 {
1274 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1275 while (addrs->iterate(addrs, (void**)&addr))
1276 {
1277 if (ip->ip_equals(ip, addr->ip))
1278 {
1279 ifindex = iface->ifindex;
1280 break;
1281 }
1282 }
1283 addrs->destroy(addrs);
1284 if (ifindex)
1285 {
1286 break;
1287 }
1288 }
1289 ifaces->destroy(ifaces);
1290
1291 if (ifindex == 0)
1292 {
1293 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1294 }
1295 return ifindex;
1296 }
1297
1298 /**
1299 * Manages the creation and deletion of ip addresses on an interface.
1300 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1301 */
1302 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1303 int flags, int if_index, host_t *ip)
1304 {
1305 unsigned char request[BUFFER_SIZE];
1306 struct nlmsghdr *hdr;
1307 struct ifaddrmsg *msg;
1308 chunk_t chunk;
1309
1310 memset(&request, 0, sizeof(request));
1311
1312 chunk = ip->get_address(ip);
1313
1314 hdr = (struct nlmsghdr*)request;
1315 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1316 hdr->nlmsg_type = nlmsg_type;
1317 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1318
1319 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1320 msg->ifa_family = ip->get_family(ip);
1321 msg->ifa_flags = 0;
1322 msg->ifa_prefixlen = 8 * chunk.len;
1323 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1324 msg->ifa_index = if_index;
1325
1326 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1327
1328 return netlink_send_ack(this, this->socket_rt, hdr);
1329 }
1330
1331 /**
1332 * Manages source routes in the routing table.
1333 * By setting the appropriate nlmsg_type, the route added or r.
1334 */
1335 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1336 int flags, route_entry_t *route)
1337 {
1338 unsigned char request[BUFFER_SIZE];
1339 struct nlmsghdr *hdr;
1340 struct rtmsg *msg;
1341 chunk_t chunk;
1342
1343 /* if route is 0.0.0.0/0, we can't install it, as it would
1344 * overwrite the default route. Instead, we add two routes:
1345 * 0.0.0.0/1 and 128.0.0.0/1
1346 * TODO: use metrics instead */
1347 if (route->prefixlen == 0)
1348 {
1349 route_entry_t half;
1350 status_t status;
1351
1352 half.dst_net = chunk_alloca(route->dst_net.len);
1353 memset(half.dst_net.ptr, 0, half.dst_net.len);
1354 half.src_ip = route->src_ip;
1355 half.gateway = route->gateway;
1356 half.if_index = route->if_index;
1357 half.prefixlen = 1;
1358
1359 status = manage_srcroute(this, nlmsg_type, flags, &half);
1360 half.dst_net.ptr[0] |= 0x80;
1361 status = manage_srcroute(this, nlmsg_type, flags, &half);
1362 return status;
1363 }
1364
1365 memset(&request, 0, sizeof(request));
1366
1367 hdr = (struct nlmsghdr*)request;
1368 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1369 hdr->nlmsg_type = nlmsg_type;
1370 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1371
1372 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1373 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1374 msg->rtm_dst_len = route->prefixlen;
1375 msg->rtm_table = IPSEC_ROUTING_TABLE;
1376 msg->rtm_protocol = RTPROT_STATIC;
1377 msg->rtm_type = RTN_UNICAST;
1378 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1379
1380 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1381 chunk = route->src_ip->get_address(route->src_ip);
1382 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1383 chunk = route->gateway->get_address(route->gateway);
1384 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1385 chunk.ptr = (char*)&route->if_index;
1386 chunk.len = sizeof(route->if_index);
1387 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1388
1389 return netlink_send_ack(this, this->socket_rt, hdr);
1390 }
1391
1392 /**
1393 * create or delete an rule to use our routing table
1394 */
1395 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1396 u_int32_t table, u_int32_t prio)
1397 {
1398 unsigned char request[BUFFER_SIZE];
1399 struct nlmsghdr *hdr;
1400 struct rtmsg *msg;
1401 chunk_t chunk;
1402
1403 memset(&request, 0, sizeof(request));
1404 hdr = (struct nlmsghdr*)request;
1405 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1406 hdr->nlmsg_type = nlmsg_type;
1407 if (nlmsg_type == RTM_NEWRULE)
1408 {
1409 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1410 }
1411 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1412
1413 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1414 msg->rtm_table = table;
1415 msg->rtm_family = AF_INET;
1416 msg->rtm_protocol = RTPROT_BOOT;
1417 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1418 msg->rtm_type = RTN_UNICAST;
1419
1420 chunk = chunk_from_thing(prio);
1421 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1422
1423 return netlink_send_ack(this, this->socket_rt, hdr);
1424 }
1425
1426 /**
1427 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1428 */
1429 static bool addr_in_subnet(chunk_t addr, chunk_t net, int net_len)
1430 {
1431 int bit, byte;
1432
1433 if (addr.len != net.len)
1434 {
1435 return FALSE;
1436 }
1437 /* scan through all bits, beginning in the front */
1438 for (byte = 0; byte < addr.len; byte++)
1439 {
1440 for (bit = 7; bit >= 0; bit--)
1441 {
1442 /* check if bits are equal (or we reached the end of the net) */
1443 if (bit + byte * 8 > net_len)
1444 {
1445 return TRUE;
1446 }
1447 if (((1<<bit) & addr.ptr[byte]) != ((1<<bit) & net.ptr[byte]))
1448 {
1449 return FALSE;
1450 }
1451 }
1452 }
1453 return TRUE;
1454 }
1455
1456 /**
1457 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1458 */
1459 static host_t *get_route(private_kernel_interface_t *this, host_t *dest,
1460 bool nexthop)
1461 {
1462 unsigned char request[BUFFER_SIZE];
1463 struct nlmsghdr *hdr, *out, *current;
1464 struct rtmsg *msg;
1465 chunk_t chunk;
1466 size_t len;
1467 int best = -1;
1468 host_t *src = NULL, *gtw = NULL;
1469
1470 DBG2(DBG_KNL, "getting address to reach %H", dest);
1471
1472 memset(&request, 0, sizeof(request));
1473
1474 hdr = (struct nlmsghdr*)request;
1475 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_ROOT;
1476 hdr->nlmsg_type = RTM_GETROUTE;
1477 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1478
1479 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1480 msg->rtm_family = dest->get_family(dest);
1481
1482 chunk = dest->get_address(dest);
1483 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1484
1485 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1486 {
1487 DBG1(DBG_KNL, "getting address to %H failed", dest);
1488 return NULL;
1489 }
1490 current = out;
1491 while (NLMSG_OK(current, len))
1492 {
1493 switch (current->nlmsg_type)
1494 {
1495 case NLMSG_DONE:
1496 break;
1497 case RTM_NEWROUTE:
1498 {
1499 struct rtattr *rta;
1500 size_t rtasize;
1501 chunk_t rta_gtw, rta_src, rta_dst;
1502 u_int32_t rta_oif = 0;
1503
1504 rta_gtw = rta_src = rta_dst = chunk_empty;
1505 msg = (struct rtmsg*)(NLMSG_DATA(current));
1506 rta = RTM_RTA(msg);
1507 rtasize = RTM_PAYLOAD(current);
1508 while (RTA_OK(rta, rtasize))
1509 {
1510 switch (rta->rta_type)
1511 {
1512 case RTA_PREFSRC:
1513 rta_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1514 break;
1515 case RTA_GATEWAY:
1516 rta_gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1517 break;
1518 case RTA_DST:
1519 rta_dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1520 break;
1521 case RTA_OIF:
1522 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1523 {
1524 rta_oif = *(u_int32_t*)RTA_DATA(rta);
1525 }
1526 break;
1527 }
1528 rta = RTA_NEXT(rta, rtasize);
1529 }
1530
1531 /* apply the route if:
1532 * - it is not from our own ipsec routing table
1533 * - its destination net contains our destination
1534 * - is better than a previous one
1535 */
1536 if (msg->rtm_table != IPSEC_ROUTING_TABLE
1537 && msg->rtm_dst_len > best
1538 && (msg->rtm_dst_len == 0 || /* default route */
1539 rta_dst.ptr && addr_in_subnet(chunk, rta_dst, msg->rtm_dst_len)))
1540 {
1541 iterator_t *ifaces, *addrs;
1542 iface_entry_t *iface;
1543 addr_entry_t *addr;
1544
1545 best = msg->rtm_dst_len;
1546 if (nexthop)
1547 {
1548 DESTROY_IF(gtw);
1549 gtw = host_create_from_chunk(msg->rtm_family, rta_gtw, 0);
1550 }
1551 else if (rta_src.ptr)
1552 {
1553 DESTROY_IF(src);
1554 src = host_create_from_chunk(msg->rtm_family, rta_src, 0);
1555 }
1556 else
1557 {
1558 /* no source addr, get one from the interfaces */
1559 ifaces = this->ifaces->create_iterator_locked(
1560 this->ifaces, &this->mutex);
1561 while (ifaces->iterate(ifaces, (void**)&iface))
1562 {
1563 if (iface->ifindex == rta_oif)
1564 {
1565 addrs = iface->addrs->create_iterator(
1566 iface->addrs, TRUE);
1567 while (addrs->iterate(addrs, (void**)&addr))
1568 {
1569 chunk_t ip = addr->ip->get_address(addr->ip);
1570 if (rta_dst.ptr
1571 && addr_in_subnet(ip, rta_dst, msg->rtm_dst_len))
1572 {
1573 DESTROY_IF(src);
1574 src = addr->ip->clone(addr->ip);
1575 best = msg->rtm_dst_len;
1576 break;
1577 }
1578 }
1579 addrs->destroy(addrs);
1580 }
1581 }
1582 ifaces->destroy(ifaces);
1583 }
1584 }
1585 /* FALL through */
1586 }
1587 default:
1588 current = NLMSG_NEXT(current, len);
1589 continue;
1590 }
1591 break;
1592 }
1593 free(out);
1594
1595 if (nexthop)
1596 {
1597 if (gtw)
1598 {
1599 return gtw;
1600 }
1601 return dest->clone(dest);
1602 }
1603 return src;
1604 }
1605
1606 /**
1607 * Implementation of kernel_interface_t.get_source_addr.
1608 */
1609 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1610 {
1611 return get_route(this, dest, FALSE);
1612 }
1613
1614 /**
1615 * Implementation of kernel_interface_t.add_ip.
1616 */
1617 static status_t add_ip(private_kernel_interface_t *this,
1618 host_t *virtual_ip, host_t *iface_ip)
1619 {
1620 iface_entry_t *iface;
1621 addr_entry_t *addr;
1622 iterator_t *addrs, *ifaces;
1623
1624 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1625
1626 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1627 while (ifaces->iterate(ifaces, (void**)&iface))
1628 {
1629 bool iface_found = FALSE;
1630
1631 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1632 while (addrs->iterate(addrs, (void**)&addr))
1633 {
1634 if (iface_ip->ip_equals(iface_ip, addr->ip))
1635 {
1636 iface_found = TRUE;
1637 }
1638 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1639 {
1640 addr->refcount++;
1641 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1642 virtual_ip, iface->ifname);
1643 addrs->destroy(addrs);
1644 ifaces->destroy(ifaces);
1645 return SUCCESS;
1646 }
1647 }
1648 addrs->destroy(addrs);
1649
1650 if (iface_found)
1651 {
1652 int ifindex = iface->ifindex;
1653 ifaces->destroy(ifaces);
1654 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1655 ifindex, virtual_ip) == SUCCESS)
1656 {
1657 addr = malloc_thing(addr_entry_t);
1658 addr->ip = virtual_ip->clone(virtual_ip);
1659 addr->refcount = 1;
1660 addr->virtual = TRUE;
1661 addr->scope = RT_SCOPE_UNIVERSE;
1662 pthread_mutex_lock(&this->mutex);
1663 iface->addrs->insert_last(iface->addrs, addr);
1664 pthread_mutex_unlock(&this->mutex);
1665 return SUCCESS;
1666 }
1667 DBG2(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1668 return FAILED;
1669
1670 }
1671
1672 }
1673 ifaces->destroy(ifaces);
1674
1675 DBG2(DBG_KNL, "interface address %H not found, unable to install"
1676 "virtual IP %H", iface_ip, virtual_ip);
1677 return FAILED;
1678 }
1679
1680 /**
1681 * Implementation of kernel_interface_t.del_ip.
1682 */
1683 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1684 {
1685 iface_entry_t *iface;
1686 addr_entry_t *addr;
1687 iterator_t *addrs, *ifaces;
1688
1689 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1690
1691 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1692 while (ifaces->iterate(ifaces, (void**)&iface))
1693 {
1694 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1695 while (addrs->iterate(addrs, (void**)&addr))
1696 {
1697 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1698 {
1699 int ifindex = iface->ifindex;
1700 addr->refcount--;
1701 if (addr->refcount == 0)
1702 {
1703 addrs->remove(addrs);
1704 addrs->destroy(addrs);
1705 ifaces->destroy(ifaces);
1706 addr_entry_destroy(addr);
1707 return manage_ipaddr(this, RTM_DELADDR, 0,
1708 ifindex, virtual_ip);
1709 }
1710 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1711 virtual_ip);
1712 addrs->destroy(addrs);
1713 ifaces->destroy(ifaces);
1714 return SUCCESS;
1715 }
1716 }
1717 addrs->destroy(addrs);
1718 }
1719 ifaces->destroy(ifaces);
1720
1721 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1722 return FAILED;
1723 }
1724
1725 /**
1726 * Implementation of kernel_interface_t.get_spi.
1727 */
1728 static status_t get_spi(private_kernel_interface_t *this,
1729 host_t *src, host_t *dst,
1730 protocol_id_t protocol, u_int32_t reqid,
1731 u_int32_t *spi)
1732 {
1733 unsigned char request[BUFFER_SIZE];
1734 struct nlmsghdr *hdr, *out;
1735 struct xfrm_userspi_info *userspi;
1736 u_int32_t received_spi = 0;
1737 size_t len;
1738
1739 memset(&request, 0, sizeof(request));
1740
1741 DBG2(DBG_KNL, "getting SPI for reqid %d", reqid);
1742
1743 hdr = (struct nlmsghdr*)request;
1744 hdr->nlmsg_flags = NLM_F_REQUEST;
1745 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1746 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1747
1748 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1749 host2xfrm(src, &userspi->info.saddr);
1750 host2xfrm(dst, &userspi->info.id.daddr);
1751 userspi->info.id.proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1752 userspi->info.mode = TRUE; /* tunnel mode */
1753 userspi->info.reqid = reqid;
1754 userspi->info.family = src->get_family(src);
1755 userspi->min = 0xc0000000;
1756 userspi->max = 0xcFFFFFFF;
1757
1758 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1759 {
1760 hdr = out;
1761 while (NLMSG_OK(hdr, len))
1762 {
1763 switch (hdr->nlmsg_type)
1764 {
1765 case XFRM_MSG_NEWSA:
1766 {
1767 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1768 received_spi = usersa->id.spi;
1769 break;
1770 }
1771 case NLMSG_ERROR:
1772 {
1773 struct nlmsgerr *err = NLMSG_DATA(hdr);
1774
1775 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1776 strerror(-err->error), -err->error);
1777 break;
1778 }
1779 default:
1780 hdr = NLMSG_NEXT(hdr, len);
1781 continue;
1782 case NLMSG_DONE:
1783 break;
1784 }
1785 break;
1786 }
1787 free(out);
1788 }
1789
1790 if (received_spi == 0)
1791 {
1792 DBG1(DBG_KNL, "unable to get SPI for reqid %d", reqid);
1793 return FAILED;
1794 }
1795
1796 DBG2(DBG_KNL, "got SPI 0x%x for reqid %d", received_spi, reqid);
1797
1798 *spi = received_spi;
1799 return SUCCESS;
1800 }
1801
1802 /**
1803 * Implementation of kernel_interface_t.add_sa.
1804 */
1805 static status_t add_sa(private_kernel_interface_t *this,
1806 host_t *src, host_t *dst, u_int32_t spi,
1807 protocol_id_t protocol, u_int32_t reqid,
1808 u_int64_t expire_soft, u_int64_t expire_hard,
1809 algorithm_t *enc_alg, algorithm_t *int_alg,
1810 prf_plus_t *prf_plus, mode_t mode, bool encap,
1811 bool replace)
1812 {
1813 unsigned char request[BUFFER_SIZE];
1814 char *alg_name;
1815 u_int key_size;
1816 struct nlmsghdr *hdr;
1817 struct xfrm_usersa_info *sa;
1818
1819 memset(&request, 0, sizeof(request));
1820
1821 DBG2(DBG_KNL, "adding SAD entry with SPI 0x%x", spi);
1822
1823 hdr = (struct nlmsghdr*)request;
1824 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1825 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
1826 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1827
1828 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
1829 host2xfrm(src, &sa->saddr);
1830 host2xfrm(dst, &sa->id.daddr);
1831 sa->id.spi = spi;
1832 sa->id.proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1833 sa->family = src->get_family(src);
1834 sa->mode = mode;
1835 sa->replay_window = 32;
1836 sa->reqid = reqid;
1837 /* we currently do not expire SAs by volume/packet count */
1838 sa->lft.soft_byte_limit = XFRM_INF;
1839 sa->lft.hard_byte_limit = XFRM_INF;
1840 sa->lft.soft_packet_limit = XFRM_INF;
1841 sa->lft.hard_packet_limit = XFRM_INF;
1842 /* we use lifetimes since added, not since used */
1843 sa->lft.soft_add_expires_seconds = expire_soft;
1844 sa->lft.hard_add_expires_seconds = expire_hard;
1845 sa->lft.soft_use_expires_seconds = 0;
1846 sa->lft.hard_use_expires_seconds = 0;
1847
1848 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
1849
1850 if (enc_alg->algorithm != ENCR_UNDEFINED)
1851 {
1852 rthdr->rta_type = XFRMA_ALG_CRYPT;
1853 alg_name = lookup_algorithm(encryption_algs, enc_alg, &key_size);
1854 if (alg_name == NULL)
1855 {
1856 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1857 encryption_algorithm_names, enc_alg->algorithm);
1858 return FAILED;
1859 }
1860 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1861 encryption_algorithm_names, enc_alg->algorithm, key_size);
1862
1863 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + key_size);
1864 hdr->nlmsg_len += rthdr->rta_len;
1865 if (hdr->nlmsg_len > sizeof(request))
1866 {
1867 return FAILED;
1868 }
1869
1870 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
1871 algo->alg_key_len = key_size;
1872 strcpy(algo->alg_name, alg_name);
1873 prf_plus->get_bytes(prf_plus, key_size / 8, algo->alg_key);
1874
1875 rthdr = XFRM_RTA_NEXT(rthdr);
1876 }
1877
1878 if (int_alg->algorithm != AUTH_UNDEFINED)
1879 {
1880 rthdr->rta_type = XFRMA_ALG_AUTH;
1881 alg_name = lookup_algorithm(integrity_algs, int_alg, &key_size);
1882 if (alg_name == NULL)
1883 {
1884 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1885 integrity_algorithm_names, int_alg->algorithm);
1886 return FAILED;
1887 }
1888 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
1889 integrity_algorithm_names, int_alg->algorithm, key_size);
1890
1891 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + key_size);
1892 hdr->nlmsg_len += rthdr->rta_len;
1893 if (hdr->nlmsg_len > sizeof(request))
1894 {
1895 return FAILED;
1896 }
1897
1898 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
1899 algo->alg_key_len = key_size;
1900 strcpy(algo->alg_name, alg_name);
1901 prf_plus->get_bytes(prf_plus, key_size / 8, algo->alg_key);
1902
1903 rthdr = XFRM_RTA_NEXT(rthdr);
1904 }
1905
1906 /* TODO: add IPComp here */
1907
1908 if (encap)
1909 {
1910 rthdr->rta_type = XFRMA_ENCAP;
1911 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
1912
1913 hdr->nlmsg_len += rthdr->rta_len;
1914 if (hdr->nlmsg_len > sizeof(request))
1915 {
1916 return FAILED;
1917 }
1918
1919 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
1920 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
1921 tmpl->encap_sport = htons(src->get_port(src));
1922 tmpl->encap_dport = htons(dst->get_port(dst));
1923 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
1924 /* encap_oa could probably be derived from the
1925 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
1926 * pluto does the same as we do here but it uses encap_oa in the
1927 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
1928 * the kernel ignores it anyway
1929 * -> does that mean that NAT-T encap doesn't work in transport mode?
1930 * No. The reason the kernel ignores NAT-OA is that it recomputes
1931 * (or, rather, just ignores) the checksum. If packets pass
1932 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
1933 rthdr = XFRM_RTA_NEXT(rthdr);
1934 }
1935
1936 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
1937 {
1938 DBG1(DBG_KNL, "unable to add SAD entry with SPI 0x%x", spi);
1939 return FAILED;
1940 }
1941 return SUCCESS;
1942 }
1943
1944 /**
1945 * Implementation of kernel_interface_t.update_sa.
1946 */
1947 static status_t update_sa(private_kernel_interface_t *this,
1948 u_int32_t spi, protocol_id_t protocol,
1949 host_t *src, host_t *dst,
1950 host_t *new_src, host_t *new_dst, bool encap)
1951 {
1952 unsigned char request[BUFFER_SIZE], *pos;
1953 struct nlmsghdr *hdr, *out = NULL;
1954 struct xfrm_usersa_id *sa_id;
1955 struct xfrm_usersa_info *out_sa = NULL, *sa;
1956 size_t len;
1957 struct rtattr *rta;
1958 size_t rtasize;
1959 struct xfrm_encap_tmpl* tmpl = NULL;
1960
1961 memset(&request, 0, sizeof(request));
1962
1963 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x for update", spi);
1964
1965 /* query the exisiting SA first */
1966 hdr = (struct nlmsghdr*)request;
1967 hdr->nlmsg_flags = NLM_F_REQUEST;
1968 hdr->nlmsg_type = XFRM_MSG_GETSA;
1969 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
1970
1971 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
1972 host2xfrm(dst, &sa_id->daddr);
1973 sa_id->spi = spi;
1974 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1975 sa_id->family = dst->get_family(dst);
1976
1977 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1978 {
1979 hdr = out;
1980 while (NLMSG_OK(hdr, len))
1981 {
1982 switch (hdr->nlmsg_type)
1983 {
1984 case XFRM_MSG_NEWSA:
1985 {
1986 out_sa = NLMSG_DATA(hdr);
1987 break;
1988 }
1989 case NLMSG_ERROR:
1990 {
1991 struct nlmsgerr *err = NLMSG_DATA(hdr);
1992 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
1993 strerror(-err->error), -err->error);
1994 break;
1995 }
1996 default:
1997 hdr = NLMSG_NEXT(hdr, len);
1998 continue;
1999 case NLMSG_DONE:
2000 break;
2001 }
2002 break;
2003 }
2004 }
2005 if (out_sa == NULL ||
2006 this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
2007 {
2008 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2009 free(out);
2010 return FAILED;
2011 }
2012
2013 DBG2(DBG_KNL, "updating SAD entry with SPI 0x%x from %#H..%#H to %#H..%#H",
2014 spi, src, dst, new_src, new_dst);
2015
2016 /* copy over the SA from out to request */
2017 hdr = (struct nlmsghdr*)request;
2018 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
2019 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2020 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2021 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2022 sa = NLMSG_DATA(hdr);
2023 sa->family = new_dst->get_family(new_dst);
2024
2025 if (!src->ip_equals(src, new_src))
2026 {
2027 host2xfrm(new_src, &sa->saddr);
2028 }
2029 if (!dst->ip_equals(dst, new_dst))
2030 {
2031 host2xfrm(new_dst, &sa->id.daddr);
2032 }
2033
2034 rta = XFRM_RTA(out, struct xfrm_usersa_info);
2035 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
2036 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
2037 while(RTA_OK(rta, rtasize))
2038 {
2039 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2040 if (rta->rta_type != XFRMA_ENCAP || encap)
2041 {
2042 if (rta->rta_type == XFRMA_ENCAP)
2043 { /* update encap tmpl */
2044 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2045 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2046 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2047 }
2048 memcpy(pos, rta, rta->rta_len);
2049 pos += rta->rta_len;
2050 hdr->nlmsg_len += rta->rta_len;
2051 }
2052 rta = RTA_NEXT(rta, rtasize);
2053 }
2054 if (tmpl == NULL && encap)
2055 { /* add tmpl if we are enabling it */
2056 rta = (struct rtattr*)pos;
2057 rta->rta_type = XFRMA_ENCAP;
2058 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
2059 hdr->nlmsg_len += rta->rta_len;
2060 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
2061 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2062 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
2063 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
2064 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2065 }
2066
2067 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2068 {
2069 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
2070 free(out);
2071 return FAILED;
2072 }
2073 free(out);
2074
2075 return SUCCESS;
2076 }
2077
2078 /**
2079 * Implementation of kernel_interface_t.query_sa.
2080 */
2081 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
2082 u_int32_t spi, protocol_id_t protocol,
2083 u_int32_t *use_time)
2084 {
2085 unsigned char request[BUFFER_SIZE];
2086 struct nlmsghdr *out = NULL, *hdr;
2087 struct xfrm_usersa_id *sa_id;
2088 struct xfrm_usersa_info *sa = NULL;
2089 size_t len;
2090
2091 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x", spi);
2092 memset(&request, 0, sizeof(request));
2093
2094 hdr = (struct nlmsghdr*)request;
2095 hdr->nlmsg_flags = NLM_F_REQUEST;
2096 hdr->nlmsg_type = XFRM_MSG_GETSA;
2097 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2098
2099 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2100 host2xfrm(dst, &sa_id->daddr);
2101 sa_id->spi = spi;
2102 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2103 sa_id->family = dst->get_family(dst);
2104
2105 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2106 {
2107 hdr = out;
2108 while (NLMSG_OK(hdr, len))
2109 {
2110 switch (hdr->nlmsg_type)
2111 {
2112 case XFRM_MSG_NEWSA:
2113 {
2114 sa = NLMSG_DATA(hdr);
2115 break;
2116 }
2117 case NLMSG_ERROR:
2118 {
2119 struct nlmsgerr *err = NLMSG_DATA(hdr);
2120 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2121 strerror(-err->error), -err->error);
2122 break;
2123 }
2124 default:
2125 hdr = NLMSG_NEXT(hdr, len);
2126 continue;
2127 case NLMSG_DONE:
2128 break;
2129 }
2130 break;
2131 }
2132 }
2133
2134 if (sa == NULL)
2135 {
2136 DBG1(DBG_KNL, "unable to query SAD entry with SPI 0x%x", spi);
2137 free(out);
2138 return FAILED;
2139 }
2140
2141 *use_time = sa->curlft.use_time;
2142 free (out);
2143 return SUCCESS;
2144 }
2145
2146 /**
2147 * Implementation of kernel_interface_t.del_sa.
2148 */
2149 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2150 u_int32_t spi, protocol_id_t protocol)
2151 {
2152 unsigned char request[BUFFER_SIZE];
2153 struct nlmsghdr *hdr;
2154 struct xfrm_usersa_id *sa_id;
2155
2156 memset(&request, 0, sizeof(request));
2157
2158 DBG2(DBG_KNL, "deleting SAD entry with SPI 0x%x", spi);
2159
2160 hdr = (struct nlmsghdr*)request;
2161 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2162 hdr->nlmsg_type = XFRM_MSG_DELSA;
2163 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2164
2165 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2166 host2xfrm(dst, &sa_id->daddr);
2167 sa_id->spi = spi;
2168 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2169 sa_id->family = dst->get_family(dst);
2170
2171 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2172 {
2173 DBG1(DBG_KNL, "unable to delete SAD entry with SPI 0x%x", spi);
2174 return FAILED;
2175 }
2176 DBG2(DBG_KNL, "deleted SAD entry with SPI 0x%x", spi);
2177 return SUCCESS;
2178 }
2179
2180 /**
2181 * Implementation of kernel_interface_t.add_policy.
2182 */
2183 static status_t add_policy(private_kernel_interface_t *this,
2184 host_t *src, host_t *dst,
2185 traffic_selector_t *src_ts,
2186 traffic_selector_t *dst_ts,
2187 policy_dir_t direction, protocol_id_t protocol,
2188 u_int32_t reqid, bool high_prio, mode_t mode)
2189 {
2190 iterator_t *iterator;
2191 policy_entry_t *current, *policy;
2192 bool found = FALSE;
2193 unsigned char request[BUFFER_SIZE];
2194 struct xfrm_userpolicy_info *policy_info;
2195 struct nlmsghdr *hdr;
2196
2197 /* create a policy */
2198 policy = malloc_thing(policy_entry_t);
2199 memset(policy, 0, sizeof(policy_entry_t));
2200 policy->sel = ts2selector(src_ts, dst_ts);
2201 policy->direction = direction;
2202
2203 /* find the policy, which matches EXACTLY */
2204 pthread_mutex_lock(&this->mutex);
2205 iterator = this->policies->create_iterator(this->policies, TRUE);
2206 while (iterator->iterate(iterator, (void**)&current))
2207 {
2208 if (memcmp(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) == 0 &&
2209 policy->direction == current->direction)
2210 {
2211 /* use existing policy */
2212 current->refcount++;
2213 DBG2(DBG_KNL, "policy %R===%R already exists, increasing ",
2214 "refcount", src_ts, dst_ts);
2215 free(policy);
2216 policy = current;
2217 found = TRUE;
2218 break;
2219 }
2220 }
2221 iterator->destroy(iterator);
2222 if (!found)
2223 { /* apply the new one, if we have no such policy */
2224 this->policies->insert_last(this->policies, policy);
2225 policy->refcount = 1;
2226 }
2227
2228 DBG2(DBG_KNL, "adding policy %R===%R", src_ts, dst_ts);
2229
2230 memset(&request, 0, sizeof(request));
2231 hdr = (struct nlmsghdr*)request;
2232 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2233 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2234 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2235
2236 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2237 policy_info->sel = policy->sel;
2238 policy_info->dir = policy->direction;
2239 /* calculate priority based on source selector size, small size = high prio */
2240 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2241 policy_info->priority -= policy->sel.prefixlen_s * 10;
2242 policy_info->priority -= policy->sel.proto ? 2 : 0;
2243 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2244 policy_info->action = XFRM_POLICY_ALLOW;
2245 policy_info->share = XFRM_SHARE_ANY;
2246 pthread_mutex_unlock(&this->mutex);
2247
2248 /* policies don't expire */
2249 policy_info->lft.soft_byte_limit = XFRM_INF;
2250 policy_info->lft.soft_packet_limit = XFRM_INF;
2251 policy_info->lft.hard_byte_limit = XFRM_INF;
2252 policy_info->lft.hard_packet_limit = XFRM_INF;
2253 policy_info->lft.soft_add_expires_seconds = 0;
2254 policy_info->lft.hard_add_expires_seconds = 0;
2255 policy_info->lft.soft_use_expires_seconds = 0;
2256 policy_info->lft.hard_use_expires_seconds = 0;
2257
2258 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2259 rthdr->rta_type = XFRMA_TMPL;
2260
2261 rthdr->rta_len = sizeof(struct xfrm_user_tmpl);
2262 rthdr->rta_len = RTA_LENGTH(rthdr->rta_len);
2263
2264 hdr->nlmsg_len += rthdr->rta_len;
2265 if (hdr->nlmsg_len > sizeof(request))
2266 {
2267 return FAILED;
2268 }
2269
2270 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2271 tmpl->reqid = reqid;
2272 tmpl->id.proto = (protocol == PROTO_AH) ? KERNEL_AH : KERNEL_ESP;
2273 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2274 tmpl->mode = mode;
2275 tmpl->family = src->get_family(src);
2276
2277 host2xfrm(src, &tmpl->saddr);
2278 host2xfrm(dst, &tmpl->id.daddr);
2279
2280 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2281 {
2282 DBG1(DBG_KNL, "unable to add policy %R===%R", src_ts, dst_ts);
2283 return FAILED;
2284 }
2285
2286 /* install a route, if:
2287 * - we are NOT updating a policy
2288 * - this is a forward policy (to just get one for each child)
2289 * - we are in tunnel mode
2290 * - we are not using IPv6 (does not work correctly yet!)
2291 */
2292 if (policy->route == NULL && direction == POLICY_FWD &&
2293 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6)
2294 {
2295 policy->route = malloc_thing(route_entry_t);
2296 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2297 {
2298 /* get the nexthop to src (src as we are in POLICY_FWD).*/
2299 policy->route->gateway = get_route(this, src, TRUE);
2300 policy->route->if_index = get_interface_index(this, dst);
2301 policy->route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2302 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr, policy->route->dst_net.len);
2303 policy->route->prefixlen = policy->sel.prefixlen_s;
2304
2305 if (manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2306 policy->route) != SUCCESS)
2307 {
2308 DBG1(DBG_KNL, "unable to install source route for %H",
2309 policy->route->src_ip);
2310 route_entry_destroy(policy->route);
2311 policy->route = NULL;
2312 }
2313 }
2314 else
2315 {
2316 free(policy->route);
2317 policy->route = NULL;
2318 }
2319 }
2320
2321 return SUCCESS;
2322 }
2323
2324 /**
2325 * Implementation of kernel_interface_t.query_policy.
2326 */
2327 static status_t query_policy(private_kernel_interface_t *this,
2328 traffic_selector_t *src_ts,
2329 traffic_selector_t *dst_ts,
2330 policy_dir_t direction, u_int32_t *use_time)
2331 {
2332 unsigned char request[BUFFER_SIZE];
2333 struct nlmsghdr *out = NULL, *hdr;
2334 struct xfrm_userpolicy_id *policy_id;
2335 struct xfrm_userpolicy_info *policy = NULL;
2336 size_t len;
2337
2338 memset(&request, 0, sizeof(request));
2339
2340 DBG2(DBG_KNL, "querying policy %R===%R", src_ts, dst_ts);
2341
2342 hdr = (struct nlmsghdr*)request;
2343 hdr->nlmsg_flags = NLM_F_REQUEST;
2344 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2345 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2346
2347 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2348 policy_id->sel = ts2selector(src_ts, dst_ts);
2349 policy_id->dir = direction;
2350
2351 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2352 {
2353 hdr = out;
2354 while (NLMSG_OK(hdr, len))
2355 {
2356 switch (hdr->nlmsg_type)
2357 {
2358 case XFRM_MSG_NEWPOLICY:
2359 {
2360 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2361 break;
2362 }
2363 case NLMSG_ERROR:
2364 {
2365 struct nlmsgerr *err = NLMSG_DATA(hdr);
2366 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2367 strerror(-err->error), -err->error);
2368 break;
2369 }
2370 default:
2371 hdr = NLMSG_NEXT(hdr, len);
2372 continue;
2373 case NLMSG_DONE:
2374 break;
2375 }
2376 break;
2377 }
2378 }
2379
2380 if (policy == NULL)
2381 {
2382 DBG2(DBG_KNL, "unable to query policy %R===%R", src_ts, dst_ts);
2383 free(out);
2384 return FAILED;
2385 }
2386 *use_time = (time_t)policy->curlft.use_time;
2387
2388 free(out);
2389 return SUCCESS;
2390 }
2391
2392 /**
2393 * Implementation of kernel_interface_t.del_policy.
2394 */
2395 static status_t del_policy(private_kernel_interface_t *this,
2396 traffic_selector_t *src_ts,
2397 traffic_selector_t *dst_ts,
2398 policy_dir_t direction)
2399 {
2400 policy_entry_t *current, policy, *to_delete = NULL;
2401 route_entry_t *route;
2402 unsigned char request[BUFFER_SIZE];
2403 struct nlmsghdr *hdr;
2404 struct xfrm_userpolicy_id *policy_id;
2405 iterator_t *iterator;
2406
2407 DBG2(DBG_KNL, "deleting policy %R===%R", src_ts, dst_ts);
2408
2409 /* create a policy */
2410 memset(&policy, 0, sizeof(policy_entry_t));
2411 policy.sel = ts2selector(src_ts, dst_ts);
2412 policy.direction = direction;
2413
2414 /* find the policy */
2415 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2416 while (iterator->iterate(iterator, (void**)&current))
2417 {
2418 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2419 policy.direction == current->direction)
2420 {
2421 to_delete = current;
2422 if (--to_delete->refcount > 0)
2423 {
2424 /* is used by more SAs, keep in kernel */
2425 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2426 iterator->destroy(iterator);
2427 return SUCCESS;
2428 }
2429 /* remove if last reference */
2430 iterator->remove(iterator);
2431 break;
2432 }
2433 }
2434 iterator->destroy(iterator);
2435 if (!to_delete)
2436 {
2437 DBG1(DBG_KNL, "deleting policy %R===%R failed, not found", src_ts, dst_ts);
2438 return NOT_FOUND;
2439 }
2440
2441 memset(&request, 0, sizeof(request));
2442
2443 hdr = (struct nlmsghdr*)request;
2444 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2445 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2446 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2447
2448 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2449 policy_id->sel = to_delete->sel;
2450 policy_id->dir = direction;
2451
2452 route = to_delete->route;
2453 free(to_delete);
2454
2455 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2456 {
2457 DBG1(DBG_KNL, "unable to delete policy %R===%R", src_ts, dst_ts);
2458 return FAILED;
2459 }
2460
2461 if (route)
2462 {
2463 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2464 {
2465 DBG1(DBG_KNL, "error uninstalling route installed with "
2466 "policy %R===%R", src_ts, dst_ts);
2467 }
2468 route_entry_destroy(route);
2469 }
2470 return SUCCESS;
2471 }
2472
2473 /**
2474 * Implementation of kernel_interface_t.destroy.
2475 */
2476 static void destroy(private_kernel_interface_t *this)
2477 {
2478 manage_rule(this, RTM_DELRULE, IPSEC_ROUTING_TABLE, IPSEC_ROUTING_TABLE_PRIO);
2479
2480 this->job->cancel(this->job);
2481 close(this->socket_xfrm_events);
2482 close(this->socket_xfrm);
2483 close(this->socket_rt_events);
2484 close(this->socket_rt);
2485 this->policies->destroy(this->policies);
2486 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2487 free(this);
2488 }
2489
2490 /*
2491 * Described in header.
2492 */
2493 kernel_interface_t *kernel_interface_create()
2494 {
2495 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
2496 struct sockaddr_nl addr;
2497
2498 /* public functions */
2499 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
2500 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,algorithm_t*,algorithm_t*,prf_plus_t*,mode_t,bool,bool))add_sa;
2501 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
2502 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
2503 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
2504 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t))add_policy;
2505 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
2506 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
2507 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
2508 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
2509 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
2510 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
2511 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
2512 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
2513
2514 /* private members */
2515 this->policies = linked_list_create();
2516 this->ifaces = linked_list_create();
2517 this->hiter = NULL;
2518 this->seq = 200;
2519 pthread_mutex_init(&this->mutex,NULL);
2520
2521 memset(&addr, 0, sizeof(addr));
2522 addr.nl_family = AF_NETLINK;
2523
2524 /* create and bind RT socket */
2525 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2526 if (this->socket_rt <= 0)
2527 {
2528 charon->kill(charon, "unable to create RT netlink socket");
2529 }
2530 addr.nl_groups = 0;
2531 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
2532 {
2533 charon->kill(charon, "unable to bind RT netlink socket");
2534 }
2535
2536 /* create and bind RT socket for events (address/interface/route changes) */
2537 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2538 if (this->socket_rt_events <= 0)
2539 {
2540 charon->kill(charon, "unable to create RT event socket");
2541 }
2542 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2543 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
2544 if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
2545 {
2546 charon->kill(charon, "unable to bind RT event socket");
2547 }
2548
2549 /* create and bind XFRM socket */
2550 this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2551 if (this->socket_xfrm <= 0)
2552 {
2553 charon->kill(charon, "unable to create XFRM netlink socket");
2554 }
2555 addr.nl_groups = 0;
2556 if (bind(this->socket_xfrm, (struct sockaddr*)&addr, sizeof(addr)))
2557 {
2558 charon->kill(charon, "unable to bind XFRM netlink socket");
2559 }
2560
2561 /* create and bind XFRM socket for ACQUIRE & EXPIRE */
2562 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2563 if (this->socket_xfrm_events <= 0)
2564 {
2565 charon->kill(charon, "unable to create XFRM event socket");
2566 }
2567 addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
2568 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
2569 {
2570 charon->kill(charon, "unable to bind XFRM event socket");
2571 }
2572
2573 this->job = callback_job_create((callback_job_cb_t)receive_events,
2574 this, NULL, NULL);
2575 charon->processor->queue_job(charon->processor, (job_t*)this->job);
2576
2577 if (init_address_list(this) != SUCCESS)
2578 {
2579 charon->kill(charon, "unable to get interface list");
2580 }
2581
2582 if (manage_rule(this, RTM_NEWRULE, IPSEC_ROUTING_TABLE,
2583 IPSEC_ROUTING_TABLE_PRIO) != SUCCESS)
2584 {
2585 DBG1(DBG_KNL, "unable to create routing table rule");
2586 }
2587
2588 return &this->public;
2589 }
2590