using own routing table for installed routes (table 100, prio 100)
[strongswan.git] / src / charon / kernel / kernel_interface.c
1 /**
2 * @file kernel_interface.c
3 *
4 * @brief Implementation of kernel_interface_t.
5 *
6 */
7
8 /*
9 * Copyright (C) 2005-2007 Martin Willi
10 * Copyright (C) 2006-2007 Tobias Brunner
11 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
12 * Copyright (C) 2006 Daniel Roethlisberger
13 * Copyright (C) 2005 Jan Hutter
14 * Hochschule fuer Technik Rapperswil
15 * Copyright (C) 2003 Herbert Xu.
16 *
17 * Based on xfrm code from pluto.
18 *
19 * This program is free software; you can redistribute it and/or modify it
20 * under the terms of the GNU General Public License as published by the
21 * Free Software Foundation; either version 2 of the License, or (at your
22 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 * for more details.
28 */
29
30 #include <sys/types.h>
31 #include <sys/socket.h>
32 #include <linux/netlink.h>
33 #include <linux/rtnetlink.h>
34 #include <linux/xfrm.h>
35 #include <linux/udp.h>
36 #include <pthread.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <net/if.h>
42 #include <sys/ioctl.h>
43
44 #include "kernel_interface.h"
45
46 #include <daemon.h>
47 #include <utils/linked_list.h>
48 #include <processing/jobs/delete_child_sa_job.h>
49 #include <processing/jobs/rekey_child_sa_job.h>
50 #include <processing/jobs/acquire_job.h>
51 #include <processing/jobs/callback_job.h>
52 #include <processing/jobs/roam_job.h>
53
/**
 * Routing table (and its rule priority) used for routes installed by us.
 * Both values may be overridden at build time.
 */
#if !defined(IPSEC_ROUTING_TABLE)
#define IPSEC_ROUTING_TABLE 100
#endif
#if !defined(IPSEC_ROUTING_TABLE_PRIO)
#define IPSEC_ROUTING_TABLE_PRIO 100
#endif

/** protocol identifiers as used on kernel level */
#define KERNEL_ESP 50
#define KERNEL_AH 51

/** default priorities of installed policies */
#define PRIO_LOW 3000
#define PRIO_HIGH 2000

/** size of the stack buffers used to build netlink requests */
#define BUFFER_SIZE 1024
71
/**
 * returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'
 *
 * NLMSG_DATA() yields a void pointer; it is cast to char* before the offset
 * is added, as arithmetic on void* is not valid ISO C.
 */
#define XFRM_RTA(nlh, x) \
	((struct rtattr*)(((char*)NLMSG_DATA(nlh)) + NLMSG_ALIGN(sizeof(x))))
/**
 * returns a pointer to the next rtattr following rta.
 * !!! do not use this to parse messages. use RTA_NEXT and RTA_OK instead !!!
 */
#define XFRM_RTA_NEXT(rta) \
	((struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
/**
 * returns the total size of attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) (NLMSG_PAYLOAD(nlh, sizeof(x)))
87
88 typedef struct kernel_algorithm_t kernel_algorithm_t;
89
90 /**
91 * Mapping from the algorithms defined in IKEv2 to
92 * kernel level algorithm names and their key length
93 */
94 struct kernel_algorithm_t {
95 /**
96 * Identifier specified in IKEv2
97 */
98 int ikev2_id;
99
100 /**
101 * Name of the algorithm, as used as kernel identifier
102 */
103 char *name;
104
105 /**
106 * Key length in bits, if fixed size
107 */
108 u_int key_size;
109 };
110 #define END_OF_LIST -1
111
112 /**
113 * Algorithms for encryption
114 */
115 kernel_algorithm_t encryption_algs[] = {
116 /* {ENCR_DES_IV64, "***", 0}, */
117 {ENCR_DES, "des", 64},
118 {ENCR_3DES, "des3_ede", 192},
119 /* {ENCR_RC5, "***", 0}, */
120 /* {ENCR_IDEA, "***", 0}, */
121 {ENCR_CAST, "cast128", 0},
122 {ENCR_BLOWFISH, "blowfish", 0},
123 /* {ENCR_3IDEA, "***", 0}, */
124 /* {ENCR_DES_IV32, "***", 0}, */
125 {ENCR_NULL, "cipher_null", 0},
126 {ENCR_AES_CBC, "aes", 0},
127 /* {ENCR_AES_CTR, "***", 0}, */
128 {END_OF_LIST, NULL, 0},
129 };
130
131 /**
132 * Algorithms for integrity protection
133 */
134 kernel_algorithm_t integrity_algs[] = {
135 {AUTH_HMAC_MD5_96, "md5", 128},
136 {AUTH_HMAC_SHA1_96, "sha1", 160},
137 {AUTH_HMAC_SHA2_256_128, "sha256", 256},
138 {AUTH_HMAC_SHA2_384_192, "sha384", 384},
139 {AUTH_HMAC_SHA2_512_256, "sha512", 512},
140 /* {AUTH_DES_MAC, "***", 0}, */
141 /* {AUTH_KPDK_MD5, "***", 0}, */
142 {AUTH_AES_XCBC_96, "xcbc(aes)", 128},
143 {END_OF_LIST, NULL, 0},
144 };
145
146 /**
147 * Look up a kernel algorithm name and its key size
148 */
149 char* lookup_algorithm(kernel_algorithm_t *kernel_algo,
150 algorithm_t *ikev2_algo, u_int *key_size)
151 {
152 while (kernel_algo->ikev2_id != END_OF_LIST)
153 {
154 if (ikev2_algo->algorithm == kernel_algo->ikev2_id)
155 {
156 /* match, evaluate key length */
157 if (ikev2_algo->key_size)
158 { /* variable length */
159 *key_size = ikev2_algo->key_size;
160 }
161 else
162 { /* fixed length */
163 *key_size = kernel_algo->key_size;
164 }
165 return kernel_algo->name;
166 }
167 kernel_algo++;
168 }
169 return NULL;
170 }
171
172 typedef struct route_entry_t route_entry_t;
173
174 /**
175 * installed routing entry
176 */
177 struct route_entry_t {
178
179 /** Index of the interface the route is bound to */
180 int if_index;
181
182 /** Source ip of the route */
183 host_t *src_ip;
184
185 /** gateway for this route */
186 host_t *gateway;
187
188 /** Destination net */
189 chunk_t dst_net;
190
191 /** Destination net prefixlen */
192 u_int8_t prefixlen;
193 };
194
195 /**
196 * destroy an route_entry_t object
197 */
198 static void route_entry_destroy(route_entry_t *this)
199 {
200 this->src_ip->destroy(this->src_ip);
201 this->gateway->destroy(this->gateway);
202 chunk_free(&this->dst_net);
203 free(this);
204 }
205
206 typedef struct policy_entry_t policy_entry_t;
207
208 /**
209 * installed kernel policy.
210 */
211 struct policy_entry_t {
212
213 /** direction of this policy: in, out, forward */
214 u_int8_t direction;
215
216 /** reqid of the policy */
217 u_int32_t reqid;
218
219 /** parameters of installed policy */
220 struct xfrm_selector sel;
221
222 /** associated route installed for this policy */
223 route_entry_t *route;
224
225 /** by how many CHILD_SA's this policy is used */
226 u_int refcount;
227 };
228
229 typedef struct addr_entry_t addr_entry_t;
230
231 /**
232 * IP address in an inface_entry_t
233 */
234 struct addr_entry_t {
235
236 /** The ip address */
237 host_t *ip;
238
239 /** virtual IP managed by us */
240 bool virtual;
241
242 /** scope of the address */
243 u_char scope;
244
245 /** Number of times this IP is used, if virtual */
246 u_int refcount;
247 };
248
249 /**
250 * destroy a addr_entry_t object
251 */
252 static void addr_entry_destroy(addr_entry_t *this)
253 {
254 this->ip->destroy(this->ip);
255 free(this);
256 }
257
258 typedef struct iface_entry_t iface_entry_t;
259
260 /**
261 * A network interface on this system, containing addr_entry_t's
262 */
263 struct iface_entry_t {
264
265 /** interface index */
266 int ifindex;
267
268 /** name of the interface */
269 char ifname[IFNAMSIZ];
270
271 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
272 u_int flags;
273
274 /** list of addresses as host_t */
275 linked_list_t *addrs;
276 };
277
278 /**
279 * destroy an interface entry
280 */
281 static void iface_entry_destroy(iface_entry_t *this)
282 {
283 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
284 free(this);
285 }
286
287 typedef struct private_kernel_interface_t private_kernel_interface_t;
288
289 /**
290 * Private variables and functions of kernel_interface class.
291 */
292 struct private_kernel_interface_t {
293 /**
294 * Public part of the kernel_interface_t object.
295 */
296 kernel_interface_t public;
297
298 /**
299 * mutex to lock access to the various lists
300 */
301 pthread_mutex_t mutex;
302
303 /**
304 * List of installed policies (policy_entry_t)
305 */
306 linked_list_t *policies;
307
308 /**
309 * Cached list of interfaces and its adresses (iface_entry_t)
310 */
311 linked_list_t *ifaces;
312
313 /**
314 * iterator used in hook()
315 */
316 iterator_t *hiter;
317
318 /**
319 * job receiving netlink events
320 */
321 callback_job_t *job;
322
323 /**
324 * current sequence number for netlink request
325 */
326 int seq;
327
328 /**
329 * Netlink xfrm socket (IPsec)
330 */
331 int socket_xfrm;
332
333 /**
334 * netlink xfrm socket to receive acquire and expire events
335 */
336 int socket_xfrm_events;
337
338 /**
339 * Netlink rt socket (routing)
340 */
341 int socket_rt;
342
343 /**
344 * Netlink rt socket to receive address change events
345 */
346 int socket_rt_events;
347 };
348
349 /**
350 * convert a host_t to a struct xfrm_address
351 */
352 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
353 {
354 chunk_t chunk = host->get_address(host);
355 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
356 }
357
358 /**
359 * convert a traffic selector address range to subnet and its mask.
360 */
361 static void ts2subnet(traffic_selector_t* ts,
362 xfrm_address_t *net, u_int8_t *mask)
363 {
364 /* there is no way to do this cleanly, as the address range may
365 * be anything else but a subnet. We use from_addr as subnet
366 * and try to calculate a usable subnet mask.
367 */
368 int byte, bit;
369 bool found = FALSE;
370 chunk_t from, to;
371 size_t size = (ts->get_type(ts) == TS_IPV4_ADDR_RANGE) ? 4 : 16;
372
373 from = ts->get_from_address(ts);
374 to = ts->get_to_address(ts);
375
376 *mask = (size * 8);
377 /* go trough all bits of the addresses, beginning in the front.
378 * as long as they are equal, the subnet gets larger
379 */
380 for (byte = 0; byte < size; byte++)
381 {
382 for (bit = 7; bit >= 0; bit--)
383 {
384 if ((1<<bit & from.ptr[byte]) != (1<<bit & to.ptr[byte]))
385 {
386 *mask = ((7 - bit) + (byte * 8));
387 found = TRUE;
388 break;
389 }
390 }
391 if (found)
392 {
393 break;
394 }
395 }
396 memcpy(net, from.ptr, from.len);
397 chunk_free(&from);
398 chunk_free(&to);
399 }
400
401 /**
402 * convert a traffic selector port range to port/portmask
403 */
404 static void ts2ports(traffic_selector_t* ts,
405 u_int16_t *port, u_int16_t *mask)
406 {
407 /* linux does not seem to accept complex portmasks. Only
408 * any or a specific port is allowed. We set to any, if we have
409 * a port range, or to a specific, if we have one port only.
410 */
411 u_int16_t from, to;
412
413 from = ts->get_from_port(ts);
414 to = ts->get_to_port(ts);
415
416 if (from == to)
417 {
418 *port = htons(from);
419 *mask = ~0;
420 }
421 else
422 {
423 *port = 0;
424 *mask = 0;
425 }
426 }
427
428 /**
429 * convert a pair of traffic_selectors to a xfrm_selector
430 */
431 static struct xfrm_selector ts2selector(traffic_selector_t *src,
432 traffic_selector_t *dst)
433 {
434 struct xfrm_selector sel;
435
436 memset(&sel, 0, sizeof(sel));
437 sel.family = src->get_type(src) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
438 /* src or dest proto may be "any" (0), use more restrictive one */
439 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
440 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
441 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
442 ts2ports(dst, &sel.dport, &sel.dport_mask);
443 ts2ports(src, &sel.sport, &sel.sport_mask);
444 sel.ifindex = 0;
445 sel.user = 0;
446
447 return sel;
448 }
449
450 /**
451 * Creates an rtattr and adds it to the netlink message
452 */
453 static void add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
454 size_t buflen)
455 {
456 struct rtattr *rta;
457
458 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_ALIGN(data.len) > buflen)
459 {
460 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
461 return;
462 }
463
464 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
465 rta->rta_type = rta_type;
466 rta->rta_len = RTA_LENGTH(data.len);
467 memcpy(RTA_DATA(rta), data.ptr, data.len);
468 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
469 }
470
471 /**
472 * process a XFRM_MSG_ACQUIRE from kernel
473 */
474 static void process_acquire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
475 {
476 u_int32_t reqid = 0;
477 job_t *job;
478 struct rtattr *rtattr = XFRM_RTA(hdr, struct xfrm_user_acquire);
479 size_t rtsize = XFRM_PAYLOAD(hdr, struct xfrm_user_tmpl);
480
481 if (RTA_OK(rtattr, rtsize))
482 {
483 if (rtattr->rta_type == XFRMA_TMPL)
484 {
485 struct xfrm_user_tmpl* tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rtattr);
486 reqid = tmpl->reqid;
487 }
488 }
489 if (reqid == 0)
490 {
491 DBG1(DBG_KNL, "received a XFRM_MSG_ACQUIRE, but no reqid found");
492 return;
493 }
494 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
495 DBG1(DBG_KNL, "creating acquire job for CHILD_SA with reqid %d", reqid);
496 job = (job_t*)acquire_job_create(reqid);
497 charon->processor->queue_job(charon->processor, job);
498 }
499
500 /**
501 * process a XFRM_MSG_EXPIRE from kernel
502 */
503 static void process_expire(private_kernel_interface_t *this, struct nlmsghdr *hdr)
504 {
505 job_t *job;
506 protocol_id_t protocol;
507 u_int32_t spi, reqid;
508 struct xfrm_user_expire *expire;
509
510 expire = (struct xfrm_user_expire*)NLMSG_DATA(hdr);
511 protocol = expire->state.id.proto == KERNEL_ESP ? PROTO_ESP : PROTO_AH;
512 spi = expire->state.id.spi;
513 reqid = expire->state.reqid;
514
515 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
516 DBG1(DBG_KNL, "creating %s job for %N CHILD_SA 0x%x (reqid %d)",
517 expire->hard ? "delete" : "rekey", protocol_id_names,
518 protocol, ntohl(spi), reqid);
519 if (expire->hard)
520 {
521 job = (job_t*)delete_child_sa_job_create(reqid, protocol, spi);
522 }
523 else
524 {
525 job = (job_t*)rekey_child_sa_job_create(reqid, protocol, spi);
526 }
527 charon->processor->queue_job(charon->processor, job);
528 }
529
530 /**
531 * process RTM_NEWLINK/RTM_DELLINK from kernel
532 */
533 static void process_link(private_kernel_interface_t *this,
534 struct nlmsghdr *hdr, bool event)
535 {
536 struct ifinfomsg* msg = (struct ifinfomsg*)(NLMSG_DATA(hdr));
537 struct rtattr *rta = IFLA_RTA(msg);
538 size_t rtasize = IFLA_PAYLOAD (hdr);
539 iterator_t *iterator;
540 iface_entry_t *current, *entry = NULL;
541 char *name = NULL;
542 bool update = FALSE;
543
544 while(RTA_OK(rta, rtasize))
545 {
546 switch (rta->rta_type)
547 {
548 case IFLA_IFNAME:
549 name = RTA_DATA(rta);
550 break;
551 }
552 rta = RTA_NEXT(rta, rtasize);
553 }
554 if (!name)
555 {
556 name = "(unknown)";
557 }
558
559 switch (hdr->nlmsg_type)
560 {
561 case RTM_NEWLINK:
562 {
563 if (msg->ifi_flags & IFF_LOOPBACK)
564 { /* ignore loopback interfaces */
565 break;
566 }
567 iterator = this->ifaces->create_iterator_locked(this->ifaces,
568 &this->mutex);
569 while (iterator->iterate(iterator, (void**)&current))
570 {
571 if (current->ifindex == msg->ifi_index)
572 {
573 entry = current;
574 break;
575 }
576 }
577 if (!entry)
578 {
579 entry = malloc_thing(iface_entry_t);
580 entry->ifindex = msg->ifi_index;
581 entry->flags = 0;
582 entry->addrs = linked_list_create();
583 this->ifaces->insert_last(this->ifaces, entry);
584 }
585 memcpy(entry->ifname, name, IFNAMSIZ);
586 entry->ifname[IFNAMSIZ-1] = '\0';
587 if (event)
588 {
589 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
590 {
591 update = TRUE;
592 DBG1(DBG_KNL, "interface %s activated", name);
593 }
594 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
595 {
596 update = TRUE;
597 DBG1(DBG_KNL, "interface %s deactivated", name);
598 }
599 }
600 entry->flags = msg->ifi_flags;
601 iterator->destroy(iterator);
602 break;
603 }
604 case RTM_DELLINK:
605 {
606 iterator = this->ifaces->create_iterator_locked(this->ifaces,
607 &this->mutex);
608 while (iterator->iterate(iterator, (void**)&current))
609 {
610 if (current->ifindex == msg->ifi_index)
611 {
612 /* we do not remove it, as an address may be added to a
613 * "down" interface and we wan't to know that. */
614 current->flags = msg->ifi_flags;
615 break;
616 }
617 }
618 iterator->destroy(iterator);
619 break;
620 }
621 }
622
623 /* send an update to all IKE_SAs */
624 if (update && event)
625 {
626 charon->processor->queue_job(charon->processor,
627 (job_t*)roam_job_create(TRUE));
628 }
629 }
630
631 /**
632 * process RTM_NEWADDR/RTM_DELADDR from kernel
633 */
634 static void process_addr(private_kernel_interface_t *this,
635 struct nlmsghdr *hdr, bool event)
636 {
637 struct ifaddrmsg* msg = (struct ifaddrmsg*)(NLMSG_DATA(hdr));
638 struct rtattr *rta = IFA_RTA(msg);
639 size_t rtasize = IFA_PAYLOAD (hdr);
640 host_t *host = NULL;
641 iterator_t *ifaces, *addrs;
642 iface_entry_t *iface;
643 addr_entry_t *addr;
644 chunk_t local = chunk_empty, address = chunk_empty;
645 bool update = FALSE, found = FALSE, changed = FALSE;
646
647 while(RTA_OK(rta, rtasize))
648 {
649 switch (rta->rta_type)
650 {
651 case IFA_LOCAL:
652 local.ptr = RTA_DATA(rta);
653 local.len = RTA_PAYLOAD(rta);
654 break;
655 case IFA_ADDRESS:
656 address.ptr = RTA_DATA(rta);
657 address.len = RTA_PAYLOAD(rta);
658 break;
659 }
660 rta = RTA_NEXT(rta, rtasize);
661 }
662
663 /* For PPP interfaces, we need the IFA_LOCAL address,
664 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
665 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
666 if (local.ptr)
667 {
668 host = host_create_from_chunk(msg->ifa_family, local, 0);
669 }
670 else if (address.ptr)
671 {
672 host = host_create_from_chunk(msg->ifa_family, address, 0);
673 }
674
675 if (host == NULL)
676 { /* bad family? */
677 return;
678 }
679
680 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
681 while (ifaces->iterate(ifaces, (void**)&iface))
682 {
683 if (iface->ifindex == msg->ifa_index)
684 {
685 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
686 while (addrs->iterate(addrs, (void**)&addr))
687 {
688 if (host->ip_equals(host, addr->ip))
689 {
690 found = TRUE;
691 if (hdr->nlmsg_type == RTM_DELADDR)
692 {
693 changed = TRUE;
694 addrs->remove(addrs);
695 addr_entry_destroy(addr);
696 DBG1(DBG_KNL, "%H disappeared from %s", host, iface->ifname);
697 }
698 }
699 }
700 addrs->destroy(addrs);
701
702 if (hdr->nlmsg_type == RTM_NEWADDR)
703 {
704 if (!found)
705 {
706 found = TRUE;
707 changed = TRUE;
708 addr = malloc_thing(addr_entry_t);
709 addr->ip = host->clone(host);
710 addr->virtual = FALSE;
711 addr->refcount = 1;
712 addr->scope = msg->ifa_scope;
713
714 iface->addrs->insert_last(iface->addrs, addr);
715 if (event)
716 {
717 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
718 }
719 }
720 }
721 if (found && (iface->flags & IFF_UP))
722 {
723 update = TRUE;
724 }
725 break;
726 }
727 }
728 ifaces->destroy(ifaces);
729 host->destroy(host);
730
731 /* send an update to all IKE_SAs */
732 if (update && event && changed)
733 {
734 charon->processor->queue_job(charon->processor,
735 (job_t*)roam_job_create(TRUE));
736 }
737 }
738
739 /**
740 * Receives events from kernel
741 */
742 static job_requeue_t receive_events(private_kernel_interface_t *this)
743 {
744 char response[1024];
745 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
746 struct sockaddr_nl addr;
747 socklen_t addr_len = sizeof(addr);
748 int len, oldstate, maxfd, selected;
749 fd_set rfds;
750
751 FD_ZERO(&rfds);
752 FD_SET(this->socket_xfrm_events, &rfds);
753 FD_SET(this->socket_rt_events, &rfds);
754 maxfd = max(this->socket_xfrm_events, this->socket_rt_events);
755
756 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
757 selected = select(maxfd + 1, &rfds, NULL, NULL, NULL);
758 pthread_setcancelstate(oldstate, NULL);
759 if (selected <= 0)
760 {
761 DBG1(DBG_KNL, "selecting on sockets failed: %s", strerror(errno));
762 return JOB_REQUEUE_FAIR;
763 }
764 if (FD_ISSET(this->socket_xfrm_events, &rfds))
765 {
766 selected = this->socket_xfrm_events;
767 }
768 else if (FD_ISSET(this->socket_rt_events, &rfds))
769 {
770 selected = this->socket_rt_events;
771 }
772 else
773 {
774 return JOB_REQUEUE_DIRECT;
775 }
776
777 len = recvfrom(selected, response, sizeof(response), MSG_DONTWAIT,
778 (struct sockaddr*)&addr, &addr_len);
779 if (len < 0)
780 {
781 switch (errno)
782 {
783 case EINTR:
784 /* interrupted, try again */
785 return JOB_REQUEUE_DIRECT;
786 case EAGAIN:
787 /* no data ready, select again */
788 return JOB_REQUEUE_DIRECT;
789 default:
790 DBG1(DBG_KNL, "unable to receive from xfrm event socket");
791 sleep(1);
792 return JOB_REQUEUE_FAIR;
793 }
794 }
795 if (addr.nl_pid != 0)
796 { /* not from kernel. not interested, try another one */
797 return JOB_REQUEUE_DIRECT;
798 }
799
800 while (NLMSG_OK(hdr, len))
801 {
802 /* looks good so far, dispatch netlink message */
803 if (selected == this->socket_xfrm_events)
804 {
805 switch (hdr->nlmsg_type)
806 {
807 case XFRM_MSG_ACQUIRE:
808 process_acquire(this, hdr);
809 break;
810 case XFRM_MSG_EXPIRE:
811 process_expire(this, hdr);
812 break;
813 default:
814 break;
815 }
816 }
817 else if (selected == this->socket_rt_events)
818 {
819 switch (hdr->nlmsg_type)
820 {
821 case RTM_NEWADDR:
822 case RTM_DELADDR:
823 process_addr(this, hdr, TRUE);
824 break;
825 case RTM_NEWLINK:
826 case RTM_DELLINK:
827 process_link(this, hdr, TRUE);
828 break;
829 case RTM_NEWROUTE:
830 case RTM_DELROUTE:
831 charon->processor->queue_job(charon->processor,
832 (job_t*)roam_job_create(FALSE));
833 break;
834 default:
835 break;
836 }
837 }
838 hdr = NLMSG_NEXT(hdr, len);
839 }
840 return JOB_REQUEUE_DIRECT;
841 }
842
843 /**
844 * send a netlink message and wait for a reply
845 */
846 static status_t netlink_send(private_kernel_interface_t *this,
847 int socket, struct nlmsghdr *in,
848 struct nlmsghdr **out, size_t *out_len)
849 {
850 int len, addr_len;
851 struct sockaddr_nl addr;
852 chunk_t result = chunk_empty, tmp;
853 struct nlmsghdr *msg, peek;
854
855 pthread_mutex_lock(&this->mutex);
856
857 in->nlmsg_seq = ++this->seq;
858 in->nlmsg_pid = getpid();
859
860 memset(&addr, 0, sizeof(addr));
861 addr.nl_family = AF_NETLINK;
862 addr.nl_pid = 0;
863 addr.nl_groups = 0;
864
865 while (TRUE)
866 {
867 len = sendto(socket, in, in->nlmsg_len, 0,
868 (struct sockaddr*)&addr, sizeof(addr));
869
870 if (len != in->nlmsg_len)
871 {
872 if (errno == EINTR)
873 {
874 /* interrupted, try again */
875 continue;
876 }
877 pthread_mutex_unlock(&this->mutex);
878 DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno));
879 return FAILED;
880 }
881 break;
882 }
883
884 while (TRUE)
885 {
886 char buf[4096];
887 tmp.len = sizeof(buf);
888 tmp.ptr = buf;
889 msg = (struct nlmsghdr*)tmp.ptr;
890
891 memset(&addr, 0, sizeof(addr));
892 addr.nl_family = AF_NETLINK;
893 addr.nl_pid = getpid();
894 addr.nl_groups = 0;
895 addr_len = sizeof(addr);
896
897 len = recvfrom(socket, tmp.ptr, tmp.len, 0,
898 (struct sockaddr*)&addr, &addr_len);
899
900 if (len < 0)
901 {
902 if (errno == EINTR)
903 {
904 DBG1(DBG_KNL, "got interrupted");
905 /* interrupted, try again */
906 continue;
907 }
908 DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno));
909 pthread_mutex_unlock(&this->mutex);
910 return FAILED;
911 }
912 if (!NLMSG_OK(msg, len))
913 {
914 DBG1(DBG_KNL, "received corrupted netlink message");
915 pthread_mutex_unlock(&this->mutex);
916 return FAILED;
917 }
918 if (msg->nlmsg_seq != this->seq)
919 {
920 DBG1(DBG_KNL, "received invalid netlink sequence number");
921 if (msg->nlmsg_seq < this->seq)
922 {
923 continue;
924 }
925 pthread_mutex_unlock(&this->mutex);
926 return FAILED;
927 }
928
929 tmp.len = len;
930 result = chunk_cata("cc", result, tmp);
931
932 /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence
933 * numbers to detect multi header messages */
934 len = recvfrom(socket, &peek, sizeof(peek), MSG_PEEK | MSG_DONTWAIT,
935 (struct sockaddr*)&addr, &addr_len);
936
937 if (len == sizeof(peek) && peek.nlmsg_seq == this->seq)
938 {
939 /* seems to be multipart */
940 continue;
941 }
942 break;
943 }
944
945 *out_len = result.len;
946 *out = (struct nlmsghdr*)clalloc(result.ptr, result.len);
947
948 pthread_mutex_unlock(&this->mutex);
949
950 return SUCCESS;
951 }
952
953 /**
954 * send a netlink message and wait for its acknowlegde
955 */
956 static status_t netlink_send_ack(private_kernel_interface_t *this,
957 int socket, struct nlmsghdr *in)
958 {
959 struct nlmsghdr *out, *hdr;
960 size_t len;
961
962 if (netlink_send(this, socket, in, &out, &len) != SUCCESS)
963 {
964 return FAILED;
965 }
966 hdr = out;
967 while (NLMSG_OK(hdr, len))
968 {
969 switch (hdr->nlmsg_type)
970 {
971 case NLMSG_ERROR:
972 {
973 struct nlmsgerr* err = (struct nlmsgerr*)NLMSG_DATA(hdr);
974
975 if (err->error)
976 {
977 DBG1(DBG_KNL, "received netlink error: %s (%d)",
978 strerror(-err->error), -err->error);
979 free(out);
980 return FAILED;
981 }
982 free(out);
983 return SUCCESS;
984 }
985 default:
986 hdr = NLMSG_NEXT(hdr, len);
987 continue;
988 case NLMSG_DONE:
989 break;
990 }
991 break;
992 }
993 DBG1(DBG_KNL, "netlink request not acknowlegded");
994 free(out);
995 return FAILED;
996 }
997
998 /**
999 * Initialize a list of local addresses.
1000 */
1001 static status_t init_address_list(private_kernel_interface_t *this)
1002 {
1003 char request[BUFFER_SIZE];
1004 struct nlmsghdr *out, *current, *in;
1005 struct rtgenmsg *msg;
1006 size_t len;
1007 iterator_t *ifaces, *addrs;
1008 iface_entry_t *iface;
1009 addr_entry_t *addr;
1010
1011 DBG1(DBG_KNL, "listening on interfaces:");
1012
1013 memset(&request, 0, sizeof(request));
1014
1015 in = (struct nlmsghdr*)&request;
1016 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
1017 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
1018 msg = (struct rtgenmsg*)NLMSG_DATA(in);
1019 msg->rtgen_family = AF_UNSPEC;
1020
1021 /* get all links */
1022 in->nlmsg_type = RTM_GETLINK;
1023 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1024 {
1025 return FAILED;
1026 }
1027 current = out;
1028 while (NLMSG_OK(current, len))
1029 {
1030 switch (current->nlmsg_type)
1031 {
1032 case NLMSG_DONE:
1033 break;
1034 case RTM_NEWLINK:
1035 process_link(this, current, FALSE);
1036 /* fall through */
1037 default:
1038 current = NLMSG_NEXT(current, len);
1039 continue;
1040 }
1041 break;
1042 }
1043 free(out);
1044
1045 /* get all interface addresses */
1046 in->nlmsg_type = RTM_GETADDR;
1047 if (netlink_send(this, this->socket_rt, in, &out, &len) != SUCCESS)
1048 {
1049 return FAILED;
1050 }
1051 current = out;
1052 while (NLMSG_OK(current, len))
1053 {
1054 switch (current->nlmsg_type)
1055 {
1056 case NLMSG_DONE:
1057 break;
1058 case RTM_NEWADDR:
1059 process_addr(this, current, FALSE);
1060 /* fall through */
1061 default:
1062 current = NLMSG_NEXT(current, len);
1063 continue;
1064 }
1065 break;
1066 }
1067 free(out);
1068
1069 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1070 while (ifaces->iterate(ifaces, (void**)&iface))
1071 {
1072 if (iface->flags & IFF_UP)
1073 {
1074 DBG1(DBG_KNL, " %s", iface->ifname);
1075 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1076 while (addrs->iterate(addrs, (void**)&addr))
1077 {
1078 DBG1(DBG_KNL, " %H", addr->ip);
1079 }
1080 addrs->destroy(addrs);
1081 }
1082 }
1083 ifaces->destroy(ifaces);
1084 return SUCCESS;
1085 }
1086
1087 /**
1088 * iterator hook to iterate over addrs
1089 */
1090 static hook_result_t addr_hook(private_kernel_interface_t *this,
1091 addr_entry_t *in, host_t **out)
1092 {
1093 if (in->virtual)
1094 { /* skip virtual interfaces added by us */
1095 return HOOK_SKIP;
1096 }
1097 if (in->scope >= RT_SCOPE_LINK)
1098 { /* skip addresses with a unusable scope */
1099 return HOOK_SKIP;
1100 }
1101 *out = in->ip;
1102 return HOOK_NEXT;
1103 }
1104
1105 /**
1106 * iterator hook to iterate over ifaces
1107 */
1108 static hook_result_t iface_hook(private_kernel_interface_t *this,
1109 iface_entry_t *in, host_t **out)
1110 {
1111 if (!(in->flags & IFF_UP))
1112 { /* skip interfaces not up */
1113 return HOOK_SKIP;
1114 }
1115
1116 if (this->hiter == NULL)
1117 {
1118 this->hiter = in->addrs->create_iterator(in->addrs, TRUE);
1119 this->hiter->set_iterator_hook(this->hiter,
1120 (iterator_hook_t*)addr_hook, this);
1121 }
1122 while (this->hiter->iterate(this->hiter, (void**)out))
1123 {
1124 return HOOK_AGAIN;
1125 }
1126 this->hiter->destroy(this->hiter);
1127 this->hiter = NULL;
1128 return HOOK_SKIP;
1129 }
1130
1131 /**
1132 * Implements kernel_interface_t.create_address_iterator.
1133 */
1134 static iterator_t *create_address_iterator(private_kernel_interface_t *this)
1135 {
1136 iterator_t *iterator;
1137
1138 /* This iterator is not only hooked, is is double-hooked. As we have stored
1139 * our addresses in iface_entry->addr_entry->ip, we need to iterate the
1140 * entries in each interface we iterate. This does the iface_hook. The
1141 * addr_hook returns the ip instead of the addr_entry. */
1142
1143 iterator = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1144 iterator->set_iterator_hook(iterator, (iterator_hook_t*)iface_hook, this);
1145 return iterator;
1146 }
1147
1148 /**
1149 * implementation of kernel_interface_t.get_interface_name
1150 */
1151 static char *get_interface_name(private_kernel_interface_t *this, host_t* ip)
1152 {
1153 iterator_t *ifaces, *addrs;
1154 iface_entry_t *iface;
1155 addr_entry_t *addr;
1156 char *name = NULL;
1157
1158 DBG2(DBG_KNL, "getting interface name for %H", ip);
1159
1160 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1161 while (ifaces->iterate(ifaces, (void**)&iface))
1162 {
1163 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1164 while (addrs->iterate(addrs, (void**)&addr))
1165 {
1166 if (ip->ip_equals(ip, addr->ip))
1167 {
1168 name = strdup(iface->ifname);
1169 break;
1170 }
1171 }
1172 addrs->destroy(addrs);
1173 if (name)
1174 {
1175 break;
1176 }
1177 }
1178 ifaces->destroy(ifaces);
1179
1180 if (name)
1181 {
1182 DBG2(DBG_KNL, "%H is on interface %s", ip, name);
1183 }
1184 else
1185 {
1186 DBG2(DBG_KNL, "%H is not a local address", ip);
1187 }
1188 return name;
1189 }
1190
1191 /**
1192 * Tries to find an ip address of a local interface that is included in the
1193 * supplied traffic selector.
1194 */
1195 static status_t get_address_by_ts(private_kernel_interface_t *this,
1196 traffic_selector_t *ts, host_t **ip)
1197 {
1198 iterator_t *ifaces, *addrs;
1199 iface_entry_t *iface;
1200 addr_entry_t *addr;
1201 host_t *host;
1202 int family;
1203 bool found = FALSE;
1204
1205 DBG2(DBG_KNL, "getting a local address in traffic selector %R", ts);
1206
1207 /* if we have a family which includes localhost, we do not
1208 * search for an IP, we use the default */
1209 family = ts->get_type(ts) == TS_IPV4_ADDR_RANGE ? AF_INET : AF_INET6;
1210
1211 if (family == AF_INET)
1212 {
1213 host = host_create_from_string("127.0.0.1", 0);
1214 }
1215 else
1216 {
1217 host = host_create_from_string("::1", 0);
1218 }
1219
1220 if (ts->includes(ts, host))
1221 {
1222 *ip = host_create_any(family);
1223 host->destroy(host);
1224 DBG2(DBG_KNL, "using host %H", *ip);
1225 return SUCCESS;
1226 }
1227 host->destroy(host);
1228
1229 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1230 while (ifaces->iterate(ifaces, (void**)&iface))
1231 {
1232 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1233 while (addrs->iterate(addrs, (void**)&addr))
1234 {
1235 if (ts->includes(ts, addr->ip))
1236 {
1237 found = TRUE;
1238 *ip = addr->ip->clone(addr->ip);
1239 break;
1240 }
1241 }
1242 addrs->destroy(addrs);
1243 if (found)
1244 {
1245 break;
1246 }
1247 }
1248 ifaces->destroy(ifaces);
1249
1250 if (!found)
1251 {
1252 DBG1(DBG_KNL, "no local address found in traffic selector %R", ts);
1253 return FAILED;
1254 }
1255 DBG2(DBG_KNL, "using host %H", *ip);
1256 return SUCCESS;
1257 }
1258
1259 /**
1260 * get the interface of a local address
1261 */
1262 static int get_interface_index(private_kernel_interface_t *this, host_t* ip)
1263 {
1264 iterator_t *ifaces, *addrs;
1265 iface_entry_t *iface;
1266 addr_entry_t *addr;
1267 int ifindex = 0;
1268
1269 DBG2(DBG_KNL, "getting iface for %H", ip);
1270
1271 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1272 while (ifaces->iterate(ifaces, (void**)&iface))
1273 {
1274 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1275 while (addrs->iterate(addrs, (void**)&addr))
1276 {
1277 if (ip->ip_equals(ip, addr->ip))
1278 {
1279 ifindex = iface->ifindex;
1280 break;
1281 }
1282 }
1283 addrs->destroy(addrs);
1284 if (ifindex)
1285 {
1286 break;
1287 }
1288 }
1289 ifaces->destroy(ifaces);
1290
1291 if (ifindex == 0)
1292 {
1293 DBG1(DBG_KNL, "unable to get interface for %H", ip);
1294 }
1295 return ifindex;
1296 }
1297
1298 /**
1299 * Manages the creation and deletion of ip addresses on an interface.
1300 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1301 */
1302 static status_t manage_ipaddr(private_kernel_interface_t *this, int nlmsg_type,
1303 int flags, int if_index, host_t *ip)
1304 {
1305 unsigned char request[BUFFER_SIZE];
1306 struct nlmsghdr *hdr;
1307 struct ifaddrmsg *msg;
1308 chunk_t chunk;
1309
1310 memset(&request, 0, sizeof(request));
1311
1312 chunk = ip->get_address(ip);
1313
1314 hdr = (struct nlmsghdr*)request;
1315 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1316 hdr->nlmsg_type = nlmsg_type;
1317 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
1318
1319 msg = (struct ifaddrmsg*)NLMSG_DATA(hdr);
1320 msg->ifa_family = ip->get_family(ip);
1321 msg->ifa_flags = 0;
1322 msg->ifa_prefixlen = 8 * chunk.len;
1323 msg->ifa_scope = RT_SCOPE_UNIVERSE;
1324 msg->ifa_index = if_index;
1325
1326 add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
1327
1328 return netlink_send_ack(this, this->socket_rt, hdr);
1329 }
1330
1331 /**
1332 * Manages source routes in the routing table.
1333 * By setting the appropriate nlmsg_type, the route added or r.
1334 */
1335 static status_t manage_srcroute(private_kernel_interface_t *this, int nlmsg_type,
1336 int flags, route_entry_t *route)
1337 {
1338 unsigned char request[BUFFER_SIZE];
1339 struct nlmsghdr *hdr;
1340 struct rtmsg *msg;
1341 chunk_t chunk;
1342
1343 /* if route is 0.0.0.0/0, we can't install it, as it would
1344 * overwrite the default route. Instead, we add two routes:
1345 * 0.0.0.0/1 and 128.0.0.0/1
1346 * TODO: use metrics instead */
1347 if (route->prefixlen == 0)
1348 {
1349 route_entry_t half;
1350 status_t status;
1351
1352 half.dst_net = chunk_alloca(route->dst_net.len);
1353 memset(half.dst_net.ptr, 0, half.dst_net.len);
1354 half.src_ip = route->src_ip;
1355 half.gateway = route->gateway;
1356 half.if_index = route->if_index;
1357 half.prefixlen = 1;
1358
1359 status = manage_srcroute(this, nlmsg_type, flags, &half);
1360 half.dst_net.ptr[0] |= 0x80;
1361 status = manage_srcroute(this, nlmsg_type, flags, &half);
1362 return status;
1363 }
1364
1365 memset(&request, 0, sizeof(request));
1366
1367 hdr = (struct nlmsghdr*)request;
1368 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
1369 hdr->nlmsg_type = nlmsg_type;
1370 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1371
1372 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1373 msg->rtm_family = route->src_ip->get_family(route->src_ip);
1374 msg->rtm_dst_len = route->prefixlen;
1375 msg->rtm_table = IPSEC_ROUTING_TABLE;
1376 msg->rtm_protocol = RTPROT_STATIC;
1377 msg->rtm_type = RTN_UNICAST;
1378 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1379
1380 add_attribute(hdr, RTA_DST, route->dst_net, sizeof(request));
1381 chunk = route->src_ip->get_address(route->src_ip);
1382 add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1383 chunk = route->gateway->get_address(route->gateway);
1384 add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
1385 chunk.ptr = (char*)&route->if_index;
1386 chunk.len = sizeof(route->if_index);
1387 add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
1388
1389 return netlink_send_ack(this, this->socket_rt, hdr);
1390 }
1391
1392 /**
1393 * create or delete an rule to use our routing table
1394 */
1395 static status_t manage_rule(private_kernel_interface_t *this, int nlmsg_type,
1396 u_int32_t table, u_int32_t prio)
1397 {
1398 unsigned char request[BUFFER_SIZE];
1399 struct nlmsghdr *hdr;
1400 struct rtmsg *msg;
1401 chunk_t chunk;
1402
1403 memset(&request, 0, sizeof(request));
1404 hdr = (struct nlmsghdr*)request;
1405 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1406 hdr->nlmsg_type = nlmsg_type;
1407 if (nlmsg_type == RTM_NEWRULE)
1408 {
1409 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
1410 }
1411 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1412
1413 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1414 msg->rtm_table = table;
1415 msg->rtm_family = AF_INET;
1416 msg->rtm_protocol = RTPROT_BOOT;
1417 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1418 msg->rtm_type = RTN_UNICAST;
1419
1420 chunk = chunk_from_thing(prio);
1421 add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
1422
1423 return netlink_send_ack(this, this->socket_rt, hdr);
1424 }
1425
1426 /**
1427 * Get the nexthop gateway for dest; or the source addr if gateway = FALSE
1428 */
1429 static host_t* get_addr(private_kernel_interface_t *this,
1430 host_t *dest, bool gateway)
1431 {
1432 unsigned char request[BUFFER_SIZE];
1433 struct nlmsghdr *hdr, *out, *current;
1434 struct rtmsg *msg;
1435 chunk_t chunk;
1436 size_t len;
1437 host_t *addr = NULL;
1438
1439 DBG2(DBG_KNL, "getting address to reach %H", dest);
1440
1441 memset(&request, 0, sizeof(request));
1442
1443 hdr = (struct nlmsghdr*)request;
1444 hdr->nlmsg_flags = NLM_F_REQUEST;
1445 hdr->nlmsg_type = RTM_GETROUTE;
1446 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1447
1448 msg = (struct rtmsg*)NLMSG_DATA(hdr);
1449 msg->rtm_family = dest->get_family(dest);
1450 msg->rtm_dst_len = msg->rtm_family == AF_INET ? 32 : 128;
1451 msg->rtm_table = RT_TABLE_MAIN;
1452 msg->rtm_protocol = RTPROT_STATIC;
1453 msg->rtm_type = RTN_UNICAST;
1454 msg->rtm_scope = RT_SCOPE_UNIVERSE;
1455
1456 chunk = dest->get_address(dest);
1457 add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1458
1459 if (netlink_send(this, this->socket_rt, hdr, &out, &len) != SUCCESS)
1460 {
1461 DBG1(DBG_KNL, "getting address to %H failed", dest);
1462 return NULL;
1463 }
1464 current = out;
1465 while (NLMSG_OK(current, len))
1466 {
1467 switch (current->nlmsg_type)
1468 {
1469 case NLMSG_DONE:
1470 break;
1471 case RTM_NEWROUTE:
1472 {
1473 struct rtattr *rta;
1474 size_t rtasize;
1475
1476 msg = (struct rtmsg*)(NLMSG_DATA(current));
1477 rta = RTM_RTA(msg);
1478 rtasize = RTM_PAYLOAD(current);
1479 while(RTA_OK(rta, rtasize))
1480 {
1481 if ((rta->rta_type == RTA_PREFSRC && !gateway) ||
1482 (rta->rta_type == RTA_GATEWAY && gateway))
1483 {
1484 chunk.ptr = RTA_DATA(rta);
1485 chunk.len = RTA_PAYLOAD(rta);
1486 addr = host_create_from_chunk(msg->rtm_family,
1487 chunk, 0);
1488 break;
1489 }
1490 rta = RTA_NEXT(rta, rtasize);
1491 }
1492 break;
1493 }
1494 default:
1495 current = NLMSG_NEXT(current, len);
1496 continue;
1497 }
1498 break;
1499 }
1500 free(out);
1501 if (addr == NULL)
1502 {
1503 DBG2(DBG_KNL, "no route found to %H", dest);
1504 }
1505 return addr;
1506 }
1507
1508 /**
1509 * Implementation of kernel_interface_t.get_source_addr.
1510 */
1511 static host_t* get_source_addr(private_kernel_interface_t *this, host_t *dest)
1512 {
1513 return get_addr(this, dest, FALSE);
1514 }
1515
1516 /**
1517 * Implementation of kernel_interface_t.add_ip.
1518 */
1519 static status_t add_ip(private_kernel_interface_t *this,
1520 host_t *virtual_ip, host_t *iface_ip)
1521 {
1522 iface_entry_t *iface;
1523 addr_entry_t *addr;
1524 iterator_t *addrs, *ifaces;
1525
1526 DBG2(DBG_KNL, "adding virtual IP %H", virtual_ip);
1527
1528 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1529 while (ifaces->iterate(ifaces, (void**)&iface))
1530 {
1531 bool iface_found = FALSE;
1532
1533 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1534 while (addrs->iterate(addrs, (void**)&addr))
1535 {
1536 if (iface_ip->ip_equals(iface_ip, addr->ip))
1537 {
1538 iface_found = TRUE;
1539 }
1540 else if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1541 {
1542 addr->refcount++;
1543 DBG2(DBG_KNL, "virtual IP %H already installed on %s",
1544 virtual_ip, iface->ifname);
1545 addrs->destroy(addrs);
1546 ifaces->destroy(ifaces);
1547 return SUCCESS;
1548 }
1549 }
1550 addrs->destroy(addrs);
1551
1552 if (iface_found)
1553 {
1554 int ifindex = iface->ifindex;
1555 ifaces->destroy(ifaces);
1556 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
1557 ifindex, virtual_ip) == SUCCESS)
1558 {
1559 addr = malloc_thing(addr_entry_t);
1560 addr->ip = virtual_ip->clone(virtual_ip);
1561 addr->refcount = 1;
1562 addr->virtual = TRUE;
1563 addr->scope = RT_SCOPE_UNIVERSE;
1564 pthread_mutex_lock(&this->mutex);
1565 iface->addrs->insert_last(iface->addrs, addr);
1566 pthread_mutex_unlock(&this->mutex);
1567 return SUCCESS;
1568 }
1569 DBG2(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
1570 return FAILED;
1571
1572 }
1573
1574 }
1575 ifaces->destroy(ifaces);
1576
1577 DBG2(DBG_KNL, "interface address %H not found, unable to install"
1578 "virtual IP %H", iface_ip, virtual_ip);
1579 return FAILED;
1580 }
1581
1582 /**
1583 * Implementation of kernel_interface_t.del_ip.
1584 */
1585 static status_t del_ip(private_kernel_interface_t *this, host_t *virtual_ip)
1586 {
1587 iface_entry_t *iface;
1588 addr_entry_t *addr;
1589 iterator_t *addrs, *ifaces;
1590
1591 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
1592
1593 ifaces = this->ifaces->create_iterator_locked(this->ifaces, &this->mutex);
1594 while (ifaces->iterate(ifaces, (void**)&iface))
1595 {
1596 addrs = iface->addrs->create_iterator(iface->addrs, TRUE);
1597 while (addrs->iterate(addrs, (void**)&addr))
1598 {
1599 if (virtual_ip->ip_equals(virtual_ip, addr->ip))
1600 {
1601 int ifindex = iface->ifindex;
1602 addr->refcount--;
1603 if (addr->refcount == 0)
1604 {
1605 addrs->remove(addrs);
1606 addrs->destroy(addrs);
1607 ifaces->destroy(ifaces);
1608 addr_entry_destroy(addr);
1609 return manage_ipaddr(this, RTM_DELADDR, 0,
1610 ifindex, virtual_ip);
1611 }
1612 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
1613 virtual_ip);
1614 addrs->destroy(addrs);
1615 ifaces->destroy(ifaces);
1616 return SUCCESS;
1617 }
1618 }
1619 addrs->destroy(addrs);
1620 }
1621 ifaces->destroy(ifaces);
1622
1623 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
1624 return FAILED;
1625 }
1626
1627 /**
1628 * Implementation of kernel_interface_t.get_spi.
1629 */
1630 static status_t get_spi(private_kernel_interface_t *this,
1631 host_t *src, host_t *dst,
1632 protocol_id_t protocol, u_int32_t reqid,
1633 u_int32_t *spi)
1634 {
1635 unsigned char request[BUFFER_SIZE];
1636 struct nlmsghdr *hdr, *out;
1637 struct xfrm_userspi_info *userspi;
1638 u_int32_t received_spi = 0;
1639 size_t len;
1640
1641 memset(&request, 0, sizeof(request));
1642
1643 DBG2(DBG_KNL, "getting SPI for reqid %d", reqid);
1644
1645 hdr = (struct nlmsghdr*)request;
1646 hdr->nlmsg_flags = NLM_F_REQUEST;
1647 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1648 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1649
1650 userspi = (struct xfrm_userspi_info*)NLMSG_DATA(hdr);
1651 host2xfrm(src, &userspi->info.saddr);
1652 host2xfrm(dst, &userspi->info.id.daddr);
1653 userspi->info.id.proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1654 userspi->info.mode = TRUE; /* tunnel mode */
1655 userspi->info.reqid = reqid;
1656 userspi->info.family = src->get_family(src);
1657 userspi->min = 0xc0000000;
1658 userspi->max = 0xcFFFFFFF;
1659
1660 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1661 {
1662 hdr = out;
1663 while (NLMSG_OK(hdr, len))
1664 {
1665 switch (hdr->nlmsg_type)
1666 {
1667 case XFRM_MSG_NEWSA:
1668 {
1669 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1670 received_spi = usersa->id.spi;
1671 break;
1672 }
1673 case NLMSG_ERROR:
1674 {
1675 struct nlmsgerr *err = NLMSG_DATA(hdr);
1676
1677 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1678 strerror(-err->error), -err->error);
1679 break;
1680 }
1681 default:
1682 hdr = NLMSG_NEXT(hdr, len);
1683 continue;
1684 case NLMSG_DONE:
1685 break;
1686 }
1687 break;
1688 }
1689 free(out);
1690 }
1691
1692 if (received_spi == 0)
1693 {
1694 DBG1(DBG_KNL, "unable to get SPI for reqid %d", reqid);
1695 return FAILED;
1696 }
1697
1698 DBG2(DBG_KNL, "got SPI 0x%x for reqid %d", received_spi, reqid);
1699
1700 *spi = received_spi;
1701 return SUCCESS;
1702 }
1703
1704 /**
1705 * Implementation of kernel_interface_t.add_sa.
1706 */
1707 static status_t add_sa(private_kernel_interface_t *this,
1708 host_t *src, host_t *dst, u_int32_t spi,
1709 protocol_id_t protocol, u_int32_t reqid,
1710 u_int64_t expire_soft, u_int64_t expire_hard,
1711 algorithm_t *enc_alg, algorithm_t *int_alg,
1712 prf_plus_t *prf_plus, mode_t mode, bool encap,
1713 bool replace)
1714 {
1715 unsigned char request[BUFFER_SIZE];
1716 char *alg_name;
1717 u_int key_size;
1718 struct nlmsghdr *hdr;
1719 struct xfrm_usersa_info *sa;
1720
1721 memset(&request, 0, sizeof(request));
1722
1723 DBG2(DBG_KNL, "adding SAD entry with SPI 0x%x", spi);
1724
1725 hdr = (struct nlmsghdr*)request;
1726 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1727 hdr->nlmsg_type = replace ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
1728 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1729
1730 sa = (struct xfrm_usersa_info*)NLMSG_DATA(hdr);
1731 host2xfrm(src, &sa->saddr);
1732 host2xfrm(dst, &sa->id.daddr);
1733 sa->id.spi = spi;
1734 sa->id.proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1735 sa->family = src->get_family(src);
1736 sa->mode = mode;
1737 sa->replay_window = 32;
1738 sa->reqid = reqid;
1739 /* we currently do not expire SAs by volume/packet count */
1740 sa->lft.soft_byte_limit = XFRM_INF;
1741 sa->lft.hard_byte_limit = XFRM_INF;
1742 sa->lft.soft_packet_limit = XFRM_INF;
1743 sa->lft.hard_packet_limit = XFRM_INF;
1744 /* we use lifetimes since added, not since used */
1745 sa->lft.soft_add_expires_seconds = expire_soft;
1746 sa->lft.hard_add_expires_seconds = expire_hard;
1747 sa->lft.soft_use_expires_seconds = 0;
1748 sa->lft.hard_use_expires_seconds = 0;
1749
1750 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_usersa_info);
1751
1752 if (enc_alg->algorithm != ENCR_UNDEFINED)
1753 {
1754 rthdr->rta_type = XFRMA_ALG_CRYPT;
1755 alg_name = lookup_algorithm(encryption_algs, enc_alg, &key_size);
1756 if (alg_name == NULL)
1757 {
1758 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1759 encryption_algorithm_names, enc_alg->algorithm);
1760 return FAILED;
1761 }
1762 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1763 encryption_algorithm_names, enc_alg->algorithm, key_size);
1764
1765 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + key_size);
1766 hdr->nlmsg_len += rthdr->rta_len;
1767 if (hdr->nlmsg_len > sizeof(request))
1768 {
1769 return FAILED;
1770 }
1771
1772 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
1773 algo->alg_key_len = key_size;
1774 strcpy(algo->alg_name, alg_name);
1775 prf_plus->get_bytes(prf_plus, key_size / 8, algo->alg_key);
1776
1777 rthdr = XFRM_RTA_NEXT(rthdr);
1778 }
1779
1780 if (int_alg->algorithm != AUTH_UNDEFINED)
1781 {
1782 rthdr->rta_type = XFRMA_ALG_AUTH;
1783 alg_name = lookup_algorithm(integrity_algs, int_alg, &key_size);
1784 if (alg_name == NULL)
1785 {
1786 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1787 integrity_algorithm_names, int_alg->algorithm);
1788 return FAILED;
1789 }
1790 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
1791 integrity_algorithm_names, int_alg->algorithm, key_size);
1792
1793 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_algo) + key_size);
1794 hdr->nlmsg_len += rthdr->rta_len;
1795 if (hdr->nlmsg_len > sizeof(request))
1796 {
1797 return FAILED;
1798 }
1799
1800 struct xfrm_algo* algo = (struct xfrm_algo*)RTA_DATA(rthdr);
1801 algo->alg_key_len = key_size;
1802 strcpy(algo->alg_name, alg_name);
1803 prf_plus->get_bytes(prf_plus, key_size / 8, algo->alg_key);
1804
1805 rthdr = XFRM_RTA_NEXT(rthdr);
1806 }
1807
1808 /* TODO: add IPComp here */
1809
1810 if (encap)
1811 {
1812 rthdr->rta_type = XFRMA_ENCAP;
1813 rthdr->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
1814
1815 hdr->nlmsg_len += rthdr->rta_len;
1816 if (hdr->nlmsg_len > sizeof(request))
1817 {
1818 return FAILED;
1819 }
1820
1821 struct xfrm_encap_tmpl* tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rthdr);
1822 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
1823 tmpl->encap_sport = htons(src->get_port(src));
1824 tmpl->encap_dport = htons(dst->get_port(dst));
1825 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
1826 /* encap_oa could probably be derived from the
1827 * traffic selectors [rfc4306, p39]. In the netlink kernel implementation
1828 * pluto does the same as we do here but it uses encap_oa in the
1829 * pfkey implementation. BUT as /usr/src/linux/net/key/af_key.c indicates
1830 * the kernel ignores it anyway
1831 * -> does that mean that NAT-T encap doesn't work in transport mode?
1832 * No. The reason the kernel ignores NAT-OA is that it recomputes
1833 * (or, rather, just ignores) the checksum. If packets pass
1834 * the IPsec checks it marks them "checksum ok" so OA isn't needed. */
1835 rthdr = XFRM_RTA_NEXT(rthdr);
1836 }
1837
1838 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
1839 {
1840 DBG1(DBG_KNL, "unable to add SAD entry with SPI 0x%x", spi);
1841 return FAILED;
1842 }
1843 return SUCCESS;
1844 }
1845
1846 /**
1847 * Implementation of kernel_interface_t.update_sa.
1848 */
1849 static status_t update_sa(private_kernel_interface_t *this,
1850 u_int32_t spi, protocol_id_t protocol,
1851 host_t *src, host_t *dst,
1852 host_t *new_src, host_t *new_dst, bool encap)
1853 {
1854 unsigned char request[BUFFER_SIZE], *pos;
1855 struct nlmsghdr *hdr, *out = NULL;
1856 struct xfrm_usersa_id *sa_id;
1857 struct xfrm_usersa_info *out_sa = NULL, *sa;
1858 size_t len;
1859 struct rtattr *rta;
1860 size_t rtasize;
1861 struct xfrm_encap_tmpl* tmpl = NULL;
1862
1863 memset(&request, 0, sizeof(request));
1864
1865 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x for update", spi);
1866
1867 /* query the exisiting SA first */
1868 hdr = (struct nlmsghdr*)request;
1869 hdr->nlmsg_flags = NLM_F_REQUEST;
1870 hdr->nlmsg_type = XFRM_MSG_GETSA;
1871 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
1872
1873 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
1874 host2xfrm(dst, &sa_id->daddr);
1875 sa_id->spi = spi;
1876 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
1877 sa_id->family = dst->get_family(dst);
1878
1879 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1880 {
1881 hdr = out;
1882 while (NLMSG_OK(hdr, len))
1883 {
1884 switch (hdr->nlmsg_type)
1885 {
1886 case XFRM_MSG_NEWSA:
1887 {
1888 out_sa = NLMSG_DATA(hdr);
1889 break;
1890 }
1891 case NLMSG_ERROR:
1892 {
1893 struct nlmsgerr *err = NLMSG_DATA(hdr);
1894 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
1895 strerror(-err->error), -err->error);
1896 break;
1897 }
1898 default:
1899 hdr = NLMSG_NEXT(hdr, len);
1900 continue;
1901 case NLMSG_DONE:
1902 break;
1903 }
1904 break;
1905 }
1906 }
1907 if (out_sa == NULL ||
1908 this->public.del_sa(&this->public, dst, spi, protocol) != SUCCESS)
1909 {
1910 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
1911 free(out);
1912 return FAILED;
1913 }
1914
1915 DBG2(DBG_KNL, "updating SAD entry with SPI 0x%x from %#H..%#H to %#H..%#H",
1916 spi, src, dst, new_src, new_dst);
1917
1918 /* copy over the SA from out to request */
1919 hdr = (struct nlmsghdr*)request;
1920 memcpy(hdr, out, min(out->nlmsg_len, sizeof(request)));
1921 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1922 hdr->nlmsg_type = XFRM_MSG_NEWSA;
1923 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1924 sa = NLMSG_DATA(hdr);
1925 sa->family = new_dst->get_family(new_dst);
1926
1927 if (!src->ip_equals(src, new_src))
1928 {
1929 host2xfrm(new_src, &sa->saddr);
1930 }
1931 if (!dst->ip_equals(dst, new_dst))
1932 {
1933 host2xfrm(new_dst, &sa->id.daddr);
1934 }
1935
1936 rta = XFRM_RTA(out, struct xfrm_usersa_info);
1937 rtasize = XFRM_PAYLOAD(out, struct xfrm_usersa_info);
1938 pos = (u_char*)XFRM_RTA(hdr, struct xfrm_usersa_info);
1939 while(RTA_OK(rta, rtasize))
1940 {
1941 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
1942 if (rta->rta_type != XFRMA_ENCAP || encap)
1943 {
1944 if (rta->rta_type == XFRMA_ENCAP)
1945 { /* update encap tmpl */
1946 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
1947 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
1948 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
1949 }
1950 memcpy(pos, rta, rta->rta_len);
1951 pos += rta->rta_len;
1952 hdr->nlmsg_len += rta->rta_len;
1953 }
1954 rta = RTA_NEXT(rta, rtasize);
1955 }
1956 if (tmpl == NULL && encap)
1957 { /* add tmpl if we are enabling it */
1958 rta = (struct rtattr*)pos;
1959 rta->rta_type = XFRMA_ENCAP;
1960 rta->rta_len = RTA_LENGTH(sizeof(struct xfrm_encap_tmpl));
1961 hdr->nlmsg_len += rta->rta_len;
1962 tmpl = (struct xfrm_encap_tmpl*)RTA_DATA(rta);
1963 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
1964 tmpl->encap_sport = ntohs(new_src->get_port(new_src));
1965 tmpl->encap_dport = ntohs(new_dst->get_port(new_dst));
1966 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
1967 }
1968
1969 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
1970 {
1971 DBG1(DBG_KNL, "unable to update SAD entry with SPI 0x%x", spi);
1972 free(out);
1973 return FAILED;
1974 }
1975 free(out);
1976
1977 return SUCCESS;
1978 }
1979
1980 /**
1981 * Implementation of kernel_interface_t.query_sa.
1982 */
1983 static status_t query_sa(private_kernel_interface_t *this, host_t *dst,
1984 u_int32_t spi, protocol_id_t protocol,
1985 u_int32_t *use_time)
1986 {
1987 unsigned char request[BUFFER_SIZE];
1988 struct nlmsghdr *out = NULL, *hdr;
1989 struct xfrm_usersa_id *sa_id;
1990 struct xfrm_usersa_info *sa = NULL;
1991 size_t len;
1992
1993 DBG2(DBG_KNL, "querying SAD entry with SPI 0x%x", spi);
1994 memset(&request, 0, sizeof(request));
1995
1996 hdr = (struct nlmsghdr*)request;
1997 hdr->nlmsg_flags = NLM_F_REQUEST;
1998 hdr->nlmsg_type = XFRM_MSG_GETSA;
1999 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2000
2001 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2002 host2xfrm(dst, &sa_id->daddr);
2003 sa_id->spi = spi;
2004 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2005 sa_id->family = dst->get_family(dst);
2006
2007 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2008 {
2009 hdr = out;
2010 while (NLMSG_OK(hdr, len))
2011 {
2012 switch (hdr->nlmsg_type)
2013 {
2014 case XFRM_MSG_NEWSA:
2015 {
2016 sa = NLMSG_DATA(hdr);
2017 break;
2018 }
2019 case NLMSG_ERROR:
2020 {
2021 struct nlmsgerr *err = NLMSG_DATA(hdr);
2022 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2023 strerror(-err->error), -err->error);
2024 break;
2025 }
2026 default:
2027 hdr = NLMSG_NEXT(hdr, len);
2028 continue;
2029 case NLMSG_DONE:
2030 break;
2031 }
2032 break;
2033 }
2034 }
2035
2036 if (sa == NULL)
2037 {
2038 DBG1(DBG_KNL, "unable to query SAD entry with SPI 0x%x", spi);
2039 free(out);
2040 return FAILED;
2041 }
2042
2043 *use_time = sa->curlft.use_time;
2044 free (out);
2045 return SUCCESS;
2046 }
2047
2048 /**
2049 * Implementation of kernel_interface_t.del_sa.
2050 */
2051 static status_t del_sa(private_kernel_interface_t *this, host_t *dst,
2052 u_int32_t spi, protocol_id_t protocol)
2053 {
2054 unsigned char request[BUFFER_SIZE];
2055 struct nlmsghdr *hdr;
2056 struct xfrm_usersa_id *sa_id;
2057
2058 memset(&request, 0, sizeof(request));
2059
2060 DBG2(DBG_KNL, "deleting SAD entry with SPI 0x%x", spi);
2061
2062 hdr = (struct nlmsghdr*)request;
2063 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2064 hdr->nlmsg_type = XFRM_MSG_DELSA;
2065 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2066
2067 sa_id = (struct xfrm_usersa_id*)NLMSG_DATA(hdr);
2068 host2xfrm(dst, &sa_id->daddr);
2069 sa_id->spi = spi;
2070 sa_id->proto = (protocol == PROTO_ESP) ? KERNEL_ESP : KERNEL_AH;
2071 sa_id->family = dst->get_family(dst);
2072
2073 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2074 {
2075 DBG1(DBG_KNL, "unable to delete SAD entry with SPI 0x%x", spi);
2076 return FAILED;
2077 }
2078 DBG2(DBG_KNL, "deleted SAD entry with SPI 0x%x", spi);
2079 return SUCCESS;
2080 }
2081
2082 /**
2083 * Implementation of kernel_interface_t.add_policy.
2084 */
2085 static status_t add_policy(private_kernel_interface_t *this,
2086 host_t *src, host_t *dst,
2087 traffic_selector_t *src_ts,
2088 traffic_selector_t *dst_ts,
2089 policy_dir_t direction, protocol_id_t protocol,
2090 u_int32_t reqid, bool high_prio, mode_t mode)
2091 {
2092 iterator_t *iterator;
2093 policy_entry_t *current, *policy;
2094 bool found = FALSE;
2095 unsigned char request[BUFFER_SIZE];
2096 struct xfrm_userpolicy_info *policy_info;
2097 struct nlmsghdr *hdr;
2098
2099 /* create a policy */
2100 policy = malloc_thing(policy_entry_t);
2101 memset(policy, 0, sizeof(policy_entry_t));
2102 policy->sel = ts2selector(src_ts, dst_ts);
2103 policy->direction = direction;
2104
2105 /* find the policy, which matches EXACTLY */
2106 pthread_mutex_lock(&this->mutex);
2107 iterator = this->policies->create_iterator(this->policies, TRUE);
2108 while (iterator->iterate(iterator, (void**)&current))
2109 {
2110 if (memcmp(&current->sel, &policy->sel, sizeof(struct xfrm_selector)) == 0 &&
2111 policy->direction == current->direction)
2112 {
2113 /* use existing policy */
2114 current->refcount++;
2115 DBG2(DBG_KNL, "policy %R===%R already exists, increasing ",
2116 "refcount", src_ts, dst_ts);
2117 free(policy);
2118 policy = current;
2119 found = TRUE;
2120 break;
2121 }
2122 }
2123 iterator->destroy(iterator);
2124 if (!found)
2125 { /* apply the new one, if we have no such policy */
2126 this->policies->insert_last(this->policies, policy);
2127 policy->refcount = 1;
2128 }
2129
2130 DBG2(DBG_KNL, "adding policy %R===%R", src_ts, dst_ts);
2131
2132 memset(&request, 0, sizeof(request));
2133 hdr = (struct nlmsghdr*)request;
2134 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2135 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2136 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2137
2138 policy_info = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2139 policy_info->sel = policy->sel;
2140 policy_info->dir = policy->direction;
2141 /* calculate priority based on source selector size, small size = high prio */
2142 policy_info->priority = high_prio ? PRIO_HIGH : PRIO_LOW;
2143 policy_info->priority -= policy->sel.prefixlen_s * 10;
2144 policy_info->priority -= policy->sel.proto ? 2 : 0;
2145 policy_info->priority -= policy->sel.sport_mask ? 1 : 0;
2146 policy_info->action = XFRM_POLICY_ALLOW;
2147 policy_info->share = XFRM_SHARE_ANY;
2148 pthread_mutex_unlock(&this->mutex);
2149
2150 /* policies don't expire */
2151 policy_info->lft.soft_byte_limit = XFRM_INF;
2152 policy_info->lft.soft_packet_limit = XFRM_INF;
2153 policy_info->lft.hard_byte_limit = XFRM_INF;
2154 policy_info->lft.hard_packet_limit = XFRM_INF;
2155 policy_info->lft.soft_add_expires_seconds = 0;
2156 policy_info->lft.hard_add_expires_seconds = 0;
2157 policy_info->lft.soft_use_expires_seconds = 0;
2158 policy_info->lft.hard_use_expires_seconds = 0;
2159
2160 struct rtattr *rthdr = XFRM_RTA(hdr, struct xfrm_userpolicy_info);
2161 rthdr->rta_type = XFRMA_TMPL;
2162
2163 rthdr->rta_len = sizeof(struct xfrm_user_tmpl);
2164 rthdr->rta_len = RTA_LENGTH(rthdr->rta_len);
2165
2166 hdr->nlmsg_len += rthdr->rta_len;
2167 if (hdr->nlmsg_len > sizeof(request))
2168 {
2169 return FAILED;
2170 }
2171
2172 struct xfrm_user_tmpl *tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rthdr);
2173 tmpl->reqid = reqid;
2174 tmpl->id.proto = (protocol == PROTO_AH) ? KERNEL_AH : KERNEL_ESP;
2175 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2176 tmpl->mode = mode;
2177 tmpl->family = src->get_family(src);
2178
2179 host2xfrm(src, &tmpl->saddr);
2180 host2xfrm(dst, &tmpl->id.daddr);
2181
2182 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2183 {
2184 DBG1(DBG_KNL, "unable to add policy %R===%R", src_ts, dst_ts);
2185 return FAILED;
2186 }
2187
2188 /* install a route, if:
2189 * - we are NOT updating a policy
2190 * - this is a forward policy (to just get one for each child)
2191 * - we are in tunnel mode
2192 * - we are not using IPv6 (does not work correctly yet!)
2193 */
2194 if (policy->route == NULL && direction == POLICY_FWD &&
2195 mode != MODE_TRANSPORT && src->get_family(src) != AF_INET6)
2196 {
2197 policy->route = malloc_thing(route_entry_t);
2198 if (get_address_by_ts(this, dst_ts, &policy->route->src_ip) == SUCCESS)
2199 {
2200 /* if we have a gateway (via), we use it. If it's direct, we
2201 * use the peers address (which is src, as we are in POLICY_FWD).*/
2202 policy->route->gateway = get_addr(this, src, TRUE);
2203 if (policy->route->gateway == NULL)
2204 {
2205 policy->route->gateway = src->clone(src);
2206 }
2207 policy->route->if_index = get_interface_index(this, dst);
2208 policy->route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2209 memcpy(policy->route->dst_net.ptr, &policy->sel.saddr, policy->route->dst_net.len);
2210 policy->route->prefixlen = policy->sel.prefixlen_s;
2211
2212 if (manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2213 policy->route) != SUCCESS)
2214 {
2215 DBG1(DBG_KNL, "unable to install source route for %H",
2216 policy->route->src_ip);
2217 route_entry_destroy(policy->route);
2218 policy->route = NULL;
2219 }
2220 }
2221 else
2222 {
2223 free(policy->route);
2224 policy->route = NULL;
2225 }
2226 }
2227
2228 return SUCCESS;
2229 }
2230
2231 /**
2232 * Implementation of kernel_interface_t.query_policy.
2233 */
2234 static status_t query_policy(private_kernel_interface_t *this,
2235 traffic_selector_t *src_ts,
2236 traffic_selector_t *dst_ts,
2237 policy_dir_t direction, u_int32_t *use_time)
2238 {
2239 unsigned char request[BUFFER_SIZE];
2240 struct nlmsghdr *out = NULL, *hdr;
2241 struct xfrm_userpolicy_id *policy_id;
2242 struct xfrm_userpolicy_info *policy = NULL;
2243 size_t len;
2244
2245 memset(&request, 0, sizeof(request));
2246
2247 DBG2(DBG_KNL, "querying policy %R===%R", src_ts, dst_ts);
2248
2249 hdr = (struct nlmsghdr*)request;
2250 hdr->nlmsg_flags = NLM_F_REQUEST;
2251 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2252 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2253
2254 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2255 policy_id->sel = ts2selector(src_ts, dst_ts);
2256 policy_id->dir = direction;
2257
2258 if (netlink_send(this, this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2259 {
2260 hdr = out;
2261 while (NLMSG_OK(hdr, len))
2262 {
2263 switch (hdr->nlmsg_type)
2264 {
2265 case XFRM_MSG_NEWPOLICY:
2266 {
2267 policy = (struct xfrm_userpolicy_info*)NLMSG_DATA(hdr);
2268 break;
2269 }
2270 case NLMSG_ERROR:
2271 {
2272 struct nlmsgerr *err = NLMSG_DATA(hdr);
2273 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2274 strerror(-err->error), -err->error);
2275 break;
2276 }
2277 default:
2278 hdr = NLMSG_NEXT(hdr, len);
2279 continue;
2280 case NLMSG_DONE:
2281 break;
2282 }
2283 break;
2284 }
2285 }
2286
2287 if (policy == NULL)
2288 {
2289 DBG2(DBG_KNL, "unable to query policy %R===%R", src_ts, dst_ts);
2290 free(out);
2291 return FAILED;
2292 }
2293 *use_time = (time_t)policy->curlft.use_time;
2294
2295 free(out);
2296 return SUCCESS;
2297 }
2298
2299 /**
2300 * Implementation of kernel_interface_t.del_policy.
2301 */
2302 static status_t del_policy(private_kernel_interface_t *this,
2303 traffic_selector_t *src_ts,
2304 traffic_selector_t *dst_ts,
2305 policy_dir_t direction)
2306 {
2307 policy_entry_t *current, policy, *to_delete = NULL;
2308 route_entry_t *route;
2309 unsigned char request[BUFFER_SIZE];
2310 struct nlmsghdr *hdr;
2311 struct xfrm_userpolicy_id *policy_id;
2312 iterator_t *iterator;
2313
2314 DBG2(DBG_KNL, "deleting policy %R===%R", src_ts, dst_ts);
2315
2316 /* create a policy */
2317 memset(&policy, 0, sizeof(policy_entry_t));
2318 policy.sel = ts2selector(src_ts, dst_ts);
2319 policy.direction = direction;
2320
2321 /* find the policy */
2322 iterator = this->policies->create_iterator_locked(this->policies, &this->mutex);
2323 while (iterator->iterate(iterator, (void**)&current))
2324 {
2325 if (memcmp(&current->sel, &policy.sel, sizeof(struct xfrm_selector)) == 0 &&
2326 policy.direction == current->direction)
2327 {
2328 to_delete = current;
2329 if (--to_delete->refcount > 0)
2330 {
2331 /* is used by more SAs, keep in kernel */
2332 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
2333 iterator->destroy(iterator);
2334 return SUCCESS;
2335 }
2336 /* remove if last reference */
2337 iterator->remove(iterator);
2338 break;
2339 }
2340 }
2341 iterator->destroy(iterator);
2342 if (!to_delete)
2343 {
2344 DBG1(DBG_KNL, "deleting policy %R===%R failed, not found", src_ts, dst_ts);
2345 return NOT_FOUND;
2346 }
2347
2348 memset(&request, 0, sizeof(request));
2349
2350 hdr = (struct nlmsghdr*)request;
2351 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2352 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
2353 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2354
2355 policy_id = (struct xfrm_userpolicy_id*)NLMSG_DATA(hdr);
2356 policy_id->sel = to_delete->sel;
2357 policy_id->dir = direction;
2358
2359 route = to_delete->route;
2360 free(to_delete);
2361
2362 if (netlink_send_ack(this, this->socket_xfrm, hdr) != SUCCESS)
2363 {
2364 DBG1(DBG_KNL, "unable to delete policy %R===%R", src_ts, dst_ts);
2365 return FAILED;
2366 }
2367
2368 if (route)
2369 {
2370 if (manage_srcroute(this, RTM_DELROUTE, 0, route) != SUCCESS)
2371 {
2372 DBG1(DBG_KNL, "error uninstalling route installed with "
2373 "policy %R===%R", src_ts, dst_ts);
2374 }
2375 route_entry_destroy(route);
2376 }
2377 return SUCCESS;
2378 }
2379
2380 /**
2381 * Implementation of kernel_interface_t.destroy.
2382 */
2383 static void destroy(private_kernel_interface_t *this)
2384 {
2385 manage_rule(this, RTM_DELRULE, IPSEC_ROUTING_TABLE, IPSEC_ROUTING_TABLE_PRIO);
2386
2387 this->job->cancel(this->job);
2388 close(this->socket_xfrm_events);
2389 close(this->socket_xfrm);
2390 close(this->socket_rt_events);
2391 close(this->socket_rt);
2392 this->policies->destroy(this->policies);
2393 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2394 free(this);
2395 }
2396
2397 /*
2398 * Described in header.
2399 */
2400 kernel_interface_t *kernel_interface_create()
2401 {
2402 private_kernel_interface_t *this = malloc_thing(private_kernel_interface_t);
2403 struct sockaddr_nl addr;
2404
2405 /* public functions */
2406 this->public.get_spi = (status_t(*)(kernel_interface_t*,host_t*,host_t*,protocol_id_t,u_int32_t,u_int32_t*))get_spi;
2407 this->public.add_sa = (status_t(*)(kernel_interface_t *,host_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t,u_int64_t,u_int64_t,algorithm_t*,algorithm_t*,prf_plus_t*,mode_t,bool,bool))add_sa;
2408 this->public.update_sa = (status_t(*)(kernel_interface_t*,u_int32_t,protocol_id_t,host_t*,host_t*,host_t*,host_t*,bool))update_sa;
2409 this->public.query_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t,u_int32_t*))query_sa;
2410 this->public.del_sa = (status_t(*)(kernel_interface_t*,host_t*,u_int32_t,protocol_id_t))del_sa;
2411 this->public.add_policy = (status_t(*)(kernel_interface_t*,host_t*,host_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,protocol_id_t,u_int32_t,bool,mode_t))add_policy;
2412 this->public.query_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t,u_int32_t*))query_policy;
2413 this->public.del_policy = (status_t(*)(kernel_interface_t*,traffic_selector_t*,traffic_selector_t*,policy_dir_t))del_policy;
2414 this->public.get_interface = (char*(*)(kernel_interface_t*,host_t*))get_interface_name;
2415 this->public.create_address_iterator = (iterator_t*(*)(kernel_interface_t*))create_address_iterator;
2416 this->public.get_source_addr = (host_t*(*)(kernel_interface_t*, host_t *dest))get_source_addr;
2417 this->public.add_ip = (status_t(*)(kernel_interface_t*,host_t*,host_t*)) add_ip;
2418 this->public.del_ip = (status_t(*)(kernel_interface_t*,host_t*)) del_ip;
2419 this->public.destroy = (void(*)(kernel_interface_t*)) destroy;
2420
2421 /* private members */
2422 this->policies = linked_list_create();
2423 this->ifaces = linked_list_create();
2424 this->hiter = NULL;
2425 this->seq = 200;
2426 pthread_mutex_init(&this->mutex,NULL);
2427
2428 memset(&addr, 0, sizeof(addr));
2429 addr.nl_family = AF_NETLINK;
2430
2431 /* create and bind RT socket */
2432 this->socket_rt = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2433 if (this->socket_rt <= 0)
2434 {
2435 charon->kill(charon, "unable to create RT netlink socket");
2436 }
2437 addr.nl_groups = 0;
2438 if (bind(this->socket_rt, (struct sockaddr*)&addr, sizeof(addr)))
2439 {
2440 charon->kill(charon, "unable to bind RT netlink socket");
2441 }
2442
2443 /* create and bind RT socket for events (address/interface/route changes) */
2444 this->socket_rt_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2445 if (this->socket_rt_events <= 0)
2446 {
2447 charon->kill(charon, "unable to create RT event socket");
2448 }
2449 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
2450 RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_LINK;
2451 if (bind(this->socket_rt_events, (struct sockaddr*)&addr, sizeof(addr)))
2452 {
2453 charon->kill(charon, "unable to bind RT event socket");
2454 }
2455
2456 /* create and bind XFRM socket */
2457 this->socket_xfrm = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2458 if (this->socket_xfrm <= 0)
2459 {
2460 charon->kill(charon, "unable to create XFRM netlink socket");
2461 }
2462 addr.nl_groups = 0;
2463 if (bind(this->socket_xfrm, (struct sockaddr*)&addr, sizeof(addr)))
2464 {
2465 charon->kill(charon, "unable to bind XFRM netlink socket");
2466 }
2467
2468 /* create and bind XFRM socket for ACQUIRE & EXPIRE */
2469 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
2470 if (this->socket_xfrm_events <= 0)
2471 {
2472 charon->kill(charon, "unable to create XFRM event socket");
2473 }
2474 addr.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
2475 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
2476 {
2477 charon->kill(charon, "unable to bind XFRM event socket");
2478 }
2479
2480 this->job = callback_job_create((callback_job_cb_t)receive_events,
2481 this, NULL, NULL);
2482 charon->processor->queue_job(charon->processor, (job_t*)this->job);
2483
2484 if (init_address_list(this) != SUCCESS)
2485 {
2486 charon->kill(charon, "unable to get interface list");
2487 }
2488
2489 if (manage_rule(this, RTM_NEWRULE, IPSEC_ROUTING_TABLE,
2490 IPSEC_ROUTING_TABLE_PRIO) != SUCCESS)
2491 {
2492 DBG1(DBG_KNL, "unable to create routing table rule");
2493 }
2494
2495 return &this->public;
2496 }
2497