9d0c925c0117d85ee9f15cbe29c10a3742436d64
[strongswan.git] / src / libcharon / plugins / kernel_netlink / kernel_netlink_ipsec.c
1 /*
2 * Copyright (C) 2006-2019 Tobias Brunner
3 * Copyright (C) 2005-2009 Martin Willi
4 * Copyright (C) 2008-2016 Andreas Steffen
5 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
6 * Copyright (C) 2006 Daniel Roethlisberger
7 * Copyright (C) 2005 Jan Hutter
8 * HSR Hochschule fuer Technik Rapperswil
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2 of the License, or (at your
13 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 */
20 /*
21 * Copyright (C) 2018 Mellanox Technologies.
22 *
23 * Permission is hereby granted, free of charge, to any person obtaining a copy
24 * of this software and associated documentation files (the "Software"), to deal
25 * in the Software without restriction, including without limitation the rights
26 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27 * copies of the Software, and to permit persons to whom the Software is
28 * furnished to do so, subject to the following conditions:
29 *
30 * The above copyright notice and this permission notice shall be included in
31 * all copies or substantial portions of the Software.
32 *
33 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
39 * THE SOFTWARE.
40 */
41
42 #define _GNU_SOURCE
43 #include <sys/types.h>
44 #include <sys/socket.h>
45 #include <sys/ioctl.h>
46 #include <stdint.h>
47 #include <linux/ipsec.h>
48 #include <linux/netlink.h>
49 #include <linux/rtnetlink.h>
50 #include <linux/xfrm.h>
51 #include <linux/udp.h>
52 #include <linux/ethtool.h>
53 #include <linux/sockios.h>
54 #include <net/if.h>
55 #include <unistd.h>
56 #include <time.h>
57 #include <errno.h>
58 #include <string.h>
59 #include <fcntl.h>
60 #include <dlfcn.h>
61
62 #include "kernel_netlink_ipsec.h"
63 #include "kernel_netlink_shared.h"
64
65 #include <daemon.h>
66 #include <utils/debug.h>
67 #include <threading/mutex.h>
68 #include <threading/condvar.h>
69 #include <collections/array.h>
70 #include <collections/hashtable.h>
71 #include <collections/linked_list.h>
72
/**
 * Required for Linux 2.6.26 kernel and later.
 * Only defined here if the included headers do not already provide it.
 */
#ifndef XFRM_STATE_AF_UNSPEC
#define XFRM_STATE_AF_UNSPEC 32
#endif

/** From linux/in.h, defined here if the included headers lack it */
#ifndef IP_XFRM_POLICY
#define IP_XFRM_POLICY 17
#endif

/** Missing on uclibc */
#ifndef IPV6_XFRM_POLICY
#define IPV6_XFRM_POLICY 34
#endif /*IPV6_XFRM_POLICY*/

/** From linux/udp.h, defined here if the included headers lack it */
#ifndef UDP_ENCAP
#define UDP_ENCAP 100
#endif

/** Fallback value if the included headers do not define it */
#ifndef UDP_ENCAP_ESPINUDP
#define UDP_ENCAP_ESPINUDP 2
#endif

/** This is not defined on some platforms, fall back to IPPROTO_UDP */
#ifndef SOL_UDP
#define SOL_UDP IPPROTO_UDP
#endif
101
/** Base priority for installed policies */
#define PRIO_BASE 200000

/**
 * Map the limit for bytes and packets to XFRM_INF by default, i.e. a limit
 * of 0 yields XFRM_INF, any other value is used unchanged.
 * Note that the argument is evaluated twice.
 */
#define XFRM_LIMIT(x) ((x) == 0 ? XFRM_INF : (x))

/**
 * Create ORable bitfield of XFRM NL groups: for group name x, the bit at
 * position (XFRMNLGRP_x - 1) is set.
 */
#define XFRMNLGRP(x) (1<<(XFRMNLGRP_##x-1))

/**
 * Returns a pointer to the first rtattr following the nlmsghdr *nlh and the
 * 'usual' netlink data x like 'struct xfrm_usersa_info'.
 * x is a type, its aligned size is skipped after the message payload start.
 */
#define XFRM_RTA(nlh, x) ((struct rtattr*)(NLMSG_DATA(nlh) + \
									   NLMSG_ALIGN(sizeof(x))))
/**
 * Returns the total size of attached rta data
 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
126
typedef struct kernel_algorithm_t kernel_algorithm_t;

/**
 * Mapping of IKEv2 kernel identifier to linux crypto API names.
 *
 * Entries of this type make up the static algorithm tables that
 * lookup_algorithm() searches.
 */
struct kernel_algorithm_t {
	/**
	 * Identifier specified in IKEv2
	 */
	int ikev2;

	/**
	 * Name of the algorithm in linux crypto API
	 */
	const char *name;
};
143
/**
 * Printable names for the XFRM netlink message types, covering the range
 * XFRM_MSG_NEWSA to XFRM_MSG_MAPPING (one string per value, in order).
 */
ENUM(xfrm_msg_names, XFRM_MSG_NEWSA, XFRM_MSG_MAPPING,
	"XFRM_MSG_NEWSA",
	"XFRM_MSG_DELSA",
	"XFRM_MSG_GETSA",
	"XFRM_MSG_NEWPOLICY",
	"XFRM_MSG_DELPOLICY",
	"XFRM_MSG_GETPOLICY",
	"XFRM_MSG_ALLOCSPI",
	"XFRM_MSG_ACQUIRE",
	"XFRM_MSG_EXPIRE",
	"XFRM_MSG_UPDPOLICY",
	"XFRM_MSG_UPDSA",
	"XFRM_MSG_POLEXPIRE",
	"XFRM_MSG_FLUSHSA",
	"XFRM_MSG_FLUSHPOLICY",
	"XFRM_MSG_NEWAE",
	"XFRM_MSG_GETAE",
	"XFRM_MSG_REPORT",
	"XFRM_MSG_MIGRATE",
	"XFRM_MSG_NEWSADINFO",
	"XFRM_MSG_GETSADINFO",
	"XFRM_MSG_NEWSPDINFO",
	"XFRM_MSG_GETSPDINFO",
	"XFRM_MSG_MAPPING"
);
169
/**
 * Printable names for the XFRM netlink attribute types, covering the range
 * XFRMA_UNSPEC to XFRMA_OFFLOAD_DEV (one string per value, in order).
 * Used with the %N format specifier when logging received attributes.
 */
ENUM(xfrm_attr_type_names, XFRMA_UNSPEC, XFRMA_OFFLOAD_DEV,
	"XFRMA_UNSPEC",
	"XFRMA_ALG_AUTH",
	"XFRMA_ALG_CRYPT",
	"XFRMA_ALG_COMP",
	"XFRMA_ENCAP",
	"XFRMA_TMPL",
	"XFRMA_SA",
	"XFRMA_POLICY",
	"XFRMA_SEC_CTX",
	"XFRMA_LTIME_VAL",
	"XFRMA_REPLAY_VAL",
	"XFRMA_REPLAY_THRESH",
	"XFRMA_ETIMER_THRESH",
	"XFRMA_SRCADDR",
	"XFRMA_COADDR",
	"XFRMA_LASTUSED",
	"XFRMA_POLICY_TYPE",
	"XFRMA_MIGRATE",
	"XFRMA_ALG_AEAD",
	"XFRMA_KMADDRESS",
	"XFRMA_ALG_AUTH_TRUNC",
	"XFRMA_MARK",
	"XFRMA_TFCPAD",
	"XFRMA_REPLAY_ESN_VAL",
	"XFRMA_SA_EXTRA_FLAGS",
	"XFRMA_PROTO",
	"XFRMA_ADDRESS_FILTER",
	"XFRMA_PAD",
	"XFRMA_OFFLOAD_DEV",
);
201
/**
 * Algorithms for encryption.
 *
 * Maps IKEv2 encryption identifiers to Linux crypto API names; searched by
 * lookup_algorithm() for ENCRYPTION_ALGORITHM. Commented-out entries have no
 * mapping in this table.
 */
static kernel_algorithm_t encryption_algs[] = {
/*	{ENCR_DES_IV64,				"***"				}, */
	{ENCR_DES,					"des"				},
	{ENCR_3DES,					"des3_ede"			},
/*	{ENCR_RC5,					"***"				}, */
/*	{ENCR_IDEA,					"***"				}, */
	{ENCR_CAST,					"cast5"				},
	{ENCR_BLOWFISH,				"blowfish"			},
/*	{ENCR_3IDEA,				"***"				}, */
/*	{ENCR_DES_IV32,				"***"				}, */
	{ENCR_NULL,					"cipher_null"		},
	{ENCR_AES_CBC,				"aes"				},
	{ENCR_AES_CTR,				"rfc3686(ctr(aes))"	},
	{ENCR_AES_CCM_ICV8,			"rfc4309(ccm(aes))"	},
	{ENCR_AES_CCM_ICV12,		"rfc4309(ccm(aes))"	},
	{ENCR_AES_CCM_ICV16,		"rfc4309(ccm(aes))"	},
	{ENCR_AES_GCM_ICV8,			"rfc4106(gcm(aes))"	},
	{ENCR_AES_GCM_ICV12,		"rfc4106(gcm(aes))"	},
	{ENCR_AES_GCM_ICV16,		"rfc4106(gcm(aes))"	},
	{ENCR_NULL_AUTH_AES_GMAC,	"rfc4543(gcm(aes))"	},
	{ENCR_CAMELLIA_CBC,			"cbc(camellia)"		},
/*	{ENCR_CAMELLIA_CTR,			"***"				}, */
/*	{ENCR_CAMELLIA_CCM_ICV8,	"***"				}, */
/*	{ENCR_CAMELLIA_CCM_ICV12,	"***"				}, */
/*	{ENCR_CAMELLIA_CCM_ICV16,	"***"				}, */
	{ENCR_SERPENT_CBC,			"serpent"			},
	{ENCR_TWOFISH_CBC,			"twofish"			},
	{ENCR_CHACHA20_POLY1305,	"rfc7539esp(chacha20,poly1305)"},
};
234
/**
 * Algorithms for integrity protection.
 *
 * Maps IKEv2 integrity identifiers to Linux crypto API names; searched by
 * lookup_algorithm() for INTEGRITY_ALGORITHM. Commented-out entries have no
 * mapping in this table.
 */
static kernel_algorithm_t integrity_algs[] = {
	{AUTH_HMAC_MD5_96,			"md5"				},
	{AUTH_HMAC_MD5_128,			"hmac(md5)"			},
	{AUTH_HMAC_SHA1_96,			"sha1"				},
	{AUTH_HMAC_SHA1_160,		"hmac(sha1)"		},
	{AUTH_HMAC_SHA2_256_96,		"sha256"			},
	{AUTH_HMAC_SHA2_256_128,	"hmac(sha256)"		},
	{AUTH_HMAC_SHA2_384_192,	"hmac(sha384)"		},
	{AUTH_HMAC_SHA2_512_256,	"hmac(sha512)"		},
/*	{AUTH_DES_MAC,				"***"				}, */
/*	{AUTH_KPDK_MD5,				"***"				}, */
	{AUTH_AES_XCBC_96,			"xcbc(aes)"			},
	{AUTH_AES_CMAC_96,			"cmac(aes)"			},
};
252
/**
 * Algorithms for IPComp.
 *
 * Maps IPComp transform identifiers to Linux crypto API names; searched by
 * lookup_algorithm() for COMPRESSION_ALGORITHM.
 */
static kernel_algorithm_t compression_algs[] = {
/*	{IPCOMP_OUI,				"***"				}, */
	{IPCOMP_DEFLATE,			"deflate"			},
	{IPCOMP_LZS,				"lzs"				},
	{IPCOMP_LZJH,				"lzjh"				},
};
262
263 /**
264 * Look up a kernel algorithm name and its key size
265 */
266 static const char* lookup_algorithm(transform_type_t type, int ikev2)
267 {
268 kernel_algorithm_t *list;
269 int i, count;
270 char *name;
271
272 switch (type)
273 {
274 case ENCRYPTION_ALGORITHM:
275 list = encryption_algs;
276 count = countof(encryption_algs);
277 break;
278 case INTEGRITY_ALGORITHM:
279 list = integrity_algs;
280 count = countof(integrity_algs);
281 break;
282 case COMPRESSION_ALGORITHM:
283 list = compression_algs;
284 count = countof(compression_algs);
285 break;
286 default:
287 return NULL;
288 }
289 for (i = 0; i < count; i++)
290 {
291 if (list[i].ikev2 == ikev2)
292 {
293 return list[i].name;
294 }
295 }
296 if (charon->kernel->lookup_algorithm(charon->kernel, ikev2, type, NULL,
297 &name))
298 {
299 return name;
300 }
301 return NULL;
302 }
303
typedef struct private_kernel_netlink_ipsec_t private_kernel_netlink_ipsec_t;

/**
 * Private variables and functions of kernel_netlink class.
 */
struct private_kernel_netlink_ipsec_t {
	/**
	 * Public part of the kernel_netlink_t object
	 */
	kernel_netlink_ipsec_t public;

	/**
	 * Mutex to lock access to installed policies
	 */
	mutex_t *mutex;

	/**
	 * Condvar to synchronize access to individual policies
	 */
	condvar_t *condvar;

	/**
	 * Hash table of installed policies (policy_entry_t)
	 */
	hashtable_t *policies;

	/**
	 * Hash table of IPsec SAs using policies (ipsec_sa_t), the SA object
	 * serves as both key and value
	 */
	hashtable_t *sas;

	/**
	 * Netlink xfrm socket (IPsec), used to send requests and read the replies
	 */
	netlink_socket_t *socket_xfrm;

	/**
	 * Netlink xfrm socket to receive acquire and expire events
	 * (plain file descriptor read by receive_events())
	 */
	int socket_xfrm_events;

	/**
	 * Whether to install routes along policies
	 */
	bool install_routes;

	/**
	 * Whether to set protocol and ports on selector installed with transport
	 * mode IPsec SAs
	 */
	bool proto_port_transport;

	/**
	 * Whether to always use UPDATE to install policies
	 */
	bool policy_update;

	/**
	 * Installed port based IKE bypass policies, as bypass_t
	 */
	array_t *bypass;

	/**
	 * Custom priority calculation function
	 */
	uint32_t (*get_priority)(kernel_ipsec_policy_id_t *id,
							 kernel_ipsec_manage_policy_t *data);
};
372
typedef struct route_entry_t route_entry_t;

/**
 * Installed routing entry
 *
 * All pointer members and dst_net are released by route_entry_destroy().
 */
struct route_entry_t {
	/** Name of the interface the route is bound to */
	char *if_name;

	/** Source ip of the route */
	host_t *src_ip;

	/** Gateway for this route, may be NULL */
	host_t *gateway;

	/** Destination net */
	chunk_t dst_net;

	/** Destination net prefixlen */
	uint8_t prefixlen;

	/** Whether the route was installed for a passthrough policy */
	bool pass;
};
397
398 /**
399 * Destroy a route_entry_t object
400 */
401 static void route_entry_destroy(route_entry_t *this)
402 {
403 free(this->if_name);
404 this->src_ip->destroy(this->src_ip);
405 DESTROY_IF(this->gateway);
406 chunk_free(&this->dst_net);
407 free(this);
408 }
409
410 /**
411 * Compare two route_entry_t objects
412 */
413 static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
414 {
415 if (a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
416 a->pass == b->pass &&
417 a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
418 chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen)
419 {
420 return (!a->gateway && !b->gateway) || (a->gateway && b->gateway &&
421 a->gateway->ip_equals(a->gateway, b->gateway));
422 }
423 return FALSE;
424 }
425
typedef struct ipsec_sa_t ipsec_sa_t;

/**
 * IPsec SA assigned to a policy.
 *
 * Objects are shared via the "sas" hash table and reference counted, see
 * ipsec_sa_create() and ipsec_sa_destroy().
 */
struct ipsec_sa_t {
	/** Source address of this SA */
	host_t *src;

	/** Destination address of this SA */
	host_t *dst;

	/** Optional mark */
	mark_t mark;

	/** Optional interface ID */
	uint32_t if_id;

	/** Description of this SA */
	ipsec_sa_cfg_t cfg;

	/** Reference count for this SA */
	refcount_t refcount;
};
450
451 /**
452 * Hash function for ipsec_sa_t objects
453 */
454 static u_int ipsec_sa_hash(ipsec_sa_t *sa)
455 {
456 return chunk_hash_inc(sa->src->get_address(sa->src),
457 chunk_hash_inc(sa->dst->get_address(sa->dst),
458 chunk_hash_inc(chunk_from_thing(sa->mark),
459 chunk_hash_inc(chunk_from_thing(sa->if_id),
460 chunk_hash(chunk_from_thing(sa->cfg))))));
461 }
462
463 /**
464 * Equality function for ipsec_sa_t objects
465 */
466 static bool ipsec_sa_equals(ipsec_sa_t *sa, ipsec_sa_t *other_sa)
467 {
468 return sa->src->ip_equals(sa->src, other_sa->src) &&
469 sa->dst->ip_equals(sa->dst, other_sa->dst) &&
470 sa->mark.value == other_sa->mark.value &&
471 sa->mark.mask == other_sa->mark.mask &&
472 sa->if_id == other_sa->if_id &&
473 ipsec_sa_cfg_equals(&sa->cfg, &other_sa->cfg);
474 }
475
476 /**
477 * Allocate or reference an IPsec SA object
478 */
479 static ipsec_sa_t *ipsec_sa_create(private_kernel_netlink_ipsec_t *this,
480 host_t *src, host_t *dst, mark_t mark,
481 uint32_t if_id, ipsec_sa_cfg_t *cfg)
482 {
483 ipsec_sa_t *sa, *found;
484 INIT(sa,
485 .src = src,
486 .dst = dst,
487 .mark = mark,
488 .if_id = if_id,
489 .cfg = *cfg,
490 );
491 found = this->sas->get(this->sas, sa);
492 if (!found)
493 {
494 sa->src = src->clone(src);
495 sa->dst = dst->clone(dst);
496 this->sas->put(this->sas, sa, sa);
497 }
498 else
499 {
500 free(sa);
501 sa = found;
502 }
503 ref_get(&sa->refcount);
504 return sa;
505 }
506
507 /**
508 * Release and destroy an IPsec SA object
509 */
510 static void ipsec_sa_destroy(private_kernel_netlink_ipsec_t *this,
511 ipsec_sa_t *sa)
512 {
513 if (ref_put(&sa->refcount))
514 {
515 this->sas->remove(this->sas, sa);
516 DESTROY_IF(sa->src);
517 DESTROY_IF(sa->dst);
518 free(sa);
519 }
520 }
521
typedef struct policy_sa_t policy_sa_t;
typedef struct policy_sa_out_t policy_sa_out_t;

/**
 * Mapping between a policy and an IPsec SA.
 */
struct policy_sa_t {
	/** Priority assigned to the policy when installed with this SA */
	uint32_t priority;

	/** Automatic priority assigned to the policy when installed with this SA */
	uint32_t auto_priority;

	/** Type of the policy */
	policy_type_t type;

	/** Assigned SA */
	ipsec_sa_t *sa;
};

/**
 * For outbound policies we also cache the traffic selectors in order to install
 * the route.
 */
struct policy_sa_out_t {
	/** Generic interface, first member so the object can be used via a
	 * policy_sa_t pointer and cast back (see policy_sa_destroy()) */
	policy_sa_t generic;

	/** Source traffic selector of this policy */
	traffic_selector_t *src_ts;

	/** Destination traffic selector of this policy */
	traffic_selector_t *dst_ts;
};
556
557 /**
558 * Create a policy_sa(_in)_t object
559 */
560 static policy_sa_t *policy_sa_create(private_kernel_netlink_ipsec_t *this,
561 policy_dir_t dir, policy_type_t type, host_t *src, host_t *dst,
562 traffic_selector_t *src_ts, traffic_selector_t *dst_ts, mark_t mark,
563 uint32_t if_id, ipsec_sa_cfg_t *cfg)
564 {
565 policy_sa_t *policy;
566
567 if (dir == POLICY_OUT)
568 {
569 policy_sa_out_t *out;
570 INIT(out,
571 .src_ts = src_ts->clone(src_ts),
572 .dst_ts = dst_ts->clone(dst_ts),
573 );
574 policy = &out->generic;
575 }
576 else
577 {
578 INIT(policy, .priority = 0);
579 }
580 policy->type = type;
581 policy->sa = ipsec_sa_create(this, src, dst, mark, if_id, cfg);
582 return policy;
583 }
584
585 /**
586 * Destroy a policy_sa(_in)_t object
587 */
588 static void policy_sa_destroy(policy_sa_t *policy, policy_dir_t dir,
589 private_kernel_netlink_ipsec_t *this)
590 {
591 if (dir == POLICY_OUT)
592 {
593 policy_sa_out_t *out = (policy_sa_out_t*)policy;
594 out->src_ts->destroy(out->src_ts);
595 out->dst_ts->destroy(out->dst_ts);
596 }
597 ipsec_sa_destroy(this, policy->sa);
598 free(policy);
599 }
600
/**
 * Callback variant of policy_sa_destroy().
 *
 * The policy direction and the kernel interface are read from the variadic
 * arguments, in that order (see the invoke_function() call in
 * policy_entry_destroy()).
 */
CALLBACK(policy_sa_destroy_cb, void,
	policy_sa_t *policy, va_list args)
{
	private_kernel_netlink_ipsec_t *this;
	policy_dir_t dir;

	VA_ARGS_VGET(args, dir, this);
	policy_sa_destroy(policy, dir, this);
}
610
typedef struct policy_entry_t policy_entry_t;

/**
 * Installed kernel policy.
 *
 * Entries are compared by selector, mark, interface ID and direction (see
 * policy_equals()).
 */
struct policy_entry_t {

	/** Direction of this policy: in, out, forward */
	uint8_t direction;

	/** Parameters of installed policy */
	struct xfrm_selector sel;

	/** Optional mark */
	uint32_t mark;

	/** Optional interface ID */
	uint32_t if_id;

	/** Associated route installed for this policy, NULL if none */
	route_entry_t *route;

	/** List of SAs this policy is used by, ordered by priority */
	linked_list_t *used_by;

	/** reqid for this policy */
	uint32_t reqid;

	/** Number of threads waiting to work on this policy */
	int waiting;

	/** TRUE if a thread is working on this policy */
	bool working;
};
645
646 /**
647 * Destroy a policy_entry_t object
648 */
649 static void policy_entry_destroy(private_kernel_netlink_ipsec_t *this,
650 policy_entry_t *policy)
651 {
652 if (policy->route)
653 {
654 route_entry_destroy(policy->route);
655 }
656 if (policy->used_by)
657 {
658 policy->used_by->invoke_function(policy->used_by, policy_sa_destroy_cb,
659 policy->direction, this);
660 policy->used_by->destroy(policy->used_by);
661 }
662 free(policy);
663 }
664
665 /**
666 * Hash function for policy_entry_t objects
667 */
668 static u_int policy_hash(policy_entry_t *key)
669 {
670 chunk_t chunk = chunk_from_thing(key->sel);
671 return chunk_hash_inc(chunk, chunk_hash_inc(chunk_from_thing(key->mark),
672 chunk_hash(chunk_from_thing(key->if_id))));
673 }
674
675 /**
676 * Equality function for policy_entry_t objects
677 */
678 static bool policy_equals(policy_entry_t *key, policy_entry_t *other_key)
679 {
680 return memeq(&key->sel, &other_key->sel, sizeof(struct xfrm_selector)) &&
681 key->mark == other_key->mark &&
682 key->if_id == other_key->if_id &&
683 key->direction == other_key->direction;
684 }
685
/**
 * Determine the number of leading set bits in a 16 bit port mask.
 *
 * Counting starts at the most significant bit (in host order) and stops at
 * the first bit that is not set.
 *
 * @param port_mask		port mask in network byte order
 * @return				number of contiguous leading set bits (0..16)
 */
static inline uint32_t port_mask_bits(uint16_t port_mask)
{
	uint16_t host_mask = ntohs(port_mask);
	uint32_t count = 0;

	while (count < 16 && (host_mask & (0x8000 >> count)))
	{
		count++;
	}
	return count;
}
706
707 /**
708 * Calculate the priority of a policy
709 *
710 * bits 0-0: separate trap and regular policies (0..1) 1 bit
711 * bits 1-1: restriction to network interface (0..1) 1 bit
712 * bits 2-7: src + dst port mask bits (2 * 0..16) 6 bits
713 * bits 8-8: restriction to protocol (0..1) 1 bit
714 * bits 9-17: src + dst network mask bits (2 * 0..128) 9 bits
715 * 18 bits
716 *
717 * smallest value: 000000000 0 000000 0 0: 0, lowest priority = 200'000
718 * largest value : 100000000 1 100000 1 1: 131'459, highst priority = 68'541
719 */
720 static uint32_t get_priority(policy_entry_t *policy, policy_priority_t prio,
721 char *interface)
722 {
723 uint32_t priority = PRIO_BASE, sport_mask_bits, dport_mask_bits;
724
725 switch (prio)
726 {
727 case POLICY_PRIORITY_FALLBACK:
728 priority += PRIO_BASE;
729 /* fall-through to next case */
730 case POLICY_PRIORITY_ROUTED:
731 case POLICY_PRIORITY_DEFAULT:
732 priority += PRIO_BASE;
733 /* fall-through to next case */
734 case POLICY_PRIORITY_PASS:
735 break;
736 }
737 sport_mask_bits = port_mask_bits(policy->sel.sport_mask);
738 dport_mask_bits = port_mask_bits(policy->sel.dport_mask);
739
740 /* calculate priority */
741 priority -= (policy->sel.prefixlen_s + policy->sel.prefixlen_d) * 512;
742 priority -= policy->sel.proto ? 256 : 0;
743 priority -= (sport_mask_bits + dport_mask_bits) * 4;
744 priority -= (interface != NULL) * 2;
745 priority -= (prio != POLICY_PRIORITY_ROUTED);
746
747 return priority;
748 }
749
750 /**
751 * Convert the general ipsec mode to the one defined in xfrm.h
752 */
753 static uint8_t mode2kernel(ipsec_mode_t mode)
754 {
755 switch (mode)
756 {
757 case MODE_TRANSPORT:
758 return XFRM_MODE_TRANSPORT;
759 case MODE_TUNNEL:
760 return XFRM_MODE_TUNNEL;
761 case MODE_BEET:
762 return XFRM_MODE_BEET;
763 default:
764 return mode;
765 }
766 }
767
768 /**
769 * Convert a host_t to a struct xfrm_address
770 */
771 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
772 {
773 chunk_t chunk = host->get_address(host);
774 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
775 }
776
777 /**
778 * Convert a struct xfrm_address to a host_t
779 */
780 static host_t* xfrm2host(int family, xfrm_address_t *xfrm, uint16_t port)
781 {
782 chunk_t chunk;
783
784 switch (family)
785 {
786 case AF_INET:
787 chunk = chunk_create((u_char*)&xfrm->a4, sizeof(xfrm->a4));
788 break;
789 case AF_INET6:
790 chunk = chunk_create((u_char*)&xfrm->a6, sizeof(xfrm->a6));
791 break;
792 default:
793 return NULL;
794 }
795 return host_create_from_chunk(family, chunk, ntohs(port));
796 }
797
798 /**
799 * Convert a traffic selector address range to subnet and its mask.
800 */
801 static void ts2subnet(traffic_selector_t* ts,
802 xfrm_address_t *net, uint8_t *mask)
803 {
804 host_t *net_host;
805 chunk_t net_chunk;
806
807 ts->to_subnet(ts, &net_host, mask);
808 net_chunk = net_host->get_address(net_host);
809 memcpy(net, net_chunk.ptr, net_chunk.len);
810 net_host->destroy(net_host);
811 }
812
813 /**
814 * Convert a traffic selector port range to port/portmask
815 */
816 static void ts2ports(traffic_selector_t* ts,
817 uint16_t *port, uint16_t *mask)
818 {
819 uint16_t from, to, bitmask;
820 int bit;
821
822 from = ts->get_from_port(ts);
823 to = ts->get_to_port(ts);
824
825 /* Quick check for a single port */
826 if (from == to)
827 {
828 *port = htons(from);
829 *mask = ~0;
830 }
831 else
832 {
833 /* Compute the port mask for port ranges */
834 *mask = 0;
835
836 for (bit = 15; bit >= 0; bit--)
837 {
838 bitmask = 1 << bit;
839
840 if ((bitmask & from) != (bitmask & to))
841 {
842 *port = htons(from & *mask);
843 *mask = htons(*mask);
844 return;
845 }
846 *mask |= bitmask;
847 }
848 }
849 return;
850 }
851
852 /**
853 * Convert a pair of traffic_selectors to an xfrm_selector
854 */
855 static struct xfrm_selector ts2selector(traffic_selector_t *src,
856 traffic_selector_t *dst,
857 char *interface)
858 {
859 struct xfrm_selector sel;
860 uint16_t port;
861
862 memset(&sel, 0, sizeof(sel));
863 sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
864 /* src or dest proto may be "any" (0), use more restrictive one */
865 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
866 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
867 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
868 ts2ports(dst, &sel.dport, &sel.dport_mask);
869 ts2ports(src, &sel.sport, &sel.sport_mask);
870 if ((sel.proto == IPPROTO_ICMP || sel.proto == IPPROTO_ICMPV6) &&
871 (sel.dport || sel.sport))
872 {
873 /* the kernel expects the ICMP type and code in the source and
874 * destination port fields, respectively. */
875 port = ntohs(max(sel.dport, sel.sport));
876 sel.sport = htons(traffic_selector_icmp_type(port));
877 sel.sport_mask = sel.sport ? ~0 : 0;
878 sel.dport = htons(traffic_selector_icmp_code(port));
879 sel.dport_mask = sel.dport ? ~0 : 0;
880 }
881 sel.ifindex = interface ? if_nametoindex(interface) : 0;
882 sel.user = 0;
883
884 return sel;
885 }
886
887 /**
888 * Convert an xfrm_selector to a src|dst traffic_selector
889 */
890 static traffic_selector_t* selector2ts(struct xfrm_selector *sel, bool src)
891 {
892 u_char *addr;
893 uint8_t prefixlen;
894 uint16_t port = 0;
895 host_t *host = NULL;
896
897 if (src)
898 {
899 addr = (u_char*)&sel->saddr;
900 prefixlen = sel->prefixlen_s;
901 if (sel->sport_mask)
902 {
903 port = ntohs(sel->sport);
904 }
905 }
906 else
907 {
908 addr = (u_char*)&sel->daddr;
909 prefixlen = sel->prefixlen_d;
910 if (sel->dport_mask)
911 {
912 port = ntohs(sel->dport);
913 }
914 }
915 if (sel->proto == IPPROTO_ICMP || sel->proto == IPPROTO_ICMPV6)
916 { /* convert ICMP[v6] message type and code as supplied by the kernel in
917 * source and destination ports (both in network order) */
918 port = (sel->sport >> 8) | (sel->dport & 0xff00);
919 port = ntohs(port);
920 }
921 /* The Linux 2.6 kernel does not set the selector's family field,
922 * so as a kludge we additionally test the prefix length.
923 */
924 if (sel->family == AF_INET || sel->prefixlen_s == 32)
925 {
926 host = host_create_from_chunk(AF_INET, chunk_create(addr, 4), 0);
927 }
928 else if (sel->family == AF_INET6 || sel->prefixlen_s == 128)
929 {
930 host = host_create_from_chunk(AF_INET6, chunk_create(addr, 16), 0);
931 }
932
933 if (host)
934 {
935 return traffic_selector_create_from_subnet(host, prefixlen,
936 sel->proto, port, port ?: 65535);
937 }
938 return NULL;
939 }
940
941 /**
942 * Process a XFRM_MSG_ACQUIRE from kernel
943 */
944 static void process_acquire(private_kernel_netlink_ipsec_t *this,
945 struct nlmsghdr *hdr)
946 {
947 struct xfrm_user_acquire *acquire;
948 struct rtattr *rta;
949 size_t rtasize;
950 traffic_selector_t *src_ts, *dst_ts;
951 uint32_t reqid = 0;
952 int proto = 0;
953
954 acquire = NLMSG_DATA(hdr);
955 rta = XFRM_RTA(hdr, struct xfrm_user_acquire);
956 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_user_acquire);
957
958 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
959
960 while (RTA_OK(rta, rtasize))
961 {
962 DBG2(DBG_KNL, " %N", xfrm_attr_type_names, rta->rta_type);
963
964 if (rta->rta_type == XFRMA_TMPL)
965 {
966 struct xfrm_user_tmpl* tmpl;
967 tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rta);
968 reqid = tmpl->reqid;
969 proto = tmpl->id.proto;
970 }
971 rta = RTA_NEXT(rta, rtasize);
972 }
973 switch (proto)
974 {
975 case 0:
976 case IPPROTO_ESP:
977 case IPPROTO_AH:
978 break;
979 default:
980 /* acquire for AH/ESP only, not for IPCOMP */
981 return;
982 }
983 src_ts = selector2ts(&acquire->sel, TRUE);
984 dst_ts = selector2ts(&acquire->sel, FALSE);
985
986 charon->kernel->acquire(charon->kernel, reqid, src_ts, dst_ts);
987 }
988
989 /**
990 * Process a XFRM_MSG_EXPIRE from kernel
991 */
992 static void process_expire(private_kernel_netlink_ipsec_t *this,
993 struct nlmsghdr *hdr)
994 {
995 struct xfrm_user_expire *expire;
996 uint32_t spi;
997 uint8_t protocol;
998 host_t *dst;
999
1000 expire = NLMSG_DATA(hdr);
1001 protocol = expire->state.id.proto;
1002 spi = expire->state.id.spi;
1003
1004 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
1005
1006 if (protocol == IPPROTO_ESP || protocol == IPPROTO_AH)
1007 {
1008 dst = xfrm2host(expire->state.family, &expire->state.id.daddr, 0);
1009 if (dst)
1010 {
1011 charon->kernel->expire(charon->kernel, protocol, spi, dst,
1012 expire->hard != 0);
1013 dst->destroy(dst);
1014 }
1015 }
1016 }
1017
1018 /**
1019 * Process a XFRM_MSG_MIGRATE from kernel
1020 */
1021 static void process_migrate(private_kernel_netlink_ipsec_t *this,
1022 struct nlmsghdr *hdr)
1023 {
1024 struct xfrm_userpolicy_id *policy_id;
1025 struct rtattr *rta;
1026 size_t rtasize;
1027 traffic_selector_t *src_ts, *dst_ts;
1028 host_t *local = NULL, *remote = NULL;
1029 host_t *old_src = NULL, *old_dst = NULL;
1030 host_t *new_src = NULL, *new_dst = NULL;
1031 uint32_t reqid = 0;
1032 policy_dir_t dir;
1033
1034 policy_id = NLMSG_DATA(hdr);
1035 rta = XFRM_RTA(hdr, struct xfrm_userpolicy_id);
1036 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_userpolicy_id);
1037
1038 DBG2(DBG_KNL, "received a XFRM_MSG_MIGRATE");
1039
1040 src_ts = selector2ts(&policy_id->sel, TRUE);
1041 dst_ts = selector2ts(&policy_id->sel, FALSE);
1042 dir = (policy_dir_t)policy_id->dir;
1043
1044 DBG2(DBG_KNL, " policy: %R === %R %N", src_ts, dst_ts, policy_dir_names);
1045
1046 while (RTA_OK(rta, rtasize))
1047 {
1048 DBG2(DBG_KNL, " %N", xfrm_attr_type_names, rta->rta_type);
1049 if (rta->rta_type == XFRMA_KMADDRESS)
1050 {
1051 struct xfrm_user_kmaddress *kmaddress;
1052
1053 kmaddress = (struct xfrm_user_kmaddress*)RTA_DATA(rta);
1054 local = xfrm2host(kmaddress->family, &kmaddress->local, 0);
1055 remote = xfrm2host(kmaddress->family, &kmaddress->remote, 0);
1056 DBG2(DBG_KNL, " kmaddress: %H...%H", local, remote);
1057 }
1058 else if (rta->rta_type == XFRMA_MIGRATE)
1059 {
1060 struct xfrm_user_migrate *migrate;
1061
1062 migrate = (struct xfrm_user_migrate*)RTA_DATA(rta);
1063 old_src = xfrm2host(migrate->old_family, &migrate->old_saddr, 0);
1064 old_dst = xfrm2host(migrate->old_family, &migrate->old_daddr, 0);
1065 new_src = xfrm2host(migrate->new_family, &migrate->new_saddr, 0);
1066 new_dst = xfrm2host(migrate->new_family, &migrate->new_daddr, 0);
1067 reqid = migrate->reqid;
1068 DBG2(DBG_KNL, " migrate %H...%H to %H...%H, reqid {%u}",
1069 old_src, old_dst, new_src, new_dst, reqid);
1070 DESTROY_IF(old_src);
1071 DESTROY_IF(old_dst);
1072 DESTROY_IF(new_src);
1073 DESTROY_IF(new_dst);
1074 }
1075 rta = RTA_NEXT(rta, rtasize);
1076 }
1077
1078 if (src_ts && dst_ts && local && remote)
1079 {
1080 charon->kernel->migrate(charon->kernel, reqid, src_ts, dst_ts, dir,
1081 local, remote);
1082 }
1083 else
1084 {
1085 DESTROY_IF(src_ts);
1086 DESTROY_IF(dst_ts);
1087 DESTROY_IF(local);
1088 DESTROY_IF(remote);
1089 }
1090 }
1091
1092 /**
1093 * Process a XFRM_MSG_MAPPING from kernel
1094 */
1095 static void process_mapping(private_kernel_netlink_ipsec_t *this,
1096 struct nlmsghdr *hdr)
1097 {
1098 struct xfrm_user_mapping *mapping;
1099 uint32_t spi;
1100
1101 mapping = NLMSG_DATA(hdr);
1102 spi = mapping->id.spi;
1103
1104 DBG2(DBG_KNL, "received a XFRM_MSG_MAPPING");
1105
1106 if (mapping->id.proto == IPPROTO_ESP)
1107 {
1108 host_t *dst, *new;
1109
1110 dst = xfrm2host(mapping->id.family, &mapping->id.daddr, 0);
1111 if (dst)
1112 {
1113 new = xfrm2host(mapping->id.family, &mapping->new_saddr,
1114 mapping->new_sport);
1115 if (new)
1116 {
1117 charon->kernel->mapping(charon->kernel, IPPROTO_ESP, spi, dst,
1118 new);
1119 new->destroy(new);
1120 }
1121 dst->destroy(dst);
1122 }
1123 }
1124 }
1125
1126 /**
1127 * Receives events from kernel
1128 */
1129 static bool receive_events(private_kernel_netlink_ipsec_t *this, int fd,
1130 watcher_event_t event)
1131 {
1132 char response[netlink_get_buflen()];
1133 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
1134 struct sockaddr_nl addr;
1135 socklen_t addr_len = sizeof(addr);
1136 int len;
1137
1138 len = recvfrom(this->socket_xfrm_events, response, sizeof(response),
1139 MSG_DONTWAIT, (struct sockaddr*)&addr, &addr_len);
1140 if (len < 0)
1141 {
1142 switch (errno)
1143 {
1144 case EINTR:
1145 /* interrupted, try again */
1146 return TRUE;
1147 case EAGAIN:
1148 /* no data ready, select again */
1149 return TRUE;
1150 default:
1151 DBG1(DBG_KNL, "unable to receive from XFRM event socket: %s "
1152 "(%d)", strerror(errno), errno);
1153 sleep(1);
1154 return TRUE;
1155 }
1156 }
1157
1158 if (addr.nl_pid != 0)
1159 { /* not from kernel. not interested, try another one */
1160 return TRUE;
1161 }
1162
1163 while (NLMSG_OK(hdr, len))
1164 {
1165 switch (hdr->nlmsg_type)
1166 {
1167 case XFRM_MSG_ACQUIRE:
1168 process_acquire(this, hdr);
1169 break;
1170 case XFRM_MSG_EXPIRE:
1171 process_expire(this, hdr);
1172 break;
1173 case XFRM_MSG_MIGRATE:
1174 process_migrate(this, hdr);
1175 break;
1176 case XFRM_MSG_MAPPING:
1177 process_mapping(this, hdr);
1178 break;
1179 default:
1180 DBG1(DBG_KNL, "received unknown event from XFRM event "
1181 "socket: %d", hdr->nlmsg_type);
1182 break;
1183 }
1184 hdr = NLMSG_NEXT(hdr, len);
1185 }
1186 return TRUE;
1187 }
1188
/**
 * Implementation of kernel_ipsec_t.get_features.
 *
 * Reports the KERNEL_ESP_V3_TFC and KERNEL_POLICY_SPI features, independent
 * of any state of the object.
 */
METHOD(kernel_ipsec_t, get_features, kernel_feature_t,
	private_kernel_netlink_ipsec_t *this)
{
	return KERNEL_ESP_V3_TFC | KERNEL_POLICY_SPI;
}
1194
1195 /**
1196 * Get an SPI for a specific protocol from the kernel.
1197 */
1198 static status_t get_spi_internal(private_kernel_netlink_ipsec_t *this,
1199 host_t *src, host_t *dst, uint8_t proto, uint32_t min, uint32_t max,
1200 uint32_t *spi)
1201 {
1202 netlink_buf_t request;
1203 struct nlmsghdr *hdr, *out;
1204 struct xfrm_userspi_info *userspi;
1205 uint32_t received_spi = 0;
1206 size_t len;
1207
1208 memset(&request, 0, sizeof(request));
1209
1210 hdr = &request.hdr;
1211 hdr->nlmsg_flags = NLM_F_REQUEST;
1212 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1213 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1214
1215 userspi = NLMSG_DATA(hdr);
1216 host2xfrm(src, &userspi->info.saddr);
1217 host2xfrm(dst, &userspi->info.id.daddr);
1218 userspi->info.id.proto = proto;
1219 userspi->info.mode = XFRM_MODE_TUNNEL;
1220 userspi->info.family = src->get_family(src);
1221 userspi->min = min;
1222 userspi->max = max;
1223
1224 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1225 {
1226 hdr = out;
1227 while (NLMSG_OK(hdr, len))
1228 {
1229 switch (hdr->nlmsg_type)
1230 {
1231 case XFRM_MSG_NEWSA:
1232 {
1233 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1234 received_spi = usersa->id.spi;
1235 break;
1236 }
1237 case NLMSG_ERROR:
1238 {
1239 struct nlmsgerr *err = NLMSG_DATA(hdr);
1240 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1241 strerror(-err->error), -err->error);
1242 break;
1243 }
1244 default:
1245 hdr = NLMSG_NEXT(hdr, len);
1246 continue;
1247 case NLMSG_DONE:
1248 break;
1249 }
1250 break;
1251 }
1252 free(out);
1253 }
1254
1255 if (received_spi == 0)
1256 {
1257 return FAILED;
1258 }
1259
1260 *spi = received_spi;
1261 return SUCCESS;
1262 }
1263
1264 METHOD(kernel_ipsec_t, get_spi, status_t,
1265 private_kernel_netlink_ipsec_t *this, host_t *src, host_t *dst,
1266 uint8_t protocol, uint32_t *spi)
1267 {
1268 uint32_t spi_min, spi_max;
1269
1270 spi_min = lib->settings->get_int(lib->settings, "%s.spi_min",
1271 KERNEL_SPI_MIN, lib->ns);
1272 spi_max = lib->settings->get_int(lib->settings, "%s.spi_max",
1273 KERNEL_SPI_MAX, lib->ns);
1274
1275 if (get_spi_internal(this, src, dst, protocol, min(spi_min, spi_max),
1276 max(spi_min, spi_max), spi) != SUCCESS)
1277 {
1278 DBG1(DBG_KNL, "unable to get SPI");
1279 return FAILED;
1280 }
1281
1282 DBG2(DBG_KNL, "got SPI %.8x", ntohl(*spi));
1283 return SUCCESS;
1284 }
1285
1286 METHOD(kernel_ipsec_t, get_cpi, status_t,
1287 private_kernel_netlink_ipsec_t *this, host_t *src, host_t *dst,
1288 uint16_t *cpi)
1289 {
1290 uint32_t received_spi = 0;
1291
1292 if (get_spi_internal(this, src, dst, IPPROTO_COMP,
1293 0x100, 0xEFFF, &received_spi) != SUCCESS)
1294 {
1295 DBG1(DBG_KNL, "unable to get CPI");
1296 return FAILED;
1297 }
1298
1299 *cpi = htons((uint16_t)ntohl(received_spi));
1300
1301 DBG2(DBG_KNL, "got CPI %.4x", ntohs(*cpi));
1302 return SUCCESS;
1303 }
1304
1305 /**
1306 * Format the mark for debug messages
1307 */
1308 static void format_mark(char *buf, int buflen, mark_t mark)
1309 {
1310 if (mark.value | mark.mask)
1311 {
1312 snprintf(buf, buflen, " (mark %u/0x%08x)", mark.value, mark.mask);
1313 }
1314 }
1315
1316 /**
1317 * Add a XFRM mark to message if required
1318 */
1319 static bool add_mark(struct nlmsghdr *hdr, int buflen, mark_t mark)
1320 {
1321 if (mark.value | mark.mask)
1322 {
1323 struct xfrm_mark *xmrk;
1324
1325 xmrk = netlink_reserve(hdr, buflen, XFRMA_MARK, sizeof(*xmrk));
1326 if (!xmrk)
1327 {
1328 return FALSE;
1329 }
1330 xmrk->v = mark.value;
1331 xmrk->m = mark.mask;
1332 }
1333 return TRUE;
1334 }
1335
1336 /**
1337 * Add a uint32 attribute to message
1338 */
1339 static bool add_uint32(struct nlmsghdr *hdr, int buflen,
1340 enum xfrm_attr_type_t type, uint32_t value)
1341 {
1342 uint32_t *xvalue;
1343
1344 xvalue = netlink_reserve(hdr, buflen, type, sizeof(*xvalue));
1345 if (!xvalue)
1346 {
1347 return FALSE;
1348 }
1349 *xvalue = value;
1350 return TRUE;
1351 }
1352
1353 /* ETHTOOL_GSSET_INFO is available since 2.6.34 and ETH_SS_FEATURES (enum) and
1354 * ETHTOOL_GFEATURES since 2.6.39, so check for the latter */
1355 #ifdef ETHTOOL_GFEATURES
1356
/**
 * Global metadata used for IPsec HW offload
 *
 * Written by netlink_find_offload_feature() once the feature has been found
 * and read by netlink_detect_offload() when querying a device.
 */
static struct {
	/** determined HW offload support, TRUE once the "esp-hw-offload" feature
	 * string has been found */
	bool supported;
	/** bit in feature set, i.e. the index of the feature's name in the
	 * ETH_SS_FEATURES string set */
	u_int bit;
	/** total number of device feature blocks (32 feature bits per block) */
	u_int total_blocks;
} netlink_hw_offload;
1368
1369 /**
1370 * Check if kernel supports HW offload and determine feature flag
1371 */
1372 static void netlink_find_offload_feature(const char *ifname)
1373 {
1374 struct ethtool_sset_info *sset_info;
1375 struct ethtool_gstrings *cmd = NULL;
1376 struct ifreq ifr;
1377 uint32_t sset_len, i;
1378 char *str;
1379 int err, query_socket;
1380
1381 query_socket = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_XFRM);
1382 if (query_socket < 0)
1383 {
1384 return;
1385 }
1386
1387 /* determine number of device features */
1388 INIT_EXTRA(sset_info, sizeof(uint32_t),
1389 .cmd = ETHTOOL_GSSET_INFO,
1390 .sset_mask = 1ULL << ETH_SS_FEATURES,
1391 );
1392 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1393 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1394 ifr.ifr_data = (void*)sset_info;
1395
1396 err = ioctl(query_socket, SIOCETHTOOL, &ifr);
1397 if (err || sset_info->sset_mask != 1ULL << ETH_SS_FEATURES)
1398 {
1399 goto out;
1400 }
1401 sset_len = sset_info->data[0];
1402
1403 /* retrieve names of device features */
1404 INIT_EXTRA(cmd, ETH_GSTRING_LEN * sset_len,
1405 .cmd = ETHTOOL_GSTRINGS,
1406 .string_set = ETH_SS_FEATURES,
1407 );
1408 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1409 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1410 ifr.ifr_data = (void*)cmd;
1411
1412 err = ioctl(query_socket, SIOCETHTOOL, &ifr);
1413 if (err)
1414 {
1415 goto out;
1416 }
1417
1418 /* look for the ESP_HW feature bit */
1419 str = (char*)cmd->data;
1420 for (i = 0; i < cmd->len; i++)
1421 {
1422 if (strneq(str, "esp-hw-offload", ETH_GSTRING_LEN))
1423 {
1424 netlink_hw_offload.supported = TRUE;
1425 netlink_hw_offload.bit = i;
1426 netlink_hw_offload.total_blocks = (sset_len + 31) / 32;
1427 break;
1428 }
1429 str += ETH_GSTRING_LEN;
1430 }
1431
1432 out:
1433 free(sset_info);
1434 free(cmd);
1435 close(query_socket);
1436 }
1437
1438 /**
1439 * Check if interface supported HW offload
1440 */
1441 static bool netlink_detect_offload(const char *ifname)
1442 {
1443 struct ethtool_gfeatures *cmd;
1444 uint32_t feature_bit;
1445 struct ifreq ifr;
1446 int query_socket;
1447 int block;
1448 bool ret = FALSE;
1449
1450 if (!netlink_hw_offload.supported)
1451 {
1452 DBG1(DBG_KNL, "HW offload is not supported by kernel");
1453 return FALSE;
1454 }
1455
1456 query_socket = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_XFRM);
1457 if (query_socket < 0)
1458 {
1459 return FALSE;
1460 }
1461
1462 /* feature is supported by kernel, query device features */
1463 INIT_EXTRA(cmd, sizeof(cmd->features[0]) * netlink_hw_offload.total_blocks,
1464 .cmd = ETHTOOL_GFEATURES,
1465 .size = netlink_hw_offload.total_blocks,
1466 );
1467 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1468 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1469 ifr.ifr_data = (void*)cmd;
1470
1471 if (!ioctl(query_socket, SIOCETHTOOL, &ifr))
1472 {
1473 block = netlink_hw_offload.bit / 32;
1474 feature_bit = 1U << (netlink_hw_offload.bit % 32);
1475 if (cmd->features[block].active & feature_bit)
1476 {
1477 ret = TRUE;
1478 }
1479 }
1480
1481 if (!ret)
1482 {
1483 DBG1(DBG_KNL, "HW offload is not supported by device");
1484 }
1485 free(cmd);
1486 close(query_socket);
1487 return ret;
1488 }
1489
1490 #else
1491
/**
 * Fallback if ETHTOOL_GFEATURES is not defined: there is nothing to look up,
 * so this does nothing.
 */
static void netlink_find_offload_feature(const char *ifname)
{
}
1495
/**
 * Fallback if ETHTOOL_GFEATURES is not defined: HW offload is never reported
 * as available.
 */
static bool netlink_detect_offload(const char *ifname)
{
	return FALSE;
}
1500
1501 #endif
1502
1503 /**
1504 * There are 3 HW offload configuration values:
1505 * 1. HW_OFFLOAD_NO : Do not configure HW offload.
1506 * 2. HW_OFFLOAD_YES : Configure HW offload.
1507 * Fail SA addition if offload is not supported.
1508 * 3. HW_OFFLOAD_AUTO : Configure HW offload if supported by the kernel
1509 * and device.
1510 * Do not fail SA addition otherwise.
1511 */
1512 static bool config_hw_offload(kernel_ipsec_sa_id_t *id,
1513 kernel_ipsec_add_sa_t *data, struct nlmsghdr *hdr,
1514 int buflen)
1515 {
1516 host_t *local = data->inbound ? id->dst : id->src;
1517 struct xfrm_user_offload *offload;
1518 bool hw_offload_yes, ret = FALSE;
1519 char *ifname;
1520
1521 /* do Ipsec configuration without offload */
1522 if (data->hw_offload == HW_OFFLOAD_NO)
1523 {
1524 return TRUE;
1525 }
1526
1527 hw_offload_yes = (data->hw_offload == HW_OFFLOAD_YES);
1528
1529 if (!charon->kernel->get_interface(charon->kernel, local, &ifname))
1530 {
1531 return !hw_offload_yes;
1532 }
1533
1534 /* check if interface supports hw_offload */
1535 if (!netlink_detect_offload(ifname))
1536 {
1537 ret = !hw_offload_yes;
1538 goto out;
1539 }
1540
1541 /* activate HW offload */
1542 offload = netlink_reserve(hdr, buflen,
1543 XFRMA_OFFLOAD_DEV, sizeof(*offload));
1544 if (!offload)
1545 {
1546 ret = !hw_offload_yes;
1547 goto out;
1548 }
1549 offload->ifindex = if_nametoindex(ifname);
1550 if (local->get_family(local) == AF_INET6)
1551 {
1552 offload->flags |= XFRM_OFFLOAD_IPV6;
1553 }
1554 offload->flags |= data->inbound ? XFRM_OFFLOAD_INBOUND : 0;
1555
1556 ret = TRUE;
1557
1558 out:
1559 free(ifname);
1560 return ret;
1561 }
1562
1563 METHOD(kernel_ipsec_t, add_sa, status_t,
1564 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
1565 kernel_ipsec_add_sa_t *data)
1566 {
1567 netlink_buf_t request;
1568 const char *alg_name;
1569 char markstr[32] = "";
1570 struct nlmsghdr *hdr;
1571 struct xfrm_usersa_info *sa;
1572 uint16_t icv_size = 64, ipcomp = data->ipcomp;
1573 ipsec_mode_t mode = data->mode, original_mode = data->mode;
1574 traffic_selector_t *first_src_ts, *first_dst_ts;
1575 status_t status = FAILED;
1576
1577 /* if IPComp is used, we install an additional IPComp SA. if the cpi is 0
1578 * we are in the recursive call below */
1579 if (ipcomp != IPCOMP_NONE && data->cpi != 0)
1580 {
1581 lifetime_cfg_t lft = {{0,0,0},{0,0,0},{0,0,0}};
1582 kernel_ipsec_sa_id_t ipcomp_id = {
1583 .src = id->src,
1584 .dst = id->dst,
1585 .spi = htonl(ntohs(data->cpi)),
1586 .proto = IPPROTO_COMP,
1587 .mark = id->mark,
1588 .if_id = id->if_id,
1589 };
1590 kernel_ipsec_add_sa_t ipcomp_sa = {
1591 .reqid = data->reqid,
1592 .mode = data->mode,
1593 .src_ts = data->src_ts,
1594 .dst_ts = data->dst_ts,
1595 .lifetime = &lft,
1596 .enc_alg = ENCR_UNDEFINED,
1597 .int_alg = AUTH_UNDEFINED,
1598 .tfc = data->tfc,
1599 .ipcomp = data->ipcomp,
1600 .initiator = data->initiator,
1601 .inbound = data->inbound,
1602 .update = data->update,
1603 };
1604 add_sa(this, &ipcomp_id, &ipcomp_sa);
1605 ipcomp = IPCOMP_NONE;
1606 /* use transport mode ESP SA, IPComp uses tunnel mode */
1607 mode = MODE_TRANSPORT;
1608 }
1609
1610 memset(&request, 0, sizeof(request));
1611 format_mark(markstr, sizeof(markstr), id->mark);
1612
1613 DBG2(DBG_KNL, "adding SAD entry with SPI %.8x and reqid {%u}%s",
1614 ntohl(id->spi), data->reqid, markstr);
1615
1616 hdr = &request.hdr;
1617 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1618 hdr->nlmsg_type = data->update ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
1619 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1620
1621 sa = NLMSG_DATA(hdr);
1622 host2xfrm(id->src, &sa->saddr);
1623 host2xfrm(id->dst, &sa->id.daddr);
1624 sa->id.spi = id->spi;
1625 sa->id.proto = id->proto;
1626 sa->family = id->src->get_family(id->src);
1627 sa->mode = mode2kernel(mode);
1628
1629 if (!data->copy_df)
1630 {
1631 sa->flags |= XFRM_STATE_NOPMTUDISC;
1632 }
1633
1634 if (!data->copy_ecn)
1635 {
1636 sa->flags |= XFRM_STATE_NOECN;
1637 }
1638
1639 if (data->inbound)
1640 {
1641 switch (data->copy_dscp)
1642 {
1643 case DSCP_COPY_YES:
1644 case DSCP_COPY_IN_ONLY:
1645 sa->flags |= XFRM_STATE_DECAP_DSCP;
1646 break;
1647 default:
1648 break;
1649 }
1650 }
1651 else
1652 {
1653 switch (data->copy_dscp)
1654 {
1655 case DSCP_COPY_IN_ONLY:
1656 case DSCP_COPY_NO:
1657 {
1658 /* currently the only extra flag */
1659 if (!add_uint32(hdr, sizeof(request), XFRMA_SA_EXTRA_FLAGS,
1660 XFRM_SA_XFLAG_DONT_ENCAP_DSCP))
1661 {
1662 goto failed;
1663 }
1664 break;
1665 }
1666 default:
1667 break;
1668 }
1669 }
1670
1671 switch (mode)
1672 {
1673 case MODE_TUNNEL:
1674 sa->flags |= XFRM_STATE_AF_UNSPEC;
1675 break;
1676 case MODE_BEET:
1677 case MODE_TRANSPORT:
1678 if (original_mode == MODE_TUNNEL)
1679 { /* don't install selectors for switched SAs. because only one
1680 * selector can be installed other traffic would get dropped */
1681 break;
1682 }
1683 if (data->src_ts->get_first(data->src_ts,
1684 (void**)&first_src_ts) == SUCCESS &&
1685 data->dst_ts->get_first(data->dst_ts,
1686 (void**)&first_dst_ts) == SUCCESS)
1687 {
1688 sa->sel = ts2selector(first_src_ts, first_dst_ts,
1689 data->interface);
1690 if (!this->proto_port_transport)
1691 {
1692 /* don't install proto/port on SA. This would break
1693 * potential secondary SAs for the same address using a
1694 * different prot/port. */
1695 sa->sel.proto = 0;
1696 sa->sel.dport = sa->sel.dport_mask = 0;
1697 sa->sel.sport = sa->sel.sport_mask = 0;
1698 }
1699 }
1700 break;
1701 default:
1702 break;
1703 }
1704 if (id->proto == IPPROTO_AH && sa->family == AF_INET)
1705 { /* use alignment to 4 bytes for IPv4 instead of the incorrect 8 byte
1706 * alignment that's used by default but is only valid for IPv6 */
1707 sa->flags |= XFRM_STATE_ALIGN4;
1708 }
1709
1710 sa->reqid = data->reqid;
1711 sa->lft.soft_byte_limit = XFRM_LIMIT(data->lifetime->bytes.rekey);
1712 sa->lft.hard_byte_limit = XFRM_LIMIT(data->lifetime->bytes.life);
1713 sa->lft.soft_packet_limit = XFRM_LIMIT(data->lifetime->packets.rekey);
1714 sa->lft.hard_packet_limit = XFRM_LIMIT(data->lifetime->packets.life);
1715 /* we use lifetimes since added, not since used */
1716 sa->lft.soft_add_expires_seconds = data->lifetime->time.rekey;
1717 sa->lft.hard_add_expires_seconds = data->lifetime->time.life;
1718 sa->lft.soft_use_expires_seconds = 0;
1719 sa->lft.hard_use_expires_seconds = 0;
1720
1721 switch (data->enc_alg)
1722 {
1723 case ENCR_UNDEFINED:
1724 /* no encryption */
1725 break;
1726 case ENCR_AES_CCM_ICV16:
1727 case ENCR_AES_GCM_ICV16:
1728 case ENCR_NULL_AUTH_AES_GMAC:
1729 case ENCR_CAMELLIA_CCM_ICV16:
1730 case ENCR_CHACHA20_POLY1305:
1731 icv_size += 32;
1732 /* FALL */
1733 case ENCR_AES_CCM_ICV12:
1734 case ENCR_AES_GCM_ICV12:
1735 case ENCR_CAMELLIA_CCM_ICV12:
1736 icv_size += 32;
1737 /* FALL */
1738 case ENCR_AES_CCM_ICV8:
1739 case ENCR_AES_GCM_ICV8:
1740 case ENCR_CAMELLIA_CCM_ICV8:
1741 {
1742 struct xfrm_algo_aead *algo;
1743
1744 alg_name = lookup_algorithm(ENCRYPTION_ALGORITHM, data->enc_alg);
1745 if (alg_name == NULL)
1746 {
1747 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1748 encryption_algorithm_names, data->enc_alg);
1749 goto failed;
1750 }
1751 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1752 encryption_algorithm_names, data->enc_alg,
1753 data->enc_key.len * 8);
1754
1755 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AEAD,
1756 sizeof(*algo) + data->enc_key.len);
1757 if (!algo)
1758 {
1759 goto failed;
1760 }
1761 algo->alg_key_len = data->enc_key.len * 8;
1762 algo->alg_icv_len = icv_size;
1763 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1764 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1765 memcpy(algo->alg_key, data->enc_key.ptr, data->enc_key.len);
1766 break;
1767 }
1768 default:
1769 {
1770 struct xfrm_algo *algo;
1771
1772 alg_name = lookup_algorithm(ENCRYPTION_ALGORITHM, data->enc_alg);
1773 if (alg_name == NULL)
1774 {
1775 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1776 encryption_algorithm_names, data->enc_alg);
1777 goto failed;
1778 }
1779 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1780 encryption_algorithm_names, data->enc_alg,
1781 data->enc_key.len * 8);
1782
1783 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_CRYPT,
1784 sizeof(*algo) + data->enc_key.len);
1785 if (!algo)
1786 {
1787 goto failed;
1788 }
1789 algo->alg_key_len = data->enc_key.len * 8;
1790 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1791 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1792 memcpy(algo->alg_key, data->enc_key.ptr, data->enc_key.len);
1793 }
1794 }
1795
1796 if (data->int_alg != AUTH_UNDEFINED)
1797 {
1798 u_int trunc_len = 0;
1799
1800 alg_name = lookup_algorithm(INTEGRITY_ALGORITHM, data->int_alg);
1801 if (alg_name == NULL)
1802 {
1803 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1804 integrity_algorithm_names, data->int_alg);
1805 goto failed;
1806 }
1807 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
1808 integrity_algorithm_names, data->int_alg, data->int_key.len * 8);
1809
1810 switch (data->int_alg)
1811 {
1812 case AUTH_HMAC_MD5_128:
1813 case AUTH_HMAC_SHA2_256_128:
1814 trunc_len = 128;
1815 break;
1816 case AUTH_HMAC_SHA1_160:
1817 trunc_len = 160;
1818 break;
1819 default:
1820 break;
1821 }
1822
1823 if (trunc_len)
1824 {
1825 struct xfrm_algo_auth* algo;
1826
1827 /* the kernel uses SHA256 with 96 bit truncation by default,
1828 * use specified truncation size supported by newer kernels.
1829 * also use this for untruncated MD5 and SHA1. */
1830 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AUTH_TRUNC,
1831 sizeof(*algo) + data->int_key.len);
1832 if (!algo)
1833 {
1834 goto failed;
1835 }
1836 algo->alg_key_len = data->int_key.len * 8;
1837 algo->alg_trunc_len = trunc_len;
1838 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1839 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1840 memcpy(algo->alg_key, data->int_key.ptr, data->int_key.len);
1841 }
1842 else
1843 {
1844 struct xfrm_algo* algo;
1845
1846 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AUTH,
1847 sizeof(*algo) + data->int_key.len);
1848 if (!algo)
1849 {
1850 goto failed;
1851 }
1852 algo->alg_key_len = data->int_key.len * 8;
1853 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1854 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1855 memcpy(algo->alg_key, data->int_key.ptr, data->int_key.len);
1856 }
1857 }
1858
1859 if (ipcomp != IPCOMP_NONE)
1860 {
1861 struct xfrm_algo* algo;
1862
1863 alg_name = lookup_algorithm(COMPRESSION_ALGORITHM, ipcomp);
1864 if (alg_name == NULL)
1865 {
1866 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1867 ipcomp_transform_names, ipcomp);
1868 goto failed;
1869 }
1870 DBG2(DBG_KNL, " using compression algorithm %N",
1871 ipcomp_transform_names, ipcomp);
1872
1873 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_COMP,
1874 sizeof(*algo));
1875 if (!algo)
1876 {
1877 goto failed;
1878 }
1879 algo->alg_key_len = 0;
1880 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1881 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1882 }
1883
1884 if (data->encap)
1885 {
1886 struct xfrm_encap_tmpl *tmpl;
1887
1888 tmpl = netlink_reserve(hdr, sizeof(request), XFRMA_ENCAP, sizeof(*tmpl));
1889 if (!tmpl)
1890 {
1891 goto failed;
1892 }
1893 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
1894 tmpl->encap_sport = htons(id->src->get_port(id->src));
1895 tmpl->encap_dport = htons(id->dst->get_port(id->dst));
1896 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
1897 /* encap_oa could probably be derived from the
1898 * traffic selectors [rfc4306, p39]. In the netlink kernel
1899 * implementation pluto does the same as we do here but it uses
1900 * encap_oa in the pfkey implementation.
1901 * BUT as /usr/src/linux/net/key/af_key.c indicates the kernel ignores
1902 * it anyway
1903 * -> does that mean that NAT-T encap doesn't work in transport mode?
1904 * No. The reason the kernel ignores NAT-OA is that it recomputes
1905 * (or, rather, just ignores) the checksum. If packets pass the IPsec
1906 * checks it marks them "checksum ok" so OA isn't needed. */
1907 }
1908
1909 if (!add_mark(hdr, sizeof(request), id->mark))
1910 {
1911 goto failed;
1912 }
1913
1914 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
1915 {
1916 goto failed;
1917 }
1918
1919 if (ipcomp == IPCOMP_NONE && (data->mark.value | data->mark.mask))
1920 {
1921 if (!add_uint32(hdr, sizeof(request), XFRMA_SET_MARK,
1922 data->mark.value) ||
1923 !add_uint32(hdr, sizeof(request), XFRMA_SET_MARK_MASK,
1924 data->mark.mask))
1925 {
1926 goto failed;
1927 }
1928 }
1929
1930 if (data->tfc && id->proto == IPPROTO_ESP && mode == MODE_TUNNEL)
1931 { /* the kernel supports TFC padding only for tunnel mode ESP SAs */
1932 if (!add_uint32(hdr, sizeof(request), XFRMA_TFCPAD, data->tfc))
1933 {
1934 goto failed;
1935 }
1936 }
1937
1938 if (id->proto != IPPROTO_COMP)
1939 {
1940 /* generally, we don't need a replay window for outbound SAs, however,
1941 * when using ESN the kernel rejects the attribute if it is 0 */
1942 if (!data->inbound && data->replay_window)
1943 {
1944 data->replay_window = data->esn ? 1 : 0;
1945 }
1946 if (data->replay_window != 0 && (data->esn || data->replay_window > 32))
1947 {
1948 /* for ESN or larger replay windows we need the new
1949 * XFRMA_REPLAY_ESN_VAL attribute to configure a bitmap */
1950 struct xfrm_replay_state_esn *replay;
1951 uint32_t bmp_size;
1952
1953 bmp_size = round_up(data->replay_window, sizeof(uint32_t) * 8) / 8;
1954 replay = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_ESN_VAL,
1955 sizeof(*replay) + bmp_size);
1956 if (!replay)
1957 {
1958 goto failed;
1959 }
1960 /* bmp_len contains number uf __u32's */
1961 replay->bmp_len = bmp_size / sizeof(uint32_t);
1962 replay->replay_window = data->replay_window;
1963 DBG2(DBG_KNL, " using replay window of %u packets",
1964 data->replay_window);
1965
1966 if (data->esn)
1967 {
1968 DBG2(DBG_KNL, " using extended sequence numbers (ESN)");
1969 sa->flags |= XFRM_STATE_ESN;
1970 }
1971 }
1972 else
1973 {
1974 DBG2(DBG_KNL, " using replay window of %u packets",
1975 data->replay_window);
1976 sa->replay_window = data->replay_window;
1977 }
1978
1979 DBG2(DBG_KNL, " HW offload: %N", hw_offload_names, data->hw_offload);
1980 if (!config_hw_offload(id, data, hdr, sizeof(request)))
1981 {
1982 DBG1(DBG_KNL, "failed to configure HW offload");
1983 goto failed;
1984 }
1985 }
1986
1987 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
1988 if (status == NOT_FOUND && data->update)
1989 {
1990 DBG1(DBG_KNL, "allocated SPI not found anymore, try to add SAD entry");
1991 hdr->nlmsg_type = XFRM_MSG_NEWSA;
1992 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
1993 }
1994
1995 if (status != SUCCESS)
1996 {
1997 DBG1(DBG_KNL, "unable to add SAD entry with SPI %.8x%s (%N)", ntohl(id->spi),
1998 markstr, status_names, status);
1999 status = FAILED;
2000 goto failed;
2001 }
2002
2003 status = SUCCESS;
2004
2005 failed:
2006 memwipe(&request, sizeof(request));
2007 return status;
2008 }
2009
2010 /**
2011 * Get the ESN replay state (i.e. sequence numbers) of an SA.
2012 *
2013 * Allocates into one the replay state structure we get from the kernel.
2014 */
2015 static void get_replay_state(private_kernel_netlink_ipsec_t *this,
2016 kernel_ipsec_sa_id_t *sa,
2017 struct xfrm_replay_state_esn **replay_esn,
2018 uint32_t *replay_esn_len,
2019 struct xfrm_replay_state **replay,
2020 struct xfrm_lifetime_cur **lifetime)
2021 {
2022 netlink_buf_t request;
2023 struct nlmsghdr *hdr, *out = NULL;
2024 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
2025 size_t len;
2026 struct rtattr *rta;
2027 size_t rtasize;
2028
2029 memset(&request, 0, sizeof(request));
2030
2031 DBG2(DBG_KNL, "querying replay state from SAD entry with SPI %.8x",
2032 ntohl(sa->spi));
2033
2034 hdr = &request.hdr;
2035 hdr->nlmsg_flags = NLM_F_REQUEST;
2036 hdr->nlmsg_type = XFRM_MSG_GETAE;
2037 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
2038
2039 aevent_id = NLMSG_DATA(hdr);
2040 aevent_id->flags = XFRM_AE_RVAL;
2041
2042 host2xfrm(sa->dst, &aevent_id->sa_id.daddr);
2043 aevent_id->sa_id.spi = sa->spi;
2044 aevent_id->sa_id.proto = sa->proto;
2045 aevent_id->sa_id.family = sa->dst->get_family(sa->dst);
2046
2047 if (!add_mark(hdr, sizeof(request), sa->mark))
2048 {
2049 return;
2050 }
2051 if (sa->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, sa->if_id))
2052 {
2053 return;
2054 }
2055
2056 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2057 {
2058 hdr = out;
2059 while (NLMSG_OK(hdr, len))
2060 {
2061 switch (hdr->nlmsg_type)
2062 {
2063 case XFRM_MSG_NEWAE:
2064 {
2065 out_aevent = NLMSG_DATA(hdr);
2066 break;
2067 }
2068 case NLMSG_ERROR:
2069 {
2070 struct nlmsgerr *err = NLMSG_DATA(hdr);
2071 DBG1(DBG_KNL, "querying replay state from SAD entry "
2072 "failed: %s (%d)", strerror(-err->error), -err->error);
2073 break;
2074 }
2075 default:
2076 hdr = NLMSG_NEXT(hdr, len);
2077 continue;
2078 case NLMSG_DONE:
2079 break;
2080 }
2081 break;
2082 }
2083 }
2084
2085 if (out_aevent)
2086 {
2087 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2088 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2089 while (RTA_OK(rta, rtasize))
2090 {
2091 if (rta->rta_type == XFRMA_LTIME_VAL &&
2092 RTA_PAYLOAD(rta) == sizeof(**lifetime))
2093 {
2094 free(*lifetime);
2095 *lifetime = malloc(RTA_PAYLOAD(rta));
2096 memcpy(*lifetime, RTA_DATA(rta), RTA_PAYLOAD(rta));
2097 }
2098 if (rta->rta_type == XFRMA_REPLAY_VAL &&
2099 RTA_PAYLOAD(rta) == sizeof(**replay))
2100 {
2101 free(*replay);
2102 *replay = malloc(RTA_PAYLOAD(rta));
2103 memcpy(*replay, RTA_DATA(rta), RTA_PAYLOAD(rta));
2104 }
2105 if (rta->rta_type == XFRMA_REPLAY_ESN_VAL &&
2106 RTA_PAYLOAD(rta) >= sizeof(**replay_esn))
2107 {
2108 free(*replay_esn);
2109 *replay_esn = malloc(RTA_PAYLOAD(rta));
2110 *replay_esn_len = RTA_PAYLOAD(rta);
2111 memcpy(*replay_esn, RTA_DATA(rta), RTA_PAYLOAD(rta));
2112 }
2113 rta = RTA_NEXT(rta, rtasize);
2114 }
2115 }
2116 free(out);
2117 }
2118
2119 METHOD(kernel_ipsec_t, query_sa, status_t,
2120 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2121 kernel_ipsec_query_sa_t *data, uint64_t *bytes, uint64_t *packets,
2122 time_t *time)
2123 {
2124 netlink_buf_t request;
2125 struct nlmsghdr *out = NULL, *hdr;
2126 struct xfrm_usersa_id *sa_id;
2127 struct xfrm_usersa_info *sa = NULL;
2128 status_t status = FAILED;
2129 size_t len;
2130 char markstr[32] = "";
2131
2132 memset(&request, 0, sizeof(request));
2133 format_mark(markstr, sizeof(markstr), id->mark);
2134
2135 DBG2(DBG_KNL, "querying SAD entry with SPI %.8x%s", ntohl(id->spi),
2136 markstr);
2137
2138 hdr = &request.hdr;
2139 hdr->nlmsg_flags = NLM_F_REQUEST;
2140 hdr->nlmsg_type = XFRM_MSG_GETSA;
2141 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2142
2143 sa_id = NLMSG_DATA(hdr);
2144 host2xfrm(id->dst, &sa_id->daddr);
2145 sa_id->spi = id->spi;
2146 sa_id->proto = id->proto;
2147 sa_id->family = id->dst->get_family(id->dst);
2148
2149 if (!add_mark(hdr, sizeof(request), id->mark))
2150 {
2151 return FAILED;
2152 }
2153 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2154 {
2155 return FAILED;
2156 }
2157
2158 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2159 {
2160 hdr = out;
2161 while (NLMSG_OK(hdr, len))
2162 {
2163 switch (hdr->nlmsg_type)
2164 {
2165 case XFRM_MSG_NEWSA:
2166 {
2167 sa = NLMSG_DATA(hdr);
2168 break;
2169 }
2170 case NLMSG_ERROR:
2171 {
2172 struct nlmsgerr *err = NLMSG_DATA(hdr);
2173
2174 DBG1(DBG_KNL, "querying SAD entry with SPI %.8x%s failed: "
2175 "%s (%d)", ntohl(id->spi), markstr,
2176 strerror(-err->error), -err->error);
2177 break;
2178 }
2179 default:
2180 hdr = NLMSG_NEXT(hdr, len);
2181 continue;
2182 case NLMSG_DONE:
2183 break;
2184 }
2185 break;
2186 }
2187 }
2188
2189 if (sa == NULL)
2190 {
2191 DBG2(DBG_KNL, "unable to query SAD entry with SPI %.8x%s",
2192 ntohl(id->spi), markstr);
2193 }
2194 else
2195 {
2196 if (bytes)
2197 {
2198 *bytes = sa->curlft.bytes;
2199 }
2200 if (packets)
2201 {
2202 *packets = sa->curlft.packets;
2203 }
2204 if (time)
2205 { /* curlft contains an "use" time, but that contains a timestamp
2206 * of the first use, not the last. Last use time must be queried
2207 * on the policy on Linux */
2208 *time = 0;
2209 }
2210 status = SUCCESS;
2211 }
2212 memwipe(out, len);
2213 free(out);
2214 return status;
2215 }
2216
2217 METHOD(kernel_ipsec_t, del_sa, status_t,
2218 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2219 kernel_ipsec_del_sa_t *data)
2220 {
2221 netlink_buf_t request;
2222 struct nlmsghdr *hdr;
2223 struct xfrm_usersa_id *sa_id;
2224 char markstr[32] = "";
2225
2226 /* if IPComp was used, we first delete the additional IPComp SA */
2227 if (data->cpi)
2228 {
2229 kernel_ipsec_sa_id_t ipcomp_id = {
2230 .src = id->src,
2231 .dst = id->dst,
2232 .spi = htonl(ntohs(data->cpi)),
2233 .proto = IPPROTO_COMP,
2234 .mark = id->mark,
2235 };
2236 kernel_ipsec_del_sa_t ipcomp = {};
2237 del_sa(this, &ipcomp_id, &ipcomp);
2238 }
2239
2240 memset(&request, 0, sizeof(request));
2241 format_mark(markstr, sizeof(markstr), id->mark);
2242
2243 DBG2(DBG_KNL, "deleting SAD entry with SPI %.8x%s", ntohl(id->spi),
2244 markstr);
2245
2246 hdr = &request.hdr;
2247 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2248 hdr->nlmsg_type = XFRM_MSG_DELSA;
2249 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2250
2251 sa_id = NLMSG_DATA(hdr);
2252 host2xfrm(id->dst, &sa_id->daddr);
2253 sa_id->spi = id->spi;
2254 sa_id->proto = id->proto;
2255 sa_id->family = id->dst->get_family(id->dst);
2256
2257 if (!add_mark(hdr, sizeof(request), id->mark))
2258 {
2259 return FAILED;
2260 }
2261 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2262 {
2263 return FAILED;
2264 }
2265
2266 switch (this->socket_xfrm->send_ack(this->socket_xfrm, hdr))
2267 {
2268 case SUCCESS:
2269 DBG2(DBG_KNL, "deleted SAD entry with SPI %.8x%s",
2270 ntohl(id->spi), markstr);
2271 return SUCCESS;
2272 case NOT_FOUND:
2273 return NOT_FOUND;
2274 default:
2275 DBG1(DBG_KNL, "unable to delete SAD entry with SPI %.8x%s",
2276 ntohl(id->spi), markstr);
2277 return FAILED;
2278 }
2279 }
2280
2281 METHOD(kernel_ipsec_t, update_sa, status_t,
2282 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2283 kernel_ipsec_update_sa_t *data)
2284 {
2285 netlink_buf_t request;
2286 struct nlmsghdr *hdr, *out_hdr = NULL, *out = NULL;
2287 struct xfrm_usersa_id *sa_id;
2288 struct xfrm_usersa_info *sa;
2289 size_t len;
2290 struct rtattr *rta;
2291 size_t rtasize;
2292 struct xfrm_encap_tmpl* encap = NULL;
2293 struct xfrm_replay_state *replay = NULL;
2294 struct xfrm_replay_state_esn *replay_esn = NULL;
2295 struct xfrm_lifetime_cur *lifetime = NULL;
2296 uint32_t replay_esn_len = 0;
2297 kernel_ipsec_del_sa_t del = { 0 };
2298 status_t status = FAILED;
2299 traffic_selector_t *ts;
2300 char markstr[32] = "";
2301
2302 /* if IPComp is used, we first update the IPComp SA */
2303 if (data->cpi)
2304 {
2305 kernel_ipsec_sa_id_t ipcomp_id = {
2306 .src = id->src,
2307 .dst = id->dst,
2308 .spi = htonl(ntohs(data->cpi)),
2309 .proto = IPPROTO_COMP,
2310 .mark = id->mark,
2311 .if_id = id->if_id,
2312 };
2313 kernel_ipsec_update_sa_t ipcomp = {
2314 .new_src = data->new_src,
2315 .new_dst = data->new_dst,
2316 };
2317 update_sa(this, &ipcomp_id, &ipcomp);
2318 }
2319
2320 memset(&request, 0, sizeof(request));
2321 format_mark(markstr, sizeof(markstr), id->mark);
2322
2323 DBG2(DBG_KNL, "querying SAD entry with SPI %.8x%s for update",
2324 ntohl(id->spi), markstr);
2325
2326 /* query the existing SA first */
2327 hdr = &request.hdr;
2328 hdr->nlmsg_flags = NLM_F_REQUEST;
2329 hdr->nlmsg_type = XFRM_MSG_GETSA;
2330 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2331
2332 sa_id = NLMSG_DATA(hdr);
2333 host2xfrm(id->dst, &sa_id->daddr);
2334 sa_id->spi = id->spi;
2335 sa_id->proto = id->proto;
2336 sa_id->family = id->dst->get_family(id->dst);
2337
2338 if (!add_mark(hdr, sizeof(request), id->mark))
2339 {
2340 return FAILED;
2341 }
2342 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2343 {
2344 return FAILED;
2345 }
2346
2347 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2348 {
2349 hdr = out;
2350 while (NLMSG_OK(hdr, len))
2351 {
2352 switch (hdr->nlmsg_type)
2353 {
2354 case XFRM_MSG_NEWSA:
2355 {
2356 out_hdr = hdr;
2357 break;
2358 }
2359 case NLMSG_ERROR:
2360 {
2361 struct nlmsgerr *err = NLMSG_DATA(hdr);
2362 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2363 strerror(-err->error), -err->error);
2364 break;
2365 }
2366 default:
2367 hdr = NLMSG_NEXT(hdr, len);
2368 continue;
2369 case NLMSG_DONE:
2370 break;
2371 }
2372 break;
2373 }
2374 }
2375 if (!out_hdr)
2376 {
2377 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x%s",
2378 ntohl(id->spi), markstr);
2379 goto failed;
2380 }
2381
2382 get_replay_state(this, id, &replay_esn, &replay_esn_len, &replay,
2383 &lifetime);
2384
2385 /* delete the old SA (without affecting the IPComp SA) */
2386 if (del_sa(this, id, &del) != SUCCESS)
2387 {
2388 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI %.8x%s",
2389 ntohl(id->spi), markstr);
2390 goto failed;
2391 }
2392
2393 DBG2(DBG_KNL, "updating SAD entry with SPI %.8x%s from %#H..%#H to "
2394 "%#H..%#H", ntohl(id->spi), markstr, id->src, id->dst, data->new_src,
2395 data->new_dst);
2396 /* copy over the SA from out to request */
2397 hdr = &request.hdr;
2398 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2399 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2400 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2401 sa = NLMSG_DATA(hdr);
2402 memcpy(sa, NLMSG_DATA(out_hdr), sizeof(struct xfrm_usersa_info));
2403 sa->family = data->new_dst->get_family(data->new_dst);
2404
2405 if (!id->src->ip_equals(id->src, data->new_src))
2406 {
2407 host2xfrm(data->new_src, &sa->saddr);
2408
2409 ts = selector2ts(&sa->sel, TRUE);
2410 if (ts && ts->is_host(ts, id->src))
2411 {
2412 ts->set_address(ts, data->new_src);
2413 ts2subnet(ts, &sa->sel.saddr, &sa->sel.prefixlen_s);
2414 }
2415 DESTROY_IF(ts);
2416 }
2417 if (!id->dst->ip_equals(id->dst, data->new_dst))
2418 {
2419 host2xfrm(data->new_dst, &sa->id.daddr);
2420
2421 ts = selector2ts(&sa->sel, FALSE);
2422 if (ts && ts->is_host(ts, id->dst))
2423 {
2424 ts->set_address(ts, data->new_dst);
2425 ts2subnet(ts, &sa->sel.daddr, &sa->sel.prefixlen_d);
2426 }
2427 DESTROY_IF(ts);
2428 }
2429
2430 rta = XFRM_RTA(out_hdr, struct xfrm_usersa_info);
2431 rtasize = XFRM_PAYLOAD(out_hdr, struct xfrm_usersa_info);
2432 while (RTA_OK(rta, rtasize))
2433 {
2434 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2435 if (rta->rta_type != XFRMA_ENCAP || data->new_encap)
2436 {
2437 if (rta->rta_type == XFRMA_ENCAP)
2438 { /* update encap tmpl */
2439 encap = RTA_DATA(rta);
2440 encap->encap_sport = ntohs(data->new_src->get_port(data->new_src));
2441 encap->encap_dport = ntohs(data->new_dst->get_port(data->new_dst));
2442 }
2443 if (rta->rta_type == XFRMA_OFFLOAD_DEV)
2444 { /* update offload device */
2445 struct xfrm_user_offload *offload;
2446 host_t *local;
2447 char *ifname;
2448
2449 offload = RTA_DATA(rta);
2450 local = offload->flags & XFRM_OFFLOAD_INBOUND ? data->new_dst
2451 : data->new_src;
2452
2453 if (charon->kernel->get_interface(charon->kernel, local,
2454 &ifname))
2455 {
2456 offload->ifindex = if_nametoindex(ifname);
2457 if (local->get_family(local) == AF_INET6)
2458 {
2459 offload->flags |= XFRM_OFFLOAD_IPV6;
2460 }
2461 else
2462 {
2463 offload->flags &= ~XFRM_OFFLOAD_IPV6;
2464 }
2465 free(ifname);
2466 }
2467 }
2468 netlink_add_attribute(hdr, rta->rta_type,
2469 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)),
2470 sizeof(request));
2471 }
2472 rta = RTA_NEXT(rta, rtasize);
2473 }
2474
2475 if (encap == NULL && data->new_encap)
2476 { /* add tmpl if we are enabling it */
2477 encap = netlink_reserve(hdr, sizeof(request), XFRMA_ENCAP,
2478 sizeof(*encap));
2479 if (!encap)
2480 {
2481 goto failed;
2482 }
2483 encap->encap_type = UDP_ENCAP_ESPINUDP;
2484 encap->encap_sport = ntohs(data->new_src->get_port(data->new_src));
2485 encap->encap_dport = ntohs(data->new_dst->get_port(data->new_dst));
2486 memset(&encap->encap_oa, 0, sizeof (xfrm_address_t));
2487 }
2488
2489 if (replay_esn)
2490 {
2491 struct xfrm_replay_state_esn *state;
2492
2493 state = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_ESN_VAL,
2494 replay_esn_len);
2495 if (!state)
2496 {
2497 goto failed;
2498 }
2499 memcpy(state, replay_esn, replay_esn_len);
2500 }
2501 else if (replay)
2502 {
2503 struct xfrm_replay_state *state;
2504
2505 state = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_VAL,
2506 sizeof(*state));
2507 if (!state)
2508 {
2509 goto failed;
2510 }
2511 memcpy(state, replay, sizeof(*state));
2512 }
2513 else
2514 {
2515 DBG1(DBG_KNL, "unable to copy replay state from old SAD entry with "
2516 "SPI %.8x%s", ntohl(id->spi), markstr);
2517 }
2518 if (lifetime)
2519 {
2520 struct xfrm_lifetime_cur *state;
2521
2522 state = netlink_reserve(hdr, sizeof(request), XFRMA_LTIME_VAL,
2523 sizeof(*state));
2524 if (!state)
2525 {
2526 goto failed;
2527 }
2528 memcpy(state, lifetime, sizeof(*state));
2529 }
2530 else
2531 {
2532 DBG1(DBG_KNL, "unable to copy usage stats from old SAD entry with "
2533 "SPI %.8x%s", ntohl(id->spi), markstr);
2534 }
2535
2536 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
2537 {
2538 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x%s",
2539 ntohl(id->spi), markstr);
2540 goto failed;
2541 }
2542
2543 status = SUCCESS;
2544 failed:
2545 free(replay);
2546 free(replay_esn);
2547 free(lifetime);
2548 memwipe(out, len);
2549 memwipe(&request, sizeof(request));
2550 free(out);
2551
2552 return status;
2553 }
2554
2555 METHOD(kernel_ipsec_t, flush_sas, status_t,
2556 private_kernel_netlink_ipsec_t *this)
2557 {
2558 netlink_buf_t request;
2559 struct nlmsghdr *hdr;
2560 struct xfrm_usersa_flush *flush;
2561 struct {
2562 uint8_t proto;
2563 char *name;
2564 } protos[] = {
2565 { IPPROTO_AH, "AH" },
2566 { IPPROTO_ESP, "ESP" },
2567 { IPPROTO_COMP, "IPComp" },
2568 };
2569 int i;
2570
2571 memset(&request, 0, sizeof(request));
2572
2573 hdr = &request.hdr;
2574 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2575 hdr->nlmsg_type = XFRM_MSG_FLUSHSA;
2576 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
2577
2578 flush = NLMSG_DATA(hdr);
2579
2580 for (i = 0; i < countof(protos); i++)
2581 {
2582 DBG2(DBG_KNL, "flushing all %s SAD entries", protos[i].name);
2583
2584 flush->proto = protos[i].proto;
2585
2586 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
2587 {
2588 DBG1(DBG_KNL, "unable to flush %s SAD entries", protos[i].name);
2589 return FAILED;
2590 }
2591 }
2592 return SUCCESS;
2593 }
2594
2595 /**
2596 * Unlock the mutex and signal waiting threads
2597 */
2598 static void policy_change_done(private_kernel_netlink_ipsec_t *this,
2599 policy_entry_t *policy)
2600 {
2601 policy->working = FALSE;
2602 if (policy->waiting)
2603 { /* don't need to wake threads waiting for other policies */
2604 this->condvar->broadcast(this->condvar);
2605 }
2606 this->mutex->unlock(this->mutex);
2607 }
2608
2609 /**
2610 * Install a route for the given policy if enabled and required
2611 */
2612 static void install_route(private_kernel_netlink_ipsec_t *this,
2613 policy_entry_t *policy, policy_sa_t *mapping, ipsec_sa_t *ipsec)
2614 {
2615 policy_sa_out_t *out = (policy_sa_out_t*)mapping;
2616 route_entry_t *route;
2617 host_t *iface;
2618
2619 INIT(route,
2620 .prefixlen = policy->sel.prefixlen_d,
2621 .pass = mapping->type == POLICY_PASS,
2622 );
2623
2624 if (charon->kernel->get_address_by_ts(charon->kernel, out->src_ts,
2625 &route->src_ip, NULL) != SUCCESS)
2626 {
2627 if (!route->pass)
2628 {
2629 free(route);
2630 return;
2631 }
2632 /* allow blank source IP for passthrough policies */
2633 route->src_ip = host_create_any(policy->sel.family);
2634 }
2635
2636 if (!ipsec->dst->is_anyaddr(ipsec->dst))
2637 {
2638 route->gateway = charon->kernel->get_nexthop(charon->kernel,
2639 ipsec->dst, -1, ipsec->src,
2640 &route->if_name);
2641 }
2642 else
2643 { /* for shunt policies */
2644 iface = xfrm2host(policy->sel.family, &policy->sel.daddr, 0);
2645 route->gateway = charon->kernel->get_nexthop(charon->kernel,
2646 iface, policy->sel.prefixlen_d,
2647 route->src_ip, &route->if_name);
2648 iface->destroy(iface);
2649 }
2650 route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2651 memcpy(route->dst_net.ptr, &policy->sel.daddr, route->dst_net.len);
2652
2653 /* get the interface to install the route for, if we haven't one yet.
2654 * If we have a local address, use it. Otherwise (for shunt policies)
2655 * use the route's source address. */
2656 if (!route->if_name)
2657 {
2658 iface = ipsec->src;
2659 if (iface->is_anyaddr(iface))
2660 {
2661 iface = route->src_ip;
2662 }
2663 if (!charon->kernel->get_interface(charon->kernel, iface,
2664 &route->if_name) &&
2665 !route->pass)
2666 { /* don't require an interface for passthrough policies */
2667 route_entry_destroy(route);
2668 return;
2669 }
2670 }
2671 if (policy->route)
2672 {
2673 route_entry_t *old = policy->route;
2674 if (route_entry_equals(old, route))
2675 {
2676 route_entry_destroy(route);
2677 return;
2678 }
2679 /* uninstall previously installed route */
2680 if (charon->kernel->del_route(charon->kernel, old->dst_net,
2681 old->prefixlen, old->gateway,
2682 old->src_ip, old->if_name,
2683 old->pass) != SUCCESS)
2684 {
2685 DBG1(DBG_KNL, "error uninstalling route installed with policy "
2686 "%R === %R %N", out->src_ts, out->dst_ts, policy_dir_names,
2687 policy->direction);
2688 }
2689 route_entry_destroy(old);
2690 policy->route = NULL;
2691 }
2692
2693 DBG2(DBG_KNL, "installing route: %R via %H src %H dev %s", out->dst_ts,
2694 route->gateway, route->src_ip, route->if_name);
2695 switch (charon->kernel->add_route(charon->kernel, route->dst_net,
2696 route->prefixlen, route->gateway,
2697 route->src_ip, route->if_name,
2698 route->pass))
2699 {
2700 default:
2701 DBG1(DBG_KNL, "unable to install source route for %H",
2702 route->src_ip);
2703 /* FALL */
2704 case ALREADY_DONE:
2705 /* route exists, do not uninstall */
2706 route_entry_destroy(route);
2707 break;
2708 case SUCCESS:
2709 /* cache the installed route */
2710 policy->route = route;
2711 break;
2712 }
2713 }
2714
2715 /**
2716 * Add or update a policy in the kernel.
2717 *
2718 * Note: The mutex has to be locked when entering this function
2719 * and is unlocked here in any case.
2720 */
2721 static status_t add_policy_internal(private_kernel_netlink_ipsec_t *this,
2722 policy_entry_t *policy, policy_sa_t *mapping, bool update)
2723 {
2724 netlink_buf_t request;
2725 policy_entry_t clone;
2726 ipsec_sa_t *ipsec = mapping->sa;
2727 struct xfrm_userpolicy_info *policy_info;
2728 struct nlmsghdr *hdr;
2729 status_t status;
2730 int i;
2731
2732 /* clone the policy so we are able to check it out again later */
2733 memcpy(&clone, policy, sizeof(policy_entry_t));
2734
2735 memset(&request, 0, sizeof(request));
2736 hdr = &request.hdr;
2737 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2738 hdr->nlmsg_type = update ? XFRM_MSG_UPDPOLICY : XFRM_MSG_NEWPOLICY;
2739 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2740
2741 policy_info = NLMSG_DATA(hdr);
2742 policy_info->sel = policy->sel;
2743 policy_info->dir = policy->direction;
2744
2745 /* calculate priority based on selector size, small size = high prio */
2746 policy_info->priority = mapping->priority;
2747 policy_info->action = mapping->type != POLICY_DROP ? XFRM_POLICY_ALLOW
2748 : XFRM_POLICY_BLOCK;
2749 policy_info->share = XFRM_SHARE_ANY;
2750
2751 /* policies don't expire */
2752 policy_info->lft.soft_byte_limit = XFRM_INF;
2753 policy_info->lft.soft_packet_limit = XFRM_INF;
2754 policy_info->lft.hard_byte_limit = XFRM_INF;
2755 policy_info->lft.hard_packet_limit = XFRM_INF;
2756 policy_info->lft.soft_add_expires_seconds = 0;
2757 policy_info->lft.hard_add_expires_seconds = 0;
2758 policy_info->lft.soft_use_expires_seconds = 0;
2759 policy_info->lft.hard_use_expires_seconds = 0;
2760
2761 if (mapping->type == POLICY_IPSEC && ipsec->cfg.reqid)
2762 {
2763 struct xfrm_user_tmpl *tmpl;
2764 struct {
2765 uint8_t proto;
2766 uint32_t spi;
2767 bool use;
2768 } protos[] = {
2769 { IPPROTO_COMP, htonl(ntohs(ipsec->cfg.ipcomp.cpi)),
2770 ipsec->cfg.ipcomp.transform != IPCOMP_NONE },
2771 { IPPROTO_ESP, ipsec->cfg.esp.spi, ipsec->cfg.esp.use },
2772 { IPPROTO_AH, ipsec->cfg.ah.spi, ipsec->cfg.ah.use },
2773 };
2774 ipsec_mode_t proto_mode = ipsec->cfg.mode;
2775 int count = 0;
2776
2777 for (i = 0; i < countof(protos); i++)
2778 {
2779 if (protos[i].use)
2780 {
2781 count++;
2782 }
2783 }
2784 tmpl = netlink_reserve(hdr, sizeof(request), XFRMA_TMPL,
2785 count * sizeof(*tmpl));
2786 if (!tmpl)
2787 {
2788 policy_change_done(this, policy);
2789 return FAILED;
2790 }
2791
2792 for (i = 0; i < countof(protos); i++)
2793 {
2794 if (!protos[i].use)
2795 {
2796 continue;
2797 }
2798 tmpl->reqid = ipsec->cfg.reqid;
2799 tmpl->id.proto = protos[i].proto;
2800 if (policy->direction == POLICY_OUT)
2801 {
2802 tmpl->id.spi = protos[i].spi;
2803 }
2804 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2805 tmpl->mode = mode2kernel(proto_mode);
2806 tmpl->optional = protos[i].proto == IPPROTO_COMP &&
2807 policy->direction != POLICY_OUT;
2808 tmpl->family = ipsec->src->get_family(ipsec->src);
2809
2810 if (proto_mode == MODE_TUNNEL || proto_mode == MODE_BEET)
2811 { /* only for tunnel mode */
2812 host2xfrm(ipsec->src, &tmpl->saddr);
2813 host2xfrm(ipsec->dst, &tmpl->id.daddr);
2814 }
2815
2816 tmpl++;
2817
2818 /* use transport mode for other SAs */
2819 proto_mode = MODE_TRANSPORT;
2820 }
2821 }
2822
2823 if (!add_mark(hdr, sizeof(request), ipsec->mark))
2824 {
2825 policy_change_done(this, policy);
2826 return FAILED;
2827 }
2828 if (ipsec->if_id &&
2829 !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, ipsec->if_id))
2830 {
2831 policy_change_done(this, policy);
2832 return FAILED;
2833 }
2834 this->mutex->unlock(this->mutex);
2835
2836 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2837 if (status == ALREADY_DONE && !update)
2838 {
2839 DBG1(DBG_KNL, "policy already exists, try to update it");
2840 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2841 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2842 }
2843
2844 this->mutex->lock(this->mutex);
2845 if (status != SUCCESS)
2846 {
2847 policy_change_done(this, policy);
2848 return FAILED;
2849 }
2850 /* install a route, if:
2851 * - this is an outbound policy (to just get one for each child)
2852 * - routing is not disabled via strongswan.conf
2853 * - the selector is not for a specific protocol/port
2854 * - no XFRM interface ID is configured
2855 * - we are in tunnel/BEET mode or install a bypass policy
2856 */
2857 if (policy->direction == POLICY_OUT && this->install_routes &&
2858 !policy->sel.proto && !policy->sel.dport && !policy->sel.sport &&
2859 !policy->if_id)
2860 {
2861 if (mapping->type == POLICY_PASS ||
2862 (mapping->type == POLICY_IPSEC && ipsec->cfg.mode != MODE_TRANSPORT))
2863 {
2864 install_route(this, policy, mapping, ipsec);
2865 }
2866 }
2867 policy_change_done(this, policy);
2868 return SUCCESS;
2869 }
2870
2871 METHOD(kernel_ipsec_t, add_policy, status_t,
2872 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
2873 kernel_ipsec_manage_policy_t *data)
2874 {
2875 policy_entry_t *policy, *current;
2876 policy_sa_t *assigned_sa, *current_sa;
2877 enumerator_t *enumerator;
2878 bool found = FALSE, update = TRUE;
2879 char markstr[32] = "";
2880 uint32_t cur_priority = 0;
2881 int use_count;
2882
2883 /* create a policy */
2884 INIT(policy,
2885 .sel = ts2selector(id->src_ts, id->dst_ts, id->interface),
2886 .mark = id->mark.value & id->mark.mask,
2887 .if_id = id->if_id,
2888 .direction = id->dir,
2889 .reqid = data->sa->reqid,
2890 );
2891 format_mark(markstr, sizeof(markstr), id->mark);
2892
2893 /* find the policy, which matches EXACTLY */
2894 this->mutex->lock(this->mutex);
2895 current = this->policies->get(this->policies, policy);
2896 if (current)
2897 {
2898 if (current->reqid && data->sa->reqid &&
2899 current->reqid != data->sa->reqid)
2900 {
2901 DBG1(DBG_CFG, "unable to install policy %R === %R %N%s for reqid "
2902 "%u, the same policy for reqid %u exists",
2903 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr,
2904 data->sa->reqid, current->reqid);
2905 policy_entry_destroy(this, policy);
2906 this->mutex->unlock(this->mutex);
2907 return INVALID_STATE;
2908 }
2909 /* use existing policy */
2910 DBG2(DBG_KNL, "policy %R === %R %N%s already exists, increasing "
2911 "refcount", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
2912 markstr);
2913 policy_entry_destroy(this, policy);
2914 policy = current;
2915 found = TRUE;
2916
2917 policy->waiting++;
2918 while (policy->working)
2919 {
2920 this->condvar->wait(this->condvar, this->mutex);
2921 }
2922 policy->waiting--;
2923 policy->working = TRUE;
2924 }
2925 else
2926 { /* use the new one, if we have no such policy */
2927 policy->used_by = linked_list_create();
2928 this->policies->put(this->policies, policy, policy);
2929 }
2930
2931 /* cache the assigned IPsec SA */
2932 assigned_sa = policy_sa_create(this, id->dir, data->type, data->src,
2933 data->dst, id->src_ts, id->dst_ts, id->mark,
2934 id->if_id, data->sa);
2935 assigned_sa->auto_priority = get_priority(policy, data->prio, id->interface);
2936 assigned_sa->priority = this->get_priority ? this->get_priority(id, data)
2937 : data->manual_prio;
2938 assigned_sa->priority = assigned_sa->priority ?: assigned_sa->auto_priority;
2939
2940 /* insert the SA according to its priority */
2941 enumerator = policy->used_by->create_enumerator(policy->used_by);
2942 while (enumerator->enumerate(enumerator, (void**)&current_sa))
2943 {
2944 if (current_sa->priority > assigned_sa->priority)
2945 {
2946 break;
2947 }
2948 if (current_sa->priority == assigned_sa->priority)
2949 {
2950 /* in case of equal manual prios order SAs by automatic priority */
2951 if (current_sa->auto_priority > assigned_sa->auto_priority)
2952 {
2953 break;
2954 }
2955 /* prefer SAs with a reqid over those without */
2956 if (current_sa->auto_priority == assigned_sa->auto_priority &&
2957 (!current_sa->sa->cfg.reqid || assigned_sa->sa->cfg.reqid))
2958 {
2959 break;
2960 }
2961 }
2962 if (update)
2963 {
2964 cur_priority = current_sa->priority;
2965 update = FALSE;
2966 }
2967 }
2968 policy->used_by->insert_before(policy->used_by, enumerator, assigned_sa);
2969 enumerator->destroy(enumerator);
2970
2971 use_count = policy->used_by->get_count(policy->used_by);
2972 if (!update)
2973 { /* we don't update the policy if the priority is lower than that of
2974 * the currently installed one */
2975 policy_change_done(this, policy);
2976 DBG2(DBG_KNL, "not updating policy %R === %R %N%s [priority %u, "
2977 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names,
2978 id->dir, markstr, cur_priority, use_count);
2979 return SUCCESS;
2980 }
2981 policy->reqid = assigned_sa->sa->cfg.reqid;
2982
2983 if (this->policy_update)
2984 {
2985 found = TRUE;
2986 }
2987
2988 DBG2(DBG_KNL, "%s policy %R === %R %N%s [priority %u, refcount %d]",
2989 found ? "updating" : "adding", id->src_ts, id->dst_ts,
2990 policy_dir_names, id->dir, markstr, assigned_sa->priority, use_count);
2991
2992 if (add_policy_internal(this, policy, assigned_sa, found) != SUCCESS)
2993 {
2994 DBG1(DBG_KNL, "unable to %s policy %R === %R %N%s",
2995 found ? "update" : "add", id->src_ts, id->dst_ts,
2996 policy_dir_names, id->dir, markstr);
2997 return FAILED;
2998 }
2999 return SUCCESS;
3000 }
3001
3002 METHOD(kernel_ipsec_t, query_policy, status_t,
3003 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
3004 kernel_ipsec_query_policy_t *data, time_t *use_time)
3005 {
3006 netlink_buf_t request;
3007 struct nlmsghdr *out = NULL, *hdr;
3008 struct xfrm_userpolicy_id *policy_id;
3009 struct xfrm_userpolicy_info *policy = NULL;
3010 size_t len;
3011 char markstr[32] = "";
3012
3013 memset(&request, 0, sizeof(request));
3014 format_mark(markstr, sizeof(markstr), id->mark);
3015
3016 DBG2(DBG_KNL, "querying policy %R === %R %N%s", id->src_ts, id->dst_ts,
3017 policy_dir_names, id->dir, markstr);
3018
3019 hdr = &request.hdr;
3020 hdr->nlmsg_flags = NLM_F_REQUEST;
3021 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
3022 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3023
3024 policy_id = NLMSG_DATA(hdr);
3025 policy_id->sel = ts2selector(id->src_ts, id->dst_ts, id->interface);
3026 policy_id->dir = id->dir;
3027
3028 if (!add_mark(hdr, sizeof(request), id->mark))
3029 {
3030 return FAILED;
3031 }
3032 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
3033 {
3034 return FAILED;
3035 }
3036
3037 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
3038 {
3039 hdr = out;
3040 while (NLMSG_OK(hdr, len))
3041 {
3042 switch (hdr->nlmsg_type)
3043 {
3044 case XFRM_MSG_NEWPOLICY:
3045 {
3046 policy = NLMSG_DATA(hdr);
3047 break;
3048 }
3049 case NLMSG_ERROR:
3050 {
3051 struct nlmsgerr *err = NLMSG_DATA(hdr);
3052 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
3053 strerror(-err->error), -err->error);
3054 break;
3055 }
3056 default:
3057 hdr = NLMSG_NEXT(hdr, len);
3058 continue;
3059 case NLMSG_DONE:
3060 break;
3061 }
3062 break;
3063 }
3064 }
3065
3066 if (policy == NULL)
3067 {
3068 DBG2(DBG_KNL, "unable to query policy %R === %R %N%s", id->src_ts,
3069 id->dst_ts, policy_dir_names, id->dir, markstr);
3070 free(out);
3071 return FAILED;
3072 }
3073
3074 if (policy->curlft.use_time)
3075 {
3076 /* we need the monotonic time, but the kernel returns system time. */
3077 *use_time = time_monotonic(NULL) - (time(NULL) - policy->curlft.use_time);
3078 }
3079 else
3080 {
3081 *use_time = 0;
3082 }
3083
3084 free(out);
3085 return SUCCESS;
3086 }
3087
3088 METHOD(kernel_ipsec_t, del_policy, status_t,
3089 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
3090 kernel_ipsec_manage_policy_t *data)
3091 {
3092 policy_entry_t *current, policy;
3093 enumerator_t *enumerator;
3094 policy_sa_t *mapping;
3095 netlink_buf_t request;
3096 struct nlmsghdr *hdr;
3097 struct xfrm_userpolicy_id *policy_id;
3098 bool is_installed = TRUE;
3099 uint32_t priority, auto_priority, cur_priority;
3100 ipsec_sa_t assigned_sa = {
3101 .src = data->src,
3102 .dst = data->dst,
3103 .mark = id->mark,
3104 .if_id = id->if_id,
3105 .cfg = *data->sa,
3106 };
3107 char markstr[32] = "";
3108 int use_count;
3109 status_t status = SUCCESS;
3110
3111 format_mark(markstr, sizeof(markstr), id->mark);
3112
3113 DBG2(DBG_KNL, "deleting policy %R === %R %N%s", id->src_ts, id->dst_ts,
3114 policy_dir_names, id->dir, markstr);
3115
3116 /* create a policy */
3117 memset(&policy, 0, sizeof(policy_entry_t));
3118 policy.sel = ts2selector(id->src_ts, id->dst_ts, id->interface);
3119 policy.mark = id->mark.value & id->mark.mask;
3120 policy.if_id = id->if_id;
3121 policy.direction = id->dir;
3122
3123 /* find the policy */
3124 this->mutex->lock(this->mutex);
3125 current = this->policies->get(this->policies, &policy);
3126 if (!current)
3127 {
3128 DBG1(DBG_KNL, "deleting policy %R === %R %N%s failed, not found",
3129 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr);
3130 this->mutex->unlock(this->mutex);
3131 return NOT_FOUND;
3132 }
3133 current->waiting++;
3134 while (current->working)
3135 {
3136 this->condvar->wait(this->condvar, this->mutex);
3137 }
3138 current->working = TRUE;
3139 current->waiting--;
3140
3141 /* remove mapping to SA by reqid and priority */
3142 auto_priority = get_priority(current, data->prio,id->interface);
3143 priority = this->get_priority ? this->get_priority(id, data)
3144 : data->manual_prio;
3145 priority = priority ?: auto_priority;
3146
3147 enumerator = current->used_by->create_enumerator(current->used_by);
3148 while (enumerator->enumerate(enumerator, (void**)&mapping))
3149 {
3150 if (priority == mapping->priority &&
3151 auto_priority == mapping->auto_priority &&
3152 data->type == mapping->type &&
3153 ipsec_sa_equals(mapping->sa, &assigned_sa))
3154 {
3155 current->used_by->remove_at(current->used_by, enumerator);
3156 policy_sa_destroy(mapping, id->dir, this);
3157 break;
3158 }
3159 if (is_installed)
3160 {
3161 cur_priority = mapping->priority;
3162 is_installed = FALSE;
3163 }
3164 }
3165 enumerator->destroy(enumerator);
3166
3167 use_count = current->used_by->get_count(current->used_by);
3168 if (use_count > 0)
3169 { /* policy is used by more SAs, keep in kernel */
3170 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
3171 if (!is_installed)
3172 { /* no need to update as the policy was not installed for this SA */
3173 policy_change_done(this, current);
3174 DBG2(DBG_KNL, "not updating policy %R === %R %N%s [priority %u, "
3175 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names,
3176 id->dir, markstr, cur_priority, use_count);
3177 return SUCCESS;
3178 }
3179 current->used_by->get_first(current->used_by, (void**)&mapping);
3180 current->reqid = mapping->sa->cfg.reqid;
3181
3182 DBG2(DBG_KNL, "updating policy %R === %R %N%s [priority %u, "
3183 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
3184 markstr, mapping->priority, use_count);
3185
3186 if (add_policy_internal(this, current, mapping, TRUE) != SUCCESS)
3187 {
3188 DBG1(DBG_KNL, "unable to update policy %R === %R %N%s",
3189 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr);
3190 return FAILED;
3191 }
3192 return SUCCESS;
3193 }
3194
3195 memset(&request, 0, sizeof(request));
3196
3197 hdr = &request.hdr;
3198 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3199 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
3200 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3201
3202 policy_id = NLMSG_DATA(hdr);
3203 policy_id->sel = current->sel;
3204 policy_id->dir = id->dir;
3205
3206 if (!add_mark(hdr, sizeof(request), id->mark))
3207 {
3208 policy_change_done(this, current);
3209 return FAILED;
3210 }
3211 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
3212 {
3213 policy_change_done(this, current);
3214 return FAILED;
3215 }
3216
3217 if (current->route)
3218 {
3219 route_entry_t *route = current->route;
3220 if (charon->kernel->del_route(charon->kernel, route->dst_net,
3221 route->prefixlen, route->gateway,
3222 route->src_ip, route->if_name,
3223 route->pass) != SUCCESS)
3224 {
3225 DBG1(DBG_KNL, "error uninstalling route installed with policy "
3226 "%R === %R %N%s", id->src_ts, id->dst_ts, policy_dir_names,
3227 id->dir, markstr);
3228 }
3229 }
3230 this->mutex->unlock(this->mutex);
3231
3232 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)