4 * @brief Implementation of socket_t.
9 * Copyright (C) 2006 Tobias Brunner, Daniel Roethlisberger
10 * Copyright (C) 2005-2006 Martin Willi
11 * Copyright (C) 2005 Jan Hutter
12 * Hochschule fuer Technik Rapperswil
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the
16 * Free Software Foundation; either version 2 of the License, or (at your
17 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
19 * This program is distributed in the hope that it will be useful, but
20 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 #include <sys/types.h>
27 #include <sys/socket.h>
33 #include <sys/ioctl.h>
34 #include <netinet/in.h>
35 #include <netinet/ip.h>
36 #include <netinet/ip6.h>
37 #include <netinet/udp.h>
38 #include <linux/ipsec.h>
39 #include <linux/filter.h>
/* Header and marker sizes; IP_LEN, UDP_LEN and MARKER_LEN are used further
 * down to compute payload offsets and the lengths returned by the filter. */
47 /* constants for packet handling */
48 #define IP_LEN sizeof(struct iphdr)
49 #define IP6_LEN sizeof(struct ip6_hdr)
50 #define UDP_LEN sizeof(struct udphdr)
51 #define MARKER_LEN sizeof(u_int32_t)
/* Byte offsets. The two IKE_* offsets are added to the start of the IKE
 * header by the receive filter in open_recv_socket(). */
53 /* offsets for packet handling */
54 #define IP_PROTO_OFFSET 9
55 #define IP6_PROTO_OFFSET 6
56 #define IKE_VERSION_OFFSET 17
57 #define IKE_LENGTH_OFFSET 24
/* The #ifndef blocks below supply fallback values only when the system
 * headers included above do not already define the respective name. */
60 #ifndef IP_IPSEC_POLICY
61 #define IP_IPSEC_POLICY 16
62 #endif /*IP_IPSEC_POLICY*/
64 /* from linux/udp.h */
69 #ifndef UDP_ENCAP_ESPINUDP
70 #define UDP_ENCAP_ESPINUDP 2
71 #endif /*UDP_ENCAP_ESPINUDP*/
73 /* needed for older kernel headers */
74 #ifndef IPV6_2292PKTINFO
75 #define IPV6_2292PKTINFO 2
76 #endif /*IPV6_2292PKTINFO*/
78 /* missing on uclibc */
79 #ifndef IPV6_IPSEC_POLICY
80 #define IPV6_IPSEC_POLICY 34
81 #endif /*IPV6_IPSEC_POLICY*/
/* Private object behind the public socket_t interface.
 * NOTE(review): the field declarations themselves are not visible in this
 * view, only their descriptions. The functions below access this->public,
 * this->port, this->natt_port, this->recv4, this->recv6, this->send4,
 * this->send6, this->send4_natt and this->send6_natt. */
83 typedef struct private_socket_t private_socket_t
;
86 * Private data of an socket_t object
88 struct private_socket_t
{
100 * port used for nat-t
105 * raw receiver socket for IPv4
110 * raw receiver socket for IPv6
115 * send socket on regular port for IPv4
120 * send socket on regular port for IPv6
125 * send socket on nat-t port for IPv4
130 * send socket on nat-t port for IPv6
/*
 * receiver(): blocks in select() on the raw IPv4/IPv6 receive sockets, reads
 * one datagram from whichever is readable (IPv4 is tested first), derives the
 * source and destination hosts, and stores a heap copy of the payload in a
 * newly created packet_t.
 *
 * NOTE(review): braces, return statements and several declarations (rfds,
 * bytes_read, ip, udp, pkt, data, msg, iov, ancillary) are not visible in
 * this view, so the returned status codes and the hand-over through the
 * `packet` out-parameter are not described here.
 */
136 * implementation of socket_t.receive
138 static status_t
receiver(private_socket_t
*this, packet_t
**packet
)
/* buffer a single datagram is read into */
140 char buffer
[MAX_PACKET
];
144 host_t
*source
= NULL
, *dest
= NULL
;
146 int data_offset
, oldstate
;
/* both receive sockets are added to the read set for select() */
153 FD_SET(this->recv4
, &rfds
);
157 FD_SET(this->recv6
, &rfds
);
160 DBG2(DBG_NET
, "waiting for data on raw sockets");
/* thread cancellation is enabled only around the blocking select(); the
 * saved state is restored on the failure path and on the success path */
162 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE
, &oldstate
);
163 if (select(max(this->recv4
, this->recv6
) + 1, &rfds
, NULL
, NULL
, NULL
) <= 0)
165 pthread_setcancelstate(oldstate
, NULL
);
168 pthread_setcancelstate(oldstate
, NULL
);
/* IPv4 path: taken when recv4 is non-zero and readable */
170 if (this->recv4
&& FD_ISSET(this->recv4
, &rfds
))
172 /* IPv4 raw sockets return the IP header. We read src/dest
173 * information directly from the raw header */
175 struct sockaddr_in src
, dst
;
177 bytes_read
= recv(this->recv4
, buffer
, MAX_PACKET
, 0);
180 DBG1(DBG_NET
, "error reading from IPv4 socket: %m");
183 DBG3(DBG_NET
, "received IPv4 packet %b", buffer
, bytes_read
);
185 /* read source/dest from raw IP/UDP header */
/* minimum accepted size is IP header + UDP header + marker */
186 if (bytes_read
< IP_LEN
+ UDP_LEN
+ MARKER_LEN
)
188 DBG1(DBG_NET
, "received IPv4 packet too short (%d bytes)",
/* the UDP header is taken to start IP_LEN bytes into the buffer */
192 ip
= (struct iphdr
*) buffer
;
193 udp
= (struct udphdr
*) (buffer
+ IP_LEN
);
194 src
.sin_family
= AF_INET
;
195 src
.sin_addr
.s_addr
= ip
->saddr
;
196 src
.sin_port
= udp
->source
;
197 dst
.sin_family
= AF_INET
;
198 dst
.sin_addr
.s_addr
= ip
->daddr
;
199 dst
.sin_port
= udp
->dest
;
200 source
= host_create_from_sockaddr((sockaddr_t
*)&src
);
201 dest
= host_create_from_sockaddr((sockaddr_t
*)&dst
);
203 pkt
= packet_create();
204 pkt
->set_source(pkt
, source
);
205 pkt
->set_destination(pkt
, dest
);
206 DBG2(DBG_NET
, "received packet: from %#H to %#H", source
, dest
);
/* payload starts behind the IP and UDP headers */
207 data_offset
= IP_LEN
+ UDP_LEN
;
208 /* remove non esp marker */
/* datagrams addressed to the NAT-T port additionally skip the marker */
209 if (dest
->get_port(dest
) == this->natt_port
)
211 data_offset
+= MARKER_LEN
;
/* NOTE(review): the malloc() result is passed to memcpy() without a visible
 * NULL check */
214 data
.len
= bytes_read
- data_offset
;
215 data
.ptr
= malloc(data
.len
);
216 memcpy(data
.ptr
, buffer
+ data_offset
, data
.len
);
217 pkt
->set_data(pkt
, data
);
/* IPv6 path: taken only when the IPv4 branch above was not */
219 else if (this->recv6
&& FD_ISSET(this->recv6
, &rfds
))
221 /* IPv6 raw sockets return no IP header. We must query
222 * src/dest via socket options/ancillary data */
224 struct cmsghdr
*cmsgptr
;
225 struct sockaddr_in6 src
, dst
;
/* NOTE(review): the msg.msg_name and msg.msg_iov assignments are not visible
 * here; presumably src is filled in by recvmsg() through msg_name - confirm */
230 msg
.msg_namelen
= sizeof(src
);
231 iov
.iov_base
= buffer
;
232 iov
.iov_len
= sizeof(buffer
);
235 msg
.msg_control
= ancillary
;
236 msg
.msg_controllen
= sizeof(ancillary
);
239 bytes_read
= recvmsg(this->recv6
, &msg
, 0);
242 DBG1(DBG_NET
, "error reading from IPv6 socket: %m");
245 DBG3(DBG_NET
, "received IPv6 packet %b", buffer
, bytes_read
);
/* NOTE(review): this check includes IP_LEN although, per the comment above,
 * no IP header is delivered on this socket and data_offset below starts at
 * UDP_LEN - confirm whether UDP_LEN + MARKER_LEN was intended */
247 if (bytes_read
< IP_LEN
+ UDP_LEN
+ MARKER_LEN
)
249 DBG3(DBG_NET
, "received IPv6 packet too short (%d bytes)",
254 /* read ancillary data to get destination address */
255 for (cmsgptr
= CMSG_FIRSTHDR(&msg
); cmsgptr
!= NULL
;
256 cmsgptr
= CMSG_NXTHDR(&msg
, cmsgptr
))
/* a zero-length control message is reported as an error */
258 if (cmsgptr
->cmsg_len
== 0)
260 DBG1(DBG_NET
, "error reading IPv6 ancillary data");
/* the packet-info control message carries the destination address */
263 if (cmsgptr
->cmsg_level
== SOL_IPV6
&&
264 cmsgptr
->cmsg_type
== IPV6_2292PKTINFO
)
266 struct in6_pktinfo
*pktinfo
;
267 pktinfo
= (struct in6_pktinfo
*)CMSG_DATA(cmsgptr
);
269 memset(&dst
, 0, sizeof(dst
));
270 memcpy(&dst
.sin6_addr
, &pktinfo
->ipi6_addr
, sizeof(dst
.sin6_addr
));
271 dst
.sin6_family
= AF_INET6
;
/* the buffer starts directly with the UDP header; both ports come from it */
272 udp
= (struct udphdr
*) (buffer
);
273 dst
.sin6_port
= udp
->dest
;
274 src
.sin6_port
= udp
->source
;
275 dest
= host_create_from_sockaddr((sockaddr_t
*)&dst
);
278 /* ancillary data missing? */
281 DBG1(DBG_NET
, "error reading IPv6 packet header");
285 source
= host_create_from_sockaddr((sockaddr_t
*)&src
);
287 pkt
= packet_create();
288 pkt
->set_source(pkt
, source
);
289 pkt
->set_destination(pkt
, dest
);
290 DBG2(DBG_NET
, "received packet: from %#H to %#H", source
, dest
);
/* payload starts behind the UDP header only */
291 data_offset
= UDP_LEN
;
292 /* remove non esp marker */
293 if (dest
->get_port(dest
) == this->natt_port
)
295 data_offset
+= MARKER_LEN
;
/* NOTE(review): as in the IPv4 path, malloc() is not visibly checked */
298 data
.len
= bytes_read
- data_offset
;
299 data
.ptr
= malloc(data
.len
);
300 memcpy(data
.ptr
, buffer
+ data_offset
, data
.len
);
301 pkt
->set_data(pkt
, data
);
305 /* oops, shouldn't happen */
/*
 * sender(): picks a send socket from the packet's source port and the
 * destination's address family, prepends a zeroed marker of MARKER_LEN bytes
 * to data sent from the NAT-T port (except one-byte 0xFF keepalives), and
 * transmits the data with sendmsg(), passing the source address as
 * packet-info ancillary data.
 *
 * NOTE(review): braces, return statements and the declarations of src, dst,
 * msg, iov and bytes_sent are not visible in this view, so the returned
 * status codes are not described here.
 */
315 * implementation of socket_t.send
317 status_t
sender(private_socket_t
*this, packet_t
*packet
)
319 int sport
, skt
, family
;
321 chunk_t data
, marked
;
324 struct cmsghdr
*cmsg
;
327 src
= packet
->get_source(packet
);
328 dst
= packet
->get_destination(packet
);
329 data
= packet
->get_data(packet
);
331 DBG2(DBG_NET
, "sending packet: from %#H to %#H", src
, dst
);
/* the socket is chosen by source port, the family by destination */
334 sport
= src
->get_port(src
);
335 family
= dst
->get_family(dst
);
/* regular port; the socket assignments of this branch are not visible here */
336 if (sport
== this->port
)
338 if (family
== AF_INET
)
/* NAT-T port */
347 else if (sport
== this->natt_port
)
349 if (family
== AF_INET
)
351 skt
= this->send4_natt
;
355 skt
= this->send6_natt
;
357 /* NAT keepalives without marker */
358 if (data
.len
!= 1 || data
.ptr
[0] != 0xFF)
360 /* add non esp marker to packet */
/* data that would exceed MAX_PACKET once the marker is added is rejected */
361 if (data
.len
> MAX_PACKET
- MARKER_LEN
)
363 DBG1(DBG_NET
, "unable to send packet: it's too big (%d bytes)",
/* new chunk: MARKER_LEN zero bytes followed by the original data */
367 marked
= chunk_alloc(data
.len
+ MARKER_LEN
);
368 memset(marked
.ptr
, 0, MARKER_LEN
);
369 memcpy(marked
.ptr
+ MARKER_LEN
, data
.ptr
, data
.len
);
370 /* let the packet do the clean up for us */
371 packet
->set_data(packet
, marked
);
/* the source port matched neither configured port */
377 DBG1(DBG_NET
, "unable to locate a send socket for port %d", sport
);
381 memset(&msg
, 0, sizeof(struct msghdr
));
382 msg
.msg_name
= dst
->get_sockaddr(dst
);;
383 msg
.msg_namelen
= *dst
->get_sockaddr_len(dst
);
384 iov
.iov_base
= data
.ptr
;
385 iov
.iov_len
= data
.len
;
/* NOTE(review): the any-address test is made on dst, while the address
 * copied into the packet-info below comes from src - confirm that dst is
 * the intended operand */
390 if (!dst
->is_anyaddr(dst
))
392 if (family
== AF_INET
)
/* NOTE(review): buf is declared inside this branch but msg.msg_control keeps
 * pointing at it; sendmsg() at original line 428 appears to run after the
 * branch (braces are not visible here) - confirm the buffer's lifetime */
394 char buf
[CMSG_SPACE(sizeof(struct in_pktinfo
))];
395 struct in_pktinfo
*pktinfo
;
396 struct sockaddr_in
*sin
;
398 msg
.msg_control
= buf
;
399 msg
.msg_controllen
= sizeof(buf
);
400 cmsg
= CMSG_FIRSTHDR(&msg
);
401 cmsg
->cmsg_level
= SOL_IP
;
402 cmsg
->cmsg_type
= IP_PKTINFO
;
403 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct in_pktinfo
));
404 pktinfo
= (struct in_pktinfo
*)CMSG_DATA(cmsg
);
405 memset(pktinfo
, 0, sizeof(struct in_pktinfo
));
/* the packet's source address is copied into ipi_spec_dst */
406 sin
= (struct sockaddr_in
*)src
->get_sockaddr(src
);
407 memcpy(&pktinfo
->ipi_spec_dst
, &sin
->sin_addr
, sizeof(struct in_addr
));
/* IPv6 counterpart; the same lifetime note applies to this buf */
411 char buf
[CMSG_SPACE(sizeof(struct in6_pktinfo
))];
412 struct in6_pktinfo
*pktinfo
;
413 struct sockaddr_in6
*sin
;
415 msg
.msg_control
= buf
;
416 msg
.msg_controllen
= sizeof(buf
);
417 cmsg
= CMSG_FIRSTHDR(&msg
);
418 cmsg
->cmsg_level
= SOL_IPV6
;
419 cmsg
->cmsg_type
= IPV6_2292PKTINFO
;
420 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct in6_pktinfo
));
421 pktinfo
= (struct in6_pktinfo
*)CMSG_DATA(cmsg
);
422 memset(pktinfo
, 0, sizeof(struct in6_pktinfo
));
423 sin
= (struct sockaddr_in6
*)src
->get_sockaddr(src
);
424 memcpy(&pktinfo
->ipi6_addr
, &sin
->sin6_addr
, sizeof(struct in6_addr
));
428 bytes_sent
= sendmsg(skt
, &msg
, 0);
/* NOTE(review): anything other than a complete write is reported as an
 * error; whether data was updated to the marked chunk after set_data() is
 * not visible in this view */
430 if (bytes_sent
!= data
.len
)
432 DBG1(DBG_NET
, "error writing to socket: %m");
/*
 * open_send_socket(): creates a UDP socket for `family`, sets SO_REUSEADDR,
 * installs IPsec bypass policies for the outbound and the inbound direction,
 * binds the socket to the any-address on `port` and, for AF_INET, enables
 * UDP encapsulation (UDP_ENCAP_ESPINUDP).
 *
 * NOTE(review): the return statements, the declarations of skt and on, and
 * the assignments to sol are not visible in this view; socket_create()
 * treats a result of 0 as failure.
 */
439 * open a socket to send packets
441 static int open_send_socket(private_socket_t
*this, int family
, u_int16_t port
)
444 int type
= UDP_ENCAP_ESPINUDP
;
445 struct sockaddr_storage addr
;
446 u_int sol
, ipsec_policy
;
447 struct sadb_x_policy policy
;
450 memset(&addr
, 0, sizeof(addr
));
451 /* precalculate constants depending on address family */
/* IPv4: any-address, port in network byte order, IPv4 policy option */
456 struct sockaddr_in
*sin
= (struct sockaddr_in
*)&addr
;
457 sin
->sin_family
= AF_INET
;
458 sin
->sin_addr
.s_addr
= INADDR_ANY
;
459 sin
->sin_port
= htons(port
);
461 ipsec_policy
= IP_IPSEC_POLICY
;
/* IPv6: same setup with in6addr_any and the IPv6 policy option */
466 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)&addr
;
467 sin6
->sin6_family
= AF_INET6
;
468 memcpy(&sin6
->sin6_addr
, &in6addr_any
, sizeof(in6addr_any
));
469 sin6
->sin6_port
= htons(port
);
471 ipsec_policy
= IPV6_IPSEC_POLICY
;
478 skt
= socket(family
, SOCK_DGRAM
, IPPROTO_UDP
);
481 DBG1(DBG_NET
, "could not open send socket: %m");
485 if (setsockopt(skt
, SOL_SOCKET
, SO_REUSEADDR
, (void*)&on
, sizeof(on
)) < 0)
487 DBG1(DBG_NET
, "unable to set SO_REUSEADDR on send socket: %m");
492 /* bypass outgoing IKE traffic on send socket */
/* sadb_x_policy_len is expressed in units of sizeof(u_int64_t) */
493 memset(&policy
, 0, sizeof(policy
));
494 policy
.sadb_x_policy_len
= sizeof(policy
) / sizeof(u_int64_t
);
495 policy
.sadb_x_policy_exttype
= SADB_X_EXT_POLICY
;
496 policy
.sadb_x_policy_type
= IPSEC_POLICY_BYPASS
;
497 policy
.sadb_x_policy_dir
= IPSEC_DIR_OUTBOUND
;
499 if (setsockopt(skt
, sol
, ipsec_policy
, &policy
, sizeof(policy
)) < 0)
501 DBG1(DBG_NET
, "unable to set IPSEC_POLICY on send socket: %m");
506 /* We don't receive packets on the send socket, but we need an INBOUND policy.
507 * Otherwise, UDP decapsulation does not work!!! */
/* the same policy struct is reused with only the direction changed */
508 policy
.sadb_x_policy_dir
= IPSEC_DIR_INBOUND
;
509 if (setsockopt(skt
, sol
, ipsec_policy
, &policy
, sizeof(policy
)) < 0)
511 DBG1(DBG_NET
, "unable to set IPSEC_POLICY on send socket: %m");
516 /* bind the send socket */
/* NOTE(review): the length passed is sizeof(struct sockaddr_storage), not
 * the size of the family-specific sockaddr - confirm this is accepted */
517 if (bind(skt
, (struct sockaddr
*)&addr
, sizeof(addr
)) < 0)
519 DBG1(DBG_NET
, "unable to bind send socket: %m");
524 if (family
== AF_INET
)
526 /* enable UDP decapsulation globally, only for one socket needed */
/* failure here is only logged ("NAT-T may fail") */
527 if (setsockopt(skt
, SOL_UDP
, UDP_ENCAP
, &type
, sizeof(type
)) < 0)
529 DBG1(DBG_NET
, "unable to set UDP_ENCAP: %m; NAT-T may fail");
/*
 * open_recv_socket(): creates a raw IPPROTO_UDP socket for `family`, attaches
 * a socket filter that accepts only IKEv2 datagrams addressed to this->port
 * or this->natt_port, enables IPV6_2292PKTINFO for AF_INET6 and installs an
 * inbound IPsec bypass policy.
 *
 * NOTE(review): the return statements, the declarations of skt and on, and
 * the assignments to sol, udp_header and the IPv4 ip_len are not visible in
 * this view; socket_create() treats a result of 0 as failure.
 */
537 * open a socket to receive packets
539 static int open_recv_socket(private_socket_t
*this, int family
)
543 u_int proto_offset
, ip_len
, sol
, ipsec_policy
, udp_header
, ike_header
;
544 struct sadb_x_policy policy
;
546 /* precalculate constants depending on address family */
550 proto_offset
= IP_PROTO_OFFSET
;
553 ipsec_policy
= IP_IPSEC_POLICY
;
556 proto_offset
= IP6_PROTO_OFFSET
;
557 ip_len
= 0; /* IPv6 raw sockets contain no IP header */
559 ipsec_policy
= IPV6_IPSEC_POLICY
;
/* the IKE header follows the UDP header */
565 ike_header
= ip_len
+ UDP_LEN
;
567 /* This filter code filters out all non-IKEv2 traffic on
568 * a SOCK_RAW IP_PROTP_UDP socket. Handling of other
569 * IKE versions is done in pluto.
571 struct sock_filter ikev2_filter_code
[] =
573 /* Destination Port must be either port or natt_port */
574 BPF_STMT(BPF_LD
+BPF_H
+BPF_ABS
, udp_header
+ 2),
575 BPF_JUMP(BPF_JMP
+BPF_JEQ
+BPF_K
, this->port
, 1, 0),
576 BPF_JUMP(BPF_JMP
+BPF_JEQ
+BPF_K
, this->natt_port
, 5, 12),
/* Jump offsets are relative to the next instruction. Counting the visible
 * instructions: a match on port skips the natt_port test and continues here;
 * a match on natt_port continues at the marker check; every failed
 * comparison lands on the final "ret 0" instruction. */
578 /* IKE version must be 2.0 */
579 BPF_STMT(BPF_LD
+BPF_B
+BPF_ABS
, ike_header
+ IKE_VERSION_OFFSET
),
580 BPF_JUMP(BPF_JMP
+BPF_JEQ
+BPF_K
, 0x20, 0, 10),
581 /* packet length is length in IKEv2 header + ip header + udp header */
582 BPF_STMT(BPF_LD
+BPF_W
+BPF_ABS
, ike_header
+ IKE_LENGTH_OFFSET
),
583 BPF_STMT(BPF_ALU
+BPF_ADD
+BPF_K
, ip_len
+ UDP_LEN
),
584 BPF_STMT(BPF_RET
+BPF_A
, 0),
586 /* nat-t: check for marker */
/* the 32-bit word directly behind the UDP header must be zero */
587 BPF_STMT(BPF_LD
+BPF_W
+BPF_ABS
, ike_header
),
588 BPF_JUMP(BPF_JMP
+BPF_JEQ
+BPF_K
, 0, 0, 5),
589 /* nat-t: IKE version must be 2.0 */
590 BPF_STMT(BPF_LD
+BPF_B
+BPF_ABS
, ike_header
+ MARKER_LEN
+ IKE_VERSION_OFFSET
),
591 BPF_JUMP(BPF_JMP
+BPF_JEQ
+BPF_K
, 0x20, 0, 3),
592 /* nat-t: packet length is length in IKEv2 header + ip header + udp header + non esp marker */
593 BPF_STMT(BPF_LD
+BPF_W
+BPF_ABS
, ike_header
+ MARKER_LEN
+ IKE_LENGTH_OFFSET
),
594 BPF_STMT(BPF_ALU
+BPF_ADD
+BPF_K
, ip_len
+ UDP_LEN
+ MARKER_LEN
),
595 BPF_STMT(BPF_RET
+BPF_A
, 0),
596 /* packet doesn't match, ignore */
597 BPF_STMT(BPF_RET
+BPF_K
, 0),
600 /* Filter struct to use with setsockopt */
/* first member: number of instructions in ikev2_filter_code */
601 struct sock_fprog ikev2_filter
= {
602 sizeof(ikev2_filter_code
) / sizeof(struct sock_filter
),
606 /* set up a raw socket */
607 skt
= socket(family
, SOCK_RAW
, IPPROTO_UDP
);
610 DBG1(DBG_NET
, "unable to create raw socket: %m");
614 if (setsockopt(skt
, SOL_SOCKET
, SO_ATTACH_FILTER
,
615 &ikev2_filter
, sizeof(ikev2_filter
)) < 0)
617 DBG1(DBG_NET
, "unable to attach IKEv2 filter to raw socket: %m");
/* packet-info delivery is requested for IPv6 only; receiver() reads the
 * destination address from that ancillary data */
622 if (family
== AF_INET6
&&
623 /* we use IPV6_2292PKTINFO, as IPV6_PKTINFO is defined as
624 * 2 or 50 depending on kernel header version */
625 setsockopt(skt
, sol
, IPV6_2292PKTINFO
, &on
, sizeof(on
)) < 0)
627 DBG1(DBG_NET
, "unable to set IPV6_PKTINFO on raw socket: %m");
632 /* bypass incoming IKE traffic on this socket */
/* sadb_x_policy_len is expressed in units of sizeof(u_int64_t) */
633 memset(&policy
, 0, sizeof(policy
));
634 policy
.sadb_x_policy_len
= sizeof(policy
) / sizeof(u_int64_t
);
635 policy
.sadb_x_policy_exttype
= SADB_X_EXT_POLICY
;
636 policy
.sadb_x_policy_type
= IPSEC_POLICY_BYPASS
;
637 policy
.sadb_x_policy_dir
= IPSEC_DIR_INBOUND
;
639 if (setsockopt(skt
, sol
, ipsec_policy
, &policy
, sizeof(policy
)) < 0)
641 DBG1(DBG_NET
, "unable to set IPSEC_POLICY on raw socket: %m");
/*
 * destroy(): the visible part closes the IPv4 and IPv6 NAT-T send sockets
 * when their descriptors are non-zero.
 * NOTE(review): original lines 653-669 and everything after 676 are not
 * visible in this view; presumably they close the remaining sockets and
 * release the object - confirm. The value 0 is used as the "not open"
 * sentinel although 0 is itself a valid descriptor number.
 */
650 * implementation of socket_t.destroy
652 static void destroy(private_socket_t
*this)
670 if (this->send4_natt
)
672 close(this->send4_natt
);
674 if (this->send6_natt
)
676 close(this->send6_natt
);
/*
 * socket_create(): allocates the private object, installs the public method
 * pointers, opens the receive, send and NAT-T send sockets for IPv4 and IPv6,
 * and returns the object cast to socket_t.
 *
 * NOTE(review): braces, else branches and the assignments of this->port and
 * of the other descriptor fields are not visible in this view, so the exact
 * nesting of the open calls below is not described here.
 */
682 * See header for description
684 socket_t
*socket_create(u_int16_t port
, u_int16_t natt_port
)
686 private_socket_t
*this = malloc_thing(private_socket_t
);
688 /* public functions */
/* the private implementations are cast to the public signatures */
689 this->public.send
= (status_t(*)(socket_t
*, packet_t
*))sender
;
690 this->public.receive
= (status_t(*)(socket_t
*, packet_t
**))receiver
;
691 this->public.destroy
= (void(*)(socket_t
*)) destroy
;
694 this->natt_port
= natt_port
;
/* 0 marks a socket as not open */
699 this->send4_natt
= 0;
700 this->send6_natt
= 0;
/* IPv4: receive socket, send socket, NAT-T send socket */
702 this->recv4
= open_recv_socket(this, AF_INET
);
703 if (this->recv4
== 0)
705 DBG1(DBG_NET
, "could not open IPv4 receive socket, IPv4 disabled");
709 this->send4
= open_send_socket(this, AF_INET
, this->port
);
710 if (this->send4
== 0)
712 DBG1(DBG_NET
, "could not open IPv4 send socket, IPv4 disabled");
717 this->send4_natt
= open_send_socket(this, AF_INET
, this->natt_port
);
718 if (this->send4_natt
== 0)
720 DBG1(DBG_NET
, "could not open IPv4 NAT-T send socket");
/* IPv6: same sequence */
725 this->recv6
= open_recv_socket(this, AF_INET6
);
726 if (this->recv6
== 0)
728 DBG1(DBG_NET
, "could not open IPv6 receive socket, IPv6 disabled");
732 this->send6
= open_send_socket(this, AF_INET6
, this->port
);
733 if (this->send6
== 0)
735 DBG1(DBG_NET
, "could not open IPv6 send socket, IPv6 disabled");
740 this->send6_natt
= open_send_socket(this, AF_INET6
, this->natt_port
);
741 if (this->send6_natt
== 0)
743 DBG1(DBG_NET
, "could not open IPv6 NAT-T send socket");
/* at least one regular send socket and one receive socket are required;
 * otherwise charon->kill() is invoked */
748 if (!(this->send4
|| this->send6
) || !(this->recv4
|| this->recv6
))
750 DBG1(DBG_NET
, "could not create any sockets");
752 charon
->kill(charon
, "socket initialization failed");
755 return (socket_t
*)this;