kernel-netlink: Align concatenated Netlink responses
[strongswan.git] / src / libcharon / plugins / kernel_netlink / kernel_netlink_shared.c
1 /*
2 * Copyright (C) 2014 Martin Willi
3 * Copyright (C) 2014 revosec AG
4 * Copyright (C) 2008 Tobias Brunner
5 * HSR Hochschule fuer Technik Rapperswil
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * for more details.
16 */
17
18 /*
19 * Copyright (C) 2016 secunet Security Networks AG
20 * Copyright (C) 2016 Thomas Egerer
21 *
22 * Permission is hereby granted, free of charge, to any person obtaining a copy
23 * of this software and associated documentation files (the "Software"), to deal
24 * in the Software without restriction, including without limitation the rights
25 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
26 * copies of the Software, and to permit persons to whom the Software is
27 * furnished to do so, subject to the following conditions:
28 *
29 * The above copyright notice and this permission notice shall be included in
30 * all copies or substantial portions of the Software.
31 *
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
38 * THE SOFTWARE.
39 */
40
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
44 #include <linux/xfrm.h>
45 #include <errno.h>
46 #include <unistd.h>
47
48 #include "kernel_netlink_shared.h"
49
50 #include <utils/debug.h>
51 #include <threading/mutex.h>
52 #include <threading/condvar.h>
53 #include <collections/array.h>
54 #include <collections/hashtable.h>
55
56 typedef struct private_netlink_socket_t private_netlink_socket_t;
57
58 /**
59 * Private variables and functions of netlink_socket_t class.
60 */
61 struct private_netlink_socket_t {
62
63 /**
64 * public part of the netlink_socket_t object.
65 */
66 netlink_socket_t public;
67
68 /**
69 * mutex to lock access entries
70 */
71 mutex_t *mutex;
72
73 /**
74 * Netlink request entries currently active, uintptr_t seq => entry_t
75 */
76 hashtable_t *entries;
77
78 /**
79 * Current sequence number for Netlink requests
80 */
81 refcount_t seq;
82
83 /**
84 * netlink socket
85 */
86 int socket;
87
88 /**
89 * Netlink protocol
90 */
91 int protocol;
92
93 /**
94 * Enum names for Netlink messages
95 */
96 enum_name_t *names;
97
98 /**
99 * Timeout for Netlink replies, in ms
100 */
101 u_int timeout;
102
103 /**
104 * Number of times to repeat timed out queries
105 */
106 u_int retries;
107
108 /**
109 * Buffer size for received Netlink messages
110 */
111 u_int buflen;
112
113 /**
114 * Use parallel netlink queries
115 */
116 bool parallel;
117
118 /**
119 * Ignore errors potentially resulting from a retransmission
120 */
121 bool ignore_retransmit_errors;
122 };
123
/**
 * Hook to simulate the loss of request messages.
 *
 * If NETLINK_MSG_LOSS_HOOK is defined it names a function that is asked for
 * every outgoing message; otherwise the hook evaluates to FALSE.
 */
#ifndef NETLINK_MSG_LOSS_HOOK
#define msg_loss_hook(msg) FALSE
#else
bool NETLINK_MSG_LOSS_HOOK(struct nlmsghdr *msg);
#define msg_loss_hook(msg) NETLINK_MSG_LOSS_HOOK(msg)
#endif
133
134 /**
135 * Request entry the answer for a waiting thread is collected in
136 */
137 typedef struct {
138 /** Condition variable thread is waiting */
139 condvar_t *condvar;
140 /** Array of hdrs in a multi-message response, as struct nlmsghdr* */
141 array_t *hdrs;
142 /** All response messages received? */
143 bool complete;
144 } entry_t;
145
146 /**
147 * Clean up a thread waiting entry
148 */
149 static void destroy_entry(entry_t *entry)
150 {
151 entry->condvar->destroy(entry->condvar);
152 array_destroy_function(entry->hdrs, (void*)free, NULL);
153 free(entry);
154 }
155
156 /**
157 * Write a Netlink message to socket
158 */
159 static bool write_msg(private_netlink_socket_t *this, struct nlmsghdr *msg)
160 {
161 struct sockaddr_nl addr = {
162 .nl_family = AF_NETLINK,
163 };
164 int len;
165
166 if (msg_loss_hook(msg))
167 {
168 return TRUE;
169 }
170
171 while (TRUE)
172 {
173 len = sendto(this->socket, msg, msg->nlmsg_len, 0,
174 (struct sockaddr*)&addr, sizeof(addr));
175 if (len != msg->nlmsg_len)
176 {
177 if (errno == EINTR)
178 {
179 continue;
180 }
181 DBG1(DBG_KNL, "netlink write error: %s", strerror(errno));
182 return FALSE;
183 }
184 return TRUE;
185 }
186 }
187
188 /**
189 * Read a single Netlink message from socket, return 0 on error, -1 on timeout
190 */
191 static ssize_t read_msg(private_netlink_socket_t *this,
192 char *buf, size_t buflen, bool block)
193 {
194 ssize_t len;
195
196 if (block)
197 {
198 fd_set set;
199 timeval_t tv = {};
200
201 FD_ZERO(&set);
202 FD_SET(this->socket, &set);
203 timeval_add_ms(&tv, this->timeout);
204
205 if (select(this->socket + 1, &set, NULL, NULL,
206 this->timeout ? &tv : NULL) <= 0)
207 {
208 return -1;
209 }
210 }
211 len = recv(this->socket, buf, buflen, MSG_TRUNC|(block ? 0 : MSG_DONTWAIT));
212 if (len > buflen)
213 {
214 DBG1(DBG_KNL, "netlink response exceeds buffer size");
215 return 0;
216 }
217 if (len < 0)
218 {
219 if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
220 {
221 DBG1(DBG_KNL, "netlink read error: %s", strerror(errno));
222 }
223 return 0;
224 }
225 return len;
226 }
227
228 /**
229 * Queue received response message
230 */
231 static bool queue(private_netlink_socket_t *this, struct nlmsghdr *buf)
232 {
233 struct nlmsghdr *hdr;
234 entry_t *entry;
235 uintptr_t seq;
236
237 seq = (uintptr_t)buf->nlmsg_seq;
238
239 this->mutex->lock(this->mutex);
240 entry = this->entries->get(this->entries, (void*)seq);
241 if (entry)
242 {
243 hdr = malloc(buf->nlmsg_len);
244 memcpy(hdr, buf, buf->nlmsg_len);
245 array_insert(entry->hdrs, ARRAY_TAIL, hdr);
246 if (hdr->nlmsg_type == NLMSG_DONE || !(hdr->nlmsg_flags & NLM_F_MULTI))
247 {
248 entry->complete = TRUE;
249 entry->condvar->signal(entry->condvar);
250 }
251 }
252 else
253 {
254 DBG1(DBG_KNL, "received unknown netlink seq %u, ignored", seq);
255 }
256 this->mutex->unlock(this->mutex);
257
258 return entry != NULL;
259 }
260
261 /**
262 * Read and queue response message, optionally blocking, returns TRUE on timeout
263 */
264 static bool read_and_queue(private_netlink_socket_t *this, bool block)
265 {
266 struct nlmsghdr *hdr;
267 char buf[this->buflen];
268 ssize_t len, read_len;
269 bool wipe = FALSE;
270
271 len = read_len = read_msg(this, buf, sizeof(buf), block);
272 if (len == -1)
273 {
274 return TRUE;
275 }
276 if (len)
277 {
278 hdr = (struct nlmsghdr*)buf;
279 while (NLMSG_OK(hdr, len))
280 {
281 if (this->protocol == NETLINK_XFRM &&
282 hdr->nlmsg_type == XFRM_MSG_NEWSA)
283 { /* wipe potential IPsec SA keys */
284 wipe = TRUE;
285 }
286 if (!queue(this, hdr))
287 {
288 break;
289 }
290 hdr = NLMSG_NEXT(hdr, len);
291 }
292 }
293 if (wipe)
294 {
295 memwipe(buf, read_len);
296 }
297 return FALSE;
298 }
299
300 CALLBACK(watch, bool,
301 private_netlink_socket_t *this, int fd, watcher_event_t event)
302 {
303 if (event == WATCHER_READ)
304 {
305 read_and_queue(this, FALSE);
306 }
307 return TRUE;
308 }
309
310 /**
311 * Send a netlink request, try once
312 */
313 static status_t send_once(private_netlink_socket_t *this, struct nlmsghdr *in,
314 uintptr_t seq, struct nlmsghdr **out, size_t *out_len)
315 {
316 struct nlmsghdr *hdr;
317 entry_t *entry;
318 u_char *ptr;
319 int i;
320
321 in->nlmsg_seq = seq;
322 in->nlmsg_pid = getpid();
323
324 if (this->names)
325 {
326 DBG3(DBG_KNL, "sending %N %u: %b", this->names, in->nlmsg_type,
327 (u_int)seq, in, in->nlmsg_len);
328 }
329
330 this->mutex->lock(this->mutex);
331 if (!write_msg(this, in))
332 {
333 this->mutex->unlock(this->mutex);
334 return FAILED;
335 }
336
337 INIT(entry,
338 .condvar = condvar_create(CONDVAR_TYPE_DEFAULT),
339 .hdrs = array_create(0, 0),
340 );
341 this->entries->put(this->entries, (void*)seq, entry);
342
343 while (!entry->complete)
344 {
345 if (this->parallel &&
346 lib->watcher->get_state(lib->watcher) != WATCHER_STOPPED &&
347 lib->processor->get_total_threads(lib->processor))
348 {
349 if (this->timeout)
350 {
351 if (entry->condvar->timed_wait(entry->condvar, this->mutex,
352 this->timeout))
353 {
354 break;
355 }
356 }
357 else
358 {
359 entry->condvar->wait(entry->condvar, this->mutex);
360 }
361 }
362 else
363 { /* During (de-)initialization, no watcher thread is active.
364 * collect responses ourselves. */
365 if (read_and_queue(this, TRUE))
366 {
367 break;
368 }
369 }
370 }
371 this->entries->remove(this->entries, (void*)seq);
372
373 this->mutex->unlock(this->mutex);
374
375 if (!entry->complete)
376 { /* timeout */
377 destroy_entry(entry);
378 return OUT_OF_RES;
379 }
380
381 for (i = 0, *out_len = 0; i < array_count(entry->hdrs); i++)
382 {
383 array_get(entry->hdrs, i, &hdr);
384 *out_len += NLMSG_ALIGN(hdr->nlmsg_len);
385 }
386 ptr = malloc(*out_len);
387 *out = (struct nlmsghdr*)ptr;
388
389 while (array_remove(entry->hdrs, ARRAY_HEAD, &hdr))
390 {
391 if (this->names)
392 {
393 DBG3(DBG_KNL, "received %N %u: %b", this->names, hdr->nlmsg_type,
394 hdr->nlmsg_seq, hdr, hdr->nlmsg_len);
395 }
396 memcpy(ptr, hdr, hdr->nlmsg_len);
397 ptr += NLMSG_ALIGN(hdr->nlmsg_len);
398 free(hdr);
399 }
400 destroy_entry(entry);
401 return SUCCESS;
402 }
403
404 /**
405 * Ignore errors for message types that might have completed previously
406 */
407 static void ignore_retransmit_error(private_netlink_socket_t *this,
408 struct nlmsgerr *err, int type)
409 {
410 switch (err->error)
411 {
412 case -EEXIST:
413 switch (this->protocol)
414 {
415 case NETLINK_XFRM:
416 switch (type)
417 {
418 case XFRM_MSG_NEWPOLICY:
419 case XFRM_MSG_NEWSA:
420 err->error = 0;
421 break;
422 }
423 break;
424 case NETLINK_ROUTE:
425 switch (type)
426 {
427 case RTM_NEWADDR:
428 case RTM_NEWLINK:
429 case RTM_NEWNEIGH:
430 case RTM_NEWROUTE:
431 case RTM_NEWRULE:
432 err->error = 0;
433 break;
434 }
435 break;
436 }
437 break;
438 case -ENOENT:
439 switch (this->protocol)
440 {
441 case NETLINK_XFRM:
442 switch (type)
443 {
444 case XFRM_MSG_DELPOLICY:
445 case XFRM_MSG_DELSA:
446 err->error = 0;
447 break;
448 }
449 break;
450 case NETLINK_ROUTE:
451 switch (type)
452 {
453 case RTM_DELADDR:
454 case RTM_DELLINK:
455 case RTM_DELNEIGH:
456 case RTM_DELROUTE:
457 case RTM_DELRULE:
458 err->error = 0;
459 break;
460 }
461 break;
462 }
463 break;
464 }
465 }
466
467 METHOD(netlink_socket_t, netlink_send, status_t,
468 private_netlink_socket_t *this, struct nlmsghdr *in, struct nlmsghdr **out,
469 size_t *out_len)
470 {
471 uintptr_t seq;
472 u_int try;
473
474 seq = ref_get(&this->seq);
475
476 for (try = 0; try <= this->retries; ++try)
477 {
478 struct nlmsghdr *hdr;
479 status_t status;
480 size_t len;
481
482 if (try > 0)
483 {
484 DBG1(DBG_KNL, "retransmitting Netlink request (%u/%u)",
485 try, this->retries);
486 }
487 status = send_once(this, in, seq, &hdr, &len);
488 switch (status)
489 {
490 case SUCCESS:
491 break;
492 case OUT_OF_RES:
493 continue;
494 default:
495 return status;
496 }
497 if (hdr->nlmsg_type == NLMSG_ERROR)
498 {
499 struct nlmsgerr* err;
500
501 err = NLMSG_DATA(hdr);
502 if (err->error == -EBUSY)
503 {
504 free(hdr);
505 try--;
506 continue;
507 }
508 if (this->ignore_retransmit_errors && try > 0)
509 {
510 ignore_retransmit_error(this, err, in->nlmsg_type);
511 }
512 }
513 *out = hdr;
514 *out_len = len;
515 return SUCCESS;
516 }
517 DBG1(DBG_KNL, "Netlink request timed out after %u retransmits",
518 this->retries);
519 return OUT_OF_RES;
520 }
521
522 METHOD(netlink_socket_t, netlink_send_ack, status_t,
523 private_netlink_socket_t *this, struct nlmsghdr *in)
524 {
525 struct nlmsghdr *out, *hdr;
526 size_t len;
527
528 if (netlink_send(this, in, &out, &len) != SUCCESS)
529 {
530 return FAILED;
531 }
532 hdr = out;
533 while (NLMSG_OK(hdr, len))
534 {
535 switch (hdr->nlmsg_type)
536 {
537 case NLMSG_ERROR:
538 {
539 struct nlmsgerr* err = NLMSG_DATA(hdr);
540
541 if (err->error)
542 {
543 if (-err->error == EEXIST)
544 { /* do not report existing routes */
545 free(out);
546 return ALREADY_DONE;
547 }
548 if (-err->error == ESRCH)
549 { /* do not report missing entries */
550 free(out);
551 return NOT_FOUND;
552 }
553 DBG1(DBG_KNL, "received netlink error: %s (%d)",
554 strerror(-err->error), -err->error);
555 free(out);
556 return FAILED;
557 }
558 free(out);
559 return SUCCESS;
560 }
561 default:
562 hdr = NLMSG_NEXT(hdr, len);
563 continue;
564 case NLMSG_DONE:
565 break;
566 }
567 break;
568 }
569 DBG1(DBG_KNL, "netlink request not acknowledged");
570 free(out);
571 return FAILED;
572 }
573
574 METHOD(netlink_socket_t, destroy, void,
575 private_netlink_socket_t *this)
576 {
577 if (this->socket != -1)
578 {
579 if (this->parallel)
580 {
581 lib->watcher->remove(lib->watcher, this->socket);
582 }
583 close(this->socket);
584 }
585 this->entries->destroy(this->entries);
586 this->mutex->destroy(this->mutex);
587 free(this);
588 }
589
590 /**
591 * Described in header.
592 */
593 netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names,
594 bool parallel)
595 {
596 private_netlink_socket_t *this;
597 struct sockaddr_nl addr = {
598 .nl_family = AF_NETLINK,
599 };
600 bool force_buf = FALSE;
601 int rcvbuf_size = 0;
602
603 INIT(this,
604 .public = {
605 .send = _netlink_send,
606 .send_ack = _netlink_send_ack,
607 .destroy = _destroy,
608 },
609 .seq = 200,
610 .mutex = mutex_create(MUTEX_TYPE_RECURSIVE),
611 .socket = socket(AF_NETLINK, SOCK_RAW, protocol),
612 .entries = hashtable_create(hashtable_hash_ptr, hashtable_equals_ptr, 4),
613 .protocol = protocol,
614 .names = names,
615 .buflen = lib->settings->get_int(lib->settings,
616 "%s.plugins.kernel-netlink.buflen", 0, lib->ns),
617 .timeout = lib->settings->get_int(lib->settings,
618 "%s.plugins.kernel-netlink.timeout", 0, lib->ns),
619 .retries = lib->settings->get_int(lib->settings,
620 "%s.plugins.kernel-netlink.retries", 0, lib->ns),
621 .ignore_retransmit_errors = lib->settings->get_bool(lib->settings,
622 "%s.plugins.kernel-netlink.ignore_retransmit_errors",
623 FALSE, lib->ns),
624 .parallel = parallel,
625 );
626
627 if (!this->buflen)
628 {
629 long pagesize = sysconf(_SC_PAGESIZE);
630 if (pagesize == -1)
631 {
632 pagesize = 4096;
633 }
634 /* base this on NLMSG_GOODSIZE */
635 this->buflen = min(pagesize, 8192);
636 }
637 if (this->socket == -1)
638 {
639 DBG1(DBG_KNL, "unable to create netlink socket: %s (%d)",
640 strerror(errno), errno);
641 destroy(this);
642 return NULL;
643 }
644 if (bind(this->socket, (struct sockaddr*)&addr, sizeof(addr)))
645 {
646 DBG1(DBG_KNL, "unable to bind netlink socket: %s (%d)",
647 strerror(errno), errno);
648 destroy(this);
649 return NULL;
650 }
651 rcvbuf_size = lib->settings->get_int(lib->settings,
652 "%s.plugins.kernel-netlink.receive_buffer_size",
653 rcvbuf_size, lib->ns);
654 if (rcvbuf_size)
655 {
656 int optname;
657
658 force_buf = lib->settings->get_bool(lib->settings,
659 "%s.plugins.kernel-netlink.force_receive_buffer_size",
660 force_buf, lib->ns);
661 optname = force_buf ? SO_RCVBUFFORCE : SO_RCVBUF;
662
663 if (setsockopt(this->socket, SOL_SOCKET, optname, &rcvbuf_size,
664 sizeof(rcvbuf_size)) == -1)
665 {
666 DBG1(DBG_KNL, "failed to %supdate receive buffer size to %d: %s",
667 force_buf ? "forcibly " : "", rcvbuf_size, strerror(errno));
668 }
669 }
670 if (this->parallel)
671 {
672 lib->watcher->add(lib->watcher, this->socket, WATCHER_READ, watch, this);
673 }
674
675 return &this->public;
676 }
677
678 /**
679 * Described in header.
680 */
681 void netlink_add_attribute(struct nlmsghdr *hdr, int rta_type, chunk_t data,
682 size_t buflen)
683 {
684 struct rtattr *rta;
685
686 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_LENGTH(data.len) > buflen)
687 {
688 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
689 return;
690 }
691
692 rta = (struct rtattr*)(((char*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len));
693 rta->rta_type = rta_type;
694 rta->rta_len = RTA_LENGTH(data.len);
695 memcpy(RTA_DATA(rta), data.ptr, data.len);
696 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
697 }
698
699 /**
700 * Described in header.
701 */
702 void* netlink_reserve(struct nlmsghdr *hdr, int buflen, int type, int len)
703 {
704 struct rtattr *rta;
705
706 if (NLMSG_ALIGN(hdr->nlmsg_len) + RTA_LENGTH(len) > buflen)
707 {
708 DBG1(DBG_KNL, "unable to add attribute, buffer too small");
709 return NULL;
710 }
711
712 rta = ((void*)hdr) + NLMSG_ALIGN(hdr->nlmsg_len);
713 rta->rta_type = type;
714 rta->rta_len = RTA_LENGTH(len);
715 hdr->nlmsg_len = NLMSG_ALIGN(hdr->nlmsg_len) + rta->rta_len;
716
717 return RTA_DATA(rta);
718 }