ike: Properly support high number of retransmission tries
authorTobias Brunner <tobias@strongswan.org>
Thu, 2 Apr 2020 13:48:31 +0000 (15:48 +0200)
committerTobias Brunner <tobias@strongswan.org>
Thu, 7 May 2020 13:05:55 +0000 (15:05 +0200)
Due to the exponential backoff a high number of retransmits only
makes sense if retransmit_limit is set.  However, even with that there
was a problem.

We first calculated the timeout for the next retransmit and only then
compared that to the configured limit.  Depending on the configured
base and timeout the calculation overflowed the range of uint32_t after
a relatively low number of retransmits (with the default values after 23)
causing the timeout to first get lower (on a high level) before constantly
resulting in 0 (with the default settings after 60 retransmits).

Since that's obviously lower than any configured limit, all remaining
retransmits were then sent without any delay, causing a lot of concurrent
messages if the number of retransmits was high.

This change determines the maximum number of retransmits until an
overflow occurs based on the configuration and defaults to UINT32_MAX
if that value is exceeded.  Note that since the timeout is in milliseconds
UINT32_MAX equals nearly 50 days.

The calculation in task_manager_total_retransmit_timeout() uses a double
variable and the result is in seconds so the maximum number would be higher
there (with the default settings 1205).  However, we want its result to
be based on the actual IKE retransmission behavior.

src/libcharon/sa/ikev1/task_manager_v1.c
src/libcharon/sa/ikev2/task_manager_v2.c
src/libcharon/sa/task_manager.c

index b4944cf..00f5e6f 100644 (file)
@@ -200,6 +200,14 @@ struct private_task_manager_t {
        u_int retransmit_tries;
 
        /**
+        * Maximum number of tries possible with current retransmission settings
+        * before overflowing the range of uint32_t, which we use for the timeout.
+        * Note that UINT32_MAX milliseconds equal nearly 50 days, so that doesn't
+        * make much sense without retransmit_limit anyway.
+        */
+       u_int retransmit_tries_max;
+
+       /**
         * Retransmission timeout
         */
        double retransmit_timeout;
@@ -355,7 +363,7 @@ static status_t retransmit_packet(private_task_manager_t *this, uint32_t seqnr,
                                                        u_int mid, u_int retransmitted, array_t *packets)
 {
        packet_t *packet;
-       uint32_t t, max_jitter;
+       uint32_t t = UINT32_MAX, max_jitter;
 
        array_get(packets, 0, &packet);
        if (retransmitted > this->retransmit_tries)
@@ -364,8 +372,12 @@ static status_t retransmit_packet(private_task_manager_t *this, uint32_t seqnr,
                charon->bus->alert(charon->bus, ALERT_RETRANSMIT_SEND_TIMEOUT, packet);
                return DESTROY_ME;
        }
-       t = (uint32_t)(this->retransmit_timeout * 1000.0 *
-                                       pow(this->retransmit_base, retransmitted));
+       if (this->retransmit_tries_max &&
+               retransmitted <= this->retransmit_tries_max)
+       {
+               t = (uint32_t)(this->retransmit_timeout * 1000.0 *
+                                               pow(this->retransmit_base, retransmitted));
+       }
        if (this->retransmit_limit)
        {
                t = min(t, this->retransmit_limit);
@@ -2141,5 +2153,11 @@ task_manager_v1_t *task_manager_v1_create(ike_sa_t *ike_sa)
        }
        this->dpd_send &= 0x7FFFFFFF;
 
+       if (this->retransmit_base > 1)
+       {       /* based on 1000 * timeout * base^try */
+               this->retransmit_tries_max = log(UINT32_MAX/
+                                                                                (1000.0 * this->retransmit_timeout))/
+                                                                        log(this->retransmit_base);
+       }
        return &this->public;
 }
index 09b824a..6e1d008 100644 (file)
@@ -151,6 +151,14 @@ struct private_task_manager_t {
        u_int retransmit_tries;
 
        /**
+        * Maximum number of tries possible with current retransmission settings
+        * before overflowing the range of uint32_t, which we use for the timeout.
+        * Note that UINT32_MAX milliseconds equal nearly 50 days, so that doesn't
+        * make much sense without retransmit_limit anyway.
+        */
+       u_int retransmit_tries_max;
+
+       /**
         * Retransmission timeout
         */
        double retransmit_timeout;
@@ -331,7 +339,7 @@ METHOD(task_manager_t, retransmit, status_t,
        if (message_id == this->initiating.mid &&
                array_count(this->initiating.packets))
        {
-               uint32_t timeout, max_jitter;
+               uint32_t timeout = UINT32_MAX, max_jitter;
                job_t *job;
                enumerator_t *enumerator;
                packet_t *packet;
@@ -357,22 +365,7 @@ METHOD(task_manager_t, retransmit, status_t,
 
                if (!mobike || !mobike->is_probing(mobike))
                {
-                       if (this->initiating.retransmitted <= this->retransmit_tries)
-                       {
-                               timeout = (uint32_t)(this->retransmit_timeout * 1000.0 *
-                                       pow(this->retransmit_base, this->initiating.retransmitted));
-
-                               if (this->retransmit_limit)
-                               {
-                                       timeout = min(timeout, this->retransmit_limit);
-                               }
-                               if (this->retransmit_jitter)
-                               {
-                                       max_jitter = (timeout / 100.0) * this->retransmit_jitter;
-                                       timeout -= max_jitter * (random() / (RAND_MAX + 1.0));
-                               }
-                       }
-                       else
+                       if (this->initiating.retransmitted > this->retransmit_tries)
                        {
                                DBG1(DBG_IKE, "giving up after %d retransmits",
                                         this->initiating.retransmitted - 1);
@@ -380,7 +373,21 @@ METHOD(task_manager_t, retransmit, status_t,
                                                                   packet);
                                return DESTROY_ME;
                        }
-
+                       if (this->retransmit_tries_max &&
+                               this->initiating.retransmitted <= this->retransmit_tries_max)
+                       {
+                               timeout = (uint32_t)(this->retransmit_timeout * 1000.0 *
+                                       pow(this->retransmit_base, this->initiating.retransmitted));
+                       }
+                       if (this->retransmit_limit)
+                       {
+                               timeout = min(timeout, this->retransmit_limit);
+                       }
+                       if (this->retransmit_jitter)
+                       {
+                               max_jitter = (timeout / 100.0) * this->retransmit_jitter;
+                               timeout -= max_jitter * (random() / (RAND_MAX + 1.0));
+                       }
                        if (this->initiating.retransmitted)
                        {
                                DBG1(DBG_IKE, "retransmit %d of request with message ID %d",
@@ -2346,5 +2353,11 @@ task_manager_v2_t *task_manager_v2_create(ike_sa_t *ike_sa)
                                        "%s.make_before_break", FALSE, lib->ns),
        );
 
+       if (this->retransmit_base > 1)
+       {       /* based on 1000 * timeout * base^try */
+               this->retransmit_tries_max = log(UINT32_MAX/
+                                                                                (1000.0 * this->retransmit_timeout))/
+                                                                        log(this->retransmit_base);
+       }
        return &this->public;
 }
index e1c8d23..b3f589e 100644 (file)
@@ -25,7 +25,7 @@
 u_int task_manager_total_retransmit_timeout()
 {
        double timeout, base, limit = 0, total = 0;
-       int tries, i;
+       int tries, max_tries = 0, i;
 
        tries = lib->settings->get_int(lib->settings, "%s.retransmit_tries",
                                                                   RETRANSMIT_TRIES, lib->ns);
@@ -36,9 +36,18 @@ u_int task_manager_total_retransmit_timeout()
        limit = lib->settings->get_double(lib->settings, "%s.retransmit_limit",
                                                                          0, lib->ns);
 
+       if (base > 1)
+       {
+               max_tries = log(UINT32_MAX/(1000.0 * timeout))/log(base);
+       }
+
        for (i = 0; i <= tries; i++)
        {
-               double interval = timeout * pow(base, i);
+               double interval = UINT32_MAX/1000.0;
+               if (max_tries && i <= max_tries)
+               {
+                       interval = timeout * pow(base, i);
+               }
                if (limit)
                {
                        interval = min(interval, limit);