diff options
author | Dimitri Staessens <dimitri@ouroboros.rocks> | 2022-03-19 21:47:01 +0100 |
---|---|---|
committer | Sander Vrijders <sander@ouroboros.rocks> | 2022-03-30 14:55:13 +0200 |
commit | b248af5ee14b6910c6b21a29fb8ebe2126a8f68e (patch) | |
tree | 1d20f78ca078c36362f8178b0da501a479f9d268 /src/lib | |
parent | da6d63bd3a17e681c6f62c74ffe09abe74595c1e (diff) | |
download | ouroboros-b248af5ee14b6910c6b21a29fb8ebe2126a8f68e.tar.gz ouroboros-b248af5ee14b6910c6b21a29fb8ebe2126a8f68e.zip |
lib: Non-configurable delayed acks in FRCP
It doesn't really make sense to manually and one-sidedly configure the
timeout of delayed acknowledgements, as setting it too high upsets the
peer's sRTT estimates. Even worse, it also causes a lot of spurious
retransmissions if it exceeds the sRTT mean deviation calculated by
the receiver. Compensating on bare acknowledgment for the ack delay
could improve the RTT estimate deviation, but not the spurious
retransmissions if it was set too high. This sets the delayed ack to
wait for a single RTT mean deviation. Probably needs more tweaking to
further reduce differences between the RTT estimates at the sender and
receiver, e.g. compensate the RTT estimate for delayed acks, or
increase the RTO to add 8 mdevs to sRTT instead of 4. However, it
looks like the mdev estimate is the trickiest one to get to sync, not
the RTT average. Linux reduces the sample weight for mdev from 1/4 to
1/32 in some cases, will give that a shot some day too to see if that
further align sRTT estimates. In any case, this patch already improves
things a lot.
Also fixes a bug where the sender was sending acknowlegments on the
first packets in flight for the 0 sequence number. The receiver
activity was measured in seconds but compared to a timeout value in
nanoseconds.
There's still a lot of spurious retransmissions that start after
actual packet loss occurs, I'm still investigating what causes it.
Signed-off-by: Dimitri Staessens <dimitri@ouroboros.rocks>
Signed-off-by: Sander Vrijders <sander@ouroboros.rocks>
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/lib/config.h.in | 2 | ||||
-rw-r--r-- | src/lib/frct.c | 26 | ||||
-rw-r--r-- | src/lib/timerwheel.c | 20 |
4 files changed, 24 insertions, 30 deletions
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 022c5cca..0c327320 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -192,8 +192,6 @@ set(DELTA_T_ACK 10 CACHE STRING "Maximum time to acknowledge a packet (s)") set(DELTA_T_RTX 120 CACHE STRING "Maximum time to retransmit a packet (s)") -set(DELTA_T_ACK_DELAY 10 CACHE STRING - "Maximum time to wait before acknowledging a packet (ms)") set(FRCT_REORDER_QUEUE_SIZE 256 CACHE STRING "Size of the reordering queue, must be a power of 2") set(FRCT_START_WINDOW 64 CACHE STRING @@ -214,9 +212,9 @@ set(RXM_WHEEL_LEVELS 3 CACHE STRING "Number of levels in the retransmission wheel") set(RXM_WHEEL_SLOTS_PER_LEVEL 256 CACHE STRING "Number of slots per level in the retransmission wheel, must be a power of 2") -set(ACK_WHEEL_SLOTS 128 CACHE STRING +set(ACK_WHEEL_SLOTS 256 CACHE STRING "Number of slots in the acknowledgment wheel, must be a power of 2") -set(ACK_WHEEL_RESOLUTION 20 CACHE STRING +set(ACK_WHEEL_RESOLUTION 18 CACHE STRING "Minimum acknowledgment delay (ns), as a power to 2") if (HAVE_FUSE) diff --git a/src/lib/config.h.in b/src/lib/config.h.in index d534cf77..a51c2791 100644 --- a/src/lib/config.h.in +++ b/src/lib/config.h.in @@ -72,8 +72,6 @@ #define DELT_A (@DELTA_T_ACK@) /* ns */ #define DELT_R (@DELTA_T_RTX@) /* ns */ -#define DELT_ACK (@DELTA_T_ACK_DELAY@ * MILLION) /* ns */ - #define RQ_SIZE (@FRCT_REORDER_QUEUE_SIZE@) #define START_WINDOW (@FRCT_START_WINDOW@) #define RTO_MIN (@FRCT_RTO_MIN@ * 1000) diff --git a/src/lib/frct.c b/src/lib/frct.c index e7193da0..99962868 100644 --- a/src/lib/frct.c +++ b/src/lib/frct.c @@ -279,9 +279,11 @@ static void send_frct_pkt(struct frcti * frcti) assert(frcti); - pthread_rwlock_rdlock(&frcti->lock); + clock_gettime(PTHREAD_COND_CLOCK, &now); - if (frcti->rcv_cr.lwe == frcti->rcv_cr.seqno) { + pthread_rwlock_wrlock(&frcti->lock); + + if (!after(frcti->rcv_cr.lwe, frcti->rcv_cr.seqno)) { pthread_rwlock_unlock(&frcti->lock); return; } @@ -290,24 +292,19 @@ static void send_frct_pkt(struct frcti * frcti) ackno = frcti->rcv_cr.lwe; rwe = frcti->rcv_cr.rwe; - clock_gettime(PTHREAD_COND_CLOCK, &now); - diff = ts_diff_ns(&frcti->rcv_cr.act, &now); - pthread_rwlock_unlock(&frcti->lock); - - if (diff > frcti->a || diff < DELT_ACK) + if (diff > frcti->a || diff < frcti->mdev) { + pthread_rwlock_unlock(&frcti->lock); return; + } - __send_frct_pkt(fd, FRCT_ACK | FRCT_FC, ackno, rwe); - - pthread_rwlock_wrlock(&frcti->lock); - - if (after(frcti->rcv_cr.lwe, frcti->rcv_cr.seqno)) - frcti->rcv_cr.seqno = frcti->rcv_cr.lwe; + frcti->rcv_cr.seqno = frcti->rcv_cr.lwe; pthread_rwlock_unlock(&frcti->lock); + __send_frct_pkt(fd, FRCT_ACK | FRCT_FC, ackno, rwe); + } static void __send_rdv(int fd) @@ -734,8 +731,7 @@ static int __frcti_snd(struct frcti * frcti, frcti->t_probe = now; frcti->probe = true; } - - if (now.tv_sec - rcv_cr->act.tv_sec <= frcti->a) { + if ((now.tv_sec - rcv_cr->act.tv_sec) * BILLION <= frcti->a) { pci->flags |= FRCT_ACK; pci->ackno = hton32(rcv_cr->lwe); rcv_cr->seqno = rcv_cr->lwe; diff --git a/src/lib/timerwheel.c b/src/lib/timerwheel.c index eb25758b..580f838d 100644 --- a/src/lib/timerwheel.c +++ b/src/lib/timerwheel.c @@ -218,8 +218,6 @@ static void timerwheel_move(void) if (ts_to_ns(now) - act > (rto << 2)) rto <<= r->mul++; - else - r->mul = 0; /* Schedule at least in the next time slot. */ slot = ts_to_ns(now) >> RXMQ_RES; @@ -234,7 +232,7 @@ static void timerwheel_move(void) if (lvl >= RXMQ_LVLS) /* Can't reschedule */ goto flow_down; - rslot = (rslot + slot) & (RXMQ_SLOTS - 1); + rslot = (rslot + slot + 1) & (RXMQ_SLOTS - 1); #ifdef RXM_BLOCKING if (ipcp_sdb_reserve(&sdb, r->len) < 0) @@ -256,6 +254,7 @@ static void timerwheel_move(void) /* Retransmit the copy. */ pci->ackno = hton32(rcv_lwe); + #ifdef RXM_BLOCKING if (shm_rbuff_write_b(f->tx_rb, idx, NULL) < 0) #else @@ -264,15 +263,14 @@ static void timerwheel_move(void) goto flow_down; shm_flow_set_notify(f->set, f->flow_id, FLOW_PKT); - - reschedule: + reschedule: list_add(&r->next, &rw.rxms[lvl][rslot]); continue; - flow_down: + flow_down: shm_rbuff_set_acl(f->tx_rb, ACL_FLOWDOWN); shm_rbuff_set_acl(f->rx_rb, ACL_FLOWDOWN); - cleanup: + cleanup: #ifdef RXM_BUFFER_ON_HEAP free(r->pkt); #else @@ -375,7 +373,7 @@ static int timerwheel_rxm(struct frcti * frcti, return -EPERM; } - slot = (slot + rto_slot) & (RXMQ_SLOTS - 1); + slot = (slot + rto_slot + 1) & (RXMQ_SLOTS - 1); pthread_mutex_lock(&rw.lock); @@ -403,9 +401,13 @@ static int timerwheel_ack(int fd, clock_gettime(PTHREAD_COND_CLOCK, &now); - slot = (((ts_to_ns(now) + DELT_ACK) >> ACKQ_RES) + 1) + pthread_rwlock_rdlock(&frcti->lock); + + slot = (((ts_to_ns(now) + frcti->mdev) >> ACKQ_RES) + 1) & (ACKQ_SLOTS - 1); + pthread_rwlock_unlock(&frcti->lock); + a->fd = fd; a->frcti = frcti; a->flow_id = ai.flows[fd].flow_id; |