From b248af5ee14b6910c6b21a29fb8ebe2126a8f68e Mon Sep 17 00:00:00 2001 From: Dimitri Staessens Date: Sat, 19 Mar 2022 21:47:01 +0100 Subject: lib: Non-configurable delayed acks in FRCP It doesn't really make sense to manually and one-sidedly configure the timeout of delayed acknowledgements, as setting it too high upsets the peer's sRTT estimates. Even worse, it also causes a lot of spurious retransmissions if it exceeds the sRTT mean deviation calculated by the receiver. Compensating on bare acknowledgment for the ack delay could improve the RTT estimate deviation, but not the spurious retransmissions if it was set too high. This sets the delayed ack to wait for a single RTT mean deviation. Probably needs more tweaking to further reduce differences between the RTT estimates at the sender and receiver, e.g. compensate the RTT estimate for delayed acks, or increase the RTO to add 8 mdevs to sRTT instead of 4. However, it looks like the mdev estimate is the trickiest one to get to sync, not the RTT average. Linux reduces the sample weight for mdev from 1/4 to 1/32 in some cases, will give that a shot some day too to see if that further align sRTT estimates. In any case, this patch already improves things a lot. Also fixes a bug where the sender was sending acknowlegments on the first packets in flight for the 0 sequence number. The receiver activity was measured in seconds but compared to a timeout value in nanoseconds. There's still a lot of spurious retransmissions that start after actual packet loss occurs, I'm still investigating what causes it. Signed-off-by: Dimitri Staessens Signed-off-by: Sander Vrijders --- src/lib/CMakeLists.txt | 6 ++---- src/lib/config.h.in | 2 -- src/lib/frct.c | 26 +++++++++++--------------- src/lib/timerwheel.c | 20 +++++++++++--------- 4 files changed, 24 insertions(+), 30 deletions(-) (limited to 'src/lib') diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 022c5cca..0c327320 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -192,8 +192,6 @@ set(DELTA_T_ACK 10 CACHE STRING "Maximum time to acknowledge a packet (s)") set(DELTA_T_RTX 120 CACHE STRING "Maximum time to retransmit a packet (s)") -set(DELTA_T_ACK_DELAY 10 CACHE STRING - "Maximum time to wait before acknowledging a packet (ms)") set(FRCT_REORDER_QUEUE_SIZE 256 CACHE STRING "Size of the reordering queue, must be a power of 2") set(FRCT_START_WINDOW 64 CACHE STRING @@ -214,9 +212,9 @@ set(RXM_WHEEL_LEVELS 3 CACHE STRING "Number of levels in the retransmission wheel") set(RXM_WHEEL_SLOTS_PER_LEVEL 256 CACHE STRING "Number of slots per level in the retransmission wheel, must be a power of 2") -set(ACK_WHEEL_SLOTS 128 CACHE STRING +set(ACK_WHEEL_SLOTS 256 CACHE STRING "Number of slots in the acknowledgment wheel, must be a power of 2") -set(ACK_WHEEL_RESOLUTION 20 CACHE STRING +set(ACK_WHEEL_RESOLUTION 18 CACHE STRING "Minimum acknowledgment delay (ns), as a power to 2") if (HAVE_FUSE) diff --git a/src/lib/config.h.in b/src/lib/config.h.in index d534cf77..a51c2791 100644 --- a/src/lib/config.h.in +++ b/src/lib/config.h.in @@ -72,8 +72,6 @@ #define DELT_A (@DELTA_T_ACK@) /* ns */ #define DELT_R (@DELTA_T_RTX@) /* ns */ -#define DELT_ACK (@DELTA_T_ACK_DELAY@ * MILLION) /* ns */ - #define RQ_SIZE (@FRCT_REORDER_QUEUE_SIZE@) #define START_WINDOW (@FRCT_START_WINDOW@) #define RTO_MIN (@FRCT_RTO_MIN@ * 1000) diff --git a/src/lib/frct.c b/src/lib/frct.c index e7193da0..99962868 100644 --- a/src/lib/frct.c +++ b/src/lib/frct.c @@ -279,9 +279,11 @@ static void send_frct_pkt(struct frcti * frcti) assert(frcti); - pthread_rwlock_rdlock(&frcti->lock); + clock_gettime(PTHREAD_COND_CLOCK, &now); - if (frcti->rcv_cr.lwe == frcti->rcv_cr.seqno) { + pthread_rwlock_wrlock(&frcti->lock); + + if (!after(frcti->rcv_cr.lwe, frcti->rcv_cr.seqno)) { pthread_rwlock_unlock(&frcti->lock); return; } @@ -290,24 +292,19 @@ static void send_frct_pkt(struct frcti * frcti) ackno = frcti->rcv_cr.lwe; rwe = frcti->rcv_cr.rwe; - clock_gettime(PTHREAD_COND_CLOCK, &now); - diff = ts_diff_ns(&frcti->rcv_cr.act, &now); - pthread_rwlock_unlock(&frcti->lock); - - if (diff > frcti->a || diff < DELT_ACK) + if (diff > frcti->a || diff < frcti->mdev) { + pthread_rwlock_unlock(&frcti->lock); return; + } - __send_frct_pkt(fd, FRCT_ACK | FRCT_FC, ackno, rwe); - - pthread_rwlock_wrlock(&frcti->lock); - - if (after(frcti->rcv_cr.lwe, frcti->rcv_cr.seqno)) - frcti->rcv_cr.seqno = frcti->rcv_cr.lwe; + frcti->rcv_cr.seqno = frcti->rcv_cr.lwe; pthread_rwlock_unlock(&frcti->lock); + __send_frct_pkt(fd, FRCT_ACK | FRCT_FC, ackno, rwe); + } static void __send_rdv(int fd) @@ -734,8 +731,7 @@ static int __frcti_snd(struct frcti * frcti, frcti->t_probe = now; frcti->probe = true; } - - if (now.tv_sec - rcv_cr->act.tv_sec <= frcti->a) { + if ((now.tv_sec - rcv_cr->act.tv_sec) * BILLION <= frcti->a) { pci->flags |= FRCT_ACK; pci->ackno = hton32(rcv_cr->lwe); rcv_cr->seqno = rcv_cr->lwe; diff --git a/src/lib/timerwheel.c b/src/lib/timerwheel.c index eb25758b..580f838d 100644 --- a/src/lib/timerwheel.c +++ b/src/lib/timerwheel.c @@ -218,8 +218,6 @@ static void timerwheel_move(void) if (ts_to_ns(now) - act > (rto << 2)) rto <<= r->mul++; - else - r->mul = 0; /* Schedule at least in the next time slot. */ slot = ts_to_ns(now) >> RXMQ_RES; @@ -234,7 +232,7 @@ static void timerwheel_move(void) if (lvl >= RXMQ_LVLS) /* Can't reschedule */ goto flow_down; - rslot = (rslot + slot) & (RXMQ_SLOTS - 1); + rslot = (rslot + slot + 1) & (RXMQ_SLOTS - 1); #ifdef RXM_BLOCKING if (ipcp_sdb_reserve(&sdb, r->len) < 0) @@ -256,6 +254,7 @@ static void timerwheel_move(void) /* Retransmit the copy. */ pci->ackno = hton32(rcv_lwe); + #ifdef RXM_BLOCKING if (shm_rbuff_write_b(f->tx_rb, idx, NULL) < 0) #else @@ -264,15 +263,14 @@ static void timerwheel_move(void) goto flow_down; shm_flow_set_notify(f->set, f->flow_id, FLOW_PKT); - - reschedule: + reschedule: list_add(&r->next, &rw.rxms[lvl][rslot]); continue; - flow_down: + flow_down: shm_rbuff_set_acl(f->tx_rb, ACL_FLOWDOWN); shm_rbuff_set_acl(f->rx_rb, ACL_FLOWDOWN); - cleanup: + cleanup: #ifdef RXM_BUFFER_ON_HEAP free(r->pkt); #else @@ -375,7 +373,7 @@ static int timerwheel_rxm(struct frcti * frcti, return -EPERM; } - slot = (slot + rto_slot) & (RXMQ_SLOTS - 1); + slot = (slot + rto_slot + 1) & (RXMQ_SLOTS - 1); pthread_mutex_lock(&rw.lock); @@ -403,9 +401,13 @@ static int timerwheel_ack(int fd, clock_gettime(PTHREAD_COND_CLOCK, &now); - slot = (((ts_to_ns(now) + DELT_ACK) >> ACKQ_RES) + 1) + pthread_rwlock_rdlock(&frcti->lock); + + slot = (((ts_to_ns(now) + frcti->mdev) >> ACKQ_RES) + 1) & (ACKQ_SLOTS - 1); + pthread_rwlock_unlock(&frcti->lock); + a->fd = fd; a->frcti = frcti; a->flow_id = ai.flows[fd].flow_id; -- cgit v1.2.3