From 1bb26d793e8b95313769e4fcf530321076401390 Mon Sep 17 00:00:00 2001 From: Dimitri Staessens Date: Wed, 2 Dec 2020 16:08:47 +0100 Subject: ipcpd: Simplify multi-bit (F)ECN policy The mb-ecn policy has a couple of divisions in the math, which I wanted to avoid. Now it measures the number of bytes sent in a window, and updates the next window with AIMD logic. If the number of bytes in the window is reached, the call blocks. To avoid long packet bursts, the window size continually scales to contain between CA_MINPS (8) and CA_MAXPS (64) packets. Signed-off-by: Dimitri Staessens Signed-off-by: Sander Vrijders --- src/ipcpd/unicast/pol/ca-mb-ecn.c | 123 +++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 54 deletions(-) (limited to 'src/ipcpd/unicast') diff --git a/src/ipcpd/unicast/pol/ca-mb-ecn.c b/src/ipcpd/unicast/pol/ca-mb-ecn.c index 2de8f8e7..03f7044d 100644 --- a/src/ipcpd/unicast/pol/ca-mb-ecn.c +++ b/src/ipcpd/unicast/pol/ca-mb-ecn.c @@ -37,26 +37,32 @@ #include /* congestion avoidance constants */ -#define CA_SHFT 5 -#define CA_WND (1 << CA_SHFT) -#define CA_UPD (1 << (CA_SHFT - 3)) -#define CA_SLOT 18 -#define CA_AI 20000 +#define CA_SHFT 5 /* Average over 32 pkts */ +#define CA_WND (1 << CA_SHFT) /* 32 pkts receiver wnd */ +#define CA_UPD (1 << (CA_SHFT - 3)) /* Update snd every 8 pkt */ +#define CA_SLOT 24 /* Initial slot = 16 ms */ +#define CA_INC 1 << 16 /* ~4MiB/s^2 additive inc */ +#define CA_IWL 1 << 16 /* Initial limit ~4MiB/s */ +#define CA_MINPS 8 /* Mimimum pkts / slot */ +#define CA_MAXPS 64 /* Maximum pkts / slot */ #define ECN_Q_SHFT 5 #define ts_to_ns(ts) (ts.tv_sec * BILLION + ts.tv_nsec) struct mb_ecn_ctx { - uint16_t rx_ece; /* level of congestion (upstream) */ - size_t rx_ctr; /* receiver side packet counter */ - - uint16_t tx_ece; /* level of congestion (downstream) */ - size_t tx_ctr; /* sender side packet counter */ - size_t tx_aps; /* average packet size */ - time_t tx_wnd; /* tgt time to send packets (ns) */ + uint16_t rx_ece; /* Level of congestion (upstream) */ + size_t rx_ctr; /* Receiver side packet counter */ + + uint16_t tx_ece; /* Level of congestion (downstream) */ + size_t tx_ctr; /* Sender side packet counter */ + size_t tx_wbc; /* Window byte count */ + size_t tx_wpc; /* Window packet count */ + size_t tx_wbl; /* Window byte limit */ bool tx_cav; /* Congestion avoidance */ + size_t tx_mul; /* Slot size multiplier */ + size_t tx_inc; /* Additive increase */ size_t tx_slot; - struct timespec t_sent; /* last sent packet */ + struct timespec t_sent; /* Last sent packet */ }; struct pol_ca_ops mb_ecn_ca_ops = { @@ -71,15 +77,22 @@ struct pol_ca_ops mb_ecn_ca_ops = { void * mb_ecn_ctx_create(void) { - + struct timespec now; struct mb_ecn_ctx * ctx; ctx = malloc(sizeof(*ctx)); if (ctx == NULL) return NULL; + clock_gettime(PTHREAD_COND_CLOCK, &now); + memset(ctx, 0, sizeof(*ctx)); + ctx->tx_mul = CA_SLOT; + ctx->tx_wbl = CA_IWL; + ctx->tx_inc = CA_INC; + ctx->tx_slot = ts_to_ns(now) >> ctx->tx_mul; + return (void *) ctx; } @@ -91,54 +104,59 @@ void mb_ecn_ctx_destroy(void * ctx) ca_wnd_t mb_ecn_ctx_update_snd(void * _ctx, size_t len) { - struct timespec now; - size_t slot; - time_t gap; - ca_wnd_t wnd; - + struct timespec now; + size_t slot; + ca_wnd_t wnd; struct mb_ecn_ctx * ctx = _ctx; clock_gettime(PTHREAD_COND_CLOCK, &now); - if (ctx->tx_wnd == 0) { /* 10 ms initial window estimate */ - ctx->tx_wnd = 10 * MILLION; - gap = ctx->tx_wnd >> CA_SHFT; - ctx->tx_aps = len >> CA_SHFT; - ctx->tx_slot = ts_to_ns(now) >> CA_SLOT; - } else { - gap = ts_diff_ns(&ctx->t_sent, &now); - ctx->tx_aps -= ctx->tx_aps >> CA_SHFT; - ctx->tx_aps += len; - } - ctx->t_sent = now; - slot = ts_to_ns(now) >> CA_SLOT; + slot = ts_to_ns(now) >> ctx->tx_mul; ctx->tx_ctr++; + ctx->tx_wpc++; - if (slot - ctx->tx_slot > 0) { - ctx->tx_slot = slot; + if (ctx->tx_ctr > CA_WND) + ctx->tx_ece = 0; - if (ctx->tx_ctr > CA_WND) - ctx->tx_ece = 0; + if (slot > ctx->tx_slot) { + ctx->tx_slot = slot; - /* Slow start */ - if (!ctx->tx_cav) { - ctx->tx_wnd >>= 1; - /* Multiplicative Decrease */ - } else if (ctx->tx_ece) { /* MD */ - ctx->tx_wnd += (ctx->tx_wnd * ctx->tx_ece) + if (!ctx->tx_cav && ctx->tx_wbc > ctx->tx_wbl) /* Slow start */ + ctx->tx_wbl <<= 1; + else if (ctx->tx_ece) /* Multiplicative Decrease */ + ctx->tx_wbl -= (ctx->tx_wbl * ctx->tx_ece) >> (CA_SHFT + 8); - /* Additive Increase */ - } else { - size_t bw = ctx->tx_aps * BILLION / ctx->tx_wnd; - bw += CA_AI; - ctx->tx_wnd = ctx->tx_aps * BILLION / bw; + else /* Additive Increase */ + ctx->tx_wbl = ctx->tx_wbl + ctx->tx_inc; + + /* Window scaling */ + if (ctx->tx_wpc < CA_MINPS) { + ++ctx->tx_mul; + ctx->tx_slot >>= 1; + ctx->tx_wbl <<= 1; + ctx->tx_inc <<= 1; } + + if (ctx->tx_wpc > CA_MAXPS) { + --ctx->tx_mul; /* Underflows at ~CA_MAXPS billion pps */ + ctx->tx_slot <<= 1; + ctx->tx_wbl >>= 1; + ctx->tx_inc >>= 1; + } + + ctx->tx_wbc = 0; + ctx->tx_wpc = 0; } - wnd.wait = (ctx->tx_wnd >> CA_SHFT) - gap; + ctx->tx_wbc += len; + + if (ctx->tx_wbc > ctx->tx_wbl) + wnd.wait = ((ctx->tx_slot + 1) << ctx->tx_mul) - ts_to_ns(now); + else + wnd.wait = 0; return wnd; } @@ -147,7 +165,7 @@ void mb_ecn_wnd_wait(ca_wnd_t wnd) { if (wnd.wait > 0) { struct timespec s = {0, 0}; - if (wnd.wait > BILLION) /* Don't care throttling < 1pps */ + if (wnd.wait > BILLION) /* Don't care throttling < 1s */ s.tv_sec = 1; else s.tv_nsec = wnd.wait; @@ -169,18 +187,15 @@ bool mb_ecn_ctx_update_rcv(void * _ctx, if ((ctx->rx_ece | ecn) == 0) return false; - if (ecn == 0) { - /* end of congestion */ + if (ecn == 0) { /* End of congestion */ ctx->rx_ece >>= 2; update = ctx->rx_ece == 0; } else { - if (ctx->rx_ece == 0) { - /* start of congestion */ + if (ctx->rx_ece == 0) { /* Start of congestion */ ctx->rx_ece = ecn; ctx->rx_ctr = 0; update = true; - } else { - /* congestion update */ + } else { /* Congestion update */ ctx->rx_ece -= ctx->rx_ece >> CA_SHFT; ctx->rx_ece += ecn; update = (ctx->rx_ctr++ & (CA_UPD - 1)) == true; -- cgit v1.2.3