ipcpd: Add congestion avoidance policies

This adds congestion avoidance policies to the unicast IPCP. The default policy is a multi-bit explicit congestion avoidance algorithm based on data-center TCP congestion avoidance (DCTCP) to relay information about the maximum queue depth that packets experienced to the receiver. There's also a "nop" policy to disable congestion avoidance for testing and benchmarking purposes. The (initial) API for congestion avoidance policies is: void * (* ctx_create)(void); void (* ctx_destroy)(void * ctx); These calls create / and or destroy a context for congestion control for a specific flow. Thread-safety of the context is the responsability of the flow allocator (operations on the ctx should be performed under a lock). ca_wnd_t (* ctx_update_snd)(void * ctx, size_t len); This is the sender call to update the context, and should be called for every packet that is sent on the flow. The len parameter in this API is the packet length, which allows calculating the bandwidth. It returns an opaque union type that is used for the call to check/wait if the congestion window is open or closed (and allowing to release locks before waiting). bool (* ctx_update_rcv)(void * ctx, size_t len, uint8_t ecn, uint16_t * ece); This is the call to update the flow congestion context on the receiver side. It should be called for every received packet. It gets the ecn value from the packet and its length, and returns the ECE (explicit congestion experienced) value to be sent to the sender in case of congestion. The boolean returned signals whether or not a congestion update needs to be sent. void (* ctx_update_ece)(void * ctx, uint16_t ece); This is the call for the sending side top update the context when it receives an ECE update from the receiver. void (* wnd_wait)(ca_wnd_t wnd); This is a (blocking) call that waits for the congestion window to clear. It should be stateless (to avoid waiting under locks). This may change later on if passing the context is needed for different algorithms. uint8_t (* calc_ecn)(int fd, size_t len); This is the call that intermediate IPCPs(routers) should use to update the ECN field on passing packets. The multi-bit ECN policy bases the value for the ECN field on the depth of the rbuff queue packets will be sent on. I created another call to grab the queue depth as fccntl is write-locking the application. We can further optimize this to avoid most locking on the rbuff. Signed-off-by: Dimitri Staessens <dimitri@ouroboros.rocks> Signed-off-by: Sander Vrijders <sander@ouroboros.rocks>
author: Dimitri Staessens <dimitri@ouroboros.rocks> 2020-12-01 19:19:04 +0100
committer: Sander Vrijders <sander@ouroboros.rocks> 2020-12-02 19:21:29 +0100
commit: 8e1c0e62feb4832dca2b53e51ab0e1cb8f48e5b1 (patch)
tree: ce90e18277c38e7ee592d441ae4de197081c6476 /src/ipcpd/unicast/fa.c
parent: aef6bdb1eadf8779173145710306ea5b6d81b8ec (diff)
download: ouroboros-8e1c0e62feb4832dca2b53e51ab0e1cb8f48e5b1.tar.gz
ouroboros-8e1c0e62feb4832dca2b53e51ab0e1cb8f48e5b1.zip
1 files changed, 170 insertions, 52 deletions
diff --git a/src/ipcpd/unicast/fa.c b/src/ipcpd/unicast/fa.c
index e154d785..8f268a9d 100644
--- a/src/ipcpd/unicast/fa.c
+++ b/src/ipcpd/unicast/fa.c
@@ -42,6 +42,7 @@
 #include "psched.h"
 #include "ipcp.h"
 #include "dt.h"
+#include "ca.h"
 
 #include <pthread.h>
 #include <stdlib.h>
@@ -49,9 +50,10 @@
 
 #define TIMEOUT 10000 /* nanoseconds */
 
-#define FLOW_REQ   0
-#define FLOW_REPLY 1
-#define MSGBUFSZ   2048
+#define FLOW_REQ    0
+#define FLOW_REPLY  1
+#define FLOW_UPDATE 2
+#define MSGBUFSZ    2048
 
 struct fa_msg {
         uint64_t s_addr;
@@ -59,6 +61,7 @@ struct fa_msg {
         uint32_t s_eid;
         uint8_t  code;
         int8_t   response;
+        uint16_t ece;
         /* QoS parameters from spec, aligned */
         uint8_t  availability;
         uint8_t  in_order;
@@ -75,10 +78,16 @@ struct cmd {
         struct shm_du_buff * sdb;
 };
 
+struct fa_flow {
+        int      r_eid;  /* remote endpoint id               */
+        uint64_t r_addr; /* remote address                   */
+        void *   ctx;    /* congestion avoidance context     */
+};
+
 struct {
         pthread_rwlock_t flows_lock;
-        int              r_eid[PROG_MAX_FLOWS];
-        uint64_t         r_addr[PROG_MAX_FLOWS];
+        struct fa_flow   flows[PROG_MAX_FLOWS];
+
         int              fd;
 
         struct list_head cmds;
@@ -93,22 +102,56 @@ static void packet_handler(int                  fd,
                            qoscube_t            qc,
                            struct shm_du_buff * sdb)
 {
-        pthread_rwlock_rdlock(&fa.flows_lock);
+        struct fa_flow * flow;
+        uint64_t         r_addr;
+        uint32_t         r_eid;
+        ca_wnd_t         wnd;
+        size_t           len;
 
-        if (dt_write_packet(fa.r_addr[fd], qc, fa.r_eid[fd], sdb)) {
-                pthread_rwlock_unlock(&fa.flows_lock);
+        flow = &fa.flows[fd];
+
+        pthread_rwlock_wrlock(&fa.flows_lock);
+
+        len = shm_du_buff_tail(sdb) - shm_du_buff_head(sdb);
+
+        wnd = ca_ctx_update_snd(flow->ctx, len);
+
+        r_addr = flow->r_addr;
+        r_eid  = flow->r_eid;
+
+        pthread_rwlock_unlock(&fa.flows_lock);
+
+        ca_wnd_wait(wnd);
+
+        if (dt_write_packet(r_addr, qc, r_eid, sdb)) {
                 ipcp_sdb_release(sdb);
                 log_warn("Failed to forward packet.");
                 return;
         }
+}
 
-        pthread_rwlock_unlock(&fa.flows_lock);
+static int fa_flow_init(struct fa_flow * flow)
+{
+        memset(flow, 0, sizeof(*flow));
+
+        flow->r_eid  = -1;
+        flow->r_addr = INVALID_ADDR;
+
+        flow->ctx = ca_ctx_create();
+        if (flow->ctx == NULL)
+                return -1;
+
+        return 0;
 }
 
-static void destroy_conn(int fd)
+static void fa_flow_fini(struct fa_flow * flow)
 {
-        fa.r_eid[fd]  = -1;
-        fa.r_addr[fd] = INVALID_ADDR;
+        ca_ctx_destroy(flow->ctx);
+
+        memset(flow, 0, sizeof(*flow));
+
+        flow->r_eid  = -1;
+        flow->r_addr = INVALID_ADDR;
 }
 
 static void fa_post_packet(void *               comp,
@@ -145,14 +188,15 @@ static void * fa_handle_packet(void * o)
         (void) o;
 
         while (true) {
-                struct timespec abstime;
-                int             fd;
-                uint8_t         buf[MSGBUFSZ];
-                struct fa_msg * msg;
-                qosspec_t       qs;
-                struct cmd *    cmd;
-                size_t          len;
-                size_t          msg_len;
+                struct timespec  abstime;
+                int              fd;
+                uint8_t          buf[MSGBUFSZ];
+                struct fa_msg *  msg;
+                qosspec_t        qs;
+                struct cmd *     cmd;
+                size_t           len;
+                size_t           msg_len;
+                struct fa_flow * flow;
 
                 pthread_mutex_lock(&fa.mtx);
 
@@ -232,10 +276,14 @@ static void * fa_handle_packet(void * o)
                                 continue;
                         }
 
+                        flow = &fa.flows[fd];
+
                         pthread_rwlock_wrlock(&fa.flows_lock);
 
-                        fa.r_eid[fd]  = ntoh32(msg->s_eid);
-                        fa.r_addr[fd] = ntoh64(msg->s_addr);
+                        fa_flow_init(flow);
+
+                        flow->r_eid  = ntoh32(msg->s_eid);
+                        flow->r_addr = ntoh64(msg->s_addr);
 
                         pthread_rwlock_unlock(&fa.flows_lock);
 
@@ -248,19 +296,32 @@ static void * fa_handle_packet(void * o)
                 case FLOW_REPLY:
                         assert(len >= sizeof(*msg));
 
+                        flow = &fa.flows[ntoh32(msg->r_eid)];
+
                         pthread_rwlock_wrlock(&fa.flows_lock);
 
-                        fa.r_eid[ntoh32(msg->r_eid)] = ntoh32(msg->s_eid);
+                        flow->r_eid = ntoh32(msg->s_eid);
+
+                        if (msg->response < 0)
+                                fa_flow_fini(flow);
+                        else
+                                psched_add(fa.psched, ntoh32(msg->r_eid));
+
+                        pthread_rwlock_unlock(&fa.flows_lock);
 
                         ipcp_flow_alloc_reply(ntoh32(msg->r_eid),
                                               msg->response,
                                               buf + sizeof(*msg),
                                               len - sizeof(*msg));
+                        break;
+                case FLOW_UPDATE:
+                        assert(len >= sizeof(*msg));
 
-                        if (msg->response < 0)
-                                destroy_conn(ntoh32(msg->r_eid));
-                        else
-                                psched_add(fa.psched, ntoh32(msg->r_eid));
+                        flow = &fa.flows[ntoh32(msg->r_eid)];
+
+                        pthread_rwlock_wrlock(&fa.flows_lock);
+
+                        ca_ctx_update_ece(flow->ctx, ntoh16(msg->ece));
 
                         pthread_rwlock_unlock(&fa.flows_lock);
 
@@ -275,10 +336,6 @@ static void * fa_handle_packet(void * o)
 int fa_init(void)
 {
         pthread_condattr_t cattr;
-        int                i;
-
-        for (i = 0; i < PROG_MAX_FLOWS; ++i)
-                destroy_conn(i);
 
         if (pthread_rwlock_init(&fa.flows_lock, NULL))
                 goto fail_rwlock;
@@ -383,9 +440,10 @@ int fa_alloc(int             fd,
              size_t          dlen)
 {
         struct fa_msg *      msg;
-        uint64_t             addr;
         struct shm_du_buff * sdb;
-        qoscube_t            qc;
+        struct fa_flow *     flow;
+        uint64_t             addr;
+        qoscube_t            qc = QOS_CUBE_BE;
         size_t               len;
 
         addr = dir_query(dst);
@@ -397,7 +455,9 @@ int fa_alloc(int             fd,
         if (ipcp_sdb_reserve(&sdb, len + dlen))
                 return -1;
 
-        msg               = (struct fa_msg *) shm_du_buff_head(sdb);
+        msg = (struct fa_msg *) shm_du_buff_head(sdb);
+        memset(msg, 0, sizeof(*msg));
+
         msg->code         = FLOW_REQ;
         msg->s_eid        = hton32(fd);
         msg->s_addr       = hton64(ipcpi.dt_addr);
@@ -413,17 +473,17 @@ int fa_alloc(int             fd,
         memcpy(msg + 1, dst, ipcp_dir_hash_len());
         memcpy(shm_du_buff_head(sdb) + len, data, dlen);
 
-        qc = qos_spec_to_cube(qs);
-
         if (dt_write_packet(addr, qc, fa.fd, sdb)) {
                 ipcp_sdb_release(sdb);
                 return -1;
         }
 
+        flow = &fa.flows[fd];
+
         pthread_rwlock_wrlock(&fa.flows_lock);
 
-        assert(fa.r_eid[fd] == -1);
-        fa.r_addr[fd] = addr;
+        fa_flow_init(flow);
+        flow->r_addr = addr;
 
         pthread_rwlock_unlock(&fa.flows_lock);
 
@@ -439,10 +499,13 @@ int fa_alloc_resp(int          fd,
         struct timespec      abstime;
         struct fa_msg *      msg;
         struct shm_du_buff * sdb;
-        qoscube_t            qc;
+        struct fa_flow *     flow;
+        qoscube_t            qc = QOS_CUBE_BE;
 
         clock_gettime(PTHREAD_COND_CLOCK, &abstime);
 
+        flow = &fa.flows[fd];
+
         pthread_mutex_lock(&ipcpi.alloc_lock);
 
         while (ipcpi.alloc_id != fd && ipcp_get_state() == IPCP_OPERATIONAL) {
@@ -463,33 +526,31 @@ int fa_alloc_resp(int          fd,
         pthread_mutex_unlock(&ipcpi.alloc_lock);
 
         if (ipcp_sdb_reserve(&sdb, sizeof(*msg) + len)) {
-                destroy_conn(fd);
+                fa_flow_fini(flow);
                 return -1;
         }
 
+        msg = (struct fa_msg *) shm_du_buff_head(sdb);
+        memset(msg, 0, sizeof(*msg));
+
         pthread_rwlock_wrlock(&fa.flows_lock);
 
-        msg           = (struct fa_msg *) shm_du_buff_head(sdb);
         msg->code     = FLOW_REPLY;
-        msg->r_eid    = hton32(fa.r_eid[fd]);
+        msg->r_eid    = hton32(flow->r_eid);
         msg->s_eid    = hton32(fd);
         msg->response = response;
 
         memcpy(msg + 1, data, len);
 
         if (response < 0) {
-                destroy_conn(fd);
+                fa_flow_fini(flow);
                 ipcp_sdb_release(sdb);
         } else {
                 psched_add(fa.psched, fd);
         }
 
-        ipcp_flow_get_qoscube(fd, &qc);
-
-        assert(qc >= 0 && qc < QOS_CUBE_MAX);
-
-        if (dt_write_packet(fa.r_addr[fd], qc, fa.fd, sdb)) {
-                destroy_conn(fd);
+        if (dt_write_packet(flow->r_addr, qc, fa.fd, sdb)) {
+                fa_flow_fini(flow);
                 pthread_rwlock_unlock(&fa.flows_lock);
                 ipcp_sdb_release(sdb);
                 return -1;
@@ -505,11 +566,11 @@ int fa_dealloc(int fd)
         if (ipcp_flow_fini(fd) < 0)
                 return 0;
 
-        pthread_rwlock_wrlock(&fa.flows_lock);
-
         psched_del(fa.psched, fd);
 
-        destroy_conn(fd);
+        pthread_rwlock_wrlock(&fa.flows_lock);
+
+        fa_flow_fini(&fa.flows[fd]);
 
         pthread_rwlock_unlock(&fa.flows_lock);
 
@@ -517,3 +578,60 @@ int fa_dealloc(int fd)
 
         return 0;
 }
+
+static int fa_update_remote(int      fd,
+                            uint16_t ece)
+{
+        struct fa_msg *      msg;
+        struct shm_du_buff * sdb;
+        qoscube_t            qc = QOS_CUBE_BE;
+        struct fa_flow *     flow;
+
+        if (ipcp_sdb_reserve(&sdb, sizeof(*msg))) {
+                return -1;
+        }
+
+        msg = (struct fa_msg *) shm_du_buff_head(sdb);
+
+        memset(msg, 0, sizeof(*msg));
+
+        flow = &fa.flows[fd];
+
+        pthread_rwlock_rdlock(&fa.flows_lock);
+
+        msg->code  = FLOW_UPDATE;
+        msg->r_eid = hton32(flow->r_eid);
+        msg->ece   = hton16(ece);
+
+        if (dt_write_packet(flow->r_addr, qc, fa.fd, sdb)) {
+                pthread_rwlock_unlock(&fa.flows_lock);
+                ipcp_sdb_release(sdb);
+                return -1;
+        }
+
+        pthread_rwlock_unlock(&fa.flows_lock);
+
+
+        return 0;
+}
+
+void  fa_ecn_update(int     eid,
+                    uint8_t ecn,
+                    size_t  len)
+{
+        struct fa_flow * flow;
+        bool             update;
+        uint16_t         ece;
+
+        flow = &fa.flows[eid];
+
+        pthread_rwlock_wrlock(&fa.flows_lock);
+
+        update = ca_ctx_update_rcv(flow->ctx, len, ecn, &ece);
+
+        pthread_rwlock_unlock(&fa.flows_lock);
+
+        if (update)
+                fa_update_remote(eid, ece);
+
+}
author	Dimitri Staessens <dimitri@ouroboros.rocks>	2020-12-01 19:19:04 +0100
committer	Sander Vrijders <sander@ouroboros.rocks>	2020-12-02 19:21:29 +0100
commit	8e1c0e62feb4832dca2b53e51ab0e1cb8f48e5b1 (patch)
tree	ce90e18277c38e7ee592d441ae4de197081c6476 /src/ipcpd/unicast/fa.c
parent	aef6bdb1eadf8779173145710306ea5b6d81b8ec (diff)
download	ouroboros-8e1c0e62feb4832dca2b53e51ab0e1cb8f48e5b1.tar.gz ouroboros-8e1c0e62feb4832dca2b53e51ab0e1cb8f48e5b1.zip