-rw-r--r--  include/ouroboros/config.h.in                                      |   19
-rw-r--r--  include/ouroboros/np1_flow.h                                       |    7
-rw-r--r--  include/ouroboros/sockets.h                                        |    3
-rw-r--r--  include/ouroboros/tpm.h (renamed from src/lib/frct_enroll.proto)   |   37
-rw-r--r--  src/ipcpd/ipcp.c                                                   |  186
-rw-r--r--  src/ipcpd/ipcp.h                                                   |    9
-rw-r--r--  src/ipcpd/normal/CMakeLists.txt                                    |    6
-rw-r--r--  src/ipcpd/normal/dht.c                                             | 2383
-rw-r--r--  src/ipcpd/normal/dht.h                                             |   54
-rw-r--r--  src/ipcpd/normal/dir.c                                             |  167
-rw-r--r--  src/ipcpd/normal/dir.h                                             |   10
-rw-r--r--  src/ipcpd/normal/dt.c                                              |   14
-rw-r--r--  src/ipcpd/normal/dt.h                                              |    2
-rw-r--r--  src/ipcpd/normal/fa.c                                              |   60
-rw-r--r--  src/ipcpd/normal/kademlia.proto                                    |   46
-rw-r--r--  src/ipcpd/normal/main.c                                            |   28
-rw-r--r--  src/ipcpd/normal/pol/flat.c                                        |    4
-rw-r--r--  src/ipcpd/normal/ribconfig.h                                       |    2
-rw-r--r--  src/ipcpd/normal/ribmgr.c                                          |    3
-rw-r--r--  src/ipcpd/normal/sdu_sched.c                                       |   68
-rw-r--r--  src/ipcpd/normal/tests/CMakeLists.txt                              |   37
-rw-r--r--  src/ipcpd/normal/tests/dht_test.c                                  |   99
-rw-r--r--  src/irmd/main.c                                                    |  211
-rw-r--r--  src/lib/CMakeLists.txt                                             |    4
-rw-r--r--  src/lib/dev.c                                                      |  371
-rw-r--r--  src/lib/tpm.c                                                      |  266

26 files changed, 3237 insertions(+), 859 deletions(-)
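The bulk of this commit replaces the hand-rolled threadpool bookkeeping in src/ipcpd/ipcp.c (thread_inc()/thread_dec()/thread_check()/thread_exit() and the threadpoolmgr thread) with a shared threadpool manager declared in the new include/ouroboros/tpm.h and implemented in src/lib/tpm.c (266 lines, not reproduced in this excerpt). The sketch below pieces the intended worker pattern together from the tpm.h declarations and the mainloop changes further down; handle_request() is a hypothetical stand-in for the daemon's actual message handling, so read it as an illustration of the API, not as the library code.

/*
 * Worker pattern implied by tpm.h -- a sketch, not src/lib/tpm.c.
 * IPCP_MIN_THREADS and IPCP_ADD_THREADS are defined in the
 * config.h.in hunk below.
 */
#include <ouroboros/config.h>
#include <ouroboros/tpm.h>

#include <stdbool.h>

static void handle_request(void)
{
        /* Hypothetical: accept and service one IPC request. */
}

static void * worker(void * o)
{
        (void) o;

        while (true) {
                if (tpm_check()) { /* manager wants this thread gone */
                        tpm_exit();
                        break;
                }

                tpm_dec();         /* mark this thread busy...       */
                handle_request();
                tpm_inc();         /* ...and available again         */
        }

        return (void *) 0;
}

static int boot(void)
{
        /* Keep IPCP_MIN_THREADS available; grow by IPCP_ADD_THREADS. */
        if (tpm_init(IPCP_MIN_THREADS, IPCP_ADD_THREADS, worker))
                return -1;

        if (tpm_start()) {
                tpm_fini();
                return -1;
        }

        return 0;
}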
| diff --git a/include/ouroboros/config.h.in b/include/ouroboros/config.h.in index bae2d89e..e8341ee2 100644 --- a/include/ouroboros/config.h.in +++ b/include/ouroboros/config.h.in @@ -3,7 +3,8 @@   *   * Configuration information   * - *    Sander Vrijders <sander.vrijders@intec.ugent.be> + *    Dimitri Staessens <dimitri.staessens@ugent.be> + *    Sander Vrijders   <sander.vrijders@ugent.be>   *   * This library is free software; you can redistribute it and/or   * modify it under the terms of the GNU Lesser General Public License @@ -50,25 +51,21 @@  #define SHM_FLOW_SET_PREFIX    "/ouroboros.sets."  #define IRMD_MAX_FLOWS         4096  /* IRMD dynamic threadpooling */ -#define IRMD_MIN_AV_THREADS    16 -#define IRMD_MAX_AV_THREADS    64 -#define IRMD_MAX_THREADS       256 +#define IRMD_MIN_THREADS       16 +#define IRMD_ADD_THREADS       32  /* IPCP dynamic threadpooling */ -#define IPCP_MIN_AV_THREADS    4 -#define IPCP_MAX_AV_THREADS    32 -#define IPCP_MAX_THREADS       64 - +#define IPCP_MIN_THREADS       4 +#define IPCP_ADD_THREADS       16 +#define IPCP_SCHED_THREADS     8  #define IPCPD_MAX_CONNS        IRMD_MAX_FLOWS  #define PTHREAD_COND_CLOCK     CLOCK_MONOTONIC  #define PFT_SIZE               1 << 12  /* Timeout values */ -#define IRMD_TPM_TIMEOUT       1000 -#define IPCP_TPM_TIMEOUT       1000  #define IRMD_ACCEPT_TIMEOUT    100  #define IRMD_REQ_ARR_TIMEOUT   500  #define IRMD_FLOW_TIMEOUT      5000  #define IPCP_ACCEPT_TIMEOUT    100 -#define SOCKET_TIMEOUT         4000 +#define SOCKET_TIMEOUT         10000  #define CDAP_REPLY_TIMEOUT     1000  #define ENROLL_TIMEOUT         2000 diff --git a/include/ouroboros/np1_flow.h b/include/ouroboros/np1_flow.h index a4e94b89..3db2a0dd 100644 --- a/include/ouroboros/np1_flow.h +++ b/include/ouroboros/np1_flow.h @@ -24,10 +24,13 @@  #ifndef OUROBOROS_NP1_FLOW_H  #define OUROBOROS_NP1_FLOW_H +#include <ouroboros/qoscube.h> +  #include <unistd.h> -int  np1_flow_alloc(pid_t n_api, -                    int   port_id); +int  np1_flow_alloc(pid_t     n_api, +                    int       port_id, +                    qoscube_t qc);  int  np1_flow_resp(int port_id); diff --git a/include/ouroboros/sockets.h b/include/ouroboros/sockets.h index 0d65c15d..660709bf 100644 --- a/include/ouroboros/sockets.h +++ b/include/ouroboros/sockets.h @@ -30,9 +30,6 @@  typedef IpcpConfigMsg ipcp_config_msg_t;  typedef DifInfoMsg dif_info_msg_t; -#include "frct_enroll.pb-c.h" -typedef FrctEnrollMsg frct_enroll_msg_t; -  #include "irmd_messages.pb-c.h"  typedef IrmMsg irm_msg_t; diff --git a/src/lib/frct_enroll.proto b/include/ouroboros/tpm.h index 497d6acc..d34f06f3 100644 --- a/src/lib/frct_enroll.proto +++ b/include/ouroboros/tpm.h @@ -1,10 +1,10 @@  /*   * Ouroboros - Copyright (C) 2016 - 2017   * - * QoS messages + * Threadpool management   * - *    Dimitri Staessens <dimitri.staessens@intec.ugent.be> - *    Sander Vrijders   <sander.vrijders@intec.ugent.be> + *    Dimitri Staessens <dimitri.staessens@ugent.be> + *    Sander Vrijders   <sander.vrijders@ugent.be>   *   * This library is free software; you can redistribute it and/or   * modify it under the terms of the GNU Lesser General Public License @@ -21,12 +21,27 @@   * 02110-1301 USA   */ -syntax = "proto2"; +#ifndef OUROBOROS_LIB_TPM_H +#define OUROBOROS_LIB_TPM_H -message frct_enroll_msg { -        required bool resource_control = 1; -        required bool reliable         = 2; -        required bool error_check      = 3; -        required bool ordered          = 4; -        required bool partial  
        = 5; -}; +#include <stdbool.h> + +int  tpm_init(size_t    min, +              size_t    inc, +              void * (* func)(void *)); + +int  tpm_start(void); + +void tpm_stop(void); + +void tpm_fini(void); + +bool tpm_check(void); + +void tpm_exit(void); + +void tpm_dec(void); + +void tpm_inc(void); + +#endif /* OUROBOROS_LIB_TPM_H */ diff --git a/src/ipcpd/ipcp.c b/src/ipcpd/ipcp.c index fdd1edc4..48ff046c 100644 --- a/src/ipcpd/ipcp.c +++ b/src/ipcpd/ipcp.c @@ -31,6 +31,7 @@  #include <ouroboros/dev.h>  #include <ouroboros/bitmap.h>  #include <ouroboros/np1_flow.h> +#include <ouroboros/tpm.h>  #include "ipcp.h" @@ -56,6 +57,8 @@ void ipcp_sig_handler(int         sig,                          if (ipcp_get_state() == IPCP_OPERATIONAL)                                  ipcp_set_state(IPCP_SHUTDOWN);                  } + +                tpm_stop();          default:                  return;          } @@ -87,51 +90,7 @@ void ipcp_hash_str(char *          buf,          buf[2 * i] = '\0';  } -static void thread_inc(void) -{ -        pthread_mutex_lock(&ipcpi.threads_lock); - -        ++ipcpi.threads; -        pthread_cond_signal(&ipcpi.threads_cond); - -        pthread_mutex_unlock(&ipcpi.threads_lock); -} - -static void thread_dec(void) -{ -        pthread_mutex_lock(&ipcpi.threads_lock); - -        --ipcpi.threads; -        pthread_cond_signal(&ipcpi.threads_cond); - -        pthread_mutex_unlock(&ipcpi.threads_lock); -} - -static bool thread_check(void) -{ -        int ret; - -        pthread_mutex_lock(&ipcpi.threads_lock); - -        ret = ipcpi.threads > ipcpi.max_threads; - -        pthread_mutex_unlock(&ipcpi.threads_lock); - -        return ret; -} - -static void thread_exit(ssize_t id) -{ -        pthread_mutex_lock(&ipcpi.threads_lock); -        bmp_release(ipcpi.thread_ids, id); - -        --ipcpi.threads; -        pthread_cond_signal(&ipcpi.threads_cond); - -        pthread_mutex_unlock(&ipcpi.threads_lock); -} - -static void * ipcp_main_loop(void * o) +static void * mainloop(void * o)  {          int                 lsockfd;          uint8_t             buf[IPCP_MSG_BUF_SIZE]; @@ -147,7 +106,7 @@ static void * ipcp_main_loop(void * o)          struct timeval      ltv      = {(SOCKET_TIMEOUT / 1000),                                          (SOCKET_TIMEOUT % 1000) * 1000}; -        ssize_t             id       = (ssize_t)  o; +        (void)  o;          while (true) {  #ifdef __FreeBSD__ @@ -159,8 +118,8 @@ static void * ipcp_main_loop(void * o)                  if (ipcp_get_state() == IPCP_SHUTDOWN ||                      ipcp_get_state() == IPCP_NULL || -                    thread_check()) { -                        thread_exit(id); +                    tpm_check()) { +                        tpm_exit();                          break;                  } @@ -192,7 +151,7 @@ static void * ipcp_main_loop(void * o)                          continue;                  } -                thread_dec(); +                tpm_dec();                  switch (msg->code) {                  case IPCP_MSG_CODE__IPCP_BOOTSTRAP: @@ -260,7 +219,6 @@ static void * ipcp_main_loop(void * o)                          ret_msg.result = ipcpi.ops->ipcp_enroll(msg->dst_name,                                                                  &info); -                          if (ret_msg.result == 0) {                                  ret_msg.dif_info = &dif_info;                                  dif_info.dir_hash_algo = info.dir_hash_algo; @@ -332,7 +290,9 @@ static void * ipcp_main_loop(void * o) 
                                 break;                          } -                        fd = np1_flow_alloc(msg->api, msg->port_id); +                        fd = np1_flow_alloc(msg->api, +                                            msg->port_id, +                                            msg->qoscube);                          if (fd < 0) {                                  log_err("Failed allocating fd on port_id %d.",                                          msg->port_id); @@ -409,7 +369,7 @@ static void * ipcp_main_loop(void * o)                  if (buffer.len == 0) {                          log_err("Failed to pack reply message");                          close(lsockfd); -                        thread_inc(); +                        tpm_inc();                          continue;                  } @@ -417,7 +377,7 @@ static void * ipcp_main_loop(void * o)                  if (buffer.data == NULL) {                          log_err("Failed to create reply buffer.");                          close(lsockfd); -                        thread_inc(); +                        tpm_inc();                          continue;                  } @@ -427,14 +387,14 @@ static void * ipcp_main_loop(void * o)                          log_err("Failed to send reply message");                          free(buffer.data);                          close(lsockfd); -                        thread_inc(); +                        tpm_inc();                          continue;                  }                  free(buffer.data);                  close(lsockfd); -                thread_inc(); +                tpm_inc();          }          return (void *) 0; @@ -497,15 +457,6 @@ int ipcp_init(int               argc,          ipcpi.state     = IPCP_NULL;          ipcpi.shim_data = NULL; -        ipcpi.threadpool = malloc(sizeof(pthread_t) * IPCP_MAX_THREADS); -        if (ipcpi.threadpool == NULL) { -                ret = -ENOMEM; -                goto fail_thr; -        } - -        ipcpi.threads = 0; -        ipcpi.max_threads = IPCP_MIN_AV_THREADS; -          ipcpi.sock_path = ipcp_sock_path(getpid());          if (ipcpi.sock_path == NULL)                  goto fail_sock_path; @@ -527,11 +478,6 @@ int ipcp_init(int               argc,                  goto fail_state_mtx;          } -        if (pthread_mutex_init(&ipcpi.threads_lock, NULL)) { -                log_err("Could not create mutex."); -                goto fail_thread_lock; -        } -          if (pthread_condattr_init(&cattr)) {                  log_err("Could not create condattr.");                  goto fail_cond_attr; @@ -545,17 +491,6 @@ int ipcp_init(int               argc,                  goto fail_state_cond;          } -        if (pthread_cond_init(&ipcpi.threads_cond, &cattr)) { -                log_err("Could not init condvar."); -                goto fail_thread_cond; -        } - -        ipcpi.thread_ids = bmp_create(IPCP_MAX_THREADS, 0); -        if (ipcpi.thread_ids == NULL) { -                log_err("Could not init condvar."); -                goto fail_bmp; -        } -          if (pthread_mutex_init(&ipcpi.alloc_lock, NULL)) {                  log_err("Failed to init mutex.");                  goto fail_alloc_lock; @@ -588,94 +523,21 @@ int ipcp_init(int               argc,   fail_alloc_cond:          pthread_mutex_destroy(&ipcpi.alloc_lock);   fail_alloc_lock: -        bmp_destroy(ipcpi.thread_ids); - fail_bmp: -        pthread_cond_destroy(&ipcpi.threads_cond); - fail_thread_cond:          
pthread_cond_destroy(&ipcpi.state_cond);   fail_state_cond:          pthread_condattr_destroy(&cattr);   fail_cond_attr: -        pthread_mutex_destroy(&ipcpi.threads_lock); - fail_thread_lock:          pthread_mutex_destroy(&ipcpi.state_mtx);   fail_state_mtx:          close(ipcpi.sockfd);   fail_serv_sock:          free(ipcpi.sock_path);   fail_sock_path: -        free(ipcpi.threadpool); - fail_thr:          ouroboros_fini();          return ret;  } -void * threadpoolmgr(void * o) -{ -        pthread_attr_t  pattr; -        struct timespec dl; -        struct timespec to = {(IRMD_TPM_TIMEOUT / 1000), -                              (IRMD_TPM_TIMEOUT % 1000) * MILLION}; -        (void) o; - -        if (pthread_attr_init(&pattr)) -                return (void *) -1; - -        pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED); - -        while (true) { -                clock_gettime(PTHREAD_COND_CLOCK, &dl); -                ts_add(&dl, &to, &dl); - -                if (ipcp_get_state() == IPCP_SHUTDOWN || -                    ipcp_get_state() == IPCP_NULL) { -                        pthread_attr_destroy(&pattr); -                        log_dbg("Waiting for threads to exit."); -                        pthread_mutex_lock(&ipcpi.threads_lock); -                        while (ipcpi.threads > 0) -                                pthread_cond_wait(&ipcpi.threads_cond, -                                                  &ipcpi.threads_lock); -                        pthread_mutex_unlock(&ipcpi.threads_lock); - -                        log_dbg("Threadpool manager done."); -                        break; -                } - -                pthread_mutex_lock(&ipcpi.threads_lock); - -                if (ipcpi.threads < IPCP_MIN_AV_THREADS) { -                        log_dbg("Increasing threadpool."); -                        ipcpi.max_threads = IPCP_MAX_AV_THREADS; - -                        while (ipcpi.threads < ipcpi.max_threads) { -                                ssize_t id = bmp_allocate(ipcpi.thread_ids); -                                if (!bmp_is_id_valid(ipcpi.thread_ids, id)) { -                                        log_warn("IPCP threadpool exhausted."); -                                        break; -                                } - -                                if (pthread_create(&ipcpi.threadpool[id], -                                                   &pattr, ipcp_main_loop, -                                                   (void *) id)) -                                        log_warn("Failed to start new thread."); -                                else -                                        ++ipcpi.threads; -                        } -                } - -                if (pthread_cond_timedwait(&ipcpi.threads_cond, -                                           &ipcpi.threads_lock, -                                           &dl) == ETIMEDOUT) -                        if (ipcpi.threads > IPCP_MIN_AV_THREADS) -                                --ipcpi.max_threads; - -                pthread_mutex_unlock(&ipcpi.threads_lock); -        } - -        return (void *) 0; -} -  int ipcp_boot()  {          struct sigaction sig_act; @@ -700,9 +562,15 @@ int ipcp_boot()          pthread_sigmask(SIG_BLOCK, &sigset, NULL); -        ipcp_set_state(IPCP_INIT); +        if (tpm_init(IPCP_MIN_THREADS, IPCP_ADD_THREADS, mainloop)) +                return -1; + +        if (tpm_start()) { +                tpm_fini(); +                return -1; +        } -        
pthread_create(&ipcpi.tpm, NULL, threadpoolmgr, NULL); +        ipcp_set_state(IPCP_INIT);          pthread_sigmask(SIG_UNBLOCK, &sigset, NULL); @@ -711,8 +579,7 @@ int ipcp_boot()  void ipcp_shutdown()  { -        pthread_join(ipcpi.tpm, NULL); - +        tpm_fini();          log_info("IPCP %d shutting down.", getpid());  } @@ -722,16 +589,11 @@ void ipcp_fini()          if (unlink(ipcpi.sock_path))                  log_warn("Could not unlink %s.", ipcpi.sock_path); -        bmp_destroy(ipcpi.thread_ids); -          free(ipcpi.sock_path); -        free(ipcpi.threadpool);          shim_data_destroy(ipcpi.shim_data);          pthread_cond_destroy(&ipcpi.state_cond); -        pthread_cond_destroy(&ipcpi.threads_cond); -        pthread_mutex_destroy(&ipcpi.threads_lock);          pthread_mutex_destroy(&ipcpi.state_mtx);          pthread_cond_destroy(&ipcpi.alloc_cond);          pthread_mutex_destroy(&ipcpi.alloc_lock); diff --git a/src/ipcpd/ipcp.h b/src/ipcpd/ipcp.h index 3f5e1bd6..fb69df5c 100644 --- a/src/ipcpd/ipcp.h +++ b/src/ipcpd/ipcp.h @@ -93,15 +93,6 @@ struct ipcp {          pthread_cond_t     alloc_cond;          pthread_mutex_t    alloc_lock; -        pthread_t *        threadpool; - -        struct bmp *       thread_ids; -        size_t             max_threads; -        size_t             threads; -        pthread_cond_t     threads_cond; -        pthread_mutex_t    threads_lock; - -        pthread_t          tpm;  } ipcpi;  int             ipcp_init(int               argc, diff --git a/src/ipcpd/normal/CMakeLists.txt b/src/ipcpd/normal/CMakeLists.txt index 336b0e8f..8c2d4efc 100644 --- a/src/ipcpd/normal/CMakeLists.txt +++ b/src/ipcpd/normal/CMakeLists.txt @@ -15,6 +15,8 @@ include_directories(${CMAKE_BINARY_DIR}/include)  set(IPCP_NORMAL_TARGET ipcpd-normal CACHE STRING "IPCP_NORMAL_TARGET")  protobuf_generate_c(FLOW_ALLOC_SRCS FLOW_ALLOC_HDRS flow_alloc.proto) +protobuf_generate_c(KAD_PROTO_SRCS KAD_PROTO_HDRS kademlia.proto) +  # Add GPB sources of policies last  protobuf_generate_c(FSO_SRCS FSO_HDRS pol/fso.proto) @@ -22,6 +24,7 @@ set(SOURCE_FILES    # Add source files here    addr_auth.c    connmgr.c +  dht.c    dir.c    dt.c    dt_pci.c @@ -42,7 +45,7 @@ set(SOURCE_FILES    )  add_executable(ipcpd-normal ${SOURCE_FILES} ${IPCP_SOURCES} -  ${FLOW_ALLOC_SRCS} ${FSO_SRCS}) +  ${FLOW_ALLOC_SRCS} ${FSO_SRCS} ${KAD_PROTO_SRCS})  target_link_libraries(ipcpd-normal LINK_PUBLIC ouroboros)  include(AddCompileFlags) @@ -53,3 +56,4 @@ endif (CMAKE_BUILD_TYPE MATCHES Debug)  install(TARGETS ipcpd-normal RUNTIME DESTINATION sbin)  add_subdirectory(pol/tests) +add_subdirectory(tests) diff --git a/src/ipcpd/normal/dht.c b/src/ipcpd/normal/dht.c new file mode 100644 index 00000000..74618658 --- /dev/null +++ b/src/ipcpd/normal/dht.c @@ -0,0 +1,2383 @@ +/* + * Ouroboros - Copyright (C) 2016 - 2017 + * + * Distributed Hash Table based on Kademlia + * + *    Dimitri Staessens <dimitri.staessens@ugent.be> + *    Sander Vrijders   <sander.vrijders@ugent.be> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * version 2.1 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#define OUROBOROS_PREFIX "dht" + +#include <ouroboros/config.h> +#include <ouroboros/hash.h> +#include <ouroboros/bitmap.h> +#include <ouroboros/errno.h> +#include <ouroboros/logs.h> +#include <ouroboros/list.h> +#include <ouroboros/random.h> +#include <ouroboros/time_utils.h> +#include <ouroboros/utils.h> + +#include "dht.h" +#include "dt.h" + +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <inttypes.h> + +#include "kademlia.pb-c.h" +typedef KadMsg kad_msg_t; +typedef KadContactMsg kad_contact_msg_t; + +#define DHT_MAX_REQS  2048 /* KAD recommends rnd(), bmp can be changed.  */ +#define KAD_ALPHA     3    /* Parallel factor, proven optimal value.     */ +#define KAD_K         8    /* Replication factor, MDHT value.            */ +#define KAD_T_REPL    900  /* Replication time, tied to k. MDHT value.   */ +#define KAD_T_REFR    900  /* Refresh time stale bucket, MDHT value.     */ +#define KAD_T_JOIN    6    /* Response time to wait for a join.          */ +#define KAD_T_RESP    2    /* Response time to wait for a response.      */ +#define KAD_R_PING    2    /* Ping retries before declaring peer dead.   */ +#define KAD_QUEER     15   /* Time to declare peer questionable.         */ +#define KAD_BETA      8    /* Bucket split factor, must be 1, 2, 4 or 8. */ +#define KAD_RESP_RETR 6    /* Number of retries on sending a response.   */ + +enum dht_state { +        DHT_INIT = 0, +        DHT_RUNNING, +        DHT_SHUTDOWN, +}; + +enum kad_code { +        KAD_JOIN = 0, +        KAD_FIND_NODE, +        KAD_FIND_VALUE, +        /* Messages without a response below. 
*/ +        KAD_STORE, +        KAD_RESPONSE +}; + +enum kad_req_state { +        REQ_NULL = 0, +        REQ_INIT, +        REQ_PENDING, +        REQ_RESPONSE, +        REQ_DONE, +        REQ_DESTROY +}; + +enum lookup_state { +        LU_NULL = 0, +        LU_INIT, +        LU_PENDING, +        LU_UPDATE, +        LU_COMPLETE, +        LU_DONE, +        LU_DESTROY +}; + +struct kad_req { +        struct list_head   next; + +        uint32_t           cookie; +        enum kad_code      code; +        uint8_t *          key; +        uint64_t           addr; + +        enum kad_req_state state; +        pthread_cond_t     cond; +        pthread_mutex_t    lock; + +        time_t             t_exp; +}; + +struct lookup { +        struct list_head  next; + +        uint8_t *         key; + +        struct list_head  contacts; +        size_t            n_contacts; + +        uint64_t *        addrs; +        size_t            n_addrs; + +        enum lookup_state state; +        pthread_cond_t    cond; +        pthread_mutex_t   lock; +}; + +struct val { +        struct list_head next; + +        uint64_t         addr; + +        time_t           t_exp; +        time_t           t_rep; +}; + +struct ref_entry { +        struct list_head next; + +        uint8_t *        key; + +        time_t           t_rep; +}; + +struct dht_entry { +        struct list_head next; + +        uint8_t *        key; +        size_t           n_vals; +        struct list_head vals; +}; + +struct contact { +        struct list_head next; + +        uint8_t *        id; +        uint64_t         addr; + +        size_t           fails; +        time_t           t_seen; +}; + +struct bucket { +        struct list_head contacts; +        size_t           n_contacts; + +        struct list_head alts; +        size_t           n_alts; + +        time_t           t_refr; + +        size_t           depth; +        uint8_t          mask; + +        struct bucket *  parent; +        struct bucket *  children[1L << KAD_BETA]; +}; + +struct dht { +        size_t           alpha; +        size_t           b; +        size_t           k; + +        time_t           t_expire; +        time_t           t_refresh; +        time_t           t_replic; +        time_t           t_repub; + +        uint8_t *        id; +        uint64_t         addr; + +        struct bucket *  buckets; + +        struct list_head entries; + +        struct list_head refs; + +        struct list_head lookups; + +        struct list_head requests; +        struct bmp *     cookies; + +        enum dht_state   state; +        pthread_mutex_t  mtx; + +        pthread_rwlock_t lock; + +        int              fd; + +        pthread_t        worker; +}; + +static uint8_t * dht_dup_key(const uint8_t * key, +                             size_t          len) +{ +        uint8_t * dup; + +        dup = malloc(sizeof(*dup) * len); +        if (dup == NULL) +                return NULL; + +        memcpy(dup, key, len); + +        return dup; +} + +static enum dht_state dht_get_state(struct dht * dht) +{ +        enum dht_state state; + +        pthread_mutex_lock(&dht->mtx); + +        state = dht->state; + +        pthread_mutex_unlock(&dht->mtx); + +        return state; +} + +static void dht_set_state(struct dht *   dht, +                          enum dht_state state) +{ +        pthread_mutex_lock(&dht->mtx); + +        dht->state = state; + +        pthread_mutex_unlock(&dht->mtx); +} + +static uint8_t * create_id(size_t len) +{ +        uint8_t * 
id; + +        id = malloc(len); +        if (id == NULL) +                return NULL; + +        if (random_buffer(id, len) < 0) { +                free(id); +                return NULL; +        } + +        return id; +} + +static struct kad_req * kad_req_create(struct dht * dht, +                                       kad_msg_t *  msg, +                                       uint64_t     addr) +{ +        struct kad_req *   req; +        pthread_condattr_t cattr; +        struct timespec    t; + +        req = malloc(sizeof(*req)); +        if (req == NULL) +                return NULL; + +        list_head_init(&req->next); + +        clock_gettime(CLOCK_REALTIME_COARSE, &t); + +        req->t_exp  = t.tv_sec + KAD_T_RESP; +        req->addr   = addr; +        req->state  = REQ_INIT; +        req->cookie = msg->cookie; +        req->code   = msg->code; +        req->key    = NULL; + +        if (msg->has_key) { +                req->key = dht_dup_key(msg->key.data, dht->b); +                if (req->key == NULL) { +                        free(req); +                        return NULL; +                } +        } + +        if (pthread_mutex_init(&req->lock, NULL)) { +                free(req->key); +                free(req); +                return NULL; +        } + +        pthread_condattr_init(&cattr); +#ifndef __APPLE__ +        pthread_condattr_setclock(&cattr, PTHREAD_COND_CLOCK); +#endif + +        if (pthread_cond_init(&req->cond, &cattr)) { +                pthread_condattr_destroy(&cattr); +                pthread_mutex_destroy(&req->lock); +                free(req->key); +                free(req); +                return NULL; +        } + +        pthread_condattr_destroy(&cattr); + +        return req; +} + +static void kad_req_destroy(struct kad_req * req) +{ +        assert(req); + +        if (req->key != NULL) +                free(req->key); + +        pthread_mutex_lock(&req->lock); + +        switch (req->state) { +        case REQ_DESTROY: +                pthread_mutex_unlock(&req->lock); +                return; +        case REQ_PENDING: +                req->state = REQ_DESTROY; +                pthread_cond_signal(&req->cond); +                break; +        case REQ_INIT: +        case REQ_DONE: +                req->state = REQ_NULL; +                break; +        case REQ_RESPONSE: +        case REQ_NULL: +        default: +                break; +        } + +        while (req->state != REQ_NULL) +                pthread_cond_wait(&req->cond, &req->lock); + +        pthread_mutex_unlock(&req->lock); + +        pthread_cond_destroy(&req->cond); +        pthread_mutex_destroy(&req->lock); + +        free(req); +} + +static int kad_req_wait(struct kad_req * req, +                        time_t           t) +{ +        struct timespec timeo = {t, 0}; +        struct timespec abs; +        int ret = 0; + +        assert(req); + +        clock_gettime(PTHREAD_COND_CLOCK, &abs); + +        ts_add(&abs, &timeo, &abs); + +        pthread_mutex_lock(&req->lock); + +        req->state = REQ_PENDING; + +        while (req->state == REQ_PENDING && ret != -ETIMEDOUT) +                ret = -pthread_cond_timedwait(&req->cond, &req->lock, &abs); + +        switch(req->state) { +        case REQ_DESTROY: +                ret = -1; +                req->state = REQ_NULL; +                pthread_cond_signal(&req->cond); +                break; +        case REQ_PENDING: /* ETIMEDOUT */ +        case REQ_RESPONSE: +                req->state = REQ_DONE; +      
          pthread_cond_signal(&req->cond); +                break; +        default: +                break; +        } + +        pthread_mutex_unlock(&req->lock); + +        return ret; +} + +static void kad_req_respond(struct kad_req * req) +{ +        pthread_mutex_lock(&req->lock); + +        req->state = REQ_RESPONSE; +        pthread_cond_signal(&req->cond); + +        pthread_mutex_unlock(&req->lock); +} + +static struct contact * contact_create(const uint8_t * id, +                                       size_t          len, +                                       uint64_t        addr) +{ +        struct contact * c; +        struct timespec  t; + +        c = malloc(sizeof(*c)); +        if (c == NULL) +                return NULL; + +        list_head_init(&c->next); + +        clock_gettime(CLOCK_REALTIME_COARSE, &t); + +        c->addr   = addr; +        c->fails  = 0; +        c->t_seen = t.tv_sec; +        c->id     = dht_dup_key(id, len); +        if (c->id == NULL) { +                free(c); +                return NULL; +        } + +        return c; +} + +static void contact_destroy(struct contact * c) +{ +        if (c != NULL) +                free(c->id); + +        free(c); +} + +static struct bucket * iter_bucket(struct bucket * b, +                                   const uint8_t * id) +{ +        uint8_t byte; +        uint8_t mask; + +        assert(b); + +        if (b->children[0] == NULL) +                return b; + +        byte = id[(b->depth * KAD_BETA) / CHAR_BIT]; + +        mask = ((1L << KAD_BETA) - 1) & 0xFF; + +        byte >>= (CHAR_BIT - KAD_BETA) - +                (((b->depth) * KAD_BETA) & (CHAR_BIT - 1)); + +        return iter_bucket(b->children[(byte & mask)], id); +} + +static struct bucket * dht_get_bucket(struct dht *    dht, +                                      const uint8_t * id) +{ +        assert(dht->buckets); + +        return iter_bucket(dht->buckets, id); +} + +/* + * If someone builds a network where the n (n > k) closest nodes all + * have IDs starting with the same 64 bits: by all means, change this. 
+ */ +static uint64_t dist(const uint8_t * src, +                     const uint8_t * dst) +{ +        return betoh64(*((uint64_t *) src) ^ *((uint64_t *) dst)); +} + +static size_t list_add_sorted(struct list_head * l, +                              struct contact *   c, +                              const uint8_t *    key) +{ +        struct list_head * p; + +        assert(l); +        assert(c); +        assert(key); +        assert(c->id); + +        list_for_each(p, l) { +                struct contact * e = list_entry(p, struct contact, next); +                if (dist(c->id, key) > dist(e->id, key)) +                        break; +        } + +        list_add_tail(&c->next, p); + +        return 1; +} + +static size_t dht_contact_list(struct dht *       dht, +                               struct list_head * l, +                               const uint8_t *    key) +{ +        struct list_head * p; +        struct bucket *    b; +        size_t             len = 0; +        size_t             i; +        struct timespec    t; + +        assert(l); +        assert(dht); +        assert(key); +        assert(list_is_empty(l)); + +        clock_gettime(CLOCK_REALTIME_COARSE, &t); + +        b = dht_get_bucket(dht, key); +        if (b == NULL) +                return 0; + +        b->t_refr = t.tv_sec + KAD_T_REFR; + +        if (b->n_contacts == dht->k || b->parent == NULL) { +                list_for_each(p, &b->contacts) { +                        struct contact * c; +                        c = list_entry(p, struct contact, next); +                        c = contact_create(c->id, dht->b, c->addr); +                        if (list_add_sorted(l, c, key) == 1) +                                if (++len > dht->k) +                                        break; +                } +        } else { +                struct bucket * d = b->parent; +                for (i = 0; i < (1L << KAD_BETA); ++i) { +                        list_for_each(p, &d->children[i]->contacts) { +                                struct contact * c; +                                c = list_entry(p, struct contact, next); +                                c = contact_create(c->id, dht->b, c->addr); +                                if (c == NULL) +                                        continue; +                                if (list_add_sorted(l, c, key) == 1) +                                        if (++len > dht->k) +                                                break; +                        } +                } +        } + +        assert(len == dht->k || b->parent == NULL); + +        return len; +} + +static struct lookup * lookup_create(struct dht *    dht, +                                     const uint8_t * id) +{ +        struct lookup *    lu; +        pthread_condattr_t cattr; + +        assert(dht); +        assert(id); + +        lu = malloc(sizeof(*lu)); +        if (lu == NULL) +                goto fail_malloc; + +        list_head_init(&lu->contacts); + +        lu->state   = LU_INIT; +        lu->addrs   = NULL; +        lu->n_addrs = 0; +        lu->key     = dht_dup_key(id, dht->b); +        if (lu->key == NULL) +                goto fail_id; + +        if (pthread_mutex_init(&lu->lock, NULL)) +                goto fail_mutex; + +        pthread_condattr_init(&cattr); +#ifndef __APPLE__ +        pthread_condattr_setclock(&cattr, PTHREAD_COND_CLOCK); +#endif + +        if (pthread_cond_init(&lu->cond, &cattr)) +                goto fail_cond; + +        
pthread_condattr_destroy(&cattr); + +        pthread_rwlock_wrlock(&dht->lock); + +        list_add(&lu->next, &dht->lookups); + +        lu->n_contacts = dht_contact_list(dht, &lu->contacts, id); + +        pthread_rwlock_unlock(&dht->lock); + +        return lu; + + fail_cond: +        pthread_condattr_destroy(&cattr); +        pthread_mutex_destroy(&lu->lock); + fail_mutex: +        free(lu->key); + fail_id: +        free(lu); + fail_malloc: +        return NULL; +} + +static void lookup_destroy(struct lookup * lu) +{ +        struct list_head * p; +        struct list_head * h; + +        assert(lu); + +        pthread_mutex_lock(&lu->lock); + +        switch (lu->state) { +        case LU_DESTROY: +                pthread_mutex_unlock(&lu->lock); +                return; +        case LU_PENDING: +                lu->state = LU_DESTROY; +                pthread_cond_signal(&lu->cond); +                break; +        case LU_INIT: +        case LU_DONE: +        case LU_UPDATE: +        case LU_COMPLETE: +                lu->state = REQ_NULL; +                break; +        case LU_NULL: +        default: +                break; +        } + +        while (lu->state != LU_NULL) +                pthread_cond_wait(&lu->cond, &lu->lock); + +        if (lu->key != NULL) +                free(lu->key); +        if (lu->addrs != NULL) +                free(lu->addrs); + +        list_for_each_safe(p, h, &lu->contacts) { +                struct contact * c = list_entry(p, struct contact, next); +                list_del(&c->next); +                contact_destroy(c); +        } + +        pthread_mutex_unlock(&lu->lock); + +        pthread_cond_destroy(&lu->cond); +        pthread_mutex_destroy(&lu->lock); + +        free(lu); +} + +static void lookup_update(struct dht *    dht, +                          struct lookup * lu, +                          kad_msg_t *     msg) +{ +        struct list_head * p = NULL; +        struct contact *   c = NULL; +        size_t             n; +        size_t             pos = 0; + +        assert(lu); +        assert(msg); + +        if (dht_get_state(dht) != DHT_RUNNING) +                return; + +        pthread_mutex_lock(&lu->lock); + +        if (msg->n_addrs > 0) { +                if (lu->addrs == NULL) { +                        lu->addrs = malloc(sizeof(*lu->addrs) * msg->n_addrs); +                        for (n = 0; n < msg->n_addrs; ++n) +                                lu->addrs[n] = msg->addrs[n]; +                        lu->n_addrs = msg->n_addrs; +                } +                lu->state = LU_COMPLETE; +                pthread_cond_signal(&lu->cond); +                pthread_mutex_unlock(&lu->lock); +                return; +        } + +        while (lu->state == LU_INIT) +                pthread_cond_wait(&lu->cond, &lu->lock); + +        for (n = 0; n < msg->n_contacts; ++n) { +                c = contact_create(msg->contacts[n]->id.data, +                                   dht->b, msg->contacts[n]->addr); +                if (c == NULL) +                        continue; + +                list_for_each(p, &lu->contacts) { +                        struct contact * e; +                        e = list_entry(p, struct contact, next); +                        if (!memcmp(e->id, c->id, dht->b)) { +                                contact_destroy(c); +                                c = NULL; +                                break; +                        } + +                        if (dist(c->id, lu->key) > dist(e->id, 
lu->key)) +                                break; +                        pos++; +                } + +                if (c == NULL) +                        continue; + +                if (lu->n_contacts < dht->k) { +                        list_add_tail(&c->next, p); +                        ++lu->n_contacts; +                } else if (pos == dht->k) { +                        contact_destroy(c); +                        continue; +                } else { +                        struct contact * d; +                        list_add_tail(&c->next, p); +                        d = list_last_entry(&lu->contacts, struct contact, next); +                        list_del(&d->next); +                        contact_destroy(d); +                } +        } + +        lu->state = LU_UPDATE; +        pthread_cond_signal(&lu->cond); +        pthread_mutex_unlock(&lu->lock); +        return; +} + +static ssize_t lookup_get_addrs(struct lookup * lu, +                                uint64_t *      addrs) +{ +        ssize_t n; + +        assert(lu); + +        pthread_mutex_lock(&lu->lock); + +        for (n = 0; (size_t) n < lu->n_addrs; ++n) +                addrs[n] = lu->addrs[n]; + +        assert((size_t) n == lu->n_addrs); + +        pthread_mutex_unlock(&lu->lock); + +        return n; +} + +static ssize_t lookup_contact_addrs(struct lookup * lu, +                                    uint64_t *      addrs) +{ +        struct list_head * p; +        ssize_t            n = 0; + +        assert(lu); +        assert(addrs); + +        pthread_mutex_lock(&lu->lock); + +        list_for_each(p, &lu->contacts) { +                struct contact * c = list_entry(p, struct contact, next); +                addrs[n] = c->addr; +                n++; +        } + +        pthread_mutex_unlock(&lu->lock); + +        return n; +} + +static void lookup_new_addrs(struct lookup * lu, +                                uint64_t *      addrs) +{ +        struct list_head * p; +        size_t             n = 0; + +        assert(lu); +        assert(addrs); + +        pthread_mutex_lock(&lu->lock); + +        /* Uses fails to check if the contact has been contacted. 
*/ +        list_for_each(p, &lu->contacts) { +                struct contact * c = list_entry(p, struct contact, next); +                if (c->fails == 0) { +                        c->fails = 1; +                        addrs[n] = c->addr; +                        n++; +                } + +                if (n == KAD_ALPHA) +                        break; +        } + +        assert(n <= KAD_ALPHA); + +        addrs[n] = 0; + +        if (n == 0) +                lu->state = LU_DONE; + +        pthread_mutex_unlock(&lu->lock); +} + +static enum lookup_state lookup_wait(struct lookup * lu) +{ +        enum lookup_state state; + +        pthread_mutex_lock(&lu->lock); + +        lu->state = LU_PENDING; +        pthread_cond_signal(&lu->cond); + +        pthread_cleanup_push((void (*)(void *)) lookup_destroy, (void *) lu); + +        while (lu->state == LU_PENDING) +                pthread_cond_wait(&lu->cond, &lu->lock); + +        pthread_cleanup_pop(false); + +        state = lu->state; + +        pthread_mutex_unlock(&lu->lock); + +        return state; +} + +static struct kad_req * dht_find_request(struct dht * dht, +                                         kad_msg_t *  msg) +{ +        struct list_head * p; + +        assert(dht); +        assert(msg); + +        list_for_each(p, &dht->requests) { +                struct kad_req * r = list_entry(p, struct kad_req, next); +                if (r->cookie == msg->cookie) +                        return r; +        } + +        return NULL; +} + +static struct lookup * dht_find_lookup(struct dht *    dht, +                                       const uint8_t * key) +{ +        struct list_head * p; + +        assert(dht); +        assert(key); + +        list_for_each(p, &dht->lookups) { +                struct lookup * l = list_entry(p, struct lookup, next); +                if (!memcmp(l->key, key, dht->b)) +                        return l; +        } + +        return NULL; +} + +static struct val * val_create(uint64_t addr, +                               time_t   exp) +{ +        struct val *    v; +        struct timespec t; + +        v = malloc(sizeof(*v)); +        if (v == NULL) +                return NULL; + +        list_head_init(&v->next); +        v->addr = addr; + +        clock_gettime(CLOCK_REALTIME_COARSE, &t); + +        v->t_exp = t.tv_sec + exp; +        v->t_rep = t.tv_sec + KAD_T_REPL; + +        return v; +} + +static void val_destroy(struct val * v) +{ +        assert(v); + +        free(v); +} + +static struct ref_entry * ref_entry_create(struct dht *    dht, +                                           const uint8_t * key) +{ +        struct ref_entry * e; +        struct timespec    t; + +        assert(dht); +        assert(key); + +        e = malloc(sizeof(*e)); +        if (e == NULL) +                return NULL; + +        e->key = dht_dup_key(key, dht->b); +        if (e->key == NULL) { +                free(e); +                return NULL; +        } + +        clock_gettime(CLOCK_REALTIME_COARSE, &t); + +        e->t_rep = t.tv_sec + dht->t_repub; + +        return e; +} + +static void ref_entry_destroy(struct ref_entry * e) +{ +        free(e->key); +        free(e); +} + +static struct dht_entry * dht_entry_create(struct dht *    dht, +                                           const uint8_t * key) +{ +        struct dht_entry * e; + +        assert(dht); +        assert(key); + +        e = malloc(sizeof(*e)); +        if (e == NULL) +                return NULL; + +        
list_head_init(&e->next); +        list_head_init(&e->vals); + +        e->n_vals = 0; + +        e->key = dht_dup_key(key, dht->b); +        if (e->key == NULL) { +                free(e); +                return NULL; +        } + +        return e; +} + +static void dht_entry_destroy(struct dht_entry * e) +{ +        struct list_head * p; +        struct list_head * h; + +        assert(e); + +        list_for_each_safe(p, h, &e->vals) { +                struct val * v = list_entry(p, struct val, next); +                list_del(&v->next); +                val_destroy(v); +        } + +        free(e->key); + +        free(e); +} + +static int dht_entry_add_addr(struct dht_entry * e, +                              uint64_t           addr, +                              time_t             exp) +{ +        struct list_head * p; +        struct val * val; +        struct timespec t; + +        clock_gettime(CLOCK_REALTIME_COARSE, &t); + +        list_for_each(p, &e->vals) { +                struct val * v = list_entry(p, struct val, next); +                if (v->addr == addr) { +                        if (v->t_exp < t.tv_sec + exp) { +                                v->t_exp = t.tv_sec + exp; +                                v->t_rep = t.tv_sec + KAD_T_REPL; +                        } + +                        return 0; +                } +        } + +        val = val_create(addr, exp); +        if (val == NULL) +                return -ENOMEM; + +        list_add(&val->next, &e->vals); +        ++e->n_vals; + +        return 0; +} + + +static void dht_entry_del_addr(struct dht_entry * e, +                               uint64_t           addr) +{ +        struct list_head * p; +        struct list_head * h; + +        assert(e); + +        list_for_each_safe(p, h, &e->vals) { +                struct val * v = list_entry(p, struct val, next); +                if (v->addr == addr) { +                        list_del(&v->next); +                        val_destroy(v); +                        --e->n_vals; +                } +        } + +        if (e->n_vals == 0) { +                list_del(&e->next); +                dht_entry_destroy(e); +        } +} + +static uint64_t dht_entry_get_addr(struct dht *       dht, +                                   struct dht_entry * e) +{ +        struct list_head * p; + +        assert(e); +        assert(!list_is_empty(&e->vals)); + +        list_for_each(p, &e->vals) { +                struct val * v = list_entry(p, struct val, next); +                if (v->addr != dht->addr) +                        return v->addr; +        } + +        return 0; +} + +/* Forward declaration. */ +static struct lookup * kad_lookup(struct dht *    dht, +                                  const uint8_t * key, +                                  enum kad_code   code); + + +/* Build a refresh list. 
*/ +static void bucket_refresh(struct dht *       dht, +                           struct bucket *    b, +                           time_t             t, +                           struct list_head * r) +{ +        size_t i; + +        if (*b->children != NULL) +                for (i = 0; i < (1L << KAD_BETA); ++i) +                        bucket_refresh(dht, b->children[i], t, r); + +        if (b->n_contacts == 0) +                return; + +        if (t > b->t_refr) { +                struct contact * c; +                struct contact * d; +                c = list_first_entry(&b->contacts, struct contact, next); +                d = contact_create(c->id, dht->b, c->addr); +                if (c != NULL) +                        list_add(&d->next, r); +                return; +        } +} + + +static struct bucket * bucket_create(void) +{ +        struct bucket * b; +        struct timespec t; +        size_t          i; + +        b = malloc(sizeof(*b)); +        if (b == NULL) +                return NULL; + +        list_head_init(&b->contacts); +        b->n_contacts = 0; + +        list_head_init(&b->alts); +        b->n_alts = 0; + +        clock_gettime(CLOCK_REALTIME_COARSE, &t); +        b->t_refr = t.tv_sec + KAD_T_REFR; + +        for (i = 0; i < (1L << KAD_BETA); ++i) +                b->children[i]  = NULL; + +        b->parent = NULL; +        b->depth = 0; + +        return b; +} + +static void bucket_destroy(struct bucket * b) +{ +        struct list_head * p; +        struct list_head * h; +        size_t             i; + +        assert(b); + +        for (i = 0; i < (1L << KAD_BETA); ++i) +                if (b->children[i] != NULL) +                        bucket_destroy(b->children[i]); + +        list_for_each_safe(p, h, &b->contacts) { +                struct contact * c = list_entry(p, struct contact, next); +                list_del(&c->next); +                contact_destroy(c); +                --b->n_contacts; +        } + +        list_for_each_safe(p, h, &b->alts) { +                struct contact * c = list_entry(p, struct contact, next); +                list_del(&c->next); +                contact_destroy(c); +                --b->n_contacts; +        } + +        free(b); +} + +static bool bucket_has_id(struct bucket * b, +                          const uint8_t * id) +{ +        uint8_t mask; +        uint8_t byte; + +        if (b->depth == 0) +                return true; + +        byte = id[(b->depth * KAD_BETA) / CHAR_BIT]; + +        mask = ((1L << KAD_BETA) - 1) & 0xFF; + +        byte >>= (CHAR_BIT - KAD_BETA) - +                (((b->depth - 1) * KAD_BETA) & (CHAR_BIT - 1)); + +        return ((byte & mask) == b->mask); +} + +static int split_bucket(struct bucket * b) +{ +        struct list_head * p; +        struct list_head * h; +        uint8_t mask = 0; +        size_t i; +        size_t c; + +        assert(b); +        assert(b->n_alts == 0); +        assert(b->n_contacts); +        assert(b->children[0] == NULL); + +        c = b->n_contacts; + +        for (i = 0; i < (1L << KAD_BETA); ++i) { +                b->children[i] = bucket_create(); +                if (b->children[i] == NULL) { +                        size_t j; +                        for (j = 0; j < i; ++j) +                                bucket_destroy(b->children[j]); +                        return -1; +                } + +                b->children[i]->depth  = b->depth + 1; +                b->children[i]->mask   = mask; +                
b->children[i]->parent = b; + +                list_for_each_safe(p, h, &b->contacts) { +                        struct contact * c; +                        c = list_entry(p, struct contact, next); +                        if (bucket_has_id(b->children[i], c->id)) { +                                list_del(&c->next); +                                --b->n_contacts; +                                list_add(&c->next, &b->children[i]->contacts); +                                ++b->children[i]->n_contacts; +                        } +                } + +                mask++; +        } + +        for (i = 0; i < (1L << KAD_BETA); ++i) +                if (b->children[i]->n_contacts == c) +                        split_bucket(b->children[i]); + +        return 0; +} + +/* Locked externally to mandate update as (final) part of join transaction. */ +static int dht_update_bucket(struct dht *    dht, +                             const uint8_t * id, +                             uint64_t        addr) +{ +        struct list_head * p; +        struct list_head * h; +        struct bucket *    b; +        struct contact *   c; + +        assert(dht); + +        b = dht_get_bucket(dht, id); +        if (b == NULL) +                return -1; + +        c = contact_create(id, dht->b, addr); +        if (c == NULL) +                return -1; + +        list_for_each_safe(p, h, &b->contacts) { +                struct contact * d = list_entry(p, struct contact, next); +                if (d->addr == addr) { +                        list_del(&d->next); +                        contact_destroy(d); +                        --b->n_contacts; +                } +        } + +        if (b->n_contacts == dht->k) { +                if (bucket_has_id(b, dht->id)) { +                        list_add_tail(&c->next, &b->contacts); +                        ++b->n_contacts; +                        if (split_bucket(b)) { +                                list_del(&c->next); +                                contact_destroy(c); +                                --b->n_contacts; +                        } +                } else if (b->n_alts == dht->k) { +                        struct contact * d; +                        d = list_first_entry(&b->alts, struct contact, next); +                        list_del(&d->next); +                        contact_destroy(d); +                        list_add_tail(&c->next, &b->alts); +                } else { +                        list_add_tail(&c->next, &b->alts); +                        ++b->n_alts; +                } +        } else { +                list_add_tail(&c->next, &b->contacts); +                ++b->n_contacts; +        } + +        return 0; +} + +static int send_msg(struct dht * dht, +                    kad_msg_t *  msg, +                    uint64_t     addr) +{ +        struct shm_du_buff * sdb; +        struct kad_req *     req; +        size_t               len; +        int                  retr = 0; + +        if (msg->code == KAD_RESPONSE) +                retr = KAD_RESP_RETR; + +        pthread_rwlock_wrlock(&dht->lock); + +        if (dht->id != NULL) { +                msg->has_s_id = true; +                msg->s_id.data = dht->id; +                msg->s_id.len  = dht->b; +        } + +        msg->s_addr = dht->addr; + +        if (msg->code < KAD_STORE) { +                msg->cookie = bmp_allocate(dht->cookies); +                if (!bmp_is_id_valid(dht->cookies, msg->cookie)) +                        goto fail_bmp_alloc; +        } + + 
       len = kad_msg__get_packed_size(msg); +        if (len == 0) +                goto fail_msg; + +        if (ipcp_sdb_reserve(&sdb, len)) +                goto fail_msg; + +        kad_msg__pack(msg, shm_du_buff_head(sdb)); + +#ifndef __DHT_TEST__ +        while (retr >= 0) { +                if (dt_write_sdu(addr, QOS_CUBE_BE, dht->fd, sdb)) +                        retr--; +                else +                        break; +                sleep(1); +        } + +        if (retr < 0) +                goto fail_write; +#else +        (void) addr; +        (void) retr; +        ipcp_sdb_release(sdb); +#endif /* __DHT_TEST__ */ + +        if (msg->code < KAD_STORE && dht->state != DHT_SHUTDOWN) { +                req = kad_req_create(dht, msg, addr); +                if (req != NULL) +                        list_add(&req->next, &dht->requests); +        } + +        pthread_rwlock_unlock(&dht->lock); + +        return 0; + +#ifndef __DHT_TEST__ + fail_write: +        ipcp_sdb_release(sdb); +#endif + fail_msg: +        bmp_release(dht->cookies, msg->cookie); + fail_bmp_alloc: +        pthread_rwlock_unlock(&dht->lock); +        return -1; +} + +static struct dht_entry * dht_find_entry(struct dht *    dht, +                                         const uint8_t * key) +{ +        struct list_head * p; + +        list_for_each(p, &dht->entries) { +                struct dht_entry * e = list_entry(p, struct dht_entry, next); +                if (!memcmp(key, e->key, dht->b)) +                        return e; +        } + +        return NULL; +} + +static int kad_add(struct dht *              dht, +                   const kad_contact_msg_t * contacts, +                   ssize_t                   n, +                   time_t                    exp) +{ +        struct dht_entry * e; + +        pthread_rwlock_wrlock(&dht->lock); + +        while (n-- > 0) { +                if (contacts[n].id.len != dht->b) +                        log_warn("Bad key length in contact data."); + +                e = dht_find_entry(dht, contacts[n].id.data); +                if (e != NULL) { +                        if (dht_entry_add_addr(e, contacts[n].addr, exp)) +                                goto fail; +                } else { +                        e = dht_entry_create(dht, contacts[n].id.data); +                        if (e == NULL) +                                goto fail; + +                        if (dht_entry_add_addr(e, contacts[n].addr, exp)) { +                                dht_entry_destroy(e); +                                goto fail; +                        } + +                        list_add(&e->next, &dht->entries); +                } +        } + +        pthread_rwlock_unlock(&dht->lock); +        return 0; + + fail: +        pthread_rwlock_unlock(&dht->lock); +        return -ENOMEM; +} + +static int wait_resp(struct dht * dht, +                     kad_msg_t *  msg, +                     time_t       timeo) +{ +        struct kad_req * req; + +        assert(dht); +        assert(msg); + +        pthread_rwlock_rdlock(&dht->lock); + +        req = dht_find_request(dht, msg); +        if (req == NULL) { +                pthread_rwlock_unlock(&dht->lock); +                return -EPERM; +        } + +        pthread_rwlock_unlock(&dht->lock); + +        return kad_req_wait(req, timeo); +} + +static int kad_store(struct dht *    dht, +                     const uint8_t * key, +                     uint64_t        addr, +                     uint64_t        r_addr, +   
                  time_t          ttl) +{ +        kad_msg_t msg = KAD_MSG__INIT; +        kad_contact_msg_t cmsg = KAD_CONTACT_MSG__INIT; +        kad_contact_msg_t * cmsgp[1]; + +        cmsg.id.data = (uint8_t *) key; +        cmsg.id.len  = dht->b; +        cmsg.addr    = addr; + +        cmsgp[0] = &cmsg; + +        msg.code         = KAD_STORE; +        msg.has_t_expire = true; +        msg.t_expire     = ttl; +        msg.n_contacts   = 1; +        msg.contacts     = cmsgp; + +        if (send_msg(dht, &msg, r_addr)) +                return -1; + +        return 0; +} + +static ssize_t kad_find(struct dht *     dht, +                        const uint8_t *  key, +                        const uint64_t * addrs, +                        enum kad_code    code) +{ +        kad_msg_t msg  = KAD_MSG__INIT; +        ssize_t   sent = 0; + +        assert(dht); +        assert(key); + +        msg.code = code; + +        msg.has_key       = true; +        msg.key.data      = (uint8_t *) key; +        msg.key.len       = dht->b; + +        while (*addrs != 0) { +                if (*addrs != dht->addr) { +                        send_msg(dht, &msg, *addrs); +                        sent++; +                } +                ++addrs; +        } + +        return sent; +} + +static void lookup_set_state(struct lookup *   lu, +                             enum lookup_state state) +{ +        pthread_mutex_lock(&lu->lock); + +        lu->state = state; +        pthread_cond_signal(&lu->cond); + +        pthread_mutex_unlock(&lu->lock); +} + +static struct lookup * kad_lookup(struct dht *    dht, +                                  const uint8_t * id, +                                  enum kad_code   code) +{ +        uint64_t          addrs[KAD_ALPHA + 1]; +        enum lookup_state state; +        struct lookup *   lu; + +        lu = lookup_create(dht, id); +        if (lu == NULL) +                return NULL; + +        lookup_new_addrs(lu, addrs); + +        if (addrs[0] == 0) { +                pthread_rwlock_wrlock(&dht->lock); +                list_del(&lu->next); +                pthread_rwlock_unlock(&dht->lock); +                lookup_destroy(lu); +                return NULL; +        } + +        if (kad_find(dht, id, addrs, code) == 0) { +                pthread_rwlock_wrlock(&dht->lock); +                list_del(&lu->next); +                pthread_rwlock_unlock(&dht->lock); +                lu->state = LU_COMPLETE; +                return lu; +        } + +        while ((state = lookup_wait(lu)) != LU_COMPLETE) { +                switch (state) { +                case LU_UPDATE: +                        lookup_new_addrs(lu, addrs); +                        if (addrs[0] == 0) { +                                pthread_rwlock_wrlock(&dht->lock); +                                list_del(&lu->next); +                                pthread_rwlock_unlock(&dht->lock); +                                return lu; +                        } + +                        kad_find(dht, id, addrs, code); +                        break; +                case LU_DESTROY: +                        pthread_rwlock_wrlock(&dht->lock); +                        list_del(&lu->next); +                        pthread_rwlock_unlock(&dht->lock); +                        lookup_set_state(lu, LU_NULL); +                        return NULL; +                default: +                        break; +                }; +        } + +        assert(state = LU_COMPLETE); + +        
pthread_rwlock_wrlock(&dht->lock); +        list_del(&lu->next); +        pthread_rwlock_unlock(&dht->lock); + +        return lu; +} + +static void kad_publish(struct dht *    dht, +                        const uint8_t * key, +                        uint64_t        addr, +                        time_t          exp) +{ +        struct lookup * lu; +        uint64_t        addrs[KAD_K]; +        ssize_t         n; + +        assert(dht); +        assert(key); + +        lu = kad_lookup(dht, key, KAD_FIND_NODE); +        if (lu == NULL) +                return; + +        n = lookup_contact_addrs(lu, addrs); + +        while (n-- > 0) { +                if (addrs[n] == dht->addr) { +                        kad_contact_msg_t msg = KAD_CONTACT_MSG__INIT; +                        msg.id.data = (uint8_t *) key; +                        msg.id.len  = dht->b; +                        msg.addr    = addr; +                        kad_add(dht, &msg, 1, exp); +                } else { +                        if (kad_store(dht, key, addr, addrs[n], dht->t_expire)) +                                log_warn("Failed to send store message."); +                } +        } + +        lookup_destroy(lu); +} + +static int kad_join(struct dht * dht, +                    uint64_t     addr) +{ +        kad_msg_t       msg = KAD_MSG__INIT; +        struct lookup * lu; + +        msg.code = KAD_JOIN; + +        msg.has_alpha       = true; +        msg.has_b           = true; +        msg.has_k           = true; +        msg.has_t_refresh   = true; +        msg.has_t_replicate = true; +        msg.alpha           = KAD_ALPHA; +        msg.b               = dht->b; +        msg.k               = KAD_K; +        msg.t_refresh       = KAD_T_REFR; +        msg.t_replicate     = KAD_T_REPL; + +        if (send_msg(dht, &msg, addr)) +                return -1; + +        if (wait_resp(dht, &msg, KAD_T_JOIN) < 0) +                return -1; + +        dht->id = create_id(dht->b); +        if (dht->id == NULL) +                return -1; + +        pthread_rwlock_wrlock(&dht->lock); + +        dht_update_bucket(dht, dht->id, dht->addr); + +        pthread_rwlock_unlock(&dht->lock); + +        lu = kad_lookup(dht, dht->id, KAD_FIND_NODE); +        if (lu != NULL) +                lookup_destroy(lu); + +        return 0; +} + +static void dht_dead_peer(struct dht * dht, +                          uint8_t *    key, +                          uint64_t     addr) +{ +        struct list_head * p; +        struct list_head * h; +        struct bucket *    b; + +        b = dht_get_bucket(dht, key); + +        list_for_each_safe(p, h, &b->contacts) { +                struct contact * c = list_entry(p, struct contact, next); +                if (b->n_contacts + b->n_alts <= dht->k) { +                        ++c->fails; +                        return; +                } + +                if (c->addr == addr) { +                        list_del(&c->next); +                        contact_destroy(c); +                        --b->n_contacts; +                        break; +                } +        } + +        while (b->n_contacts < dht->k && b->n_alts > 0) { +                struct contact * c; +                c = list_first_entry(&b->alts, struct contact, next); +                list_del(&c->next); +                --b->n_alts; +                list_add(&c->next, &b->contacts); +                ++b->n_contacts; +        } +} + +static int dht_del(struct dht *    dht, +                   const uint8_t * key, +              
      uint64_t        addr)
+{
+        struct dht_entry * e;
+
+        pthread_rwlock_wrlock(&dht->lock);
+
+        e = dht_find_entry(dht, key);
+        if (e == NULL) {
+                pthread_rwlock_unlock(&dht->lock);
+                return -EPERM;
+        }
+
+        dht_entry_del_addr(e, addr);
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        return 0;
+}
+
+static buffer_t dht_retrieve(struct dht *    dht,
+                             const uint8_t * key)
+{
+        struct dht_entry * e;
+        struct list_head * p;
+        buffer_t           buf;
+        uint64_t *         pos;
+
+        buf.len = 0;
+
+        pthread_rwlock_rdlock(&dht->lock);
+
+        e = dht_find_entry(dht, key);
+        if (e == NULL) {
+                pthread_rwlock_unlock(&dht->lock);
+                return buf;
+        }
+
+        buf.data = malloc(sizeof(dht->addr) * e->n_vals);
+        if (buf.data == NULL) {
+                pthread_rwlock_unlock(&dht->lock);
+                return buf;
+        }
+
+        buf.len = e->n_vals;
+
+        pos = (uint64_t *) buf.data;
+
+        list_for_each(p, &e->vals) {
+                struct val * v = list_entry(p, struct val, next);
+                *pos++ = v->addr;
+        }
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        return buf;
+}
+
+static ssize_t dht_get_contacts(struct dht *          dht,
+                                const uint8_t *       key,
+                                kad_contact_msg_t *** msgs)
+{
+        struct list_head   l;
+        struct list_head * p;
+        struct list_head * h;
+        size_t             len;
+        size_t             i = 0;
+
+        list_head_init(&l);
+
+        pthread_rwlock_rdlock(&dht->lock);
+
+        len = dht_contact_list(dht, &l, key);
+        if (len == 0) {
+                pthread_rwlock_unlock(&dht->lock);
+                return 0;
+        }
+
+        *msgs = malloc(len * sizeof(**msgs));
+        if (*msgs == NULL) {
+                pthread_rwlock_unlock(&dht->lock);
+                return 0;
+        }
+
+        list_for_each_safe(p, h, &l) {
+                struct contact * c = list_entry(p, struct contact, next);
+                (*msgs)[i] = malloc(sizeof(***msgs));
+                if ((*msgs)[i] == NULL) {
+                        pthread_rwlock_unlock(&dht->lock);
+                        while (i > 0)
+                                free((*msgs)[--i]);
+                        free(*msgs);
+                        return 0;
+                }
+
+                kad_contact_msg__init((*msgs)[i]);
+
+                (*msgs)[i]->id.data = c->id;
+                (*msgs)[i]->id.len  = dht->b;
+                (*msgs)[i++]->addr  = c->addr;
+                list_del(&c->next);
+                free(c);
+        }
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        return i;
+}
+
+static time_t gcd(time_t a,
+                  time_t b)
+{
+        if (a == 0)
+                return b;
+
+        return gcd(b % a, a);
+}
+
+static void * work(void * o)
+{
+        struct dht *       dht;
+        struct timespec    now;
+        struct list_head * p;
+        struct list_head * h;
+        struct list_head   reflist;
+        time_t             intv;
+        struct lookup *    lu;
+
+        dht = (struct dht *) o;
+
+        intv = gcd(dht->t_expire, dht->t_repub);
+        intv = gcd(intv, gcd(KAD_T_REPL, KAD_T_REFR)) / 2;
+
+        list_head_init(&reflist);
+
+        while (true) {
+                clock_gettime(CLOCK_REALTIME_COARSE, &now);
+
+                pthread_rwlock_wrlock(&dht->lock);
+
+                /* Republish registered hashes. 
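Keys registered on this node are pushed to the network again once their republish time (t_repub) has elapsed.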
*/
+                list_for_each_safe(p, h, &dht->refs) {
+                        struct ref_entry * e;
+                        e = list_entry(p, struct ref_entry, next);
+                        if (now.tv_sec > e->t_rep) {
+                                kad_publish(dht, e->key, dht->addr,
+                                            dht->t_expire);
+                                e->t_rep = now.tv_sec + dht->t_repub;
+                        }
+                }
+
+                /* Remove stale entries and republish if necessary. */
+                list_for_each_safe(p, h, &dht->entries) {
+                        struct list_head * p1;
+                        struct list_head * h1;
+                        struct dht_entry * e;
+                        e = list_entry(p, struct dht_entry, next);
+                        list_for_each_safe(p1, h1, &e->vals) {
+                                struct val * v;
+                                v = list_entry(p1, struct val, next);
+                                if (now.tv_sec > v->t_exp) {
+                                        list_del(&v->next);
+                                        val_destroy(v);
+                                        continue;
+                                }
+
+                                if (now.tv_sec > v->t_rep) {
+                                        kad_publish(dht, e->key, v->addr,
+                                                    v->t_exp - now.tv_sec);
+                                        v->t_rep = now.tv_sec + dht->t_replic;
+                                }
+                        }
+                }
+
+                /* Check the requests list for unresponsive nodes. */
+                list_for_each_safe(p, h, &dht->requests) {
+                        struct kad_req * r;
+                        r = list_entry(p, struct kad_req, next);
+                        if (now.tv_sec > r->t_exp) {
+                                list_del(&r->next);
+                                bmp_release(dht->cookies, r->cookie);
+                                dht_dead_peer(dht, r->key, r->addr);
+                                kad_req_destroy(r);
+                        }
+                }
+
+                /* Refresh unaccessed buckets. */
+                bucket_refresh(dht, dht->buckets, now.tv_sec, &reflist);
+
+                pthread_rwlock_unlock(&dht->lock);
+
+                list_for_each_safe(p, h, &reflist) {
+                        struct contact * c;
+                        c = list_entry(p, struct contact, next);
+                        lu = kad_lookup(dht, c->id, KAD_FIND_NODE);
+                        if (lu != NULL)
+                                lookup_destroy(lu);
+                        list_del(&c->next);
+                        contact_destroy(c);
+                }
+
+                sleep(intv);
+        }
+
+        return (void *) 0;
+}
+
+static int kad_handle_join_resp(struct dht *     dht,
+                                struct kad_req * req,
+                                kad_msg_t *      msg)
+{
+        assert(dht);
+        assert(req);
+        assert(msg);
+
+        /* We might send version numbers later to warn of updates if needed. 
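For now, a join response must carry all six kademlia parameters, or it is treated as a refusal.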
*/
+        if (!(msg->has_alpha && msg->has_b && msg->has_k && msg->has_t_expire &&
+              msg->has_t_refresh && msg->has_t_replicate)) {
+                log_warn("Join refused by remote.");
+                return -1;
+        }
+
+        if (msg->b < sizeof(uint64_t)) {
+                log_err("Hash sizes less than 8 bytes unsupported.");
+                return -1;
+        }
+
+        pthread_rwlock_wrlock(&dht->lock);
+
+        dht->buckets = bucket_create();
+        if (dht->buckets == NULL) {
+                pthread_rwlock_unlock(&dht->lock);
+                return -1;
+        }
+
+        /* A mismatch here most likely means a corrupt packet: the remote
+         * member would have refused the join, so we might refuse too. */
+        if (msg->alpha != KAD_ALPHA || msg->k != KAD_K)
+                log_warn("Different kademlia parameters detected.");
+
+        if (msg->t_replicate != KAD_T_REPL)
+                log_warn("Different kademlia replication time detected.");
+
+        if (msg->t_refresh != KAD_T_REFR)
+                log_warn("Different kademlia refresh time detected.");
+
+        dht->k        = msg->k;
+        dht->b        = msg->b;
+        dht->t_expire = msg->t_expire;
+        dht->t_repub  = MAX(1, dht->t_expire - 10);
+
+        if (pthread_create(&dht->worker, NULL, work, dht)) {
+                bucket_destroy(dht->buckets);
+                pthread_rwlock_unlock(&dht->lock);
+                return -1;
+        }
+
+        dht->state = DHT_RUNNING;
+
+        kad_req_respond(req);
+
+        dht_update_bucket(dht, msg->s_id.data, msg->s_addr);
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        log_dbg("Enrollment of DHT completed.");
+
+        return 0;
+}
+
+static int kad_handle_find_resp(struct dht *     dht,
+                                struct kad_req * req,
+                                kad_msg_t *      msg)
+{
+        struct lookup * lu;
+
+        assert(dht);
+        assert(req);
+        assert(msg);
+
+        pthread_rwlock_rdlock(&dht->lock);
+
+        lu = dht_find_lookup(dht, req->key);
+        if (lu == NULL) {
+                pthread_rwlock_unlock(&dht->lock);
+                return -1;
+        }
+
+        lookup_update(dht, lu, msg);
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        return 0;
+}
+
+static void kad_handle_response(struct dht * dht,
+                                kad_msg_t *  msg)
+{
+        struct kad_req * req;
+
+        assert(dht);
+        assert(msg);
+
+        pthread_rwlock_wrlock(&dht->lock);
+
+        req = dht_find_request(dht, msg);
+        if (req == NULL) {
+                pthread_rwlock_unlock(&dht->lock);
+                return;
+        }
+
+        bmp_release(dht->cookies, req->cookie);
+        list_del(&req->next);
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        switch(req->code) {
+        case KAD_JOIN:
+                if (kad_handle_join_resp(dht, req, msg))
+                        log_err("Enrollment of DHT failed.");
+                break;
+        case KAD_FIND_VALUE:
+        case KAD_FIND_NODE:
+                /* Break instead of returning so the request is destroyed. */
+                if (dht_get_state(dht) != DHT_RUNNING)
+                        break;
+                kad_handle_find_resp(dht, req, msg);
+                break;
+        default:
+                break;
+        }
+
+        kad_req_destroy(req);
+}
+
+int dht_bootstrap(struct dht * dht,
+                  size_t       b,
+                  time_t       t_expire)
+{
+        assert(dht);
+
+        pthread_rwlock_wrlock(&dht->lock);
+
+        dht->id = create_id(b);
+        if (dht->id == NULL) 
+                goto fail_id;
+
+        dht->buckets = bucket_create();
+        if (dht->buckets == NULL)
+                goto fail_buckets;
+
+        dht->buckets->depth = 0;
+        dht->buckets->mask  = 0;
+
+        dht->b        = b / CHAR_BIT;
+        dht->t_expire = MAX(2, t_expire);
+        dht->t_repub  = MAX(1, t_expire - 10);
+        dht->k        = KAD_K;
+
+        if (pthread_create(&dht->worker, NULL, work, dht))
+                goto fail_pthread_create;
+
+        dht->state = DHT_RUNNING;
+
+        dht_update_bucket(dht, dht->id, dht->addr);
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        return 0;
+
+ fail_pthread_create:
+        bucket_destroy(dht->buckets);
+        dht->buckets = NULL;
+ fail_buckets:
+        free(dht->id);
+        dht->id = NULL;
+ fail_id:
+        pthread_rwlock_unlock(&dht->lock);
+        return -1;
+}
+
+int dht_enroll(struct dht * dht,
+               uint64_t     addr)
+{
+        assert(dht);
+
+        return kad_join(dht, addr);
+}
+
+int dht_reg(struct dht *    dht,
+            const uint8_t * key)
+{
+        struct ref_entry * e;
+
+        assert(dht);
+        assert(key);
+        assert(dht->addr != 0);
+
+        if (dht_get_state(dht) != DHT_RUNNING)
+                return -1;
+
+        e = ref_entry_create(dht, key);
+        if (e == NULL)
+                return -ENOMEM;
+
+        pthread_rwlock_wrlock(&dht->lock);
+
+        list_add(&e->next, &dht->refs);
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        kad_publish(dht, key, dht->addr, dht->t_expire);
+
+        return 0;
+}
+
+int dht_unreg(struct dht *    dht,
+              const uint8_t * key)
+{
+        struct list_head * p;
+        struct list_head * h;
+
+        assert(dht);
+        assert(key);
+
+        if (dht_get_state(dht) != DHT_RUNNING)
+                return -1;
+
+        pthread_rwlock_wrlock(&dht->lock);
+
+        list_for_each_safe(p, h, &dht->refs) {
+                struct ref_entry * r = list_entry(p, struct ref_entry, next);
+                if (!memcmp(key, r->key, dht->b)) {
+                        list_del(&r->next);
+                        ref_entry_destroy(r);
+                }
+        }
+
+        /* dht_del takes the lock itself, so release it first. */
+        pthread_rwlock_unlock(&dht->lock);
+
+        dht_del(dht, key, dht->addr);
+
+        return 0;
+}
+
+uint64_t dht_query(struct dht *    dht,
+                   const uint8_t * key)
+{
+        struct dht_entry * e;
+        struct lookup *    lu;
+        uint64_t           addrs[KAD_K];
+        size_t             n;
+
+        addrs[0] = 0;
+
+        pthread_rwlock_rdlock(&dht->lock);
+
+        e = dht_find_entry(dht, key);
+        if (e != NULL)
+                addrs[0] = dht_entry_get_addr(dht, e);
+
+        pthread_rwlock_unlock(&dht->lock);
+
+        if (addrs[0] != 0 && addrs[0] != dht->addr)
+                return addrs[0];
+
+        lu = kad_lookup(dht, key, KAD_FIND_VALUE);
+        if (lu == NULL)
+                return 0;
+
+        n = lookup_get_addrs(lu, addrs);
+        if (n == 0) {
+                lookup_destroy(lu);
+                return 0;
+        }
+
+        lookup_destroy(lu);
+
+        /* Current behaviour is anycast: return the first peer address. 
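If that address is our own, fall back to the next address found, if any.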
*/
+        if (addrs[0] != dht->addr)
+                return addrs[0];
+
+        if (n > 1)
+                return addrs[1];
+
+        return 0;
+}
+
+void dht_post_sdu(void *               ae,
+                  struct shm_du_buff * sdb)
+{
+        struct dht *         dht;
+        kad_msg_t *          msg;
+        kad_contact_msg_t ** cmsgs;
+        kad_msg_t            resp_msg = KAD_MSG__INIT;
+        uint64_t             addr;
+        buffer_t             buf;
+        size_t               i;
+
+        assert(ae);
+        assert(sdb);
+
+        memset(&buf, 0, sizeof(buf));
+
+        dht = (struct dht *) ae;
+
+        msg = kad_msg__unpack(NULL,
+                              shm_du_buff_tail(sdb) - shm_du_buff_head(sdb),
+                              shm_du_buff_head(sdb));
+
+        ipcp_sdb_release(sdb);
+
+        if (msg == NULL) {
+                log_err("Failed to unpack message.");
+                return;
+        }
+
+        if (msg->has_key && msg->key.len != dht->b) {
+                kad_msg__free_unpacked(msg, NULL);
+                log_warn("Bad key in message.");
+                return;
+        }
+
+        if (msg->has_s_id && !msg->has_b && msg->s_id.len != dht->b) {
+                kad_msg__free_unpacked(msg, NULL);
+                log_warn("Bad source ID in message of type %d.", msg->code);
+                return;
+        }
+
+        if (msg->code != KAD_RESPONSE && dht_get_state(dht) != DHT_RUNNING) {
+                kad_msg__free_unpacked(msg, NULL);
+                return;
+        }
+
+        addr = msg->s_addr;
+
+        resp_msg.code   = KAD_RESPONSE;
+        resp_msg.cookie = msg->cookie;
+
+        switch(msg->code) {
+        case KAD_JOIN:
+                /* Refuse the enrollee if any parameter check fails. */
+                if (msg->alpha != KAD_ALPHA || msg->k != KAD_K) {
+                        log_warn("Parameter mismatch. "
+                                 "DHT enrolment refused.");
+                        break;
+                }
+
+                if (msg->t_replicate != KAD_T_REPL) {
+                        log_warn("Replication time mismatch. "
+                                 "DHT enrolment refused.");
+                        break;
+                }
+
+                if (msg->t_refresh != KAD_T_REFR) {
+                        log_warn("Refresh time mismatch. "
+                                 "DHT enrolment refused.");
+                        break;
+                }
+
+                resp_msg.has_alpha       = true;
+                resp_msg.has_b           = true;
+                resp_msg.has_k           = true;
+                resp_msg.has_t_expire    = true;
+                resp_msg.has_t_refresh   = true;
+                resp_msg.has_t_replicate = true;
+                resp_msg.alpha           = KAD_ALPHA;
+                resp_msg.b               = dht->b;
+                resp_msg.k               = KAD_K;
+                resp_msg.t_expire        = dht->t_expire;
+                resp_msg.t_refresh       = KAD_T_REFR;
+                resp_msg.t_replicate     = KAD_T_REPL;
+                break;
+        case KAD_FIND_VALUE:
+                buf = dht_retrieve(dht, msg->key.data);
+                if (buf.len != 0) {
+                        resp_msg.n_addrs = buf.len;
+                        resp_msg.addrs   = (uint64_t *) buf.data;
+                        break;
+                }
+                /* FALLTHRU */
+        case KAD_FIND_NODE:
+                /* Return k closest contacts. 
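A FIND_VALUE for a key we do not hold falls through and is answered like a FIND_NODE.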
*/ +                resp_msg.n_contacts = +                        dht_get_contacts(dht, msg->key.data, &cmsgs); +                resp_msg.contacts = cmsgs; +                break; +        case KAD_STORE: +                if (msg->n_contacts < 1) { +                        log_warn("No contacts in store message."); +                        break; +                } + +                if (!msg->has_t_expire) { +                        log_warn("No expiry time in store message."); +                        break; +                } + +                kad_add(dht, *msg->contacts, msg->n_contacts, msg->t_expire); +                break; +        case KAD_RESPONSE: +                kad_handle_response(dht, msg); +                break; +        default: +                assert(false); +                break; +        } + +        if (msg->code != KAD_JOIN) { +                pthread_rwlock_wrlock(&dht->lock); +                if (dht_update_bucket(dht, msg->s_id.data, addr)) +                        log_warn("Failed to update bucket."); +                pthread_rwlock_unlock(&dht->lock); +        } + +        if (msg->code < KAD_STORE) { +                if (send_msg(dht, &resp_msg, addr)) +                        log_warn("Failed to send response."); +        } + +        kad_msg__free_unpacked(msg, NULL); + +        if (resp_msg.n_addrs > 0) +                free(resp_msg.addrs); + +        if (resp_msg.n_contacts == 0) +                return; + +        for (i = 0; i < resp_msg.n_contacts; ++i) +                kad_contact_msg__free_unpacked(resp_msg.contacts[i], NULL); +        free(resp_msg.contacts); +} + +void dht_destroy(struct dht * dht) +{ +        struct list_head * p; +        struct list_head * h; + +        if (dht == NULL) +                return; + +        if (dht_get_state(dht) == DHT_RUNNING) +                dht_set_state(dht, DHT_SHUTDOWN); + +        pthread_rwlock_wrlock(&dht->lock); + +        list_for_each_safe(p, h, &dht->entries) { +                struct dht_entry * e = list_entry(p, struct dht_entry, next); +                list_del(&e->next); +                dht_entry_destroy(e); +        } + +        list_for_each_safe(p, h, &dht->requests) { +                struct kad_req * r = list_entry(p, struct kad_req, next); +                list_del(&r->next); +                free(r); +        } + +        list_for_each_safe(p, h, &dht->refs) { +                struct ref_entry * e = list_entry(p, struct ref_entry, next); +                list_del(&e->next); +                ref_entry_destroy(e); +        } + +        list_for_each_safe(p, h, &dht->lookups) { +                struct lookup * l = list_entry(p, struct lookup, next); +                list_del(&l->next); +                lookup_destroy(l); +        } + +        pthread_rwlock_unlock(&dht->lock); + +        if (dht_get_state(dht) == DHT_SHUTDOWN) { +                pthread_cancel(dht->worker); +                pthread_join(dht->worker, NULL); +        } + +        if (dht->buckets != NULL) +                bucket_destroy(dht->buckets); + +        bmp_destroy(dht->cookies); + +        pthread_mutex_destroy(&dht->mtx); + +        pthread_rwlock_destroy(&dht->lock); + +        free(dht->id); + +        free(dht); +} + +struct dht * dht_create(uint64_t addr) +{ +        struct dht * dht; + +        dht = malloc(sizeof(*dht)); +        if (dht == NULL) +                goto fail_malloc; + +        dht->buckets = NULL; + +        list_head_init(&dht->entries); +        list_head_init(&dht->requests); +        
list_head_init(&dht->refs); +        list_head_init(&dht->lookups); + +        if (pthread_rwlock_init(&dht->lock, NULL)) +                goto fail_rwlock; + +        if (pthread_mutex_init(&dht->mtx, NULL)) +                goto fail_mutex; + +        dht->cookies = bmp_create(DHT_MAX_REQS, 1); +        if (dht->cookies == NULL) +                goto fail_bmp; + +        dht->b    = 0; +        dht->addr = addr; +        dht->id   = NULL; +#ifndef __DHT_TEST__ +        dht->fd   = dt_reg_ae(dht, &dht_post_sdu); +#endif /* __DHT_TEST__ */ + +        dht->state = DHT_INIT; + +        return dht; + + fail_bmp: +        pthread_mutex_destroy(&dht->mtx); + fail_mutex: +        pthread_rwlock_destroy(&dht->lock); + fail_rwlock: +        free(dht); + fail_malloc: +        return NULL; +} diff --git a/src/ipcpd/normal/dht.h b/src/ipcpd/normal/dht.h new file mode 100644 index 00000000..5d7fc894 --- /dev/null +++ b/src/ipcpd/normal/dht.h @@ -0,0 +1,54 @@ +/* + * Ouroboros - Copyright (C) 2016 - 2017 + * + * Distributed Hash Table based on Kademlia + * + *    Dimitri Staessens <dimitri.staessens@ugent.be> + *    Sander Vrijders   <sander.vrijders@ugent.be> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * version 2.1 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef OUROBOROS_IPCPD_NORMAL_DHT_H +#define OUROBOROS_IPCPD_NORMAL_DHT_H + +#include <ouroboros/ipcp-dev.h> + +#include <stdint.h> +#include <sys/types.h> + +struct dht; + +struct dht * dht_create(uint64_t addr); + +int          dht_bootstrap(struct dht * dht, +                           size_t       b, +                           time_t       t_expire); + +int          dht_enroll(struct dht * dht, +                        uint64_t     addr); + +void         dht_destroy(struct dht * dht); + +int          dht_reg(struct dht *    dht, +                     const uint8_t * key); + +int          dht_unreg(struct dht *    dht, +                       const uint8_t * key); + +uint64_t     dht_query(struct dht *    dht, +                       const uint8_t * key); + +#endif /* OUROBOROS_IPCPD_NORMAL_DHT_H */ diff --git a/src/ipcpd/normal/dir.c b/src/ipcpd/normal/dir.c index 5ea8a300..69b7e90e 100644 --- a/src/ipcpd/normal/dir.c +++ b/src/ipcpd/normal/dir.c @@ -20,129 +20,134 @@   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.   
*/
+#define OUROBOROS_PREFIX "directory"
+
 #include <ouroboros/config.h>
+#include <ouroboros/endian.h>
 #include <ouroboros/errno.h>
+#include <ouroboros/logs.h>
 #include <ouroboros/rib.h>
+#include <ouroboros/utils.h>
 
 #include "dir.h"
+#include "dht.h"
 #include "ipcp.h"
 #include "ribconfig.h"
 
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
+#include <inttypes.h>
 
-static char dir_path[RIB_MAX_PATH_LEN + 1];
+#define KAD_B (hash_len(ipcpi.dir_hash_algo) * CHAR_BIT)
+#define ENROL_RETR 6
+#define ENROL_INTV 1
 
-static void dir_path_reset(void) {
-        dir_path[strlen(DIR_PATH)]= '\0';
-        assert(strcmp(DIR_PATH, dir_path) == 0);
-}
+struct dht * dht;
 
-int dir_init(void)
+static uint64_t find_peer_addr(void)
 {
-        /* FIXME: set ribmgr dissemination here */
-        if (rib_add(RIB_ROOT, DIR_NAME))
-                return -1;
+        ssize_t  i;
+        char ** members;
+        ssize_t n_members;
+        size_t  reset;
+        char    path[RIB_MAX_PATH_LEN + 1];
 
-        strcpy(dir_path, DIR_PATH);
+        strcpy(path, MEMBERS_PATH);
 
-        return 0;
-}
+        reset = strlen(path);
 
-int dir_fini(void)
-{
-        /* FIXME: remove ribmgr dissemination here*/
+        n_members = rib_children(path, &members);
+        if (n_members == 1) {
+                freepp(ssize_t, members, n_members);
+                return 0;
+        }
+
+        for (i = 0; i < n_members; ++i) {
+                uint64_t addr;
+                rib_path_append(path, members[i]);
+                if (rib_read(path, &addr, sizeof(addr)) != sizeof(addr)) {
+                        log_err("Failed to read address from RIB.");
+                        freepp(ssize_t, members, n_members);
+                        return ipcpi.dt_addr;
+                }
+
+                if (addr != ipcpi.dt_addr) {
+                        freepp(ssize_t, members, n_members);
+                        return addr;
+                }
+
+                path[reset] = '\0';
+        }
 
-        dir_path_reset();
-        rib_del(dir_path);
+        freepp(ssize_t, members, n_members);
 
         return 0;
 }
 
-int dir_reg(const uint8_t * hash)
+int dir_init(void)
 {
-        char hashstr[ipcp_dir_hash_strlen() + 1];
-        int ret;
-
-        assert(hash);
+        uint64_t addr;
 
-        dir_path_reset();
-
-        ipcp_hash_str(hashstr, hash);
-
-        ret = rib_add(dir_path, hashstr);
-        if (ret == -ENOMEM)
-                 return -ENOMEM;
-
-        rib_path_append(dir_path, hashstr);
+        dht = dht_create(ipcpi.dt_addr);
+        if (dht == NULL)
+                return -ENOMEM;
 
-        ret = rib_add(dir_path, ipcpi.name);
-        if (ret == -EPERM)
+        addr = find_peer_addr();
+        if (addr == ipcpi.dt_addr) {
+                log_err("Failed to get peer address.");
+                dht_destroy(dht);
                 return -EPERM;
-        if (ret == -ENOMEM) {
-                if (rib_children(dir_path, NULL) == 0)
-                        rib_del(dir_path);
-                return -ENOMEM;
         }
 
-        return 0;
-}
-
-int dir_unreg(const uint8_t * hash)
-{
-        char hashstr[ipcp_dir_hash_strlen() + 1];
-        size_t len;
-
-        assert(hash);
+        if (addr != 0) {
+                size_t retr = 0;
+                log_dbg("Enrolling directory with peer %" PRIu64 ".", addr);
+                /* NOTE: we could try other members if dht_enroll times out. 
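As is, the same peer is retried up to ENROL_RETR times, ENROL_INTV seconds apart.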
*/ +                while (dht_enroll(dht, addr)) { +                        if (retr++ == ENROL_RETR) { +                                dht_destroy(dht); +                                return -EPERM; +                        } -        dir_path_reset(); +                        log_dbg("Directory enrollment failed, retrying..."); +                        sleep(ENROL_INTV); +                } -        ipcp_hash_str(hashstr, hash); +                log_dbg("Directory enrolled."); -        rib_path_append(dir_path, hashstr); - -        if (!rib_has(dir_path))                  return 0; +        } -        len = strlen(dir_path); - -        rib_path_append(dir_path, ipcpi.name); - -        rib_del(dir_path); +        log_dbg("Bootstrapping directory."); -        dir_path[len] = '\0'; +        /* TODO: get parameters for bootstrap from IRM tool. */ +        if (dht_bootstrap(dht, KAD_B, 86400)) { +                dht_destroy(dht); +                return -ENOMEM; +        } -        if (rib_children(dir_path, NULL) == 0) -                rib_del(dir_path); +        log_dbg("Directory bootstrapped.");          return 0;  } -int dir_query(const uint8_t * hash) +void dir_fini(void)  { -        char hashstr[ipcp_dir_hash_strlen() + 1]; -        size_t len; - -        dir_path_reset(); - -        ipcp_hash_str(hashstr, hash); - -        rib_path_append(dir_path, hashstr); - -        if (!rib_has(dir_path)) -                return -1; - -        /* FIXME: assert after local IPCP is deprecated */ -        len = strlen(dir_path); +        dht_destroy(dht); +} -        rib_path_append(dir_path, ipcpi.name); +int dir_reg(const uint8_t * hash) +{ +        return dht_reg(dht, hash); +} -        if (rib_has(dir_path)) { -                dir_path[len] = '\0'; -                if (rib_children(dir_path, NULL) == 1) -                        return -1; -        } +int dir_unreg(const uint8_t * hash) +{ +        return dht_unreg(dht, hash); +} -        return 0; +uint64_t dir_query(const uint8_t * hash) +{ +        return dht_query(dht, hash);  } diff --git a/src/ipcpd/normal/dir.h b/src/ipcpd/normal/dir.h index 1b28a5c0..4091a3e8 100644 --- a/src/ipcpd/normal/dir.h +++ b/src/ipcpd/normal/dir.h @@ -23,14 +23,14 @@  #ifndef OUROBOROS_IPCPD_NORMAL_DIR_H  #define OUROBOROS_IPCPD_NORMAL_DIR_H -int dir_init(void); +int      dir_init(void); -int dir_fini(void); +void     dir_fini(void); -int dir_reg(const uint8_t * hash); +int      dir_reg(const uint8_t * hash); -int dir_unreg(const uint8_t * hash); +int      dir_unreg(const uint8_t * hash); -int dir_query(const uint8_t * hash); +uint64_t dir_query(const uint8_t * hash);  #endif /* OUROBOROS_IPCPD_NORMAL_DIR_H */ diff --git a/src/ipcpd/normal/dt.c b/src/ipcpd/normal/dt.c index 1867c13b..5fcc5865 100644 --- a/src/ipcpd/normal/dt.c +++ b/src/ipcpd/normal/dt.c @@ -50,7 +50,7 @@  #include <assert.h>  struct ae_info { -        int    (*post_sdu)(void * ae, struct shm_du_buff * sdb); +        void   (* post_sdu)(void * ae, struct shm_du_buff * sdb);          void * ae;  }; @@ -131,11 +131,14 @@ static int sdu_handler(int                  fd,                          return 0;                  } -                if (dt.aes[dt_pci.fd].post_sdu(dt.aes[dt_pci.fd].ae, sdb)) { +                if (dt.aes[dt_pci.fd].post_sdu == NULL) { +                        log_err("No registered AE on fd %d.", dt_pci.fd);                          ipcp_sdb_release(sdb); -                        return -1; +                        return -EPERM;                  } +                
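/* Hand the SDU to the registered AE; the callback takes ownership of the buffer. */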
dt.aes[dt_pci.fd].post_sdu(dt.aes[dt_pci.fd].ae, sdb); +                  return 0;          } @@ -295,7 +298,7 @@ void dt_stop(void)  }  int dt_reg_ae(void * ae, -              int (* func)(void * func, struct shm_du_buff *)) +              void (* func)(void * func, struct shm_du_buff *))  {          int res_fd; @@ -330,10 +333,11 @@ int dt_write_sdu(uint64_t             dst_addr,          struct dt_pci dt_pci;          assert(sdb); +        assert(dst_addr != ipcpi.dt_addr);          fd = pff_nhop(dt.pff[qc], dst_addr);          if (fd < 0) { -                log_err("Could not get nhop for addr %" PRIu64 ".", dst_addr); +                log_dbg("Could not get nhop for addr %" PRIu64 ".", dst_addr);                  return -1;          } diff --git a/src/ipcpd/normal/dt.h b/src/ipcpd/normal/dt.h index 0e1a8cc3..15ef51f0 100644 --- a/src/ipcpd/normal/dt.h +++ b/src/ipcpd/normal/dt.h @@ -38,7 +38,7 @@ int  dt_start(void);  void dt_stop(void);  int  dt_reg_ae(void * ae, -               int (* func)(void * ae, struct shm_du_buff * sdb)); +               void (* func)(void * ae, struct shm_du_buff * sdb));  int  dt_write_sdu(uint64_t             dst_addr,                    qoscube_t            qc, diff --git a/src/ipcpd/normal/fa.c b/src/ipcpd/normal/fa.c index 40a680c3..704f4f16 100644 --- a/src/ipcpd/normal/fa.c +++ b/src/ipcpd/normal/fa.c @@ -30,6 +30,7 @@  #include <ouroboros/dev.h>  #include <ouroboros/ipcp-dev.h> +#include "dir.h"  #include "dt_pci.h"  #include "fa.h"  #include "sdu_sched.h" @@ -79,8 +80,8 @@ static void destroy_conn(int fd)          fa.r_addr[fd] = INVALID_ADDR;  } -static int fa_post_sdu(void *               ae, -                       struct shm_du_buff * sdb) +static void fa_post_sdu(void *               ae, +                        struct shm_du_buff * sdb)  {          struct timespec    ts  = {0, TIMEOUT * 1000};          struct timespec    abstime; @@ -98,9 +99,12 @@ static int fa_post_sdu(void *               ae,                                       shm_du_buff_tail(sdb) -                                       shm_du_buff_head(sdb),                                       shm_du_buff_head(sdb)); + +        ipcp_sdb_release(sdb); +          if (msg == NULL) {                  log_err("Failed to unpack flow alloc message."); -                return -1; +                return;          }          switch (msg->code) { @@ -113,7 +117,7 @@ static int fa_post_sdu(void *               ae,                          log_err("Bad flow request.");                          pthread_mutex_unlock(&ipcpi.alloc_lock);                          flow_alloc_msg__free_unpacked(msg, NULL); -                        return -1; +                        return;                  }                  while (ipcpi.alloc_id != -1 && @@ -128,7 +132,7 @@ static int fa_post_sdu(void *               ae,                          log_dbg("Won't allocate over non-operational IPCP.");                          pthread_mutex_unlock(&ipcpi.alloc_lock);                          flow_alloc_msg__free_unpacked(msg, NULL); -                        return -1; +                        return;                  }                  assert(ipcpi.alloc_id == -1); @@ -141,7 +145,7 @@ static int fa_post_sdu(void *               ae,                          pthread_mutex_unlock(&ipcpi.alloc_lock);                          flow_alloc_msg__free_unpacked(msg, NULL);                          log_err("Failed to get fd for flow."); -                        return -1; +                        return;                  }          
        pthread_rwlock_wrlock(&fa.flows_lock); @@ -173,13 +177,10 @@ static int fa_post_sdu(void *               ae,          default:                  log_err("Got an unknown flow allocation message.");                  flow_alloc_msg__free_unpacked(msg, NULL); -                return -1; +                return;          }          flow_alloc_msg__free_unpacked(msg, NULL); -        ipcp_sdb_release(sdb); - -        return 0;  }  int fa_init(void) @@ -240,47 +241,10 @@ int fa_alloc(int             fd,               qoscube_t       qc)  {          flow_alloc_msg_t     msg = FLOW_ALLOC_MSG__INIT; -        char                 path[RIB_MAX_PATH_LEN + 1];          uint64_t             addr; -        ssize_t              ch; -        ssize_t              i; -        char **              children; -        char                 hashstr[ipcp_dir_hash_strlen() + 1]; -        char *               dst_ipcp = NULL;          struct shm_du_buff * sdb; -        ipcp_hash_str(hashstr, dst); - -        assert(strlen(hashstr) + strlen(DIR_PATH) + 1 -               < RIB_MAX_PATH_LEN); - -        strcpy(path, DIR_PATH); - -        rib_path_append(path, hashstr); - -        ch = rib_children(path, &children); -        if (ch <= 0) -                return -1; - -        for (i = 0; i < ch; ++i) -                if (dst_ipcp == NULL && strcmp(children[i], ipcpi.name) != 0) -                        dst_ipcp = children[i]; -                else -                        free(children[i]); - -        free(children); - -        if (dst_ipcp == NULL) -                return -1; - -        strcpy(path, MEMBERS_PATH); - -        rib_path_append(path, dst_ipcp); - -        free(dst_ipcp); - -        if (rib_read(path, &addr, sizeof(addr)) != sizeof(addr)) -                return -1; +        addr = dir_query(dst);          msg.code         = FLOW_ALLOC_CODE__FLOW_REQ;          msg.has_hash     = true; diff --git a/src/ipcpd/normal/kademlia.proto b/src/ipcpd/normal/kademlia.proto new file mode 100644 index 00000000..0b7e8beb --- /dev/null +++ b/src/ipcpd/normal/kademlia.proto @@ -0,0 +1,46 @@ +/* + * Ouroboros - Copyright (C) 2016 - 2017 + * + * KAD protocol + * + *    Dimitri Staessens <dimitri.staessens@intec.ugent.be> + *    Sander Vrijders   <sander.vrijders@intec.ugent.be> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * version 2.1 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +syntax = "proto2"; + +message kad_contact_msg { +        required bytes  id   = 1; +        required uint64 addr = 2; +}; + +message kad_msg { +        required uint32 code              =  1; +        required uint32 cookie            =  2; +        required uint64 s_addr            =  3; +        optional bytes  s_id              =  4; +        optional bytes  key               =  5; +        repeated uint64 addrs             =  6; +        repeated kad_contact_msg contacts =  7; +        // enrolment parameters +        optional uint32 alpha             =  8; +        optional uint32 b                 =  9; +        optional uint32 k                 = 10; +        optional uint32 t_expire          = 11; +        optional uint32 t_refresh         = 12; +        optional uint32 t_replicate       = 13; +};
\ No newline at end of file diff --git a/src/ipcpd/normal/main.c b/src/ipcpd/normal/main.c index 8c28de78..f94c15de 100644 --- a/src/ipcpd/normal/main.c +++ b/src/ipcpd/normal/main.c @@ -116,11 +116,6 @@ static int boot_components(void)          log_dbg("Starting ribmgr."); -        if (dir_init()) { -                log_err("Failed to initialize directory."); -                goto fail_dir; -        } -          if (ribmgr_init()) {                  log_err("Failed to initialize RIB manager.");                  goto fail_ribmgr; @@ -148,6 +143,11 @@ static int boot_components(void)                  goto fail_fa_start;          } +        if (dir_init()) { +                log_err("Failed to initialize directory."); +                goto fail_dir; +        } +          if (enroll_start()) {                  log_err("Failed to start enroll.");                  goto fail_enroll_start; @@ -166,6 +166,8 @@ static int boot_components(void)          ipcp_set_state(IPCP_INIT);          enroll_stop();   fail_enroll_start: +        dir_fini(); + fail_dir:          fa_stop();   fail_fa_start:          dt_stop(); @@ -176,8 +178,6 @@ static int boot_components(void)   fail_dt:          ribmgr_fini();   fail_ribmgr: -        dir_fini(); - fail_dir:          addr_auth_fini();   fail_addr_auth:          free(ipcpi.dif_name); @@ -191,6 +191,8 @@ void shutdown_components(void)          enroll_stop(); +        dir_fini(); +          fa_stop();          dt_stop(); @@ -201,8 +203,6 @@ void shutdown_components(void)          ribmgr_fini(); -        dir_fini(); -          addr_auth_fini();          free(ipcpi.dif_name); @@ -227,10 +227,9 @@ static int normal_ipcp_enroll(const char *      dst,                  return -1;          } -        log_dbg("Enrolled with " HASH_FMT, HASH_VAL(dst)); +        log_dbg("Enrolled with %s.", dst);          info->dir_hash_algo = ipcpi.dir_hash_algo; -          strcpy(info->dif_name, ipcpi.dif_name);          return 0; @@ -347,12 +346,17 @@ static int normal_ipcp_bootstrap(const struct ipcp_config * conf)          return 0;  } +static int normal_ipcp_query(const uint8_t * dst) +{ +        return dir_query(dst) ? 
0 : -1; +} +  static struct ipcp_ops normal_ops = {          .ipcp_bootstrap       = normal_ipcp_bootstrap,          .ipcp_enroll          = normal_ipcp_enroll,          .ipcp_reg             = dir_reg,          .ipcp_unreg           = dir_unreg, -        .ipcp_query           = dir_query, +        .ipcp_query           = normal_ipcp_query,          .ipcp_flow_alloc      = fa_alloc,          .ipcp_flow_alloc_resp = fa_alloc_resp,          .ipcp_flow_dealloc    = fa_dealloc diff --git a/src/ipcpd/normal/pol/flat.c b/src/ipcpd/normal/pol/flat.c index e709da7c..0907cf7a 100644 --- a/src/ipcpd/normal/pol/flat.c +++ b/src/ipcpd/normal/pol/flat.c @@ -56,7 +56,7 @@ static int addr_taken(char *  name,          char path[RIB_MAX_PATH_LEN + 1];          size_t reset; -        strcpy(path, "/" MEMBERS_NAME); +        strcpy(path, MEMBERS_PATH);          reset = strlen(path); @@ -102,7 +102,7 @@ uint64_t flat_address(void)          char ** members;          ssize_t n_members; -        strcpy(path, "/" MEMBERS_NAME); +        strcpy(path, MEMBERS_PATH);          if (!rib_has(path)) {                  log_err("Could not read members from RIB."); diff --git a/src/ipcpd/normal/ribconfig.h b/src/ipcpd/normal/ribconfig.h index 31c79fbe..db1ff1bb 100644 --- a/src/ipcpd/normal/ribconfig.h +++ b/src/ipcpd/normal/ribconfig.h @@ -29,9 +29,7 @@  #define DLR          "/"  #define BOOT_NAME    "boot"  #define MEMBERS_NAME "members" -#define DIR_NAME     "directory"  #define ROUTING_NAME "fsdb" -#define DIR_PATH     DLR DIR_NAME  #define BOOT_PATH    DLR BOOT_NAME  #define MEMBERS_PATH DLR MEMBERS_NAME  #define ROUTING_PATH DLR ROUTING_NAME diff --git a/src/ipcpd/normal/ribmgr.c b/src/ipcpd/normal/ribmgr.c index 266a628d..3beb917c 100644 --- a/src/ipcpd/normal/ribmgr.c +++ b/src/ipcpd/normal/ribmgr.c @@ -299,9 +299,8 @@ static void * sync_rib(void *o)                          rib_path_append(path, children[--ch]);                          free(children[ch]); -                        /* Only sync fsdb, members and directory */ +                        /* Only sync fsdb and members */                          if (strcmp(path, MEMBERS_PATH) == 0 -                            || strcmp(path, DIR_PATH) == 0                              || strcmp(path, ROUTING_PATH) == 0)                                  ribmgr_sync(path);                  } diff --git a/src/ipcpd/normal/sdu_sched.c b/src/ipcpd/normal/sdu_sched.c index 63259430..a4b9e074 100644 --- a/src/ipcpd/normal/sdu_sched.c +++ b/src/ipcpd/normal/sdu_sched.c @@ -36,11 +36,19 @@  struct sdu_sched {          flow_set_t * set[QOS_CUBE_MAX]; -        fqueue_t *   fqs[QOS_CUBE_MAX];          next_sdu_t   callback; -        pthread_t    sdu_reader; +        pthread_t    sdu_readers[IPCP_SCHED_THREADS];  }; +static void cleanup_reader(void * o) +{ +        int         i; +        fqueue_t ** fqs = (fqueue_t **) o; + +        for (i = 0; i < QOS_CUBE_MAX; ++i) +                fqueue_destroy(fqs[i]); +} +  static void * sdu_reader(void * o)  {          struct sdu_sched *   sched; @@ -49,14 +57,27 @@ static void * sdu_reader(void * o)          int                  fd;          int                  i = 0;          int                  ret; +        fqueue_t *           fqs[QOS_CUBE_MAX];          sched = (struct sdu_sched *) o; +        for (i = 0; i < QOS_CUBE_MAX; ++i) { +                fqs[i] = fqueue_create(); +                if (fqs[i] == NULL) { +                        int j; +                        for (j = 0; j < i; ++j) +                                
fqueue_destroy(fqs[j]); +                        return (void *) -1; +                } +        } + +        pthread_cleanup_push(cleanup_reader, fqs); +          while (true) {                  /* FIXME: replace with scheduling policy call */                  i = (i + 1) % QOS_CUBE_MAX; -                ret = flow_event_wait(sched->set[i], sched->fqs[i], &timeout); +                ret = flow_event_wait(sched->set[i], fqs[i], &timeout);                  if (ret == -ETIMEDOUT)                          continue; @@ -65,7 +86,7 @@ static void * sdu_reader(void * o)                          continue;                  } -                while ((fd = fqueue_next(sched->fqs[i])) >= 0) { +                while ((fd = fqueue_next(fqs[i])) >= 0) {                          if (ipcp_flow_read(fd, &sdb)) {                                  log_warn("Failed to read SDU from fd %d.", fd);                                  continue; @@ -78,6 +99,8 @@ static void * sdu_reader(void * o)                  }          } +        pthread_cleanup_pop(true); +          return (void *) 0;  } @@ -89,7 +112,7 @@ struct sdu_sched * sdu_sched_create(next_sdu_t callback)          sdu_sched = malloc(sizeof(*sdu_sched));          if (sdu_sched == NULL) -                return NULL; +                goto fail_malloc;          sdu_sched->callback = callback; @@ -98,31 +121,27 @@ struct sdu_sched * sdu_sched_create(next_sdu_t callback)                  if (sdu_sched->set[i] == NULL) {                          for (j = 0; j < i; ++j)                                  flow_set_destroy(sdu_sched->set[j]); -                        goto fail_sdu_sched; +                        goto fail_flow_set;                  }          } -        for (i = 0; i < QOS_CUBE_MAX; ++i) { -                sdu_sched->fqs[i] = fqueue_create(); -                if (sdu_sched->fqs[i] == NULL) { -                        for (j = 0; j < i; ++j) -                                fqueue_destroy(sdu_sched->fqs[j]); +        for (i = 0; i < IPCP_SCHED_THREADS; ++i) { +                if (pthread_create(&sdu_sched->sdu_readers[i], NULL, +                                   sdu_reader, sdu_sched)) { +                        int j; +                        for (j = 0; j < i; ++j) { +                                pthread_cancel(sdu_sched->sdu_readers[j]); +                                pthread_join(sdu_sched->sdu_readers[j], NULL); +                        }                          goto fail_flow_set;                  }          } -        pthread_create(&sdu_sched->sdu_reader, -                       NULL, -                       sdu_reader, -                       (void *) sdu_sched); -          return sdu_sched;   fail_flow_set: -        for (i = 0; i < QOS_CUBE_MAX; ++i) -                flow_set_destroy(sdu_sched->set[i]); - fail_sdu_sched:           free(sdu_sched); + fail_malloc:           return NULL;  } @@ -132,14 +151,13 @@ void sdu_sched_destroy(struct sdu_sched * sdu_sched)          assert(sdu_sched); -        pthread_cancel(sdu_sched->sdu_reader); - -        pthread_join(sdu_sched->sdu_reader, NULL); +        for (i = 0; i < IPCP_SCHED_THREADS; ++i) { +                pthread_cancel(sdu_sched->sdu_readers[i]); +                pthread_join(sdu_sched->sdu_readers[i], NULL); +        } -        for (i = 0; i < QOS_CUBE_MAX; ++i) { -                fqueue_destroy(sdu_sched->fqs[i]); +        for (i = 0; i < QOS_CUBE_MAX; ++i)                  flow_set_destroy(sdu_sched->set[i]); -        }          free(sdu_sched);  } diff --git 
a/src/ipcpd/normal/tests/CMakeLists.txt b/src/ipcpd/normal/tests/CMakeLists.txt new file mode 100644 index 00000000..d975caf6 --- /dev/null +++ b/src/ipcpd/normal/tests/CMakeLists.txt @@ -0,0 +1,37 @@ +get_filename_component(CURRENT_SOURCE_PARENT_DIR +  ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) +get_filename_component(CURRENT_BINARY_PARENT_DIR +  ${CMAKE_CURRENT_BINARY_DIR} DIRECTORY) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +include_directories(${CURRENT_SOURCE_PARENT_DIR}) +include_directories(${CURRENT_BINARY_PARENT_DIR}) + +include_directories(${CMAKE_SOURCE_DIR}/include) +include_directories(${CMAKE_BINARY_DIR}/include) + +get_filename_component(PARENT_PATH ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) +get_filename_component(PARENT_DIR ${PARENT_PATH} NAME) + +create_test_sourcelist(${PARENT_DIR}_tests test_suite.c +  # Add new tests here +  dht_test.c +) + +set_source_files_properties(${KAD_PROTO_SRCS} PROPERTIES GENERATED TRUE) + +add_executable(${PARENT_DIR}_test EXCLUDE_FROM_ALL ${${PARENT_DIR}_tests} +  ${KAD_PROTO_SRCS}) +target_link_libraries(${PARENT_DIR}_test ouroboros) + +add_dependencies(check ${PARENT_DIR}_test) + +set(tests_to_run ${${PARENT_DIR}_tests}) +remove(tests_to_run test_suite.c) + +foreach (test ${tests_to_run}) +  get_filename_component(test_name ${test} NAME_WE) +  add_test(${test_name} ${C_TEST_PATH}/${PARENT_DIR}_test ${test_name}) +endforeach (test) diff --git a/src/ipcpd/normal/tests/dht_test.c b/src/ipcpd/normal/tests/dht_test.c new file mode 100644 index 00000000..861ae10a --- /dev/null +++ b/src/ipcpd/normal/tests/dht_test.c @@ -0,0 +1,99 @@ +/* + * Ouroboros - Copyright (C) 2016 - 2017 + * + * Unit tests of the DHT AE + * + *    Dimitri Staessens <dimitri.staessens@ugent.be> + *    Sander Vrijders   <sander.vrijders@ugent.be> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define __DHT_TEST__ + +#include "dht.c" + +#include <pthread.h> +#include <time.h> +#include <stdlib.h> +#include <stdio.h> + +#define KEY_LEN  32 + +#define EXP      86400 +#define CONTACTS 1000 + +int dht_test(int     argc, +             char ** argv) +{ +        struct dht * dht; +        uint64_t     addr = 0x0D1F; +        uint8_t      key[KEY_LEN]; +        size_t       i; + +        (void) argc; +        (void) argv; + +        dht = dht_create(addr); +        if (dht == NULL) { +                printf("Failed to create dht.\n"); +                return -1; +        } + +        dht_destroy(dht); + +        dht = dht_create(addr); +        if (dht == NULL) { +                printf("Failed to re-create dht.\n"); +                return -1; +        } + +        if (dht_bootstrap(dht, KEY_LEN, EXP)) { +                printf("Failed to bootstrap dht.\n"); +                dht_destroy(dht); +                return -1; +        } + +        dht_destroy(dht); + +        dht = dht_create(addr); +        if (dht == NULL) { +                printf("Failed to re-create dht.\n"); +                return -1; +        } + +        if (dht_bootstrap(dht, KEY_LEN, EXP)) { +                printf("Failed to bootstrap dht.\n"); +                dht_destroy(dht); +                return -1; +        } + +        for (i = 0; i < CONTACTS; ++i) { +                uint64_t addr; +                random_buffer(&addr, sizeof(addr)); +                random_buffer(key, KEY_LEN); +                pthread_rwlock_wrlock(&dht->lock); +                if (dht_update_bucket(dht, key, addr)) { +                        pthread_rwlock_unlock(&dht->lock); +                        printf("Failed to update bucket.\n"); +                        dht_destroy(dht); +                        return -1; +                } +                pthread_rwlock_unlock(&dht->lock); +        } + +        dht_destroy(dht); + +        return 0; +} diff --git a/src/irmd/main.c b/src/irmd/main.c index 63ae6b13..8b22bdef 100644 --- a/src/irmd/main.c +++ b/src/irmd/main.c @@ -36,6 +36,7 @@  #include <ouroboros/bitmap.h>  #include <ouroboros/qos.h>  #include <ouroboros/time_utils.h> +#include <ouroboros/tpm.h>  #include <ouroboros/logs.h>  #include "utils.h" @@ -99,18 +100,9 @@ struct irm {          struct shm_rdrbuff * rdrb;         /* rdrbuff for SDUs           */          int                  sockfd;       /* UNIX socket                */ -        pthread_t *          threadpool;   /* pool of mainloop threads   */ - -        struct bmp *         thread_ids;   /* ids for mainloop threads   */ -        size_t               max_threads;  /* max threads set by tpm     */ -        size_t               threads;      /* available mainloop threads */ -        pthread_cond_t       threads_cond; /* signal thread entry/exit   */ -        pthread_mutex_t      threads_lock; /* mutex for threads/condvar  */ -          enum irm_state       state;        /* state of the irmd          */          pthread_rwlock_t     state_lock;   /* lock for the entire irmd   */ -        pthread_t            tpm;          /* threadpool manager         */          pthread_t            irm_sanitize; /* clean up irmd resources    */          pthread_t            shm_sanitize; /* keep track of rdrbuff use  */  } irmd; @@ -315,7 +307,7 @@ static pid_t create_ipcp(char *         name,                          break;          } -        list_add_tail(&tmp->next, &irmd.ipcps); +        list_add_tail(&tmp->next, p);          list_add(&api->next, &irmd.spawned_apis); @@ 
-478,7 +470,7 @@ static int enroll_ipcp(pid_t  api,          pthread_rwlock_unlock(&irmd.reg_lock);          if (ipcp_enroll(api, dst_name, &info) < 0) { -                log_err("Could not enroll IPCP."); +                log_err("Could not enroll IPCP %d.", api);                  return -1;          } @@ -1426,16 +1418,6 @@ static void irm_fini(void)          if (irmd_get_state() != IRMD_NULL)                  log_warn("Unsafe destroy."); -        pthread_mutex_lock(&irmd.threads_lock); - -        if (irmd.thread_ids != NULL) -                bmp_destroy(irmd.thread_ids); - -        pthread_mutex_unlock(&irmd.threads_lock); - -        if (irmd.threadpool != NULL) -                free(irmd.threadpool); -          pthread_rwlock_wrlock(&irmd.flows_lock);          if (irmd.port_ids != NULL) @@ -1509,8 +1491,8 @@ void irmd_sig_handler(int         sig,                  }                  log_info("IRMd shutting down..."); -                  irmd_set_state(IRMD_NULL); +                tpm_stop();                  break;          case SIGPIPE:                  log_dbg("Ignored SIGPIPE."); @@ -1692,55 +1674,11 @@ void * irm_sanitize(void * o)          }  } -static void thread_inc(void) -{ -        pthread_mutex_lock(&irmd.threads_lock); - -        ++irmd.threads; -        pthread_cond_signal(&irmd.threads_cond); - -        pthread_mutex_unlock(&irmd.threads_lock); -} - -static void thread_dec(void) -{ -        pthread_mutex_lock(&irmd.threads_lock); - -        --irmd.threads; -        pthread_cond_signal(&irmd.threads_cond); - -        pthread_mutex_unlock(&irmd.threads_lock); -} - -static bool thread_check(void) -{ -        int ret; - -        pthread_mutex_lock(&irmd.threads_lock); - -        ret = irmd.threads > irmd.max_threads; - -        pthread_mutex_unlock(&irmd.threads_lock); - -        return ret; -} - -static void thread_exit(ssize_t id) -{ -        pthread_mutex_lock(&irmd.threads_lock); -        bmp_release(irmd.thread_ids, id); - -        --irmd.threads; -        pthread_cond_signal(&irmd.threads_cond); - -        pthread_mutex_unlock(&irmd.threads_lock); -} -  void * mainloop(void * o)  {          uint8_t buf[IRM_MSG_BUF_SIZE]; -        ssize_t id = (ssize_t) o; +        (void) o;          while (true) {  #ifdef __FreeBSD__ @@ -1760,8 +1698,8 @@ void * mainloop(void * o)                  struct timeval    tv      = {(SOCKET_TIMEOUT / 1000),                                               (SOCKET_TIMEOUT % 1000) * 1000}; -                if (irmd_get_state() != IRMD_RUNNING || thread_check()) { -                        thread_exit(id); +                if (irmd_get_state() != IRMD_RUNNING || tpm_check()) { +                        tpm_exit();                          break;                  } @@ -1772,7 +1710,6 @@ void * mainloop(void * o)                  if (select(irmd.sockfd + 1, &fds, NULL, NULL, &timeout) <= 0)                          continue;  #endif -                  cli_sockfd = accept(irmd.sockfd, 0, 0);                  if (cli_sockfd < 0)                          continue; @@ -1790,7 +1727,7 @@ void * mainloop(void * o)                  if (irmd_get_state() != IRMD_RUNNING) {                          close(cli_sockfd); -                        thread_exit(id); +                        tpm_exit();                          break;                  } @@ -1800,7 +1737,7 @@ void * mainloop(void * o)                          continue;                  } -                thread_dec(); +                tpm_dec();                  if (msg->has_timeo_sec) {                       
   assert(msg->has_timeo_nsec); @@ -1929,7 +1866,7 @@ void * mainloop(void * o)                  if (ret_msg.result == -EPIPE || !ret_msg.has_result) {                          close(cli_sockfd); -                        thread_inc(); +                        tpm_inc();                          continue;                  } @@ -1939,7 +1876,7 @@ void * mainloop(void * o)                          if (apis != NULL)                                  free(apis);                          close(cli_sockfd); -                        thread_inc(); +                        tpm_inc();                          continue;                  } @@ -1948,7 +1885,7 @@ void * mainloop(void * o)                          if (apis != NULL)                                  free(apis);                          close(cli_sockfd); -                        thread_inc(); +                        tpm_inc();                          continue;                  } @@ -1963,70 +1900,7 @@ void * mainloop(void * o)                  free(buffer.data);                  close(cli_sockfd); -                thread_inc(); -        } - -        return (void *) 0; -} - -void * threadpoolmgr(void * o) -{ -        pthread_attr_t  pattr; -        struct timespec dl; -        struct timespec to = {(IRMD_TPM_TIMEOUT / 1000), -                              (IRMD_TPM_TIMEOUT % 1000) * MILLION}; -        (void) o; - -        if (pthread_attr_init(&pattr)) -                return (void *) -1; - -        pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED); - -        while (true) { -                clock_gettime(PTHREAD_COND_CLOCK, &dl); -                ts_add(&dl, &to, &dl); - -                if (irmd_get_state() != IRMD_RUNNING) { -                        pthread_attr_destroy(&pattr); -                        log_dbg("Waiting for threads to exit."); -                        pthread_mutex_lock(&irmd.threads_lock); -                        while (irmd.threads > 0) -                                pthread_cond_wait(&irmd.threads_cond, -                                                  &irmd.threads_lock); -                        pthread_mutex_unlock(&irmd.threads_lock); -                        log_dbg("Threadpool manager done."); -                        break; -                } - -                pthread_mutex_lock(&irmd.threads_lock); - -                if (irmd.threads < IRMD_MIN_AV_THREADS) { -                        log_dbg("Increasing threadpool."); -                        irmd.max_threads = IRMD_MAX_AV_THREADS; - -                        while (irmd.threads < irmd.max_threads) { -                                ssize_t id = bmp_allocate(irmd.thread_ids); -                                if (!bmp_is_id_valid(irmd.thread_ids, id)) { -                                        log_warn("IRMd threadpool exhausted."); -                                        break; -                                } - -                                if (pthread_create(&irmd.threadpool[id], -                                                   &pattr, mainloop, -                                                   (void *) id)) -                                        log_warn("Failed to start new thread."); -                                else -                                        ++irmd.threads; -                        } -                } - -                if (pthread_cond_timedwait(&irmd.threads_cond, -                                           &irmd.threads_lock, -                                           &dl) == ETIMEDOUT) -                 
       if (irmd.threads > IRMD_MIN_AV_THREADS ) -                                --irmd.max_threads; - -                pthread_mutex_unlock(&irmd.threads_lock); +                tpm_inc();          }          return (void *) 0; @@ -2035,7 +1909,6 @@ void * threadpoolmgr(void * o)  static int irm_init(void)  {          struct stat st; -        pthread_condattr_t cattr;          struct timeval timeout = {(IRMD_ACCEPT_TIMEOUT / 1000),                                    (IRMD_ACCEPT_TIMEOUT % 1000) * 1000}; @@ -2058,24 +1931,6 @@ static int irm_init(void)                  goto fail_flows_lock;          } -        if (pthread_mutex_init(&irmd.threads_lock, NULL)) { -                log_err("Failed to initialize mutex."); -                goto fail_threads_lock; -        } - -        if (pthread_condattr_init(&cattr)) { -                log_err("Failed to initialize condattr."); -                goto fail_cattr; -        } - -#ifndef __APPLE__ -        pthread_condattr_setclock(&cattr, PTHREAD_COND_CLOCK); -#endif -        if (pthread_cond_init(&irmd.threads_cond, &cattr)) { -                log_err("Failed to initialize cond."); -                goto fail_threads_cond; -        } -          list_head_init(&irmd.ipcps);          list_head_init(&irmd.api_table);          list_head_init(&irmd.apn_table); @@ -2089,18 +1944,6 @@ static int irm_init(void)                  goto fail_port_ids;          } -        irmd.thread_ids = bmp_create(IRMD_MAX_THREADS, 0); -        if (irmd.thread_ids == NULL) { -                log_err("Failed to thread thread_ids bitmap."); -                goto fail_thread_ids; -        } - -        irmd.threadpool = malloc(sizeof(pthread_t) * IRMD_MAX_THREADS); -        if (irmd.threadpool == NULL) { -                log_err("Failed to malloc threadpool"); -                goto fail_thrpool; -        } -          if ((irmd.lf = lockfile_create()) == NULL) {                  if ((irmd.lf = lockfile_open()) == NULL) {                          log_err("Lockfile error."); @@ -2155,8 +1998,6 @@ static int irm_init(void)                  goto fail_rdrbuff;          } -        irmd.threads     = 0; -        irmd.max_threads = IRMD_MIN_AV_THREADS;          irmd.state       = IRMD_RUNNING;          log_info("Ouroboros IPC Resource Manager daemon started..."); @@ -2172,18 +2013,8 @@ fail_sock_path:  fail_stat:          lockfile_destroy(irmd.lf);  fail_lockfile: -        free(irmd.threadpool); -fail_thrpool: -        bmp_destroy(irmd.thread_ids); -fail_thread_ids:          bmp_destroy(irmd.port_ids);  fail_port_ids: -        pthread_cond_destroy(&irmd.threads_cond); -fail_threads_cond: -        pthread_condattr_destroy(&cattr); -fail_cattr: -        pthread_mutex_destroy(&irmd.threads_lock); -fail_threads_lock:          pthread_rwlock_destroy(&irmd.flows_lock);  fail_flows_lock:          pthread_rwlock_destroy(&irmd.reg_lock); @@ -2253,12 +2084,24 @@ int main(int     argc,                  exit(EXIT_FAILURE);          } -        pthread_create(&irmd.tpm, NULL, threadpoolmgr, NULL); +        if (tpm_init(IRMD_MIN_THREADS, IRMD_ADD_THREADS, mainloop)) { +                log_fini(); +                exit(EXIT_FAILURE); +        } + +        if (tpm_start()) { +                tpm_fini(); +                log_fini(); +                exit(EXIT_FAILURE); +        }          pthread_create(&irmd.irm_sanitize, NULL, irm_sanitize, NULL);          pthread_create(&irmd.shm_sanitize, NULL, shm_sanitize, irmd.rdrb); -        pthread_join(irmd.tpm, NULL); +        /* tpm_stop() called from 
sighandler */ + +        tpm_fini(); +          pthread_join(irmd.irm_sanitize, NULL);          pthread_join(irmd.shm_sanitize, NULL); diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index e08869b8..75eac6f9 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -11,7 +11,6 @@ protobuf_generate_c(DIF_CONFIG_PROTO_SRCS DIF_CONFIG_PROTO_HDRS  protobuf_generate_c(CDAP_PROTO_SRCS CDAP_PROTO_HDRS cdap.proto)  protobuf_generate_c(RO_PROTO_SRCS RO_PROTO_HDRS ro.proto)  protobuf_generate_c(CACEP_PROTO_SRCS CACEP_PROTO_HDRS cacep.proto) -protobuf_generate_c(FRCT_ENROLL_SRCS FRCT_ENROLL_HDRS frct_enroll.proto)  if (NOT APPLE)    find_library(LIBRT_LIBRARIES rt) @@ -59,12 +58,13 @@ set(SOURCE_FILES    shm_rdrbuff.c    sockets.c    time_utils.c +  tpm.c    utils.c    )  add_library(ouroboros SHARED ${SOURCE_FILES} ${IRM_PROTO_SRCS}    ${IPCP_PROTO_SRCS} ${DIF_CONFIG_PROTO_SRCS} ${CDAP_PROTO_SRCS} -  ${CACEP_PROTO_SRCS} ${RO_PROTO_SRCS} ${FRCT_ENROLL_SRCS}) +  ${CACEP_PROTO_SRCS} ${RO_PROTO_SRCS})  include(AddCompileFlags)  if (CMAKE_BUILD_TYPE MATCHES Debug) diff --git a/src/lib/dev.c b/src/lib/dev.c index 14971528..c8e43778 100644 --- a/src/lib/dev.c +++ b/src/lib/dev.c @@ -85,7 +85,6 @@ struct flow {  struct {          char *                ap_name; -        char *                daf_name;          pid_t                 api;          struct shm_rdrbuff *  rdrb; @@ -205,7 +204,7 @@ static int api_announce(char * ap_name)          return ret;  } -static void init_flow(int fd) +static void flow_clear(int fd)  {          assert(!(fd < 0)); @@ -216,9 +215,9 @@ static void init_flow(int fd)          ai.flows[fd].cube     = QOS_CUBE_BE;  } -static void reset_flow(int fd) +static void flow_fini(int fd)  { -        assert (!(fd < 0)); +        assert(!(fd < 0));          if (ai.flows[fd].port_id != -1)                  port_destroy(&ai.ports[ai.flows[fd].port_id]); @@ -232,7 +231,59 @@ static void reset_flow(int fd)          if (ai.flows[fd].set != NULL)                  shm_flow_set_close(ai.flows[fd].set); -        init_flow(fd); +        flow_clear(fd); +} + +static int flow_init(int       port_id, +                     pid_t     api, +                     qoscube_t qc) +{ +        int fd; + +        pthread_rwlock_wrlock(&ai.flows_lock); + +        fd = bmp_allocate(ai.fds); +        if (!bmp_is_id_valid(ai.fds, fd)) { +                pthread_rwlock_unlock(&ai.flows_lock); +                return -EBADF; +        } + +        ai.flows[fd].rx_rb = shm_rbuff_open(ai.api, port_id); +        if (ai.flows[fd].rx_rb == NULL) { +                bmp_release(ai.fds, fd); +                pthread_rwlock_unlock(&ai.flows_lock); +                return -ENOMEM; +        } + +        ai.flows[fd].tx_rb = shm_rbuff_open(api, port_id); +        if (ai.flows[fd].tx_rb == NULL) { +                flow_fini(fd); +                bmp_release(ai.fds, fd); +                pthread_rwlock_unlock(&ai.flows_lock); +                return -ENOMEM; +        } + +        ai.flows[fd].set = shm_flow_set_open(api); +        if (ai.flows[fd].set == NULL) { +                flow_fini(fd); +                bmp_release(ai.fds, fd); +                pthread_rwlock_unlock(&ai.flows_lock); +                return -ENOMEM; +        } + +        ai.flows[fd].port_id = port_id; +        ai.flows[fd].oflags  = FLOW_O_DEFAULT; +        ai.flows[fd].api     = api; +        ai.flows[fd].cube    = qc; +        ai.flows[fd].spec    = qos_cube_to_spec(qc); + +        ai.ports[port_id].fd = fd; + +        
port_set_state(&ai.ports[port_id], PORT_ID_ASSIGNED); + +        pthread_rwlock_unlock(&ai.flows_lock); + +        return fd;  }  int ouroboros_init(const char * ap_name) @@ -242,7 +293,6 @@ int ouroboros_init(const char * ap_name)          assert(ai.ap_name == NULL);          ai.api = getpid(); -        ai.daf_name = NULL;          ai.fds = bmp_create(AP_MAX_FLOWS - AP_RES_FDS, AP_RES_FDS);          if (ai.fds == NULL) @@ -279,7 +329,7 @@ int ouroboros_init(const char * ap_name)          }          for (i = 0; i < AP_MAX_FLOWS; ++i) -                init_flow(i); +                flow_clear(i);          ai.ports = malloc(sizeof(*ai.ports) * IRMD_MAX_FLOWS);          if (ai.ports == NULL) { @@ -333,9 +383,6 @@ void ouroboros_fini()          shm_flow_set_destroy(ai.fqset); -        if (ai.daf_name != NULL) -                free(ai.daf_name); -          if (ai.ap_name != NULL)                  free(ai.ap_name); @@ -346,7 +393,7 @@ void ouroboros_fini()                          ssize_t idx;                          while ((idx = shm_rbuff_read(ai.flows[i].rx_rb)) >= 0)                                  shm_rdrbuff_remove(ai.rdrb, idx); -                        reset_flow(i); +                        flow_fini(i);                  }          } @@ -368,13 +415,9 @@ void ouroboros_fini()  int flow_accept(qosspec_t *             qs,                  const struct timespec * timeo)  { -        irm_msg_t           msg      = IRM_MSG__INIT; -        irm_msg_t *         recv_msg = NULL; -        int                 fd       = -1; -        frct_enroll_msg_t * frct_enroll; -        qosspec_t           spec; -        uint8_t             data[BUF_SIZE]; -        ssize_t             n; +        irm_msg_t   msg      = IRM_MSG__INIT; +        irm_msg_t * recv_msg = NULL; +        int         fd       = -1;          msg.code    = IRM_MSG_CODE__IRM_FLOW_ACCEPT;          msg.has_api = true; @@ -408,83 +451,15 @@ int flow_accept(qosspec_t *             qs,                  return -EIRMD;          } -        pthread_rwlock_wrlock(&ai.flows_lock); - -        fd = bmp_allocate(ai.fds); -        if (!bmp_is_id_valid(ai.fds, fd)) { -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -EBADF; -        } - -        ai.flows[fd].rx_rb = shm_rbuff_open(ai.api, recv_msg->port_id); -        if (ai.flows[fd].rx_rb == NULL) { -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -ENOMEM; -        } - -        ai.flows[fd].tx_rb = shm_rbuff_open(recv_msg->api, recv_msg->port_id); -        if (ai.flows[fd].tx_rb == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -ENOMEM; -        } - -        ai.flows[fd].set = shm_flow_set_open(recv_msg->api); -        if (ai.flows[fd].set == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -ENOMEM; -        } - -        ai.flows[fd].port_id = recv_msg->port_id; -        ai.flows[fd].oflags  = FLOW_O_DEFAULT; -        ai.flows[fd].api     = recv_msg->api; -        ai.flows[fd].cube    = recv_msg->qoscube; - -        
assert(ai.ports[ai.flows[fd].port_id].state == PORT_INIT); - -        spec = qos_cube_to_spec(recv_msg->qoscube); - -        ai.ports[recv_msg->port_id].fd    = fd; -        ai.ports[recv_msg->port_id].state = PORT_ID_ASSIGNED; - -        pthread_rwlock_unlock(&ai.flows_lock); +        fd = flow_init(recv_msg->port_id, recv_msg->api, recv_msg->qoscube);          irm_msg__free_unpacked(recv_msg, NULL); -        n = flow_read(fd, data, BUF_SIZE); -        if (n < 0) { -                flow_dealloc(fd); -                return n; -        } - -        frct_enroll = frct_enroll_msg__unpack(NULL, n, data); -        if (frct_enroll == NULL) { -                flow_dealloc(fd); -                return -1; -        } - -        spec.resource_control = frct_enroll->resource_control; -        spec.reliable = frct_enroll->reliable; -        spec.error_check = frct_enroll->error_check; -        spec.ordered = frct_enroll->ordered; -        spec.partial = frct_enroll->partial; - -        frct_enroll_msg__free_unpacked(frct_enroll, NULL); - -        pthread_rwlock_wrlock(&ai.flows_lock); -        ai.flows[fd].spec = spec; -        pthread_rwlock_unlock(&ai.flows_lock); +        if (fd < 0) +                return fd;          if (qs != NULL) -                *qs = spec; +                *qs = ai.flows[fd].spec;          return fd;  } @@ -493,14 +468,10 @@ int flow_alloc(const char *            dst_name,                 qosspec_t *             qs,                 const struct timespec * timeo)  { -        irm_msg_t         msg         = IRM_MSG__INIT; -        frct_enroll_msg_t frct_enroll = FRCT_ENROLL_MSG__INIT; -        irm_msg_t *       recv_msg    = NULL; -        qoscube_t         qc          = QOS_CUBE_BE; -        int               fd; -        ssize_t           len; -        uint8_t *         data; -        int               ret; +        irm_msg_t   msg      = IRM_MSG__INIT; +        irm_msg_t * recv_msg = NULL; +        qoscube_t   qc       = QOS_CUBE_BE; +        int         fd;          msg.code        = IRM_MSG_CODE__IRM_FLOW_ALLOC;          msg.dst_name    = (char *) dst_name; @@ -508,15 +479,8 @@ int flow_alloc(const char *            dst_name,          msg.has_qoscube = true;          msg.api         = ai.api; -        if (qs != NULL) { -                frct_enroll.resource_control = qs->resource_control; -                frct_enroll.reliable = qs->reliable; -                frct_enroll.error_check = qs->error_check; -                frct_enroll.ordered = qs->ordered; -                frct_enroll.partial = qs->partial; - +        if (qs != NULL)                  qc = qos_spec_to_cube(*qs); -        }          msg.qoscube = qc; @@ -547,78 +511,10 @@ int flow_alloc(const char *            dst_name,                  return -EIRMD;          } -        pthread_rwlock_wrlock(&ai.flows_lock); - -        fd = bmp_allocate(ai.fds); -        if (!bmp_is_id_valid(ai.fds, fd)) { -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -EBADF; -        } - -        ai.flows[fd].rx_rb = shm_rbuff_open(ai.api, recv_msg->port_id); -        if (ai.flows[fd].rx_rb == NULL) { -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -ENOMEM; -        } - -        ai.flows[fd].tx_rb = shm_rbuff_open(recv_msg->api, recv_msg->port_id); -        if (ai.flows[fd].tx_rb == NULL) { -                reset_flow(fd); - 
               bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -ENOMEM; -        } - -        ai.flows[fd].set = shm_flow_set_open(recv_msg->api); -        if (ai.flows[fd].set == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -ENOMEM; -        } - -        ai.flows[fd].port_id = recv_msg->port_id; -        ai.flows[fd].oflags  = FLOW_O_DEFAULT; -        ai.flows[fd].api     = recv_msg->api; -        ai.flows[fd].cube    = recv_msg->qoscube; - -        assert(ai.ports[recv_msg->port_id].state == PORT_INIT); - -        ai.ports[recv_msg->port_id].fd    = fd; -        ai.ports[recv_msg->port_id].state = PORT_ID_ASSIGNED; +        fd = flow_init(recv_msg->port_id, recv_msg->api, qc);          irm_msg__free_unpacked(recv_msg, NULL); -        pthread_rwlock_unlock(&ai.flows_lock); - -        len = frct_enroll_msg__get_packed_size(&frct_enroll); -        if (len < 0) { -                flow_dealloc(fd); -                return -1; -        } - -        data = malloc(len); -        if (data == NULL) { -                flow_dealloc(fd); -                return -ENOMEM; -        } - -        frct_enroll_msg__pack(&frct_enroll, data); - -        ret = flow_write(fd, data, len); -        if (ret < 0) { -                flow_dealloc(fd); -                free(data); -                return ret; -        } - -        free(data); -          return fd;  } @@ -657,7 +553,7 @@ int flow_dealloc(int fd)          pthread_rwlock_wrlock(&ai.flows_lock); -        reset_flow(fd); +        flow_fini(fd);          bmp_release(ai.fds, fd);          pthread_rwlock_unlock(&ai.flows_lock); @@ -1073,53 +969,11 @@ int flow_event_wait(struct flow_set *       set,  /* ipcp-dev functions */ -int np1_flow_alloc(pid_t n_api, -                   int   port_id) +int np1_flow_alloc(pid_t     n_api, +                   int       port_id, +                   qoscube_t qc)  { -        int fd; - -        pthread_rwlock_wrlock(&ai.flows_lock); - -        fd = bmp_allocate(ai.fds); -        if (!bmp_is_id_valid(ai.fds, fd)) { -                pthread_rwlock_unlock(&ai.flows_lock); -                return -1; -        } - -        ai.flows[fd].rx_rb = shm_rbuff_open(ai.api, port_id); -        if (ai.flows[fd].rx_rb == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                return -1; -        } - -        ai.flows[fd].tx_rb = shm_rbuff_open(n_api, port_id); -        if (ai.flows[fd].tx_rb == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                return -1; -        } - -        ai.flows[fd].set = shm_flow_set_open(n_api); -        if (ai.flows[fd].set == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                return -1; -        } - -        ai.flows[fd].port_id = port_id; -        ai.flows[fd].oflags  = FLOW_O_DEFAULT; -        ai.flows[fd].api     = n_api; - -        ai.ports[port_id].fd    = fd; -        ai.ports[port_id].state = PORT_ID_ASSIGNED; - -        pthread_rwlock_unlock(&ai.flows_lock); - -        return fd; +        return flow_init(port_id, 
n_api, qc);  }  int np1_flow_dealloc(int port_id) @@ -1182,11 +1036,10 @@ int ipcp_create_r(pid_t api,  int ipcp_flow_req_arr(pid_t           api,                        const uint8_t * dst,                        size_t          len, -                      qoscube_t       cube) +                      qoscube_t       qc)  {          irm_msg_t msg = IRM_MSG__INIT;          irm_msg_t * recv_msg = NULL; -        int port_id = -1;          int fd = -1;          if (dst == NULL) @@ -1199,88 +1052,24 @@ int ipcp_flow_req_arr(pid_t           api,          msg.hash.len    = len;          msg.hash.data   = (uint8_t *) dst;          msg.has_qoscube = true; -        msg.qoscube     = cube; - -        pthread_rwlock_wrlock(&ai.flows_lock); - -        fd = bmp_allocate(ai.fds); -        if (!bmp_is_id_valid(ai.fds, fd)) { -                pthread_rwlock_unlock(&ai.flows_lock); -                return -1; /* -ENOMOREFDS */ -        } - -        pthread_rwlock_unlock(&ai.flows_lock); +        msg.qoscube     = qc;          recv_msg = send_recv_irm_msg(&msg); -        pthread_rwlock_wrlock(&ai.flows_lock); - -        if (recv_msg == NULL) { -                ai.ports[fd].state = PORT_INIT; -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); +        if (recv_msg == NULL)                  return -EIRMD; -        }          if (!recv_msg->has_port_id || !recv_msg->has_api) { -                ai.ports[fd].state = PORT_INIT; -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock);                  irm_msg__free_unpacked(recv_msg, NULL);                  return -1;          }          if (recv_msg->has_result && recv_msg->result) { -                ai.ports[fd].state = PORT_INIT; -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -1; -        } - -        port_id = recv_msg->port_id; -        if (port_id < 0) { -                ai.ports[fd].state = PORT_INIT; -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock);                  irm_msg__free_unpacked(recv_msg, NULL);                  return -1;          } -        ai.flows[fd].rx_rb = shm_rbuff_open(ai.api, port_id); -        if (ai.flows[fd].rx_rb == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -1; -        } - -        ai.flows[fd].tx_rb = shm_rbuff_open(recv_msg->api, port_id); -        if (ai.flows[fd].tx_rb == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -1; -        } - -        ai.flows[fd].set = shm_flow_set_open(recv_msg->api); -        if (ai.flows[fd].set == NULL) { -                reset_flow(fd); -                bmp_release(ai.fds, fd); -                pthread_rwlock_unlock(&ai.flows_lock); -                irm_msg__free_unpacked(recv_msg, NULL); -                return -1; -        } - -        ai.flows[fd].port_id = port_id; -        ai.flows[fd].oflags  = FLOW_O_DEFAULT; - -        ai.ports[port_id].fd = fd; -        port_set_state(&ai.ports[port_id], PORT_ID_ASSIGNED); - -        pthread_rwlock_unlock(&ai.flows_lock); +        fd = 
flow_init(recv_msg->port_id, recv_msg->api, qc);

         irm_msg__free_unpacked(recv_msg, NULL);

diff --git a/src/lib/tpm.c b/src/lib/tpm.c
new file mode 100644
index 00000000..8298eeb5
--- /dev/null
+++ b/src/lib/tpm.c
@@ -0,0 +1,266 @@
+/*
+ * Ouroboros - Copyright (C) 2016 - 2017
+ *
+ * Threadpool management
+ *
+ *    Dimitri Staessens <dimitri.staessens@ugent.be>
+ *    Sander Vrijders   <sander.vrijders@ugent.be>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <ouroboros/config.h>
+#include <ouroboros/errno.h>
+#include <ouroboros/list.h>
+#include <ouroboros/time_utils.h>
+#include <ouroboros/tpm.h>
+
+#include <pthread.h>
+#include <stdlib.h>
+
+#define TPM_TIMEOUT 1000
+
+struct pthr_el {
+        struct list_head next;
+
+        bool             join;
+
+        pthread_t        thr;
+};
+
+enum tpm_state {
+        TPM_NULL = 0,
+        TPM_INIT,
+        TPM_RUNNING
+};
+
+struct {
+        size_t           min;
+        size_t           inc;
+        size_t           max;
+        size_t           cur;
+
+        void * (* func)(void *);
+
+        struct list_head pool;
+
+        enum tpm_state   state;
+
+        pthread_cond_t   cond;
+        pthread_mutex_t  lock;
+
+        pthread_t        mgr;
+} tpm;
+
+static void tpm_join(void)
+{
+        struct list_head * p;
+        struct list_head * h;
+
+        list_for_each_safe(p, h, &tpm.pool) {
+                struct pthr_el * e = list_entry(p, struct pthr_el, next);
+                if (tpm.state != TPM_RUNNING)
+                        while (!e->join)
+                                pthread_cond_wait(&tpm.cond, &tpm.lock);
+
+                if (e->join) {
+                        pthread_join(e->thr, NULL);
+                        list_del(&e->next);
+                        free(e);
+                }
+        }
+}
+
+static void * tpmgr(void * o)
+{
+        struct timespec dl;
+        struct timespec to = {(TPM_TIMEOUT / 1000),
+                              (TPM_TIMEOUT % 1000) * MILLION};
+        (void) o;
+
+        while (true) {
+                clock_gettime(PTHREAD_COND_CLOCK, &dl);
+                ts_add(&dl, &to, &dl);
+
+                pthread_mutex_lock(&tpm.lock);
+
+                tpm_join();
+
+                if (tpm.state != TPM_RUNNING) {
+                        tpm.max = 0;
+                        tpm_join();
+                        pthread_mutex_unlock(&tpm.lock);
+                        break;
+                }
+
+                if (tpm.cur < tpm.min) {
+                        tpm.max = tpm.inc;
+
+                        while (tpm.cur < tpm.max) {
+                                struct pthr_el * e = malloc(sizeof(*e));
+                                if (e == NULL)
+                                        break;
+
+                                e->join = false;
+
+                                if (pthread_create(&e->thr, NULL,
+                                                   tpm.func, NULL)) {
+                                        free(e);
+                                } else {
+                                        list_add(&e->next, &tpm.pool);
+                                        ++tpm.cur;
+                                }
+                        }
+                }
+
+                if (pthread_cond_timedwait(&tpm.cond, &tpm.lock, &dl)
+                    == ETIMEDOUT)
+                        if (tpm.cur > tpm.min)
+                                --tpm.max;
+
+                pthread_mutex_unlock(&tpm.lock);
+        }
+
+        return (void *) 0;
+}
+
+int tpm_init(size_t min,
+             size_t inc,
+             void * (* func)(void *))
+{
+        pthread_condattr_t cattr;
+
+        if (pthread_mutex_init(&tpm.lock, NULL))
+                goto fail_lock;
+
+        if (pthread_condattr_init(&cattr))
+                goto fail_cattr;
+
+#ifndef __APPLE__
+        pthread_condattr_setclock(&cattr, PTHREAD_COND_CLOCK);
+#endif
+        if (pthread_cond_init(&tpm.cond, &cattr))
+                goto fail_cond;
+
+        list_head_init(&tpm.pool);
+
+        pthread_condattr_destroy(&cattr);
+
+        tpm.state = TPM_INIT;
+        tpm.func  = func;
+        tpm.min   = min;
+        tpm.inc   = inc;
+        tpm.max   = 0;
+        tpm.cur   = 0;
+
+        return 0;
+
+ fail_cond:
+        pthread_condattr_destroy(&cattr);
+ fail_cattr:
+        pthread_mutex_destroy(&tpm.lock);
+ fail_lock:
+        return -1;
+}
+
+int tpm_start(void)
+{
+        pthread_mutex_lock(&tpm.lock);
+
+        if (pthread_create(&tpm.mgr, NULL, tpmgr, NULL)) {
+                pthread_mutex_unlock(&tpm.lock);
+                return -1;
+        }
+
+        tpm.state = TPM_RUNNING;
+
+        pthread_mutex_unlock(&tpm.lock);
+
+        return 0;
+}
+
+void tpm_stop(void)
+{
+        pthread_mutex_lock(&tpm.lock);
+
+        tpm.state = TPM_NULL;
+
+        pthread_mutex_unlock(&tpm.lock);
+}
+
+void tpm_fini(void)
+{
+        pthread_join(tpm.mgr, NULL);
+
+        pthread_mutex_destroy(&tpm.lock);
+        pthread_cond_destroy(&tpm.cond);
+}
+
+bool tpm_check(void)
+{
+        bool ret;
+
+        pthread_mutex_lock(&tpm.lock);
+
+        ret = tpm.cur > tpm.max;
+
+        pthread_mutex_unlock(&tpm.lock);
+
+        return ret;
+}
+
+void tpm_inc(void)
+{
+        pthread_mutex_lock(&tpm.lock);
+
+        ++tpm.cur;
+
+        pthread_mutex_unlock(&tpm.lock);
+}
+
+void tpm_dec(void)
+{
+        pthread_mutex_lock(&tpm.lock);
+
+        --tpm.cur;
+
+        pthread_cond_signal(&tpm.cond);
+
+        pthread_mutex_unlock(&tpm.lock);
+}
+
+void tpm_exit(void)
+{
+        struct list_head * p;
+        pthread_t          id;
+
+        id = pthread_self();
+
+        pthread_mutex_lock(&tpm.lock);
+
+        --tpm.cur;
+
+        list_for_each(p, &tpm.pool) {
+                struct pthr_el * e = list_entry(p, struct pthr_el, next);
+                if (e->thr == id) {
+                        e->join = true;
+                        break;
+                }
+        }
+
+        pthread_cond_signal(&tpm.cond);
+
+        pthread_mutex_unlock(&tpm.lock);
+}
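For orientation, the sketch below shows how a daemon is expected to drive the new threadpool manager, pieced together from the IRMd changes in this commit. Only the tpm_* calls and the IRMD_MIN_THREADS / IRMD_ADD_THREADS constants are taken from the diff; the worker body, its request handling and the main() skeleton are hypothetical stand-ins, and the renamed include/ouroboros/tpm.h is assumed to declare the tpm_* prototypes.

#include <ouroboros/config.h>
#include <ouroboros/tpm.h>

#include <stdbool.h>
#include <stdlib.h>

/* Hypothetical pool worker, modeled on mainloop() above. */
static void * worker(void * o)
{
        (void) o;

        while (true) {
                /* Leave if the manager shrank the pool below us. */
                if (tpm_check()) {
                        tpm_exit(); /* mark this thread joinable */
                        break;
                }

                /* ... block until a request arrives ... */

                tpm_dec();          /* going busy: one fewer available */
                /* ... serve the request ... */
                tpm_inc();          /* available again */
        }

        return (void *) 0;
}

int main(void)
{
        /* When fewer than IRMD_MIN_THREADS workers are available,
         * the manager tops the pool back up to IRMD_ADD_THREADS. */
        if (tpm_init(IRMD_MIN_THREADS, IRMD_ADD_THREADS, worker))
                exit(EXIT_FAILURE);

        if (tpm_start()) {
                tpm_fini();
                exit(EXIT_FAILURE);
        }

        /* ... run; a signal handler calls tpm_stop() on shutdown ... */

        tpm_fini();                 /* joins the manager and all workers */

        return 0;
}

This mirrors the IRMd main() above: tpm_stop() flips the state from the signal handler, the manager thread then waits for every worker to mark itself joinable via tpm_exit(), and tpm_fini() joins the manager.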

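On the application side, removing the in-band frct_enroll exchange means flow_accept() now fills the QoS spec directly from the qoscube carried in the IRMd reply, with no extra flow_read()/flow_write() round-trip. A minimal sketch under stated assumptions: the signatures of ouroboros_init(), flow_accept(), flow_dealloc() and ouroboros_fini() are taken from the dev.c diff, but the <ouroboros/dev.h> header name, the server name and the use of the qosspec_t 'reliable' field for output are illustrative.

#include <ouroboros/dev.h>  /* assumed public header for the dev API */

#include <stdio.h>

int main(void)
{
        qosspec_t qs;
        int       fd;

        if (ouroboros_init("example.server") < 0) /* name is illustrative */
                return -1;

        /* Blocks until a flow arrives; qs is derived locally from the
         * negotiated qoscube instead of a follow-up enrollment message. */
        fd = flow_accept(&qs, NULL);
        if (fd < 0) {
                ouroboros_fini();
                return -1;
        }

        printf("flow accepted, reliable: %d\n", qs.reliable);

        /* ... flow_read() / flow_write() ... */

        flow_dealloc(fd);
        ouroboros_fini();

        return 0;
}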