├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── README.md
├── crash-recover-test
    ├── CMakeLists.txt
    ├── test_crash_client.cc
    └── test_crash_server.cc
├── documents
    └── fast23_FUSEE_Extended_Version.pdf
├── micro-test
    ├── CMakeLists.txt
    ├── gen-micro-workload.py
    ├── latency_test.cc
    ├── latency_test.h
    ├── latency_test_client.cc
    ├── latency_test_client_cr.cc
    ├── micro_test.cc
    ├── micro_test.h
    ├── micro_test_multi_client.cc
    └── micro_test_multi_client_cr.cc
├── setup
    ├── download_gdrive.py
    ├── download_workload.sh
    └── setup-env.sh
├── src
    ├── CMakeLists.txt
    ├── client.cc
    ├── client.h
    ├── client_cr.cc
    ├── client_cr.h
    ├── client_mm.cc
    ├── client_mm.h
    ├── hashtable.cc
    ├── hashtable.h
    ├── ib.cc
    ├── ib.h
    ├── init.cc
    ├── kv_debug.h
    ├── kv_utils.cc
    ├── kv_utils.h
    ├── nm.cc
    ├── nm.h
    ├── server.cc
    ├── server.h
    ├── server_mm.cc
    ├── server_mm.h
    └── spinlock.h
├── tests
    ├── CMakeLists.txt
    ├── client_config.json
    ├── client_kv_shell.cc
    ├── ddckv_test.cc
    ├── ddckv_test.h
    ├── server_config.json
    ├── test_client.h
    ├── test_client_client.cc
    ├── test_client_server.cc
    ├── test_conf.json
    ├── test_kv_utils.cc
    ├── test_mm.cc
    ├── test_mm.h
    ├── test_nm.cc
    ├── test_nm.h
    ├── test_remote_nm.cc
    ├── test_remote_nm.h
    ├── test_server.cc
    └── test_server.h
└── ycsb-test
    ├── CMakeLists.txt
    ├── gen-ycsb-workload.py
    ├── merge-ycsb-lat.py
    ├── split-workload.py
    ├── ycsb_multi_client_cont_tpt.cc
    ├── ycsb_server_crash_multi_client.cc
    ├── ycsb_test.cc
    ├── ycsb_test.h
    ├── ycsb_test_client.cc
    ├── ycsb_test_multi_client.cc
    ├── ycsb_test_server.cc
    ├── ycsb_wl_loader.cc
    └── ycsb_wl_worker.cc


/.gitignore:
--------------------------------------------------------------------------------
 1 | build/
 2 | .vscode
 3 | ycsb-test/workloads/*
 4 | micro-test/micro-workloads/*
 5 | ycsb-test/upd-workloads/*
 6 | *.ipynb
 7 | workloads.tgz
 8 | micro-workloads.tgz
 9 | upd-workload.tgz
10 | setup/workloads
11 | setup/install/
12 | setup/micro-workloads/
13 | setup/upd-workloads.tgz
14 | setup/upd-workloads/


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "ycsb-test/YCSB-C"]
2 | 	path = ycsb-test/YCSB-C
3 | 	url = https://gitee.com/bernardshen/YCSB-C.git
4 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
 2 | 
 3 | project(DDCKV LANGUAGES CXX)
 4 | set(CMAKE_CXX_STANDARD 11)
 5 | 
 6 | find_package(Boost REQUIRED)
 7 | 
 8 | include_directories(src)
 9 | set(CMAKE_BUILD_TYPE Release)
10 | 
11 | add_subdirectory(src)
12 | 
13 | add_subdirectory(ycsb-test)
14 | add_subdirectory(crash-recover-test)
15 | add_subdirectory(micro-test)
16 | 
17 | enable_testing()
18 | add_subdirectory(tests)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # FUSEE: A Fully Memory-Disaggregated Key-Value Store
  2 | 
  3 | 
  4 | This is the implementation repository of our FAST'23 paper: **FUSEE: A Fully Memory-Disaggregated Key-Value Store**.
  5 | 
  6 | 
  7 | 
  8 | ## Description
  9 | 
 10 | We proposes ***FUSEE***, a <em><strong>FU</strong>lly memory-di<strong>S</strong>aggr<strong>E</strong>gated</em> KV Stor***E*** that brings disaggregation to metadata management. *FUSEE* replicates metadata, *i.e.*, the index and memory management information, on memory nodes, manages them directly on the client side, and handles complex failures under the DM architecture. To scalably replicate the index on clients, *FUSEE* proposes a client-centric replication protocol that allows clients to concurrently access and modify the replicated index. To efficiently manage disaggregated memory, *FUSEE* adopts a two-level memory management scheme that splits the memory management duty among clients and memory nodes. Finally, to handle the metadata corruption under client failures, *FUSEE* leverages an embedded operation log scheme to repair metadata with low log maintenance overhead.
 11 | 
 12 | 
 13 | ## Environment
 14 | 
 15 | * For hardware, each machine should be equipped with one **8-core Intel processer**(*e.g.*, Intel Xeon E5-2450),  **16GB DRAM**  and one **RDMA NIC card** (*e.g.*, Mellanox ConnectX-3). Each RNIC should be connected to an **Infiniband or Ethernet switch** (*e.g.*, Mellanox SX6036G). All machines are separated into memory nodes and compute nodes. At maximum 5 memory nodes and 17 compute nodes are used for the experiments in our paper. If you do not have such testbed, consider using [CloudLab](https://www.cloudlab.us/).
 16 | 
 17 | * For software, **Ubuntu 18.04** is recommended for each machine.  In our experiments, **7168 HugePages** of 2MB size in each memory node and **2048** ones in compute nodes is need to be allocated. You can set up this with  `echo 7168 > /proc/sys/vm/nr_hugepages` command for memory nodes and `echo 2048 > /proc/sys/vm/nr_hugepages` for compute nodes.
 18 | 
 19 | 
 20 | 
 21 | ## Configurations
 22 | 
 23 | Configuration files for servers and clients should be provided to the program. Here are two example configuration files below.
 24 | 
 25 | #### 1. Servers configuration
 26 | 
 27 | For each memory node, you should provide a configuration file `server_config.json`  where you can flexibly configure the server:
 28 | 
 29 | ```json
 30 | {
 31 |     "role": "SERVER",
 32 |     "conn_type": "IB",
 33 |     "server_id": 0,
 34 |     "udp_port": 2333,
 35 |     "memory_num": 3,
 36 |     "memory_ips": [
 37 |         "10.10.10.1",
 38 |         "10.10.10.2",
 39 |         "10.10.10.3"
 40 |     ],
 41 |     "ib_dev_id": 0,
 42 |     "ib_port_id": 1,
 43 |     "ib_gid_idx": 0,
 44 | 
 45 |     "server_base_addr":  "0x10000000",
 46 |     "server_data_len":   15032385536,
 47 |     "block_size":        67108864,
 48 |     "subblock_size":     256,
 49 |     "client_local_size": 1073741824,
 50 | 
 51 |     "num_replication": 3,
 52 | 
 53 |     "main_core_id": 0,
 54 |     "poll_core_id": 1,
 55 |     "bg_core_id": 2,
 56 |     "gc_core_id": 3
 57 | }
 58 | ```
 59 | 
 60 | For briefness, we call each memory node as "server `i`" (`i` = 0, 1, ...).
 61 | 
 62 | #### 2. Clients configuration
 63 | 
 64 | For each compute node, you should provide a configuration file `client_config.json` where you can flexibly configure the client:
 65 | 
 66 | ```json
 67 | {
 68 |     "role": "CLIENT",
 69 |     "conn_type": "IB",
 70 |     "server_id": 2,
 71 |     "udp_port": 2333,
 72 |     "memory_num": 2,
 73 |     "memory_ips": [
 74 |         "128.110.96.102",
 75 |         "128.110.96.81"
 76 |     ],
 77 |     "ib_dev_id": 0,
 78 |     "ib_port_id": 1,
 79 |     "ib_gid_idx": 0,
 80 | 
 81 |     "server_base_addr":  "0x10000000",
 82 |     "server_data_len":   15032385536,
 83 |     "block_size":        67108864,
 84 |     "subblock_size":     1024,
 85 |     "client_local_size": 1073741824,
 86 | 
 87 |     "num_replication": 2,
 88 |     "num_idx_rep": 1,
 89 |     "num_coroutines": 10,
 90 |     "miss_rate_threash": 0.1,
 91 |     "workload_run_time": 10,
 92 |     "micro_workload_num": 10000,
 93 | 
 94 |     "main_core_id": 0,
 95 |     "poll_core_id": 1,
 96 |     "bg_core_id": 2,
 97 |     "gc_core_id": 3
 98 | }
 99 | ```
100 | 
101 | For briefness, we call each compute node as "client `i`" (`i` = 0, 1, 2, ...).
102 | 
103 | It should be noted that, the `server_id` parameter of client `i` should be set to `2+i*8`. For example, the `server_id` of the first three client is 2, 10, 18 respectively.
104 | 
105 | 
106 | 
107 | ## Experiments
108 | 
109 | For each node, execute the following commands to compile the entire program:
110 | 
111 | ```shell
112 | mkdir build && cd build
113 | cmake ..
114 | make -j
115 | ```
116 | 
117 | We test *FUSEE* with **micro-benchmark** and **YCSB benchmarks** respectively. For each experiments, you should put `server_config.json` in directory `./build`, and then use the following command in memory nodes to set up servers:
118 | 
119 | ```shell
120 | numactl -N 0 -m 0 ./ycsb-test/ycsb_test_server [SERVER_NUM]
121 | ```
122 | 
123 | `[SERVER_NUM]` should be the serial number of this memory node, counting from 0.
124 | 
125 | 
126 | 
127 | #### 1. Micro-benchmark
128 | 
129 | * **Latency**
130 | 
131 |     To evaluate the latency of each operation, we use a single client to iteratively execute each operation (**INSERT**, **DELETE**, **UPDATE**, and **SEARCH**) for 10,000 times.
132 | 
133 |     Enter `./build/micro-test` and use the following command in client `0`：
134 | 
135 |     ```shell
136 |     numactl -N 0 -m 0 ./latency_test_client [PATH_TO_CLIENT_CONFIG]
137 |     ```
138 | 
139 |     Test results will be saved in `./build/micro-test/results`.
140 | 
141 | * **Throughput**
142 | 
143 |     To evaluate the throughput of each operations, each client first iteratively INSERTs different keys for 0.5 seconds. UPDATE and SEARCH operations are then executed on these keys for 10 seconds. Finally, each client executes DELETE for 0.5 seconds.
144 | 
145 |     Enter `./build/micro-test` and execute the following command on all client nodes at the same time:
146 | 
147 |     ```shell
148 |     numactl -N 0 -m 0 ./micro_test_multi_client [PATH_TO_CLIENT_CONFIG] 8
149 |     ```
150 | 
151 |     Number `8` indicates there are 8 client threads in each client node. You will need to use the keyboard to simultaneously send space signals to each client node for starting each operation testing synchronously.
152 | 
153 |     Test results will be displayed on each client terminal.
154 | 
155 | 
156 | 
157 | #### 2. YCSB benchmarks 
158 | 
159 | * **Workload preparation**
160 | 
161 |     Firstly, download all the testing workloads using `sh download_workload.sh` in directory `./setup` and unpack the workloads you want to `./build/ycsb-test/workloads`.
162 | 
163 |     Here is the description of the YCSB workloads:
164 | 
165 |     | Workload | SEARCH | UPDATE | INSERT |
166 |     | -------- | ------ | ------ | ------ |
167 |     | A        | 0.5    | 0.5    | 0      |
168 |     | B        | 0.95   | 0.95   | 0      |
169 |     | C        | 1      | 0      | 0      |
170 |     | D        | 0.95   | 0      | 0.05   |
171 |     | upd[X]   | 1-[X]% | [X]%   | 0      |
172 | 
173 |     Then, you should execute the following command in `./build/ycsb-test` to split the workloads into N parts(N is the total number of client threads):
174 | 
175 |     ```shell
176 |     python split-workload.py [N]
177 |     ```
178 | 
179 |     And then we can  start testing *FUSEE* using YCSB benchmarks.
180 | 
181 | * **Throughput**
182 | 
183 |     To show the **scalability** of *FUSEE*，we can test the throughput of *FUSEE* with different number of client nodes. Besides, we can evaluate the **read-write performance** of *FUSEE* by testing the throughput of *FUSEE* using workloads with different search-update ratios `X`. Here is the command of testing the throughput of *FUSEE*:
184 | 
185 |     ```shell
186 |     numactl -N 0 -m 0 ./ycsb_test_multi_client [PATH_TO_CLIENT_CONFIG] [WORKLOAD-NAME] 8
187 |     ```
188 | 
189 |     Execute the command on all the client nodes at the same time. `[WORKLOAD-NAME]` can be chosen from `workloada ~ workloadd` or `workloadudp0 ~ workloadudp100` (indicating different search-update ratios) .  Number `8` indicates there are 8 client threads in each client node. You will need to use the keyboard to simultaneously send space signals to each client node for starting each operation testing synchronously.
190 | 
191 |     Test results will be displayed on each client terminal.
192 | 
193 | 
194 | 


--------------------------------------------------------------------------------
/crash-recover-test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(test_crash_client test_crash_client.cc)
2 | 
3 | target_link_libraries(test_crash_client
4 |     libddckv
5 |     pthread
6 |     ibverbs
7 | )


--------------------------------------------------------------------------------
/crash-recover-test/test_crash_client.cc:
--------------------------------------------------------------------------------
  1 | #include "client.h"
  2 | 
  3 | #define INSERT_NUM 1000
  4 | #define UPDATE_NUM 1000
  5 | 
  6 | const char * key = "test-12345-k";
  7 | const char * key_template = "test-%d-k";
  8 | const char * value_insert_template = "test-%d-v-insert";
  9 | const char * value_update_template = "test-12345-v-update-%d";
 10 | const char * value_template = "test-12345-v-%s";
 11 | 
 12 | KVReqCtx * prepare_ctx(Client & client, const char * key, const char * value, int req_type) {
 13 |     uint64_t client_input_buf = (uint64_t)client.get_input_buf();
 14 |     uint32_t client_input_buf_lkey = client.get_input_buf_lkey();
 15 | 
 16 |     memcpy((void *)(client_input_buf + sizeof(KVLogHeader)), key, strlen(key));
 17 |     memcpy((void *)(client_input_buf + sizeof(KVLogHeader) + strlen(key)), value, strlen(value));
 18 |     KVLogHeader * header = (KVLogHeader *)client_input_buf;
 19 |     header->is_valid = true;
 20 |     header->key_length = strlen(key);
 21 |     header->value_length = strlen(value);
 22 | 
 23 |     KVInfo * kv_info = (KVInfo *)malloc(sizeof(KVInfo));
 24 |     kv_info->l_addr = (void *)client_input_buf;
 25 |     kv_info->key_len = strlen(key);
 26 |     kv_info->value_len = strlen(value);
 27 |     kv_info->lkey = client_input_buf_lkey;
 28 | 
 29 |     KVReqCtx * ctx = new KVReqCtx;
 30 |     ctx->req_type = req_type;
 31 |     ctx->use_cache = true;
 32 |     ctx->kv_info = kv_info;
 33 |     ctx->lkey = client.get_local_buf_mr()->lkey;
 34 | 
 35 |     int num_idx_rep = client.get_num_idx_rep();
 36 |     int num_replication = client.get_num_rep();
 37 |     ctx->kv_modify_pr_cas_list.resize(1);
 38 |     ctx->kv_modify_bk_0_cas_list.resize(num_idx_rep - 1);
 39 |     ctx->kv_modify_bk_1_cas_list.resize(num_idx_rep - 1);
 40 |     ctx->log_commit_addr_list.resize(num_replication);
 41 |     char key_buf[128] = {0};
 42 |     memcpy(key_buf, (void *)((uint64_t)ctx->kv_info->l_addr + sizeof(KVLogHeader)), ctx->kv_info->key_len);
 43 |     ctx->key_str = std::string(key_buf);
 44 |     return ctx;
 45 | }
 46 | 
 47 | void init_insert_ctx(Client & client, KVReqCtx * ctx) {
 48 |     uint64_t client_local_buf = (uint64_t)client.get_local_buf_mr()->addr;
 49 |     uint32_t client_local_buf_lkey = client.get_local_buf_mr()->lkey;
 50 | 
 51 |     ctx->local_bucket_addr = (RaceHashBucket *)client_local_buf;
 52 |     ctx->local_cas_target_value_addr = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket));
 53 |     ctx->local_cas_return_value_addr = (void *)((uint64_t)ctx->local_cas_target_value_addr + sizeof(uint64_t));
 54 |     ctx->op_laddr = (void *)((uint64_t)ctx->local_cas_return_value_addr + sizeof(uint64_t) * MAX_REP_NUM);
 55 |     ctx->lkey = client_local_buf_lkey;
 56 | 
 57 |     KVLogHeader * header = (KVLogHeader *)ctx->kv_info->l_addr;
 58 |     KVLogTail * tail = (KVLogTail *)((uint64_t)ctx->kv_info->l_addr
 59 |         + sizeof(KVLogHeader) + header->key_length + header->value_length);
 60 |     tail->op = KV_OP_INSERT;
 61 | }
 62 | 
 63 | void init_update_ctx(Client & client, KVReqCtx * ctx) {
 64 |     uint64_t client_local_buf = (uint64_t)client.get_local_buf_mr()->addr;
 65 |     uint32_t client_local_buf_lkey = (uint64_t)client.get_local_buf_mr()->rkey;
 66 | 
 67 |     ctx->local_bucket_addr = (RaceHashBucket *)client_local_buf;
 68 |     ctx->local_kv_addr = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket));
 69 |     ctx->local_cas_target_value_addr = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket));
 70 |     ctx->local_cas_return_value_addr = (void *)((uint64_t)ctx->local_cas_target_value_addr + sizeof(uint64_t));
 71 |     ctx->op_laddr = (void *)((uint64_t)ctx->local_cas_target_value_addr + sizeof(uint64_t) * MAX_REP_NUM);
 72 |     ctx->local_cache_addr = (void *)((uint64_t)ctx->op_laddr + 2048);
 73 |     ctx->lkey = client_local_buf_lkey;
 74 | 
 75 |     KVLogHeader * header = (KVLogHeader *)ctx->kv_info->l_addr;
 76 |     KVLogTail * tail = (KVLogTail *)((uint64_t)ctx->kv_info->l_addr
 77 |         + sizeof(KVLogHeader) + header->key_length + header->value_length);
 78 |     tail->op = KV_OP_UPDATE;
 79 | }
 80 | 
 81 | void init_search_ctx(Client & client, KVReqCtx * ctx) {
 82 |     uint64_t client_local_buf = (uint64_t)client.get_local_buf_mr()->addr;
 83 |     uint32_t client_local_buf_lkey = (uint64_t)client.get_local_buf_mr()->rkey;
 84 | 
 85 |     ctx->local_bucket_addr = (RaceHashBucket *)client_local_buf;
 86 |     ctx->local_cache_addr  = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket));
 87 |     ctx->local_kv_addr     = (void *)((uint64_t)client_local_buf + 4 * sizeof(RaceHashBucket));
 88 |     ctx->lkey = client_local_buf_lkey;
 89 | }
 90 | 
 91 | void test_crash_update_prepare(Client & client, int crash_point) {
 92 |     int ret = 0;
 93 |     // insert a kv
 94 |     char value_buf[128];
 95 |     bool should_stop = false;
 96 | 
 97 |     sprintf(value_buf, value_insert_template, INSERT_NUM);
 98 |     KVReqCtx * insert_ctx = prepare_ctx(client, key, value_buf, KV_REQ_INSERT);
 99 |     init_insert_ctx(client, insert_ctx);
100 |     insert_ctx->should_stop = &should_stop;
101 |     insert_ctx->coro_id = 100;
102 |     ret = client.kv_insert_sync(insert_ctx);
103 |     assert(ret == 0);
104 |     
105 |     // update the kv and crash
106 |     for (int i = 0; i < UPDATE_NUM - 1; i ++) {
107 |         sprintf(value_buf, value_update_template, i);
108 |         KVReqCtx * update_ctx = prepare_ctx(client, key, value_buf, KV_REQ_UPDATE);
109 |         init_update_ctx(client, update_ctx);
110 |         ret = client.kv_update_sync(update_ctx);
111 |     }
112 |     sprintf(value_buf, value_update_template, UPDATE_NUM);
113 |     KVReqCtx * update_ctx = prepare_ctx(client, key, value_buf, KV_REQ_UPDATE);
114 |     init_update_ctx(client, update_ctx);
115 |     ret = client.kv_update_w_crash(update_ctx, crash_point);
116 |     assert(ret == -1);
117 | }
118 | 
119 | void test_crash_insert_prepare(Client & client, int crash_point) {
120 |     int ret = 0;
121 |     char key_buf[128];
122 |     char value_buf[128];
123 |     // insert 1000 kv
124 |     for (int i = 0; i < INSERT_NUM; i ++) {
125 |         sprintf(key_buf, key_template, i);
126 |         sprintf(value_buf, value_insert_template, i);
127 |         KVReqCtx * insert_ctx = prepare_ctx(client, key_buf, value_buf, KV_REQ_INSERT);
128 |         init_insert_ctx(client, insert_ctx);
129 |         bool should_stop = false;
130 |         insert_ctx->should_stop = &should_stop;
131 |         insert_ctx->coro_id = 100;
132 |         ret = client.kv_insert_sync(insert_ctx);
133 |         if (ret != 0) {
134 |             printf("[%s] insert error\n", __FUNCTION__);
135 |             exit(1);
136 |         }
137 |     }
138 | 
139 |     sprintf(value_buf, value_insert_template, INSERT_NUM);
140 |     KVReqCtx * insert_ctx = prepare_ctx(client, key, value_buf, KV_REQ_INSERT);
141 |     init_insert_ctx(client, insert_ctx);
142 |     bool should_stop = false;
143 |     insert_ctx->should_stop = &should_stop;
144 |     insert_ctx->coro_id = 100;
145 |     ret = client.kv_insert_w_crash(insert_ctx, crash_point);
146 |     if (ret != -1) {
147 |         printf("[%s] failed to crash\n", __FUNCTION__);
148 |         exit(1);
149 |     }
150 | }
151 | 
152 | void test_crash_recover(Client & client) {
153 |     int ret = 0;
154 |     void * search_ret;
155 |     char new_value_buf[256];
156 |     sprintf(new_value_buf, value_template, "update-after-crash");
157 |     // recover
158 |     KVReqCtx * update_ctx = prepare_ctx(client, key, new_value_buf, KV_REQ_UPDATE);
159 |     init_update_ctx(client, update_ctx);
160 |     ret = client.kv_update_sync(update_ctx);
161 |     if (ret != 0) {
162 |         printf("[%s] error update %d\n", __FUNCTION__, ret);
163 |     }
164 |     // assert(ret == 0);
165 | 
166 |     KVReqCtx * search_ctx = prepare_ctx(client, key, new_value_buf, KV_REQ_SEARCH);
167 |     init_search_ctx(client, search_ctx);
168 |     search_ret = client.kv_search_sync(search_ctx);
169 |     if (memcmp(search_ret, new_value_buf, strlen(new_value_buf)) != 0) {
170 |         printf("recover failed\n");
171 |     } else {
172 |         printf("recover success!\n");
173 |     }
174 | }
175 | 
176 | int main(int argc, char ** argv) {
177 |     if (argc != 2) {
178 |         printf("Usage: %s path-to-config-file\n", argv[0]);
179 |     }
180 |     int ret = 0;
181 |     GlobalConfig config;
182 |     ret = load_config(argv[1], &config);
183 |     assert(ret == 0);
184 | 
185 |     config.num_coroutines = 1;
186 |     config.is_recovery = false;
187 |     Client client(&config);
188 |     // pthread_t pollint_tid = client.start_polling_thread();
189 |     client.start_gc_fiber();
190 |     
191 |     if (config.is_recovery == false) {
192 |         test_crash_insert_prepare(client, KV_CRASH_UNCOMMITTED_BK_CONSENSUS_0);
193 |         printf("crashed\n");
194 |     }
195 |     client.stop_gc_fiber();
196 | 
197 |     config.is_recovery = true;
198 |     std::vector<struct timeval> recover_time_bd;
199 |     struct timeval st, et;
200 |     gettimeofday(&st, NULL);
201 |     Client clientr(&config);
202 |     clientr.start_gc_fiber();
203 |     gettimeofday(&et, NULL);
204 |     clientr.get_recover_time(recover_time_bd);
205 |     test_crash_recover(clientr);
206 | 
207 |     clientr.stop_gc_fiber();
208 | 
209 |     uint64_t connection_recover_time_us = time_spent_us(&recover_time_bd[0], &recover_time_bd[1]);
210 |     uint64_t local_recover_space_reg_time_us = time_spent_us(&recover_time_bd[1], &recover_time_bd[2]);
211 |     uint64_t get_meta_addr_time_us = time_spent_us(&recover_time_bd[2], &recover_time_bd[3]);
212 |     uint64_t traverse_log_time_us = time_spent_us(&recover_time_bd[3], &recover_time_bd[4]);
213 |     uint64_t mm_recover_time_us = time_spent_us(&recover_time_bd[4], &recover_time_bd[5]);
214 |     uint64_t local_mr_reg_time_us = time_spent_us(&recover_time_bd[5], &recover_time_bd[6]);
215 |     uint64_t kv_ops_recover_time_us = time_spent_us(&recover_time_bd[6], &recover_time_bd[7]);
216 | 
217 |     printf("0. conn rec: %ld us\n", connection_recover_time_us);
218 |     printf("1. rec space reg: %ld us\n", local_recover_space_reg_time_us);
219 |     printf("2. get meta addr: %ld us\n", get_meta_addr_time_us);
220 |     printf("3. taverse log: %ld us\n", traverse_log_time_us);
221 |     printf("4. mm rec: %ld us\n", mm_recover_time_us);
222 |     printf("5. local mr reg time: %ld us\n", local_mr_reg_time_us);
223 |     printf("6. ops rec: %ld us\n", kv_ops_recover_time_us);
224 |     printf("total:%ld us\n", time_spent_us(&st, &et));
225 | }


--------------------------------------------------------------------------------
/crash-recover-test/test_crash_server.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <assert.h>
 4 | #include <pthread.h>
 5 | 
 6 | #include "server.h"
 7 | 
 8 | int main(int argc, char ** argv) {
 9 |     if (argc != 2) {
10 |         printf("Usage: %s [server_id]\n", argv[0]);
11 |         return -1;
12 |     }
13 | 
14 |     int32_t server_id = atoi(argv[1]);
15 |     int32_t ret = 0;
16 |     struct GlobalConfig server_conf;
17 |     ret = load_config("./server_config.json", &server_conf);
18 |     assert(ret == 0);
19 |     server_conf.server_id = server_id;
20 | 
21 |     printf("===== Starting Server %d =====\n", server_conf.server_id);
22 |     Server * server = new Server(&server_conf);
23 |     pthread_t server_tid;
24 |     pthread_create(&server_tid, NULL, server_main, (void *)server);
25 | 
26 |     printf("press to exit\n");
27 |     getchar();
28 |     printf("===== Ending Server %d =====\n", server_conf.server_id);
29 | 
30 |     server->stop();
31 |     return 0;
32 | }


--------------------------------------------------------------------------------
/documents/fast23_FUSEE_Extended_Version.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmemsys/FUSEE/d1e9932a0aad3deffb446511811911cc0f7e82f7/documents/fast23_FUSEE_Extended_Version.pdf


--------------------------------------------------------------------------------
/micro-test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_library(latency_test latency_test.cc)
 2 | add_library(micro_test micro_test.cc)
 3 | add_executable(latency_test_client latency_test_client.cc)
 4 | add_executable(latency_test_client_cr latency_test_client_cr.cc)
 5 | add_executable(micro_test_multi_client micro_test_multi_client.cc)
 6 | add_executable(micro_test_multi_client_cr micro_test_multi_client_cr.cc)
 7 | 
 8 | target_link_libraries(latency_test
 9 |     libddckv
10 |     ycsb_test
11 |     pthread
12 |     ibverbs
13 | )
14 | 
15 | target_link_libraries(latency_test_client
16 |     latency_test
17 |     libddckv
18 |     pthread
19 |     ibverbs
20 | )
21 | 
22 | target_link_libraries(latency_test_client_cr
23 |     latency_test
24 |     libddckv
25 |     pthread
26 |     ibverbs
27 | )
28 | 
29 | target_link_libraries(micro_test
30 |     libddckv
31 |     pthread
32 |     ibverbs
33 | )
34 | 
35 | target_link_libraries(micro_test_multi_client
36 |     micro_test
37 |     libddckv
38 |     pthread
39 |     ibverbs
40 | )
41 | 
42 | target_link_libraries(micro_test_multi_client_cr
43 |     micro_test
44 |     libddckv
45 |     pthread
46 |     ibverbs
47 | )


--------------------------------------------------------------------------------
/micro-test/gen-micro-workload.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | insertTemplate = "INSERT usertable {}\n"
 4 | updateTemplate = "UPDATE usertable {}\n"
 5 | searchTemplate = "READ usertable {}\n"
 6 | deleteTemplate = "DELETE usertable {}\n"
 7 | 
 8 | workloadNameTemplate = "workload{}.spec_trans"
 9 | workloadNameList = ["ins", "upd", "rea", "del"]
10 | templateDict = {
11 |     "ins": insertTemplate, 
12 |     "upd": updateTemplate, 
13 |     "rea": searchTemplate, 
14 |     "del": deleteTemplate
15 | }
16 | 
17 | workloadSize = int(sys.argv[1])
18 | 
19 | for wl in workloadNameList:
20 |     wlName = "micro-workloads/" + workloadNameTemplate.format(wl)
21 |     lineTemplate = templateDict[wl]
22 |     lineList = []
23 |     for key in range(workloadSize):
24 |         line = lineTemplate.format(key)
25 |         lineList.append(line)
26 |     of = open(wlName, "w")
27 |     of.writelines(lineList)
28 |     of.close()


--------------------------------------------------------------------------------
/micro-test/latency_test.cc:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <sys/time.h>
  3 | 
  4 | #include "client.h"
  5 | 
  6 | #include "latency_test.h"
  7 | 
  8 | #define WORKLOAD_ALL (-1)
  9 | // #define WORKLOAD_NUM WORKLOAD_ALL
 10 | #define WORKLOAD_NUM 100000
 11 | 
 12 | 
 13 | static int test_lat(Client & client, char * op_type, const char * out_fname) {
 14 |     int ret = 0;
 15 |     ret = client.load_seq_kv_requests(WORKLOAD_NUM, op_type);
 16 |     assert(ret == 0);
 17 | 
 18 |     printf("lat test %s\n", op_type);
 19 |     uint64_t * lat_list = (uint64_t *)malloc(sizeof(uint64_t) * client.num_local_operations_);
 20 |     memset(lat_list, 0, sizeof(uint64_t) * client.num_local_operations_);
 21 | 
 22 |     uint32_t num_failed = 0;
 23 |     void * search_addr;
 24 |     struct timeval st, et;
 25 |     bool should_stop = false;
 26 |     client.init_kvreq_space(0, 0, client.num_local_operations_);
 27 |     for (int i = 0; i < client.num_local_operations_; i ++) {
 28 |         KVReqCtx * ctx = &client.kv_req_ctx_list_[i];
 29 |         ctx->coro_id = 0;
 30 |         ctx->should_stop = &should_stop;
 31 |         // ctx->use_cache = false;
 32 | 
 33 |         switch (ctx->req_type) {
 34 |         case KV_REQ_SEARCH:
 35 |             gettimeofday(&st, NULL);
 36 |             search_addr = client.kv_search_sync(ctx);
 37 |             gettimeofday(&et, NULL);
 38 |             if (search_addr == NULL) {
 39 |                 num_failed ++;
 40 |             }
 41 |             break;
 42 |         case KV_REQ_INSERT:
 43 |             gettimeofday(&st, NULL);
 44 |             ret = client.kv_insert_sync(ctx);
 45 |             gettimeofday(&et, NULL);
 46 |             if (ret == KV_OPS_FAIL_RETURN) {
 47 |                 num_failed ++;
 48 |             }
 49 |             break;
 50 |         case KV_REQ_UPDATE:
 51 |             gettimeofday(&st, NULL);
 52 |             ret = client.kv_update_sync(ctx);
 53 |             if (ret == KV_OPS_FAIL_RETURN) {
 54 |                 num_failed ++;
 55 |             }
 56 |             gettimeofday(&et, NULL);
 57 |             break;
 58 |         case KV_REQ_DELETE:
 59 |             gettimeofday(&st, NULL);
 60 |             ret = client.kv_delete_sync(ctx);
 61 |             if (ret == KV_OPS_FAIL_RETURN) {
 62 |                 num_failed ++;
 63 |             }
 64 |             gettimeofday(&et, NULL);
 65 |             break;
 66 |         default:
 67 |             assert(0);
 68 |             break;
 69 |         }
 70 | 
 71 |         lat_list[i] = (et.tv_sec - st.tv_sec) * 1000000 + (et.tv_usec - st.tv_usec);
 72 |     }
 73 |     printf("Failed: %d\n", num_failed);
 74 | 
 75 |     FILE * lat_fp = fopen(out_fname, "w");
 76 |     assert(lat_fp != NULL);
 77 |     for (int i = 0; i < client.num_local_operations_; i ++) {
 78 |         fprintf(lat_fp, "%ld\n", lat_list[i]);
 79 |     }
 80 |     fclose(lat_fp);
 81 |     return 0;
 82 | }
 83 | 
 84 | static int test_lat(ClientCR & client, char * op_type, const char * out_fname) {
 85 |     int ret = 0;
 86 |     ret = client.load_seq_kv_requests(WORKLOAD_NUM, op_type);
 87 |     assert(ret == 0);
 88 | 
 89 |     printf("lat test %s\n", op_type);
 90 |     uint64_t * lat_list = (uint64_t *)malloc(sizeof(uint64_t) * client.num_local_operations_);
 91 |     memset(lat_list, 0, sizeof(uint64_t) * client.num_local_operations_);
 92 | 
 93 |     uint32_t num_failed = 0;
 94 |     void * search_addr;
 95 |     struct timeval st, et;
 96 |     bool should_stop = false;
 97 |     client.init_kvreq_space(0, 0, client.num_local_operations_);
 98 |     for (int i = 0; i < client.num_local_operations_; i ++) {
 99 |         KVReqCtx * ctx = &client.kv_req_ctx_list_[i];
100 |         ctx->coro_id = 0;
101 |         ctx->should_stop = &should_stop;
102 | 
103 |         switch (ctx->req_type) {
104 |         case KV_REQ_SEARCH:
105 |             gettimeofday(&st, NULL);
106 |             search_addr = client.kv_search_sync(ctx);
107 |             gettimeofday(&et, NULL);
108 |             if (search_addr == NULL) {
109 |                 num_failed ++;
110 |             }
111 |             break;
112 |         case KV_REQ_INSERT:
113 |             gettimeofday(&st, NULL);
114 |             ret = client.kv_insert_sync(ctx);
115 |             gettimeofday(&et, NULL);
116 |             if (ret == KV_OPS_FAIL_REDO || ret == KV_OPS_FAIL_RETURN) {
117 |                 num_failed ++;
118 |             }
119 |             break;
120 |         case KV_REQ_UPDATE:
121 |             gettimeofday(&st, NULL);
122 |             ret = client.kv_update_sync(ctx);
123 |             gettimeofday(&et, NULL);
124 |             break;
125 |         case KV_REQ_DELETE:
126 |             gettimeofday(&st, NULL);
127 |             ret = client.kv_delete_sync(ctx);
128 |             gettimeofday(&et, NULL);
129 |             break;
130 |         default:
131 |             assert(0);
132 |             break;
133 |         }
134 | 
135 |         lat_list[i] = (et.tv_sec - st.tv_sec) * 1000000 + (et.tv_usec - st.tv_usec);
136 |     }
137 |     printf("Failed: %d\n", num_failed);
138 | 
139 |     FILE * lat_fp = fopen(out_fname, "w");
140 |     assert(lat_fp != NULL);
141 |     for (int i = 0; i < client.num_local_operations_; i ++) {
142 |         fprintf(lat_fp, "%ld\n", lat_list[i]);
143 |     }
144 |     fclose(lat_fp);
145 |     return 0;
146 | }
147 | 
148 | int test_insert_lat(Client & client) {
149 |     char out_fname[128];
150 |     int num_rep = client.get_num_rep();
151 |     sprintf(out_fname, "results/insert_lat-%drp.txt", num_rep);
152 |     return test_lat(client, "INSERT", out_fname);
153 | }
154 | 
155 | int test_search_lat(Client & client) {
156 |     char out_fname[128];
157 |     int num_rep = client.get_num_rep();
158 |     sprintf(out_fname, "results/search_lat-%drp.txt", num_rep);
159 |     return test_lat(client, "READ", out_fname);
160 | }
161 | 
162 | int test_update_lat(Client & client) {
163 |     char out_fname[128];
164 |     int num_rep = client.get_num_rep();
165 |     sprintf(out_fname, "results/update_lat-%drp.txt", num_rep);
166 |     return test_lat(client, "UPDATE", out_fname);
167 | }
168 | 
169 | int test_delete_lat(Client & client) {
170 |     char out_fname[128];
171 |     int num_rep = client.get_num_rep();
172 |     sprintf(out_fname, "results/delete_lat-%drp.txt", num_rep);
173 |     return test_lat(client, "DELETE", out_fname);
174 | }
175 | 
176 | int test_insert_lat(ClientCR & client) {
177 |     char out_fname[128];
178 |     int num_rep = client.get_num_rep();
179 |     sprintf(out_fname, "results/insert_cr_lat-%drp.txt", num_rep);
180 |     return test_lat(client, "INSERT", out_fname);
181 | }
182 | 
183 | int test_search_lat(ClientCR & client) {
184 |     char out_fname[128];
185 |     int num_rep = client.get_num_rep();
186 |     sprintf(out_fname, "results/search_cr_lat-%drp.txt", num_rep);
187 |     return test_lat(client, "READ", out_fname);
188 | }
189 | 
190 | int test_update_lat(ClientCR & client) {
191 |     char out_fname[128];
192 |     int num_rep = client.get_num_rep();
193 |     sprintf(out_fname, "results/update_cr_lat-%drp.txt", num_rep);
194 |     return test_lat(client, "UPDATE", out_fname);
195 | }
196 | 
197 | int test_delete_lat(ClientCR & client) {
198 |     char out_fname[128];
199 |     int num_rep = client.get_num_rep();
200 |     sprintf(out_fname, "results/delete_cr_lat-%drp.txt", num_rep);
201 |     return test_lat(client, "DELETE", out_fname);
202 | }


--------------------------------------------------------------------------------
/micro-test/latency_test.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_LATENCY_TEST_H_
 2 | #define DDCKV_LATENCY_TEST_H_
 3 | 
 4 | #include "client.h"
 5 | #include "client_cr.h"
 6 | 
 7 | int test_insert_lat(Client & client);
 8 | int test_search_lat(Client & client);
 9 | int test_update_lat(Client & client);
10 | int test_delete_lat(Client & client);
11 | 
12 | int test_insert_lat(ClientCR & client);
13 | int test_search_lat(ClientCR & client);
14 | int test_update_lat(ClientCR & client);
15 | int test_delete_lat(ClientCR & client);
16 | 
17 | #endif


--------------------------------------------------------------------------------
/micro-test/latency_test_client.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <sched.h>
 3 | 
 4 | #include "latency_test.h"
 5 | 
 6 | int main(int argc, char ** argv) {
 7 |     if (argc != 2) {
 8 |         printf("Usage: %s path-to-config-file\n", argv[0]);
 9 |         return 1;
10 |     }
11 | 
12 |     int ret = 0;
13 |     GlobalConfig config;
14 |     ret = load_config(argv[1], &config);
15 |     assert(ret == 0);
16 | 
17 |     cpu_set_t cpuset;
18 |     CPU_ZERO(&cpuset);
19 |     CPU_SET(config.main_core_id, &cpuset);
20 |     ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
21 |     assert(ret == 0);
22 |     ret = sched_getaffinity(0, sizeof(cpuset), &cpuset);
23 |     assert(ret == 0);
24 |     for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) {
25 |         if (CPU_ISSET(i, &cpuset)) {
26 |             printf("main process running on core: %d\n", i);
27 |         }
28 |     }
29 | 
30 |     Client client(&config);
31 | 
32 |     ret = test_insert_lat(client);
33 |     assert(ret == 0);
34 | 
35 |     ret = test_search_lat(client);
36 |     assert(ret == 0);
37 | 
38 |     ret = test_update_lat(client);
39 |     assert(ret == 0);
40 | 
41 |     ret = test_delete_lat(client);
42 |     assert(ret == 0);
43 | }


--------------------------------------------------------------------------------
/micro-test/latency_test_client_cr.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <sched.h>
 3 | 
 4 | #include "latency_test.h"
 5 | 
 6 | int main(int argc, char ** argv) {
 7 |     if (argc != 2) {
 8 |         printf("Usage: %s path-to-config-file\n", argv[0]);
 9 |         return 1;
10 |     }
11 | 
12 |     int ret = 0;
13 |     GlobalConfig config;
14 |     ret = load_config(argv[1], &config);
15 |     assert(ret == 0);
16 | 
17 |     cpu_set_t cpuset;
18 |     CPU_ZERO(&cpuset);
19 |     CPU_SET(config.main_core_id, &cpuset);
20 |     ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
21 |     assert(ret == 0);
22 |     ret = sched_getaffinity(0, sizeof(cpuset), &cpuset);
23 |     assert(ret == 0);
24 |     for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) {
25 |         if (CPU_ISSET(i, &cpuset)) {
26 |             printf("main process running on core: %d\n", i);
27 |         }
28 |     }
29 | 
30 |     ClientCR client(&config);
31 | 
32 |     ret = test_insert_lat(client);
33 |     assert(ret == 0);
34 | 
35 |     ret = test_search_lat(client);
36 |     assert(ret == 0);
37 | 
38 |     ret = test_update_lat(client);
39 |     assert(ret == 0);
40 | 
41 |     ret = test_delete_lat(client);
42 |     assert(ret == 0);
43 | }


--------------------------------------------------------------------------------
/micro-test/micro_test.cc:
--------------------------------------------------------------------------------
  1 | #include "micro_test.h"
  2 | #include "client.h"
  3 | #include "client_cr.h"
  4 | 
  5 | static void timer_fb_func(volatile bool * should_stop, int seconds) {
  6 |     boost::this_fiber::sleep_for(std::chrono::seconds(seconds));
  7 |     *should_stop = true;
  8 |     // printf("stopped!\n");
  9 | }
 10 | 
 11 | static void timer_fb_func_ms(volatile bool * should_stop, int milliseconds) {
 12 |     boost::this_fiber::sleep_for(std::chrono::milliseconds(milliseconds));
 13 |     *should_stop = true;
 14 |     // printf("stopped!\n");
 15 | }
 16 | 
 17 | static int micro_test_tpt(Client & client, MicroRunClientArgs * args) {
 18 |     int ret = 0;
 19 |     ret = client.load_seq_kv_requests(client.micro_workload_num_, args->op_type);
 20 |     assert(ret == 0);
 21 | 
 22 |     printf("Test phase start\n");
 23 |     boost::fibers::barrier global_barrier(client.num_coroutines_ + 1);
 24 |     ClientFiberArgs * fb_args_list = (ClientFiberArgs *)malloc(sizeof(ClientFiberArgs) * client.num_local_operations_);
 25 |     uint32_t coro_num_ops = client.num_local_operations_ / client.num_coroutines_;
 26 |     for (int i = 0; i < client.num_coroutines_; i ++) {
 27 |         fb_args_list[i].client = &client;
 28 |         fb_args_list[i].coro_id = i;
 29 |         fb_args_list[i].ops_num = coro_num_ops;
 30 |         fb_args_list[i].ops_st_idx = coro_num_ops * i;
 31 |         fb_args_list[i].num_failed = 0;
 32 |         fb_args_list[i].b = &global_barrier;
 33 |         fb_args_list[i].should_stop = args->should_stop;
 34 |     }
 35 |     fb_args_list[client.num_coroutines_ - 1].ops_num += client.num_local_operations_ % client.num_coroutines_;
 36 | 
 37 |     boost::fibers::fiber fb_list[client.num_coroutines_];
 38 |     for (int i = 0; i < client.num_coroutines_; i ++) {
 39 |         boost::fibers::fiber fb(client_ops_fb_cnt_ops_micro, &fb_args_list[i]);
 40 |         fb_list[i] = std::move(fb);
 41 |     }
 42 | 
 43 |     global_barrier.wait();
 44 |     boost::fibers::fiber timer_fb;
 45 |     if (args->thread_id == 0) {
 46 |         printf("%d initializes timer\n", args->thread_id);
 47 |         pthread_barrier_wait(args->timer_barrier);
 48 |         boost::fibers::fiber fb(timer_fb_func_ms, args->should_stop, client.workload_run_time_);
 49 |         timer_fb = std::move(fb);
 50 |     } else {
 51 |         printf("%d wait for timer\n", args->thread_id);
 52 |         pthread_barrier_wait(args->timer_barrier);
 53 |     }
 54 | 
 55 |     printf("%d passed barrier\n", args->thread_id);
 56 |     if (args->thread_id == 0) {
 57 |         timer_fb.join();
 58 |     }
 59 |     uint32_t ops_cnt = 0;
 60 |     uint32_t num_failed = 0;
 61 |     for (int i = 0; i < client.num_coroutines_; i ++) {
 62 |         fb_list[i].join();
 63 |         ops_cnt += fb_args_list[i].ops_cnt;
 64 |         num_failed += fb_args_list[i].num_failed;
 65 |         printf("fb%d finished\n", fb_args_list[i].coro_id);
 66 |     }
 67 |     printf("thread: %d %d ops/s\n", args->thread_id, ops_cnt / 10);
 68 |     printf("%d failed\n", num_failed);
 69 |     
 70 |     // update counter
 71 |     if (strcmp(args->op_type, "INSERT") == 0) {
 72 |         args->ret_num_insert_ops = ops_cnt;
 73 |         args->ret_fail_insert_num = num_failed;
 74 |     } else if (strcmp(args->op_type, "UPDATE") == 0) {
 75 |         args->ret_num_update_ops = ops_cnt;
 76 |         args->ret_fail_update_num = num_failed;
 77 |     } else if (strcmp(args->op_type, "READ") == 0) {
 78 |         args->ret_num_search_ops = ops_cnt;
 79 |         args->ret_fail_search_num = num_failed;
 80 |     } else {
 81 |         assert(strcmp(args->op_type, "DELETE") == 0);
 82 |         args->ret_num_delete_ops = ops_cnt;
 83 |         args->ret_fail_delete_num = num_failed;
 84 |     }
 85 |     free(fb_args_list);
 86 |     return 0;
 87 | }
 88 | 
 89 | static int micro_test_tpt(ClientCR & client, MicroRunClientArgs * args) {
 90 |     int ret = 0;
 91 |     ret = client.load_seq_kv_requests(client.micro_workload_num_, args->op_type);
 92 |     assert(ret == 0);
 93 | 
 94 |     printf("Test phase start\n");
 95 |     boost::fibers::barrier global_barrier(client.num_coroutines_ + 1);
 96 |     ClientFiberArgs * fb_args_list = (ClientFiberArgs *)malloc(sizeof(ClientFiberArgs) * client.num_local_operations_);
 97 |     uint32_t coro_num_ops = client.num_local_operations_ / client.num_coroutines_;
 98 |     for (int i = 0; i < client.num_coroutines_; i ++) {
 99 |         fb_args_list[i].client_cr = &client;
100 |         fb_args_list[i].coro_id = i;
101 |         fb_args_list[i].ops_num = coro_num_ops;
102 |         fb_args_list[i].ops_st_idx = coro_num_ops * i;
103 |         fb_args_list[i].num_failed = 0;
104 |         fb_args_list[i].b = &global_barrier;
105 |         fb_args_list[i].should_stop = args->should_stop;
106 |     }
107 |     fb_args_list[client.num_coroutines_ - 1].ops_num += client.num_local_operations_ % client.num_coroutines_;
108 | 
109 |     boost::fibers::fiber fb_list[client.num_coroutines_];
110 |     for (int i = 0; i < client.num_coroutines_; i ++) {
111 |         boost::fibers::fiber fb(client_cr_ops_fb_cnt_ops_micro, &fb_args_list[i]);
112 |         fb_list[i] = std::move(fb);
113 |     }
114 | 
115 |     global_barrier.wait();
116 |     boost::fibers::fiber timer_fb;
117 |     if (args->thread_id == 0) {
118 |         printf("%d initializes timer\n", args->thread_id);
119 |         pthread_barrier_wait(args->timer_barrier);
120 |         boost::fibers::fiber fb(timer_fb_func, args->should_stop, client.workload_run_time_);
121 |         timer_fb = std::move(fb);
122 |     } else {
123 |         printf("%d wait for timer\n", args->thread_id);
124 |         pthread_barrier_wait(args->timer_barrier);
125 |     }
126 | 
127 |     printf("%d passed barrier\n", args->thread_id);
128 |     if (args->thread_id == 0) {
129 |         timer_fb.join();
130 |     }
131 |     uint32_t ops_cnt = 0;
132 |     uint32_t num_failed = 0;
133 |     for (int i = 0; i < client.num_coroutines_; i ++) {
134 |         fb_list[i].join();
135 |         ops_cnt += fb_args_list[i].ops_cnt;
136 |         num_failed += fb_args_list[i].num_failed;
137 |         printf("fb%d finished\n", fb_args_list[i].coro_id);
138 |     }
139 |     printf("thread: %d %d ops/s\n", args->thread_id, ops_cnt / 10);
140 |     printf("%d failed\n", num_failed);
141 |     
142 |     // update counter
143 |     if (strcmp(args->op_type, "INSERT") == 0) {
144 |         args->ret_num_insert_ops = ops_cnt;
145 |         args->ret_fail_insert_num = num_failed;
146 |     } else if (strcmp(args->op_type, "UPDATE") == 0) {
147 |         args->ret_num_update_ops = ops_cnt;
148 |         args->ret_fail_update_num = num_failed;
149 |     } else if (strcmp(args->op_type, "READ") == 0) {
150 |         args->ret_num_search_ops = ops_cnt;
151 |         args->ret_fail_search_num = num_failed;
152 |     } else {
153 |         assert(strcmp(args->op_type, "DELETE") == 0);
154 |         args->ret_num_delete_ops = ops_cnt;
155 |         args->ret_fail_delete_num = num_failed;
156 |     }
157 |     free(fb_args_list);
158 |     return 0;
159 | }
160 | 
161 | void * run_client(void * _args) {
162 |     MicroRunClientArgs * args = (MicroRunClientArgs *)_args;
163 |     
164 |     int ret = 0;
165 |     GlobalConfig config;
166 |     ret = load_config(args->config_file, &config);
167 |     assert(ret == 0);
168 | 
169 |     config.main_core_id = args->main_core_id;
170 |     config.poll_core_id = args->poll_core_id;
171 |     config.server_id += args->thread_id;
172 | 
173 |     cpu_set_t cpuset;
174 |     CPU_ZERO(&cpuset);
175 |     CPU_SET(config.main_core_id, &cpuset);
176 |     pthread_t this_tid = pthread_self();
177 |     ret = pthread_setaffinity_np(this_tid, sizeof(cpuset), &cpuset);
178 |     // assert(ret == 0);
179 |     ret = pthread_getaffinity_np(this_tid, sizeof(cpuset), &cpuset);
180 |     for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) {
181 |         if (CPU_ISSET(i, &cpuset)) {
182 |             printf("client %d main process running on core: %d\n", args->thread_id, i);
183 |         }
184 |     }
185 | 
186 |     Client client(&config);
187 | 
188 |     pthread_t polling_tid = client.start_polling_thread();
189 | 
190 |     args->op_type = "INSERT";
191 |     client.workload_run_time_ = 500;
192 |     if (args->thread_id == 0) {
193 |         printf("press to sync start %s\n", args->op_type);
194 |         getchar();
195 |     }
196 |     pthread_barrier_wait(args->insert_start_barrier);
197 | 
198 |     // insert
199 |     printf("%d start %s\n", args->thread_id, args->op_type);
200 |     ret = micro_test_tpt(client, args);
201 |     assert(ret == 0);
202 |     printf("%d %s finished\n", args->thread_id, args->op_type);
203 |     pthread_barrier_wait(args->insert_finish_barrier);
204 | 
205 |     args->op_type = "READ";
206 |     client.workload_run_time_ = 5000;
207 |     if (args->thread_id == 0) {
208 |         pthread_barrier_init(args->timer_barrier, NULL, args->num_threads);
209 |         *args->should_stop = false;
210 |         printf("press to sync start %s\n", args->op_type);
211 |         getchar();
212 |     }
213 |     pthread_barrier_wait(args->search_start_barrier);
214 | 
215 |     printf("%d start %s\n", args->thread_id, args->op_type);
216 |     ret = micro_test_tpt(client, args);
217 |     assert(ret == 0);
218 |     printf("%d %s finished\n", args->thread_id, args->op_type);
219 |     pthread_barrier_wait(args->search_finish_barrier);
220 | 
221 |     args->op_type = "UPDATE";
222 |     client.workload_run_time_ = 5000;
223 |     if (args->thread_id == 0) {
224 |         pthread_barrier_init(args->timer_barrier, NULL, args->num_threads);
225 |         *args->should_stop = false;
226 |         printf("press to sync start %s\n", args->op_type);
227 |         getchar();
228 |     }
229 |     pthread_barrier_wait(args->update_start_barrier);
230 | 
231 |     printf("%d start %s\n", args->thread_id, args->op_type);
232 |     ret = micro_test_tpt(client, args);
233 |     assert(ret == 0);
234 |     printf("%d %s finished\n", args->thread_id, args->op_type);
235 |     pthread_barrier_wait(args->update_finish_barrier);
236 | 
237 |     args->op_type = "DELETE";
238 |     client.workload_run_time_ = 500;
239 |     if (args->thread_id == 0) {
240 |         pthread_barrier_init(args->timer_barrier, NULL, args->num_threads);
241 |         *args->should_stop = false;
242 |         printf("press to sync start %s\n", args->op_type);
243 |         getchar();
244 |     }
245 |     pthread_barrier_wait(args->delete_start_barrier);
246 | 
247 |     printf("%d start %s\n", args->thread_id, args->op_type);
248 |     ret = micro_test_tpt(client, args);
249 |     assert(ret == 0);
250 |     printf("%d %s finished\n", args->thread_id, args->op_type);
251 |     pthread_barrier_wait(args->delete_finish_barrier);
252 |     
253 |     client.stop_polling_thread();
254 |     pthread_join(polling_tid, NULL);
255 |     return 0;
256 | }
257 | 
258 | void * run_client_cr(void * _args) {
259 |     MicroRunClientArgs * args = (MicroRunClientArgs *)_args;
260 | 
261 |     int ret = 0;
262 |     GlobalConfig config;
263 |     ret = load_config(args->config_file, &config);
264 |     assert(ret == 0);
265 | 
266 |     config.main_core_id = args->main_core_id;
267 |     config.poll_core_id = args->poll_core_id;
268 |     config.server_id += args->thread_id;
269 | 
270 |     cpu_set_t cpuset;
271 |     CPU_ZERO(&cpuset);
272 |     CPU_SET(config.main_core_id, &cpuset);
273 |     pthread_t this_tid = pthread_self();
274 |     ret = pthread_setaffinity_np(this_tid, sizeof(cpuset), &cpuset);
275 |     // assert(ret == 0);
276 |     ret = pthread_getaffinity_np(this_tid, sizeof(cpuset), &cpuset);
277 |     for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) {
278 |         if (CPU_ISSET(i, &cpuset)) {
279 |             printf("client %d main process running on core: %d\n", args->thread_id, i);
280 |         }
281 |     }
282 | 
283 |     ClientCR client(&config);
284 | 
285 |     pthread_t polling_tid = client.start_polling_thread();
286 | 
287 |     args->op_type = "INSERT";
288 |     if (args->thread_id == 0) {
289 |         printf("press to sync start %s\n", args->op_type);
290 |         getchar();
291 |     }
292 |     pthread_barrier_wait(args->insert_start_barrier);
293 | 
294 |     // insert
295 |     printf("%d start %s\n", args->thread_id, args->op_type);
296 |     ret = micro_test_tpt(client, args);
297 |     assert(ret == 0);
298 |     printf("%d %s finished\n", args->thread_id, args->op_type);
299 |     pthread_barrier_wait(args->insert_finish_barrier);
300 | 
301 |     args->op_type = "UPDATE";
302 |     if (args->thread_id == 0) {
303 |         pthread_barrier_init(args->timer_barrier, NULL, args->num_threads);
304 |         *args->should_stop = false;
305 |         printf("press to sync start %s\n", args->op_type);
306 |         getchar();
307 |     }
308 |     pthread_barrier_wait(args->update_start_barrier);
309 | 
310 |     printf("%d start %s\n", args->thread_id, args->op_type);
311 |     ret = micro_test_tpt(client, args);
312 |     assert(ret == 0);
313 |     printf("%d %s finished\n", args->thread_id, args->op_type);
314 |     pthread_barrier_wait(args->update_finish_barrier);
315 | 
316 |     args->op_type = "READ";
317 |     if (args->thread_id == 0) {
318 |         pthread_barrier_init(args->timer_barrier, NULL, args->num_threads);
319 |         *args->should_stop = false;
320 |         printf("press to sync start %s\n", args->op_type);
321 |         getchar();
322 |     }
323 |     pthread_barrier_wait(args->search_start_barrier);
324 | 
325 |     printf("%d start %s\n", args->thread_id, args->op_type);
326 |     ret = micro_test_tpt(client, args);
327 |     assert(ret == 0);
328 |     printf("%d %s finished\n", args->thread_id, args->op_type);
329 |     pthread_barrier_wait(args->search_finish_barrier);
330 | 
331 |     args->op_type = "DELETE";
332 |     if (args->thread_id == 0) {
333 |         pthread_barrier_init(args->timer_barrier, NULL, args->num_threads);
334 |         *args->should_stop = false;
335 |         printf("press to sync start %s\n", args->op_type);
336 |         getchar();
337 |     }
338 |     pthread_barrier_wait(args->delete_start_barrier);
339 | 
340 |     printf("%d start %s\n", args->thread_id, args->op_type);
341 |     ret = micro_test_tpt(client, args);
342 |     assert(ret == 0);
343 |     printf("%d %s finished\n", args->thread_id, args->op_type);
344 |     pthread_barrier_wait(args->delete_finish_barrier);
345 |     
346 |     client.stop_polling_thread();
347 |     pthread_join(polling_tid, NULL);
348 |     return 0;
349 | }


--------------------------------------------------------------------------------
/micro-test/micro_test.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_MICRO_TEST_H_
 2 | #define DDCKV_MICRO_TEST_H_
 3 | 
 4 | #include <stdint.h>
 5 | #include <pthread.h>
 6 | 
 7 | #include "client.h"
 8 | 
 9 | typedef struct TagMicroRunClientArgs {
10 |     int thread_id;
11 |     int main_core_id;
12 |     int poll_core_id;
13 |     char * workload_name;
14 |     char * config_file;
15 |     pthread_barrier_t * insert_start_barrier;
16 |     pthread_barrier_t * insert_finish_barrier;
17 |     pthread_barrier_t * update_start_barrier;
18 |     pthread_barrier_t * update_finish_barrier;
19 |     pthread_barrier_t * search_start_barrier;
20 |     pthread_barrier_t * search_finish_barrier;
21 |     pthread_barrier_t * delete_start_barrier;
22 |     pthread_barrier_t * delete_finish_barrier;
23 |     volatile bool * should_stop;
24 |     // bool * timer_is_ready;
25 |     pthread_barrier_t * timer_barrier;
26 | 
27 |     uint32_t ret_num_insert_ops;
28 |     uint32_t ret_num_update_ops;
29 |     uint32_t ret_num_search_ops;
30 |     uint32_t ret_num_delete_ops;
31 |     uint32_t ret_fail_insert_num;
32 |     uint32_t ret_fail_update_num;
33 |     uint32_t ret_fail_search_num;
34 |     uint32_t ret_fail_delete_num;
35 | 
36 |     uint32_t client_id;
37 |     uint32_t num_threads;
38 |     char * op_type;
39 |     Client * client;
40 | } MicroRunClientArgs;
41 | 
42 | void * run_client(void * _args);
43 | void * run_client_cr(void * _args);
44 | 
45 | #endif


--------------------------------------------------------------------------------
/micro-test/micro_test_multi_client.cc:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | 
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | 
  7 | #include <atomic>
  8 | 
  9 | #include "client.h"
 10 | #include "micro_test.h"
 11 | 
 12 | static void start_client_threads(char * op_type, int num_clients, GlobalConfig * config, 
 13 |         char * config_fname) {
 14 |     MicroRunClientArgs * client_args_list = (MicroRunClientArgs *)malloc(sizeof(MicroRunClientArgs) * num_clients);
 15 |     pthread_barrier_t insert_start_barrier;
 16 |     pthread_barrier_t insert_finish_barrier;
 17 |     pthread_barrier_t update_start_barrier;
 18 |     pthread_barrier_t update_finish_barrier;
 19 |     pthread_barrier_t search_start_barrier;
 20 |     pthread_barrier_t search_finish_barrier;
 21 |     pthread_barrier_t delete_start_barrier;
 22 |     pthread_barrier_t delete_finish_barrier;
 23 |     pthread_barrier_t global_timer_barrier;
 24 |     pthread_barrier_init(&insert_start_barrier, NULL, num_clients);
 25 |     pthread_barrier_init(&insert_finish_barrier, NULL, num_clients);
 26 |     pthread_barrier_init(&update_start_barrier, NULL, num_clients);
 27 |     pthread_barrier_init(&update_finish_barrier, NULL, num_clients);
 28 |     pthread_barrier_init(&search_start_barrier, NULL, num_clients);
 29 |     pthread_barrier_init(&search_finish_barrier, NULL, num_clients);
 30 |     pthread_barrier_init(&delete_start_barrier, NULL, num_clients);
 31 |     pthread_barrier_init(&delete_finish_barrier, NULL, num_clients);
 32 |     pthread_barrier_init(&global_timer_barrier, NULL, num_clients);
 33 |     volatile bool should_stop = false;
 34 | 
 35 |     pthread_t tid_list[num_clients];
 36 |     for (int i = 0; i < num_clients; i ++) {
 37 |         client_args_list[i].client_id    = config->server_id - config->memory_num;
 38 |         client_args_list[i].thread_id    = i;
 39 |         client_args_list[i].num_threads  = num_clients;
 40 |         client_args_list[i].main_core_id = config->main_core_id + i * 2;
 41 |         client_args_list[i].poll_core_id = config->poll_core_id + i * 2;
 42 |         client_args_list[i].config_file   = config_fname;
 43 |         client_args_list[i].insert_start_barrier= &insert_start_barrier;
 44 |         client_args_list[i].insert_finish_barrier= &insert_finish_barrier;
 45 |         client_args_list[i].update_start_barrier= &update_start_barrier;
 46 |         client_args_list[i].update_finish_barrier= &update_finish_barrier;
 47 |         client_args_list[i].search_start_barrier= &search_start_barrier;
 48 |         client_args_list[i].search_finish_barrier= &search_finish_barrier;
 49 |         client_args_list[i].delete_start_barrier= &delete_start_barrier;
 50 |         client_args_list[i].delete_finish_barrier= &delete_finish_barrier;
 51 |         client_args_list[i].timer_barrier = &global_timer_barrier;
 52 |         client_args_list[i].should_stop   = &should_stop;
 53 |         client_args_list[i].ret_num_insert_ops = 0;
 54 |         client_args_list[i].ret_num_update_ops = 0;
 55 |         client_args_list[i].ret_num_search_ops = 0;
 56 |         client_args_list[i].ret_num_delete_ops = 0;
 57 |         client_args_list[i].ret_fail_insert_num = 0;
 58 |         client_args_list[i].ret_fail_update_num = 0;
 59 |         client_args_list[i].ret_fail_search_num = 0;
 60 |         client_args_list[i].ret_fail_delete_num = 0;
 61 |         client_args_list[i].op_type = op_type;
 62 |         pthread_t tid;
 63 |         pthread_create(&tid, NULL, run_client, &client_args_list[i]);
 64 |         tid_list[i] = tid;
 65 |     }
 66 | 
 67 |     uint32_t total_insert_tpt = 0;
 68 |     uint32_t total_insert_failed = 0;
 69 |     uint32_t total_update_tpt = 0;
 70 |     uint32_t total_update_failed = 0;
 71 |     uint32_t total_search_tpt = 0;
 72 |     uint32_t total_search_failed = 0;
 73 |     uint32_t total_delete_tpt = 0;
 74 |     uint32_t total_delete_failed = 0;
 75 |     for (int i = 0; i < num_clients; i ++) {
 76 |         pthread_join(tid_list[i], NULL);
 77 |         total_insert_tpt += client_args_list[i].ret_num_insert_ops;
 78 |         total_update_tpt += client_args_list[i].ret_num_update_ops;
 79 |         total_search_tpt += client_args_list[i].ret_num_search_ops;
 80 |         total_delete_tpt += client_args_list[i].ret_num_delete_ops;
 81 |         total_insert_failed += client_args_list[i].ret_fail_insert_num;
 82 |         total_update_failed += client_args_list[i].ret_fail_update_num;
 83 |         total_search_failed += client_args_list[i].ret_fail_search_num;
 84 |         total_delete_failed += client_args_list[i].ret_fail_delete_num;
 85 |     }
 86 |     printf("insert total: %d ops\n", total_insert_tpt);
 87 |     printf("insert failed: %d ops\n", total_insert_failed);
 88 |     printf("insert tpt: %d ops/s\n", (total_insert_tpt - total_insert_failed) * 1000 / 500);
 89 |     printf("update total: %d ops\n", total_update_tpt);
 90 |     printf("update failed: %d ops\n", total_update_failed);
 91 |     printf("update tpt: %d ops/s\n", (total_update_tpt - total_update_failed) * 1000 / 5000);
 92 |     printf("search total: %d ops\n", total_search_tpt);
 93 |     printf("search failed: %d ops\n", total_search_failed);
 94 |     printf("search tpt: %d ops/s\n", (total_search_tpt - total_search_failed) * 1000 / 5000);
 95 |     printf("delete total: %d ops\n", total_delete_tpt);
 96 |     printf("delete failed: %d ops\n", total_delete_failed);
 97 |     printf("delete tpt: %d ops/s\n", (total_delete_tpt - total_delete_failed) * 1000 / 500);
 98 |     free(client_args_list);
 99 | }
100 | 
101 | int main(int argc, char ** argv) {
102 |     if (argc != 3) {
103 |         printf("Usage: %s path-to-config-file num-clients\n", argv[0]);
104 |         return 1;
105 |     }
106 | 
107 |     int num_clients = atoi(argv[2]);
108 | 
109 |     GlobalConfig config;
110 |     int ret = load_config(argv[1], &config);
111 |     assert(ret == 0);
112 | 
113 |     start_client_threads("INSERT", num_clients, &config, argv[1]);   
114 | }


--------------------------------------------------------------------------------
/micro-test/micro_test_multi_client_cr.cc:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | 
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | 
  7 | #include <atomic>
  8 | 
  9 | #include "client.h"
 10 | #include "micro_test.h"
 11 | 
 12 | static void start_client_threads(char * op_type, int num_clients, GlobalConfig * config, 
 13 |         char * config_fname) {
 14 |     MicroRunClientArgs * client_args_list = (MicroRunClientArgs *)malloc(sizeof(MicroRunClientArgs) * num_clients);
 15 |     pthread_barrier_t insert_start_barrier;
 16 |     pthread_barrier_t insert_finish_barrier;
 17 |     pthread_barrier_t update_start_barrier;
 18 |     pthread_barrier_t update_finish_barrier;
 19 |     pthread_barrier_t search_start_barrier;
 20 |     pthread_barrier_t search_finish_barrier;
 21 |     pthread_barrier_t delete_start_barrier;
 22 |     pthread_barrier_t delete_finish_barrier;
 23 |     pthread_barrier_t global_timer_barrier;
 24 |     pthread_barrier_init(&insert_start_barrier, NULL, num_clients);
 25 |     pthread_barrier_init(&insert_finish_barrier, NULL, num_clients);
 26 |     pthread_barrier_init(&update_start_barrier, NULL, num_clients);
 27 |     pthread_barrier_init(&update_finish_barrier, NULL, num_clients);
 28 |     pthread_barrier_init(&search_start_barrier, NULL, num_clients);
 29 |     pthread_barrier_init(&search_finish_barrier, NULL, num_clients);
 30 |     pthread_barrier_init(&delete_start_barrier, NULL, num_clients);
 31 |     pthread_barrier_init(&delete_finish_barrier, NULL, num_clients);
 32 |     pthread_barrier_init(&global_timer_barrier, NULL, num_clients);
 33 |     volatile bool should_stop = false;
 34 | 
 35 |     pthread_t tid_list[num_clients];
 36 |     for (int i = 0; i < num_clients; i ++) {
 37 |         client_args_list[i].client_id    = config->server_id - config->memory_num;
 38 |         client_args_list[i].thread_id    = i;
 39 |         client_args_list[i].num_threads  = num_clients;
 40 |         client_args_list[i].main_core_id = config->main_core_id + i * 2;
 41 |         client_args_list[i].poll_core_id = config->poll_core_id + i * 2;
 42 |         client_args_list[i].config_file   = config_fname;
 43 |         client_args_list[i].insert_start_barrier= &insert_start_barrier;
 44 |         client_args_list[i].insert_finish_barrier= &insert_finish_barrier;
 45 |         client_args_list[i].update_start_barrier= &update_start_barrier;
 46 |         client_args_list[i].update_finish_barrier= &update_finish_barrier;
 47 |         client_args_list[i].search_start_barrier= &search_start_barrier;
 48 |         client_args_list[i].search_finish_barrier= &search_finish_barrier;
 49 |         client_args_list[i].delete_start_barrier= &delete_start_barrier;
 50 |         client_args_list[i].delete_finish_barrier= &delete_finish_barrier;
 51 |         client_args_list[i].timer_barrier = &global_timer_barrier;
 52 |         client_args_list[i].should_stop   = &should_stop;
 53 |         client_args_list[i].ret_num_insert_ops = 0;
 54 |         client_args_list[i].ret_num_update_ops = 0;
 55 |         client_args_list[i].ret_num_search_ops = 0;
 56 |         client_args_list[i].ret_num_delete_ops = 0;
 57 |         client_args_list[i].ret_fail_insert_num = 0;
 58 |         client_args_list[i].ret_fail_update_num = 0;
 59 |         client_args_list[i].ret_fail_search_num = 0;
 60 |         client_args_list[i].ret_fail_delete_num = 0;
 61 |         client_args_list[i].op_type = op_type;
 62 |         pthread_t tid;
 63 |         pthread_create(&tid, NULL, run_client_cr, &client_args_list[i]);
 64 |         tid_list[i] = tid;
 65 |     }
 66 | 
 67 |     uint32_t total_insert_tpt = 0;
 68 |     uint32_t total_insert_failed = 0;
 69 |     uint32_t total_update_tpt = 0;
 70 |     uint32_t total_update_failed = 0;
 71 |     uint32_t total_search_tpt = 0;
 72 |     uint32_t total_search_failed = 0;
 73 |     uint32_t total_delete_tpt = 0;
 74 |     uint32_t total_delete_failed = 0;
 75 |     for (int i = 0; i < num_clients; i ++) {
 76 |         pthread_join(tid_list[i], NULL);
 77 |         total_insert_tpt += client_args_list[i].ret_num_insert_ops;
 78 |         total_update_tpt += client_args_list[i].ret_num_update_ops;
 79 |         total_search_tpt += client_args_list[i].ret_num_search_ops;
 80 |         total_delete_tpt += client_args_list[i].ret_num_delete_ops;
 81 |         total_insert_failed += client_args_list[i].ret_fail_insert_num;
 82 |         total_update_failed += client_args_list[i].ret_fail_update_num;
 83 |         total_search_failed += client_args_list[i].ret_fail_search_num;
 84 |         total_delete_failed += client_args_list[i].ret_fail_delete_num;
 85 |     }
 86 |     printf("insert total: %d ops\n", total_insert_tpt);
 87 |     printf("insert failed: %d ops\n", total_insert_failed);
 88 |     printf("insert tpt: %d ops/s\n", (total_insert_tpt - total_insert_failed) / config->workload_run_time);
 89 |     printf("update total: %d ops\n", total_update_tpt);
 90 |     printf("update failed: %d ops\n", total_update_failed);
 91 |     printf("update tpt: %d ops/s\n", (total_update_tpt - total_update_failed) / config->workload_run_time);
 92 |     printf("search total: %d ops\n", total_search_tpt);
 93 |     printf("search failed: %d ops\n", total_search_failed);
 94 |     printf("search tpt: %d ops/s\n", (total_search_tpt - total_search_failed) / config->workload_run_time);
 95 |     printf("delete total: %d ops\n", total_delete_tpt);
 96 |     printf("delete failed: %d ops\n", total_delete_failed);
 97 |     printf("delete tpt: %d ops/s\n", (total_delete_tpt - total_delete_failed) / config->workload_run_time);
 98 |     free(client_args_list);
 99 | }
100 | 
101 | int main(int argc, char ** argv) {
102 |     if (argc != 3) {
103 |         printf("Usage: %s path-to-config-file num-clients\n", argv[0]);
104 |         return 1;
105 |     }
106 | 
107 |     int num_clients = atoi(argv[2]);
108 | 
109 |     GlobalConfig config;
110 |     int ret = load_config(argv[1], &config);
111 |     assert(ret == 0);
112 | 
113 |     start_client_threads("INSERT", num_clients, &config, argv[1]);   
114 | }


--------------------------------------------------------------------------------
/setup/download_gdrive.py:
--------------------------------------------------------------------------------
1 | import gdown
2 | import sys
3 | 
4 | fid = sys.argv[1]
5 | output = sys.argv[2]
6 | 
7 | url = "https://drive.google.com/uc?id={}&export=download".format(fid)
8 | 
9 | gdown.download(url, output, quiet=False)


--------------------------------------------------------------------------------
/setup/download_workload.sh:
--------------------------------------------------------------------------------
 1 | # install python and gdown
 2 | sudo apt install python3-pip -y
 3 | pip3 install gdown
 4 | 
 5 | # download workload
 6 | echo "downloading workloads.tgz"
 7 | if [ ! -d "./workloads.tgz" ]; then
 8 |   python3 ./download_gdrive.py 1Ifd8AwQ5e6EMcm3l9yYn8tgI3qMwhRpb workloads.tgz
 9 | fi
10 | 
11 | echo "downloading micro-workloads.tgz"
12 | if [ ! -d "./micro-workloads.tgz" ]; then
13 |   python3 ./download_gdrive.py 1727S-g5j568BEgqMjc4zghT2_pz0EZhf micro-workloads.tgz
14 | fi
15 | 
16 | # decompress upd-workload
17 | echo "downloading upd-workloads"
18 | if [ ! -d "./upd-workloads" ]; then
19 |   python3 ./download_gdrive.py 1CJjkswX08XqoF2RaxXBiKgWapjyMrXdi upd-workloads.tgz
20 | fi
21 | 
22 | # decompress workload
23 | echo "decompressing workload files"
24 | if [ ! -d "./workloads" ]; then
25 |   tar zxvf workloads.tgz
26 | fi
27 | 
28 | if [ ! -d "./micro-workloads" ]; then
29 |   tar zxvf micro-workloads.tgz
30 | fi
31 | 
32 | if [ ! -d "./upd-workloads" ]; then
33 |   tar zxvf upd-workloads.tgz
34 | fi


--------------------------------------------------------------------------------
/setup/setup-env.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #!/bin/bash
 3 | 
 4 | mode="$1"
 5 | ubuntu_version=$(lsb_release -r -s)
 6 | 
 7 | if [ $ubuntu_version == "18.04" ]; then
 8 |   wget https://content.mellanox.com/ofed/MLNX_OFED-4.9-5.1.0.0/MLNX_OFED_LINUX-4.9-5.1.0.0-ubuntu18.04-x86_64.tgz
 9 |   mv MLNX_OFED_LINUX-4.9-5.1.0.0-ubuntu18.04-x86_64.tgz ofed.tgz
10 | elif [ $ubuntu_version == "20.04" ]; then
11 |   wget https://content.mellanox.com/ofed/MLNX_OFED-4.9-5.1.0.0/MLNX_OFED_LINUX-4.9-5.1.0.0-ubuntu20.04-x86_64.tgz
12 |   mv MLNX_OFED_LINUX-4.9-5.1.0.0-ubuntu20.04-x86_64.tgz ofed.tgz
13 | else
14 |   echo "Wrong ubuntu distribution for $mode!"
15 |   exit 0
16 | fi
17 | echo $mode $ubuntu_version $ofed_fid
18 | 
19 | sudo apt update -y
20 | 
21 | # install anaconda
22 | mkdir install
23 | mv ofed.tgz install
24 | 
25 | cd install
26 | if [ ! -f "./anaconda-install.sh" ]; then
27 |   wget https://repo.anaconda.com/archive/Anaconda3-2022.05-Linux-x86_64.sh -O anaconda-install.sh
28 | fi
29 | if [ ! -d "$HOME/anaconda3" ]; then
30 |   chmod +x anaconda-install.sh
31 |   ./anaconda-install.sh -b
32 |   export PATH=$PATH:$HOME/anaconda3/bin
33 |   # add conda to path
34 |   echo PATH=$PATH:$HOME/anaconda3/bin >> $HOME/.bashrc
35 |   conda init
36 |   source ~/.bashrc
37 |   # activate base
38 | fi
39 | conda activate base
40 | cd ..
41 | 
42 | pip install gdown
43 | sudo apt install memcached -y
44 | sudo apt install libtbb-dev libboost-all-dev -y
45 | 
46 | # install ofed
47 | cd install
48 | if [ ! -d "./ofed" ]; then
49 |   tar zxf ofed.tgz
50 |   mv MLNX* ofed
51 | fi
52 | cd ofed
53 | sudo ./mlnxofedinstall --force
54 | if [ $mode == "scalestore" ]; then
55 |   sudo /etc/init.d/openibd restart
56 | fi
57 | cd ..
58 | 
59 | # install cmake
60 | cd install
61 | if [ ! -f cmake-3.16.8.tar.gz ]; then
62 |   wget https://cmake.org/files/v3.16/cmake-3.16.8.tar.gz
63 | fi
64 | if [ ! -d "./cmake-3.16.8" ]; then
65 |   tar zxf cmake-3.16.8.tar.gz
66 |   cd cmake-3.16.8 && ./configure && make -j 4 && sudo make install
67 | fi
68 | cd ..
69 | 
70 | # install gtest
71 | if [ ! -d "/usr/src/gtest" ]; then
72 |   sudo apt install -y libgtest-dev
73 | fi
74 | cd /usr/src/gtest
75 | sudo cmake .
76 | sudo make
77 | sudo make install


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | list(APPEND source_ddckv 
 2 |     nm.cc 
 3 |     ib.cc 
 4 |     server_mm.cc 
 5 |     server.cc
 6 |     kv_utils.cc
 7 |     hashtable.cc
 8 |     client_mm.cc
 9 |     client.cc
10 |     client_cr.cc)
11 | 
12 | add_library(libddckv STATIC ${source_ddckv})
13 | 
14 | target_compile_options(
15 |     libddckv
16 |     PRIVATE
17 |     "-O2"
18 |     # ${CMAKE_CXX_FLAGS_DEBUG}
19 |     "-g"
20 |     # "-D_DEBUG"
21 | )
22 | 
23 | target_link_libraries(libddckv ${Boost_LIBRARIES} boost_context boost_fiber tbb)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/client_cr.h:
--------------------------------------------------------------------------------
  1 | #ifndef DDCKV_CLIENT_CR_H_
  2 | #define DDCKV_CLIENT_CR_H_
  3 | 
  4 | #include <map>
  5 | 
  6 | #include <pthread.h>
  7 | #include <infiniband/verbs.h>
  8 | #include <assert.h>
  9 | #include <sys/time.h>
 10 | 
 11 | #include <string>
 12 | #include <boost/fiber/all.hpp>
 13 | 
 14 | #include "client_mm.h"
 15 | #include "nm.h"
 16 | #include "kv_utils.h"
 17 | #include "hashtable.h"
 18 | #include "ib.h"
 19 | #include "kv_debug.h"
 20 | #include "client.h"
 21 | 
 22 | class ClientCR {
 23 | private:
 24 |     ClientMM          * mm_;
 25 |     UDPNetworkManager * nm_;
 26 | 
 27 |     uint32_t my_server_id_;
 28 |     uint32_t num_replication_;
 29 |     uint32_t num_memory_;
 30 |     uint32_t num_idx_rep_;
 31 | 
 32 |     uint8_t  pr_log_server_id_;
 33 |     uint64_t pr_log_head_;
 34 |     uint64_t pr_log_tail_;
 35 | 
 36 |     uint64_t remote_global_meta_addr_;
 37 |     uint64_t remote_meta_addr_;
 38 |     uint64_t remote_gc_addr_;
 39 |     uint64_t remote_root_addr_;
 40 | 
 41 |     uint64_t server_st_addr_;
 42 |     uint64_t server_data_len_;
 43 | 
 44 |     float miss_rate_threash_;
 45 | 
 46 |     RaceHashRoot * race_root_;
 47 |     struct ibv_mr * race_root_mr_;
 48 | 
 49 |     void * local_buf_;
 50 |     struct ibv_mr * local_buf_mr_;
 51 | 
 52 |     void * input_buf_;
 53 |     struct ibv_mr * input_buf_mr_;
 54 | 
 55 |     uint64_t * coro_local_addr_list_;
 56 | 
 57 |     std::map<std::string, LocalCacheEntry *> addr_cache_;
 58 |     std::map<uint32_t, struct MrInfo *> server_mr_info_map_;
 59 | 
 60 |     // core bind information
 61 |     uint32_t main_core_id_;
 62 |     uint32_t poll_core_id_;
 63 |     uint32_t bg_core_id_;
 64 |     uint32_t gc_core_id_;
 65 | 
 66 |     // crash testing information
 67 |     std::map<uint8_t, bool> server_crash_map_;
 68 |     std::vector<ClientMetaAddrInfo> meta_addr_info_;
 69 | 
 70 | // private inline methods
 71 | private:
 72 |     inline int get_race_root() {
 73 |         int ret = nm_->nm_rdma_read_from_sid((void *)race_root_, race_root_mr_->lkey, sizeof(RaceHashRoot),
 74 |                 remote_root_addr_, server_mr_info_map_[0]->rkey, 0);
 75 |         // assert(ret == 0);
 76 |         return 0;
 77 |     }
 78 | 
 79 |     inline int write_race_root() {
 80 |         int ret = 0;
 81 |         for (int i = 0; i < num_replication_; i ++) {
 82 |             ret = nm_->nm_rdma_write_to_sid((void *)race_root_, race_root_mr_->lkey, sizeof(RaceHashRoot),
 83 |                     remote_root_addr_, server_mr_info_map_[i]->rkey, i);
 84 |             // assert(ret == 0);
 85 |         }
 86 |         return 0;
 87 |     }
 88 | 
 89 |     inline char * get_key(KVInfo * kv_info) {
 90 |         return (char *)((uint64_t)kv_info->l_addr + sizeof(KVLogHeader));
 91 |     }
 92 | 
 93 |     inline char * get_value(KVInfo * kv_info) {
 94 |         return (char *)((uint64_t)kv_info->l_addr + sizeof(KVLogHeader) + kv_info->key_len);
 95 |     }
 96 | 
 97 |     inline KVLogHeader * get_header(KVInfo * kv_info) {
 98 |         return (KVLogHeader *)kv_info->l_addr;
 99 |     }
100 | 
101 |     inline void update_cache(std::string key_str, RaceHashSlot * slot_info, uint64_t * r_slot_addr_list) {
102 |         // char key_buf[128] = {0};
103 |         // memcpy(key_buf, get_key(kv_info), kv_info->key_len);
104 |         // std::string tmp_key(key_buf);
105 |         
106 |         std::map<std::string, LocalCacheEntry *>::iterator it = addr_cache_.find(key_str);
107 |         if (it != addr_cache_.end()) {
108 |             LocalCacheEntry * entry = it->second;
109 |             // check if is miss
110 |             if (*(uint64_t *)(&entry->l_slot_ptr) != *(uint64_t *)slot_info) {
111 |                 entry->miss_cnt ++;
112 |                 memcpy(&entry->l_slot_ptr, slot_info, sizeof(RaceHashSlot));
113 |                 for (int i = 0; i < num_idx_rep_; i ++) {
114 |                     entry->r_slot_addr[i] = r_slot_addr_list[i];
115 |                 }
116 |             }
117 |             // update access cnt
118 |             entry->acc_cnt ++;
119 |             return;
120 |         }
121 |         
122 |         LocalCacheEntry * tmp_value = (LocalCacheEntry *)malloc(sizeof(LocalCacheEntry));
123 |         memcpy(&tmp_value->l_slot_ptr, slot_info, sizeof(RaceHashSlot));
124 |         tmp_value->acc_cnt  = 1;
125 |         tmp_value->miss_cnt = 0;
126 |         
127 |         for (int i = 0; i < num_idx_rep_; i ++) {
128 |             tmp_value->r_slot_addr[i] = r_slot_addr_list[i];
129 |         }
130 |         
131 |         addr_cache_[key_str] = tmp_value;
132 |         // print_log(DEBUG, "\t[%s] %s->slot(%lx) kv(%lx)", __FUNCTION__, key_buf, r_slot_addr_list[0], HashIndexConvert40To64Bits(tmp_value->l_slot_ptr.pointer));
133 |     }
134 | 
135 |     inline LocalCacheEntry * check_cache(std::string key_str) {
136 |         // char key_buf[128] = {0};
137 |         // memcpy(key_buf, get_key(kv_info), kv_info->key_len);
138 |         // std::string tmp_key(key_buf);
139 | 
140 |         std::map<std::string, LocalCacheEntry *>::iterator it = addr_cache_.find(key_str);
141 |         if (it == addr_cache_.end()) {
142 |             // print_log(DEBUG, "\t\t[%s] cache miss", __FUNCTION__);
143 |             return NULL;
144 |         }
145 |         if (HashIndexConvert40To64Bits(it->second->l_slot_ptr.pointer) == 0) {
146 |             free(it->second);
147 |             addr_cache_.erase(it);
148 |             // print_log(DEBUG, "\t\t[%s] cache empty pointer miss", __FUNCTION__);
149 |             return NULL;
150 |         }
151 | 
152 |         float miss_rate = ((float)it->second->miss_cnt / it->second->acc_cnt);
153 |         if (miss_rate > miss_rate_threash_) {
154 |             return NULL;
155 |         }
156 |         // print_log(DEBUG, "\t\t[%s] cache hit", __FUNCTION__);
157 |         return it->second;
158 |     }
159 | 
160 | inline void remove_cache(std::string key_str) {
161 |         std::map<std::string, LocalCacheEntry *>::iterator it = addr_cache_.find(key_str);
162 |         if (it != addr_cache_.end()) {
163 |             addr_cache_.erase(it);
164 |         }
165 |     }
166 | 
167 |     inline bool delete_cache(KVInfo * kv_info) {
168 |         char key_buf[256];
169 |         memset(key_buf, 0, 256);
170 |         memcpy(key_buf, get_key(kv_info), kv_info->key_len);
171 |         std::string tmp_key(key_buf);
172 | 
173 |         return addr_cache_.erase(tmp_key);
174 |     }
175 | 
176 |     inline bool check_key(KVLogHeader * log_header, KVInfo * kv_info) {
177 |         uint64_t r_key_addr = (uint64_t)log_header + sizeof(log_header);
178 |         uint64_t l_key_addr = (uint64_t)kv_info->l_addr + sizeof(KVLogHeader);
179 |         return CheckKey((void *)r_key_addr, log_header->key_length, (void *)l_key_addr, kv_info->key_len);
180 |     }
181 | 
182 |     inline int poll_completion(std::map<uint64_t, struct ibv_wc *> & wait_wrid_wc_map) {
183 |         int ret = 0;
184 |         while (ib_is_all_wrid_finished(wait_wrid_wc_map) == false) {
185 |             // print_log(DEBUG, "\t\t[%s] fiber: %ld yielding", __FUNCTION__, boost::this_fiber::get_id());
186 |             // boost::this_fiber::yield();
187 |             boost::this_fiber::sleep_for(std::chrono::microseconds(10));
188 |             ret = nm_->nm_check_completion(wait_wrid_wc_map);
189 |             // kv_assert(ret == 0);
190 |         }
191 |         return ret;
192 |     }
193 | 
194 |     inline int poll_completion(std::map<uint64_t, struct ibv_wc *> & wait_wrid_wc_map, volatile bool * should_stop) {
195 |         int ret = 0;
196 |         while (ib_is_all_wrid_finished(wait_wrid_wc_map) == false && (*should_stop) == false) {
197 |             // print_log(DEBUG, "\t\t[%s] fiber: %ld yielding", __FUNCTION__, boost::this_fiber::get_id());
198 |             if (*(should_stop)) {
199 |                 return ret;
200 |             }
201 |             boost::this_fiber::yield();
202 |             ret = nm_->nm_check_completion(wait_wrid_wc_map);
203 |             // kv_assert(ret == 0);
204 |         }
205 |         return ret;
206 |     }
207 | 
208 | // private methods
209 | private:
210 |     bool init_is_finished();
211 |     int  sync_init_finish();
212 |     int  connect_ib_qps();
213 |     int  write_client_meta_info();
214 | 
215 |     void get_kv_addr_info(KVHashInfo * a_kv_hash_info, __OUT KVTableAddrInfo * a_kv_addr_info);
216 |     void get_kv_hash_info(KVInfo * a_kv_info, __OUT KVHashInfo * a_kv_hash_info);
217 |     void fill_slot(ClientMMAllocCtx * mm_alloc_ctx, KVHashInfo * a_kv_hash_info, 
218 |         __OUT RaceHashSlot * local_slot);
219 |     void fill_cas_addr(KVTableAddrInfo * addr_info, uint64_t remote_slot_addr, RaceHashSlot * old_local_slot_addr, RaceHashSlot * new_local_slot_addr,
220 |         __OUT KVCASAddr * pr_cas_addr, __OUT KVCASAddr * bk_cas_addr);
221 |     void fill_cas_addr(KVReqCtx * ctx, uint64_t * remote_slot_addr, RaceHashSlot * old_local_slot_addr, RaceHashSlot * new_local_slot_addr);
222 |     void fill_heartbeat_addr(uint8_t server_id, __OUT KVRWAddr * hb_addr);
223 |     void fill_invalid_addr(KVReqCtx * ctx, RaceHashSlot * local_slot);
224 |     
225 |     IbvSrList * gen_read_bucket_sr_lists(KVReqCtx * ctx, __OUT uint32_t * num_sr_lists);
226 |     void        free_read_bucket_sr_lists(IbvSrList * sr_list);
227 |     IbvSrList * gen_read_all_bucket_sr_lists(KVReqCtx * ctx, __OUT uint32_t * num_sr_lists);
228 |     void        free_read_all_bucket_sr_lists(IbvSrList * sr_list);
229 |     IbvSrList * gen_read_bucket_sr_lists_on_crash(KVReqCtx * ctx, __OUT uint32_t * num_sr_lists);
230 |     void        free_read_bucket_sr_lists_on_crash(IbvSrList * sr_lists, int num_sr_lists);
231 |     IbvSrList * gen_write_kv_sr_lists(uint32_t coro_id, KVInfo * a_kv_info, ClientMMAllocCtx * r_mm_info, __OUT uint32_t * num_sr_lists);
232 |     void        free_write_kv_sr_lists(IbvSrList * sr_list);
233 |     IbvSrList * gen_write_del_log_sr_lists(uint32_t coro_id, KVInfo * a_kv_info, 
234 |         ClientMMAllocCtx * r_mm_info, __OUT uint32_t * num_sr_lists);
235 |     void        free_write_del_log_sr_lists(IbvSrList * sr_list);
236 |     IbvSrList * gen_read_kv_sr_lists(uint32_t coro_id, const std::vector<KVRWAddr> & r_addr_list, __OUT uint32_t * num_sr_lists);
237 |     void        free_read_kv_sr_lists(IbvSrList * sr_lists, int num_sr_lists);
238 |     IbvSrList * gen_cas_sr_lists(uint32_t coro_id, const std::vector<KVCASAddr> & cas_addr_list, __OUT uint32_t * num_sr_lists);
239 |     void        free_cas_sr_lists(IbvSrList * sr_lists, int num_sr_lists);
240 |     IbvSrList * gen_invalid_sr_lists(uint32_t coro_id, KVRWAddr * r_addr, uint64_t local_data_addr);
241 |     void        free_invalid_sr_lists(IbvSrList * sr_list);
242 |     IbvSrList * gen_read_cache_kv_sr_lists(uint32_t coro_id, RaceHashSlot * local_slot_ptr, uint64_t local_addr);
243 |     void        free_read_cache_kv_sr_lists(IbvSrList * sr_lists);
244 |     IbvSrList * gen_write_hb_sr_lists(uint32_t coro_id, std::vector<KVRWAddr> & rw_addr_list, __OUT uint32_t * num_sr_lists);
245 |     void        free_write_hb_sr_lists(IbvSrList * sr_lists, int num_sr_lists);
246 |     IbvSrList * gen_log_commit_sr_lists(uint32_t coro_id, void * local_addr, uint32_t size, 
247 |         std::vector<KVRWAddr> & rw_addr_list, __OUT uint32_t * num_sr_lists);
248 |     void        free_log_commit_sr_lists(IbvSrList * sr_lists, int num_sr_lists);
249 | 
250 |     void prepare_request(KVReqCtx * ctx);
251 |     void prepare_log_commit_addrs(KVReqCtx * ctx);
252 |     void find_kv_in_buckets(KVReqCtx * ctx);
253 |     void find_kv_in_buckets_on_crash(KVReqCtx * ctx);
254 |     void find_empty_slot(KVReqCtx * ctx);
255 |     int32_t find_match_kv_idx(KVReqCtx * ctx);
256 |     void get_local_bucket_info(KVReqCtx * ctx);
257 |     void modify_primary_idx(KVReqCtx * ctx);
258 |     void modify_primary_idx_sync(KVReqCtx * ctx);
259 |     void kv_log_commit(KVReqCtx * ctx);
260 |     void kv_log_commit_sync(KVReqCtx * ctx);
261 |     void check_recover_need_cas_pr(KVReqCtx * ctx);
262 |     void recover_modified_slots(KVReqCtx * ctx);
263 |     void check_failed_index(KVReqCtx * ctx);
264 |     void check_failed_data(KVReqCtx * ctx);
265 |     RaceHashSlot * check_failed_cache(LocalCacheEntry * local_cache_entry);
266 |     int32_t find_healthy_idx(uint8_t target_server, uint64_t target_addr);
267 |     ClientMetaAddrInfo * find_corresponding_addr_info(uint8_t target_server, uint64_t target_addr);
268 | 
269 |     void kv_search_read_buckets(KVReqCtx * ctx);
270 |     void kv_search_read_buckets_sync(KVReqCtx * ctx);
271 |     void kv_search_read_kv(KVReqCtx * ctx);
272 |     void kv_search_read_kv_sync(KVReqCtx * ctx);
273 |     void kv_search_check_kv(KVReqCtx * ctx);
274 |     void kv_search_read_all_healthy_index(KVReqCtx * ctx);
275 |     void kv_search_read_failed_kv(KVReqCtx * ctx);
276 | 
277 |     void kv_insert_read_buckets_and_write_kv(KVReqCtx * ctx);
278 |     void kv_insert_read_buckets_and_write_kv_sync(KVReqCtx * ctx);
279 |     void kv_insert_backup_consensus_0(KVReqCtx * ctx);
280 |     void kv_insert_backup_consensus_0_sync(KVReqCtx * ctx);
281 |     void kv_insert_commit_log(KVReqCtx * ctx);
282 |     void kv_insert_commit_log_sync(KVReqCtx * ctx);
283 |     void kv_insert_cas_primary(KVReqCtx * ctx);
284 |     void kv_insert_cas_primary_sync(KVReqCtx * ctx);
285 | 
286 |     void kv_update_read_buckets_and_write_kv(KVReqCtx * ctx);
287 |     void kv_update_read_buckets_and_write_kv_sync(KVReqCtx * ctx);
288 |     void kv_update_read_kv(KVReqCtx * ctx);
289 |     void kv_update_read_kv_sync(KVReqCtx * ctx);
290 |     void kv_update_backup_consensus_0(KVReqCtx * ctx);
291 |     void kv_update_backup_consensus_0_sync(KVReqCtx * ctx);
292 |     void kv_update_commit_log(KVReqCtx * ctx);
293 |     void kv_update_commit_log_sync(KVReqCtx * ctx);
294 |     void kv_update_cas_primary(KVReqCtx * ctx);
295 |     void kv_update_cas_primary_sync(KVReqCtx * ctx);
296 |     void kv_update_bg_operations(KVReqCtx * ctx);
297 | 
298 |     void kv_delete_read_buckets_write_log(KVReqCtx * ctx);
299 |     void kv_delete_read_buckets_write_log_sync(KVReqCtx * ctx);
300 |     void kv_delete_read_kv(KVReqCtx * ctx);
301 |     void kv_delete_read_kv_sync(KVReqCtx * ctx);
302 |     void kv_delete_backup_consensus_0(KVReqCtx * ctx);
303 |     void kv_delete_backup_consensus_0_sync(KVReqCtx * ctx);
304 |     void kv_delete_commit_log(KVReqCtx * ctx);
305 |     void kv_delete_commit_log_sync(KVReqCtx * ctx);
306 |     void kv_delete_cas_primary(KVReqCtx * ctx);
307 |     void kv_delete_cas_primary_sync(KVReqCtx * ctx);
308 |     void kv_delete_bg_operations(KVReqCtx * ctx);
309 | 
310 |     int post_sr_lists_and_yield_wait(IbvSrList * sr_lists, uint32_t sr_lists_num);
311 |     int post_sr_list_batch_and_yield_wait(std::vector<IbvSrList *> sr_list_batch, 
312 |         std::vector<uint32_t> sr_list_batch_num);
313 | 
314 |     void init_kv_req_ctx(KVReqCtx * req_ctx, KVInfo * kv_info, char * operation);
315 |     void update_log_tail(KVLogTail * kv_header, ClientMMAllocCtx * alloc_ctx);
316 | 
317 |     int  client_recovery();
318 |     void init_recover_req_ctx(KVInfo * kv_info, __OUT KVReqCtx * rec_ctx);
319 | 
320 |     void iteratively_cas_sync(IbvSrList * sr_lists, uint32_t sr_lists_num);
321 |     void iteratively_cas_async(IbvSrList * sr_lists, uint32_t sr_lsits_num);
322 |     int init_hash_table();
323 | 
324 | // inline methods
325 | public:
326 |     inline void * get_input_buf() {
327 |         return input_buf_;
328 |     }
329 | 
330 |     inline uint32_t get_input_buf_lkey() {
331 |         return input_buf_mr_->lkey;
332 |     }
333 | 
334 |     inline struct ibv_mr * get_local_buf_mr() {
335 |         return local_buf_mr_;
336 |     }
337 | 
338 |     // public methods
339 | public:
340 |     ClientCR(const struct GlobalConfig * conf);
341 |     ~ClientCR();
342 | 
343 |     KVInfo   * kv_info_list_;
344 |     KVReqCtx * kv_req_ctx_list_;
345 |     uint32_t   num_total_operations_;
346 |     uint32_t   num_local_operations_;
347 |     uint32_t   num_coroutines_;
348 |     int        workload_run_time_;
349 |     int        micro_workload_num_;
350 | 
351 |     int kv_update(KVInfo * kv_info);
352 |     int kv_update(KVReqCtx * ctx);
353 |     int kv_update_w_cache(KVInfo * kv_info);
354 |     int kv_update_w_crash(KVReqCtx * ctx, int crash_point);
355 |     int kv_update_sync(KVReqCtx * ctx);
356 |     
357 |     int kv_insert(KVInfo * kv_info);
358 |     int kv_insert(KVReqCtx * ctx);
359 |     int kv_insert_w_cache(KVInfo * kv_info);
360 |     int kv_insert_w_crash(KVReqCtx * ctx, int crash_point);
361 |     int kv_insert_sync(KVReqCtx * ctx);
362 | 
363 |     void * kv_search(KVInfo * kv_info);
364 |     void * kv_search(KVReqCtx * ctx);
365 |     void * kv_search_w_cache(KVInfo * kv_info);
366 |     void * kv_search_on_crash(KVReqCtx * ctx);
367 |     void * kv_search_sync(KVReqCtx * ctx);
368 |     
369 |     int kv_delete(KVInfo * kv_info);
370 |     int kv_delete(KVReqCtx * ctx);
371 |     int kv_delete_w_cache(KVInfo * kv_info);
372 |     int kv_delete_sync(KVReqCtx * ctx);
373 | 
374 |     
375 |     pthread_t start_polling_thread();
376 |     boost::fibers::fiber start_polling_fiber();
377 |     void stop_polling_thread();
378 | 
379 |     void init_kvreq_space(uint32_t coro_id, uint32_t kv_req_st_idx, uint32_t num_ops);
380 |     void init_kv_insert_space(void * coro_local_addr, uint32_t kv_req_idx);
381 |     void init_kv_insert_space(void * coro_local_addr, KVReqCtx * kv_req_ctx);
382 |     void init_kv_search_space(void * coro_local_addr, uint32_t kv_req_idx);
383 |     void init_kv_search_space(void * coro_local_addr, KVReqCtx * kv_req_ctx);
384 |     void init_kv_update_space(void * coro_local_addr, uint32_t kv_req_idx);
385 |     void init_kv_update_space(void * coro_local_addr, KVReqCtx * kv_req_ctx);
386 |     void init_kv_delete_space(void * coro_local_addr, uint32_t kv_req_idx);
387 |     void init_kv_delete_space(void * coro_local_addr, KVReqCtx * kv_req_ctx);
388 | 
389 |     void crash_server(const std::vector<uint8_t> & fail_server_list);
390 |     void get_addr_translate_table(const std::vector<uint8_t> & server_id_list);
391 | 
392 |     void dump_cache();
393 |     void load_cache();
394 |     int  load_seq_kv_requests(uint32_t num_keys, char * op_type);
395 |     int  load_kv_requests(const char * fname, uint32_t st_idx, int32_t num_ops);
396 | 
397 |     int get_num_rep();
398 | 
399 | // for testing
400 | public:
401 |     int test_get_root(__OUT RaceHashRoot * race_root);
402 |     int test_get_log_meta_info(__OUT ClientLogMetaInfo * remote_log_meta_info_list, 
403 |             __OUT ClientLogMetaInfo * local_meta);
404 |     int test_get_pr_log_meta_info(__OUT ClientLogMetaInfo * pr_log_meta_info);
405 |     int test_get_remote_log_header(uint8_t server_id, uint64_t raddr, uint32_t buf_size,
406 |             __OUT void * buf);
407 |     int test_get_local_mm_blocks(__OUT ClientMMBlock * mm_block_list, __OUT uint64_t * list_len);
408 |     ClientMetaAddrInfo ** test_get_meta_addr_info(__OUT uint64_t * list_len);
409 | 
410 |     inline ClientMM * get_mm() {
411 |         return mm_;
412 |     }
413 | 
414 |     inline UDPNetworkManager * get_nm() {
415 |         return nm_;
416 |     }
417 | };
418 | 
419 | void * client_cr_ops_fb_cnt_time(void * arg);
420 | void * client_cr_ops_fb_cnt_ops(void * arg);
421 | void * client_cr_ops_fb_cnt_ops_micro(void * arg);
422 | void * client_cr_ops_fb_on_crash(void * arg);
423 | 
424 | #endif


--------------------------------------------------------------------------------
/src/client_mm.h:
--------------------------------------------------------------------------------
  1 | #ifndef DDCKV_CLIENT_MM_H_
  2 | #define DDCKV_CLIENT_MM_H_
  3 | 
  4 | #include <infiniband/verbs.h>
  5 | 
  6 | #include <vector>
  7 | #include <boost/fiber/all.hpp>
  8 | #include <thread>
  9 | #include <mutex>
 10 | #include <queue>
 11 | #include <deque>
 12 | #include <unordered_map>
 13 | 
 14 | #include "kv_utils.h"
 15 | #include "nm.h"
 16 | #include "spinlock.h"
 17 | #include "hashtable.h"
 18 | 
 19 | #define MAX_NUM_SUBBLOCKS 4
 20 | #define MAX_WATER_MARK 0.7
 21 | 
 22 | typedef struct TagClientMMBlock {
 23 |     struct MrInfo mr_info_list[MAX_REP_NUM];
 24 |     uint8_t server_id_list[MAX_REP_NUM];
 25 |     bool * bmap;
 26 |     uint32_t num_allocated;
 27 |     int32_t  prev_free_subblock_idx;
 28 |     int32_t  next_free_subblock_idx;
 29 |     int32_t  next_free_subblock_cnt;
 30 | 
 31 |     uint64_t next_mmblock_addr[MAX_REP_NUM];
 32 | } ClientMMBlock;
 33 | 
 34 | enum ClientAllocType {
 35 |     TYPE_SUBTABLE = 1,
 36 |     TYPE_KVBLOCK  = 2,
 37 | };
 38 | 
 39 | typedef struct TagClientMMAllocCtx {
 40 |     uint8_t  server_id_list[MAX_REP_NUM];
 41 |     uint64_t addr_list[MAX_REP_NUM];
 42 |     uint64_t prev_addr_list[MAX_REP_NUM];
 43 |     uint64_t next_addr_list[MAX_REP_NUM];
 44 |     uint32_t rkey_list[MAX_REP_NUM];
 45 |     uint32_t prev_rkey_list[MAX_REP_NUM];
 46 |     uint32_t next_rkey_list[MAX_REP_NUM];
 47 | 
 48 |     uint32_t num_subblocks;
 49 |     bool     need_change_prev;
 50 | } ClientMMAllocCtx;
 51 | 
 52 | typedef struct TagClientMMAllocSubtableCtx {
 53 |     uint8_t  server_id;
 54 |     uint64_t addr;
 55 | } ClientMMAllocSubtableCtx;
 56 | 
 57 | typedef struct TagRecoverLogInfo {
 58 |     KVLogTail * local_tail_addr;
 59 |     uint32_t    key_len;
 60 |     uint32_t    val_len;
 61 |     uint64_t    remote_addr;
 62 |     uint8_t     server_id;
 63 | } RecoverLogInfo;
 64 | 
 65 | typedef struct TagSubblockInfo {
 66 |     uint64_t addr_list[MAX_REP_NUM];
 67 |     uint32_t rkey_list[MAX_REP_NUM];
 68 |     uint8_t  server_id_list[MAX_REP_NUM];
 69 | } SubblockInfo;
 70 | 
 71 | class ClientMM {
 72 | private:
 73 |     uint32_t num_replication_;
 74 |     uint32_t num_idx_rep_;
 75 |     uint32_t num_memory_;
 76 | 
 77 |     std::vector<ClientMMBlock *> mm_blocks_;
 78 |     spinlock_t mm_blocks_lock_;
 79 |     uint32_t cur_mm_block_idx_;
 80 | 
 81 |     uint32_t subblock_num_;
 82 |     uint32_t last_allocated_;
 83 | 
 84 |     uint32_t bmap_block_num_;
 85 | 
 86 |     uint8_t  pr_log_server_id_;
 87 |     uint64_t pr_log_head_;
 88 | 
 89 |     uint64_t client_meta_addr_;
 90 |     uint64_t client_gc_addr_;
 91 | 
 92 |     uint64_t server_limit_addr_;
 93 |     uint64_t server_kv_area_off_;
 94 |     uint64_t server_kv_area_addr_;
 95 |     uint64_t server_num_blocks_;
 96 | 
 97 |     std::mutex alloc_new_block_lock_;
 98 |     bool is_allocing_new_block_;
 99 | 
100 |     // for recovery
101 |     void * recover_buf_;
102 |     struct ibv_mr             * recover_mr_;
103 |     std::vector<RecoverLogInfo> recover_log_info_list_;
104 |     std::unordered_map<uint64_t, bool> recover_addr_is_allocated_map_;
105 |     KVLogTail * log_tail_st_ptr_;
106 |     void * tmp_buf_;
107 | 
108 |     // modification
109 |     std::deque<SubblockInfo> subblock_free_queue_;
110 |     SubblockInfo             last_allocated_info_;
111 | 
112 | 
113 | 
114 |     // std::map<std::string, std::queue<SubblockInfo>> allocated_subblock_key_map_;
115 | 
116 |     struct timeval local_recover_space_et_;
117 |     struct timeval get_addr_meta_et_;
118 |     struct timeval traverse_log_et_;
119 | 
120 | // private methods
121 | private:
122 |     int init_get_new_block_from_server(UDPNetworkManager * nm);
123 |     int init_reg_space(struct MrInfo mr_inf_list[][MAX_REP_NUM], uint8_t server_id_list[][MAX_REP_NUM],
124 |             UDPNetworkManager * nm, int reg_type);
125 |     int dyn_get_new_block_from_server(UDPNetworkManager * nm);
126 |     int get_new_block_from_server(UDPNetworkManager * nm);
127 |     int local_reg_blocks(const struct MrInfo * mr_info_list, const uint8_t * server_id_list);
128 |     int reg_new_space(const struct MrInfo * mr_info_list, const uint8_t * server_id_list,
129 |             UDPNetworkManager * nm, int reg_type);
130 |     int dyn_reg_new_space(const struct MrInfo * mr_info_list, const uint8_t * server_id_list,
131 |             UDPNetworkManager * nm, int reg_type);
132 |     int32_t alloc_from_sid(uint32_t server_id, UDPNetworkManager * nm, int alloc_type,
133 |             __OUT struct MrInfo * mr_info);
134 |     void update_mm_block_next(ClientMMBlock * mm_block);
135 |     int remote_write_meta_addr(UDPNetworkManager * nm);
136 | 
137 |     int mm_recovery(UDPNetworkManager * nm);
138 |     int mm_recover_prepare_space(UDPNetworkManager * nm);
139 |     int get_remote_log_header(UDPNetworkManager * nm, uint8_t server_id, uint64_t r_addr, 
140 |             KVLogHeader * local_addr);
141 |     int mm_traverse_log(UDPNetworkManager * nm);
142 |     int mm_get_addr_meta(UDPNetworkManager * nm);
143 |     int mm_recover_mm_blocks(UDPNetworkManager * nm);
144 | 
145 |     uint32_t get_subblock_idx(uint64_t addr, ClientMMBlock * cur_block);
146 |     ClientMMBlock * get_new_mmblock();
147 | 
148 |     void gen_subblock_info(ClientMMBlock * mm_block, uint32_t subblock_idx, __OUT SubblockInfo * subblock_info);
149 | 
150 |     void get_block_map();
151 | 
152 | // inline private methods
153 | private:
154 |     inline uint32_t get_alloc_hint_rr() {
155 | #ifndef SERVER_MM
156 |         return last_allocated_ ++;
157 | #else   
158 |         // last_allocated_ ++;
159 |         // return last_allocated_ / 65536;
160 |         return last_allocated_ ++;
161 | #endif
162 |     }
163 | 
164 |     inline float get_water_mark() {
165 |         float num_used = 0;
166 |         for (size_t i = 0; i < mm_blocks_.size(); i ++) {
167 |             num_used += mm_blocks_[i]->num_allocated;
168 |         }
169 |         return num_used / (mm_blocks_.size() * subblock_num_);
170 |     }
171 | 
172 | // public methods
173 | public:
174 |     uint64_t mm_block_sz_;
175 |     uint64_t subblock_sz_;
176 |     // block_mapping
177 |     std::unordered_map<uint64_t, std::vector<uint64_t> > alloc_block_map_;
178 |     std::unordered_map<uint64_t, std::vector<uint64_t> > total_block_map_;
179 | 
180 |     // for free
181 |     std::unordered_map<std::string, uint64_t> free_faa_map_;
182 | 
183 |     ClientMM(const struct GlobalConfig * conf, 
184 |         UDPNetworkManager * nm);
185 |     ~ClientMM();
186 | 
187 |     void get_log_head(__OUT uint64_t * pr_log_head, __OUT uint64_t * bk_log_head);
188 |     
189 |     void mm_alloc(size_t size, UDPNetworkManager * nm, __OUT ClientMMAllocCtx * ctx);
190 |     void mm_alloc(size_t size, UDPNetworkManager * nm, std::string key, __OUT ClientMMAllocCtx * ctx);
191 |     void mm_alloc_log_info(RecoverLogInfo * log_info, __OUT ClientMMAllocCtx * ctx);
192 | 
193 |     void mm_free_cur(const ClientMMAllocCtx * ctx);
194 |     void mm_free(uint64_t orig_slot_value);
195 |     
196 |     void mm_alloc_subtable(UDPNetworkManager * nm, __OUT ClientMMAllocSubtableCtx * ctx);
197 | 
198 |     int  get_last_log_recover_info(__OUT RecoverLogInfo * recover_log_info);
199 |     void free_recover_buf();
200 | 
201 |     void get_time_bread_down(std::vector<struct timeval> & time_vec);
202 | 
203 | // inline public methods
204 | public:
205 |     inline uint64_t get_remote_meta_ptr() {
206 |         return client_meta_addr_;
207 |     }
208 | 
209 |     inline uint32_t get_num_mm_blocks() {
210 |         return mm_blocks_.size();
211 |     }
212 | 
213 |     inline bool should_alloc_new() {
214 |         float water_mark = get_water_mark();
215 |         return water_mark > MAX_WATER_MARK;
216 |     }
217 | 
218 |     inline bool should_start_gc() {
219 |         ClientMMBlock * cur_mmblock = mm_blocks_[cur_mm_block_idx_];
220 |         return cur_mmblock->next_free_subblock_cnt < MAX_NUM_SUBBLOCKS;
221 |     }
222 | 
223 |     inline ClientMMBlock * get_cur_mm_block() {
224 |         return mm_blocks_[cur_mm_block_idx_];
225 |     }
226 | 
227 |     inline void get_log_head(__OUT uint8_t * pr_log_server_id, __OUT uint64_t * pr_log_head) {
228 |         *pr_log_server_id = pr_log_server_id_;
229 |         *pr_log_head = pr_log_head_;
230 |     }
231 | 
232 |     inline size_t get_aligned_size(size_t size) {
233 |         if ((size % subblock_sz_) == 0) {
234 |             return size;
235 |         }
236 |         size_t aligned = ((size / subblock_sz_) + 1) * subblock_sz_;
237 |         return aligned;
238 |     }
239 | };
240 | 
241 | #endif


--------------------------------------------------------------------------------
/src/hashtable.cc:
--------------------------------------------------------------------------------
  1 | #include "hashtable.h"
  2 | 
  3 | #include <stdlib.h>
  4 | #include <assert.h>
  5 | 
  6 | #include "kv_debug.h"
  7 | 
  8 | #define NUMBER64_1 11400714785074694791ULL
  9 | #define NUMBER64_2 14029467366897019727ULL
 10 | #define NUMBER64_3 1609587929392839161ULL
 11 | #define NUMBER64_4 9650029242287828579ULL
 12 | #define NUMBER64_5 2870177450012600261ULL
 13 | 
 14 | #define hash_get64bits(x) hash_read64_align(x, align)
 15 | #define hash_get32bits(x) hash_read32_align(x, align)
 16 | #define shifting_hash(x, r) ((x << r) | (x >> (64 - r)))
 17 | #define TO64(x) (((U64_INT *)(x))->v)
 18 | #define TO32(x) (((U32_INT *)(x))->v)
 19 | 
 20 | typedef struct U64_INT {
 21 |     uint64_t v;
 22 | } U64_INT;
 23 | 
 24 | typedef struct U32_INT {
 25 |     uint32_t v;
 26 | } U32_INT;
 27 | 
 28 | static uint64_t hash_read64_align(const void * ptr, uint32_t align) {
 29 |     if (align == 0) {
 30 |         return TO64(ptr);
 31 |     }
 32 |     return *(uint64_t *)ptr;
 33 | }
 34 | 
 35 | static uint32_t hash_read32_align(const void * ptr, uint32_t align) {
 36 |     if (align == 0) {
 37 |         return TO32(ptr);
 38 |     }
 39 |     return *(uint32_t *)ptr;
 40 | }
 41 | 
 42 | static uint64_t string_key_hash_computation(const void * data, uint64_t length, 
 43 |         uint64_t seed, uint32_t align) {
 44 |     const uint8_t * p = (const uint8_t *)data;
 45 |     const uint8_t * end = p + length;
 46 |     uint64_t hash;
 47 | 
 48 |     if (length >= 32) {
 49 |         const uint8_t * const limitation  = end - 32;
 50 |         uint64_t v1 = seed + NUMBER64_1 + NUMBER64_2;
 51 |         uint64_t v2 = seed + NUMBER64_2;
 52 |         uint64_t v3 = seed + 0;
 53 |         uint64_t v4 = seed - NUMBER64_1;
 54 | 
 55 |         do {
 56 |             v1 += hash_get64bits(p) * NUMBER64_2;
 57 |             p += 8;
 58 |             v1 = shifting_hash(v1, 31);
 59 |             v1 *= NUMBER64_1;
 60 |             v2 += hash_get64bits(p) * NUMBER64_2;
 61 |             p += 8;
 62 |             v2 = shifting_hash(v2, 31);
 63 |             v2 *= NUMBER64_1;
 64 |             v3 += hash_get64bits(p) * NUMBER64_2;
 65 |             p += 8;
 66 |             v3 = shifting_hash(v3, 31);
 67 |             v3 *= NUMBER64_1;
 68 |             v4 += hash_get64bits(p) * NUMBER64_2;
 69 |             p += 8;
 70 |             v4 = shifting_hash(v4, 31);
 71 |             v4 *= NUMBER64_1;
 72 |         } while (p <= limitation);
 73 | 
 74 |         hash = shifting_hash(v1, 1) + shifting_hash(v2, 7) + shifting_hash(v3, 12) + shifting_hash(v4, 18);
 75 | 
 76 |         v1 *= NUMBER64_2;
 77 |         v1 = shifting_hash(v1, 31);
 78 |         v1 *= NUMBER64_1;
 79 |         hash ^= v1;
 80 |         hash = hash * NUMBER64_1 + NUMBER64_4;
 81 | 
 82 |         v2 *= NUMBER64_2;
 83 |         v2 = shifting_hash(v2, 31);
 84 |         v2 *= NUMBER64_1;
 85 |         hash ^= v2;
 86 |         hash = hash * NUMBER64_1 + NUMBER64_4;
 87 | 
 88 |         v3 *= NUMBER64_2;
 89 |         v3 = shifting_hash(v3, 31);
 90 |         v3 *= NUMBER64_1;
 91 |         hash ^= v3;
 92 |         hash = hash * NUMBER64_1 + NUMBER64_4;
 93 | 
 94 |         v4 *= NUMBER64_2;
 95 |         v4 = shifting_hash(v4, 31);
 96 |         v4 *= NUMBER64_1;
 97 |         hash ^= v4;
 98 |         hash = hash * NUMBER64_1 + NUMBER64_4;
 99 |     } else {
100 |         hash = seed + NUMBER64_5;
101 |     }
102 | 
103 |     hash += (uint64_t)length;
104 | 
105 |     while (p + 8 <= end) {
106 |         uint64_t k1 = hash_get64bits(p);
107 |         k1 *= NUMBER64_2;
108 |         k1 = shifting_hash(k1, 31);
109 |         k1 *= NUMBER64_1;
110 |         hash ^= k1;
111 |         hash = shifting_hash(hash, 27) * NUMBER64_1 + NUMBER64_4;
112 |         p += 8;
113 |     }
114 | 
115 |     if (p + 4 <= end) {
116 |         hash ^= (uint64_t)(hash_get32bits(p)) * NUMBER64_1;
117 |         hash = shifting_hash(hash, 23) * NUMBER64_2 + NUMBER64_3;
118 |         p += 4;
119 |     }
120 | 
121 |     while (p < end) {
122 |         hash ^= (*p) * NUMBER64_5;
123 |         hash = shifting_hash(hash, 11) * NUMBER64_1;
124 |         p ++;
125 |     }
126 | 
127 |     hash ^= hash >> 33;
128 |     hash *= NUMBER64_2;
129 |     hash ^= hash >> 29;
130 |     hash *= NUMBER64_3;
131 |     hash ^= hash >> 32;
132 | 
133 |     return hash;
134 | }
135 | 
136 | uint64_t VariableLengthHash(const void * data, uint64_t length, uint64_t seed) {
137 |     if ((((uint64_t)data) & 7) == 0) {
138 |         return string_key_hash_computation(data, length, seed, 1);
139 |     }
140 |     return string_key_hash_computation(data, length, seed, 0);
141 | }
142 | 
143 | uint32_t GetFreeSlotNum(RaceHashBucket * bucket, __OUT uint32_t * free_idx) {
144 |     *free_idx = RACE_HASH_ASSOC_NUM;
145 |     uint32_t free_num = 0;
146 |     for (int i = 0; i < RACE_HASH_ASSOC_NUM; i++) {
147 |         if (bucket->slots[i].fp == 0 && bucket->slots[i].kv_len == 0 && 
148 |                 IsEmptyPointer(bucket->slots[i].pointer, 5)) {
149 |             // free_idx_list[free_num] = i;
150 |             free_num ++;
151 |             *free_idx = i;
152 |         }
153 |     }
154 |     return free_num;
155 | }
156 | 
157 | bool IsEmptyPointer(uint8_t * pointer, uint32_t num) {
158 |     for (int i = 0; i < num; i ++) {
159 |         if (pointer[i] != 0) {
160 |             return false;
161 |         }
162 |     }
163 |     return true;
164 | }
165 | 
166 | uint8_t HashIndexComputeFp(uint64_t hash) {
167 |     uint8_t fp = 0;
168 |     hash >>= 48;
169 |     fp ^= hash;
170 |     hash >>= 8;
171 |     fp ^= hash;
172 |     return fp;
173 | }
174 | 
175 | bool CheckKey(void * r_key_addr, uint32_t r_key_len, void * l_key_addr, uint32_t l_key_len) {
176 |     // TODO: delete the following code
177 |     // char local_key_buf[256] = {0};
178 |     // char remote_key_buf[256] = {0};
179 |     // memset(local_key_buf, 0, 256);
180 |     // memset(remote_key_buf, 0, 256);
181 |     // printf("%d %d\n", r_key_len, l_key_len);
182 |     // memcpy(local_key_buf, l_key_addr, l_key_len);
183 |     // memcpy(remote_key_buf, r_key_addr, r_key_len);
184 |     // printf("%s %s\n", remote_key_buf, local_key_buf);
185 |     // print_log(DEBUG, "  [%s] comparing %s %s %d %d", __FUNCTION__, local_key_buf, remote_key_buf, l_key_len, r_key_len);
186 | 
187 |     if (r_key_len != l_key_len)
188 |         return false;
189 | 
190 |     uint64_t r_hash_value = VariableLengthHash(r_key_addr, r_key_len, 0);
191 |     uint64_t l_hash_value = VariableLengthHash(l_key_addr, l_key_len, 0);
192 | 
193 |     if (r_hash_value != l_hash_value) 
194 |         return false;
195 | 
196 |     return memcmp(r_key_addr, l_key_addr, r_key_len) == 0;
197 | }


--------------------------------------------------------------------------------
/src/hashtable.h:
--------------------------------------------------------------------------------
  1 | #ifndef DDCKV_HASH_TABLE_H_
  2 | #define DDCKV_HASH_TABLE_H_
  3 | 
  4 | #include <stdint.h>
  5 | #include "kv_utils.h"
  6 | 
  7 | #define RACE_HASH_GLOBAL_DEPTH              (5)
  8 | #define RACE_HASH_INIT_LOCAL_DEPTH          (5)
  9 | #define RACE_HASH_SUBTABLE_NUM              (1 << RACE_HASH_GLOBAL_DEPTH)
 10 | #define RACE_HASH_INIT_SUBTABLE_NUM         (1 << RACE_HASH_INIT_LOCAL_DEPTH)
 11 | #define RACE_HASH_MAX_GLOBAL_DEPTH          (5)
 12 | #define RACE_HASH_MAX_SUBTABLE_NUM          (1 << RACE_HASH_MAX_GLOBAL_DEPTH)
 13 | #define RACE_HASH_ADDRESSABLE_BUCKET_NUM    (34000ULL)
 14 | #define RACE_HASH_SUBTABLE_BUCKET_NUM       (RACE_HASH_ADDRESSABLE_BUCKET_NUM * 3 / 2)
 15 | #define RACE_HASH_ASSOC_NUM                 (7)
 16 | #define RACE_HASH_RESERVED_MAX_KV_NUM       (1024ULL * 1024 * 10)
 17 | #define RACE_HASH_KVOFFSET_RING_NUM         (1024ULL * 1024 * 16)
 18 | #define RACE_HASH_KV_BLOCK_LENGTH           (64ULL)
 19 | #define SUBTABLE_USED_HASH_BIT_NUM          (32)
 20 | #define RACE_HASH_MASK(n)                   ((1 << n) - 1)
 21 | 
 22 | #define ROOT_RES_LEN          (sizeof(RaceHashRoot))
 23 | #define SUBTABLE_LEN          (RACE_HASH_ADDRESSABLE_BUCKET_NUM * sizeof(RaceHashBucket))
 24 | #define SUBTABLE_RES_LEN      (RACE_HASH_MAX_SUBTABLE_NUM * SUBTABLE_LEN)
 25 | #define KV_RES_LEN            (RACE_HASH_RESERVED_MAX_KV_NUM * RACE_HASH_KV_BLOCK_LENGTH)
 26 | // #define META_AREA_LEN         (128 * 1024 * 1024)
 27 | #define META_AREA_LEN         (256 * 1024 * 1024)
 28 | // #define GC_AREA_LEN           (128 * 1024 * 1024)
 29 | #define GC_AREA_LEN           (0)
 30 | #define HASH_AREA_LEN         (128 * 1024 * 1024)
 31 | #define CLIENT_META_LEN       (1 * 1024 * 1024)
 32 | #define CLIENT_GC_LEN         (1 * 1024 * 1024)
 33 | // #define RACE_HASH_MAX_NUM_REP (10)
 34 | 
 35 | 
 36 | typedef struct __attribute__((__packed__)) TagRaceHashSlot {
 37 |     uint8_t fp;
 38 |     uint8_t kv_len;
 39 |     uint8_t server_id;
 40 |     uint8_t pointer[5];
 41 | } RaceHashSlot;
 42 | 
 43 | typedef struct __attribute__((__packed__)) TagRacsHashBucket {
 44 |     uint32_t local_depth;
 45 |     uint32_t prefix;
 46 |     RaceHashSlot slots[RACE_HASH_ASSOC_NUM];
 47 | } RaceHashBucket;
 48 | 
 49 | typedef struct TagRaceHashSubtableEntry {
 50 |     uint8_t lock;
 51 |     uint8_t local_depth;
 52 |     uint8_t server_id;
 53 |     uint8_t pointer[5];
 54 | } RaceHashSubtableEntry;
 55 | 
 56 | typedef struct TagRaceHashRoot {
 57 |     uint64_t global_depth;
 58 |     uint64_t init_local_depth;
 59 |     uint64_t max_global_depth;
 60 |     uint64_t prefix_num;
 61 |     uint64_t subtable_res_num;
 62 |     uint64_t subtable_init_num;
 63 |     uint64_t subtable_hash_num;
 64 |     uint64_t subtable_hash_range;
 65 |     uint64_t subtable_bucket_num;
 66 |     uint64_t seed;
 67 | 
 68 |     uint64_t mem_id;
 69 |     uint64_t root_offset;
 70 |     uint64_t subtable_offset;
 71 |     uint64_t kv_offset;
 72 |     uint64_t kv_len;
 73 |     
 74 |     uint64_t lock;
 75 |     RaceHashSubtableEntry subtable_entry[RACE_HASH_MAX_SUBTABLE_NUM][MAX_REP_NUM];
 76 | } RaceHashRoot;
 77 | 
 78 | typedef struct TagRaceHashSearchContext {
 79 |     int32_t  result;
 80 |     int32_t  no_back;
 81 |     uint64_t hash_value;
 82 |     uint8_t  fp; // fingerprint
 83 |     // HashIndexSearchReq * req;
 84 |     uint64_t f_com_bucket_addr;
 85 |     uint64_t s_com_bucket_addr;
 86 |     uint64_t read_kv_addr;
 87 | 
 88 |     uint64_t f_remote_com_bucket_offset;
 89 |     uint64_t s_remote_com_bucket_offset;
 90 | 
 91 |     uint64_t read_kv_offset;
 92 |     uint32_t read_kv_len;
 93 | 
 94 |     RaceHashRoot * local_root;
 95 | 
 96 |     void * key;
 97 |     uint32_t key_len;
 98 |     uint32_t value_len;
 99 | 
100 |     bool sync_root_done;
101 |     bool is_resizing;
102 | } RaceHashSearchContext;
103 | 
104 | typedef struct TagKVTableAddrInfo {
105 |     uint8_t     server_id_list[MAX_REP_NUM];
106 |     uint64_t    f_bucket_addr[MAX_REP_NUM];
107 |     uint64_t    s_bucket_addr[MAX_REP_NUM];
108 |     uint32_t    f_bucket_addr_rkey[MAX_REP_NUM];
109 |     uint32_t    s_bucket_addr_rkey[MAX_REP_NUM];
110 |     uint32_t    f_main_idx;
111 |     uint32_t    s_main_idx;
112 |     uint32_t    f_idx;
113 |     uint32_t    s_idx;
114 | } KVTableAddrInfo;
115 | 
116 | typedef struct TagKVHashInfo {
117 |     uint64_t hash_value;
118 |     uint64_t prefix;
119 |     uint8_t  fp;
120 |     uint8_t  local_depth;
121 | } KVHashInfo;
122 | 
123 | typedef struct TagKVInfo {
124 |     void   * l_addr;
125 |     uint32_t lkey;
126 |     uint32_t key_len;
127 |     uint32_t value_len;
128 | } KVInfo;
129 | 
130 | typedef struct TagKVRWAddr {
131 |     uint8_t  server_id;
132 |     uint64_t r_kv_addr;
133 |     uint64_t l_kv_addr;
134 |     uint32_t rkey;
135 |     uint32_t lkey;
136 |     uint32_t length;
137 | } KVRWAddr;
138 | 
139 | typedef struct TagKVCASAddr {
140 |     uint8_t  server_id;
141 |     uint64_t r_kv_addr;
142 |     uint64_t l_kv_addr;
143 |     uint32_t rkey;
144 |     uint32_t lkey;
145 |     uint64_t orig_value;
146 |     uint64_t swap_value;
147 | } KVCASAddr;
148 | 
149 | typedef struct TagLocalCacheEntry {
150 |     uint64_t       r_slot_addr[MAX_REP_NUM];
151 |     RaceHashSlot   l_slot_ptr;
152 |     uint32_t       miss_cnt;
153 |     uint32_t       acc_cnt;
154 | } LocalCacheEntry;
155 | 
156 | static inline uint64_t SubtableFirstIndex(uint64_t hash_value, uint64_t capacity) {
157 |     return hash_value % (capacity / 2);
158 | }
159 | 
160 | static inline uint64_t SubtableSecondIndex(uint64_t hash_value, uint64_t f_index, uint64_t capacity) {
161 |     uint32_t hash = hash_value;
162 |     uint16_t partial = (uint16_t)(hash >> 16);
163 |     uint16_t non_sero_tag = (partial >> 1 << 1) + 1;
164 |     uint64_t hash_of_tag = (uint64_t)(non_sero_tag * 0xc6a4a7935bd1e995);
165 |     return (uint64_t)(((uint64_t)(f_index) ^ hash_of_tag) % (capacity / 2) + capacity / 2);
166 | }
167 | 
168 | static inline uint64_t HashIndexConvert40To64Bits(uint8_t * addr) {
169 |     uint64_t ret = 0;
170 |     return ret | ((uint64_t)addr[0] << 40) | ((uint64_t)addr[1] << 32)
171 |         | ((uint64_t)addr[2] << 24) | ((uint64_t)addr[3] << 16) 
172 |         | ((uint64_t)addr[4] << 8);
173 | }
174 | 
175 | static inline void HashIndexConvert64To40Bits(uint64_t addr, __OUT uint8_t * o_addr) {
176 |     o_addr[0] = (uint8_t)((addr >> 40) & 0xFF);
177 |     o_addr[1] = (uint8_t)((addr >> 32) & 0xFF);
178 |     o_addr[2] = (uint8_t)((addr >> 24) & 0xFF);
179 |     o_addr[3] = (uint8_t)((addr >> 16) & 0xFF);
180 |     o_addr[4] = (uint8_t)((addr >> 8)  & 0xFF);
181 | }
182 | 
183 | static inline void ConvertSlotToAddr(RaceHashSlot * slot, __OUT KVRWAddr * kv_addr) {
184 |     kv_addr->server_id = slot->server_id;
185 |     kv_addr->r_kv_addr = HashIndexConvert40To64Bits(slot->pointer);
186 | }
187 | 
188 | static inline uint64_t ConvertSlotToInt(RaceHashSlot * slot) {
189 |     return *(uint64_t *)slot;
190 | }
191 | 
192 | uint64_t VariableLengthHash(const void * data, uint64_t length, uint64_t seed);
193 | uint8_t  HashIndexComputeFp(uint64_t hash);
194 | uint32_t GetFreeSlotNum(RaceHashBucket * bucekt, uint32_t * free_idx);
195 | bool IsEmptyPointer(uint8_t * pointer, uint32_t num);
196 | bool CheckKey(void * r_key_addr, uint32_t r_key_len, void * l_key_addr, uint32_t l_key_len);
197 | 
198 | #endif


--------------------------------------------------------------------------------
/src/ib.cc:
--------------------------------------------------------------------------------
  1 | #include "ib.h"
  2 | 
  3 | #include <assert.h>
  4 | 
  5 | static int modify_qp_to_init(struct ibv_qp * qp, const struct QpInfo * local_qp_info) {
  6 |     struct ibv_qp_attr attr;
  7 |     int    attr_mask;
  8 |     int    rc;
  9 |     memset(&attr, 0, sizeof(struct ibv_qp_attr));
 10 |     attr.qp_state = IBV_QPS_INIT;
 11 |     attr.port_num = local_qp_info->port_num;
 12 |     attr.pkey_index = 0;
 13 |     attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | 
 14 |         IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC;
 15 |     attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
 16 |     rc = ibv_modify_qp(qp, &attr, attr_mask);
 17 |     // assert(rc == 0);
 18 |     return 0;
 19 | }
 20 | 
 21 | static int modify_qp_to_rtr(struct ibv_qp * local_qp, 
 22 |     const struct QpInfo * local_qp_info,
 23 |     const struct QpInfo * remote_qp_info,
 24 |     uint8_t conn_type) {
 25 |     struct ibv_qp_attr attr;
 26 |     int    attr_mask;
 27 |     int    rc;
 28 |     memset(&attr, 0, sizeof(struct ibv_qp_attr));
 29 |     attr.qp_state    = IBV_QPS_RTR;
 30 |     attr.path_mtu    = IBV_MTU_1024;
 31 |     attr.dest_qp_num = remote_qp_info->qp_num;
 32 |     attr.rq_psn      = 0;
 33 |     attr.max_dest_rd_atomic = 16;
 34 |     attr.min_rnr_timer = 0x12;
 35 |     attr.ah_attr.is_global = 0;
 36 |     attr.ah_attr.dlid  = remote_qp_info->lid;
 37 |     attr.ah_attr.sl    = 0;
 38 |     attr.ah_attr.src_path_bits = 0;
 39 |     attr.ah_attr.port_num = local_qp_info->port_num;
 40 |     if (conn_type == ROCE) {
 41 |         attr.ah_attr.is_global = 1;
 42 |         attr.ah_attr.port_num  = local_qp_info->port_num;
 43 |         memcpy(&attr.ah_attr.grh.dgid, remote_qp_info->gid, 16);
 44 |         attr.ah_attr.grh.flow_label = 0;
 45 |         attr.ah_attr.grh.hop_limit  = 1;
 46 |         attr.ah_attr.grh.sgid_index = local_qp_info->gid_idx;
 47 |         attr.ah_attr.grh.traffic_class = 0;
 48 |     }
 49 |     attr_mask = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
 50 |                 IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
 51 |     rc = ibv_modify_qp(local_qp, &attr, attr_mask);
 52 |     // assert(rc == 0);
 53 |     return 0;
 54 | }
 55 | 
 56 | static int modify_qp_to_rts(struct ibv_qp * local_qp) {
 57 |     struct ibv_qp_attr attr;
 58 |     int    attr_mask;
 59 |     int    rc;
 60 |     memset(&attr, 0, sizeof(struct ibv_qp_attr));
 61 |     attr.qp_state = IBV_QPS_RTS;
 62 |     attr.timeout = 0x12;
 63 |     attr.retry_cnt = 6;
 64 |     attr.rnr_retry = 0;
 65 |     attr.sq_psn = 0;
 66 |     attr.max_rd_atomic = 16;
 67 |     attr_mask = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
 68 |                 IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC;
 69 |     rc = ibv_modify_qp(local_qp, &attr, attr_mask);
 70 |     // assert(rc == 0);
 71 |     return 0;
 72 | }
 73 | 
 74 | struct ibv_context * ib_get_ctx(uint32_t dev_id, uint32_t port_id) {
 75 |     struct ibv_device ** ib_dev_list;
 76 |     struct ibv_device *  ib_dev;
 77 |     int    num_device;
 78 | 
 79 |     ib_dev_list = ibv_get_device_list(&num_device);
 80 |     // assert(ib_dev_list != NULL && num_device > dev_id);
 81 |     ib_dev = ib_dev_list[dev_id];
 82 | 
 83 |     struct ibv_context * ret = ibv_open_device(ib_dev);
 84 |     // assert(ret != NULL);
 85 |     ibv_free_device_list(ib_dev_list);
 86 |     return ret;
 87 | }
 88 | 
 89 | struct ibv_qp * ib_create_rc_qp(struct ibv_pd * ib_pd, 
 90 |     struct ibv_qp_init_attr * qp_init_attr) {
 91 |     return ibv_create_qp(ib_pd, qp_init_attr);
 92 | }
 93 | 
 94 | int ib_connect_qp(struct ibv_qp * local_qp, 
 95 |     const struct QpInfo * local_qp_info, 
 96 |     const struct QpInfo * remote_qp_info, 
 97 |     uint8_t conn_type, uint8_t role) {
 98 |     int rc = 0;
 99 |     rc = modify_qp_to_init(local_qp, local_qp_info);
100 |     // assert(rc == 0);
101 | 
102 |     rc = modify_qp_to_rtr(local_qp, local_qp_info, remote_qp_info, conn_type);
103 |     // assert(rc == 0);
104 | 
105 |     if (role == SERVER) {
106 |         return 0;
107 |     }
108 |     
109 |     // assert(role == CLIENT);
110 |     rc = modify_qp_to_rts(local_qp);
111 |     // assert(rc == 0);
112 |     return 0;
113 | }
114 | 
115 | struct ibv_send_wr * ib_merge_sr_lists_unsignaled(std::vector<IbvSrList *> sr_lists) {
116 |     struct ibv_send_wr * ret_sr_head = sr_lists[0]->sr_list;
117 |     for (size_t i = 1; i < sr_lists.size(); i ++) {
118 |         uint32_t pre_num_sr = sr_lists[i - 1]->num_sr;
119 |         sr_lists[i - 1]->sr_list[pre_num_sr - 1].next = sr_lists[i]->sr_list;
120 |     }
121 | 
122 |     size_t last_idx = sr_lists.size() - 1;
123 |     uint32_t num_sr = sr_lists[last_idx]->num_sr;
124 |     sr_lists[last_idx]->sr_list[num_sr - 1].next = NULL;
125 | 
126 |     return ret_sr_head;
127 | }
128 | 
129 | struct ibv_send_wr * ib_merge_sr_lists(std::vector<IbvSrList *> sr_lists, __OUT uint64_t * last_wr_id) {
130 |     struct ibv_send_wr * ret_sr_head = sr_lists[0]->sr_list;
131 |     for (size_t i = 1; i < sr_lists.size(); i ++) {
132 |         uint32_t pre_num_sr = sr_lists[i - 1]->num_sr;
133 |         sr_lists[i - 1]->sr_list[pre_num_sr - 1].next = sr_lists[i]->sr_list;
134 |     }
135 | 
136 |     size_t last_idx = sr_lists.size() - 1;
137 |     uint32_t num_sr = sr_lists[last_idx]->num_sr;
138 |     sr_lists[last_idx]->sr_list[num_sr - 1].next = NULL;
139 |     sr_lists[last_idx]->sr_list[num_sr - 1].send_flags |= IBV_SEND_SIGNALED;
140 | 
141 |     *last_wr_id = sr_lists[last_idx]->sr_list[num_sr - 1].wr_id;
142 | 
143 |     return ret_sr_head;
144 | }
145 | 
146 | void ib_free_sr_lists(IbvSrList * sr_lists, uint32_t num_sr_list) {
147 |     // TODO: finish this
148 |     return;
149 |     // free(sr_lists[0].sr_list->sg_list);
150 |     // free(sr_lists[0].sr_list);
151 |     // free(sr_lists);
152 | }
153 | 
154 | void ib_free_sr_lists_batch(std::vector<IbvSrList *> & sr_lists_batch, std::vector<uint32_t> & sr_list_num_batch) {
155 |     for (int i = 0; i < sr_lists_batch.size(); i ++) {
156 |         ib_free_sr_lists(sr_lists_batch[i], sr_list_num_batch[i]);
157 |     }
158 | }
159 | 
160 | inline uint64_t gen_wr_id(uint8_t server_id, uint64_t wr_id) {
161 |     return server_id * 1000 + wr_id;
162 | }
163 | 
164 | inline uint64_t wr_id_to_server_wr_id(uint64_t wr_id) {
165 |     return wr_id % 1000;
166 | }
167 | 
168 | inline uint8_t wr_id_to_server_id(uint64_t wr_id) {
169 |     return (uint8_t)(wr_id / 1000);
170 | }


--------------------------------------------------------------------------------
/src/ib.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_IB_H_
 2 | #define DDCKV_IB_H_
 3 | 
 4 | #include "kv_utils.h"
 5 | 
 6 | #include <infiniband/verbs.h>
 7 | #include <stdlib.h>
 8 | 
 9 | #include <vector>
10 | #include <map>
11 | #include <unordered_map>
12 | 
13 | typedef struct TagIbvSrList {
14 |     struct ibv_send_wr * sr_list;
15 |     uint32_t num_sr;
16 |     uint32_t server_id;
17 | } IbvSrList;
18 | 
19 | struct ibv_context * ib_get_ctx(uint32_t dev_id, uint32_t port_id);
20 | struct ibv_qp * ib_create_rc_qp(struct ibv_pd * ib_pd, struct ibv_qp_init_attr * qp_init_attr);
21 | 
22 | int ib_connect_qp(struct ibv_qp * local_qp, 
23 |     const struct QpInfo * local_qp_info, 
24 |     const struct QpInfo * remote_qp_info, uint8_t conn_type, uint8_t role);
25 | 
26 | // merge wr_lists and set the last wr to be signaled
27 | struct ibv_send_wr * ib_merge_sr_lists_unsignaled(std::vector<IbvSrList *> sr_lists);
28 | struct ibv_send_wr * ib_merge_sr_lists(std::vector<IbvSrList *> sr_lists, __OUT uint64_t * last_wr_id);
29 | void   ib_free_sr_lists(IbvSrList * sr_lists, uint32_t num_sr_list);
30 | void   ib_free_sr_lists_batch(std::vector<IbvSrList *> & sr_lists_batch, std::vector<uint32_t> & sr_list_num_batch);
31 | void   ib_free_sr_list(IbvSrList * sr_list);
32 | 
33 | inline bool ib_is_all_wrid_finished(const std::map<uint64_t, struct ibv_wc *> & wait_wrid_wc_map) {
34 |     std::map<uint64_t, struct ibv_wc *>::const_iterator it;
35 |     for (it = wait_wrid_wc_map.begin(); it != wait_wrid_wc_map.end(); it ++) {
36 |         if (it->second == NULL) {
37 |             return false;
38 |         }
39 |     }
40 |     return true;
41 | }
42 | 
43 | inline uint64_t ib_gen_wr_id(uint32_t coro_id, uint8_t dst_server_id, uint32_t req_type_st, uint32_t req_seq) {
44 |     return (((uint64_t)coro_id << 8) + dst_server_id) * 1000 + req_type_st + req_seq;
45 | }
46 | 
47 | inline uint32_t wrid_to_fiber_id(uint64_t wr_id) {
48 |     return (uint32_t)((wr_id / 1000) >> 8);
49 | }
50 | 
51 | inline uint8_t wrid_to_dst_sid(uint64_t wr_id) {
52 |     return (uint8_t)((wr_id / 1000) & 0xFF);
53 | }
54 | 
55 | inline uint32_t wrid_to_req_seq(uint64_t wr_id) {
56 |     return (uint32_t)(wr_id % 1000);
57 | }
58 | 
59 | inline uint64_t wr_id_to_server_wr_id(uint64_t wr_id);
60 | inline uint8_t  wr_id_to_server_id(uint64_t wr_id);
61 | 
62 | 
63 | #endif


--------------------------------------------------------------------------------
/src/init.cc:
--------------------------------------------------------------------------------
 1 | #include "kv_utils.h"
 2 | 
 3 | #include <assert.h>
 4 | 
 5 | int run_server(struct GlobalConfig * conf);
 6 | int run_client(struct GlobalConfig * conf);
 7 | 
 8 | int main(int argc, char ** argv) {
 9 |     // assert(argc == 2);
10 |     char * conf_file_name = argv[1];
11 |     struct GlobalConfig conf;
12 |     int ret = 0;
13 | 
14 |     ret = load_config(conf_file_name, &conf);
15 |     if (ret != 0) {
16 |         return 1;
17 |     }
18 | 
19 |     if (conf.role == SERVER) {
20 |         ret = run_server(&conf);
21 |     } else {
22 |         // assert(conf.role == CLIENT);
23 |         ret = run_client(&conf);
24 |     }
25 | }


--------------------------------------------------------------------------------
/src/kv_debug.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_DEBUG_H_
 2 | #define DDCKV_DEBUG_H_
 3 | 
 4 | #include <stdio.h>
 5 | #include <string.h>
 6 | #include <stdlib.h>
 7 | #include <stdarg.h>
 8 | #include <assert.h>
 9 | #include <infiniband/verbs.h>
10 | 
11 | #include <vector>
12 | 
13 | #include "kv_utils.h"
14 | #include "hashtable.h"
15 | #include "ib.h"
16 | 
17 | enum {
18 |     INFO = 0,
19 |     DEBUG
20 | };
21 | 
22 | static const char * str_prefix[] = {"[INFO]", "[DEBUG]"};
23 | 
24 | static inline void kv_assert(bool value) {
25 | #ifdef _DEBUG
26 |     assert(value);
27 | #endif
28 | }
29 | 
30 | static inline void print_log(int log_level, const char * fmt, ...) {
31 |     if (log_level == DEBUG) {
32 | #ifdef _DEBUG
33 |     int fmt_len = strlen(fmt);
34 |     char * new_fmt_buf = (char *)malloc(fmt_len + 10);
35 |     sprintf(new_fmt_buf, "%s %s\n", str_prefix[log_level], fmt);
36 |     va_list args;
37 |     va_start(args, fmt);
38 |     vprintf(new_fmt_buf, args);
39 |     va_end(args);
40 | #endif
41 |     } else {
42 |         int fmt_len = strlen(fmt);
43 |         char * new_fmt_buf = (char *)malloc(fmt_len + 10);
44 |         sprintf(new_fmt_buf, "%s %s\n", str_prefix[log_level], fmt);
45 |         va_list args;
46 |         va_start(args, fmt);
47 |         vprintf(new_fmt_buf, args);
48 |         va_end(args);
49 |     }
50 | }
51 | 
52 | static inline void print_sr_list(struct ibv_send_wr * sr_list) {
53 |     struct ibv_send_wr * p;
54 |     for (p = sr_list; p != NULL; p = p->next) {
55 |         // print_log(DEBUG, "wr_id(%ld) raddr(%lx) rkey(%x)", p->wr_id, p->wr.rdma.remote_addr, 
56 |         //     p->wr.rdma.rkey);
57 |     }
58 | }
59 | 
60 | static inline void print_sr_lists(std::vector<IbvSrList *> & sr_list_batch,
61 |         std::vector<uint32_t> & sr_list_num_batch) {
62 |     for (size_t i = 0; i < sr_list_batch.size(); i ++) {
63 |         uint8_t server_id;
64 |         for (int j = 0; j < sr_list_num_batch[i]; j ++) {
65 |             server_id = sr_list_batch[i][j].server_id;
66 |             // print_log(DEBUG, "server_id(%d)", server_id);
67 |             print_sr_list(sr_list_batch[i][j].sr_list);
68 |         }
69 |     }
70 | }
71 | 
72 | static inline void print_key(char * key_addr, uint32_t key_len) {
73 |     char keystr[256];
74 |     memset(keystr, 0, 256);
75 |     memcpy(keystr, key_addr, key_len);
76 |     printf("%s", keystr);
77 | }
78 | 
79 | #endif


--------------------------------------------------------------------------------
/src/kv_utils.cc:
--------------------------------------------------------------------------------
  1 | #include "kv_utils.h"
  2 | 
  3 | #include <boost/property_tree/ptree.hpp>
  4 | #include <boost/property_tree/json_parser.hpp>
  5 | #include <boost/foreach.hpp>
  6 | #include <arpa/inet.h>
  7 | #include <sys/time.h>
  8 | #include <stdio.h>
  9 | #include <assert.h>
 10 | #include <stdlib.h>
 11 | 
 12 | #include <sstream>
 13 | #include <fstream>
 14 | #include <string>
 15 | 
 16 | inline static uint64_t htonll(uint64_t val) {
 17 |     return (((uint64_t) htonl(val)) << 32) + htonl(val >> 32);
 18 | }
 19 |  
 20 | inline static uint64_t ntohll(uint64_t val) {
 21 |     return (((uint64_t) ntohl(val)) << 32) + ntohl(val >> 32);
 22 | }
 23 | 
 24 | void serialize_kvmsg(__OUT struct KVMsg * kvmsg) {
 25 |     switch (kvmsg->type) {
 26 |     case REQ_CONNECT:
 27 |     case REP_CONNECT:
 28 |         serialize_conn_info(&kvmsg->body.conn_info);
 29 |         break;
 30 |     case REQ_ALLOC:
 31 |     case REP_ALLOC:
 32 |     case REQ_ALLOC_SUBTABLE:
 33 |     case REP_ALLOC_SUBTABLE:
 34 |         serialize_mr_info(&kvmsg->body.mr_info);
 35 |         break;
 36 |     default:
 37 |         break;
 38 |     }
 39 |     kvmsg->type = htons(kvmsg->type);
 40 |     kvmsg->id   = htons(kvmsg->id);
 41 | }
 42 | 
 43 | void deserialize_kvmsg(__OUT struct KVMsg * kvmsg) {
 44 |     kvmsg->type = ntohs(kvmsg->type);
 45 |     kvmsg->id   = ntohs(kvmsg->id);
 46 |     switch (kvmsg->type) {
 47 |     case REQ_CONNECT:
 48 |     case REP_CONNECT:
 49 |         deserialize_conn_info(&kvmsg->body.conn_info);
 50 |         break;
 51 |     case REQ_ALLOC:
 52 |     case REP_ALLOC:
 53 |     case REQ_ALLOC_SUBTABLE:
 54 |     case REP_ALLOC_SUBTABLE:
 55 |         deserialize_mr_info(&kvmsg->body.mr_info);
 56 |         break;
 57 |     default:
 58 |         break;
 59 |     }
 60 | }
 61 | 
 62 | void serialize_qp_info(__OUT struct QpInfo * qp_info) {
 63 |     qp_info->qp_num = htonl(qp_info->qp_num);
 64 |     qp_info->lid    = htons(qp_info->lid);
 65 | }
 66 | 
 67 | void deserialize_qp_info(__OUT struct QpInfo * qp_info) {
 68 |     qp_info->qp_num = ntohl(qp_info->qp_num);
 69 |     qp_info->lid    = ntohs(qp_info->lid);
 70 | }
 71 | 
 72 | void serialize_mr_info(__OUT struct MrInfo * mr_info) {
 73 |     mr_info->addr = htonll(mr_info->addr);
 74 |     mr_info->rkey = htonl(mr_info->rkey);
 75 | }
 76 | 
 77 | void deserialize_mr_info(__OUT struct MrInfo * mr_info) {
 78 |     mr_info->addr = ntohll(mr_info->addr);
 79 |     mr_info->rkey = ntohl(mr_info->rkey);
 80 | }
 81 | 
 82 | void serialize_conn_info(__OUT struct ConnInfo * conn_info) {
 83 |     serialize_qp_info(&conn_info->qp_info);
 84 |     serialize_mr_info(&conn_info->gc_info);
 85 | }
 86 | 
 87 | void deserialize_conn_info(__OUT struct ConnInfo * conn_info) {
 88 |     deserialize_qp_info(&conn_info->qp_info);
 89 |     deserialize_mr_info(&conn_info->gc_info);
 90 | }
 91 | 
 92 | int load_config(const char * fname, __OUT struct GlobalConfig * config) {
 93 |     std::fstream config_fs(fname);
 94 |     // assert(config_fs.is_open());
 95 | 
 96 |     boost::property_tree::ptree pt;
 97 |     try {
 98 |         boost::property_tree::read_json(config_fs, pt);
 99 |     } catch (boost::property_tree::ptree_error & e) {
100 |         return -1;
101 |     }
102 | 
103 |     try {
104 |         std::string role_str = pt.get<std::string>("role");
105 |         if (role_str == std::string("SERVER")) {
106 |             config->role = SERVER;
107 |         } else {
108 |             // assert(role_str == std::string("CLIENT"));
109 |             config->role = CLIENT;
110 |         }
111 | 
112 |         std::string conn_type_str = pt.get<std::string>("conn_type");
113 |         if (conn_type_str == std::string("IB")) {
114 |             config->conn_type = IB;
115 |         } else {
116 |             // assert(conn_type_str == std::string("ROCE"));
117 |             config->conn_type = ROCE;
118 |         }
119 | 
120 |         config->server_id = pt.get<uint32_t>("server_id");
121 |         config->udp_port  = pt.get<uint16_t>("udp_port");
122 |         config->memory_num = pt.get<uint16_t>("memory_num");
123 | 
124 |         int i = 0;
125 |         BOOST_FOREACH(boost::property_tree::ptree::value_type & v, pt.get_child("memory_ips")) {
126 |             // assert(v.first.empty());
127 |             std::string ip = v.second.get<std::string>("");
128 |             // assert(ip.length() > 0 && ip.length() < 16);
129 |             strcpy(config->memory_ips[i], ip.c_str());
130 |             i ++;
131 |         }
132 |         // assert(i == config->memory_num);
133 | 
134 |         config->ib_dev_id = pt.get<uint32_t>("ib_dev_id");
135 |         config->ib_port_id = pt.get<uint32_t>("ib_port_id");
136 |         config->ib_gid_idx = pt.get<uint32_t>("ib_gid_idx", -1);
137 |         
138 |         std::string server_base_addr_str = pt.get<std::string>("server_base_addr");
139 |         sscanf(server_base_addr_str.c_str(), "0x%lx", &config->server_base_addr);
140 | 
141 |         config->server_data_len   = pt.get<uint64_t>("server_data_len");
142 |         config->block_size        = pt.get<uint64_t>("block_size");
143 |         config->subblock_size     = pt.get<uint64_t>("subblock_size");
144 |         config->client_local_size = pt.get<uint64_t>("client_local_size");
145 | 
146 |         config->num_replication   = pt.get<uint32_t>("num_replication");
147 |         config->num_coroutines    = pt.get<uint32_t>("num_coroutines", 1);
148 | 
149 |         config->main_core_id = pt.get<uint32_t>("main_core_id", 0);
150 |         config->poll_core_id = pt.get<uint32_t>("poll_core_id", 0);
151 |         config->bg_core_id   = pt.get<uint32_t>("bg_core_id", 0);
152 |         config->gc_core_id   = pt.get<uint32_t>("gc_core_id", 0);
153 | 
154 |         config->is_recovery  = pt.get<uint32_t>("is_recovery", 0);
155 |         
156 |         config->num_idx_rep  = pt.get<uint32_t>("num_idx_rep", 1);
157 |         config->miss_rate_threash = pt.get<float>("miss_rate_threash", 0.1);
158 |         config->workload_run_time = pt.get<int>("workload_run_time", 10);
159 |         config->micro_workload_num = pt.get<int>("micro_workload_num", 10000);
160 |     } catch (boost::property_tree::ptree_error & e) {
161 |         return -1;
162 |     }
163 |     return 0;
164 | }
165 | 
166 | void encode_gc_slot(DecodedClientGCSlot * d_gc_slot, __OUT uint64_t * e_gc_slot) {
167 |     uint64_t masked_block_off = (d_gc_slot->pr_addr >> 8) & BLOCK_OFF_BMASK;
168 |     uint64_t masked_pr_addr = (d_gc_slot->pr_addr >> 26) & BLOCK_ADDR_BMASK;
169 |     uint64_t masked_bk_addr = (d_gc_slot->bk_addr >> 26) & BLOCK_ADDR_BMASK;
170 |     uint64_t masked_num_subblock = d_gc_slot->num_subblocks & SUBBLOCK_NUM_BMASK;
171 |     *(e_gc_slot) = (masked_block_off << 46) | (masked_pr_addr << 25) 
172 |         | (masked_bk_addr << 4) | (masked_num_subblock);
173 | }
174 | 
175 | void decode_gc_slot(uint64_t e_gc_slot, __OUT DecodedClientGCSlot * d_gc_slot) {
176 |     uint64_t block_offset = e_gc_slot >> 46;
177 |     uint64_t pr_block_addr = (e_gc_slot >> 25) & BLOCK_ADDR_BMASK;
178 |     uint64_t bk_block_addr = (e_gc_slot >> 4)  & BLOCK_ADDR_BMASK;
179 |     uint8_t  num_subblocks = e_gc_slot & SUBBLOCK_NUM_BMASK;
180 |     d_gc_slot->pr_addr = (pr_block_addr << 26) | (block_offset << 8);
181 |     d_gc_slot->bk_addr = (bk_block_addr << 26) | (block_offset << 8);
182 |     d_gc_slot->num_subblocks = num_subblocks;
183 | }
184 | 
185 | int stick_this_thread_to_core(int core_id) {
186 |     int num_cores = sysconf(_SC_NPROCESSORS_CONF);
187 |     if (core_id < 0 || core_id >= num_cores) {
188 |         return -1;
189 |     }
190 |     
191 |     cpu_set_t cpuset;
192 |     CPU_ZERO(&cpuset);
193 |     CPU_SET(core_id, &cpuset);
194 | 
195 |     pthread_t current_thread = pthread_self();
196 |     return pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset);
197 | }
198 | 
199 | uint64_t current_time_us() {
200 |     struct timeval now;
201 |     gettimeofday(&now, NULL);
202 |     return now.tv_usec;
203 | }
204 | 
205 | void dump_lat_file(char * fname, const std::vector<uint64_t> & lat_vec) {
206 |     if (lat_vec.size() == 0) {
207 |         return;
208 |     }
209 |     FILE * out_fp = fopen(fname, "w");
210 |     for (size_t i = 0; i < lat_vec.size(); i ++) {
211 |         fprintf(out_fp, "%ld\n", lat_vec[i]);
212 |     }
213 | }


--------------------------------------------------------------------------------
/src/kv_utils.h:
--------------------------------------------------------------------------------
  1 | #ifndef DDCKV_KV_UTILS_H
  2 | #define DDCKV_KV_UTILS_H
  3 | 
  4 | #include <infiniband/verbs.h>
  5 | #include <pthread.h>
  6 | #include <stdint.h>
  7 | #include <unistd.h>
  8 | 
  9 | #include <vector>
 10 | 
 11 | #define __OUT
 12 | #define DDCKV_MAX_SERVER 64
 13 | 
 14 | #define SERVER_ID_BMASK     0x3F
 15 | #define BLOCK_ADDR_BMASK    0x1FFFFFULL
 16 | #define BLOCK_OFF_BMASK     0x3FFFFULL
 17 | #define SUBBLOCK_NUM_BMASK  0xF
 18 | #define MAX_REP_NUM         10
 19 | 
 20 | // #define YCSB_10M
 21 | // #define SERVER_MM
 22 | 
 23 | enum ConnType {
 24 |     IB,
 25 |     ROCE,
 26 | };
 27 | 
 28 | enum Role {
 29 |     CLIENT,
 30 |     SERVER
 31 | };
 32 | 
 33 | // enum KVLogState {
 34 | //     KV_LOG_VALID = 1,
 35 | //     KV_LOG_COMMITTED = 1 << 1,
 36 | //     KV_LOG_GC = 1 << 2,
 37 | //     KV_LOG_INSERT = 1 << 3,
 38 | // };
 39 | 
 40 | enum KVLogOp {
 41 |     KV_OP_INSERT = 1,
 42 |     KV_OP_UPDATE,
 43 |     KV_OP_DELETE,
 44 |     KV_OP_FINISH
 45 | };
 46 | 
 47 | struct GlobalConfig {
 48 |     uint8_t  role;
 49 |     uint8_t  conn_type;
 50 |     uint32_t server_id;
 51 |     uint16_t udp_port;
 52 |     uint32_t memory_num; // 0 ~ memory_num -1 is the server id 
 53 |     char     memory_ips[16][16];
 54 | 
 55 |     uint32_t ib_dev_id;
 56 |     uint32_t ib_port_id;
 57 |     int32_t  ib_gid_idx;
 58 | 
 59 |     uint64_t server_base_addr;
 60 |     uint64_t server_data_len;
 61 |     uint64_t block_size;
 62 |     uint64_t subblock_size;
 63 |     uint64_t client_local_size;
 64 | 
 65 |     uint32_t num_replication; // 0 ~ num_replication_ - 1 is the meta replication server
 66 |     uint32_t num_idx_rep;
 67 |     uint32_t num_coroutines;
 68 | 
 69 |     uint32_t main_core_id;
 70 |     uint32_t poll_core_id;
 71 |     uint32_t bg_core_id;
 72 |     uint32_t gc_core_id;
 73 | 
 74 |     int is_recovery;
 75 | 
 76 |     // for master
 77 |     uint16_t master_port;
 78 |     char     master_ip[16];
 79 |     float    miss_rate_threash;
 80 |     int      workload_run_time;
 81 |     int      micro_workload_num;
 82 | };
 83 | 
 84 | struct GlobalInfo {
 85 |     int local_id;
 86 |     int num_clients;
 87 |     int num_memories;
 88 | 
 89 |     struct ibv_context * ctx;
 90 |     int port_index;
 91 |     int device_id;
 92 |     int dev_port_id;
 93 |     int numa_node_id;
 94 | 
 95 |     struct ibv_pd * pd;
 96 |     struct ibv_qp * ud_qp;
 97 | 
 98 |     int role;
 99 |     pthread_mutex_t lock;
100 | };
101 | 
102 | enum KVMsgType {
103 |     REQ_CONNECT,
104 |     REQ_ALLOC,
105 |     REQ_ALLOC_SUBTABLE,
106 |     REP_CONNECT,
107 |     REP_ALLOC,
108 |     REP_ALLOC_SUBTABLE,
109 |     REQ_REGISTER,
110 |     REP_REGISTER,
111 |     REQ_RECOVER,
112 |     REP_RECOVER,
113 |     REQ_HEARTBEAT,
114 |     REP_HEAETBEAT
115 | };
116 | 
117 | struct QpInfo {
118 |     uint32_t qp_num;
119 |     uint16_t lid;
120 |     uint8_t  port_num;
121 |     uint8_t  gid[16];
122 |     uint8_t  gid_idx;
123 | };
124 | 
125 | struct MrInfo {
126 |     uint64_t addr;
127 |     uint32_t rkey;
128 | };
129 | 
130 | struct IbInfo {
131 |     uint8_t conn_type;
132 |     struct ibv_context   * ib_ctx;
133 |     struct ibv_pd        * ib_pd;
134 |     struct ibv_cq        * ib_cq;
135 |     struct ibv_port_attr * ib_port_attr;
136 |     union  ibv_gid       * ib_gid;
137 | };
138 | 
139 | struct ConnInfo {
140 |     struct QpInfo qp_info;
141 |     struct MrInfo gc_info;
142 | };
143 | 
144 | struct KVMsg {
145 |     uint16_t  type;
146 |     uint16_t  id;
147 |     union {
148 |         struct ConnInfo conn_info;
149 |         struct MrInfo   mr_info;
150 |     } body;
151 | };
152 | 
153 | enum MMBlockRole {
154 |     PRIMARY,
155 |     BACKUP
156 | };
157 | 
158 | struct KVLogHeader {
159 |     uint8_t  is_valid;
160 |     uint16_t key_length;
161 |     uint32_t value_length;
162 | };
163 | 
164 | struct KVLogTail {
165 |     uint8_t next_addr[6];
166 |     uint8_t prev_addr[6];
167 |     uint64_t old_value;
168 |     uint8_t  crc;
169 |     uint8_t  op;
170 | };
171 | 
172 | typedef struct TagClientLogMetaInfo {
173 |     uint8_t  pr_server_id;
174 |     uint64_t pr_log_head;
175 |     uint64_t pr_log_tail;
176 | } ClientLogMetaInfo;
177 | 
178 | typedef struct TagEncodedClientGCSlot {
179 |     // off: 18bit
180 |     // block addr: 21 * 2
181 |     // len: 4bit
182 |     uint64_t meta_gc_addr;
183 | } EncodedClientGCSlot;
184 | 
185 | typedef struct TagClientMetaAddrInfo {
186 |     uint8_t  meta_info_type;
187 |     uint8_t  server_id_list[MAX_REP_NUM];
188 |     uint64_t addr_list[MAX_REP_NUM];
189 | } ClientMetaAddrInfo;
190 | 
191 | typedef struct TagDecodedClientGCSlot {
192 |     uint64_t pr_addr;
193 |     uint64_t bk_addr;
194 |     uint8_t  num_subblocks;
195 | } DecodedClientGCSlot;
196 | 
197 | static inline uint64_t roundup_256(uint64_t len) {
198 |     if (len % 256 == 0) {
199 |         return len;
200 |     }
201 |     return (len / 256 + 1) * 256;
202 | }
203 | 
204 | static inline bool log_is_valid(KVLogHeader * head) {
205 |     return head->is_valid == true;
206 | }
207 | 
208 | static inline bool log_is_committed(KVLogTail * tail) {
209 |     return tail->old_value != 0;
210 | }
211 | 
212 | static inline bool log_is_insert(KVLogTail * tail) {
213 |     return tail->op == KV_OP_INSERT;
214 | }
215 | 
216 | static inline uint64_t time_spent_us(struct timeval * st, struct timeval * et) {
217 |     return (et->tv_sec - st->tv_sec) * 1000000 + (et->tv_usec - st->tv_usec);
218 | }
219 | 
220 | static inline uint64_t round_up(uint64_t addr, uint32_t align) {
221 |     return ((addr) + align - 1) - ((addr + align - 1) % align);
222 | }
223 | 
224 | void serialize_kvmsg(__OUT struct KVMsg * kvmsg);
225 | void deserialize_kvmsg(__OUT struct KVMsg * kvmsg);
226 | void serialize_qp_info(__OUT struct QpInfo * qp_info);
227 | void deserialize_qp_info(__OUT struct QpInfo * qp_info);
228 | void serialize_mr_info(__OUT struct MrInfo * mr_info);
229 | void deserialize_mr_info(__OUT struct MrInfo * mr_info);
230 | void serialize_conn_info(__OUT struct ConnInfo * conn_info);
231 | void deserialize_conn_info(__OUT struct ConnInfo * conn_info);
232 | 
233 | int load_config(const char * fname, __OUT struct GlobalConfig * config);
234 | 
235 | void encode_gc_slot(DecodedClientGCSlot * d_gc_slot, __OUT uint64_t * e_gc_slot);
236 | void decode_gc_slot(uint64_t e_gc_slot, __OUT DecodedClientGCSlot * d_gc_slot);
237 | 
238 | int stick_this_thread_to_core(int core_id);
239 | 
240 | uint64_t current_time_us();
241 | 
242 | void dump_lat_file(char * fname, const std::vector<uint64_t> & lat_vec);
243 | 
244 | #endif


--------------------------------------------------------------------------------
/src/nm.h:
--------------------------------------------------------------------------------
  1 | #ifndef DDCKV_NM_H
  2 | #define DDCKV_NM_H
  3 | 
  4 | #include <infiniband/verbs.h>
  5 | #include <stdint.h>
  6 | #include <netdb.h>
  7 | 
  8 | #include <vector>
  9 | #include <map>
 10 | #include <mutex>
 11 | #include <thread>
 12 | #include <boost/fiber/all.hpp>
 13 | #include <tbb/concurrent_hash_map.h>
 14 | 
 15 | #include "kv_utils.h"
 16 | #include "ib.h"
 17 | 
 18 | class UDPNetworkManager {
 19 | private:
 20 |     uint32_t udp_sock_;
 21 |     uint16_t udp_port_;
 22 |     uint8_t  role_;
 23 |     uint8_t  conn_type_;
 24 |     struct sockaddr_in * server_addr_list_;
 25 |     uint32_t num_server_;
 26 |     uint32_t server_id_;
 27 | 
 28 |     struct ibv_context   * ib_ctx_;
 29 |     struct ibv_pd        * ib_pd_;
 30 |     struct ibv_cq        * ib_cq_;
 31 |     uint8_t                ib_port_num_;
 32 |     struct ibv_port_attr   ib_port_attr_;
 33 |     struct ibv_device_attr ib_device_attr_;
 34 |     union  ibv_gid       * ib_gid_;
 35 |     std::vector<struct ibv_qp *> rc_qp_list_;
 36 |     std::vector<struct MrInfo *> mr_info_list_;
 37 | 
 38 |     tbb::concurrent_hash_map<uint64_t, struct ibv_wc *> wrid_wc_map_;
 39 | 
 40 |     volatile bool stop_polling_;
 41 | 
 42 | 
 43 |     int UDPCMInitClient(const struct GlobalConfig * conf);
 44 |     int UDPCMInitServer(const struct GlobalConfig * conf);
 45 | 
 46 | // private methods
 47 | private:
 48 |     struct ibv_qp * server_create_rc_qp();
 49 |     struct ibv_qp * client_create_rc_qp();
 50 |     int  get_qp_info(struct ibv_qp * qp, __OUT struct QpInfo * qp_info);
 51 |     bool is_all_complete(const std::map<uint64_t, bool> & wr_id_comp_map);
 52 | 
 53 | // inline public functions
 54 | public:
 55 |     inline uint32_t get_one_server_id(uint32_t hint) {
 56 |         return hint % num_server_;
 57 |     }
 58 | 
 59 |     inline uint32_t get_server_rkey(uint8_t server_id) {
 60 |         return mr_info_list_[server_id]->rkey;
 61 |     }
 62 | 
 63 |     inline uint32_t get_server_id()  {
 64 |         return server_id_;
 65 |     }
 66 | 
 67 |     inline uint32_t get_num_servers() {
 68 |         return num_server_;
 69 |     }
 70 | 
 71 | public:
 72 |     UDPNetworkManager(const struct GlobalConfig * conf);
 73 |     ~UDPNetworkManager();
 74 | 
 75 |     // common udp functions
 76 |     int nm_recv_udp_msg(__OUT struct KVMsg * kvmsg, 
 77 |         __OUT struct sockaddr_in * src_addr, __OUT socklen_t * src_addr_len);
 78 |     int nm_send_udp_msg(struct KVMsg * kvmsg, struct sockaddr_in * dest_addr,
 79 |         socklen_t dest_addr_len);
 80 |     int nm_send_udp_msg_to_server(struct KVMsg * kvmsg, uint32_t server_id);
 81 |     void close_udp_sock();
 82 | 
 83 |     // common ib functions
 84 |     void get_ib_info(__OUT struct IbInfo * ib_info);
 85 |     int  rdma_post_send_batch_async(uint32_t server_id, struct ibv_send_wr * wr_list);
 86 |     int  rdma_post_send_batch_sync(uint32_t server_id, struct ibv_send_wr * wr_list);
 87 |     int  rdma_post_sr_lists_async_unsignaled(IbvSrList * sr_lists, 
 88 |         uint32_t num_sr_lists);
 89 |     int  rdma_post_sr_lists_sync_unsignaled(IbvSrList * sr_lists,
 90 |         uint32_t num_sr_lists);
 91 |     int  rdma_post_sr_lists_sync(IbvSrList * sr_lists, uint32_t num_sr_lists, 
 92 |         __OUT struct ibv_wc * wc);
 93 |     int  rdma_post_sr_lists_async(IbvSrList * sr_lists, uint32_t num_sr_lists, 
 94 |         __OUT std::map<uint64_t, struct ibv_wc *> & wait_wrid_wc_map);
 95 |     int  rdma_post_sr_list_batch_sync(std::vector<IbvSrList *> & sr_list_batch,
 96 |         std::vector<uint32_t> & sr_list_num_batch, __OUT struct ibv_wc * wc);
 97 |     int  rdma_post_sr_list_batch_async(std::vector<IbvSrList *> & sr_list_batch, 
 98 |         std::vector<uint32_t> & sr_list_num_batch, __OUT std::map<uint64_t, struct ibv_wc *> & wait_wrid_wc_map);
 99 |     
100 |     int  rdma_poll_one_completion(struct ibv_wc * wc);
101 |     int  nm_check_completion(std::map<uint64_t, struct ibv_wc *> & wrid_wc_map);
102 |     int  nm_poll_completion_sync(std::map<uint64_t, struct ibv_wc *> & wrid_wc_map);
103 |     int  nm_rdma_write_inl_to_sid(void * data, uint32_t size, uint64_t remote_addr, 
104 |             uint32_t remote_rkey, uint32_t server_id);
105 |     int  nm_rdma_write_inl_to_sid_sync(void * data, uint32_t size, uint64_t remote_addr,
106 |             uint32_t remote_rkey, uint32_t server_id);
107 |     int  nm_rdma_read_from_sid_sync(void * local_addr, uint32_t local_lkey, 
108 |             uint32_t size, uint64_t remote_addr, uint32_t remote_rkey, uint32_t server_id);
109 |     int  nm_rdma_read_from_sid(void * local_addr, uint32_t local_lkey, 
110 |             uint32_t size, uint64_t remote_addr, uint32_t remote_rkey, uint32_t server_id);
111 |     int  nm_rdma_write_to_sid(void * local_addr, uint32_t local_lkey, 
112 |             uint32_t size, uint64_t remote_addr, uint32_t remote_rkey, uint32_t server_id);
113 | 
114 |     // for server
115 |     int nm_on_connect_new_qp(const struct KVMsg * request, __OUT struct QpInfo * qp_info);
116 |     int nm_on_connect_connect_qp(uint32_t client_id, 
117 |         const struct QpInfo * local_qp_info, 
118 |         const struct QpInfo * remote_qp_info);
119 | 
120 |     // for client
121 |     int client_connect_all_rc_qp();
122 |     int client_connect_one_rc_qp(uint32_t server_id, __OUT struct MrInfo * mr_info);
123 |     int client_connect_one_rc_qp(uint32_t server_id);
124 | 
125 |     // for polling thread
126 |     void nm_fiber_polling();
127 |     void nm_thread_polling();
128 |     void stop_polling();
129 | };
130 | 
131 | typedef struct TagNMPollingThreadArgs {
132 |     UDPNetworkManager * nm;
133 |     int core_id;
134 | } NMPollingThreadArgs;
135 | 
136 | void * nm_polling_thread(void * args);
137 | void * nm_polling_fiber(void * args);
138 | 
139 | #endif


--------------------------------------------------------------------------------
/src/server.cc:
--------------------------------------------------------------------------------
  1 | #include "server.h"
  2 | 
  3 | #include <assert.h>
  4 | 
  5 | #include "kv_utils.h"
  6 | #include "kv_debug.h"
  7 | 
  8 | void * server_main(void * server_main_args) {
  9 |     ServerMainArgs * args = (ServerMainArgs *)server_main_args;
 10 |     Server * server_instance = args->server;
 11 |     
 12 |     // stick to a core
 13 |     int ret = stick_this_thread_to_core(args->core_id);
 14 |     // assert(ret == 0);
 15 |     // print_log(DEBUG, "server is running on core: %d", args->core_id);
 16 | 
 17 |     // start working
 18 |     return server_instance->thread_main();   
 19 | }
 20 | 
 21 | Server::Server(const struct GlobalConfig * conf) {
 22 |     server_id_ = conf->server_id;
 23 |     need_stop_ = 0;
 24 | 
 25 |     nm_ = new UDPNetworkManager(conf);
 26 | 
 27 |     struct IbInfo ib_info;
 28 |     nm_->get_ib_info(&ib_info);
 29 |     mm_ = new ServerMM(conf->server_base_addr, conf->server_data_len,
 30 |         conf->block_size, &ib_info, conf);
 31 | }
 32 | 
 33 | Server::~Server() {
 34 |     delete mm_;
 35 |     delete nm_;
 36 | }
 37 | 
 38 | int Server::server_on_connect(const struct KVMsg * request, 
 39 |     struct sockaddr_in * src_addr, 
 40 |     socklen_t src_addr_len) {
 41 |     int rc = 0;
 42 |     struct KVMsg reply;
 43 |     memset(&reply, 0, sizeof(struct KVMsg));
 44 | 
 45 |     reply.id   = server_id_;
 46 |     reply.type = REP_CONNECT;
 47 |     rc = nm_->nm_on_connect_new_qp(request, &reply.body.conn_info.qp_info);
 48 |     // assert(rc == 0);
 49 | 
 50 |     rc = mm_->get_mr_info(&reply.body.conn_info.gc_info);
 51 |     // assert(rc == 0);
 52 | 
 53 |     serialize_kvmsg(&reply);
 54 | 
 55 |     rc = nm_->nm_send_udp_msg(&reply, src_addr, src_addr_len);
 56 |     // assert(rc == 0);
 57 |     
 58 |     deserialize_kvmsg(&reply);
 59 |     rc = nm_->nm_on_connect_connect_qp(request->id, &reply.body.conn_info.qp_info, &request->body.conn_info.qp_info);
 60 |     // assert(rc == 0);
 61 |     return 0;
 62 | }
 63 | 
 64 | int Server::server_on_alloc(const struct KVMsg * request, struct sockaddr_in * src_addr, 
 65 |         socklen_t src_addr_len) {
 66 |     uint64_t alloc_addr = mm_->mm_alloc();
 67 |     // assert(mmblock != NULL);
 68 |     // print_log(DEBUG, "allocated addr: %lx", mmblock->addr);
 69 |     // assert((mmblock->addr & 0x3FFFFFF) == 0);
 70 | 
 71 |     struct KVMsg reply;
 72 |     memset(&reply, 0, sizeof(struct KVMsg));
 73 |     reply.type = REP_ALLOC;
 74 |     reply.id   = nm_->get_server_id();
 75 |     reply.body.mr_info.rkey = mm_->get_rkey();
 76 |     if (alloc_addr != 0) {
 77 |         reply.body.mr_info.addr = alloc_addr;
 78 |     } else {
 79 |         printf("server no space\n");
 80 |         reply.body.mr_info.addr = 0;
 81 |     }
 82 |     serialize_kvmsg(&reply);
 83 | 
 84 |     int ret = nm_->nm_send_udp_msg(&reply, src_addr, src_addr_len);
 85 |     // assert(ret == 0);
 86 | 
 87 |     return 0;
 88 | }
 89 | 
 90 | int Server::server_on_alloc_subtable(const struct KVMsg * request, struct sockaddr_in * src_addr,
 91 |         socklen_t src_addr_len) {
 92 |     uint64_t subtable_addr = mm_->mm_alloc_subtable();
 93 |     // assert(subtable_addr != 0);
 94 |     // print_log(DEBUG, "alloc subtable: %lx", subtable_addr);
 95 |     // assert((subtable_addr & 0xFF)  == 0);
 96 | 
 97 |     struct KVMsg reply;
 98 |     memset(&reply, 0, sizeof(struct KVMsg));
 99 |     reply.type = REP_ALLOC_SUBTABLE;
100 |     reply.id   = nm_->get_server_id();
101 |     reply.body.mr_info.addr = subtable_addr;
102 |     reply.body.mr_info.rkey = mm_->get_rkey();
103 |     serialize_kvmsg(&reply);
104 |     int ret = nm_->nm_send_udp_msg(&reply, src_addr, src_addr_len);
105 |     // assert(ret == 0);
106 |     return 0;
107 | }
108 | 
109 | void * Server::thread_main() {
110 |     struct sockaddr_in client_addr;
111 |     socklen_t          client_addr_len = sizeof(struct sockaddr_in);
112 |     struct KVMsg request;
113 |     int rc = 0;
114 |     while (!need_stop_) {
115 |         rc = nm_->nm_recv_udp_msg(&request, &client_addr, &client_addr_len);
116 |         if (rc && need_stop_) {
117 |             break;
118 |         } else if (rc) {
119 |             continue;
120 |         }
121 |         // assert(rc == 0);
122 |         deserialize_kvmsg(&request);
123 | 
124 |         if (request.type == REQ_CONNECT) {
125 |             rc = server_on_connect(&request, &client_addr, client_addr_len);
126 |             // assert(rc == 0);
127 |         } else if (request.type == REQ_ALLOC_SUBTABLE) {
128 |             rc = server_on_alloc_subtable(&request, &client_addr, client_addr_len);
129 |             // assert(rc == 0);
130 |         } else {
131 |             // assert(request.type == REQ_ALLOC);
132 |             rc = server_on_alloc(&request, &client_addr, client_addr_len);
133 |             // assert(rc == 0);
134 |         }
135 |     }
136 |     return NULL;
137 | }
138 | 
139 | void Server::stop() {
140 |     need_stop_ = 1;
141 | }
142 | 
143 | uint64_t Server::get_kv_area_addr() {
144 |     return mm_->get_kv_area_addr();
145 | }
146 | 
147 | uint64_t Server::get_subtable_st_addr() {
148 |     return mm_->get_subtable_st_addr();
149 | }


--------------------------------------------------------------------------------
/src/server.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_SERVER_H_
 2 | #define DDCKV_SERVER_H_
 3 | 
 4 | #include "nm.h"
 5 | #include "server_mm.h"
 6 | #include "kv_utils.h"
 7 | 
 8 | class Server {
 9 |     uint32_t server_id_;
10 |     volatile uint8_t need_stop_;
11 |     UDPNetworkManager * nm_;
12 |     ServerMM          * mm_;
13 |     
14 | public:
15 |     Server(const struct GlobalConfig * conf);
16 |     ~Server();
17 | 
18 |     int server_on_connect(const struct KVMsg * request, 
19 |         struct sockaddr_in * src_addr, socklen_t src_addr_len);
20 |     int server_on_alloc(const struct KVMsg * request, 
21 |         struct sockaddr_in * src_addr, socklen_t src_addr_len);
22 |     int server_on_alloc_subtable(const struct KVMsg * request,
23 |         struct sockaddr_in * src_addr, socklen_t src_addr_len);
24 | 
25 |     void * thread_main();
26 | 
27 |     void stop();
28 | 
29 |     // for testing
30 |     uint64_t get_kv_area_addr();
31 |     uint64_t get_subtable_st_addr();
32 | };
33 | 
34 | typedef struct TagServerMainArgs {
35 |     Server * server;
36 |     int      core_id;
37 | } ServerMainArgs;
38 | 
39 | void * server_main(void * server_main_args);
40 | 
41 | #endif


--------------------------------------------------------------------------------
/src/server_mm.cc:
--------------------------------------------------------------------------------
  1 | #include "server_mm.h"
  2 | 
  3 | #include <unistd.h>
  4 | #include <sys/mman.h>
  5 | #include <assert.h>
  6 | 
  7 | #include "kv_debug.h"
  8 | 
  9 | #define MAP_HUGE_2MB        (21 << MAP_HUGE_SHIFT)
 10 | #define MAP_HUGE_1GB        (30 << MAP_HUGE_SHIFT)
 11 | 
 12 | ServerMM::ServerMM(uint64_t server_base_addr, uint64_t base_len, 
 13 |     uint32_t block_size, const struct IbInfo * ib_info,
 14 |     const struct GlobalConfig * conf) {
 15 |     this->block_size_ = block_size;
 16 |     this->base_addr_ = server_base_addr;
 17 |     this->base_len_  = base_len;
 18 |     int port_flag = PROT_READ | PROT_WRITE;
 19 |     int mm_flag   = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB | MAP_HUGE_2MB;
 20 |     this->data_ = mmap((void *)this->base_addr_, this->base_len_, port_flag, mm_flag, -1, 0);
 21 |     // assert((uint64_t)this->data_ == this->base_addr_);
 22 | 
 23 |     client_meta_area_off_ = 0;
 24 |     client_meta_area_len_ = META_AREA_LEN;
 25 |     client_gc_area_off_ = this->client_meta_area_len_;
 26 |     client_gc_area_len_ = GC_AREA_LEN;
 27 |     client_hash_area_off_ = this->client_gc_area_off_ + this->client_gc_area_len_;
 28 |     client_hash_area_len_ = HASH_AREA_LEN;
 29 |     client_kv_area_off_ = this->client_hash_area_off_ + this->client_hash_area_len_;
 30 |     client_kv_area_off_ = round_up(client_kv_area_off_, block_size_);
 31 |     client_kv_area_len_ = base_len_ - client_kv_area_off_;
 32 |     client_kv_area_limit_ = base_len_ + base_addr_;
 33 |     printf("kv_area_addr: %lx, block_size: %x\n", client_kv_area_off_, block_size_);
 34 | 
 35 |     //init hash index
 36 |     init_hashtable();
 37 | 
 38 |     int access_flag = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | 
 39 |         IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC;
 40 |     this->mr_ = ibv_reg_mr(ib_info->ib_pd, this->data_, this->base_len_, access_flag);
 41 |     // print_log(DEBUG, "addr %lx rkey %x", mr_->addr, mr_->rkey);
 42 | 
 43 |     num_memory_ = conf->memory_num;
 44 |     num_replication_ = conf->num_replication;
 45 |     my_sid_ = conf->server_id;
 46 |     printf("my_sid_: %d, num_memory_: %d\n", my_sid_, num_memory_);
 47 | 
 48 |     // init blocks
 49 |     num_blocks_ = client_kv_area_len_ / block_size_;
 50 |     get_allocable_blocks();
 51 | }
 52 | 
 53 | ServerMM::~ServerMM() {
 54 |     munmap(data_, this->base_len_);
 55 | }
 56 | 
 57 | void ServerMM::get_allocable_blocks() {
 58 |     uint64_t kv_area_addr = base_addr_ + client_kv_area_off_;
 59 |     std::vector<uint64_t> mn_addr_ptr;
 60 |     for (int i = 0; i < num_memory_; i ++)
 61 |         mn_addr_ptr.push_back(kv_area_addr);
 62 |     
 63 |     uint32_t num_rep_blocks = (num_blocks_ * num_memory_) / num_replication_;
 64 |     printf("num_rep_blocks: %d, num_blocks: %d, limit: %lx\n", 
 65 |         num_rep_blocks, num_blocks_, client_kv_area_limit_);
 66 | 
 67 |     uint32_t block_cnt = 0;
 68 |     while (block_cnt < num_rep_blocks) {
 69 |         uint32_t st_sid = block_cnt % num_memory_;
 70 |         while (mn_addr_ptr[st_sid] == client_kv_area_limit_)
 71 |             st_sid = (st_sid + 1) % num_memory_;
 72 | 
 73 |         uint64_t addr_list[num_replication_];
 74 |         for (int i = 0; i < num_replication_; i ++) {
 75 |             uint8_t sid = (st_sid + i) % num_memory_;
 76 | 
 77 |             if (mn_addr_ptr[sid] >= client_kv_area_limit_) {
 78 |                 printf("Error addr map %d %d %d\n", block_cnt, sid, st_sid);
 79 |                 for (int j = 0; j < num_memory_; j ++)
 80 |                     printf("server: %lx\n", mn_addr_ptr[j]);
 81 |                 exit(1);
 82 |             }
 83 |             if (mn_addr_ptr[sid] & 0xFF != 0) {
 84 |                 printf("Error addr map addr\n");
 85 |                 exit(1);
 86 |             }
 87 | 
 88 |             addr_list[i] = mn_addr_ptr[sid];
 89 |             mn_addr_ptr[sid] += block_size_;
 90 |         }
 91 |         if (st_sid == my_sid_) {
 92 |             allocable_blocks_.push(addr_list[0]);
 93 |         }
 94 |         block_cnt ++;
 95 |     }
 96 | }
 97 | 
 98 | uint64_t ServerMM::mm_alloc() {
 99 |     if (allocable_blocks_.size() == 0) {
100 |         return 0;
101 |     }
102 | 
103 |     uint64_t ret_addr = allocable_blocks_.front();
104 |     allocable_blocks_.pop();
105 |     allocated_blocks_[ret_addr] = true;
106 |     return ret_addr;
107 | }
108 | 
109 | int ServerMM::mm_free(uint64_t st_addr) {
110 |     if (allocated_blocks_[st_addr] != true)
111 |         return -1;
112 | 
113 |     allocated_blocks_[st_addr] = false;
114 |     allocable_blocks_.push(st_addr);
115 |     return 0;
116 | }
117 | 
118 | uint64_t ServerMM::mm_alloc_subtable() {
119 |     int ret = 0;
120 |     uint64_t subtable_st_addr = base_addr_ + client_hash_area_off_ + roundup_256(ROOT_RES_LEN);
121 |     for (size_t i = 0; i < subtable_alloc_map_.size(); i ++) {
122 |         if (subtable_alloc_map_[i] == 0) {
123 |             subtable_alloc_map_[i] = 1;
124 |             return subtable_st_addr + i * roundup_256(SUBTABLE_LEN);
125 |         }
126 |     }
127 |     return 0;
128 | }
129 | 
130 | uint32_t ServerMM::get_rkey() {
131 |     return this->mr_->rkey;
132 | }
133 | 
134 | int ServerMM::get_client_gc_info(uint32_t client_id, __OUT struct MrInfo * mr_info) {
135 |     uint64_t single_gc_len = 1024 * 1024;
136 |     uint64_t client_gc_off = client_id * single_gc_len;
137 |     if (client_gc_off + single_gc_len >= this->client_gc_area_len_) {
138 |         return -1;
139 |     }
140 |     mr_info->addr = this->client_gc_area_off_ + client_gc_off + this->base_addr_;
141 |     mr_info->rkey = this->mr_->rkey;
142 |     return 0;
143 | }
144 | 
145 | int ServerMM::get_mr_info(__OUT struct MrInfo * mr_info) {
146 |     mr_info->addr = this->base_addr_;
147 |     mr_info->rkey = this->mr_->rkey;
148 |     return 0;
149 | }
150 | 
151 | int ServerMM::init_root(void * root_addr) {
152 |     RaceHashRoot * root = (RaceHashRoot *)root_addr;
153 |     root->global_depth = RACE_HASH_GLOBAL_DEPTH;
154 |     root->init_local_depth = RACE_HASH_INIT_LOCAL_DEPTH;
155 |     root->max_global_depth = RACE_HASH_MAX_GLOBAL_DEPTH;
156 |     root->prefix_num = 1 << RACE_HASH_MAX_GLOBAL_DEPTH;
157 |     root->subtable_res_num = root->prefix_num;
158 |     root->subtable_init_num = RACE_HASH_INIT_SUBTABLE_NUM;
159 |     root->subtable_hash_range = RACE_HASH_ADDRESSABLE_BUCKET_NUM;
160 |     root->subtable_bucket_num = RACE_HASH_SUBTABLE_BUCKET_NUM;
161 |     root->seed = rand();
162 |     root->root_offset = client_hash_area_off_;
163 |     root->subtable_offset = root->root_offset + roundup_256(ROOT_RES_LEN);
164 |     root->kv_offset = client_kv_area_off_;
165 |     root->kv_len = client_kv_area_len_;
166 |     root->lock = 0;
167 | 
168 |     return 0;
169 | }
170 | 
171 | int ServerMM::init_subtable(void * subtable_addr) {
172 |     // RaceHashBucket * bucket = (RaceHashBucket *)subtable_addr;
173 |     uint64_t max_subtables = (base_addr_ + client_hash_area_off_ + client_hash_area_len_ - (uint64_t)subtable_addr) / roundup_256(SUBTABLE_LEN);
174 | 
175 |     subtable_alloc_map_.resize(max_subtables);
176 |     for (int i = 0; i < max_subtables; i ++) {
177 |         uint64_t cur_subtable_addr = (uint64_t)subtable_addr + i * roundup_256(SUBTABLE_LEN);
178 |         subtable_alloc_map_[i] = 0;
179 |         for (int j = 0; j < RACE_HASH_ADDRESSABLE_BUCKET_NUM; j ++) {
180 |             RaceHashBucket * bucket = (RaceHashBucket *)cur_subtable_addr + j;
181 |             bucket->local_depth = RACE_HASH_INIT_LOCAL_DEPTH;
182 |             bucket->prefix = i;
183 |             bucket ++;
184 |         }
185 |     }
186 | 
187 |     return 0;
188 | }
189 | 
190 | int ServerMM::init_hashtable() {
191 |     uint64_t root_addr = base_addr_ + client_hash_area_off_;
192 |     uint64_t subtable_st_addr = get_subtable_st_addr();
193 |     init_root((void *)(root_addr));
194 |     init_subtable((void *)(subtable_st_addr));
195 |     return 0;
196 | }
197 | 
198 | uint64_t ServerMM::get_kv_area_addr() {
199 |     return client_kv_area_off_ + base_addr_;
200 | }
201 | 
202 | uint64_t ServerMM::get_subtable_st_addr() {
203 |     return client_hash_area_off_ + base_addr_ + roundup_256(ROOT_RES_LEN);
204 | }


--------------------------------------------------------------------------------
/src/server_mm.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_SERVER_MM_
 2 | #define DDCKV_SERVER_MM_
 3 | 
 4 | #include <infiniband/verbs.h>
 5 | 
 6 | #include <map>
 7 | #include <unordered_map>
 8 | #include <vector>
 9 | #include <queue>
10 | 
11 | #include "kv_utils.h"
12 | #include "hashtable.h"
13 | 
14 | class ServerMM {
15 | private:
16 |     uint16_t my_sid_;
17 | 
18 |     uint64_t base_addr_;
19 |     uint64_t base_len_;
20 |     uint64_t client_meta_area_off_;
21 |     uint64_t client_meta_area_len_;
22 |     uint64_t client_gc_area_off_;
23 |     uint64_t client_gc_area_len_;
24 |     uint64_t client_hash_area_off_;
25 |     uint64_t client_hash_area_len_;
26 |     uint64_t client_kv_area_off_;
27 |     uint64_t client_kv_area_len_;
28 |     uint64_t client_kv_area_limit_;
29 | 
30 |     uint32_t num_memory_;
31 |     uint32_t num_replication_;
32 |     
33 |     uint32_t block_size_;
34 |     uint32_t num_blocks_;
35 |     struct ibv_mr  * mr_;
36 | #ifdef SERVER_MM
37 |     uint64_t next_free_block_addr_;
38 | #endif
39 | 
40 |     std::vector<bool> subtable_alloc_map_;
41 |     std::queue<uint64_t> allocable_blocks_;
42 |     std::unordered_map<uint64_t, bool> allocated_blocks_;
43 | 
44 |     void   * data_;
45 | 
46 |     // private methods
47 | private:
48 |     //init hash table index stored at client_hash_area_off_
49 |     int init_root(void * root_addr);
50 |     int init_subtable(void * subtable_addr);
51 |     int init_hashtable();
52 |     void get_allocable_blocks();
53 | 
54 | public:
55 |     ServerMM(uint64_t server_base_addr, uint64_t base_len, 
56 |         uint32_t block_size, const struct IbInfo * ib_info,
57 |         const struct GlobalConfig * conf);
58 |     ~ServerMM();
59 |     
60 |     uint64_t mm_alloc();
61 | 
62 |     int mm_free(uint64_t st_addr);
63 | 
64 |     uint64_t mm_alloc_subtable();
65 | 
66 |     uint32_t get_rkey();
67 | 
68 |     int get_client_gc_info(uint32_t client_id, __OUT struct MrInfo * mr_info);
69 |     int get_mr_info(__OUT struct MrInfo * mr_info);
70 |     
71 |     uint64_t get_kv_area_addr();
72 |     uint64_t get_subtable_st_addr();
73 | };
74 | 
75 | #endif


--------------------------------------------------------------------------------
/src/spinlock.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_SPINLOCK_H_
 2 | #define DDCKV_SPINLOCK_H_
 3 | 
 4 | #define barrier() asm volatile("": : :"memory")
 5 | 
 6 | #define _cpu_relax() asm volatile("pause\n": : :"memory")
 7 | 
 8 | static inline unsigned short xchg_8(void * ptr, unsigned char x) {
 9 |     __asm__ __volatile__("xchgb %0,%1"
10 |                          :"=r" (x)
11 |                          :"m" (*(volatile unsigned char *)ptr), "0" (x)
12 |                          :"memory");
13 |     return x;
14 | }
15 | 
16 | #define BUSY 1
17 | typedef unsigned char spinlock_t;
18 | 
19 | #define SPINLOCK_INITIALIZER 0
20 | 
21 | static inline void spin_lock(spinlock_t * lock) {
22 |     while (1) {
23 |         if (!xchg_8(lock, BUSY)) 
24 |             return;
25 | 
26 |         while (*lock) _cpu_relax();
27 |     }
28 | }
29 | 
30 | static inline void spin_unlock(spinlock_t * lock) {
31 |     barrier();
32 |     *lock = 0;
33 | }
34 | 
35 | static inline int spin_trylock(spinlock_t * lock) {
36 |     return xchg_8(lock, BUSY);
37 | }
38 | 
39 | #endif


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | find_package(GTest REQUIRED)
 2 | 
 3 | include_directories(${GTEST_INCLUDE_DIRS})
 4 | 
 5 | add_executable(test_nm test_nm.cc ddckv_test.cc)
 6 | add_executable(test_kv_utils test_kv_utils.cc ddckv_test.cc)
 7 | # add_executable(test_server test_server.cc ddckv_test.cc)
 8 | add_executable(test_client_server test_client_server.cc)
 9 | add_executable(test_client_client test_client_client.cc)
10 | add_executable(test_remote_nm test_remote_nm.cc ddckv_test.cc)
11 | add_executable(client_kv_shell client_kv_shell.cc)
12 | add_executable(test_mm test_mm.cc ddckv_test.cc)
13 | 
14 | target_link_libraries(test_remote_nm
15 |     ${GTEST_BOTH_LIBRARIES}
16 |     libddckv
17 |     pthread
18 |     ibverbs
19 | )
20 | 
21 | target_link_libraries(test_kv_utils
22 |     ${GTEST_BOTH_LIBRARIES}
23 |     libddckv
24 |     pthread
25 | )
26 | 
27 | target_link_libraries(test_nm
28 |     ${GTEST_BOTH_LIBRARIES}
29 |     libddckv
30 |     pthread
31 |     ibverbs
32 | )
33 | 
34 | # target_link_libraries(test_server
35 | #     ${GTEST_BOTH_LIBRARIES}
36 | #     libddckv
37 | #     pthread
38 | #     ibverbs
39 | # )
40 | 
41 | target_link_libraries(test_client_server 
42 |     libddckv
43 |     pthread
44 |     ibverbs
45 | )
46 | 
47 | target_compile_options(
48 |     test_client_client
49 |     PRIVATE
50 |     # ${CMAKE_CXX_FLAGS_DEBUG}
51 |     # "-g"
52 |     # "-D_DEBUG"
53 | )
54 | 
55 | target_link_libraries(test_client_client
56 |     ${GTEST_BOTH_LIBRARIES}
57 |     libddckv
58 |     pthread
59 |     ibverbs
60 | )
61 | 
62 | target_link_libraries(client_kv_shell
63 |     libddckv
64 |     pthread
65 |     ibverbs
66 | )
67 | 
68 | target_link_libraries(test_mm
69 |     ${GTEST_BOTH_LIBRARIES}
70 |     libddckv
71 |     pthread
72 |     ibverbs
73 | )
74 | 
75 | gtest_discover_tests(
76 |     test_nm
77 |     test_remote_nm
78 |     test_kv_utils
79 |     # test_server
80 |     test_client_client
81 |     test_mm
82 | )


--------------------------------------------------------------------------------
/tests/client_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "role": "CLIENT",
 3 |     "conn_type": "IB",
 4 |     "server_id": 3,
 5 |     "udp_port": 2333,
 6 |     "memory_num": 3,
 7 |     "memory_ips": [
 8 |         "10.10.10.1",
 9 |         "10.10.10.2",
10 |         "10.10.10.3"
11 |     ],
12 |     "ib_dev_id": 0,
13 |     "ib_port_id": 1,
14 |     "ib_gid_idx": 0,
15 | 
16 |     "server_base_addr":  "0x10000000",
17 |     "server_data_len":   2147483648,
18 |     "block_size":        67108864,
19 |     "subblock_size":     256,
20 |     "client_local_size": 1073741824,
21 | 
22 |     "num_replication": 3,
23 |     "num_idx_rep": 1,
24 |     "num_coroutines": 8,
25 |     "miss_rate_threash": 0.1,
26 |     
27 |     "main_core_id": 0,
28 |     "poll_core_id": 1,
29 |     "bg_core_id": 2,
30 |     "gc_core_id": 3
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/client_kv_shell.cc:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdint.h>
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | 
  6 | #include "client.h"
  7 | 
  8 | #define KV_KEYLEN_LIMIT 64
  9 | 
 10 | enum ReqType {
 11 |     INSERT,
 12 |     SEARCH,
 13 |     UPDATE,
 14 |     DELETE
 15 | };
 16 | 
 17 | typedef struct TagClientCmd {
 18 |     ReqType  cmdType;
 19 |     char     key[KV_KEYLEN_LIMIT];
 20 |     uint64_t klen;
 21 |     int64_t  value;
 22 |     uint64_t vlen;
 23 | } ClientCmd;
 24 | 
 25 | static int checkNumber(char * str) {
 26 |     for (int i = 0; i < strlen(str); i++) {
 27 |         if (str[i] < '0' || str[i] > '9') {
 28 |             return -1;
 29 |         }
 30 |     }
 31 |     return 0;
 32 | } 
 33 | 
 34 | static void usage() {
 35 |     printf("==== Usage ====\n");
 36 |     printf("put key value\n");
 37 |     printf("get key\n");
 38 |     printf("del key\n");
 39 |     printf("===============\n");
 40 | }
 41 | 
 42 | int parseInput(char * buf, __OUT ClientCmd * cmd) {
 43 |     int ret = -1;
 44 |     char * p = strtok(buf, " ");
 45 |     char * parsed[3]; // parsed[0]: cmdName, parsed[1]: key, parsed[2]: value
 46 |     // fetch key and value to the parsed
 47 |     for (int i = 0; i < 3; i++) {
 48 |         parsed[i] = p;
 49 |         p = strtok(NULL, " ");
 50 |     }
 51 | 
 52 |     // parse commands
 53 |     if (!strcmp(parsed[0], "search") || !strcmp(parsed[0], "SEARCH")) {
 54 |         // check if parsed[1] exists
 55 |         if (parsed[1] == NULL) {
 56 |             printf("Usage: %s key\n", parsed[0]);
 57 |             return -1;
 58 |         }
 59 |         // check if the length of the key exceeds the limit
 60 |         int klen = strlen(parsed[1]);
 61 |         if (klen > KV_KEYLEN_LIMIT) {
 62 |             printf("Error: key should be less than %d characters\n", KV_KEYLEN_LIMIT);
 63 |             return -1;
 64 |         }
 65 |         // copy the key to the ClientCmd
 66 |         memcpy(cmd->key, parsed[1], klen);
 67 |         // set other arguments
 68 |         cmd->cmdType = SEARCH;
 69 |         cmd->klen = klen;
 70 |         cmd->vlen = 0;
 71 |         return 0; // return success here
 72 |     } else if (!strcmp(parsed[0], "insert") || !strcmp(parsed[0], "INSERT")) {
 73 |         if (parsed[1] == NULL || parsed[2] == NULL) {
 74 |             printf("Usage: %s key value\n", parsed[0]);
 75 |             return -1;
 76 |         }
 77 |         // check if the length of the key exceeds the limit
 78 |         int klen = strlen(parsed[1]);
 79 |         if (klen > KV_KEYLEN_LIMIT) {
 80 |             printf("Error: key should be less than %d characters\n", KV_KEYLEN_LIMIT);
 81 |             return -1;
 82 |         }
 83 |         // check if the second argument is a number
 84 |         ret = checkNumber(parsed[2]);
 85 |         if (ret < 0) {
 86 |             printf("Error: value should be an integer number\n");
 87 |             return -1;
 88 |         }
 89 |         // set cmd
 90 |         cmd->cmdType = INSERT;
 91 |         memcpy(cmd->key, parsed[1], klen);
 92 |         cmd->klen = klen;
 93 |         cmd->value = atoll(parsed[2]);
 94 |         cmd->vlen = sizeof(int64_t);
 95 |         return 0;
 96 |     } else if (!strcmp(parsed[0], "update") || !strcmp(parsed[0], "UPDATE")) {
 97 |         if (parsed[1] == NULL || parsed[2] == NULL) {
 98 |             printf("Usage: %s key value\n", parsed[0]);
 99 |             return -1;
100 |         }
101 |         // check if the length of the key exceeds the limit
102 |         int klen = strlen(parsed[1]);
103 |         if (klen > KV_KEYLEN_LIMIT) {
104 |             printf("Error: key should be less than %d characters\n", KV_KEYLEN_LIMIT);
105 |             return -1;
106 |         }
107 |         // check if the second argument is a number
108 |         ret = checkNumber(parsed[2]);
109 |         if (ret < 0) {
110 |             printf("Error: value should be an integer number\n");
111 |             return -1;
112 |         }
113 |         // set cmd
114 |         cmd->cmdType = UPDATE;
115 |         memcpy(cmd->key, parsed[1], klen);
116 |         cmd->klen = klen;
117 |         cmd->value = atoll(parsed[2]);
118 |         cmd->vlen = sizeof(int64_t);
119 |         return 0;
120 |     } else if (!strcmp(parsed[0], "delete") || !strcmp(parsed[0], "DELETE")) {
121 |         // check if the key exists
122 |         if (parsed[1] == NULL) {
123 |             printf("Usage: %s key\n", parsed[0]);
124 |             return -1;
125 |         }
126 |         // check if the length of the key exceeds the limit
127 |         int klen = strlen(parsed[1]);
128 |         if (klen > KV_KEYLEN_LIMIT) {
129 |             printf("Error: key should be less than %d characters\n", KV_KEYLEN_LIMIT);
130 |             return -1;
131 |         }
132 |         // set cmd
133 |         cmd->cmdType = DELETE;
134 |         memcpy(cmd->key, parsed[1], klen);
135 |         cmd->klen = klen;
136 |         return 0; // return success here
137 |     } else if (!strcmp(parsed[0], "quit") || !strcmp(parsed[0], "q") || !strcmp(parsed[0], "exit")) {
138 |         exit(0);
139 |     } else if (!strcmp(parsed[0], "help")) {
140 |         usage();
141 |         return 0;
142 |     }
143 |     else {
144 |         // no match cmd
145 |         printf("Error: command not supported\n");
146 |         return -1;
147 |     }
148 |     return -1;
149 | }
150 | 
151 | typedef struct TagRetVal {
152 |     union {
153 |         int ret_code;
154 |         void * val_addr;
155 |     } ret_val;
156 | } RetVal;
157 | 
158 | static RetVal * clientShellExe(Client * client, ClientCmd * cmd) {
159 |     RetVal * ret_val = (RetVal *)malloc(sizeof(RetVal));
160 |     char buf[17] = {0};
161 | 
162 |     void * client_local_addr = client->get_local_buf_mr()->addr;
163 |     uint64_t client_input_addr = (uint64_t)client->get_input_buf();
164 |     
165 |     KVReqCtx ctx;
166 |     KVInfo   kv_info;
167 |     memset((void *)client_input_addr, 0, 1024);
168 |     memcpy((void *)(client_input_addr + sizeof(KVLogHeader)), cmd->key, cmd->klen);
169 |     memcpy((void *)(client_input_addr + sizeof(KVLogHeader) + cmd->klen), &cmd->value, cmd->vlen);
170 |     KVLogHeader * header = (KVLogHeader *)client_input_addr;
171 |     header->key_length = cmd->klen;
172 |     header->value_length = cmd->vlen;
173 |     header->is_valid = true;
174 | 
175 |     KVLogTail * tail = (KVLogTail *)((uint64_t)client_input_addr
176 |         + sizeof(KVLogHeader) + header->key_length + header->value_length);
177 |     
178 |     kv_info.key_len = cmd->klen;
179 |     kv_info.l_addr  = (void *)client_input_addr;
180 |     kv_info.lkey    = client->get_input_buf_lkey();
181 |     kv_info.value_len = cmd->vlen;
182 | 
183 |     ctx.kv_info = &kv_info;
184 |     ctx.coro_id = 0;
185 |     ctx.use_cache = true;
186 |     ctx.lkey = client->get_local_buf_mr()->lkey;
187 | 
188 |     switch (cmd->cmdType) {
189 |     case SEARCH:
190 |         ctx.req_type = KV_REQ_SEARCH;
191 |         printf("searching\n");
192 |         client->init_kv_search_space(client_local_addr, &ctx);
193 |         ret_val->ret_val.val_addr = client->kv_search(&ctx);
194 |         break;
195 |     case UPDATE:
196 |         ctx.req_type = KV_REQ_UPDATE;
197 |         tail->op = KV_OP_UPDATE;
198 |         client->init_kv_update_space(client_local_addr, &ctx);
199 |         ret_val->ret_val.ret_code = client->kv_update(&ctx);
200 |         break;
201 |     case DELETE:
202 |         ctx.req_type = KV_REQ_DELETE;
203 |         client->init_kv_delete_space(client_local_addr, &ctx);
204 |         ret_val->ret_val.ret_code = client->kv_delete(&ctx);
205 |         break;
206 |     case INSERT:
207 |         ctx.req_type = KV_REQ_INSERT;
208 |         tail->op = KV_OP_INSERT;
209 |         printf("inserting\n");
210 |         client->init_kv_insert_space(client_local_addr, &ctx);
211 |         ret_val->ret_val.ret_code = client->kv_insert(&ctx);
212 |         break;
213 |     default:
214 |         ret_val->ret_val.ret_code = -1;
215 |     }
216 |     return ret_val;
217 | }
218 | 
219 | int main(int argc, char ** argv) {
220 |     int ret = 0;
221 |     if (argc != 2) {
222 |         printf("Usage: %s path-to-config\n", argv[0]);
223 |         return 1;
224 |     }
225 |     GlobalConfig config;
226 |     ret = load_config(argv[1], &config);
227 |     // assert(ret == 0);
228 |     RetVal * ret_val = NULL;
229 | 
230 |     Client client(&config);
231 | 
232 |     boost::fibers::fiber polling_fb = client.start_polling_fiber();
233 | 
234 |     while (1) {
235 |         char buf[256];
236 |         // cmdline hint
237 |         printf("mykv >> ");
238 | 
239 |         // get input
240 |         fgets(buf, 256, stdin);
241 |         printf("buf: %s\n", buf);
242 |         buf[strlen(buf) - 1] = '\0';
243 |         
244 |         // parse command
245 |         ClientCmd cmd;
246 |         ret = parseInput(buf, &cmd);
247 |         if (ret < 0) {
248 |             printf("parse failed\n");
249 |             continue;
250 |         }
251 | 
252 |         // execute command
253 |         ret_val = clientShellExe(&client, &cmd);
254 |         if (ret < 0) {
255 |             printf("%s failed\n", buf);
256 |             continue;
257 |         }
258 | 
259 |         // print result
260 |         if (cmd.cmdType == SEARCH) {
261 |             if (ret_val->ret_val.val_addr != NULL)
262 |                 printf("%ld\n", *(uint64_t *)ret_val->ret_val.val_addr);
263 |             else
264 |                 printf("key not found\n");
265 |         } else {
266 |             printf("%d\n", ret_val->ret_val.ret_code);
267 |         }
268 |         free(ret_val);
269 |     }
270 |     polling_fb.join();
271 | }


--------------------------------------------------------------------------------
/tests/ddckv_test.cc:
--------------------------------------------------------------------------------
 1 | #include "ddckv_test.h"
 2 | 
 3 | void DDCKVTest::setup_server_conf() {
 4 |     strcpy(server_conf_.memory_ips[0], "127.0.0.1");
 5 |     server_conf_.role       = SERVER;
 6 |     server_conf_.conn_type  = IB;
 7 |     server_conf_.server_id  = 0;
 8 |     server_conf_.udp_port   = 2333;
 9 |     server_conf_.memory_num = 1;
10 |     server_conf_.ib_dev_id  = 0;
11 |     server_conf_.ib_port_id = 1;
12 |     server_conf_.ib_gid_idx = -1;
13 |     server_conf_.server_base_addr = 0x10000000;
14 |     server_conf_.server_data_len  = 2ll * GB;
15 |     server_conf_.block_size = 64ll * MB;
16 | }
17 | 
18 | void DDCKVTest::setup_client_conf() {
19 |     strcpy(client_conf_.memory_ips[0], "127.0.0.1");
20 |     client_conf_.role       = CLIENT;
21 |     client_conf_.conn_type  = IB;
22 |     client_conf_.server_id  = 1;
23 |     client_conf_.udp_port   = 2333;
24 |     client_conf_.memory_num = 1;
25 |     client_conf_.ib_dev_id  = 0;
26 |     client_conf_.ib_port_id = 1;
27 |     client_conf_.ib_gid_idx = -1;
28 |     client_conf_.num_replication = 2;
29 | }


--------------------------------------------------------------------------------
/tests/ddckv_test.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_TEST_H_
 2 | #define DDCKV_TEST_H_
 3 | 
 4 | #include <gtest/gtest.h>
 5 | 
 6 | #include "kv_utils.h"
 7 | 
 8 | #define GB (1024ll * 1024 * 1024)
 9 | #define MB (1024ll * 1024)
10 | #define KB (1024ll)
11 | 
12 | class DDCKVTest : public ::testing::Test {
13 | protected:
14 |     void setup_server_conf();
15 |     void setup_client_conf();
16 |     
17 | public:
18 |     struct GlobalConfig server_conf_;
19 |     struct GlobalConfig client_conf_;
20 | };
21 | 
22 | #endif


--------------------------------------------------------------------------------
/tests/server_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "role": "SERVER",
 3 |     "conn_type": "IB",
 4 |     "server_id": 0,
 5 |     "udp_port": 2333,
 6 |     "memory_num": 3,
 7 |     "memory_ips": [
 8 |         "10.10.10.1",
 9 |         "10.10.10.2",
10 |         "10.10.10.3"
11 |     ],
12 |     "ib_dev_id": 0,
13 |     "ib_port_id": 1,
14 |     "ib_gid_idx": 0,
15 | 
16 |     "server_base_addr":  "0x10000000",
17 |     "server_data_len":   2147483648,
18 |     "block_size":        67108864,
19 |     "subblock_size":     256,
20 |     "client_local_size": 1073741824,
21 | 
22 |     "num_replication": 3,
23 | 
24 |     "main_core_id": 0,
25 |     "poll_core_id": 1,
26 |     "bg_core_id": 2,
27 |     "gc_core_id": 3
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/test_client.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_TEST_CLIENT_H_
 2 | #define DDCKV_TEST_CLIENT_H_
 3 | 
 4 | #include <gtest/gtest.h>
 5 | 
 6 | #include "client.h"
 7 | #include "kv_utils.h"
 8 | 
 9 | class ClientTest : public ::testing::Test {
10 | protected:
11 |     void SetUp() override;
12 |     void TearDown() override;
13 | 
14 | public:
15 |     struct GlobalConfig client_conf_;
16 |     Client * client_;
17 | };
18 | 
19 | #endif


--------------------------------------------------------------------------------
/tests/test_client_server.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <assert.h>
 4 | #include <pthread.h>
 5 | 
 6 | #include "server.h"
 7 | 
 8 | int main(int argc, char ** argv) {
 9 |     if (argc != 2) {
10 |         printf("Usage: %s [server_id]\n", argv[0]);
11 |         return -1;
12 |     }
13 | 
14 |     int32_t server_id = atoi(argv[1]);
15 |     int32_t ret = 0;
16 |     struct GlobalConfig server_conf;
17 |     ret = load_config("./server_config.json", &server_conf);
18 |     // assert(ret == 0);
19 |     server_conf.server_id = server_id;
20 | 
21 |     printf("===== Starting Server %d =====\n", server_conf.server_id);
22 |     Server * server = new Server(&server_conf);
23 |     pthread_t server_tid;
24 |     pthread_create(&server_tid, NULL, server_main, (void *)server);
25 | 
26 |     printf("press to exit\n");
27 |     getchar();
28 |     printf("===== Ending Server %d =====\n", server_conf.server_id);
29 | 
30 |     server->stop();
31 |     return 0;
32 | }


--------------------------------------------------------------------------------
/tests/test_conf.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "role": "SERVER",
 3 |     "conn_type": "IB",
 4 |     "server_id": 0,
 5 |     "udp_port": 2333,
 6 |     "memory_num": 2,
 7 |     "memory_ips": [
 8 |         "10.10.10.1",
 9 |         "10.10.10.2"
10 |     ],
11 | 
12 |     "ib_dev_id": 0,
13 |     "ib_port_id": 1,
14 |     
15 |     "server_base_addr": "0x100000",
16 |     "server_data_len": 2147483648,
17 |     "block_size": 67108864,
18 |     "subblock_size": 256,
19 |     "client_local_size": 1073741824,
20 |     
21 |     "num_replication": 3
22 | }


--------------------------------------------------------------------------------
/tests/test_kv_utils.cc:
--------------------------------------------------------------------------------
  1 | #include <gtest/gtest.h>
  2 | 
  3 | #include <random>
  4 | #include <sys/mman.h>
  5 | 
  6 | #include "kv_utils.h"
  7 | 
  8 | TEST(test_kv_utils, kv_msg_conn_info) {
  9 |     struct KVMsg orig_kvmsg;
 10 |     struct KVMsg sent_kvmsg;
 11 |     
 12 |     std::default_random_engine e;
 13 | 
 14 |     orig_kvmsg.id = (uint16_t)e();
 15 |     orig_kvmsg.type = REQ_CONNECT;
 16 | 
 17 |     for (int i = 0; i < 16; i ++) {
 18 |         orig_kvmsg.body.conn_info.qp_info.gid[i] = (uint8_t)e();
 19 |     }
 20 | 
 21 |     orig_kvmsg.body.conn_info.qp_info.gid_idx = 0;
 22 |     orig_kvmsg.body.conn_info.qp_info.lid = (uint16_t)e();
 23 |     orig_kvmsg.body.conn_info.qp_info.port_num = (uint8_t)e();
 24 |     orig_kvmsg.body.conn_info.qp_info.qp_num = (uint32_t)e();
 25 |     orig_kvmsg.body.conn_info.gc_info.addr = (uint64_t)e();
 26 |     orig_kvmsg.body.conn_info.gc_info.rkey = (uint32_t)e();
 27 | 
 28 |     memcpy(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg));
 29 | 
 30 |     serialize_kvmsg(&sent_kvmsg);
 31 |     deserialize_kvmsg(&sent_kvmsg);
 32 | 
 33 |     int rc = memcmp(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg));
 34 |     ASSERT_EQ(rc, 0);
 35 | }
 36 | 
 37 | TEST(test_kv_utils, kv_msg_alloc_info) {
 38 |     struct KVMsg orig_kvmsg;
 39 |     struct KVMsg sent_kvmsg;
 40 | 
 41 |     std::default_random_engine e;
 42 |     orig_kvmsg.id = e();
 43 |     orig_kvmsg.type = REQ_ALLOC;
 44 |     orig_kvmsg.body.mr_info.addr = (uint64_t)e();
 45 |     orig_kvmsg.body.mr_info.rkey = (uint32_t)e();
 46 | 
 47 |     memcpy(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg));
 48 | 
 49 |     serialize_kvmsg(&sent_kvmsg);
 50 |     deserialize_kvmsg(&sent_kvmsg);
 51 | 
 52 |     int rc = memcmp(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg));
 53 |     ASSERT_EQ(rc, 0);
 54 | }
 55 | 
 56 | TEST(test_kv_utils, kv_msg_alloc_subtable_info) {
 57 |     struct KVMsg orig_kvmsg;
 58 |     struct KVMsg sent_kvmsg;
 59 | 
 60 |     std::default_random_engine e;
 61 |     orig_kvmsg.id = e();
 62 |     orig_kvmsg.type = REQ_ALLOC_SUBTABLE;
 63 |     orig_kvmsg.body.mr_info.addr = (uint64_t)e();
 64 |     orig_kvmsg.body.mr_info.rkey = (uint32_t)e();
 65 | 
 66 |     memcpy(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg));
 67 | 
 68 |     serialize_kvmsg(&sent_kvmsg);
 69 |     deserialize_kvmsg(&sent_kvmsg);
 70 | 
 71 |     int rc = memcmp(&sent_kvmsg, &orig_kvmsg, sizeof(struct KVMsg));
 72 |     ASSERT_EQ(rc, 0);
 73 | }
 74 | 
 75 | TEST(test_kv_utils, load_config) {
 76 |     const char * config_file_name = "./test_conf.json";
 77 |     struct GlobalConfig conf;
 78 |     int ret = load_config(config_file_name, &conf);
 79 |     ASSERT_TRUE(ret == 0);
 80 | 
 81 |     ASSERT_TRUE(conf.role == SERVER);
 82 |     ASSERT_TRUE(conf.conn_type == IB);
 83 |     ASSERT_TRUE(conf.server_id == 0);
 84 |     ASSERT_TRUE(conf.udp_port == 2333);
 85 |     ASSERT_TRUE(conf.memory_num == 2);
 86 |     ASSERT_EQ(strcmp(conf.memory_ips[0], "10.10.10.1"), 0);
 87 |     ASSERT_EQ(strcmp(conf.memory_ips[1], "10.10.10.2"), 0);
 88 | 
 89 |     ASSERT_TRUE(conf.ib_dev_id == 0);
 90 |     ASSERT_TRUE(conf.ib_port_id == 1);
 91 |     ASSERT_TRUE(conf.ib_gid_idx == -1);
 92 | 
 93 |     ASSERT_TRUE(conf.server_base_addr == 0x100000);
 94 |     ASSERT_TRUE(conf.server_data_len == 2147483648);
 95 |     ASSERT_TRUE(conf.block_size == 64 * 1024 * 1024);
 96 |     ASSERT_TRUE(conf.subblock_size == 256);
 97 |     ASSERT_TRUE(conf.client_local_size == 1024 * 1024 * 1024);
 98 | 
 99 |     ASSERT_TRUE(conf.num_replication == 3);
100 | }
101 | 
102 | TEST(test_kv_utils, encode_gc_slot) {
103 |     size_t buf_sz = 64 * 1024 * 1024;
104 |     void * buf_pr = mmap((void *)0x10000000, buf_sz, PROT_READ | PROT_WRITE, 
105 |         MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1, 0);
106 |     void * buf_bk = mmap((void *)0x20000000, buf_sz, PROT_READ | PROT_WRITE, 
107 |         MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1, 0);
108 | 
109 |     ASSERT_TRUE((uint64_t)buf_pr == 0x10000000);
110 |     ASSERT_TRUE((uint64_t)buf_bk == 0x20000000);
111 | 
112 |     DecodedClientGCSlot orig;
113 |     uint64_t blockoff = 5 * 256;
114 |     orig.bk_addr = (uint64_t)buf_bk + blockoff;
115 |     orig.pr_addr = (uint64_t)buf_pr + blockoff;
116 |     orig.num_subblocks = 5;
117 | 
118 |     EncodedClientGCSlot e;
119 |     encode_gc_slot(&orig, &e.meta_gc_addr);
120 | 
121 |     DecodedClientGCSlot d;
122 |     decode_gc_slot(e.meta_gc_addr, &d);
123 |     ASSERT_TRUE(d.pr_addr == orig.pr_addr);
124 |     ASSERT_TRUE(d.bk_addr == orig.bk_addr);
125 |     ASSERT_TRUE(d.num_subblocks == orig.num_subblocks);
126 | }


--------------------------------------------------------------------------------
/tests/test_mm.cc:
--------------------------------------------------------------------------------
 1 | #include "test_mm.h"
 2 | 
 3 | #include <pthread.h>
 4 | #include <infiniband/verbs.h>
 5 | 
 6 | void MMTest::SetUp() {
 7 |     int ret = 0;
 8 | 
 9 |     setup_server_conf();
10 |     setup_client_conf();
11 |     client_conf_.num_replication = 1;
12 |     client_conf_.server_base_addr = 0x10000000;
13 |     client_conf_.server_data_len = 2147483648;
14 |     client_conf_.block_size = 67108864;
15 |     client_conf_.subblock_size = 256;
16 |     client_conf_.client_local_size = 1073741824;
17 |     client_conf_.num_coroutines = 8;
18 |     
19 |     server_ = new Server(&server_conf_);
20 |     pthread_create(&server_tid_, NULL, server_main, server_);
21 | 
22 |     client_ = new Client(&client_conf_);
23 |     polling_tid_ = client_->start_polling_thread();
24 |     
25 |     client_nm_ = client_->get_nm();
26 |     client_mm_ = client_->get_mm();
27 |     printf("===== Initialization finished ====\n");
28 | }
29 | 
30 | void MMTest::TearDown() {
31 |     server_->stop();
32 |     pthread_join(server_tid_, NULL);
33 |     client_->stop_polling_thread();
34 |     pthread_join(polling_tid_, NULL);
35 |     delete server_;
36 |     delete client_nm_;
37 |     delete client_mm_;
38 | }
39 | 
40 | TEST_F(MMTest, initialization) {
41 |     ASSERT_TRUE(true);
42 | }
43 | 
44 | TEST_F(MMTest, mmalloc) {
45 |     ClientMMAllocCtx alloc_ctx;
46 |     client_mm_->mm_alloc(1024 * 1024 * 32, client_nm_, &alloc_ctx);
47 |     printf("%lx\n", alloc_ctx.addr_list[0]);
48 | }
49 | 
50 | TEST_F(MMTest, mmalloc_multi) {
51 |     for (int i = 0; i < 10; i ++) {
52 |         ClientMMAllocCtx ctx;
53 |         client_mm_->mm_alloc(1024 * 1024 * 32, client_nm_, &ctx);
54 |         printf("%lx\n", ctx.addr_list[0]);
55 |     }
56 | }
57 | 
58 | TEST_F(MMTest, mmalloc_multi_fiber) {
59 |     boost::fibers::fiber fiber_list[8];
60 |     for (int i = 0; i < 8; i ++) {
61 |         boost::fibers::fiber fb([&](int coro_id) {
62 |             for (int i = 0; i < 2; i ++) {
63 |                 ClientMMAllocCtx ctx;
64 |                 printf("%d: start alloc\n", coro_id);
65 |                 client_mm_->mm_alloc(1024 * 1024 * 32, client_nm_, &ctx);
66 |                 printf("%d: %lx\n", coro_id, ctx.addr_list[0]);
67 |                 boost::this_fiber::yield();
68 |             }
69 |         }, i);
70 |         fiber_list[i] = std::move(fb);
71 |     }
72 |     for (int i = 0; i < 8; i ++) {
73 |         fiber_list[i].join();
74 |     }
75 | }


--------------------------------------------------------------------------------
/tests/test_mm.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_TEST_MM_H_
 2 | #define DDCKV_TEST_MM_H_
 3 | 
 4 | #include <gtest/gtest.h>
 5 | 
 6 | #include <pthread.h>
 7 | 
 8 | #include "ddckv_test.h"
 9 | #include "kv_utils.h"
10 | #include "client.h"
11 | #include "server.h"
12 | #include "nm.h"
13 | 
14 | class MMTest : public DDCKVTest {
15 | protected:
16 |     void SetUp() override;
17 |     void TearDown() override;
18 | 
19 | public:
20 |     Server            * server_;
21 |     Client            * client_;
22 |     ClientMM          * client_mm_;
23 |     UDPNetworkManager * client_nm_;
24 | 
25 |     pthread_t server_tid_;
26 |     pthread_t polling_tid_;
27 | 
28 |     int ib_connect(struct MrInfo * mr_info);
29 | };
30 | 
31 | #endif


--------------------------------------------------------------------------------
/tests/test_nm.cc:
--------------------------------------------------------------------------------
  1 | #include "test_nm.h"
  2 | 
  3 | #include <pthread.h>
  4 | #include <infiniband/verbs.h>
  5 | 
  6 | #include <cstring>
  7 | #include <map>
  8 | 
  9 | #include "nm.h"
 10 | #include "server.h"
 11 | #include "kv_utils.h"
 12 | 
 13 | void NMTest::SetUp() {
 14 |     setup_server_conf();
 15 |     setup_client_conf();
 16 |     server_nm_ = new UDPNetworkManager(&server_conf_);
 17 |     client_nm_ = new UDPNetworkManager(&client_conf_);
 18 | }
 19 | 
 20 | void NMTest::TearDown() {
 21 |     delete server_nm_;
 22 |     delete client_nm_;
 23 | }
 24 | 
 25 | int NMTest::ib_connect(struct MrInfo * mr_info) {
 26 |     pthread_t server_tid;
 27 |     int ret;
 28 |     pthread_create(&server_tid, NULL, ib_connect_server, server_nm_);
 29 | 
 30 |     ret = client_nm_->client_connect_one_rc_qp(0, mr_info);
 31 |     assert(ret == 0);
 32 |     
 33 |     pthread_join(server_tid, NULL);
 34 |     return 0;
 35 | }
 36 | 
 37 | SrReqCtx * NMTest::gen_sr_reqs(struct MrInfo * mr_info) {
 38 |     SrReqCtx * ret_ctx = (SrReqCtx *)malloc(sizeof(SrReqCtx));
 39 |     for (int i = 0; i < 4; i ++) {
 40 |         test_source_data_[i] = (123 * i) ^ i;
 41 |     }
 42 |     
 43 |     for (int i = 0; i < 4; i ++) {
 44 |         if (i < 2) {
 45 |             ret_ctx->sg_list_1[i].addr   = (uint64_t)&test_source_data_[i];
 46 |             ret_ctx->sg_list_1[i].length = 8;
 47 |             ret_ctx->sg_list_1[i].lkey   = 0;
 48 |         } else {
 49 |             ret_ctx->sg_list_2[i - 2].addr   = (uint64_t)&test_source_data_[i];
 50 |             ret_ctx->sg_list_2[i - 2].length = 8;
 51 |             ret_ctx->sg_list_2[i - 2].lkey   = 0;
 52 |         }
 53 |     }
 54 | 
 55 |     for (int i = 0; i < 4; i ++) {
 56 |         if (i < 2) {
 57 |             ret_ctx->sr_list_1[i].wr_id = i;
 58 |             ret_ctx->sr_list_1[i].sg_list = &ret_ctx->sg_list_1[i];
 59 |             ret_ctx->sr_list_1[i].num_sge = 1;
 60 |             ret_ctx->sr_list_1[i].opcode  = IBV_WR_RDMA_WRITE;
 61 |             ret_ctx->sr_list_1[i].send_flags = IBV_SEND_INLINE;
 62 |             ret_ctx->sr_list_1[i].wr.rdma.remote_addr = mr_info->addr + i * sizeof(uint64_t);
 63 |             ret_ctx->sr_list_1[i].wr.rdma.rkey        = mr_info->rkey;
 64 |             ret_ctx->sr_list_1[i].next = NULL;
 65 |         } else {
 66 |             ret_ctx->sr_list_2[i - 2].wr_id = i;
 67 |             ret_ctx->sr_list_2[i - 2].sg_list = &ret_ctx->sg_list_2[i - 2];
 68 |             ret_ctx->sr_list_2[i - 2].num_sge = 1;
 69 |             ret_ctx->sr_list_2[i - 2].opcode  = IBV_WR_RDMA_WRITE;
 70 |             ret_ctx->sr_list_2[i - 2].send_flags = IBV_SEND_INLINE;
 71 |             ret_ctx->sr_list_2[i - 2].wr.rdma.remote_addr = mr_info->addr + i * sizeof(uint64_t);
 72 |             ret_ctx->sr_list_2[i - 2].wr.rdma.rkey        = mr_info->rkey;
 73 |             ret_ctx->sr_list_2[i - 2].next = NULL;
 74 |         }
 75 |     }
 76 |     ret_ctx->sr_list_1[0].next = &ret_ctx->sr_list_1[1];
 77 |     ret_ctx->sr_list_2[0].next = &ret_ctx->sr_list_2[1];
 78 | 
 79 |     ret_ctx->m_srl[0].num_sr = 2;
 80 |     ret_ctx->m_srl[0].server_id = 0;
 81 |     ret_ctx->m_srl[0].sr_list = ret_ctx->sr_list_1;
 82 |     ret_ctx->m_srl[1].num_sr = 2;
 83 |     ret_ctx->m_srl[1].server_id = 0;
 84 |     ret_ctx->m_srl[1].sr_list = ret_ctx->sr_list_2;
 85 | 
 86 |     ret_ctx->srl1.num_sr = 2;
 87 |     ret_ctx->srl1.server_id = 0;
 88 |     ret_ctx->srl1.sr_list = ret_ctx->sr_list_1;
 89 |     ret_ctx->srl2.num_sr = 2;
 90 |     ret_ctx->srl2.server_id = 0;
 91 |     ret_ctx->srl2.sr_list = ret_ctx->sr_list_2;
 92 | 
 93 |     return ret_ctx;
 94 | }
 95 | 
 96 | void * udp_send_recv_server(void * args) {
 97 |     UDPNetworkManager * nm = (UDPNetworkManager *)args;
 98 |     KVMsg request;
 99 |     struct sockaddr_in src_addr;
100 |     socklen_t  src_addr_len = sizeof(struct sockaddr_in);
101 |     int ret = nm->nm_recv_udp_msg(&request, &src_addr, &src_addr_len);
102 |     assert(ret == 0);
103 |     deserialize_kvmsg(&request);
104 |     assert(request.type == REQ_ALLOC);
105 |     assert(request.id == 1);
106 |     KVMsg reply;
107 |     reply.type = REP_ALLOC;
108 |     reply.id   = nm->get_server_id();
109 |     serialize_kvmsg(&reply);
110 |     ret = nm->nm_send_udp_msg(&reply, &src_addr, src_addr_len);
111 |     assert(ret == 0);
112 |     return NULL;
113 | }
114 | 
115 | void * udp_send_recv_client(void * args) {
116 |     UDPNetworkManager * nm = (UDPNetworkManager *)args;
117 |     struct KVMsg request;
118 |     request.type = REQ_ALLOC;
119 |     request.id   = nm->get_server_id();
120 |     serialize_kvmsg(&request);
121 |     int ret = nm->nm_send_udp_msg_to_server(&request, 0);
122 |     assert(ret == 0);
123 |     struct KVMsg reply;
124 |     ret = nm->nm_recv_udp_msg(&reply, NULL, NULL);
125 |     assert(ret == 0);
126 |     deserialize_kvmsg(&reply);
127 |     assert(reply.id == 0);
128 |     assert(reply.type == REP_ALLOC);
129 |     return NULL;
130 | }
131 | 
132 | void * ib_connect_server(void * args) {
133 |     UDPNetworkManager * nm = (UDPNetworkManager *)args;
134 |     struct KVMsg request;
135 |     struct sockaddr_in client_addr;
136 |     socklen_t client_addr_len = sizeof(struct sockaddr_in);
137 |     int rc = nm->nm_recv_udp_msg(&request, &client_addr, &client_addr_len);
138 |     assert(rc == 0);
139 |     deserialize_kvmsg(&request);
140 |     
141 |     assert(request.type == REQ_CONNECT);
142 |     assert(request.id == 1);
143 |     struct KVMsg reply;
144 |     reply.id = nm->get_server_id();
145 |     reply.type = REP_CONNECT;
146 |     rc = nm->nm_on_connect_new_qp(&request, &reply.body.conn_info.qp_info);
147 |     assert(rc == 0);
148 | 
149 |     struct IbInfo ib_info;
150 |     nm->get_ib_info(&ib_info);
151 |     void * buf = malloc(1024);
152 |     struct ibv_mr * mr = ibv_reg_mr(ib_info.ib_pd, buf, 128,
153 |         IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
154 |     reply.body.conn_info.gc_info.addr = (uint64_t)buf;
155 |     reply.body.conn_info.gc_info.rkey = mr->rkey;
156 |     serialize_kvmsg(&reply);
157 | 
158 |     rc = nm->nm_send_udp_msg(&reply, &client_addr, client_addr_len);
159 |     assert(rc == 0);
160 |     deserialize_kvmsg(&reply);
161 |     rc = nm->nm_on_connect_connect_qp(request.id, &reply.body.conn_info.qp_info,
162 |         &request.body.conn_info.qp_info);
163 |     assert(rc == 0);
164 |     return NULL;
165 | }
166 | 
167 | TEST_F(NMTest, udp_send_recv) {
168 |     pthread_t server_tid, client_tid;
169 |     pthread_create(&server_tid, NULL, 
170 |         udp_send_recv_server, (void *)server_nm_);
171 |     pthread_create(&client_tid, NULL, 
172 |         udp_send_recv_client, (void *)client_nm_);
173 |     pthread_join(server_tid, NULL);
174 |     pthread_join(client_tid, NULL);
175 | }
176 | 
177 | TEST_F(NMTest, ib_connect) {
178 |     // create server process
179 |     pthread_t server_tid;
180 |     int ret;
181 |     pthread_create(&server_tid, NULL, ib_connect_server, (void *)server_nm_);
182 | 
183 |     ret = client_nm_->client_connect_all_rc_qp();
184 |     ASSERT_TRUE(ret == 0);
185 | 
186 |     pthread_join(server_tid, NULL);
187 | 
188 |     server_nm_->close_udp_sock();
189 |     client_nm_->close_udp_sock();
190 | }
191 | 
192 | TEST_F(NMTest, nm_utils) {
193 |     uint32_t server_id = server_nm_->get_server_id();
194 |     uint32_t client_id = client_nm_->get_server_id();
195 |     ASSERT_TRUE(server_id == server_conf_.server_id);
196 |     ASSERT_TRUE(client_id == client_conf_.server_id);
197 | }
198 | 
199 | TEST_F(NMTest, ib_write_read) {
200 |     struct MrInfo mr_info;
201 |     int ret = ib_connect(&mr_info);
202 |     ASSERT_TRUE(ret == 0);
203 |     ASSERT_TRUE(mr_info.addr != 0);
204 |     memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4);
205 | 
206 |     uint64_t test_data = 100;
207 |     ret = client_nm_->nm_rdma_write_inl_to_sid(&test_data, sizeof(uint64_t), 
208 |         mr_info.addr, mr_info.rkey, 0);
209 |     ASSERT_TRUE(ret == 0);
210 |     ASSERT_TRUE(test_data == *(uint64_t *)mr_info.addr);
211 | 
212 |     test_data = 10101;
213 |     struct IbInfo client_ib_info;
214 |     client_nm_->get_ib_info(&client_ib_info);
215 |     struct ibv_mr * tmp_mr = ibv_reg_mr(client_ib_info.ib_pd, 
216 |         &test_data, sizeof(uint64_t), IBV_ACCESS_LOCAL_WRITE);
217 |     ASSERT_TRUE(tmp_mr != NULL);
218 |     ret = client_nm_->nm_rdma_write_to_sid(&test_data, tmp_mr->lkey, sizeof(uint64_t), mr_info.addr, mr_info.rkey, 0);
219 | 
220 | 
221 |     uint64_t read_data = 0;
222 |     tmp_mr = ibv_reg_mr(client_ib_info.ib_pd, &read_data, sizeof(uint64_t), IBV_ACCESS_LOCAL_WRITE);
223 |     ASSERT_TRUE(tmp_mr != NULL);
224 |     ASSERT_EQ((uint64_t)&read_data, (uint64_t)tmp_mr->addr);
225 | 
226 |     ret = client_nm_->nm_rdma_read_from_sid(&read_data, 
227 |         tmp_mr->lkey, sizeof(uint64_t), mr_info.addr, mr_info.rkey, 0);
228 |     ASSERT_TRUE(ret == 0);
229 |     ASSERT_TRUE(test_data == read_data);
230 | }
231 | 
232 | TEST_F(NMTest, rdma_post_sr_lists_sync_0) {
233 |     struct MrInfo mr_info;
234 |     int ret = ib_connect(&mr_info);
235 |     ASSERT_TRUE(ret == 0);
236 |     ASSERT_TRUE(mr_info.addr != 0);
237 |     memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4);
238 | 
239 |     SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info);
240 |     ASSERT_TRUE(sr_ctx != NULL);
241 | 
242 |     ret = client_nm_->rdma_post_sr_lists_sync(&sr_ctx->srl1, 1, NULL);
243 |     ASSERT_TRUE(ret == 0);
244 |     ret = client_nm_->rdma_post_sr_lists_sync(&sr_ctx->srl2, 1, NULL);
245 |     ASSERT_TRUE(ret == 0);
246 | 
247 |     uint64_t * tar_addr = (uint64_t *)mr_info.addr;
248 |     for (int i = 0; i < 4; i ++) {
249 |         ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << "  src: " 
250 |             << test_source_data_[i] << std::endl;
251 |     }
252 | }
253 | 
254 | TEST_F(NMTest, rdma_post_sr_lists_sync_1) {
255 |     struct MrInfo mr_info;
256 |     int ret = ib_connect(&mr_info);
257 |     ASSERT_TRUE(ret == 0);
258 |     ASSERT_TRUE(mr_info.addr != 0);
259 |     memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4);
260 | 
261 |     SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info);
262 |     ASSERT_TRUE(sr_ctx != NULL);
263 | 
264 |     ret = client_nm_->rdma_post_sr_lists_sync(sr_ctx->m_srl, 2, NULL);
265 |     ASSERT_TRUE(ret == 0);
266 |     
267 |     uint64_t * tar_addr = (uint64_t *)mr_info.addr;
268 |     for (int i = 0; i < 4; i ++) {
269 |         ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << "  src: " 
270 |             << test_source_data_[i] << std::endl;
271 |     }
272 | }
273 | 
274 | TEST_F(NMTest, poll_local) {
275 |     struct MrInfo mr_info;
276 |     int ret = ib_connect(&mr_info);
277 |     ASSERT_TRUE(ret == 0);
278 |     ASSERT_TRUE(mr_info.addr != 0);
279 |     memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4);
280 |     
281 |     pthread_t polling_tid;
282 |     pthread_create(&polling_tid, NULL, nm_polling_thread, client_nm_);
283 | 
284 |     SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info);
285 |     ASSERT_TRUE(sr_ctx != NULL);
286 | 
287 |     sr_ctx->sr_list_1[1].send_flags |= IBV_SEND_SIGNALED;
288 |     sr_ctx->sr_list_2[1].send_flags |= IBV_SEND_SIGNALED;
289 | 
290 |     ret = client_nm_->rdma_post_send_batch_async(0, sr_ctx->sr_list_1);
291 |     ASSERT_TRUE(ret == 0);
292 |     ret = client_nm_->rdma_post_send_batch_async(0, sr_ctx->sr_list_2);
293 |     ASSERT_TRUE(ret == 0);
294 | 
295 |     std::map<uint64_t, struct ibv_wc *> l_wait_wc_map;
296 |     l_wait_wc_map[1] = NULL;
297 |     l_wait_wc_map[3] = NULL;
298 |     while (1) {
299 |         ret = client_nm_->nm_check_completion(l_wait_wc_map);
300 |         ASSERT_TRUE(ret == 0);
301 |         if (ib_is_all_wrid_finished(l_wait_wc_map)) {
302 |             break;
303 |         }
304 |     }
305 |     
306 |     uint64_t * tar_addr = (uint64_t *)mr_info.addr;
307 |     for (int i = 0; i < 4; i ++) {
308 |         ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << "  src: " 
309 |             << test_source_data_[i] << std::endl;
310 |     }
311 | 
312 |     client_nm_->stop_polling();
313 |     pthread_join(polling_tid, NULL);
314 | }
315 | 
316 | TEST_F(NMTest, rdma_post_sr_list_batch_sync_0) {
317 |     struct MrInfo mr_info;
318 |     int ret = ib_connect(&mr_info);
319 |     ASSERT_TRUE(ret == 0);
320 |     ASSERT_TRUE(mr_info.addr != 0);
321 |     memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4);
322 | 
323 |     SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info);
324 |     ASSERT_TRUE(sr_ctx != NULL);
325 | 
326 |     std::vector<IbvSrList *> test_batch;
327 |     std::vector<uint32_t> test_num_batch;
328 |     test_batch.push_back(&sr_ctx->srl1);
329 |     test_batch.push_back(&sr_ctx->srl2);
330 |     test_num_batch.push_back(1);
331 |     test_num_batch.push_back(1);
332 | 
333 |     ret = client_nm_->rdma_post_sr_list_batch_sync(test_batch, test_num_batch, NULL);
334 |     ASSERT_TRUE(ret == 0);
335 | 
336 |     uint64_t * tar_addr = (uint64_t *)mr_info.addr;
337 |     for (int i = 0; i < 4; i ++) {
338 |         ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << "  src: " 
339 |             << test_source_data_[i] << std::endl;
340 |     }
341 | }
342 | 
343 | TEST_F(NMTest, rdma_post_sr_list_batch_sync_1) {
344 |     struct MrInfo mr_info;
345 |     int ret = ib_connect(&mr_info);
346 |     ASSERT_TRUE(ret == 0);
347 |     ASSERT_TRUE(mr_info.addr != 0);
348 |     memset((void *)mr_info.addr, 0, sizeof(uint64_t) * 4);
349 | 
350 |     SrReqCtx * sr_ctx = gen_sr_reqs(&mr_info);
351 |     ASSERT_TRUE(sr_ctx != NULL);
352 | 
353 |     std::vector<IbvSrList *> test_batch;
354 |     std::vector<uint32_t> test_num_batch;
355 |     test_batch.push_back(sr_ctx->m_srl);
356 |     test_num_batch.push_back(2);
357 | 
358 |     ret = client_nm_->rdma_post_sr_list_batch_sync(test_batch, test_num_batch, NULL);
359 |     ASSERT_TRUE(ret == 0);
360 | 
361 |     uint64_t * tar_addr = (uint64_t *)mr_info.addr;
362 |     for (int i = 0; i < 4; i ++) {
363 |         ASSERT_TRUE(tar_addr[i] == test_source_data_[i]) << "tar: " << tar_addr[i] << "  src: " 
364 |             << test_source_data_[i] << std::endl;
365 |     }
366 | }


--------------------------------------------------------------------------------
/tests/test_nm.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_TEST_NM_H_
 2 | #define DDCKV_TEST_NM_H_
 3 | 
 4 | #include <gtest/gtest.h>
 5 | 
 6 | #include "ddckv_test.h"
 7 | #include "kv_utils.h"
 8 | #include "nm.h"
 9 | 
10 | typedef struct TagSrReqCtx {
11 |     struct ibv_send_wr sr_list_1[2];
12 |     struct ibv_send_wr sr_list_2[2];
13 |     struct ibv_sge     sg_list_1[2];
14 |     struct ibv_sge     sg_list_2[2];
15 | 
16 |     IbvSrList srl1;
17 |     IbvSrList srl2;
18 |     IbvSrList m_srl[2];
19 | } SrReqCtx;
20 | 
21 | class NMTest : public DDCKVTest {
22 | protected:
23 |     void SetUp() override;
24 |     void TearDown() override;
25 | 
26 | public:
27 |     UDPNetworkManager * server_nm_;
28 |     UDPNetworkManager * client_nm_;
29 | 
30 |     uint64_t test_source_data_[4];
31 | 
32 |     int ib_connect(struct MrInfo * mr_info);
33 |     SrReqCtx * gen_sr_reqs(struct MrInfo * mr_info);
34 | };
35 | 
36 | void * udp_send_recv_server(void * args);
37 | void * udp_send_recv_client(void * args);
38 | 
39 | void * ib_connect_server(void * args);
40 | 
41 | #endif


--------------------------------------------------------------------------------
/tests/test_remote_nm.cc:
--------------------------------------------------------------------------------
  1 | #include "test_remote_nm.h"
  2 | 
  3 | #include <assert.h>
  4 | 
  5 | #include "nm.h"
  6 | #include "kv_utils.h"
  7 | 
  8 | void NMRemoteTest::SetUp() {
  9 |     int ret = 0;
 10 |     ret = load_config("./client_config.json", &global_conf_);
 11 |     ASSERT_TRUE(ret == 0);
 12 | 
 13 |     client_nm_ = new UDPNetworkManager(&global_conf_);
 14 | 
 15 |     for (int i = 0; i < 2; i ++) {
 16 |         ret = client_nm_->client_connect_one_rc_qp(i, &mr_info_[i]);
 17 |         ASSERT_TRUE(ret == 0);
 18 |     }
 19 | }
 20 | 
 21 | void NMRemoteTest::TearDown() {
 22 |     delete client_nm_;
 23 | }
 24 | 
 25 | SrReqCtx * NMRemoteTest::gen_sr_reqs() {
 26 |     SrReqCtx * ret_ctx = (SrReqCtx *)malloc(sizeof(SrReqCtx));
 27 |     for (int i = 0; i < 4; i ++) {
 28 |         test_source_data_[i] = (123 * i) ^ i;
 29 |     }
 30 |     
 31 |     for (int i = 0; i < 4; i ++) {
 32 |         if (i < 2) {
 33 |             ret_ctx->sg_list_1[i].addr   = (uint64_t)&test_source_data_[i];
 34 |             ret_ctx->sg_list_1[i].length = 8;
 35 |             ret_ctx->sg_list_1[i].lkey   = 0;
 36 |         } else {
 37 |             ret_ctx->sg_list_2[i - 2].addr   = (uint64_t)&test_source_data_[i];
 38 |             ret_ctx->sg_list_2[i - 2].length = 8;
 39 |             ret_ctx->sg_list_2[i - 2].lkey   = 0;
 40 |         }
 41 |     }
 42 | 
 43 |     for (int i = 0; i < 4; i ++) {
 44 |         if (i < 2) {
 45 |             ret_ctx->sr_list_1[i].wr_id = i;
 46 |             ret_ctx->sr_list_1[i].sg_list = &ret_ctx->sg_list_1[i];
 47 |             ret_ctx->sr_list_1[i].num_sge = 1;
 48 |             ret_ctx->sr_list_1[i].opcode  = IBV_WR_RDMA_WRITE;
 49 |             ret_ctx->sr_list_1[i].send_flags = IBV_SEND_INLINE;
 50 |             ret_ctx->sr_list_1[i].wr.rdma.remote_addr = mr_info_[0].addr + i * sizeof(uint64_t);
 51 |             ret_ctx->sr_list_1[i].wr.rdma.rkey        = mr_info_[0].rkey;
 52 |             ret_ctx->sr_list_1[i].next = NULL;
 53 |         } else {
 54 |             ret_ctx->sr_list_2[i - 2].wr_id = i;
 55 |             ret_ctx->sr_list_2[i - 2].sg_list = &ret_ctx->sg_list_2[i - 2];
 56 |             ret_ctx->sr_list_2[i - 2].num_sge = 1;
 57 |             ret_ctx->sr_list_2[i - 2].opcode  = IBV_WR_RDMA_WRITE;
 58 |             ret_ctx->sr_list_2[i - 2].send_flags = IBV_SEND_INLINE;
 59 |             ret_ctx->sr_list_2[i - 2].wr.rdma.remote_addr = mr_info_[1].addr + i * sizeof(uint64_t);
 60 |             ret_ctx->sr_list_2[i - 2].wr.rdma.rkey        = mr_info_[1].rkey;
 61 |             ret_ctx->sr_list_2[i - 2].next = NULL;
 62 |         }
 63 |     }
 64 |     ret_ctx->sr_list_1[0].next = &ret_ctx->sr_list_1[1];
 65 |     ret_ctx->sr_list_2[0].next = &ret_ctx->sr_list_2[1];
 66 | 
 67 |     ret_ctx->m_srl[0].num_sr = 2;
 68 |     ret_ctx->m_srl[0].server_id = 0;
 69 |     ret_ctx->m_srl[0].sr_list = ret_ctx->sr_list_1;
 70 |     ret_ctx->m_srl[1].num_sr = 2;
 71 |     ret_ctx->m_srl[1].server_id = 1;
 72 |     ret_ctx->m_srl[1].sr_list = ret_ctx->sr_list_2;
 73 | 
 74 |     ret_ctx->srl1.num_sr = 2;
 75 |     ret_ctx->srl1.server_id = 0;
 76 |     ret_ctx->srl1.sr_list = ret_ctx->sr_list_1;
 77 |     ret_ctx->srl2.num_sr = 2;
 78 |     ret_ctx->srl2.server_id = 1;
 79 |     ret_ctx->srl2.sr_list = ret_ctx->sr_list_2;
 80 | 
 81 |     return ret_ctx;
 82 | }
 83 | 
 84 | TEST_F(NMRemoteTest, remote_basic) {
 85 |     int ret = 0;
 86 |     uint64_t test_data = 1231241;
 87 |     ret = client_nm_->nm_rdma_write_inl_to_sid(&test_data, sizeof(uint64_t), 
 88 |         mr_info_[0].addr, mr_info_[0].rkey, 0);
 89 |     ASSERT_TRUE(ret == 0);
 90 |     
 91 |     uint64_t read_data = 0;
 92 |     struct IbInfo client_ib_info;
 93 |     client_nm_->get_ib_info(&client_ib_info);
 94 |     struct ibv_mr * tmp_mr = ibv_reg_mr(client_ib_info.ib_pd, &read_data, 
 95 |         sizeof(uint64_t), IBV_ACCESS_LOCAL_WRITE);
 96 |     ASSERT_TRUE(tmp_mr != NULL);
 97 |     
 98 |     ret = client_nm_->nm_rdma_read_from_sid(&read_data, tmp_mr->lkey, 
 99 |         sizeof(uint64_t), mr_info_[0].addr, mr_info_[0].rkey, 0);
100 |     ASSERT_TRUE(ret == 0);
101 |     ASSERT_TRUE(test_data == read_data);
102 | }
103 | 
104 | TEST_F(NMRemoteTest, rdma_post_sr_lists_async) {
105 |     int ret = 0;
106 |     SrReqCtx * sr_ctx = gen_sr_reqs();
107 |     ASSERT_TRUE(sr_ctx != NULL);
108 | 
109 |     pthread_t polling_tid;
110 |     pthread_create(&polling_tid, NULL, nm_polling_thread, client_nm_);
111 | 
112 |     std::map<uint64_t, struct ibv_wc *> l_wait_wc_map;
113 |     ret = client_nm_->rdma_post_sr_lists_async(sr_ctx->m_srl, 2, l_wait_wc_map);
114 |     ASSERT_TRUE(ret == 0);
115 | 
116 |     std::map<uint64_t, struct ibv_wc *>::iterator it = l_wait_wc_map.begin();
117 |     ASSERT_TRUE(l_wait_wc_map.size() == 2);
118 | 
119 |     while (1) {
120 |         ret = client_nm_->nm_check_completion(l_wait_wc_map);
121 |         ASSERT_TRUE(ret == 0);
122 |         if (ib_is_all_wrid_finished(l_wait_wc_map)) {
123 |             break;
124 |         }
125 |     }
126 | 
127 |     client_nm_->stop_polling();
128 |     pthread_join(polling_tid, NULL);
129 | }
130 | 
131 | TEST_F(NMRemoteTest, rdma_post_sr_list_batch_async) {
132 |     int ret = 0;
133 |     SrReqCtx * sr_ctx = gen_sr_reqs();
134 |     ASSERT_TRUE(sr_ctx != NULL);
135 | 
136 |     pthread_t polling_tid;
137 |     pthread_create(&polling_tid, NULL, nm_polling_thread, client_nm_);
138 | 
139 |     std::vector<IbvSrList *> test_batch;
140 |     std::vector<uint32_t> test_num_batch;
141 |     test_batch.push_back(&sr_ctx->srl1);
142 |     test_batch.push_back(&sr_ctx->srl2);
143 |     test_num_batch.push_back(1);
144 |     test_num_batch.push_back(1);
145 | 
146 |     std::map<uint64_t, struct ibv_wc *> l_wait_wc_map;
147 |     ret = client_nm_->rdma_post_sr_list_batch_async(test_batch, test_num_batch, l_wait_wc_map);
148 |     ASSERT_TRUE(ret == 0);
149 | 
150 |     std::map<uint64_t, struct ibv_wc *>::iterator it = l_wait_wc_map.begin();
151 |     ASSERT_TRUE(l_wait_wc_map.size() == 2);
152 | 
153 |     while (1) {
154 |         ret = client_nm_->nm_check_completion(l_wait_wc_map);
155 |         ASSERT_TRUE(ret == 0);
156 |         if (ib_is_all_wrid_finished(l_wait_wc_map)) {
157 |             break;
158 |         }
159 |     }
160 | 
161 |     client_nm_->stop_polling();
162 |     pthread_join(polling_tid, NULL);
163 | }


--------------------------------------------------------------------------------
/tests/test_remote_nm.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_TEST_REMOTE_NM_H_
 2 | #define DDCKV_TEST_REMOTE_NM_H_
 3 | 
 4 | #include <gtest/gtest.h>
 5 | 
 6 | #include "ddckv_test.h"
 7 | #include "kv_utils.h"
 8 | #include "nm.h"
 9 | 
10 | typedef struct TagSrReqCtx {
11 |     struct ibv_send_wr sr_list_1[2];
12 |     struct ibv_send_wr sr_list_2[2];
13 |     struct ibv_sge     sg_list_1[2];
14 |     struct ibv_sge     sg_list_2[2];
15 | 
16 |     IbvSrList srl1;
17 |     IbvSrList srl2;
18 |     IbvSrList m_srl[2];
19 | } SrReqCtx;
20 | 
21 | class NMRemoteTest : public DDCKVTest {
22 | protected:
23 |     void SetUp() override;
24 |     void TearDown() override;
25 | 
26 | public:
27 |     struct GlobalConfig global_conf_;
28 |     struct MrInfo mr_info_[2];
29 |     UDPNetworkManager * client_nm_;
30 | 
31 |     uint64_t test_source_data_[4];
32 | 
33 |     SrReqCtx * gen_sr_reqs();
34 | };
35 | 
36 | void * udp_send_recv_server(void * args);
37 | void * udp_send_recv_client(void * args);
38 | 
39 | void * ib_connect_server(void * args);
40 | 
41 | #endif


--------------------------------------------------------------------------------
/tests/test_server.cc:
--------------------------------------------------------------------------------
  1 | #include "test_server.h"
  2 | 
  3 | void ServerTest::SetUp() {
  4 |     setup_server_conf();
  5 |     setup_client_conf();
  6 |     server_ = new Server(&server_conf_);
  7 |     client_nm_ = new UDPNetworkManager(&client_conf_);
  8 | }
  9 | 
 10 | void ServerTest::TearDown() {
 11 |     delete server_;
 12 | }
 13 | 
 14 | TEST_F(ServerTest, ib_connect) {
 15 |     pthread_t server_tid;
 16 |     int ret;
 17 |     ret = pthread_create(&server_tid, NULL, server_main, server_);
 18 |     ASSERT_TRUE(ret == 0);
 19 | 
 20 |     struct MrInfo mr_info;
 21 |     ret = client_nm_->client_connect_one_rc_qp(0, &mr_info);
 22 |     ASSERT_TRUE(ret == 0);
 23 |     ASSERT_TRUE(mr_info.addr == server_conf_.server_base_addr);
 24 | 
 25 |     uint64_t client_msg[2];
 26 |     client_msg[0] = 10086;
 27 |     client_msg[1] = 9527;
 28 |     ret = client_nm_->nm_rdma_write_inl_to_sid(client_msg, sizeof(uint64_t) * 2, 
 29 |         mr_info.addr, mr_info.rkey, 0);
 30 |     ASSERT_TRUE(ret == 0);
 31 | 
 32 |     ASSERT_TRUE(((uint64_t *)(mr_info.addr))[0] == client_msg[0]);
 33 |     ASSERT_TRUE(((uint64_t *)(mr_info.addr))[1] == client_msg[1]);
 34 | 
 35 |     server_->stop();
 36 |     pthread_join(server_tid, NULL);
 37 |     ASSERT_TRUE(ret == 0);
 38 |     ASSERT_TRUE(1);
 39 | }
 40 | 
 41 | TEST_F(ServerTest, rdma_connect) {
 42 |     pthread_t server_tid;
 43 |     int ret;
 44 |     ret = pthread_create(&server_tid, NULL, server_main, server_);
 45 |     ASSERT_TRUE(ret == 0);
 46 | 
 47 |     struct MrInfo gc_info;
 48 |     ret = client_nm_->client_connect_one_rc_qp(0, &gc_info);
 49 |     ASSERT_TRUE(ret == 0);
 50 | 
 51 |     uint64_t msg = 10086;
 52 |     struct ibv_send_wr test_wr;
 53 |     struct ibv_sge     test_sge;
 54 |     memset(&test_wr, 0, sizeof(struct ibv_send_wr));
 55 |     memset(&test_sge, 0, sizeof(struct ibv_sge));
 56 |     test_sge.addr = (uint64_t)&msg;
 57 |     test_sge.length = sizeof(uint64_t);
 58 |     test_sge.lkey = 0;
 59 |     test_wr.sg_list = &test_sge;
 60 |     test_wr.num_sge = 1;
 61 |     test_wr.next = NULL;
 62 |     test_wr.opcode = IBV_WR_RDMA_WRITE;
 63 |     test_wr.send_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED;
 64 |     test_wr.wr.rdma.remote_addr = gc_info.addr;
 65 |     test_wr.wr.rdma.rkey = gc_info.rkey;
 66 |     test_wr.wr_id = 10000;
 67 |     ret = client_nm_->rdma_post_send_batch_async(0, &test_wr);
 68 |     ASSERT_TRUE(ret == 0);
 69 |     
 70 |     struct ibv_wc wc;
 71 |     ret = client_nm_->rdma_poll_one_completion(&wc);
 72 |     ASSERT_TRUE(ret == 0);
 73 |     ASSERT_TRUE(wc.status == IBV_WC_SUCCESS);
 74 |     ASSERT_TRUE(wc.wr_id == 10000);
 75 |     
 76 |     msg = *(uint64_t *)(gc_info.addr);
 77 |     ASSERT_TRUE(msg == 10086);
 78 | 
 79 |     server_->stop();
 80 |     pthread_join(server_tid, NULL);
 81 |     ASSERT_TRUE(ret == 0);
 82 |     ASSERT_TRUE(1);
 83 | }
 84 | 
 85 | TEST_F(ServerTest, alloc) {
 86 |     pthread_t server_tid;
 87 |     int ret;
 88 |     ret = pthread_create(&server_tid, NULL, server_main, server_);
 89 |     ASSERT_TRUE(ret == 0);
 90 | 
 91 |     struct MrInfo addr_info;
 92 |     ret = client_nm_->client_connect_one_rc_qp(0, &addr_info);
 93 |     ASSERT_TRUE(ret == 0);
 94 | 
 95 |     for (int i = 0; i < 10; i ++) {
 96 |         struct KVMsg alloc_req;
 97 |         memset(&alloc_req, 0, sizeof(struct KVMsg));
 98 |         alloc_req.type = REQ_ALLOC;
 99 |         alloc_req.id   = client_nm_->get_server_id();
100 |         serialize_kvmsg(&alloc_req);
101 |         ret = client_nm_->nm_send_udp_msg_to_server(&alloc_req, 0);
102 |         ASSERT_TRUE(ret == 0);
103 | 
104 |         struct KVMsg alloc_rep;
105 |         ret = client_nm_->nm_recv_udp_msg(&alloc_rep, NULL, NULL);
106 |         ASSERT_TRUE(ret == 0);
107 |         deserialize_kvmsg(&alloc_rep);
108 | 
109 |         ASSERT_TRUE(alloc_rep.body.mr_info.addr == server_->get_kv_area_addr() + i * server_conf_.block_size) << "ret_addr: 0x" << std::hex << alloc_rep.body.mr_info.addr
110 |             << " kv_area_off: 0x" << std::hex << server_->get_kv_area_addr() + i * server_conf_.block_size;
111 |     }
112 |     
113 |     server_->stop();
114 |     pthread_join(server_tid, NULL);
115 | }
116 | 
117 | TEST_F(ServerTest, alloc_subtable) {
118 |     pthread_t server_tid;
119 |     int ret;
120 |     ret = pthread_create(&server_tid, NULL, server_main, server_);
121 |     ASSERT_TRUE(ret == 0);
122 | 
123 |     struct MrInfo addr_info;
124 |     ret = client_nm_->client_connect_one_rc_qp(0, &addr_info);
125 |     ASSERT_TRUE(ret == 0);
126 | 
127 |     for (int i = 0; i < 32; i ++) {
128 |         struct KVMsg alloc_req;
129 |         memset(&alloc_req, 0, sizeof(struct KVMsg));
130 |         alloc_req.type = REQ_ALLOC_SUBTABLE;
131 |         alloc_req.id   = client_nm_->get_server_id();
132 |         serialize_kvmsg(&alloc_req);
133 |         ret = client_nm_->nm_send_udp_msg_to_server(&alloc_req, 0);
134 |         ASSERT_TRUE(ret == 0);
135 | 
136 |         struct KVMsg alloc_rep;
137 |         ret = client_nm_->nm_recv_udp_msg(&alloc_rep, NULL, NULL);
138 |         ASSERT_TRUE(ret == 0);
139 |         deserialize_kvmsg(&alloc_rep);
140 | 
141 |         ASSERT_TRUE(alloc_rep.body.mr_info.addr == server_->get_subtable_st_addr() + i * roundup_256(SUBTABLE_LEN)) << "ret_addr: 0x" << std::hex << alloc_rep.body.mr_info.addr
142 |             << " expected: 0x" << std::hex << server_->get_subtable_st_addr() + i * roundup_256(SUBTABLE_LEN);
143 |     }
144 |     server_->stop();
145 |     pthread_join(server_tid, NULL);
146 | }


--------------------------------------------------------------------------------
/tests/test_server.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_TEST_SERVER_H_
 2 | #define DDCKV_TEST_SERVER_H_
 3 | 
 4 | #include <gtest/gtest.h>
 5 | 
 6 | #include "ddckv_test.h"
 7 | #include "server.h"
 8 | #include "nm.h"
 9 | 
10 | class ServerTest : public DDCKVTest {
11 | protected:
12 |     void SetUp() override;
13 |     void TearDown() override;
14 | 
15 | public:
16 |     Server * server_;
17 |     UDPNetworkManager * client_nm_;
18 | };
19 | 
20 | #endif


--------------------------------------------------------------------------------
/ycsb-test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_library(ycsb_test ycsb_test.cc)
 2 | add_executable(ycsb_test_client ycsb_test_client.cc)
 3 | add_executable(ycsb_test_server ycsb_test_server.cc)
 4 | add_executable(ycsb_wl_loader ycsb_wl_loader.cc)
 5 | add_executable(ycsb_wl_worker ycsb_wl_worker.cc)
 6 | add_executable(ycsb_test_multi_client ycsb_test_multi_client.cc)
 7 | add_executable(ycsb_multi_client_cont_tpt ycsb_multi_client_cont_tpt.cc)
 8 | add_executable(ycsb_server_crash_multi_client ycsb_server_crash_multi_client.cc)
 9 | 
10 | target_compile_options(ycsb_test_client
11 |     PRIVATE
12 |     ${CMAKE_CXX_FLAGS_DEBUG}
13 |     "-g"
14 | )
15 | 
16 | target_compile_options(ycsb_test_multi_client
17 |     PRIVATE
18 |     "-g"
19 | )
20 | 
21 | target_link_libraries(ycsb_test
22 |     libddckv
23 |     pthread
24 |     ibverbs
25 | )
26 | 
27 | target_link_libraries(ycsb_test_multi_client
28 |     libddckv
29 |     ycsb_test
30 |     ibverbs
31 | )
32 | 
33 | target_link_libraries(ycsb_test_client
34 |     libddckv
35 |     ycsb_test
36 |     pthread
37 |     ibverbs
38 | )
39 | 
40 | target_link_libraries(ycsb_test_server
41 |     libddckv
42 |     ycsb_test
43 |     pthread
44 |     ibverbs
45 | )
46 | 
47 | 
48 | target_link_libraries(ycsb_wl_worker
49 | libddckv
50 | ycsb_test
51 | pthread
52 | ibverbs
53 | )
54 | 
55 | target_link_libraries(ycsb_wl_loader
56 |     libddckv
57 |     ycsb_test
58 |     pthread
59 |     ibverbs
60 | )
61 |     
62 | target_link_libraries(ycsb_multi_client_cont_tpt
63 |     ycsb_test
64 |     libddckv
65 |     pthread
66 |     ibverbs
67 | )
68 |     
69 | target_link_libraries(ycsb_server_crash_multi_client
70 |     ycsb_test
71 |     libddckv
72 |     pthread
73 |     ibverbs
74 | )


--------------------------------------------------------------------------------
/ycsb-test/gen-ycsb-workload.py:
--------------------------------------------------------------------------------
 1 | from genericpath import isdir
 2 | import json
 3 | import os
 4 | 
 5 | def get_workload_names(path):
 6 |     workload_name_list = []
 7 |     for i in os.listdir(path):
 8 |         file_path = os.path.join(path, i)
 9 |         assert(os.path.isdir(file_path) == False)
10 |         print(file_path)
11 |         if "upd" in file_path:
12 |             workload_name_list.append(file_path)
13 |     return workload_name_list
14 | 
15 | def mv_files(s_name, d_name):
16 |     cmd = "mv {} {}".format(s_name, d_name)
17 |     os.system(cmd)
18 | 
19 | def gen_workloads(workload_name_list):
20 |     cmd = "./ycsb-c/ycsbc -db basic_file -threads 1 -P {}"
21 |     for workload in workload_name_list:
22 |         run_cmd = cmd.format(workload)
23 |         print("Running: {}".format(run_cmd))
24 |         os.system(run_cmd)
25 |         
26 |         # copy generated files
27 |         d_name = workload.split("/")[-1] + "_load"
28 |         mv_files("workload_load", "./upd-workloads/{}".format(d_name))
29 |         d_name = workload.split("/")[-1] + "_trans"
30 |         mv_files("workload_trans", "./upd-workloads/{}".format(d_name))
31 | 
32 | wllist = get_workload_names("./ycsb-c/workloads")
33 | gen_workloads(wllist)
34 | 


--------------------------------------------------------------------------------
/ycsb-test/merge-ycsb-lat.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | worklaodName = sys.argv[1]
 4 | threadIDList = [i for i in range(8)]
 5 | coroIDList = [i for i in range(10)]
 6 | workloadOpsDict = {
 7 |     "workloada": ["search", "update"],
 8 |     "workloadb": ["search", "update"],
 9 |     "workloadc": ["search"],
10 |     "workloadd": ["search", "insert"]
11 | }
12 | fnameTemplate = 'results/ycsb_{}_lat_{}_{}.txt'
13 | outFnameTemplate = 'results/{}_{}_lat.txt'
14 | 
15 | opsList = workloadOpsDict[worklaodName]
16 | for op in opsList:
17 |     mergedLines = []
18 |     for tid in threadIDList:
19 |         for cid in coroIDList:
20 |             fname = fnameTemplate.format(op, tid, cid)
21 |             tmpFile = open(fname, "r")
22 |             lines = tmpFile.readlines()
23 |             mergedLines += lines
24 |             tmpFile.close()
25 |     outFname = outFnameTemplate.format(worklaodName, op)
26 |     print("merging {}".format(outFname))
27 |     outF = open(outFname, "w")
28 |     outF.writelines(mergedLines)
29 |     outF.close()


--------------------------------------------------------------------------------
/ycsb-test/split-workload.py:
--------------------------------------------------------------------------------
 1 | from os import close
 2 | import sys
 3 | 
 4 | wlNameList = ['a', 'b', 'c', 'd']
 5 | # wlTemplateList = ["./workloads/workload{}.spec_trans", "./ycsb-small/workload{}_small.trans"]
 6 | wlTemplateList = ["./ycsb-small/workload{}_small.trans"]
 7 | splitNum = int(sys.argv[1])
 8 | 
 9 | for n in wlNameList:
10 |     for tplate in wlTemplateList:
11 |         fname = tplate.format(n)
12 |         wlFile = open(fname, "r")
13 |         lines = wlFile.readlines()
14 |         lineNum = len(lines)
15 |         splitSize = lineNum / splitNum
16 |         for i in range(splitNum):
17 |             print(i * splitSize, (i + 1) * splitSize)
18 |             slines = lines[int(i * splitSize): int((i + 1) * splitSize)]
19 |             splitFname = fname + str(i)
20 |             outFile = open(splitFname, "w")
21 |             outFile.writelines(slines)
22 |             outFile.close()
23 |         wlFile.close()


--------------------------------------------------------------------------------
/ycsb-test/ycsb_multi_client_cont_tpt.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include <atomic>
 5 | 
 6 | #include "client.h"
 7 | #include "ycsb_test.h"
 8 | 
 9 | int main(int argc, char ** argv) {
10 |     if (argc != 4) {
11 |         printf("Usage: %s path-to-config-file workload-name num-clients\n", argv[0]);
12 |         return 1;
13 |     }
14 | 
15 |     WorkloadFileName * workload_fnames = get_workload_fname(argv[2]);
16 |     int num_clients = atoi(argv[3]);
17 | 
18 |     GlobalConfig config;
19 |     int ret = load_config(argv[1], &config);
20 |     assert(ret == 0);
21 | 
22 |     // bind this process to main core
23 |     // run client args
24 |     RunClientArgs * client_args_list = (RunClientArgs *)malloc(sizeof(RunClientArgs) * num_clients);
25 |     pthread_barrier_t global_load_barrier;
26 |     pthread_barrier_init(&global_load_barrier, NULL, num_clients);
27 |     pthread_barrier_t global_timer_barrier;
28 |     pthread_barrier_init(&global_timer_barrier, NULL, num_clients);
29 |     volatile bool should_stop = false;
30 | 
31 |     pthread_t tid_list[num_clients];
32 |     for (int i = 0; i < num_clients; i ++) {
33 |         client_args_list[i].client_id    = config.server_id - config.memory_num;
34 |         client_args_list[i].thread_id    = i;
35 |         client_args_list[i].main_core_id = config.main_core_id + i * 2;
36 |         client_args_list[i].poll_core_id = config.poll_core_id + i * 2;
37 |         client_args_list[i].workload_name = argv[2];
38 |         client_args_list[i].config_file   = argv[1];
39 |         client_args_list[i].load_barrier  = &global_load_barrier;
40 |         client_args_list[i].should_stop   = &should_stop;
41 |         client_args_list[i].timer_barrier = &global_timer_barrier;
42 |         client_args_list[i].ret_num_ops = 0;
43 |         client_args_list[i].ret_faile_num = 0;
44 |         client_args_list[i].num_threads = num_clients;
45 |         pthread_t tid;
46 |         pthread_create(&tid, NULL, run_client_cont_tpt, &client_args_list[i]);
47 |         tid_list[i] = tid;
48 |     }
49 | 
50 |     for (int i = 0; i < num_clients; i ++) {
51 |         pthread_join(tid_list[i], NULL);
52 |         printf("thread %d finished\n", i);
53 |     }
54 | }


--------------------------------------------------------------------------------
/ycsb-test/ycsb_server_crash_multi_client.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | #include "client.h"
 4 | #include "ycsb_test.h"
 5 | 
 6 | int main(int argc, char ** argv) {
 7 |     if (argc != 4) {
 8 |         printf("Usage: %s path-to-config-file workload-name num-clients\n", argv[0]);
 9 |         return 1;
10 |     }
11 | 
12 |     WorkloadFileName * workload_fnames = get_workload_fname(argv[2]);
13 |     int num_clients = atoi(argv[3]);
14 | 
15 |     GlobalConfig config;
16 |     int ret = load_config(argv[1], &config);
17 |     assert(ret == 0);
18 | 
19 |     RunClientArgs * client_args_list = (RunClientArgs *)malloc(sizeof(RunClientArgs) * num_clients);
20 |     pthread_barrier_t global_load_barrier;
21 |     pthread_barrier_init(&global_load_barrier, NULL, num_clients);
22 |     pthread_barrier_t global_timer_barrier;
23 |     pthread_barrier_init(&global_timer_barrier, NULL, num_clients);
24 |     volatile bool should_stop = false;
25 | 
26 |     pthread_t tid_list[num_clients];
27 |     for (int i = 0; i < num_clients; i ++) {
28 |         client_args_list[i].client_id    = config.server_id - config.memory_num;
29 |         client_args_list[i].thread_id    = i;
30 |         client_args_list[i].main_core_id = config.main_core_id + i * 2;
31 |         client_args_list[i].poll_core_id = config.poll_core_id + i * 2;
32 |         client_args_list[i].workload_name = argv[2];
33 |         client_args_list[i].config_file   = argv[1];
34 |         client_args_list[i].load_barrier  = &global_load_barrier;
35 |         client_args_list[i].should_stop   = &should_stop;
36 |         client_args_list[i].timer_barrier = &global_timer_barrier;
37 |         client_args_list[i].ret_num_ops = 0;
38 |         client_args_list[i].ret_faile_num = 0;
39 |         client_args_list[i].num_threads = num_clients;
40 |         pthread_t tid;
41 |         pthread_create(&tid, NULL, run_client_on_crash_cont_tpt, &client_args_list[i]);
42 |         tid_list[i] = tid;
43 |     }
44 | 
45 |     for (int i = 0; i < num_clients; i ++) {
46 |         pthread_join(tid_list[i], NULL);
47 |         printf("thread %d finished\n", i);
48 |     }
49 | }


--------------------------------------------------------------------------------
/ycsb-test/ycsb_test.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDCKV_YCSB_TEST_H_
 2 | #define DDCKV_YCSB_TEST_H_
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <stdint.h>
 7 | #include <sys/time.h>
 8 | 
 9 | #include "client.h"
10 | #include "client_cr.h"
11 | 
12 | typedef struct TagWorkloadFileName {
13 |     char load_fname[64];
14 |     char trans_fname[64];
15 | } WorkloadFileName;
16 | 
17 | typedef struct TagRunClientArgs {
18 |     int thread_id;
19 |     int main_core_id;
20 |     int poll_core_id;
21 |     char * workload_name;
22 |     char * config_file;
23 |     pthread_barrier_t * load_barrier;
24 |     volatile bool * should_stop;
25 |     // bool * timer_is_ready;
26 |     pthread_barrier_t * timer_barrier;
27 | 
28 |     uint32_t ret_num_ops;
29 |     uint32_t ret_faile_num;
30 | 
31 |     uint32_t client_id;
32 |     uint32_t num_threads;
33 | } RunClientArgs;
34 | 
35 | int is_valid_workload(char * workload_name);
36 | WorkloadFileName * get_workload_fname(char * workload_name);
37 | WorkloadFileName * get_workload_fname(char * workload_name, int thread_id);
38 | int get_num_failed(ClientFiberArgs * fb_args_list, int num_coro);
39 | uint64_t get_time_spent(ClientFiberArgs * fb_args_list, int num_coro);
40 | bool time_is_less_than(struct timeval * t1, struct timeval * t2);
41 | 
42 | int load_workload(Client & client, WorkloadFileName * workload_fnames);
43 | int load_workload(ClientCR & client, WorkloadFileName * workload_fnames);
44 | int load_workload_sync(Client & client, WorkloadFileName * workload_fnames);
45 | int load_workload_sync(ClientCR & client, WorkloadFileName * workload_fnames);
46 | int load_workload_1coro(Client & client, WorkloadFileName * workload_fnames);
47 | int load_workload_1coro(Client & client, WorkloadFileName * workload_fnames, int st, int ed);
48 | 
49 | int load_test_cnt_time(Client & client, WorkloadFileName * workload_fnames);
50 | int load_test_cnt_ops(Client & client, WorkloadFileName * workload_fnames);
51 | int load_test_cnt_ops(ClientCR & client, WorkloadFileName * workloadfnames);
52 | int load_test_cnt_ops_mt(Client & client, WorkloadFileName * workload_fnames, RunClientArgs * args);
53 | int load_test_cnt_ops_mt(ClientCR & client, WorkloadFileName * workload_fnames, RunClientArgs * arg);
54 | int load_test_cnt_ops_on_crash(Client & client, WorkloadFileName * workload_fnames);
55 | int load_test_cnt_ops_mt_on_crash_cont_sample(Client & client, WorkloadFileName * workload_fnames);
56 | 
57 | int load_test_lat_mt(Client & client, WorkloadFileName * workload_fnames, RunClientArgs * args, const char * out_fname);
58 | int load_test_lat_mt(ClientCR & client, WorkloadFileName * workload_fnames, RunClientArgs * args, const char * out_fname);
59 | int load_test_lat(Client & client, WorkloadFileName * get_workload_fname, const char * out_fname);
60 | int load_test_lat(ClientCR & client, WorkloadFileName * get_workload_fname, const char * out_fname);
61 | 
62 | void conf_reassign_cores(GlobalConfig * conf, int new_client_id);
63 | 
64 | void timer_fb_func(volatile bool * should_stop, int seconds);
65 | void timer_fb_func_ms(volatile bool * should_stop, int milliseconds);
66 | 
67 | void * run_client(void * _args);
68 | void * run_client_cr(void * _args);
69 | void * run_client_lat(void *_args);
70 | void * run_client_cr_lat(void * _args);
71 | void * run_client_cont_tpt(void * _args);
72 | void * run_client_on_crash_cont_tpt(void * _args);
73 | 
74 | #endif


--------------------------------------------------------------------------------
/ycsb-test/ycsb_test_client.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | #include "client.h"
 4 | #include "ycsb_test.h"
 5 | 
 6 | int main(int argc, char ** argv) {
 7 |     if (argc != 3) {
 8 |         printf("Usage: %s path-to-config-file workload-name\n", argv[0]);
 9 |         return 1;
10 |     }
11 | 
12 |     WorkloadFileName * workload_fnames = get_workload_fname(argv[2]);
13 | 
14 |     int ret = 0;
15 |     GlobalConfig config;
16 |     ret = load_config(argv[1], &config);
17 |     assert(ret == 0);
18 |     printf("running with %d coros\n", config.num_coroutines);
19 | 
20 |     // bind this process to main core
21 |     cpu_set_t cpuset;
22 |     CPU_ZERO(&cpuset);
23 |     CPU_SET(config.main_core_id, &cpuset);
24 |     ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
25 |     assert(ret == 0);
26 |     ret = sched_getaffinity(0, sizeof(cpuset), &cpuset);
27 |     for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) {
28 |         if (CPU_ISSET(i, &cpuset)) {
29 |             printf("main process running on core: %d\n", i);
30 |         }
31 |     }
32 | 
33 |     Client client(&config);
34 | 
35 |     // start polling thread
36 |     pthread_t polling_tid = client.start_polling_thread();
37 |     
38 |     // 1. load workload load
39 |     ret = load_workload(client, workload_fnames);
40 |     assert(ret == 0);
41 | 
42 |     // 2. load test workload
43 |     // ret = load_test_cnt_time(client, workload_fnames);
44 |     bool should_stop = false;
45 |     ret = load_test_cnt_ops(client, workload_fnames);
46 |     assert(ret == 0);
47 | 
48 |     client.stop_polling_thread();
49 |     pthread_join(polling_tid, NULL);
50 |     return 0;
51 | }


--------------------------------------------------------------------------------
/ycsb-test/ycsb_test_multi_client.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include <atomic>
 5 | 
 6 | #include "client.h"
 7 | #include "ycsb_test.h"
 8 | 
 9 | int main(int argc, char ** argv) {
10 |     if (argc != 4) {
11 |         printf("Usage: %s path-to-config-file workload-name num-clients\n", argv[0]);
12 |         return 1;
13 |     }
14 | 
15 |     WorkloadFileName * workload_fnames = get_workload_fname(argv[2]);
16 |     int num_clients = atoi(argv[3]);
17 | 
18 |     GlobalConfig config;
19 |     int ret = load_config(argv[1], &config);
20 |     assert(ret == 0);
21 | 
22 |     // bind this process to main core
23 |     // run client args
24 |     RunClientArgs * client_args_list = (RunClientArgs *)malloc(sizeof(RunClientArgs) * num_clients);
25 |     pthread_barrier_t global_load_barrier;
26 |     pthread_barrier_init(&global_load_barrier, NULL, num_clients);
27 |     pthread_barrier_t global_timer_barrier;
28 |     pthread_barrier_init(&global_timer_barrier, NULL, num_clients);
29 |     volatile bool should_stop = false;
30 | 
31 |     pthread_t tid_list[num_clients];
32 |     for (int i = 0; i < num_clients; i ++) {
33 |         client_args_list[i].client_id     = config.server_id - config.memory_num;
34 |         client_args_list[i].thread_id     = i;
35 |         client_args_list[i].main_core_id  = config.main_core_id + i * 2;
36 |         client_args_list[i].poll_core_id  = config.poll_core_id + i * 2;
37 |         client_args_list[i].workload_name = argv[2];
38 |         client_args_list[i].config_file   = argv[1];
39 |         client_args_list[i].load_barrier  = &global_load_barrier;
40 |         client_args_list[i].should_stop   = &should_stop;
41 |         client_args_list[i].timer_barrier = &global_timer_barrier;
42 |         client_args_list[i].ret_num_ops = 0;
43 |         client_args_list[i].ret_faile_num = 0;
44 |         client_args_list[i].num_threads = num_clients;
45 |         pthread_t tid;
46 |         pthread_create(&tid, NULL, run_client, &client_args_list[i]);
47 |         tid_list[i] = tid;
48 |     }
49 | 
50 |     uint32_t total_tpt = 0;
51 |     uint32_t total_failed = 0;
52 |     for (int i = 0; i < num_clients; i ++) {
53 |         pthread_join(tid_list[i], NULL);
54 |         total_tpt += client_args_list[i].ret_num_ops;
55 |         total_failed += client_args_list[i].ret_faile_num;
56 |     }
57 |     printf("total: %d ops\n", total_tpt);
58 |     printf("failed: %d ops\n", total_failed);
59 |     printf("tpt: %d ops/s\n", (total_tpt - total_failed) / config.workload_run_time);
60 | }


--------------------------------------------------------------------------------
/ycsb-test/ycsb_test_server.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <assert.h>
 4 | #include <pthread.h>
 5 | 
 6 | #include "server.h"
 7 | 
 8 | int main(int argc, char ** argv) {
 9 |     if (argc != 2) {
10 |         printf("Usage: %s [server_id]\n", argv[0]);
11 |         return -1;
12 |     }
13 | 
14 |     int32_t server_id = atoi(argv[1]);
15 |     int32_t ret = 0;
16 |     struct GlobalConfig server_conf;
17 |     ret = load_config("./server_config.json", &server_conf);
18 |     assert(ret == 0);
19 |     server_conf.server_id = server_id;
20 | 
21 |     printf("===== Starting Server %d =====\n", server_conf.server_id);
22 |     Server * server = new Server(&server_conf);
23 |     ServerMainArgs server_main_args;
24 |     server_main_args.server = server;
25 |     server_main_args.core_id = server_conf.main_core_id;
26 | 
27 |     pthread_t server_tid;
28 |     pthread_create(&server_tid, NULL, server_main, (void *)&server_main_args);
29 | 
30 |     printf("press to exit\n");
31 |     // getchar();
32 |     printf("===== Ending Server %d =====\n", server_conf.server_id);
33 |     sleep(100000000ll);
34 | 
35 |     server->stop();
36 |     return 0;
37 | }


--------------------------------------------------------------------------------
/ycsb-test/ycsb_wl_loader.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | #include "client.h"
 4 | #include "ycsb_test.h"
 5 | 
 6 | int main(int argc, char ** argv) {
 7 |     if (argc != 3) {
 8 |         printf("Usage: %s path-to-config-file workload-name\n", argv[0]);
 9 |         return 1;
10 |     }
11 | 
12 |     WorkloadFileName * workload_fnames = get_workload_fname(argv[2]);
13 | 
14 |     int ret = 0;
15 |     GlobalConfig config;
16 |     ret = load_config(argv[1], &config);
17 |     assert(ret == 0);
18 |     // config.num_coroutines = 1;
19 | 
20 |     // bind this process to main core
21 |     cpu_set_t cpuset;
22 |     CPU_ZERO(&cpuset);
23 |     CPU_SET(config.main_core_id, &cpuset);
24 |     ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
25 |     assert(ret == 0);
26 |     
27 |     // check if affinity is successfully set
28 |     CPU_ZERO(&cpuset);
29 |     ret = sched_getaffinity(0, sizeof(cpuset), &cpuset);
30 |     for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) {
31 |         if (CPU_ISSET(i, &cpuset)) {
32 |             printf("main process running on core: %d\n", i);
33 |         }
34 |     }
35 | 
36 |     Client client(&config);
37 | 
38 |     // start polling_tid
39 |     pthread_t polling_tid = client.start_polling_thread();
40 |     
41 |     // load workload
42 |     ret = load_workload(client, workload_fnames);
43 |     assert(ret == 0);
44 | 
45 |     client.stop_polling_thread();
46 |     pthread_join(polling_tid, NULL);
47 | 
48 |     client.dump_cache();
49 |     return 0;
50 | }


--------------------------------------------------------------------------------
/ycsb-test/ycsb_wl_worker.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include "client.h"
 5 | #include "ycsb_test.h"
 6 | 
 7 | int main(int argc, char ** argv) {
 8 |     if (argc != 4) {
 9 |         printf("Usage: %s client-id config-file workload-name\n", argv[0]);
10 |         return 1;
11 |     }
12 | 
13 |     WorkloadFileName * workload_fnames = get_workload_fname(argv[3]);
14 | 
15 |     int ret = 0;
16 |     GlobalConfig config;
17 |     ret = load_config(argv[2], &config);
18 |     assert(ret == 0);
19 | 
20 |     // assign client id and core id
21 |     int client_id = atoi(argv[1]);
22 |     assert(client_id > config.num_replication);
23 |     config.server_id = client_id;
24 |     conf_reassign_cores(&config, client_id);
25 | 
26 |     cpu_set_t cpuset;
27 |     CPU_ZERO(&cpuset);
28 |     CPU_SET(config.main_core_id, &cpuset);
29 |     ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
30 |     assert(ret == 0);
31 | 
32 |     // check if affinity is successfully set
33 |     CPU_ZERO(&cpuset);
34 |     ret = sched_getaffinity(0, sizeof(cpuset), &cpuset);
35 |     for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i ++) {
36 |         if (CPU_ISSET(i, &cpuset)) {
37 |             printf("main process running on core: %d\n", i);
38 |         }
39 |     }
40 |     
41 |     Client client(&config);
42 |     client.load_cache();
43 |     
44 |     pthread_t polling_tid = client.start_polling_thread();
45 | 
46 |     ret = load_test_cnt_time(client, workload_fnames);
47 |     assert(ret == 0);
48 |     client.stop_polling_thread();
49 |     pthread_join(polling_tid, NULL);
50 |     return 0;
51 | }


--------------------------------------------------------------------------------